"""socks-proxy.net / sslproxies.org 表格(README 参考 GetProxyFromSocks-proxy.py)。""" import re from typing import List from app.core.plugin_system import ProxyRaw from app.plugins.base import BaseHTTPPlugin from app.core.log import logger class FpwSocksSslProxyPlugin(BaseHTTPPlugin): name = "fpw_socks_ssl_proxy" display_name = "Socks-Proxy / SSLProxies" description = "socks-proxy.net 与 sslproxies.org 首页表格(HTTP/HTTPS 列表)" def __init__(self): super().__init__() self.max_concurrency = 2 # 与 sslproxies 同模板的镜像站较多,socks-proxy 在部分网络下不稳定,多源提高成功率 self.urls = [ "https://www.sslproxies.org/", "https://free-proxy-list.net/", "https://www.us-proxy.org/", "https://www.socks-proxy.net/", ] def _parse_page(self, html: str, default_protocol: str) -> List[ProxyRaw]: results = [] pattern = re.compile( r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*]*>\s*(\d+)", re.I, ) for ip, port in pattern.findall(html): if port.isdigit() and 1 <= int(port) <= 65535: try: results.append(ProxyRaw(ip, int(port), default_protocol)) except ValueError: continue return results async def crawl(self) -> List[ProxyRaw]: results: List[ProxyRaw] = [] htmls = await self.fetch_all(self.urls, timeout=25, retries=2) for url, html in zip(self.urls, htmls): if not html: continue if "socks-proxy" in url: proto = "socks4" else: proto = "http" batch = self._parse_page(html, proto) results.extend(batch) if batch: logger.info(f"{self.display_name} {url}: {len(batch)} 条") if results: logger.info(f"{self.display_name} 合计 {len(results)} 条") return results