ProxyPool/app/plugins/fpw_socks_ssl_proxy.py

"""socks-proxy.net / sslproxies.org 表格（README 参考 GetProxyFromSocks-proxy.py）。"""
import re
from typing import List

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger


class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
    """Collect proxies from the front-page tables of several proxy-list sites.

    Each configured URL is fetched and parsed with the inherited
    ``parse_html_table`` helper; when that yields nothing, a regex fallback
    (``_parse_page``) scans the raw HTML for adjacent IP / port table cells.
    Pages whose URL contains ``socks-proxy`` are tagged ``socks4``; every
    other page is tagged ``http``.
    """

    name = "fpw_socks_ssl_proxy"
    display_name = "Socks-Proxy / SSLProxies"
    description = "socks-proxy.net 与 sslproxies.org 首页表格（HTTP/HTTPS 列表）"

    # Fallback matcher for an ``IP</td><td ...>PORT`` cell pair, compiled once
    # for the class instead of on every call.
    #   * ``(?<![\d.])`` stops the IP from starting in the middle of a longer
    #     dotted/number run (e.g. "1234.5.6.7" must not yield "234.5.6.7").
    #   * ``(?!\d)`` stops a too-long number from being truncated into a
    #     plausible port (e.g. "808080" must not yield 80808).
    #   * ``re.ASCII`` keeps ``\d`` from matching non-ASCII Unicode digits.
    _ROW_PATTERN = re.compile(
        r"(?<![\d.])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>"
        r"\s*<td[^>]*>\s*(\d{1,5})(?!\d)",
        re.I | re.ASCII,
    )

    def __init__(self):
        """Set the source URLs and the concurrency limit for this plugin."""
        super().__init__()
        # NOTE(review): presumably read by the base class when fetching pages;
        # confirm against BaseHTTPPlugin.
        self.max_concurrency = 2
        # Many mirror sites share the sslproxies template, and socks-proxy is
        # unstable on some networks; using several sources raises the
        # success rate.
        self.urls = [
            "https://www.sslproxies.org/",
            "https://free-proxy-list.net/",
            "https://www.us-proxy.org/",
            "https://www.socks-proxy.net/",
        ]

    def _parse_page(self, html: str, default_protocol: str) -> List[ProxyRaw]:
        """Extract proxies from raw HTML using the regex fallback.

        Args:
            html: Page markup to scan for ``IP</td><td>PORT`` cell pairs.
            default_protocol: Protocol label given to every extracted proxy.

        Returns:
            One ``ProxyRaw`` per distinct, valid ``(ip, port)`` pair, in
            order of first appearance. Entries are skipped when an octet
            exceeds 255, when the port is outside 1-65535, or when
            ``ProxyRaw`` rejects them with ``ValueError``.
        """
        results: List[ProxyRaw] = []
        seen = set()  # (ip, port) pairs already emitted for this page
        for ip, port_text in self._ROW_PATTERN.findall(html):
            port = int(port_text)
            if not 1 <= port <= 65535:
                continue
            # The pattern only limits each octet to three digits, so the
            # numeric range has to be checked separately.
            if any(int(octet) > 255 for octet in ip.split(".")):
                continue
            key = (ip, port)
            if key in seen:
                continue
            seen.add(key)
            try:
                results.append(ProxyRaw(ip, port, default_protocol))
            except ValueError:
                # Best-effort scraping: drop entries ProxyRaw refuses.
                continue
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every configured page and return the proxies found.

        Returns:
            The concatenation of the per-page results, in ``self.urls``
            order. Pages with empty or missing HTML are skipped. Results
            from different pages are not deduplicated against each other.
        """
        results: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
        # NOTE(review): pairing by position assumes fetch_all returns one
        # entry per URL in the same order - confirm against the base class.
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            proto = "socks4" if "socks-proxy" in url else "http"
            # Try the table parser first; fall back to the regex scan only
            # when it finds nothing.
            batch = self.parse_html_table(
                html, column_map={"ip": 0, "port": 1}, protocol=proto
            )
            if not batch:
                batch = self._parse_page(html, proto)
            results.extend(batch)
            if batch:
                logger.info(f"{self.display_name} {url}: {len(batch)} 条")
        if results:
            logger.info(f"{self.display_name} 合计 {len(results)} 条")
        return results