# ProxyPool/app/plugins/speedx.py

import re
from typing import List
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger


class SpeedXPlugin(BaseHTTPPlugin):
    """Crawl the plain-text ``ip:port`` proxy lists of TheSpeedX/SOCKS-List.

    Three lists (http, socks4, socks5) are fetched from raw.githubusercontent.com.
    When those yield no usable proxy at all, the same three files are fetched
    from the jsDelivr mirror instead.
    """

    # Not read anywhere in this class; presumably consumed by the plugin
    # framework -- TODO confirm against BaseHTTPPlugin.
    default_config = {"max_pages": 5}
    name = "speedx"
    display_name = "SpeedX代理库"
    description = "从 SpeedX GitHub 仓库获取 SOCKS 代理列表"

    # Compiled once instead of per line. Explicit [0-9] classes are used because
    # ``\d`` / ``str.isdigit`` also accept non-ASCII digits (e.g. "²") that
    # ``int()`` rejects with ValueError.
    _IPV4_RE = re.compile(r"[0-9]{1,3}(?:\.[0-9]{1,3}){3}")
    # Leading zeros are tolerated; the captured part is bounded to 5 digits so
    # that ``int()`` never sees an arbitrarily long digit string.
    _PORT_RE = re.compile(r"0*([0-9]{1,5})")

    def __init__(self):
        """Set up the primary (GitHub raw) and fallback (jsDelivr) list URLs."""
        super().__init__()
        self.urls = [
            "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
            "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
            "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
        ]
        self.fallback_urls = [
            "https://cdn.jsdelivr.net/gh/TheSpeedX/SOCKS-List@master/http.txt",
            "https://cdn.jsdelivr.net/gh/TheSpeedX/SOCKS-List@master/socks4.txt",
            "https://cdn.jsdelivr.net/gh/TheSpeedX/SOCKS-List@master/socks5.txt",
        ]

    @classmethod
    def _parse_endpoint(cls, line: str):
        """Return ``(ip, port)`` for a valid ``ip:port[:...]`` line, else ``None``.

        Only the first two colon-separated fields are used; any further fields
        are ignored. The IP must be a dotted quad with every octet in 0-255 and
        the port must be in 1-65535.
        """
        fields = line.split(":")
        if len(fields) < 2:
            return None

        ip = fields[0].strip()
        if cls._IPV4_RE.fullmatch(ip) is None:
            return None
        if any(int(octet) > 255 for octet in ip.split(".")):
            return None

        port_match = cls._PORT_RE.fullmatch(fields[1].strip())
        if port_match is None:
            return None
        port = int(port_match.group(1))
        if not 1 <= port <= 65535:
            return None

        return ip, port

    def _parse_htmls(self, htmls: List[str], urls: List[str]) -> List[ProxyRaw]:
        """Parse fetched list bodies into ``ProxyRaw`` entries.

        Args:
            htmls: Response bodies, positionally paired with ``urls``. Falsy
                entries are skipped.
            urls: The URL each body was fetched from; used to pick the protocol.

        Returns:
            One ``ProxyRaw(ip, port, protocol)`` per valid line, in input order.
        """
        results = []
        # zip() pairs bodies with their URLs and cannot run past either list.
        for url, html in zip(urls, htmls):
            if not html:
                continue

            # Derive the protocol from the URL; anything that is not a socks
            # list is treated as http.
            if "socks5" in url:
                protocol = "socks5"
            elif "socks4" in url:
                protocol = "socks4"
            else:
                protocol = "http"

            for raw_line in html.splitlines():
                endpoint = self._parse_endpoint(raw_line.strip())
                if endpoint is None:
                    continue
                ip, port = endpoint
                results.append(ProxyRaw(ip, port, protocol))
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch and parse the proxy lists, using the mirror if nothing parses.

        Assumes ``fetch_all`` returns one body per URL in the same order, with
        a falsy entry for a failed fetch -- TODO confirm against BaseHTTPPlugin.

        Returns:
            The parsed proxies; an empty list when both sources yield nothing.
        """
        htmls = await self.fetch_all(self.urls, timeout=15)
        results = self._parse_htmls(htmls, self.urls)

        # The mirror is consulted only when the primary source produced no
        # proxy at all, not when just some of the lists failed.
        if not results:
            logger.warning(f"{self.display_name} GitHub 源全部返回空，尝试 jsdelivr fallback")
            htmls = await self.fetch_all(self.fallback_urls, timeout=15)
            results = self._parse_htmls(htmls, self.fallback_urls)

        if results:
            logger.info(f"{self.display_name} 解析完成，获取 {len(results)} 个潜在代理")
        return results