Files
ProxyPool/app/plugins/fpw_checkerproxy.py
祀梦 957cee3100 fix(crawl): throttle concurrent CrawlJobs and relax fpw/proxyscrape HTTP
- CrawlJob waits on crawl_slot before JobExecutor semaphore so crawl-all does not fill slots while queued
- BaseHTTPPlugin: longer connect budget for slow international links
- proxyscrape: jsDelivr mirror + longer GitHub/API phases
- fpw_*: higher timeouts/retries; lower internal concurrency on heavy multi-URL plugins

Made-with: Cursor
2026-04-05 13:48:41 +08:00

66 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""checkerproxy.net尝试常见导出路径 + 正文中的 ip:port排除示例占位"""
import re
from typing import List, Set, Tuple
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwCheckerproxyPlugin(BaseHTTPPlugin):
    """Crawler plugin for checkerproxy.net.

    The site exposes no stable public API, so this plugin tries a few
    common export paths and scrapes bare ``ip:port`` pairs out of the
    response bodies. Results may legitimately be empty.
    """

    name = "fpw_checkerproxy"
    display_name = "CheckerProxy.net"
    description = "checkerproxy.net无稳定公开 API 时可能为空;多路径尝试)"

    def __init__(self):
        super().__init__()
        # Candidate export endpoints, tried in order; all results merged.
        self.urls = [
            "https://checkerproxy.net/",
            "https://checkerproxy.net/export",
            "https://checkerproxy.net/api/export",
        ]

    @staticmethod
    def _parse_ip_ports(text: str) -> List[ProxyRaw]:
        """Extract unique ``ip:port`` pairs from *text* as HTTP proxies.

        Rejects known placeholder/loopback IPs, addresses with octets
        > 255 (the regex alone admits e.g. ``999.1.2.3``), and ports
        outside 1-65535; de-duplicates on ``(ip, port)``.
        """
        bad = {"123.123.123.123", "127.0.0.1", "0.0.0.0"}
        seen: Set[Tuple[str, int]] = set()
        out: List[ProxyRaw] = []
        for m in re.finditer(
            # \d{1,5} (was \d{2,5}): single-digit ports are valid and the
            # explicit range check below already covers them.
            r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})\b",
            text,
        ):
            ip, ps = m.group(1), m.group(2)
            if ip in bad:
                continue
            # The regex accepts octets up to 999; validate each one.
            if any(int(octet) > 255 for octet in ip.split(".")):
                continue
            port = int(ps)
            if not (1 <= port <= 65535):
                continue
            key = (ip, port)
            if key in seen:
                continue
            seen.add(key)
            try:
                out.append(ProxyRaw(ip, port, "http"))
            except ValueError:
                # ProxyRaw may reject values it considers malformed; skip.
                continue
        return out

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every candidate URL and merge parsed proxies, capped at 50."""
        merged: List[ProxyRaw] = []
        seen: Set[Tuple[str, int, str]] = set()
        htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
        for html in htmls:
            # Skip failed fetches and bodies too short to carry proxy data.
            if not html or len(html) < 200:
                continue
            for p in self._parse_ip_ports(html):
                k = (p.ip, p.port, p.protocol)
                if k not in seen:
                    seen.add(k)
                    merged.append(p)
                if len(merged) >= 50:
                    break
            if len(merged) >= 50:
                # Cap reached: also stop scanning the remaining pages
                # (the original break only exited the inner loop, so the
                # total could creep past 50).
                break
        if merged:
            logger.info(f"{self.display_name} 解析 {len(merged)}")
        else:
            logger.warning(f"{self.display_name} 未解析到代理(站点可能仅提供在线检测)")
        return merged