"""checkerproxy.net: try common export paths and scrape ip:port pairs from the body.

Known example/placeholder addresses are excluded from the results.
"""
import re
from typing import List, Set, Tuple

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger

# Dotted-quad followed by ":<port>"; compiled once instead of on every parse call.
_IP_PORT_RE = re.compile(r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})\b")

# Addresses that show up as documentation examples or are never usable proxies.
_PLACEHOLDER_IPS = frozenset({"123.123.123.123", "127.0.0.1", "0.0.0.0"})

# Response bodies shorter than this are skipped without being parsed.
_MIN_BODY_LENGTH = 80

# Once this many proxies have been collected, remaining pages are not parsed.
_ENOUGH_PROXIES = 50


class FpwCheckerproxyPlugin(BaseHTTPPlugin):
    """Crawler plugin that scrapes ``ip:port`` pairs from checkerproxy.net pages."""

    name = "fpw_checkerproxy"
    display_name = "CheckerProxy.net"
    description = "checkerproxy.net(无稳定公开 API 时可能为空;多路径尝试)"

    def __init__(self):
        """Initialise the base plugin and register the candidate URLs to try."""
        super().__init__()
        self.urls = [
            "https://checkerproxy.net/",
            "https://checkerproxy.net/export",
            "https://checkerproxy.net/api/export",
        ]

    @staticmethod
    def _parse_ip_ports(text: str) -> List[ProxyRaw]:
        """Extract unique ``ip:port`` pairs from *text* as HTTP proxies.

        A match is dropped when the IP is a known placeholder, when any octet
        is greater than 255, when the port is outside 1..65535, when the same
        (ip, port) pair was already seen in this text, or when ``ProxyRaw``
        rejects it with ``ValueError``.

        Args:
            text: Raw response body to scan.

        Returns:
            Proxies in order of first appearance, all with protocol ``"http"``.
        """
        seen: Set[Tuple[str, int]] = set()
        proxies: List[ProxyRaw] = []
        for match in _IP_PORT_RE.finditer(text):
            ip, port_text = match.group(1), match.group(2)
            if ip in _PLACEHOLDER_IPS:
                continue
            # The pattern only limits each octet to three digits, so values
            # such as 999 must be rejected explicitly.
            if any(int(octet) > 255 for octet in ip.split(".")):
                continue
            # The pattern guarantees digits, so int() cannot fail here.
            port = int(port_text)
            if not 1 <= port <= 65535:
                continue
            key = (ip, port)
            if key in seen:
                continue
            seen.add(key)
            try:
                proxies.append(ProxyRaw(ip, port, "http"))
            except ValueError:
                # Best effort: skip entries the ProxyRaw constructor rejects.
                continue
        return proxies

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every configured URL and merge the proxies found in each body.

        Proxies are de-duplicated across pages by (ip, port, protocol). The
        "enough proxies" check runs once per page, after that page has been
        merged, so the result may contain more than ``_ENOUGH_PROXIES`` items.

        Returns:
            The merged proxy list; empty when nothing could be parsed.
        """
        merged: List[ProxyRaw] = []
        seen: Set[Tuple[str, int, str]] = set()
        # NOTE(review): fetch_all is presumably provided by BaseHTTPPlugin and
        # yields one body (possibly empty/None) per URL -- confirm in the base class.
        pages = await self.fetch_all(self.urls, timeout=25, retries=2)
        for page in pages:
            if not page or len(page) < _MIN_BODY_LENGTH:
                continue
            for proxy in self._parse_ip_ports(page):
                key = (proxy.ip, proxy.port, proxy.protocol)
                if key not in seen:
                    seen.add(key)
                    merged.append(proxy)
            if len(merged) >= _ENOUGH_PROXIES:
                break
        if merged:
            logger.info(f"{self.display_name} 解析 {len(merged)} 条")
        else:
            logger.warning(f"{self.display_name} 未解析到代理(站点可能仅提供在线检测)")
        return merged