import re
from typing import List

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger

VALID_PROTOCOLS = ("http", "https", "socks4", "socks5")

# Compiled once at module load and hoisted out of the per-line parse loop.
# Each octet is restricted to 0-255, so addresses such as "300.1.1.1" are
# rejected (the previous \d{1,3} pattern accepted any 1-3 digit run).
_IPV4_RE = re.compile(
    r"^(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}"
    r"(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)$"
)


class YunDaiLiPlugin(BaseHTTPPlugin):
    """Crawl free proxies from a public GitHub proxy-list repository.

    The primary source is raw.githubusercontent.com; when every primary
    URL returns an empty body (e.g. network-restricted environments), the
    jsdelivr CDN mirror of the same repository is tried as a fallback.
    """

    default_config = {"max_pages": 5}
    name = "yundaili"
    display_name = "云代理"
    description = "从 GitHub 公开代理列表获取免费代理"

    def __init__(self):
        super().__init__()
        # Primary source: GitHub raw. Each entry is a (protocol, url) pair.
        self.urls = [
            ("http", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt"),
            ("socks4", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt"),
            ("socks5", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt"),
        ]
        # Fallback: the same files served through the jsdelivr CDN.
        self.fallback_urls = [
            ("http", "https://cdn.jsdelivr.net/gh/mmpx12/proxy-list@master/http.txt"),
            ("socks4", "https://cdn.jsdelivr.net/gh/mmpx12/proxy-list@master/socks4.txt"),
            ("socks5", "https://cdn.jsdelivr.net/gh/mmpx12/proxy-list@master/socks5.txt"),
        ]

    def _parse_htmls(self, htmls: List[str], url_mapping: List[tuple]) -> List[ProxyRaw]:
        """Parse plain ``ip:port`` line lists into ``ProxyRaw`` objects.

        Args:
            htmls: Response bodies, one per entry in ``url_mapping``.
            url_mapping: ``(protocol, url)`` pairs aligned with ``htmls``.

        Returns:
            All syntactically valid proxies found across every body; lines
            with an invalid IPv4 address or an out-of-range port are skipped.
        """
        results: List[ProxyRaw] = []
        for (protocol, _), html in zip(url_mapping, htmls):
            if not html:
                logger.warning(f"{self.display_name} {protocol.upper()} 返回空内容,可能网络受限或源已失效")
                continue
            count = 0
            for line in html.splitlines():
                line = line.strip()
                if not line or ":" not in line:
                    continue
                # rpartition splits on the LAST colon, which precedes the
                # port, so a stray colon earlier in the line stays in `ip`
                # and is then rejected by the address check below.
                ip, _, port_str = line.rpartition(":")
                ip = ip.strip()
                port_str = port_str.strip()
                if not _IPV4_RE.match(ip):
                    continue
                if not port_str.isdigit() or not (1 <= int(port_str) <= 65535):
                    continue
                # Unknown protocol labels degrade to plain HTTP.
                final_protocol = protocol if protocol in VALID_PROTOCOLS else "http"
                try:
                    results.append(ProxyRaw(ip, int(port_str), final_protocol))
                except ValueError:
                    continue
                count += 1
            if count:
                logger.info(f"{self.display_name} {protocol.upper()} 解析完成,获取 {count} 个潜在代理")
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every source URL and return the parsed proxies.

        URLs are requested sequentially so one stalled endpoint does not
        drag down the whole batch; the jsdelivr fallback is consulted only
        when the primary sources yield nothing at all.
        """
        results: List[ProxyRaw] = []
        # Sequential requests against the primary source.
        for protocol, url in self.urls:
            html = await self.fetch(url, timeout=12)
            if html:
                results.extend(self._parse_htmls([html], [(protocol, url)]))
        # Fallback only when the primary source produced no proxies.
        if not results:
            logger.warning(f"{self.display_name} GitHub 主源全部返回空,尝试 jsdelivr fallback")
            for protocol, url in self.fallback_urls:
                html = await self.fetch(url, timeout=12)
                if html:
                    results.extend(self._parse_htmls([html], [(protocol, url)]))
        if results:
            logger.info(f"{self.display_name} 总计解析完成,获取 {len(results)} 个潜在代理")
        else:
            logger.warning(f"{self.display_name} 未获取到任何代理")
        return results