from typing import List

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger


class ProxyListDownloadPlugin(BaseHTTPPlugin):
    """Crawl proxies from public GitHub proxy lists, with CDN/API fallbacks.

    Each source entry has a primary GitHub-raw URL and a list of fallback
    mirrors (jsDelivr CDN or the ProxyScrape API) tried in order when the
    primary returns nothing.
    """

    default_config = {"max_pages": 5}
    name = "proxylist_download"
    display_name = "ProxyListDownload"
    description = "从 GitHub 公开代理列表获取代理"

    def __init__(self):
        super().__init__()
        # Primary GitHub raw URL plus fallback mirrors for each protocol.
        self.sources = [
            {
                "primary": "https://raw.githubusercontent.com/komutan234/Proxy-List-Free/main/proxies/http.txt",
                "fallbacks": [
                    "https://cdn.jsdelivr.net/gh/komutan234/Proxy-List-Free@main/proxies/http.txt",
                    "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all",
                ],
                "protocol": "http",
            },
            {
                "primary": "https://raw.githubusercontent.com/komutan234/Proxy-List-Free/main/proxies/socks4.txt",
                "fallbacks": [
                    "https://cdn.jsdelivr.net/gh/komutan234/Proxy-List-Free@main/proxies/socks4.txt",
                    "https://api.proxyscrape.com/v2/?request=get&protocol=socks4&timeout=10000&country=all",
                ],
                "protocol": "socks4",
            },
            {
                "primary": "https://raw.githubusercontent.com/komutan234/Proxy-List-Free/main/proxies/socks5.txt",
                "fallbacks": [
                    "https://cdn.jsdelivr.net/gh/komutan234/Proxy-List-Free@main/proxies/socks5.txt",
                    "https://api.proxyscrape.com/v2/?request=get&protocol=socks5&timeout=10000&country=all",
                ],
                "protocol": "socks5",
            },
        ]

    def _detect_protocol(self, url: str) -> str:
        """Infer the proxy protocol from a URL.

        Checks substrings like "socks4"/"socks5" and the "/http.txt" path or
        "protocol=http" query parameter — deliberately NOT the "https://"
        scheme, which would misclassify every URL as HTTP. Defaults to
        "http" when nothing matches.
        """
        if "socks4" in url:
            return "socks4"
        if "socks5" in url:
            return "socks5"
        # "/http.txt" and "protocol=http" both mean HTTP, as does anything
        # else we cannot classify — so the default covers both cases.
        return "http"

    def _parse_lines(self, html: str, protocol: str) -> List[ProxyRaw]:
        """Parse "ip:port" lines into ProxyRaw entries.

        Normalizes \\r\\n and bare \\r line endings, skips blank or
        colon-free lines, and validates that the port is a decimal number
        in the range 1-65535.
        """
        results = []
        # Normalize all line endings to \n before splitting.
        text = html.replace("\r\n", "\n").replace("\r", "\n")
        for line in text.split("\n"):
            line = line.strip()
            if not line or ":" not in line:
                continue
            # rpartition tolerates colons in the host part (e.g. IPv6-ish
            # or scheme-prefixed entries): the port is always the last field.
            ip, _, port = line.rpartition(":")
            ip = ip.strip()
            port = port.strip()
            if not ip or not port.isdigit():
                continue
            # BUG FIX: int(port) used to be evaluated in the `if` condition,
            # outside the try block. str.isdigit() accepts non-decimal
            # Unicode digits (e.g. "²") that int() rejects with ValueError,
            # so one malformed line could crash the whole parse. Convert
            # inside the try, then range-check.
            try:
                port_num = int(port)
            except ValueError:
                continue
            if 1 <= port_num <= 65535:
                results.append(ProxyRaw(ip, port_num, protocol))
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch and parse all sources, trying fallbacks when a primary is empty.

        Returns the combined list of ProxyRaw entries from every source.
        """
        results = []
        # Fetch all primary URLs concurrently.
        primary_urls = [s["primary"] for s in self.sources]
        primary_htmls = await self.fetch_all(primary_urls, timeout=15)
        for idx, html in enumerate(primary_htmls):
            source = self.sources[idx]
            protocol = source.get("protocol") or self._detect_protocol(source["primary"])
            if html and html.strip():
                results.extend(self._parse_lines(html, protocol))
                continue
            # Primary returned nothing (or only whitespace): try each
            # fallback in order, stopping at the first non-empty response.
            logger.warning(f"{self.display_name} 主源返回空,尝试 fallback: {source['primary']}")
            for fallback_url in source["fallbacks"]:
                fallback_html = await self.fetch(fallback_url, timeout=15)
                if fallback_html and fallback_html.strip():
                    fb_protocol = source.get("protocol") or self._detect_protocol(fallback_url)
                    results.extend(self._parse_lines(fallback_html, fb_protocol))
                    break
        if results:
            logger.info(f"{self.display_name} 解析完成,获得 {len(results)} 个潜在代理")
        return results