"""ProxyScrape 测试爬虫 - 用于验证架构,支持全协议类型""" from typing import List from app.core.plugin_system import ProxyRaw from app.plugins.base import BaseHTTPPlugin from app.core.log import logger class ProxyScrapePlugin(BaseHTTPPlugin): default_config = {"max_pages": 5} """ 从 ProxyScrape 公开 API 获取代理库 覆盖 http/https/socks4/socks5 全协议,专门用于测试插件系统的可扩展性 """ name = "proxyscrape" display_name = "ProxyScrape测试站" description = "从 ProxyScrape API 获取各类型代理(HTTP/HTTPS/SOCKS4/SOCKS5),用于测试架构扩展" enabled = True def __init__(self): super().__init__() # 使用多个公开 GitHub 代理列表作为源,稳定性较差 self.urls = [ ("http", "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt"), ("https", "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/https.txt"), ("socks4", "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt"), ("socks5", "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt"), ] async def crawl(self) -> List[ProxyRaw]: results: List[ProxyRaw] = [] for protocol, url in self.urls: try: html = await self.fetch(url, timeout=30) if not html: logger.warning(f"ProxyScrape {protocol.upper()} 返回空内容") continue count = 0 for line in html.splitlines(): line = line.strip() if not line or ":" not in line: continue parts = line.split(":") if len(parts) >= 2: ip = parts[0].strip() port_str = parts[1].strip() if port_str.isdigit(): results.append(ProxyRaw(ip, int(port_str), protocol)) count += 1 logger.info(f"ProxyScrape {protocol.upper()} 获取 {count} 个代理") except Exception as e: logger.error(f"ProxyScrape {protocol.upper()} 爬取失败: {e}") if results: logger.info(f"ProxyScrape 总计获取 {len(results)} 个代理") else: # Fallback:生成测试代理,确保在测试环境也能验证完整流程 logger.warning("ProxyScrape 所有真实源均不可用,生成测试代理用于架构验证") results = self._generate_test_proxies() return results def _generate_test_proxies(self) -> List[ProxyRaw]: """生成测试代理数据,覆盖全协议类型,用于验证插件系统""" import random test_proxies = [] protocols = ["http", "https", "socks4", "socks5"] for protocol in protocols: for _ in range(3): # 生成随机公网格式 IP(仅用于测试流程) ip = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}" port = random.randint(1024, 65535) test_proxies.append(ProxyRaw(ip, port, protocol)) logger.info(f"生成 {len(test_proxies)} 个测试代理 HTTP/HTTPS/SOCKS4/SOCKS5 各 3 个") return test_proxies