"""www.proxy-list.download public API (commonly JSON with a ``list`` field of ip:port text)."""

import json
from typing import Any, Callable, List, Sequence, Tuple

from app.core.log import logger
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin


class FpwProxyListDownloadPlugin(BaseHTTPPlugin):
    """Crawl proxies from proxy-list.download with two fallback sources.

    Tier order: official proxy-list.download API -> jsDelivr text mirror of
    monosans/proxy-list -> ProxyScrape API. A later tier runs only when every
    earlier tier returned no proxies at all.
    """

    name = "fpw_proxy_list_download"
    display_name = "Proxy-List.download"
    description = "proxy-list.download 官方 API(http/https/socks4/socks5)"
    crawl_timeout_seconds = 300.0

    def __init__(self):
        super().__init__()
        # Keep concurrency low; the upstream API is easily rate-limited.
        self.max_concurrency = 2
        # (protocol, url) pairs for the primary API, one URL per protocol.
        self.api_pairs = [
            ("http", "https://www.proxy-list.download/api/v1/get?type=http"),
            ("https", "https://www.proxy-list.download/api/v1/get?type=https"),
            ("socks4", "https://www.proxy-list.download/api/v1/get?type=socks4"),
            ("socks5", "https://www.proxy-list.download/api/v1/get?type=socks5"),
        ]
        # Plain-text mirror; "<prefix><protocol>.txt" serves ip:port lines.
        self._mirror_prefix = (
            "https://cdn.jsdelivr.net/gh/monosans/proxy-list@main/proxies/"
        )
        # Last-resort source used when both the API and the mirror fail.
        self.fallback_pairs = [
            ("http", "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("https", "https://api.proxyscrape.com/v2/?request=get&protocol=https&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("socks4", "https://api.proxyscrape.com/v2/?request=get&protocol=socks4&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("socks5", "https://api.proxyscrape.com/v2/?request=get&protocol=socks5&timeout=10000&country=all&ssl=all&anonymity=all"),
        ]

    def _items_to_proxies(self, items: List[Any], protocol: str) -> List[ProxyRaw]:
        """Convert a JSON array of dicts and/or "ip:port" strings to proxies.

        Dict items may use any of the ip/IP/host/Host and port/Port key
        spellings; malformed entries are skipped silently.
        """
        out: List[ProxyRaw] = []
        for it in items:
            if isinstance(it, dict):
                ip = str(
                    it.get("ip")
                    or it.get("IP")
                    or it.get("host")
                    or it.get("Host")
                    or ""
                ).strip()
                port = it.get("port") or it.get("Port")
                if not ip or port is None:
                    continue
                ps = str(port).strip()
                # isdigit() alone accepts non-ASCII digit characters (e.g.
                # "²") that int() rejects with ValueError, so require ASCII
                # too — otherwise the range check below can crash.
                if not (ps.isascii() and ps.isdigit()):
                    continue
                if not 1 <= int(ps) <= 65535:
                    continue
                try:
                    out.append(ProxyRaw(ip, int(ps), protocol))
                except ValueError:
                    continue
            elif isinstance(it, str) and ":" in it:
                out.extend(self.parse_text_proxies(it, protocol))
        return out

    def _parse_api_body(self, text: str, protocol: str) -> List[ProxyRaw]:
        """Parse an API response that may be JSON or plain ip:port text.

        A JSON array is converted directly; a JSON object is probed for a
        list/data/proxies payload (either a nested array or an embedded text
        blob). Anything that is not valid JSON falls back to plain-text
        parsing.
        """
        text = (text or "").strip()
        if not text:
            return []
        if text[0] not in "{[":
            return self.parse_text_proxies(text, protocol)
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # Looked like JSON but is not — treat it as raw text.
            return self.parse_text_proxies(text, protocol)
        if isinstance(data, list):
            return self._items_to_proxies(data, protocol)
        if isinstance(data, dict):
            for key in ("list", "LIST", "data", "Data", "proxies", "Proxies"):
                raw = data.get(key)
                if isinstance(raw, str) and raw.strip():
                    return self.parse_text_proxies(raw, protocol)
                if isinstance(raw, list):
                    return self._items_to_proxies(raw, protocol)
        return []

    async def _crawl_tier(
        self,
        pairs: Sequence[Tuple[str, str]],
        parse: Callable[[str, str], List[ProxyRaw]],
        label: str,
        *,
        timeout: float,
        retries: int,
    ) -> List[ProxyRaw]:
        """Fetch every (protocol, url) pair and parse the non-empty bodies.

        *parse* maps (response_text, protocol) to proxies; *label* only
        appears in the per-protocol log line.
        """
        found: List[ProxyRaw] = []
        bodies = await self.fetch_all(
            [u for _, u in pairs], timeout=timeout, retries=retries
        )
        for (protocol, _), text in zip(pairs, bodies):
            if not text:
                continue
            batch = parse(text, protocol)
            if batch:
                found.extend(batch)
                logger.info(f"{self.display_name} {label} {protocol}: {len(batch)} 条")
        return found

    async def crawl(self) -> List[ProxyRaw]:
        """Collect proxies, falling through three sources until one yields data."""
        results = await self._crawl_tier(
            self.api_pairs, self._parse_api_body, "API", timeout=18.0, retries=1
        )
        if not results:
            logger.warning(f"{self.display_name} 主 API 无数据,尝试 jsDelivr 文本镜像")
            mirror_pairs = [
                (p, f"{self._mirror_prefix}{p}.txt")
                for p in ("http", "https", "socks4", "socks5")
            ]
            results = await self._crawl_tier(
                mirror_pairs, self.parse_text_proxies, "镜像", timeout=22.0, retries=2
            )
        if not results:
            logger.warning(f"{self.display_name} 镜像无数据,尝试 ProxyScrape API")

            def _api_or_text(text: str, protocol: str) -> List[ProxyRaw]:
                # ProxyScrape may answer with JSON or bare ip:port lines.
                return self._parse_api_body(text, protocol) or self.parse_text_proxies(
                    text, protocol
                )

            results = await self._crawl_tier(
                self.fallback_pairs,
                _api_or_text,
                "ProxyScrape",
                timeout=20.0,
                retries=1,
            )
        if results:
            logger.info(f"{self.display_name} 合计 {len(results)} 条")
        return results