Files
ProxyPool/app/plugins/fpw_proxy_list_download.py
祀梦 e582067316 fix(plugins): fpw parsers for JSON API, mirrors, and looser HTML
- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows

Made-with: Cursor
2026-04-05 14:16:03 +08:00

125 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""www.proxy-list.download 公开 API常见为 JSON内含 list 字段的 ip:port 文本)。"""
import json
from typing import Any, List
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwProxyListDownloadPlugin(BaseHTTPPlugin):
    """Harvest public proxy lists from www.proxy-list.download, with fallbacks.

    Tier order (each tier is tried only when every previous one yielded
    nothing):
      1. The official proxy-list.download v1 API (plain text or JSON bodies).
      2. A jsDelivr mirror of the monosans/proxy-list plain-text files.
      3. The ProxyScrape v2 API as a last resort.
    """

    name = "fpw_proxy_list_download"
    display_name = "Proxy-List.download"
    description = "proxy-list.download 官方 API(http/https/socks4/socks5)"
    # The upstream API can be slow; allow the whole crawl up to 5 minutes.
    crawl_timeout_seconds = 300.0

    def __init__(self):
        super().__init__()
        self.max_concurrency = 2
        # (protocol, url) pairs for the official proxy-list.download API.
        self.api_pairs = [
            ("http", "https://www.proxy-list.download/api/v1/get?type=http"),
            ("https", "https://www.proxy-list.download/api/v1/get?type=https"),
            ("socks4", "https://www.proxy-list.download/api/v1/get?type=socks4"),
            ("socks5", "https://www.proxy-list.download/api/v1/get?type=socks5"),
        ]
        # jsDelivr CDN mirror of the monosans/proxy-list text files.
        self._mirror_prefix = (
            "https://cdn.jsdelivr.net/gh/monosans/proxy-list@main/proxies/"
        )
        # ProxyScrape v2 API, used only when both tiers above come up empty.
        self.fallback_pairs = [
            ("http", "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("https", "https://api.proxyscrape.com/v2/?request=get&protocol=https&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("socks4", "https://api.proxyscrape.com/v2/?request=get&protocol=socks4&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("socks5", "https://api.proxyscrape.com/v2/?request=get&protocol=socks5&timeout=10000&country=all&ssl=all&anonymity=all"),
        ]

    def _items_to_proxies(self, items: List[Any], protocol: str) -> List[ProxyRaw]:
        """Convert a JSON array into ProxyRaw entries.

        Array elements may be dicts (host under ip/IP/host/Host, port under
        port/Port) or plain "ip:port" strings. Entries with a missing host or
        an out-of-range port are skipped silently.
        """
        out: List[ProxyRaw] = []
        for it in items:
            if isinstance(it, dict):
                ip = str(
                    it.get("ip")
                    or it.get("IP")
                    or it.get("host")
                    or it.get("Host")
                    or ""
                ).strip()
                port = it.get("port") or it.get("Port")
                if not ip or port is None:
                    continue
                ps = str(port).strip()
                if not ps.isdigit() or not (1 <= int(ps) <= 65535):
                    continue
                try:
                    out.append(ProxyRaw(ip, int(ps), protocol))
                except ValueError:
                    # ProxyRaw rejected the candidate (e.g. malformed host).
                    continue
            elif isinstance(it, str) and ":" in it:
                out.extend(self.parse_text_proxies(it, protocol))
        return out

    def _parse_api_body(self, text: str, protocol: str) -> List[ProxyRaw]:
        """Parse an API response that may be JSON or plain ip:port text.

        JSON bodies may be a bare array, or an object carrying the payload
        under a list/data/proxies key (either a text blob or an array).
        Anything that is not valid JSON falls back to plain-text parsing.
        """
        text = (text or "").strip()
        if not text:
            return []
        if text[0] in "{[":
            try:
                data = json.loads(text)
            except json.JSONDecodeError:
                # Looked like JSON but was not; treat it as plain text.
                return self.parse_text_proxies(text, protocol)
            if isinstance(data, list):
                return self._items_to_proxies(data, protocol)
            if isinstance(data, dict):
                for key in ("list", "LIST", "data", "Data", "proxies", "Proxies"):
                    raw = data.get(key)
                    if isinstance(raw, str) and raw.strip():
                        return self.parse_text_proxies(raw, protocol)
                    if isinstance(raw, list):
                        return self._items_to_proxies(raw, protocol)
                return []
            return []
        return self.parse_text_proxies(text, protocol)

    async def _harvest(self, pairs, timeout, retries, parse, tag) -> List[ProxyRaw]:
        """Fetch every URL in (protocol, url) *pairs* and parse the bodies.

        *parse* is a callable ``(text, protocol) -> List[ProxyRaw]``; each
        non-empty batch is logged under *tag* and appended to the result.
        """
        results: List[ProxyRaw] = []
        bodies = await self.fetch_all(
            [u for _, u in pairs], timeout=timeout, retries=retries
        )
        for (protocol, _), text in zip(pairs, bodies):
            if not text:
                continue
            batch = parse(text, protocol)
            if batch:
                results.extend(batch)
                logger.info(f"{self.display_name} {tag} {protocol}: {len(batch)}")
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Collect proxies, falling through API -> mirror -> ProxyScrape tiers."""
        results = await self._harvest(
            self.api_pairs, 18.0, 1, self._parse_api_body, "API"
        )
        if not results:
            logger.warning(f"{self.display_name} 主 API 无数据,尝试 jsDelivr 文本镜像")
            mirror_pairs = [
                (p, f"{self._mirror_prefix}{p}.txt")
                for p in ("http", "https", "socks4", "socks5")
            ]
            results = await self._harvest(
                mirror_pairs, 22.0, 2, self.parse_text_proxies, "镜像"
            )
        if not results:
            logger.warning(f"{self.display_name} 镜像无数据,尝试 ProxyScrape API")

            def _parse_fallback(text: str, protocol: str) -> List[ProxyRaw]:
                # ProxyScrape may answer JSON or plain text; try both.
                return self._parse_api_body(text, protocol) or self.parse_text_proxies(
                    text, protocol
                )

            results = await self._harvest(
                self.fallback_pairs, 20.0, 1, _parse_fallback, "ProxyScrape"
            )
        if results:
            logger.info(f"{self.display_name} 合计 {len(results)}")
        return results