feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404 (request sketch after this list)
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB (conftest sketch after this list)
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import
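
A hedged sketch of the IPv6-safe delete call: the endpoint path comes from the bullet above, but the host, JSON field names, and payload shape are assumptions, not the actual schema.

import requests

# Ship the address in the JSON body rather than in the URL, so IPv6 colons
# never collide with host:port or path parsing. Field names are assumptions.
requests.post(
    "http://127.0.0.1:8000/api/proxies/delete-one",
    json={"ip": "2001:db8::1", "port": 8080, "protocol": "http"},
)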
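
A minimal conftest.py sketch for the test-DB isolation, assuming the app resolves its SQLite path from PROXYPOOL_DB_PATH when its modules are imported; the exact mechanism in the repo may differ.

# conftest.py -- point every test run at a throwaway SQLite file so the suite
# never touches the production db/proxies.sqlite. Set before app imports.
import os

os.environ.setdefault("PROXYPOOL_DB_PATH", "db/proxies.test.sqlite")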

Made-with: Cursor
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions


@@ -0,0 +1,74 @@
"""proxynova.com 表格内 JS 混淆 IP + 明文端口。"""
import re
from typing import List, Optional
from bs4 import BeautifulSoup
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwProxynovaPlugin(BaseHTTPPlugin):
name = "fpw_proxynova"
display_name = "ProxyNova"
description = "proxynova.com 代理列表(解析 document.write 混淆 IP"
def __init__(self):
super().__init__()
self.urls = ["https://www.proxynova.com/proxy-server-list/"]
@staticmethod
def _decode_proxynova_ip(script_inner: str) -> Optional[str]:
"""解析 document.write(\".081.301\".split(\"\").reverse()...concat(\"118.174\"...))"""
m1 = re.search(r'document\.write\("([^"]+)"\.split', script_inner)
m2 = re.search(r'\.concat\("([^"]+)"', script_inner)
if not m1 or not m2:
return None
a, b = m1.group(1), m2.group(1)
part1 = "".join(reversed(a))
return part1 + b
def _parse_rows(self, html: str) -> List[ProxyRaw]:
soup = BeautifulSoup(html, "lxml")
tbody = soup.find("tbody")
if not tbody:
return []
out: List[ProxyRaw] = []
for tr in tbody.find_all("tr"):
tds = tr.find_all("td")
if len(tds) < 2:
continue
script = tds[0].find("script")
if not script or not script.string:
continue
ip = self._decode_proxynova_ip(script.string)
port_txt = tds[1].get_text(strip=True)
if not ip or not port_txt.isdigit():
continue
port = int(port_txt)
if not (1 <= port <= 65535):
continue
row_text = tr.get_text(" ", strip=True).upper()
if "SOCKS5" in row_text:
proto = "socks5"
elif "SOCKS4" in row_text:
proto = "socks4"
elif "HTTPS" in row_text:
proto = "https"
else:
proto = "http"
try:
out.append(ProxyRaw(ip, port, proto))
except ValueError:
continue
return out
async def crawl(self) -> List[ProxyRaw]:
html = await self.fetch(self.urls[0], timeout=14, retries=1)
if not html:
return []
results = self._parse_rows(html)
if results:
logger.info(f"{self.display_name} 解析 {len(results)}")
return results
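
For reference, a quick worked example of the deobfuscation handled by _decode_proxynova_ip above (not part of the diff; the module path in the import is an assumption):

# ".081.301" reversed character-by-character gives "103.180.", and
# .concat("118.174") appends the tail, yielding "103.180.118.174".
from app.plugins.fpw_proxynova import FpwProxynovaPlugin  # path is an assumption

script = 'document.write(".081.301".split("").reverse().join("").concat("118.174"))'
assert FpwProxynovaPlugin._decode_proxynova_ip(script) == "103.180.118.174"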