feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them (registration sketch after the diff)
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove the global crawl_timeout setting (sketch below)
- Validator: fix connect vs. total timeout on save; LRU-cache SOCKS sessions; drop a redundant semaphore (sketch below)
- Validation handler uses a single DB connection; batch upsert after crawl; WorkerPool put_nowait (sketch below)
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats (sketch below); ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes (sketch below); task polling stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB (sketch below)
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import
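
A minimal sketch of the per-plugin crawl timeout, assuming each plugin exposes an async crawl() like the file below; the helper names here are hypothetical, not the repo's actual code:

```python
# Per-plugin crawl timeout: a hung source site burns only its own budget,
# not the whole crawl run's. Helper names are hypothetical.
import asyncio
from typing import List

CRAWL_TIMEOUT_SECONDS = 120  # replaces the removed global crawl_timeout

async def crawl_one(plugin) -> List:
    try:
        return await asyncio.wait_for(plugin.crawl(), timeout=CRAWL_TIMEOUT_SECONDS)
    except asyncio.TimeoutError:
        return []  # this plugin timed out; the others keep their results

async def crawl_all(plugins) -> List:
    batches = await asyncio.gather(*(crawl_one(p) for p in plugins))
    return [proxy for batch in batches for proxy in batch]
```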
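
A sketch of the two validator fixes, assuming aiohttp + aiohttp-socks; the cache size, timeout values, and helper names are assumptions:

```python
# connect vs. total: `connect` bounds only the handshake, `total` bounds the
# whole request. Saving the user's value into the wrong field skewed results.
from collections import OrderedDict
import aiohttp
from aiohttp_socks import ProxyConnector

TIMEOUT = aiohttp.ClientTimeout(total=10, connect=5)

_sessions: "OrderedDict[str, aiohttp.ClientSession]" = OrderedDict()
MAX_SESSIONS = 256  # assumed cap

async def socks_session(proxy_url: str) -> aiohttp.ClientSession:
    # LRU: reuse one open session per SOCKS proxy instead of rebuilding the
    # connector for every probe; evict (and close) the oldest when full.
    if proxy_url in _sessions:
        _sessions.move_to_end(proxy_url)
        return _sessions[proxy_url]
    if len(_sessions) >= MAX_SESSIONS:
        _, old = _sessions.popitem(last=False)
        await old.close()
    session = aiohttp.ClientSession(
        connector=ProxyConnector.from_url(proxy_url), timeout=TIMEOUT
    )
    _sessions[proxy_url] = session
    return session
```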
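
A sketch of the post-crawl batch upsert on a single connection, assuming a SQLite proxies table with a UNIQUE(ip, port) constraint; the schema is hypothetical:

```python
import sqlite3

def upsert_batch(conn: sqlite3.Connection, rows: list[tuple[str, int, str]]) -> None:
    # One connection + one executemany instead of a connection per proxy.
    # ON CONFLICT assumes a UNIQUE index on (ip, port).
    conn.executemany(
        """
        INSERT INTO proxies (ip, port, protocol)
        VALUES (?, ?, ?)
        ON CONFLICT(ip, port) DO UPDATE SET protocol = excluded.protocol
        """,
        rows,
    )
    conn.commit()
```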
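
A sketch of the dashboard stats push, assuming a FastAPI app; the endpoint path, interval, and payload shape are assumptions:

```python
import asyncio
from fastapi import FastAPI, WebSocket, WebSocketDisconnect

app = FastAPI()

@app.websocket("/ws/stats")
async def ws_stats(ws: WebSocket):
    await ws.accept()
    try:
        while True:
            # Push instead of poll: the dashboard gets fresh counters every 2 s.
            await ws.send_json({"total": 0, "alive": 0, "checking": 0})
            await asyncio.sleep(2)
    except WebSocketDisconnect:
        pass
```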
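
A sketch of the IPv6-safe delete, assuming FastAPI; the model and handler names are hypothetical. Carrying ip/port in a JSON body sidesteps path parsing, where the colons in an IPv6 literal are ambiguous against the ip:port separator:

```python
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class DeleteOne(BaseModel):
    ip: str   # "2001:db8::1" passes through a body untouched
    port: int

@app.post("/api/proxies/delete-one")
async def delete_one(req: DeleteOne):
    # delete_proxy(req.ip, req.port)  # actual DB call elided
    return {"deleted": f"{req.ip}:{req.port}"}
```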
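
A sketch of the test DB isolation in conftest.py, assuming the settings module reads PROXYPOOL_DB_PATH at import time:

```python
import os

# Must run before anything imports the app config, or fixtures would open
# (and wipe) the production db/proxies.sqlite.
os.environ.setdefault("PROXYPOOL_DB_PATH", "db/proxies.test.sqlite")
```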

Made-with: Cursor
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions


@@ -0,0 +1,61 @@
"""gatherproxy.com 页面内嵌 JSONPROXY_IP / PROXY_PORT"""
import re
from typing import List
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwGatherproxyPlugin(BaseHTTPPlugin):
name = "fpw_gatherproxy"
display_name = "GatherProxy"
description = "gatherproxy.com 内嵌代理 JSON站点常有限流"
def __init__(self):
super().__init__()
self.urls = [
"http://www.gatherproxy.com/proxylist/anonymity/?t=Elite",
"http://www.gatherproxy.com/proxylist/country/?c=United%20States",
]
def _extract_from_text(self, text: str) -> List[ProxyRaw]:
results: List[ProxyRaw] = []
for m in re.finditer(
r"PROXY_IP['\"]?\s*:\s*['\"]([\d.]+)['\"].{0,120}?PROXY_PORT['\"]?\s*:\s*['\"](\d+)['\"]",
text,
re.DOTALL | re.IGNORECASE,
):
ip, port = m.group(1), m.group(2)
if port.isdigit() and 1 <= int(port) <= 65535:
try:
results.append(ProxyRaw(ip, int(port), "http"))
except ValueError:
continue
for m in re.finditer(
r"\{[^{}]*\"PROXY_IP\"\s*:\s*\"([\d.]+)\"[^{}]*\"PROXY_PORT\"\s*:\s*\"(\d+)\"[^{}]*\}",
text,
):
ip, port = m.group(1), m.group(2)
if port.isdigit() and 1 <= int(port) <= 65535:
try:
results.append(ProxyRaw(ip, int(port), "http"))
except ValueError:
continue
return results
async def crawl(self) -> List[ProxyRaw]:
seen = set()
out: List[ProxyRaw] = []
htmls = await self.fetch_all(self.urls, timeout=10, retries=1)
for url, html in zip(self.urls, htmls):
if not html:
continue
for p in self._extract_from_text(html):
k = (p.ip, p.port)
if k not in seen:
seen.add(k)
out.append(p)
if out:
logger.info(f"{self.display_name}{url} 累计 {len(out)}")
return out
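
For the "register them" part of the first bullet, a hypothetical sketch of how a plugin like the one above could be wired into a registry; the module path and registry API are assumptions based only on the imports visible in this diff:

```python
# Hypothetical registry wiring; FpwGatherproxyPlugin's module path is assumed.
from app.plugins.fpw_gatherproxy import FpwGatherproxyPlugin

PLUGINS: dict = {}

def register(plugin_cls):
    plugin = plugin_cls()
    PLUGINS[plugin.name] = plugin  # keyed as "fpw_gatherproxy"
    return plugin

register(FpwGatherproxyPlugin)
```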