feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove the global crawl_timeout setting (see the timeout sketch after the diff below)
- Validator: fix connect vs. total timeout on save; LRU cache for SOCKS sessions; drop a redundant semaphore
- Validation handler uses a single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from the settings API/UI; settings maintenance SQL and init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes (sketch at the end of this page); task polling stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB (see the conftest sketch after this list)
- .gitignore: explicit proxies.test.sqlite patterns; fix the plugin_service ValidationException import

Made-with: Cursor
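A minimal sketch of how the test-DB isolation might be wired up in a root conftest.py. Only PROXYPOOL_DB_PATH and db/proxies.test.sqlite come from the commit message; setting the variable at import time and the cleanup fixture are illustrative assumptions:

    # conftest.py — illustrative sketch, not the commit's actual test setup.
    import os
    import pathlib

    import pytest

    # Point the app at a throwaway database before any app module is imported,
    # so the test run never touches the production database.
    os.environ.setdefault("PROXYPOOL_DB_PATH", "db/proxies.test.sqlite")

    @pytest.fixture(scope="session", autouse=True)
    def _isolated_test_db():
        yield
        # Hypothetical cleanup: remove the test database after the session.
        pathlib.Path(os.environ["PROXYPOOL_DB_PATH"]).unlink(missing_ok=True)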
app/plugins/fpw_freeproxylists.py (new file, 69 lines)
@@ -0,0 +1,69 @@
"""freeproxylists.net and common mirror paths (HTML table / plain text)."""
import re
from typing import List

from bs4 import BeautifulSoup

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger


class FpwFreeproxylistsPlugin(BaseHTTPPlugin):
    name = "fpw_freeproxylists"
    display_name = "FreeProxyLists"
    description = "freeproxylists.net family of pages (prone to 403; multiple URLs are tried)"

    def __init__(self):
        super().__init__()
        self.urls = [
            "http://www.freeproxylists.net/",
            "http://freeproxylists.net/",
            "http://www.freeproxylists.net/en/http-txt.html",
        ]

    def _parse_any(self, html: str) -> List[ProxyRaw]:
        # Fast path: if the page yields enough bare ip:port matches,
        # treat it as plain text and skip the HTML parse entirely.
        ipport = re.findall(
            r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})\b",
            html,
        )
        if len(ipport) >= 5:
            out: List[ProxyRaw] = []
            for ip, ps in ipport:
                if ps.isdigit() and 1 <= int(ps) <= 65535:
                    try:
                        out.append(ProxyRaw(ip, int(ps), "http"))
                    except ValueError:
                        pass
            return out
        # Fallback: table pages. Read ip/port from the first two cells of each row.
        soup = BeautifulSoup(html, "lxml")
        results: List[ProxyRaw] = []
        for tr in soup.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            ip = tds[0].get_text(strip=True)
            port = tds[1].get_text(strip=True)
            if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip) and port.isdigit():
                if 1 <= int(port) <= 65535:
                    try:
                        results.append(ProxyRaw(ip, int(port), "http"))
                    except ValueError:
                        pass
        return results

    async def crawl(self) -> List[ProxyRaw]:
        seen = set()
        out: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=10, retries=1)
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            for p in self._parse_any(html):
                # De-duplicate across mirror URLs.
                key = (p.ip, p.port, p.protocol)
                if key not in seen:
                    seen.add(key)
                    out.append(p)
            if out:
                logger.info(f"{self.display_name}: accumulated {len(out)} proxies from {url}")
        return out
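For context, a sketch of how the per-plugin crawl timeout could wrap this plugin's crawl(); the run_plugin helper is hypothetical, while the 120-second default mirrors crawl_timeout_seconds=120 from the commit message:

    import asyncio
    from typing import List

    from app.core.plugin_system import ProxyRaw
    from app.plugins.fpw_freeproxylists import FpwFreeproxylistsPlugin

    async def run_plugin(plugin, crawl_timeout_seconds: int = 120) -> List[ProxyRaw]:
        # Hypothetical helper: bound each plugin's crawl individually, replacing
        # the removed global crawl_timeout, so one slow source cannot stall the run.
        try:
            return await asyncio.wait_for(plugin.crawl(), timeout=crawl_timeout_seconds)
        except asyncio.TimeoutError:
            return []

    # Example: proxies = asyncio.run(run_plugin(FpwFreeproxylistsPlugin()))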
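Finally, a sketch of the IPv6-safe delete. The commit message gives only the route POST /api/proxies/delete-one; the FastAPI wiring, request model, and delete_proxy stub below are assumptions. Carrying the address in a JSON body avoids putting colon-ridden IPv6 literals into a URL path segment:

    # Hypothetical FastAPI-style handler; only the route and its IPv6 rationale
    # come from the commit message.
    from fastapi import APIRouter
    from pydantic import BaseModel

    router = APIRouter()

    class DeleteOneRequest(BaseModel):
        ip: str        # may be an IPv6 literal such as "2001:db8::1"
        port: int
        protocol: str

    async def delete_proxy(ip: str, port: int, protocol: str) -> bool:
        """Hypothetical stand-in for the real DB delete helper."""
        return True

    @router.post("/api/proxies/delete-one")
    async def delete_one(req: DeleteOneRequest):
        # The ip travels in the request body, so ':' in IPv6 addresses never
        # has to survive URL path parsing or routing.
        return {"deleted": await delete_proxy(req.ip, req.port, req.protocol)}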