feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove the global crawl_timeout setting (a rough sketch follows this list)
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses a single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
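For illustration only, the per-plugin crawl timeout could look roughly like the wrapper below; run_plugin, the scheduler shape, and the constant name are assumptions made for this sketch, not the project's actual code:

import asyncio
from typing import List

CRAWL_TIMEOUT_SECONDS = 120  # assumed constant mirroring crawl_timeout_seconds=120

async def run_plugin(plugin) -> List:
    """Run one plugin's crawl() under its own timeout so a single slow
    source cannot stall the whole crawl round (illustrative helper)."""
    try:
        return await asyncio.wait_for(plugin.crawl(), timeout=CRAWL_TIMEOUT_SECONDS)
    except asyncio.TimeoutError:
        return []  # a timed-out plugin simply contributes nothing this round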
app/plugins/fpw_gatherproxy.py (new file, 61 lines)
@@ -0,0 +1,61 @@
"""Inline JSON embedded in gatherproxy.com pages (PROXY_IP / PROXY_PORT)."""
import re
from typing import List

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger


class FpwGatherproxyPlugin(BaseHTTPPlugin):
    name = "fpw_gatherproxy"
    display_name = "GatherProxy"
    description = "Proxy JSON embedded in gatherproxy.com pages (the site is frequently rate-limited)"

    def __init__(self):
        super().__init__()
        self.urls = [
            "http://www.gatherproxy.com/proxylist/anonymity/?t=Elite",
            "http://www.gatherproxy.com/proxylist/country/?c=United%20States",
        ]

    def _extract_from_text(self, text: str) -> List[ProxyRaw]:
        results: List[ProxyRaw] = []
        # First pass: loosely match PROXY_IP / PROXY_PORT pairs in inline JavaScript.
        for m in re.finditer(
            r"PROXY_IP['\"]?\s*:\s*['\"]([\d.]+)['\"].{0,120}?PROXY_PORT['\"]?\s*:\s*['\"](\d+)['\"]",
            text,
            re.DOTALL | re.IGNORECASE,
        ):
            ip, port = m.group(1), m.group(2)
            if port.isdigit() and 1 <= int(port) <= 65535:
                try:
                    results.append(ProxyRaw(ip, int(port), "http"))
                except ValueError:
                    continue
        # Second pass: strict JSON objects with quoted "PROXY_IP"/"PROXY_PORT" keys.
        for m in re.finditer(
            r"\{[^{}]*\"PROXY_IP\"\s*:\s*\"([\d.]+)\"[^{}]*\"PROXY_PORT\"\s*:\s*\"(\d+)\"[^{}]*\}",
            text,
        ):
            ip, port = m.group(1), m.group(2)
            if port.isdigit() and 1 <= int(port) <= 65535:
                try:
                    results.append(ProxyRaw(ip, int(port), "http"))
                except ValueError:
                    continue
        return results

    async def crawl(self) -> List[ProxyRaw]:
        seen = set()
        out: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=10, retries=1)
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            for p in self._extract_from_text(html):
                k = (p.ip, p.port)
                if k not in seen:
                    seen.add(k)
                    out.append(p)
            if out:
                logger.info(f"{self.display_name}: accumulated {len(out)} proxies from {url}")
        return out
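For orientation, a hypothetical standalone driver for this plugin might look like the snippet below; it assumes BaseHTTPPlugin manages its own HTTP session internally, and it is not how the plugin registry actually invokes plugins:

# Illustrative only: run the plugin once and print what it found.
import asyncio

from app.plugins.fpw_gatherproxy import FpwGatherproxyPlugin


async def main():
    plugin = FpwGatherproxyPlugin()
    proxies = await plugin.crawl()
    for p in proxies:
        print(p.ip, p.port)


if __name__ == "__main__":
    asyncio.run(main())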