Files
ProxyPool/app/plugins/fpw_gatherproxy.py
祀梦 0131c8b408 feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
2026-04-05 13:39:19 +08:00

62 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""gatherproxy.com 页面内嵌 JSONPROXY_IP / PROXY_PORT"""
import re
from typing import List
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwGatherproxyPlugin(BaseHTTPPlugin):
    """Crawl proxies embedded as PROXY_IP / PROXY_PORT JSON in gatherproxy.com pages."""

    name = "fpw_gatherproxy"
    display_name = "GatherProxy"
    description = "gatherproxy.com 内嵌代理 JSON站点常有限流"

    # Pre-compiled patterns, hoisted so they are compiled once per process
    # instead of on every _extract_from_text call.
    # Loose form: PROXY_IP ... PROXY_PORT within 120 chars, any quoting style.
    _PAIR_RE = re.compile(
        r"PROXY_IP['\"]?\s*:\s*['\"]([\d.]+)['\"].{0,120}?PROXY_PORT['\"]?\s*:\s*['\"](\d+)['\"]",
        re.DOTALL | re.IGNORECASE,
    )
    # Strict form: a single flat JSON object containing both keys (case-sensitive,
    # double-quoted — intentionally no IGNORECASE flag, matching the original).
    _JSON_RE = re.compile(
        r"\{[^{}]*\"PROXY_IP\"\s*:\s*\"([\d.]+)\"[^{}]*\"PROXY_PORT\"\s*:\s*\"(\d+)\"[^{}]*\}"
    )

    def __init__(self):
        super().__init__()
        # Target listing pages; fetched concurrently by crawl().
        self.urls = [
            "http://www.gatherproxy.com/proxylist/anonymity/?t=Elite",
            "http://www.gatherproxy.com/proxylist/country/?c=United%20States",
        ]

    def _extract_from_text(self, text: str) -> List[ProxyRaw]:
        """Extract ProxyRaw entries from page text using both pattern variants.

        The two regexes overlap on well-formed pages and may yield duplicates;
        de-duplication happens in crawl(), so duplicates here are harmless.
        Ports outside 1-65535 are dropped.
        """
        results: List[ProxyRaw] = []
        for pattern in (self._PAIR_RE, self._JSON_RE):
            for m in pattern.finditer(text):
                ip, port_text = m.group(1), m.group(2)
                # \d+ guarantees digits, so int() cannot fail and the former
                # isdigit() check was redundant; only the range needs checking.
                port = int(port_text)
                if 1 <= port <= 65535:
                    try:
                        results.append(ProxyRaw(ip, port, "http"))
                    except ValueError:
                        # ProxyRaw may reject malformed IPs; skip that entry.
                        continue
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch all configured URLs and return de-duplicated proxies.

        Pages that failed to fetch (falsy html) are skipped. Logs a cumulative
        count after each page that contributed results.
        """
        seen = set()
        out: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=10, retries=1)
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            for p in self._extract_from_text(html):
                k = (p.ip, p.port)
                if k not in seen:
                    seen.add(k)
                    out.append(p)
            if out:
                logger.info(f"{self.display_name}{url} 累计 {len(out)}")
        return out