- Add Free_Proxy_Website-style fpw_* plugins and register them - Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting - Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore - Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait - Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys - WebSocket dashboard stats; ProxyList pool_filter and API alignment - POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404 - pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB - .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import Made-with: Cursor
75 lines
2.5 KiB
Python
75 lines
2.5 KiB
Python
"""proxynova.com 表格内 JS 混淆 IP + 明文端口。"""
|
||
import re
|
||
from typing import List, Optional
|
||
|
||
from bs4 import BeautifulSoup
|
||
|
||
from app.core.plugin_system import ProxyRaw
|
||
from app.plugins.base import BaseHTTPPlugin
|
||
from app.core.log import logger
|
||
|
||
|
||
class FpwProxynovaPlugin(BaseHTTPPlugin):
    """Crawler plugin for the proxynova.com proxy server list.

    In each table row the IP address is not plain text: the first cell holds
    an inline ``<script>`` that rebuilds it via ``document.write`` from a
    reversed string literal plus a ``.concat(...)`` literal. The port is read
    as plain text from the second cell.
    """

    name = "fpw_proxynova"
    display_name = "ProxyNova"
    description = "proxynova.com 代理列表（解析 document.write 混淆 IP）"

    # Compiled once instead of on every table row.
    # First literal: written reversed in the page script.
    _REVERSED_PART_RE = re.compile(r'document\.write\("([^"]+)"\.split')
    # Second literal: appended unchanged through .concat(...).
    _PLAIN_PART_RE = re.compile(r'\.concat\("([^"]+)"')

    # Checked in order against the upper-cased row text; the first marker
    # found wins, and rows with no marker fall back to "http".
    _PROTOCOL_MARKERS = (
        ("SOCKS5", "socks5"),
        ("SOCKS4", "socks4"),
        ("HTTPS", "https"),
    )

    def __init__(self):
        super().__init__()
        self.urls = ["https://www.proxynova.com/proxy-server-list/"]

    @staticmethod
    def _decode_proxynova_ip(script_inner: str) -> Optional[str]:
        """Rebuild the IP hidden in a ``document.write`` script snippet.

        Expected shape of ``script_inner``::

            document.write(".081.301".split("").reverse()...concat("118.174"...))

        The first literal is reversed and the ``.concat`` literal is appended
        as-is, so the example yields ``"103.180.118.174"``.

        Args:
            script_inner: Text content of the ``<script>`` element.

        Returns:
            The concatenated string, or ``None`` when either literal is
            missing. The result is not validated as an IP address here.
        """
        reversed_match = FpwProxynovaPlugin._REVERSED_PART_RE.search(script_inner)
        plain_match = FpwProxynovaPlugin._PLAIN_PART_RE.search(script_inner)
        if not reversed_match or not plain_match:
            return None
        return reversed_match.group(1)[::-1] + plain_match.group(1)

    @staticmethod
    def _parse_port(port_txt: str) -> Optional[int]:
        """Convert port cell text to an int in 1..65535, else ``None``."""
        if not port_txt.isdigit():
            return None
        try:
            # str.isdigit() is also true for characters such as "²" or "①"
            # that int() rejects; treat those as an unparsable port instead
            # of letting ValueError abort the whole page parse.
            port = int(port_txt)
        except ValueError:
            return None
        if not (1 <= port <= 65535):
            return None
        return port

    @classmethod
    def _detect_protocol(cls, row_text: str) -> str:
        """Pick the protocol from the row's visible text (default ``http``)."""
        upper_text = row_text.upper()
        for marker, proto in cls._PROTOCOL_MARKERS:
            if marker in upper_text:
                return proto
        return "http"

    def _parse_rows(self, html: str) -> List[ProxyRaw]:
        """Extract proxies from the first ``<tbody>`` of the page HTML.

        Rows are skipped when they have fewer than two cells, lack an inline
        script with a single text node in the first cell, yield no decodable
        IP, or carry a missing, non-numeric or out-of-range port.

        Args:
            html: Raw HTML of the proxy list page.

        Returns:
            One ``ProxyRaw`` per usable row; an empty list when the page has
            no ``<tbody>``.
        """
        soup = BeautifulSoup(html, "lxml")
        tbody = soup.find("tbody")
        if not tbody:
            return []

        out: List[ProxyRaw] = []
        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue

            script = tds[0].find("script")
            if not script or not script.string:
                continue

            ip = self._decode_proxynova_ip(script.string)
            if not ip:
                continue

            port = self._parse_port(tds[1].get_text(strip=True))
            if port is None:
                continue

            proto = self._detect_protocol(tr.get_text(" ", strip=True))
            try:
                out.append(ProxyRaw(ip, port, proto))
            except ValueError:
                # NOTE(review): presumably ProxyRaw validates its fields and
                # raises ValueError on bad input - confirm; such rows are
                # dropped rather than failing the crawl.
                continue
        return out

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch the list page and return the proxies parsed from it.

        Returns:
            Parsed proxies, or an empty list when ``self.fetch`` returns a
            falsy value or no row could be parsed.
        """
        html = await self.fetch(self.urls[0], timeout=14, retries=1)
        if not html:
            return []
        results = self._parse_rows(html)
        if results:
            logger.info(f"{self.display_name} 解析 {len(results)} 条")
        return results
|