"""proxynova.com plugin: table rows hold a JS-obfuscated IP and a plain-text port."""
import re
from typing import List, Optional

from bs4 import BeautifulSoup

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger

# Compiled once at import time; every table row is matched against these.
_WRITE_ARG_RE = re.compile(r'document\.write\("([^"]+)"\.split')
_CONCAT_ARG_RE = re.compile(r'\.concat\("([^"]+)"')
_IPV4_SHAPE_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

# Checked in order, so the more specific markers win; anything else is "http".
_PROTOCOL_MARKERS = (
    ("SOCKS5", "socks5"),
    ("SOCKS4", "socks4"),
    ("HTTPS", "https"),
)


class FpwProxynovaPlugin(BaseHTTPPlugin):
    """Scrape the ProxyNova proxy list.

    The first table cell of a row normally contains a ``<script>`` that
    builds the IP with ``document.write``; the second cell contains the port
    as text.  When no row can be decoded that way, the page is re-parsed
    assuming the IP is plain text in the first cell.
    """

    name = "fpw_proxynova"
    display_name = "ProxyNova"
    description = "proxynova.com 代理列表(解析 document.write 混淆 IP)"

    def __init__(self):
        super().__init__()
        self.urls = ["https://www.proxynova.com/proxy-server-list/"]

    @staticmethod
    def _decode_proxynova_ip(script_inner: str) -> Optional[str]:
        """Rebuild the IP text from an obfuscating ``document.write`` script.

        Expected shape of the script:
        document.write(".081.301".split("").reverse()...concat("118.174"...))
        The first string literal is reversed and the ``concat`` literal is
        appended to it, giving "103.180.118.174" for the example above.

        Args:
            script_inner: Text content of the ``<script>`` element.

        Returns:
            The reassembled string, or ``None`` when either literal is
            missing.  The result is not validated here; callers check it.
        """
        head = _WRITE_ARG_RE.search(script_inner)
        tail = _CONCAT_ARG_RE.search(script_inner)
        if not head or not tail:
            return None
        return "".join(reversed(head.group(1))) + tail.group(1)

    @staticmethod
    def _is_ipv4(text: str) -> bool:
        """Return True when *text* is a dotted quad with every octet <= 255."""
        if not _IPV4_SHAPE_RE.match(text):
            return False
        return all(int(octet) <= 255 for octet in text.split("."))

    @staticmethod
    def _parse_port(text: str) -> Optional[int]:
        """Convert *text* to a port number in 1..65535, or ``None``.

        ``str.isdigit()`` accepts some characters (for example superscript
        digits) that ``int()`` rejects, and ``int()`` may also refuse very
        long digit strings, so the conversion itself is guarded instead of
        being allowed to abort the whole parse.
        """
        if not text.isdigit():
            return None
        try:
            port = int(text)
        except ValueError:
            return None
        return port if 1 <= port <= 65535 else None

    @staticmethod
    def _detect_protocol(tr) -> str:
        """Guess the proxy protocol from the upper-cased text of a table row."""
        row_text = tr.get_text(" ", strip=True).upper()
        for marker, proto in _PROTOCOL_MARKERS:
            if marker in row_text:
                return proto
        return "http"

    def _build_proxy(self, ip: Optional[str], port_txt: str, tr) -> Optional[ProxyRaw]:
        """Validate one row's fields and build a ``ProxyRaw``.

        Returns ``None`` when the IP or port is unusable, or when
        ``ProxyRaw`` itself rejects the values with ``ValueError``.
        """
        if not ip or not self._is_ipv4(ip):
            return None
        port = self._parse_port(port_txt)
        if port is None:
            return None
        try:
            return ProxyRaw(ip, port, self._detect_protocol(tr))
        except ValueError:
            return None

    def _parse_rows(self, html: str) -> List[ProxyRaw]:
        """Parse rows of the first ``<tbody>`` whose IP is script-obfuscated.

        Rows with fewer than two cells, without a ``<script>`` in the first
        cell, or with an undecodable IP / invalid port are skipped.

        Returns:
            The proxies found; an empty list when there is no ``<tbody>``.
        """
        soup = BeautifulSoup(html, "lxml")
        tbody = soup.find("tbody")
        if not tbody:
            return []
        found: List[ProxyRaw] = []
        for tr in tbody.find_all("tr"):
            cells = tr.find_all("td")
            if len(cells) < 2:
                continue
            script = cells[0].find("script")
            # ``.string`` is None when the tag does not hold a single text node.
            if not script or not script.string:
                continue
            proxy = self._build_proxy(
                self._decode_proxynova_ip(script.string),
                cells[1].get_text(strip=True),
                tr,
            )
            if proxy is not None:
                found.append(proxy)
        return found

    def _parse_plain_ip_port_rows(self, html: str) -> List[ProxyRaw]:
        """Fallback parser: every ``<tr>`` in the page with a plain-text IP.

        The first cell's text must be a valid dotted-quad IPv4 address and
        the second cell's text a valid port; other rows are skipped.
        """
        soup = BeautifulSoup(html, "lxml")
        found: List[ProxyRaw] = []
        for tr in soup.find_all("tr"):
            cells = tr.find_all("td")
            if len(cells) < 2:
                continue
            proxy = self._build_proxy(
                cells[0].get_text(strip=True),
                cells[1].get_text(strip=True),
                tr,
            )
            if proxy is not None:
                found.append(proxy)
        return found

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch the list page and return the proxies parsed from it.

        The obfuscated-IP parser runs first; the plain-text parser is used
        only when it yields nothing.  A falsy response from ``self.fetch``
        (inherited; presumably signals a failed download) gives an empty list.
        """
        html = await self.fetch(self.urls[0], timeout=25, retries=2)
        if not html:
            return []
        results = self._parse_rows(html)
        if not results:
            results = self._parse_plain_ip_port_rows(html)
        if results:
            logger.info(f"{self.display_name} 解析 {len(results)} 条")
        return results