Files
ProxyPool/app/plugins/fpw_proxynova.py
祀梦 e582067316 fix(plugins): fpw parsers for JSON API, mirrors, and looser HTML
- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows

Made-with: Cursor
2026-04-05 14:16:03 +08:00



"""proxynova.com 表格内 JS 混淆 IP + 明文端口。"""
import re
from typing import List, Optional
from bs4 import BeautifulSoup
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwProxynovaPlugin(BaseHTTPPlugin):
name = "fpw_proxynova"
display_name = "ProxyNova"
description = "proxynova.com 代理列表(解析 document.write 混淆 IP"
def __init__(self):
super().__init__()
self.urls = ["https://www.proxynova.com/proxy-server-list/"]

    @staticmethod
    def _decode_proxynova_ip(script_inner: str) -> Optional[str]:
        """Decode document.write(".081.301".split("").reverse()...concat("118.174"...))."""
        m1 = re.search(r'document\.write\("([^"]+)"\.split', script_inner)
        m2 = re.search(r'\.concat\("([^"]+)"', script_inner)
        if not m1 or not m2:
            return None
        a, b = m1.group(1), m2.group(1)
        part1 = "".join(reversed(a))
        return part1 + b

    def _parse_rows(self, html: str) -> List[ProxyRaw]:
        soup = BeautifulSoup(html, "lxml")
        tbody = soup.find("tbody")
        if not tbody:
            return []
        out: List[ProxyRaw] = []
        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
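            # Column 0 carries a <script> whose document.write() hides the IP;
            # column 1 is the plain-text port.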
            script = tds[0].find("script")
            if not script or not script.string:
                continue
            ip = self._decode_proxynova_ip(script.string)
            port_txt = tds[1].get_text(strip=True)
            if not ip or not port_txt.isdigit():
                continue
            port = int(port_txt)
            if not (1 <= port <= 65535):
                continue
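            # Infer the protocol from keywords anywhere in the row text,
            # defaulting to plain http when none match.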
            row_text = tr.get_text(" ", strip=True).upper()
            if "SOCKS5" in row_text:
                proto = "socks5"
            elif "SOCKS4" in row_text:
                proto = "socks4"
            elif "HTTPS" in row_text:
                proto = "https"
            else:
                proto = "http"
            try:
                out.append(ProxyRaw(ip, port, proto))
            except ValueError:
                continue
        return out

    def _parse_plain_ip_port_rows(self, html: str) -> List[ProxyRaw]:
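        """Fallback parser for rows with a literal dotted-quad IP in column 0
        and a plain port in column 1 (the commit's "plain IP/port row fallback").
        """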
        soup = BeautifulSoup(html, "lxml")
        out: List[ProxyRaw] = []
        for tr in soup.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            ip = tds[0].get_text(strip=True)
            port_txt = tds[1].get_text(strip=True)
            if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip):
                continue
            if not port_txt.isdigit() or not (1 <= int(port_txt) <= 65535):
                continue
            row_text = tr.get_text(" ", strip=True).upper()
            if "SOCKS5" in row_text:
                proto = "socks5"
            elif "SOCKS4" in row_text:
                proto = "socks4"
            elif "HTTPS" in row_text:
                proto = "https"
            else:
                proto = "http"
            try:
                out.append(ProxyRaw(ip, int(port_txt), proto))
            except ValueError:
                continue
        return out

    async def crawl(self) -> List[ProxyRaw]:
        html = await self.fetch(self.urls[0], timeout=25, retries=2)
        if not html:
            return []
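        # Try the obfuscated-IP table first, then fall back to plain rows.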
        results = self._parse_rows(html)
        if not results:
            results = self._parse_plain_ip_port_rows(html)
        if results:
            logger.info(f"{self.display_name} parsed {len(results)} proxies")
        return results
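
# Hypothetical standalone smoke test (the plugin is normally driven by the
# app's plugin system; fetch() may require setup not shown here):
#   import asyncio
#   print(asyncio.run(FpwProxynovaPlugin().crawl()))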