fix(plugins): fpw parsers for JSON API, mirrors, and looser HTML

- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 14:16:03 +08:00
parent a26ae50051
commit e582067316
8 changed files with 193 additions and 17 deletions

View File

@@ -64,11 +64,41 @@ class FpwProxynovaPlugin(BaseHTTPPlugin):
continue
return out
def _parse_plain_ip_port_rows(self, html: str) -> List[ProxyRaw]:
    """Loose fallback parser: scan every <tr> for a plain IP / port column pair.

    Used when the strict column parser finds nothing. The first <td> must be
    a dotted-quad IPv4 address and the second a port in 1-65535; the proxy
    protocol is inferred from keywords anywhere in the row's text.

    Args:
        html: Raw HTML of the provider page.

    Returns:
        List of ProxyRaw entries extracted from matching rows (may be empty).
    """
    soup = BeautifulSoup(html, "lxml")
    out: List[ProxyRaw] = []
    for tr in soup.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) < 2:
            continue
        ip = tds[0].get_text(strip=True)
        port_txt = tds[1].get_text(strip=True)
        # Validate both the dotted-quad shape AND the octet range; a bare
        # \d{1,3} regex would accept impossible addresses like 999.1.1.1.
        m = re.match(r"^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$", ip)
        if not m or any(int(octet) > 255 for octet in m.groups()):
            continue
        if not port_txt.isdigit() or not (1 <= int(port_txt) <= 65535):
            continue
        row_text = tr.get_text(" ", strip=True).upper()
        # Check SOCKS5 before SOCKS4, and HTTPS before defaulting to HTTP,
        # since the shorter keywords are substrings-adjacent in row text.
        if "SOCKS5" in row_text:
            proto = "socks5"
        elif "SOCKS4" in row_text:
            proto = "socks4"
        elif "HTTPS" in row_text:
            proto = "https"
        else:
            proto = "http"
        try:
            out.append(ProxyRaw(ip, int(port_txt), proto))
        except ValueError:
            # ProxyRaw may apply its own validation; skip the bad row
            # rather than abort the whole page.
            continue
    return out
async def crawl(self) -> List[ProxyRaw]:
    """Download the provider page and extract proxies.

    Fetches the first configured URL, runs the strict row parser, and
    falls back to the loose plain ip/port row scanner when the strict
    parser yields nothing. Logs the count only when something was found.
    """
    page = await self.fetch(self.urls[0], timeout=25, retries=2)
    if not page:
        return []
    # Empty list is falsy, so `or` expresses the fallback directly.
    results = self._parse_rows(page) or self._parse_plain_ip_port_rows(page)
    if results:
        logger.info(f"{self.display_name} 解析 {len(results)}")
    return results