fix(plugins): harden fpw parsers for JSON API, mirrors, and looser HTML

- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 14:16:03 +08:00
parent a26ae50051
commit e582067316
8 changed files with 193 additions and 17 deletions

View File

@@ -1,4 +1,5 @@
"""hidemyna.me 免费代理列表表格。"""
import re
from typing import List
from app.core.plugin_system import ProxyRaw
@@ -19,6 +20,46 @@ class FpwHidemyPlugin(BaseHTTPPlugin):
"https://hidemyna.me/en/proxy-list/?type=socks4",
]
def _parse_rows_loose(self, html: str) -> List[ProxyRaw]:
    """Loosely scan every table row for an IP, a port-like cell, and a protocol hint.

    Fallback parser used when the fixed-column table layout fails: any <tr>
    with at least two cells is considered, the first IPv4-looking token in the
    row text becomes the IP, the first all-digit cell in 1..65535 becomes the
    port, and the protocol is guessed from keywords in the row text.
    """
    from bs4 import BeautifulSoup

    # Compiled once so the per-row search stays cheap.
    ip_pattern = re.compile(r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b")
    found: List[ProxyRaw] = []
    for row in BeautifulSoup(html, "lxml").find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        text = " ".join(cell.get_text(" ", strip=True) for cell in cells)
        match = ip_pattern.search(text)
        if match is None:
            continue
        # First purely-numeric cell in the valid TCP port range wins.
        port = next(
            (
                int(value)
                for value in (cell.get_text(strip=True) for cell in cells)
                if value.isdigit() and 1 <= int(value) <= 65535
            ),
            None,
        )
        if port is None:
            continue
        upper = text.upper()
        # "SOCKS4" implies "SOCKS", so a single SOCKS check covers both
        # after SOCKS5 has been ruled out.
        if "SOCKS5" in upper:
            scheme = "socks5"
        elif "SOCKS" in upper:
            scheme = "socks4"
        elif "HTTPS" in upper:
            scheme = "https"
        else:
            scheme = "http"
        try:
            found.append(ProxyRaw(match.group(1), port, scheme))
        except ValueError:
            # ProxyRaw rejects malformed entries; skip the row.
            continue
    return found
async def crawl(self) -> List[ProxyRaw]:
results: List[ProxyRaw] = []
htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
@@ -30,6 +71,8 @@ class FpwHidemyPlugin(BaseHTTPPlugin):
column_map={"ip": 0, "port": 1, "protocol": 4},
protocol="http",
)
if not batch:
batch = self._parse_rows_loose(html)
if batch:
results.extend(batch)
logger.info(f"{self.display_name} {url}: {len(batch)}")