- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows (see the sketch below)

Made-with: Cursor
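Several of these changes share one pattern: try the structured table parser first, then rescue the page with a plain ip:port text scan. The fpw_premproxy fallback is not part of the file shown here, so the following is only a minimal sketch of that kind of regex fallback; the name scan_ip_port and its exact validation rules are illustrative, not the real plugin code.

import re
from typing import List, Tuple

# Matches dotted-quad ip:port pairs anywhere in the page text.
IP_PORT_RE = re.compile(r"\b(\d{1,3}(?:\.\d{1,3}){3}):(\d{1,5})\b")

def scan_ip_port(text: str) -> List[Tuple[str, int]]:
    """Collect plausible ip:port pairs, validating octet and port ranges."""
    out: List[Tuple[str, int]] = []
    for ip, port in IP_PORT_RE.findall(text):
        if all(int(octet) <= 255 for octet in ip.split(".")) and 1 <= int(port) <= 65535:
            out.append((ip, int(port)))
    return out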
82 lines
2.6 KiB
Python
"""hidemyna.me 免费代理列表表格。"""
|
||
import re
|
||
from typing import List
|
||
|
||
from app.core.plugin_system import ProxyRaw
|
||
from app.plugins.base import BaseHTTPPlugin
|
||
from app.core.log import logger
|
||
|
||
|
||
class FpwHidemyPlugin(BaseHTTPPlugin):
    name = "fpw_hidemy"
    display_name = "HideMy.name"
    description = "hidemyna.me English proxy list (HTTP/HTTPS/SOCKS)"

    def __init__(self):
        super().__init__()
        # Default, HTTP/HTTPS ("hs"), and SOCKS4 list pages; each is parsed independently.
        self.urls = [
            "https://hidemyna.me/en/proxy-list/",
            "https://hidemyna.me/en/proxy-list/?type=hs",
            "https://hidemyna.me/en/proxy-list/?type=socks4",
        ]

    def _parse_rows_loose(self, html: str) -> List[ProxyRaw]:
        """Loose fallback: scan every <tr> for an IP address and a plausible port."""
        from bs4 import BeautifulSoup

        out: List[ProxyRaw] = []
        soup = BeautifulSoup(html, "lxml")
        for tr in soup.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            row = " ".join(td.get_text(" ", strip=True) for td in tds)
            ip_m = re.search(
                r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b",
                row,
            )
            if not ip_m:
                continue
            ip = ip_m.group(1)
            # Take the first all-digit cell in the valid port range as the port.
            port_val = None
            for td in tds:
                t = td.get_text(strip=True)
                if t.isdigit() and 1 <= int(t) <= 65535:
                    port_val = int(t)
                    break
            if port_val is None:
                continue
            # Infer the protocol from the row text; plain "SOCKS" is treated
            # as SOCKS4, and anything unrecognized defaults to HTTP.
            u = row.upper()
            if "SOCKS5" in u:
                proto = "socks5"
            elif "SOCKS4" in u or "SOCKS" in u:
                proto = "socks4"
            elif "HTTPS" in u:
                proto = "https"
            else:
                proto = "http"
            try:
                out.append(ProxyRaw(ip, port_val, proto))
            except ValueError:
                continue
        return out

    async def crawl(self) -> List[ProxyRaw]:
        results: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            # First try the fixed-column table layout.
            batch = self.parse_html_table(
                html,
                column_map={"ip": 0, "port": 1, "protocol": 4},
                protocol="http",
            )
            # Fall back to the loose row scan when the layout has changed.
            if not batch:
                batch = self._parse_rows_loose(html)
            if batch:
                results.extend(batch)
                logger.info(f"{self.display_name} {url}: {len(batch)} entries")
        if results:
            logger.info(f"{self.display_name} total: {len(results)} entries")
        return results
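For a quick manual run outside the plugin scheduler, the crawler can be driven directly with asyncio. A minimal sketch, assuming FpwHidemyPlugin needs no constructor arguments beyond what its own __init__ sets up:

import asyncio

async def main() -> None:
    plugin = FpwHidemyPlugin()
    proxies = await plugin.crawl()
    print(f"{plugin.display_name}: {len(proxies)} proxies")

if __name__ == "__main__":
    asyncio.run(main())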