"""Crawler plugin for the hidemyna.me free proxy list tables."""

import re
from typing import List, Optional

from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger

# Matches the first dotted-quad token in a row. Octet ranges (0-255) are NOT
# checked here; ProxyRaw presumably validates them (see the ValueError guard
# in ``_parse_rows_loose``) -- TODO confirm.
_IPV4_RE = re.compile(r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b")


class FpwHidemyPlugin(BaseHTTPPlugin):
    """Collect proxies from the hidemyna.me English proxy list pages.

    Each configured URL is fetched and parsed as an HTML table. When the
    structured table parser yields nothing, a looser row-by-row heuristic
    (``_parse_rows_loose``) is used instead.
    """

    name = "fpw_hidemy"
    display_name = "HideMy.name"
    description = "hidemyna.me 英文代理列表（HTTP/HTTPS/SOCKS）"

    def __init__(self):
        super().__init__()
        # Pages to crawl: the default listing plus two type-filtered listings.
        self.urls = [
            "https://hidemyna.me/en/proxy-list/",
            "https://hidemyna.me/en/proxy-list/?type=hs",
            "https://hidemyna.me/en/proxy-list/?type=socks4",
        ]

    @staticmethod
    def _extract_port(text: str) -> Optional[int]:
        """Return *text* as a port number in 1..65535, or ``None``.

        ``str.isdecimal`` is used instead of ``str.isdigit`` because the
        latter also accepts characters such as superscript digits, which
        ``int()`` rejects with ``ValueError``.
        """
        if not text.isdecimal():
            return None
        try:
            value = int(text)
        except ValueError:
            # e.g. an absurdly long digit run exceeding the int parsing limit.
            return None
        return value if 1 <= value <= 65535 else None

    @staticmethod
    def _detect_protocol(row_text: str) -> str:
        """Guess the proxy protocol from the text of a whole table row."""
        upper = row_text.upper()
        if "SOCKS5" in upper:
            return "socks5"
        # Any other mention of SOCKS (including "SOCKS4") maps to socks4.
        if "SOCKS" in upper:
            return "socks4"
        if "HTTPS" in upper:
            return "https"
        return "http"

    def _parse_rows_loose(self, html: str) -> List[ProxyRaw]:
        """Heuristically extract proxies from every ``<tr>`` in *html*.

        For each row with at least two ``<td>`` cells: the first IPv4-looking
        token in the row text is the address, the first cell whose text is a
        number in 1..65535 is the port, and the protocol is guessed from
        keywords in the row. Rows lacking an address or port are skipped, as
        are rows for which ``ProxyRaw`` raises ``ValueError``.

        Args:
            html: Raw HTML of a proxy list page.

        Returns:
            The proxies found, in document order (possibly empty).
        """
        # Imported lazily, as in the original, so bs4 is only needed here.
        from bs4 import BeautifulSoup

        out: List[ProxyRaw] = []
        soup = BeautifulSoup(html, "lxml")
        for tr in soup.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue

            row = " ".join(td.get_text(" ", strip=True) for td in tds)
            ip_m = _IPV4_RE.search(row)
            if not ip_m:
                continue
            ip = ip_m.group(1)

            # First cell that is a plain, in-range port number wins.
            port_val = None
            for td in tds:
                port_val = self._extract_port(td.get_text(strip=True))
                if port_val is not None:
                    break
            if port_val is None:
                continue

            proto = self._detect_protocol(row)
            try:
                out.append(ProxyRaw(ip, port_val, proto))
            except ValueError:
                # Best-effort parser: drop rows ProxyRaw refuses.
                continue
        return out

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch all configured pages and return the proxies parsed from them.

        Each page is first parsed with ``parse_html_table`` using fixed
        column positions; if that returns nothing, ``_parse_rows_loose`` is
        tried. Pages with no HTML are skipped.

        Returns:
            All proxies gathered across the pages (possibly empty).
        """
        results: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
        # NOTE(review): assumes fetch_all returns one entry per URL, in the
        # same order as self.urls -- confirm against its implementation.
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            batch = self.parse_html_table(
                html,
                column_map={"ip": 0, "port": 1, "protocol": 4},
                protocol="http",
            )
            if not batch:
                batch = self._parse_rows_loose(html)
            if batch:
                results.extend(batch)
                logger.info(f"{self.display_name} {url}: {len(batch)} 条")
        if results:
            logger.info(f"{self.display_name} 合计 {len(results)} 条")
        return results