From e582067316dd13670070a0e9264281812dcba9d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=80=E6=A2=A6?= <3501646051@qq.com> Date: Sun, 5 Apr 2026 14:16:03 +0800 Subject: [PATCH] fix(plugins): fpw parsers for JSON API, mirrors, and looser HTML - fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s - fpw_socks_ssl: try parse_html_table before regex - fpw_hidemy: loose row scan when fixed columns fail - fpw_proxynova: plain IP/port row fallback - fpw_spys_one: HTTPS endpoints; crawl timeout 180s - fpw_gatherproxy: HTTPS + extra JSON key patterns - fpw_checkerproxy: lower min HTML length for parse - fpw_premproxy: ip:port regex fallback when few table rows Made-with: Cursor --- app/plugins/fpw_checkerproxy.py | 2 +- app/plugins/fpw_gatherproxy.py | 15 ++++- app/plugins/fpw_hidemy.py | 43 ++++++++++++ app/plugins/fpw_premproxy.py | 17 +++++ app/plugins/fpw_proxy_list_download.py | 90 +++++++++++++++++++++++--- app/plugins/fpw_proxynova.py | 30 +++++++++ app/plugins/fpw_socks_ssl_proxy.py | 8 ++- app/plugins/fpw_spys_one.py | 5 +- 8 files changed, 193 insertions(+), 17 deletions(-) diff --git a/app/plugins/fpw_checkerproxy.py b/app/plugins/fpw_checkerproxy.py index bdd81ea..aad4b0c 100644 --- a/app/plugins/fpw_checkerproxy.py +++ b/app/plugins/fpw_checkerproxy.py @@ -49,7 +49,7 @@ class FpwCheckerproxyPlugin(BaseHTTPPlugin): seen: Set[Tuple[str, int, str]] = set() htmls = await self.fetch_all(self.urls, timeout=25, retries=2) for html in htmls: - if not html or len(html) < 200: + if not html or len(html) < 80: continue for p in self._parse_ip_ports(html): k = (p.ip, p.port, p.protocol) diff --git a/app/plugins/fpw_gatherproxy.py b/app/plugins/fpw_gatherproxy.py index 3e68770..2e93bfb 100644 --- a/app/plugins/fpw_gatherproxy.py +++ b/app/plugins/fpw_gatherproxy.py @@ -15,8 +15,8 @@ class FpwGatherproxyPlugin(BaseHTTPPlugin): def __init__(self): super().__init__() self.urls = [ - "http://www.gatherproxy.com/proxylist/anonymity/?t=Elite", - "http://www.gatherproxy.com/proxylist/country/?c=United%20States", + "https://www.gatherproxy.com/proxylist/anonymity/?t=Elite", + "https://www.gatherproxy.com/proxylist/country/?c=United%20States", ] def _extract_from_text(self, text: str) -> List[ProxyRaw]: @@ -42,6 +42,17 @@ class FpwGatherproxyPlugin(BaseHTTPPlugin): results.append(ProxyRaw(ip, int(port), "http")) except ValueError: continue + for m in re.finditer( + r'"(?:proxy_)?ip"\s*:\s*"([\d.]+)"\s*,\s*"(?:proxy_)?port"\s*:\s*"?(\d+)"?', + text, + re.I, + ): + ip, port = m.group(1), m.group(2) + if port.isdigit() and 1 <= int(port) <= 65535: + try: + results.append(ProxyRaw(ip, int(port), "http")) + except ValueError: + continue return results async def crawl(self) -> List[ProxyRaw]: diff --git a/app/plugins/fpw_hidemy.py b/app/plugins/fpw_hidemy.py index 86aae01..d1a501c 100644 --- a/app/plugins/fpw_hidemy.py +++ b/app/plugins/fpw_hidemy.py @@ -1,4 +1,5 @@ """hidemyna.me 免费代理列表表格。""" +import re from typing import List from app.core.plugin_system import ProxyRaw @@ -19,6 +20,46 @@ class FpwHidemyPlugin(BaseHTTPPlugin): "https://hidemyna.me/en/proxy-list/?type=socks4", ] + def _parse_rows_loose(self, html: str) -> List[ProxyRaw]: + from bs4 import BeautifulSoup + + out: List[ProxyRaw] = [] + soup = BeautifulSoup(html, "lxml") + for tr in soup.find_all("tr"): + tds = tr.find_all("td") + if len(tds) < 2: + continue + row = " ".join(td.get_text(" ", strip=True) for td in tds) + ip_m = re.search( + r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b", + row, + ) + if 
not ip_m: + continue + ip = ip_m.group(1) + port_val = None + for td in tds: + t = td.get_text(strip=True) + if t.isdigit() and 1 <= int(t) <= 65535: + port_val = int(t) + break + if port_val is None: + continue + u = row.upper() + if "SOCKS5" in u: + proto = "socks5" + elif "SOCKS4" in u or "SOCKS" in u: + proto = "socks4" + elif "HTTPS" in u: + proto = "https" + else: + proto = "http" + try: + out.append(ProxyRaw(ip, port_val, proto)) + except ValueError: + continue + return out + async def crawl(self) -> List[ProxyRaw]: results: List[ProxyRaw] = [] htmls = await self.fetch_all(self.urls, timeout=25, retries=2) @@ -30,6 +71,8 @@ class FpwHidemyPlugin(BaseHTTPPlugin): column_map={"ip": 0, "port": 1, "protocol": 4}, protocol="http", ) + if not batch: + batch = self._parse_rows_loose(html) if batch: results.extend(batch) logger.info(f"{self.display_name} {url}: {len(batch)} 条") diff --git a/app/plugins/fpw_premproxy.py b/app/plugins/fpw_premproxy.py index 9660f8c..651029c 100644 --- a/app/plugins/fpw_premproxy.py +++ b/app/plugins/fpw_premproxy.py @@ -21,6 +21,21 @@ class FpwPremproxyPlugin(BaseHTTPPlugin): "https://premproxy.com/socks-list/", ] + def _parse_ipport_embedded(self, html: str) -> List[ProxyRaw]: + found = re.findall( + r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})\b", + html, + ) + out: List[ProxyRaw] = [] + for ip, ps in found: + if not ps.isdigit() or not (1 <= int(ps) <= 65535): + continue + try: + out.append(ProxyRaw(ip, int(ps), "http")) + except ValueError: + continue + return out + def _parse_html(self, html: str) -> List[ProxyRaw]: soup = BeautifulSoup(html, "lxml") results: List[ProxyRaw] = [] @@ -47,6 +62,8 @@ class FpwPremproxyPlugin(BaseHTTPPlugin): results.append(ProxyRaw(ip, int(port), proto)) except ValueError: continue + if len(results) < 5: + results.extend(self._parse_ipport_embedded(html)) return results async def crawl(self) -> List[ProxyRaw]: diff --git a/app/plugins/fpw_proxy_list_download.py b/app/plugins/fpw_proxy_list_download.py index 7b83871..e4e295e 100644 --- a/app/plugins/fpw_proxy_list_download.py +++ b/app/plugins/fpw_proxy_list_download.py @@ -1,5 +1,6 @@ -"""www.proxy-list.download 公开 API(README: Free_Proxy_Website)。""" -from typing import List +"""www.proxy-list.download 公开 API(常见为 JSON,内含 list 字段的 ip:port 文本)。""" +import json +from typing import Any, List from app.core.plugin_system import ProxyRaw from app.plugins.base import BaseHTTPPlugin @@ -10,6 +11,7 @@ class FpwProxyListDownloadPlugin(BaseHTTPPlugin): name = "fpw_proxy_list_download" display_name = "Proxy-List.download" description = "proxy-list.download 官方 API(http/https/socks4/socks5)" + crawl_timeout_seconds = 300.0 def __init__(self): super().__init__() @@ -20,6 +22,9 @@ class FpwProxyListDownloadPlugin(BaseHTTPPlugin): ("socks4", "https://www.proxy-list.download/api/v1/get?type=socks4"), ("socks5", "https://www.proxy-list.download/api/v1/get?type=socks5"), ] + self._mirror_prefix = ( + "https://cdn.jsdelivr.net/gh/monosans/proxy-list@main/proxies/" + ) self.fallback_pairs = [ ("http", "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all"), ("https", "https://api.proxyscrape.com/v2/?request=get&protocol=https&timeout=10000&country=all&ssl=all&anonymity=all"), @@ -27,28 +32,93 @@ class FpwProxyListDownloadPlugin(BaseHTTPPlugin): ("socks5", "https://api.proxyscrape.com/v2/?request=get&protocol=socks5&timeout=10000&country=all&ssl=all&anonymity=all"), ] + def _items_to_proxies(self, items: List[Any], protocol: str) -> 
List[ProxyRaw]: + out: List[ProxyRaw] = [] + for it in items: + if isinstance(it, dict): + ip = str( + it.get("ip") + or it.get("IP") + or it.get("host") + or it.get("Host") + or "" + ).strip() + port = it.get("port") or it.get("Port") + if not ip or port is None: + continue + ps = str(port).strip() + if not ps.isdigit() or not (1 <= int(ps) <= 65535): + continue + try: + out.append(ProxyRaw(ip, int(ps), protocol)) + except ValueError: + continue + elif isinstance(it, str) and ":" in it: + out.extend(self.parse_text_proxies(it, protocol)) + return out + + def _parse_api_body(self, text: str, protocol: str) -> List[ProxyRaw]: + text = (text or "").strip() + if not text: + return [] + if text[0] in "{[": + try: + data = json.loads(text) + except json.JSONDecodeError: + return self.parse_text_proxies(text, protocol) + if isinstance(data, list): + return self._items_to_proxies(data, protocol) + if isinstance(data, dict): + for key in ("list", "LIST", "data", "Data", "proxies", "Proxies"): + raw = data.get(key) + if isinstance(raw, str) and raw.strip(): + return self.parse_text_proxies(raw, protocol) + if isinstance(raw, list): + return self._items_to_proxies(raw, protocol) + return [] + return [] + return self.parse_text_proxies(text, protocol) + async def crawl(self) -> List[ProxyRaw]: results: List[ProxyRaw] = [] + t_req, n_try = 18.0, 1 urls = [u for _, u in self.api_pairs] - htmls = await self.fetch_all(urls, timeout=25, retries=2) + htmls = await self.fetch_all(urls, timeout=t_req, retries=n_try) for (protocol, _), text in zip(self.api_pairs, htmls): if not text: continue - batch = self.parse_text_proxies(text, protocol) + batch = self._parse_api_body(text, protocol) if batch: results.extend(batch) - logger.info(f"{self.display_name} {protocol}: {len(batch)} 条") + logger.info(f"{self.display_name} API {protocol}: {len(batch)} 条") if not results: - logger.warning(f"{self.display_name} 主 API 无数据,尝试 ProxyScrape 备用") - fb_urls = [u for _, u in self.fallback_pairs] - fb_htmls = await self.fetch_all(fb_urls, timeout=25, retries=2) - for (protocol, _), text in zip(self.fallback_pairs, fb_htmls): + logger.warning(f"{self.display_name} 主 API 无数据,尝试 jsDelivr 文本镜像") + mirror_pairs = [ + (p, f"{self._mirror_prefix}{p}.txt") + for p in ("http", "https", "socks4", "socks5") + ] + m_urls = [u for _, u in mirror_pairs] + m_htmls = await self.fetch_all(m_urls, timeout=22.0, retries=2) + for (protocol, _), text in zip(mirror_pairs, m_htmls): if not text: continue batch = self.parse_text_proxies(text, protocol) if batch: results.extend(batch) - logger.info(f"{self.display_name} fallback {protocol}: {len(batch)} 条") + logger.info(f"{self.display_name} 镜像 {protocol}: {len(batch)} 条") + if not results: + logger.warning(f"{self.display_name} 镜像无数据,尝试 ProxyScrape API") + fb_urls = [u for _, u in self.fallback_pairs] + fb_htmls = await self.fetch_all(fb_urls, timeout=20.0, retries=1) + for (protocol, _), text in zip(self.fallback_pairs, fb_htmls): + if not text: + continue + batch = self._parse_api_body(text, protocol) + if not batch: + batch = self.parse_text_proxies(text, protocol) + if batch: + results.extend(batch) + logger.info(f"{self.display_name} ProxyScrape {protocol}: {len(batch)} 条") if results: logger.info(f"{self.display_name} 合计 {len(results)} 条") return results diff --git a/app/plugins/fpw_proxynova.py b/app/plugins/fpw_proxynova.py index ae493e5..e12abf5 100644 --- a/app/plugins/fpw_proxynova.py +++ b/app/plugins/fpw_proxynova.py @@ -64,11 +64,41 @@ class FpwProxynovaPlugin(BaseHTTPPlugin): 
continue
         return out
 
+    def _parse_plain_ip_port_rows(self, html: str) -> List[ProxyRaw]:
+        soup = BeautifulSoup(html, "lxml")
+        out: List[ProxyRaw] = []
+        for tr in soup.find_all("tr"):
+            tds = tr.find_all("td")
+            if len(tds) < 2:
+                continue
+            ip = tds[0].get_text(strip=True)
+            port_txt = tds[1].get_text(strip=True)
+            if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip):
+                continue
+            if not port_txt.isdigit() or not (1 <= int(port_txt) <= 65535):
+                continue
+            row_text = tr.get_text(" ", strip=True).upper()
+            if "SOCKS5" in row_text:
+                proto = "socks5"
+            elif "SOCKS4" in row_text:
+                proto = "socks4"
+            elif "HTTPS" in row_text:
+                proto = "https"
+            else:
+                proto = "http"
+            try:
+                out.append(ProxyRaw(ip, int(port_txt), proto))
+            except ValueError:
+                continue
+        return out
+
     async def crawl(self) -> List[ProxyRaw]:
         html = await self.fetch(self.urls[0], timeout=25, retries=2)
         if not html:
             return []
         results = self._parse_rows(html)
+        if not results:
+            results = self._parse_plain_ip_port_rows(html)
         if results:
             logger.info(f"{self.display_name} 解析 {len(results)} 条")
         return results
diff --git a/app/plugins/fpw_socks_ssl_proxy.py b/app/plugins/fpw_socks_ssl_proxy.py
index dffdd58..cb79386 100644
--- a/app/plugins/fpw_socks_ssl_proxy.py
+++ b/app/plugins/fpw_socks_ssl_proxy.py
@@ -26,7 +26,7 @@ class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
     def _parse_page(self, html: str, default_protocol: str) -> List[ProxyRaw]:
         results = []
         pattern = re.compile(
-            r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>\s*<td[^>]*>\s*(\d+)",
+            r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>\s*<td[^>]*>\s*(\d{1,5})",
             re.I,
         )
         for ip, port in pattern.findall(html):
@@ -47,7 +47,11 @@ class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
             proto = "socks4"
         else:
             proto = "http"
-        batch = self._parse_page(html, proto)
+        batch = self.parse_html_table(
+            html, column_map={"ip": 0, "port": 1}, protocol=proto
+        )
+        if not batch:
+            batch = self._parse_page(html, proto)
         results.extend(batch)
         if batch:
             logger.info(f"{self.display_name} {url}: {len(batch)} 条")
diff --git a/app/plugins/fpw_spys_one.py b/app/plugins/fpw_spys_one.py
index 29ee73f..994f439 100644
--- a/app/plugins/fpw_spys_one.py
+++ b/app/plugins/fpw_spys_one.py
@@ -12,12 +12,13 @@ class FpwSpysOnePlugin(BaseHTTPPlugin):
     name = "fpw_spys_one"
     display_name = "Spys.one"
     description = "spys.one HTTP/SOCKS 列表(POST 筛选 + XOR 端口解码)"
+    crawl_timeout_seconds = 180.0
 
     def __init__(self):
         super().__init__()
         self.pages: List[Tuple[str, str, str]] = [
-            ("http", "http://spys.one/en/http-proxy-list/", "1"),
-            ("socks5", "http://spys.one/en/socks-proxy-list/", "2"),
+            ("http", "https://spys.one/en/http-proxy-list/", "1"),
+            ("socks5", "https://spys.one/en/socks-proxy-list/", "2"),
         ]
 
     @staticmethod
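
Note for reviewers, appended after the diff: the proxy-list.download change
keys its fallback order on body shape (a JSON object or array first, then
bare ip:port text). Below is a minimal standalone sketch of that ordering
for quick local verification. parse_body and parse_text are hypothetical
stand-ins for the plugin's _parse_api_body and parse_text_proxies, and they
return plain (ip, port) tuples instead of ProxyRaw.

# sketch.py (illustrative only, not part of the patch)
import json
import re
from typing import List, Tuple

IP_PORT = re.compile(r"\b(\d{1,3}(?:\.\d{1,3}){3}):(\d{1,5})\b")

def parse_text(text: str) -> List[Tuple[str, int]]:
    # Plain "ip:port" bodies (the main API and the jsDelivr mirror).
    return [
        (ip, int(port))
        for ip, port in IP_PORT.findall(text)
        if 1 <= int(port) <= 65535
    ]

def parse_body(text: str) -> List[Tuple[str, int]]:
    text = (text or "").strip()
    if not text:
        return []
    if text[0] in "{[":  # looks like JSON: try a structured parse first
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return parse_text(text)  # mislabeled JSON: regex scan instead
        if isinstance(data, list):
            # e.g. ["1.2.3.4:8080", ...]
            return parse_text(" ".join(str(x) for x in data))
        if isinstance(data, dict):
            # some deployments wrap the payload in a "list"-like field
            for key in ("list", "data", "proxies"):
                raw = data.get(key)
                if isinstance(raw, str):
                    return parse_text(raw)
        return []
    return parse_text(text)

assert parse_body("1.2.3.4:8080\n5.6.7.8:3128") == [("1.2.3.4", 8080), ("5.6.7.8", 3128)]
assert parse_body('{"list": "1.2.3.4:8080"}') == [("1.2.3.4", 8080)]
assert parse_body("not json at all") == []

Under these assumptions a wrapped JSON body and a bare two-line text body
decode to the same tuples, which is what the patch relies on when the main
API, the jsDelivr mirror, and the ProxyScrape fallback disagree on response
shape.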