Files
ProxyPool/app/plugins/fpw_gatherproxy.py
祀梦 e582067316 fix(plugins): fpw parsers for JSON API, mirrors, and looser HTML
- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows

Made-with: Cursor
2026-04-05 14:16:03 +08:00

73 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""gatherproxy.com 页面内嵌 JSONPROXY_IP / PROXY_PORT"""
import re
from typing import List
from app.core.plugin_system import ProxyRaw
from app.plugins.base import BaseHTTPPlugin
from app.core.log import logger
class FpwGatherproxyPlugin(BaseHTTPPlugin):
    """Extract proxies embedded as PROXY_IP / PROXY_PORT JSON in gatherproxy.com pages.

    The site frequently rate-limits, and the embedded JSON/JS comes in a few
    layouts, so extraction tries several patterns from most to least specific.
    """

    name = "fpw_gatherproxy"
    display_name = "GatherProxy"
    description = "gatherproxy.com 内嵌代理 JSON站点常有限流"

    # Pre-compiled once at class definition instead of on every call.
    # Order: JS-style PROXY_IP/PROXY_PORT pair, strict JSON object with
    # quoted keys, then loose "(proxy_)ip"/"(proxy_)port" key pairs.
    _PATTERNS = (
        re.compile(
            r"PROXY_IP['\"]?\s*:\s*['\"]([\d.]+)['\"].{0,120}?PROXY_PORT['\"]?\s*:\s*['\"](\d+)['\"]",
            re.DOTALL | re.IGNORECASE,
        ),
        re.compile(
            r"\{[^{}]*\"PROXY_IP\"\s*:\s*\"([\d.]+)\"[^{}]*\"PROXY_PORT\"\s*:\s*\"(\d+)\"[^{}]*\}"
        ),
        re.compile(
            r'"(?:proxy_)?ip"\s*:\s*"([\d.]+)"\s*,\s*"(?:proxy_)?port"\s*:\s*"?(\d+)"?',
            re.IGNORECASE,
        ),
    )

    def __init__(self):
        super().__init__()
        self.urls = [
            "https://www.gatherproxy.com/proxylist/anonymity/?t=Elite",
            "https://www.gatherproxy.com/proxylist/country/?c=United%20States",
        ]

    def _extract_from_text(self, text: str) -> List[ProxyRaw]:
        """Scan *text* with every known pattern and return all (ip, port) hits.

        Duplicates across patterns are NOT removed here; crawl() de-duplicates.
        """
        results: List[ProxyRaw] = []
        for pattern in self._PATTERNS:
            for m in pattern.finditer(text):
                ip = m.group(1)
                # The port group is (\d+), so int() cannot fail; the original
                # isdigit() pre-check was redundant and has been dropped.
                port = int(m.group(2))
                if not 1 <= port <= 65535:
                    continue
                try:
                    results.append(ProxyRaw(ip, port, "http"))
                except ValueError:
                    # presumably ProxyRaw rejects malformed IPs — skip the entry
                    continue
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch all configured URLs and return de-duplicated proxies.

        Empty/failed fetches are skipped; dedup key is (ip, port).
        """
        seen = set()
        out: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            for p in self._extract_from_text(html):
                key = (p.ip, p.port)
                if key not in seen:
                    seen.add(key)
                    out.append(p)
            if out:
                # BUGFIX: original f-string ran display_name and url together
                # with no separator, producing an unreadable log line.
                logger.info(f"{self.display_name} {url} 累计 {len(out)}")
        return out