- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s - fpw_socks_ssl: try parse_html_table before regex - fpw_hidemy: loose row scan when fixed columns fail - fpw_proxynova: plain IP/port row fallback - fpw_spys_one: HTTPS endpoints; crawl timeout 180s - fpw_gatherproxy: HTTPS + extra JSON key patterns - fpw_checkerproxy: lower min HTML length for parse - fpw_premproxy: ip:port regex fallback when few table rows Made-with: Cursor
61 lines
2.2 KiB
Python
61 lines
2.2 KiB
Python
"""socks-proxy.net / sslproxies.org 表格(README 参考 GetProxyFromSocks-proxy.py)。"""
|
||
import re
|
||
from typing import List
|
||
|
||
from app.core.plugin_system import ProxyRaw
|
||
from app.plugins.base import BaseHTTPPlugin
|
||
from app.core.log import logger
|
||
|
||
|
||
class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
    """Scrape proxy tables from sslproxies.org, socks-proxy.net and sibling sites.

    Each configured URL is fetched, its HTML table is parsed for IP/port
    pairs, and a regex scan is used as a fallback when the table parser
    yields nothing.

    NOTE(review): every row from a URL containing "socks-proxy" is labelled
    ``socks4``; presumably the site also lists other SOCKS versions --
    confirm whether the per-row version should be honoured.
    """

    name = "fpw_socks_ssl_proxy"
    display_name = "Socks-Proxy / SSLProxies"
    description = "socks-proxy.net 与 sslproxies.org 首页表格(HTTP/HTTPS 列表)"

    def __init__(self):
        """Configure the concurrency limit and the list of source pages."""
        super().__init__()
        self.max_concurrency = 2
        # Many mirror sites share the sslproxies template, and socks-proxy is
        # unstable on some networks; using several sources raises the odds
        # that at least one of them succeeds.
        self.urls = [
            "https://www.sslproxies.org/",
            "https://free-proxy-list.net/",
            "https://www.us-proxy.org/",
            "https://www.socks-proxy.net/",
        ]

    def _parse_page(self, html: str, default_protocol: str) -> List[ProxyRaw]:
        """Extract proxies from raw HTML with a regex over adjacent table cells.

        Args:
            html: Page markup to scan.
            default_protocol: Protocol label attached to every proxy found.

        Returns:
            One ``ProxyRaw`` per IP cell immediately followed by a port cell
            whose value lies in 1..65535. Entries for which ``ProxyRaw``
            raises ``ValueError`` are skipped.
        """
        # Matches "<ip></td> <td ...> <port>": an IPv4-looking cell followed
        # directly by a numeric cell.
        cell_pair = re.compile(
            r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>\s*<td[^>]*>\s*(\d{1,5})",
            re.I,
        )
        found = []
        for hit in cell_pair.finditer(html):
            address, port_text = hit.groups()
            if not port_text.isdigit():
                continue
            port_number = int(port_text)
            if not 1 <= port_number <= 65535:
                continue
            try:
                found.append(ProxyRaw(address, port_number, default_protocol))
            except ValueError:
                # Rejected by ProxyRaw; drop this entry and keep scanning.
                continue
        return found

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every configured page and collect the proxies found on each.

        Returns:
            The concatenated proxies from all pages that returned content.
            Pages with an empty/falsy body are skipped.
        """
        collected: List[ProxyRaw] = []
        pages = await self.fetch_all(self.urls, timeout=25, retries=2)
        # Assumes fetch_all yields one body per URL, in the same order --
        # TODO confirm against the base class.
        for source_url, page in zip(self.urls, pages):
            if not page:
                continue
            protocol = "socks4" if "socks-proxy" in source_url else "http"
            # Prefer the shared table parser; fall back to the regex scan
            # only when it comes back empty.
            rows = self.parse_html_table(
                page, column_map={"ip": 0, "port": 1}, protocol=protocol
            ) or self._parse_page(page, protocol)
            collected.extend(rows)
            if rows:
                logger.info(f"{self.display_name} {source_url}: {len(rows)} 条")
        if collected:
            logger.info(f"{self.display_name} 合计 {len(collected)} 条")
        return collected
|