- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
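As context for the per-plugin timeout bullet, a minimal sketch of how a scheduler could enforce crawl_timeout_seconds per plugin instead of one global crawl_timeout setting. The attribute name and the 120-second default come from the commit message; SlowSourcePlugin, run_plugin, and the asyncio.wait_for wrapper are hypothetical illustration, not the repo's actual scheduler code.

import asyncio

from app.plugins.base import BaseHTTPPlugin


class SlowSourcePlugin(BaseHTTPPlugin):
    # Hypothetical plugin: per-plugin override of the 120 s default
    # named in the commit message.
    crawl_timeout_seconds = 120


async def run_plugin(plugin) -> list:
    # Hypothetical enforcement: cap each plugin's crawl() with its own
    # timeout rather than a single global crawl_timeout setting.
    try:
        return await asyncio.wait_for(
            plugin.crawl(), timeout=plugin.crawl_timeout_seconds
        )
    except asyncio.TimeoutError:
        return []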
"""premproxy.com 列表页表格。"""
|
|
import re
|
|
from typing import List
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from app.core.plugin_system import ProxyRaw
|
|
from app.plugins.base import BaseHTTPPlugin
|
|
from app.core.log import logger
|
|
|
|
|
|
class FpwPremproxyPlugin(BaseHTTPPlugin):
    name = "fpw_premproxy"
    display_name = "PremProxy"
    description = "premproxy.com HTTP/SOCKS list pages"

    def __init__(self):
        super().__init__()
        self.urls = [
            "https://premproxy.com/list/",
            "https://premproxy.com/socks-list/",
        ]

    def _parse_html(self, html: str) -> List[ProxyRaw]:
        soup = BeautifulSoup(html, "lxml")
        results: List[ProxyRaw] = []
        for tr in soup.find_all("tr"):
            tds = tr.find_all("td")
            if len(tds) < 2:
                continue
            ip = tds[0].get_text(strip=True)
            port = tds[1].get_text(strip=True)
            # Loose IPv4 shape check; these list pages publish IPv4 rows only.
            if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip):
                continue
            if not port.isdigit() or not (1 <= int(port) <= 65535):
                continue
            # Infer the protocol from the row text; test "socks5" before the
            # generic "socks" so SOCKS5 rows are not misread as SOCKS4.
            row = tr.get_text(" ", strip=True).lower()
            if "socks5" in row:
                proto = "socks5"
            elif "socks4" in row or "socks" in row:
                proto = "socks4"
            elif "https" in row:
                proto = "https"
            else:
                proto = "http"
            try:
                results.append(ProxyRaw(ip, int(port), proto))
            except ValueError:
                continue
        return results

    async def crawl(self) -> List[ProxyRaw]:
        merged: List[ProxyRaw] = []
        htmls = await self.fetch_all(self.urls, timeout=12, retries=1)
        for url, html in zip(self.urls, htmls):
            if not html:
                continue
            batch = self._parse_html(html)
            if batch:
                merged.extend(batch)
                logger.info(f"{self.display_name} {url}: {len(batch)} proxies")
        if merged:
            logger.info(f"{self.display_name} total: {len(merged)} proxies")
        return merged
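
A minimal manual smoke test for the plugin, assuming the module lives at app.plugins.fpw_premproxy (the module path is an assumption) and that BaseHTTPPlugin supplies the fetch_all used by crawl():

import asyncio

from app.plugins.fpw_premproxy import FpwPremproxyPlugin  # assumed module path


async def main() -> None:
    plugin = FpwPremproxyPlugin()
    proxies = await plugin.crawl()
    print(f"fetched {len(proxies)} proxies")
    for p in proxies[:5]:
        print(p)


if __name__ == "__main__":
    asyncio.run(main())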