- CrawlJob now acquires crawl_slot before the JobExecutor semaphore, so crawl-all cannot fill executor slots while its jobs are still queued (see the sketch below)
- BaseHTTPPlugin: longer connect budget for slow international links
- proxyscrape: jsDelivr mirror + longer GitHub/API phases
- fpw_*: higher timeouts/retries; lower internal concurrency on heavy multi-URL plugins

Made-with: Cursor
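The first item is the scheduling fix: crawl jobs wait for a dedicated crawl slot before competing for the executor's general semaphore, so a queued crawl-all burst blocks on its own limiter instead of occupying worker slots while idle. A minimal sketch of that acquisition order, assuming asyncio semaphores; crawl_slots, executor_slots, and run_crawl_job are hypothetical names, not this project's actual API:

import asyncio

# Hypothetical limits; the real values belong to CrawlJob/JobExecutor config.
crawl_slots = asyncio.Semaphore(2)      # slots reserved for crawl jobs
executor_slots = asyncio.Semaphore(8)   # JobExecutor's general worker pool

async def run_crawl_job(job):
    # Acquire the crawl slot FIRST: a burst of queued crawl jobs waits
    # here instead of holding executor slots while doing nothing.
    async with crawl_slots:
        async with executor_slots:
            return await job()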
"""hidemyna.me 免费代理列表表格。"""
|
||
from typing import List
|
||
|
||
from app.core.plugin_system import ProxyRaw
|
||
from app.plugins.base import BaseHTTPPlugin
|
||
from app.core.log import logger
|
||
|
||
|
||
class FpwHidemyPlugin(BaseHTTPPlugin):
|
||
name = "fpw_hidemy"
|
||
display_name = "HideMy.name"
|
||
description = "hidemyna.me 英文代理列表(HTTP/HTTPS/SOCKS)"
|
||
|
||
def __init__(self):
|
||
super().__init__()
|
||
self.urls = [
|
||
"https://hidemyna.me/en/proxy-list/",
|
||
"https://hidemyna.me/en/proxy-list/?type=hs",
|
||
"https://hidemyna.me/en/proxy-list/?type=socks4",
|
||
]
|
||
|
||
async def crawl(self) -> List[ProxyRaw]:
|
||
results: List[ProxyRaw] = []
|
||
htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
|
||
for url, html in zip(self.urls, htmls):
|
||
if not html:
|
||
continue
|
||
batch = self.parse_html_table(
|
||
html,
|
||
column_map={"ip": 0, "port": 1, "protocol": 4},
|
||
protocol="http",
|
||
)
|
||
if batch:
|
||
results.extend(batch)
|
||
logger.info(f"{self.display_name} {url}: {len(batch)} 条")
|
||
if results:
|
||
logger.info(f"{self.display_name} 合计 {len(results)} 条")
|
||
return results
|
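For orientation, parse_html_table is defined on BaseHTTPPlugin and is not shown in this file. The sketch below is an assumed reading of its contract, inferred only from the call site: column_map gives zero-based <td> indices, and protocol is the fallback when a row carries no usable type. It is illustrative, named parse_html_table_sketch to avoid claiming it matches the real helper, and returns plain dicts rather than ProxyRaw, whose shape is also not visible here.

from typing import Dict, List
from bs4 import BeautifulSoup

def parse_html_table_sketch(html: str, column_map: Dict[str, int],
                            protocol: str = "http") -> List[dict]:
    # Sketch only: the real BaseHTTPPlugin helper may differ in detail.
    rows: List[dict] = []
    for tr in BeautifulSoup(html, "html.parser").select("table tr"):
        cells = [td.get_text(strip=True) for td in tr.find_all("td")]
        if len(cells) <= max(column_map.values()):
            continue  # header row or malformed row
        proto = cells[column_map["protocol"]].lower() if "protocol" in column_map else ""
        rows.append({
            "ip": cells[column_map["ip"]],
            "port": cells[column_map["port"]],
            "protocol": proto or protocol,  # fall back to the default type
        })
    return rows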