fix(crawl): throttle concurrent CrawlJobs and relax fpw/proxyscrape HTTP
- CrawlJob now acquires crawl_slot before the JobExecutor semaphore, so crawl-all does not fill executor slots while queued (see the sketch below)
- BaseHTTPPlugin: longer connect budget for slow international links
- proxyscrape: add a jsDelivr mirror and lengthen the GitHub/API phases
- fpw_*: raise timeouts/retries; lower internal concurrency on heavy multi-URL plugins

Made-with: Cursor
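A minimal sketch of the slot-ordering idea in the first bullet, assuming plain asyncio semaphores. The names CRAWL_SLOTS, EXECUTOR_SLOTS, and run_crawl_job are hypothetical stand-ins; the actual CrawlJob/JobExecutor code is not part of this diff.

import asyncio

CRAWL_SLOTS = asyncio.Semaphore(4)      # assumed cap on concurrent CrawlJobs
EXECUTOR_SLOTS = asyncio.Semaphore(16)  # assumed JobExecutor-wide cap

async def run_crawl_job(job_id: int) -> None:
    # Acquire the crawl slot FIRST: a queued crawl-all burst blocks here,
    # outside the executor, leaving executor slots free for other job types.
    async with CRAWL_SLOTS:
        async with EXECUTOR_SLOTS:
            await asyncio.sleep(0.1)  # stand-in for the actual crawl work
            print(f"crawl job {job_id} done")

async def main() -> None:
    await asyncio.gather(*(run_crawl_job(i) for i in range(10)))

asyncio.run(main())

With the acquisition order reversed, ten queued crawl jobs would each hold an executor slot while waiting for a crawl slot; acquiring the crawl slot first keeps at most four of them inside the executor.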
@@ -14,7 +14,7 @@ class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
 
     def __init__(self):
         super().__init__()
-        self.max_concurrency = 6
+        self.max_concurrency = 4
         # Many mirrors share the sslproxies page template; socks-proxy is unstable on some networks, so multiple sources improve the success rate
         self.urls = [
            "https://www.sslproxies.org/",
@@ -39,7 +39,7 @@ class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
 
     async def crawl(self) -> List[ProxyRaw]:
         results: List[ProxyRaw] = []
-        htmls = await self.fetch_all(self.urls, timeout=12, retries=1)
+        htmls = await self.fetch_all(self.urls, timeout=25, retries=2)
         for url, html in zip(self.urls, htmls):
             if not html:
                 continue
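For context, a hedged sketch of how a fetch_all helper honoring the three knobs tuned above (timeout, retries, max_concurrency) could be wired together. BaseHTTPPlugin's real implementation is not shown in this diff; the aiohttp-based body below is an illustrative assumption, not the project's code.

import asyncio
from typing import List, Optional

import aiohttp

async def fetch_all(
    urls: List[str],
    timeout: float = 25,
    retries: int = 2,
    max_concurrency: int = 4,
) -> List[Optional[str]]:
    # A lower concurrency cap eases the load on slow or flaky mirrors.
    sem = asyncio.Semaphore(max_concurrency)

    async def fetch_one(session: aiohttp.ClientSession, url: str) -> Optional[str]:
        async with sem:
            for attempt in range(retries + 1):
                try:
                    async with session.get(
                        url, timeout=aiohttp.ClientTimeout(total=timeout)
                    ) as resp:
                        resp.raise_for_status()
                        return await resp.text()
                except (aiohttp.ClientError, asyncio.TimeoutError):
                    if attempt == retries:
                        return None  # caller skips empty results, as crawl() does
                    await asyncio.sleep(1.0 * (attempt + 1))  # simple linear backoff
        return None

    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch_one(session, u) for u in urls))

Under this reading, raising timeout from 12 to 25 and retries from 1 to 2 trades longer worst-case latency per URL for fewer empty results, while dropping max_concurrency from 6 to 4 limits how many mirrors are hit at once.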