feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove the global crawl_timeout setting (sketch below)
- Validator: fix connect vs. total timeout on save; add a SOCKS session LRU cache; drop a redundant semaphore
- Validation handler uses a single DB connection; batch upsert after crawl (sketch below); WorkerPool uses put_nowait
- Remove unused max_retries from the settings API/UI; settings maintenance SQL plus init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes (sketch below); task polling stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB (sketch below)
- .gitignore: explicit proxies.test.sqlite patterns; fix the plugin_service ValidationException import

Made-with: Cursor
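A few of these items are worth spelling out. The per-plugin timeout gives each crawler its own 120-second budget instead of one shared deadline, so a slow source only costs itself. A minimal sketch of that shape, assuming each plugin exposes an async crawl() returning a list of proxies (the names here are illustrative, not the project's actual API):

import asyncio

CRAWL_TIMEOUT_SECONDS = 120  # the new per-plugin setting named in the commit message


async def crawl_with_timeout(plugin):
    """Run one plugin's crawl under its own deadline (illustrative sketch)."""
    try:
        return await asyncio.wait_for(plugin.crawl(), timeout=CRAWL_TIMEOUT_SECONDS)
    except asyncio.TimeoutError:
        # Only the slow plugin is abandoned; the rest of the crawl proceeds.
        return []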
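Likewise, "single DB connection; batch upsert after crawl" usually means collecting results in memory and writing them with one executemany instead of opening a connection per proxy. A sketch under that assumption; the proxies table, its columns, and the unique (host, port) constraint are invented here for illustration:

import sqlite3


def batch_upsert(conn: sqlite3.Connection, proxies: list[dict]) -> None:
    """Upsert all crawled proxies in one statement on a shared connection.

    Assumes a hypothetical schema:
        CREATE TABLE proxies (host TEXT, port INTEGER, protocol TEXT,
                              UNIQUE (host, port));
    """
    conn.executemany(
        "INSERT INTO proxies (host, port, protocol) "
        "VALUES (:host, :port, :protocol) "
        "ON CONFLICT (host, port) DO UPDATE SET protocol = excluded.protocol",
        proxies,
    )
    conn.commit()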
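The delete-one endpoint is a POST rather than a DELETE on /api/proxies/{addr} because IPv6 literals contain colons, which makes a host:port path segment ambiguous to parse; sending the key in a JSON body sidesteps that entirely. A hedged sketch of that idea, assuming FastAPI and an in-memory store (the project's real framework and models are not shown in this excerpt):

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

# In-memory stand-in for the real proxy store (illustrative only).
store: set[tuple[str, int]] = {("2001:db8::1", 8080)}


class ProxyKey(BaseModel):
    host: str  # may contain ":" for IPv6, so it travels in the body
    port: int


@app.post("/api/proxies/delete-one")
async def delete_one(key: ProxyKey) -> dict:
    existed = (key.host, key.port) in store
    store.discard((key.host, key.port))
    return {"deleted": existed}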
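For the test-DB item, the usual pytest pattern is to set the environment variable in conftest.py before the application package is imported, so every test run opens db/proxies.test.sqlite instead of the real pool. A sketch under that assumption (the cleanup hook is optional and hypothetical):

# conftest.py -- sketch; the project's actual fixtures may differ.
import os

# Must run before any app module reads PROXYPOOL_DB_PATH at import time.
os.environ.setdefault("PROXYPOOL_DB_PATH", "db/proxies.test.sqlite")


def pytest_sessionfinish(session, exitstatus):
    """Optionally remove the throwaway DB once the test session ends."""
    try:
        os.remove(os.environ["PROXYPOOL_DB_PATH"])
    except FileNotFoundError:
        pass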
@@ -113,8 +113,8 @@ def _create_crawl_all_aggregator(job_ids, executor):
     class CrawlAllAggregator(Job):
         async def run(self):
             self._set_running()
-            # Wait for all child jobs to finish (wait up to 30 seconds)
-            for _ in range(300):
+            # Wait for all child jobs to finish (up to ~5 minutes, matching the frontend polling)
+            for _ in range(3000):
                 if self.is_cancelled:
                     break
                 all_done = all(
@@ -125,15 +125,56 @@ def _create_crawl_all_aggregator(job_ids, executor):
                     break
                 await asyncio.sleep(0.1)
             total = 0
-            valid = 0
-            invalid = 0
+            plugins_failed = 0
+            per_plugin = []
             for jid in job_ids:
                 job = executor.get_job(jid)
-                if job and job.result:
-                    total += job.result.get("proxy_count", 0)
-                    valid += job.result.get("success_count", 0)
-                    invalid += job.result.get("failure_count", 0)
-            result = {"total_crawled": total, "valid_count": valid, "invalid_count": invalid}
+                plugin_id = getattr(job, "plugin_id", "") if job else ""
+                proxy_count = 0
+                crawl_failed = False
+                err_msg = None
+                job_status = job.status.value if job else "missing"
+
+                if not job:
+                    per_plugin.append({
+                        "plugin_id": plugin_id,
+                        "proxy_count": 0,
+                        "crawl_failed": True,
+                        "error": "Job does not exist",
+                        "job_status": job_status,
+                    })
+                    plugins_failed += 1
+                    continue
+
+                if job.status.value == "failed":
+                    crawl_failed = True
+                    plugins_failed += 1
+                    err_msg = job.error or "Job failed"
+                elif job.result:
+                    r = job.result
+                    plugin_id = r.get("plugin_id") or plugin_id
+                    proxy_count = r.get("proxy_count", 0)
+                    total += proxy_count
+                    if r.get("crawl_failed") or r.get("failure_count", 0) > 0:
+                        crawl_failed = True
+                        plugins_failed += 1
+                        err_msg = r.get("error")
+                else:
+                    total += 0
+
+                per_plugin.append({
+                    "plugin_id": plugin_id,
+                    "proxy_count": proxy_count,
+                    "crawl_failed": crawl_failed,
+                    "error": err_msg,
+                    "job_status": job_status,
+                })
+
+            result = {
+                "total_crawled": total,
+                "plugins_failed": plugins_failed,
+                "per_plugin": per_plugin,
+            }
             if self.is_cancelled:
                 result["cancelled"] = True
             return result
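Two notes on the hunks above. The new wait budget is plain loop arithmetic: 3000 iterations with a 0.1 s sleep is about 300 s, the "~5 minutes" the new comment promises, where the old 300 iterations gave only 30 s. And since the result now carries a per_plugin list instead of bare valid/invalid counts, a consumer can report which source failed; a sketch of such a consumer, assuming only the result shape visible in the diff (the reporting function itself is hypothetical):

def summarize(result: dict) -> str:
    """Render a CrawlAllAggregator result (shape taken from the diff above)."""
    lines = [
        f"crawled {result['total_crawled']} proxies, "
        f"{result['plugins_failed']} plugin(s) failed"
    ]
    for p in result["per_plugin"]:
        if p["crawl_failed"]:
            # job_status is "missing" when the executor no longer has the job
            lines.append(f"  {p['plugin_id'] or '?'}: {p['error']} ({p['job_status']})")
    return "\n".join(lines)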