refactor(crawl): parallel plugins via JobExecutor; per-plugin throttle
- Remove the global crawl_slot gate; all CrawlJobs now share only the executor semaphore.
- Set max_concurrent_jobs = max(24, n_plugins + 8) to leave headroom for crawl-all plus the aggregator.
- Lower BaseHTTPPlugin max_concurrency from 3 to 2; lower fpw multi-URL plugins from 4 to 2.
- fetch_all: add a short random delay before each request to ease single-host pressure.

Made-with: Cursor
This commit is contained in:
@@ -1,11 +1,9 @@
|
||||
"""Job 执行器 - 统一管理所有后台 Job 的生命周期"""
|
||||
import asyncio
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from app.core.execution.job import CrawlJob, Job, JobStatus
|
||||
from app.core.execution.crawl_gate import crawl_slot
|
||||
from app.core.execution.job import Job, JobStatus
|
||||
from app.core.execution.worker_pool import AsyncWorkerPool
|
||||
from app.core.log import logger
|
||||
|
||||
@@ -86,14 +84,8 @@ class JobExecutor:
|
||||
logger.error(f"Job {job.id} failed: {e}", exc_info=True)
|
||||
|
||||
try:
|
||||
# CrawlJob 先等爬取槽位再占执行器,避免十几个任务占满 max_concurrent_jobs 却只排队等外网
|
||||
if isinstance(job, CrawlJob):
|
||||
async with crawl_slot():
|
||||
async with self._semaphore:
|
||||
await _execute()
|
||||
else:
|
||||
async with self._semaphore:
|
||||
await _execute()
|
||||
async with self._semaphore:
|
||||
await _execute()
|
||||
finally:
|
||||
self._tasks.pop(job.id, None)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user