refactor(crawl): parallel plugins via JobExecutor; per-plugin throttle
- Remove global crawl_slot gate; all CrawlJobs share only executor semaphore
- max_concurrent_jobs = max(24, n_plugins+8) for crawl-all + aggregator headroom
- BaseHTTPPlugin max_concurrency 3->2; fpw multi-URL plugins 4->2
- fetch_all: short random delay before each request to ease single-host pressure

Made-with: Cursor
This commit is contained in:
@@ -106,8 +106,10 @@ async def lifespan(app: FastAPI):
|
||||
)
|
||||
await stack.enter_async_context(worker_pool)
|
||||
|
||||
# Job 执行器
|
||||
executor = JobExecutor(worker_pool=worker_pool, max_concurrent_jobs=10)
|
||||
# Job 执行器:槽位需覆盖「全部爬取」时 N 个 CrawlJob + 聚合任务 + 全量验证等
|
||||
_n_plugins = len(registry.list_plugins())
|
||||
_max_jobs = max(24, _n_plugins + 8)
|
||||
executor = JobExecutor(worker_pool=worker_pool, max_concurrent_jobs=_max_jobs)
|
||||
await stack.enter_async_context(executor)
|
||||
|
||||
# 插件运行器
|
||||
|
||||
Reference in New Issue
Block a user