- CrawlJob waits on crawl_slot before the JobExecutor semaphore, so crawl-all does not fill executor slots while queued
- BaseHTTPPlugin: longer connect budget for slow international links
- proxyscrape: jsDelivr mirror + longer GitHub/API phases
- fpw_*: higher timeouts/retries; lower internal concurrency on heavy multi-URL plugins

Made-with: Cursor
"""批量爬取时限制同时发起 HTTP 的插件数,避免 crawl-all 与验证/聚合任务抢满执行器槽位。"""
|
|
import asyncio
|
|
from contextlib import asynccontextmanager
|
|
from typing import AsyncIterator
|
|
|
|
# 与单插件内 max_concurrency 相乘后仍应对外网友好;过小会拉长总耗时。
|
|
CRAWL_MAX_CONCURRENT = 4
|
|
|
|
_sem: asyncio.Semaphore | None = None
|
|
|
|
|
|
def _get_sem() -> asyncio.Semaphore:
|
|
global _sem
|
|
if _sem is None:
|
|
_sem = asyncio.Semaphore(CRAWL_MAX_CONCURRENT)
|
|
return _sem
|
|
|
|
|
|
@asynccontextmanager
|
|
async def crawl_slot() -> AsyncIterator[None]:
|
|
async with _get_sem():
|
|
yield
|
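
Below is a minimal sketch of the acquisition order the first bullet describes: a crawl job takes crawl_slot before the executor's own semaphore, so queued crawl-all work waits on the crawl gate instead of holding executor slots. The import path, EXECUTOR_MAX_JOBS, run_crawl_job, and _do_crawl are illustrative assumptions, not names taken from this repository.

# Sketch only: everything below except crawl_slot is assumed for illustration.
import asyncio

from crawl_limit import crawl_slot  # hypothetical module path for the file above

EXECUTOR_MAX_JOBS = 8  # assumed stand-in for the JobExecutor's slot count
_executor_sem = asyncio.Semaphore(EXECUTOR_MAX_JOBS)


async def _do_crawl(plugin_name: str) -> None:
    # Placeholder for a plugin's real HTTP work.
    await asyncio.sleep(0.1)


async def run_crawl_job(plugin_name: str) -> None:
    # crawl_slot first, executor slot second: a queued crawl-all job
    # blocks here without occupying an executor slot.
    async with crawl_slot():
        async with _executor_sem:
            await _do_crawl(plugin_name)


async def main() -> None:
    await asyncio.gather(*(run_crawl_job(f"plugin_{i}") for i in range(10)))


if __name__ == "__main__":
    asyncio.run(main())

Nesting in this order means crawl-all jobs queue on the crawl gate rather than the executor; an executor slot is only taken once the job is about to do real HTTP work.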