feat: external plugin loading, score threshold, expiry cleanup and more improvements

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 18:53:33 +08:00
parent 7bc6d4e4de
commit 7d5eaa438a
13 changed files with 302 additions and 39 deletions

View File

@@ -5,7 +5,12 @@ from typing import List, Optional
from app.core.db import get_db
from app.core.plugin_system.registry import registry
from app.core.plugin_system.base import BaseCrawlerPlugin
from app.core.exceptions import PluginNotFoundException, ValidationException
from app.core.exceptions import (
PluginNotFoundException,
ProxyPoolException,
ValidationException,
)
from app.core.config import settings as app_settings
from app.repositories.settings_repo import PluginSettingsRepository
from app.models.domain import PluginInfo, ProxyRaw, CrawlResult
from app.core.log import logger
@@ -110,7 +115,8 @@ class PluginService:
async def run_all_plugins(self, plugin_runner) -> List[ProxyRaw]:
"""执行所有启用插件的爬取,限制并发数以避免触发目标站反爬"""
all_results: List[ProxyRaw] = []
semaphore = asyncio.Semaphore(5)
n = max(1, int(app_settings.crawler_num_validators))
semaphore = asyncio.Semaphore(n)
async def _run_with_limit(plugin_name: str):
plugin = self.get_plugin_or_raise(plugin_name)