feat: external plugin loading, score threshold, expiry cleanup and more improvements

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 18:53:33 +08:00
parent 7bc6d4e4de
commit 7d5eaa438a
13 changed files with 302 additions and 39 deletions

View File

@@ -19,7 +19,7 @@ _DEFAULTS: Dict[str, Any] = {
"validator_max_concurrency": 200,
"validator_connect_timeout": 3,
"crawler_num_validators": 50,
"crawler_max_queue_size": 500,
"crawler_max_queue_size": 48,
"log_level": "INFO",
"log_dir": "logs",
"ws_stats_interval_seconds": 1,

View File

@@ -102,14 +102,22 @@ class CrawlJob(Job):
proxies: List[ProxyRaw] = result.proxies if result else []
if proxies:
from app.core.config import settings as app_settings
from app.core.db import transaction
from app.repositories.proxy_repo import ProxyRepository
try:
initial = max(
app_settings.score_min,
min(app_settings.score_max, int(app_settings.score_valid)),
)
async with transaction() as db:
await ProxyRepository.upsert_many_from_crawl(db, proxies, 0)
await ProxyRepository.upsert_many_from_crawl(
db, proxies, initial
)
logger.info(
f"CrawlJob {self.id}: persisted {len(proxies)} crawled proxies as pending"
f"CrawlJob {self.id}: persisted {len(proxies)} crawled proxies "
f"as pending (initial score={initial})"
)
except Exception as e:
logger.error(

View File

@@ -1,7 +1,10 @@
"""插件注册中心 - 显式注册,类型安全,测试友好"""
import importlib
import importlib.util
import inspect
import os
import sys
from pathlib import Path
from typing import Dict, List, Type, Optional
from app.core.plugin_system.base import BaseCrawlerPlugin
from app.core.log import logger
@@ -77,6 +80,57 @@ class PluginRegistry:
except Exception as e:
logger.error(f"Failed to load module {module_name}: {e}")
def load_external_plugins_directory(self, directory: Path) -> int:
    """Load ``BaseCrawlerPlugin`` subclasses from the ``.py`` files in a directory.

    Every ``*.py`` file directly inside ``directory`` whose name does not
    start with ``_`` is executed as its own module. Each class found in that
    module which subclasses ``BaseCrawlerPlugin`` (other than the base class
    itself) and is not already registered is passed to ``self.register``.

    Classes without a truthy ``name`` attribute, and classes whose ``name``
    is already a key of ``self._plugins``, are skipped with a warning.
    A file that fails to load is logged and does not stop the remaining
    files from being processed.

    Args:
        directory: Directory to scan (non-recursive). It is resolved to an
            absolute path first; if it is not a directory nothing is loaded.

    Returns:
        The number of plugin classes registered by this call.
    """
    # Local import: only this method needs it.
    import zlib

    directory = Path(directory).resolve()
    if not directory.is_dir():
        logger.info("外部插件目录不存在,已跳过: %s", directory)
        return 0

    loaded = 0
    # Sorted so that load order (and therefore which duplicate wins) is stable.
    for path in sorted(directory.glob("*.py")):
        if path.name.startswith("_"):
            continue

        # The built-in hash() of a str is salted per process, which made the
        # module name differ on every run. A CRC of the path is stable and is
        # enough to tell apart same-named files from different directories.
        path_digest = zlib.crc32(os.fsencode(path))
        mod_name = f"proxypool_ext_{path.stem}_{path_digest}"

        try:
            spec = importlib.util.spec_from_file_location(mod_name, path)
            if spec is None or spec.loader is None:
                continue

            module = importlib.util.module_from_spec(spec)
            # Register before executing so the module can be resolved by name
            # while its own top-level code is running.
            sys.modules[mod_name] = module
            try:
                spec.loader.exec_module(module)
            except BaseException:
                # Do not leave a half-initialised module behind in
                # sys.modules when its top-level code fails.
                sys.modules.pop(mod_name, None)
                raise

            for attr_name in dir(module):
                obj = getattr(module, attr_name)
                if not (
                    inspect.isclass(obj)
                    and issubclass(obj, BaseCrawlerPlugin)
                    and obj is not BaseCrawlerPlugin
                    and obj not in self._plugins.values()
                ):
                    continue

                if not getattr(obj, "name", None):
                    logger.warning(
                        "跳过外部插件类(缺少 name: %s in %s",
                        obj.__name__,
                        path,
                    )
                    continue

                if obj.name in self._plugins:
                    logger.warning(
                        "外部插件 %s 与已注册插件重名,已跳过: %s",
                        obj.name,
                        path,
                    )
                    continue

                self.register(obj)
                loaded += 1
        except Exception as e:
            # Best effort: one broken plugin file must not block the others.
            logger.error("加载外部插件失败 %s: %s", path, e, exc_info=True)

    if loaded:
        logger.info("%s 额外加载 %s 个插件", directory, loaded)
    return loaded
# Global registry instance
registry = PluginRegistry()