修复问题: - 添加缺失的 httpx 依赖到 requirements.txt - 修复前端批量删除参数格式与后端不匹配(数组->对象数组) - 移除 app/api/main.py 中重复创建 app 的冗余代码 - 修复 Plugins.vue v-model 直接修改 store 状态的 Vue 警告 - 修复 README 端口/启动命令文档与实际配置不一致 - 修正 pytest.ini 过时配置 (asyncio_default_fixture_loop_scope) - 修复 WebUI index.html 语言设置为 zh-CN - 修复 .gitignore 错误忽略 tests/ 目录 后端优化: - 修复调度器默认间隔从 5 秒改为 30 分钟,避免无节制验证 - 修复 validate_all_now 在调度器停止时无法执行的 bug - 设置保存后热更新运行中调度器的验证间隔 - 将 update_score 优化为原子单事务 SQL,消除并发竞态 - 导出功能改为真正的流式分批读取(iter_batches),降低大导出内存占用 - ProxyResponse Schema 补齐 response_time_ms 字段 - 日志级别改为从配置动态读取,不再硬编码 INFO - 清理 validator_service 中的冗余 try/finally 代码 插件健壮性: - 修复 ip3366/ip89/kuaidaili/proxylist_download/speedx/yundaili/proxyscrape 的端口范围检查和 IPv6 地址解析问题(改用 rsplit + 1-65535 校验) - 修复 PluginService.list_plugins 并发竞争条件 - 修复 run_all_plugins 去重逻辑与数据库 UNIQUE 约束保持一致 - 修复 proxyscrape 异常时错误跳过 fallback 的 bug 测试: - 新增 7 个插件解析单元测试 - 新增 update_score 自动删除和 iter_batches 流式读取测试 - 全部 74 个测试通过
167 lines
6.8 KiB
Python
167 lines
6.8 KiB
Python
"""插件业务服务"""
|
|
import asyncio
|
|
from datetime import datetime
|
|
from typing import List, Optional
|
|
from app.core.db import get_db
|
|
from app.core.plugin_system.registry import registry
|
|
from app.core.plugin_system.base import BaseCrawlerPlugin
|
|
from app.repositories.settings_repo import PluginSettingsRepository
|
|
from app.models.domain import PluginInfo, ProxyRaw
|
|
from app.core.log import logger
|
|
|
|
|
|
class PluginService:
|
|
"""插件业务服务:管理插件生命周期、执行爬取、配置管理"""
|
|
|
|
def __init__(self):
|
|
self.plugin_settings_repo = PluginSettingsRepository()
|
|
self._stats: dict[str, dict] = {}
|
|
|
|
async def list_plugins(self) -> List[PluginInfo]:
|
|
"""获取所有插件信息(合并持久化状态和配置)"""
|
|
async with get_db() as db:
|
|
db_states = await self.plugin_settings_repo.list_all(db)
|
|
|
|
result = []
|
|
for plugin in registry.list_plugins():
|
|
# 合并持久化状态(不修改全局实例,避免并发竞争)
|
|
state = db_states.get(plugin.name, {})
|
|
enabled = state.get("enabled", plugin.enabled)
|
|
if "config" in state and isinstance(state["config"], dict):
|
|
plugin.update_config(state["config"])
|
|
|
|
# 合并数据库统计与内存统计(内存优先)
|
|
db_stat = state.get("stats", {})
|
|
stat = {
|
|
"success_count": db_stat.get("success_count", 0),
|
|
"failure_count": db_stat.get("failure_count", 0),
|
|
"last_run": datetime.fromisoformat(db_stat["last_run"]) if db_stat.get("last_run") else None,
|
|
}
|
|
mem_stat = self._stats.get(plugin.name, {})
|
|
if mem_stat:
|
|
stat["success_count"] = mem_stat.get("success_count", stat["success_count"])
|
|
stat["failure_count"] = mem_stat.get("failure_count", stat["failure_count"])
|
|
if mem_stat.get("last_run"):
|
|
stat["last_run"] = mem_stat["last_run"]
|
|
|
|
result.append(PluginInfo(
|
|
id=plugin.name,
|
|
name=plugin.name,
|
|
display_name=plugin.display_name or plugin.name,
|
|
description=plugin.description or f"从 {plugin.name} 爬取代理",
|
|
enabled=enabled,
|
|
last_run=stat.get("last_run"),
|
|
success_count=stat.get("success_count", 0),
|
|
failure_count=stat.get("failure_count", 0),
|
|
))
|
|
return result
|
|
|
|
async def toggle_plugin(self, plugin_id: str, enabled: bool) -> bool:
|
|
plugin = registry.get(plugin_id)
|
|
if not plugin:
|
|
return False
|
|
async with get_db() as db:
|
|
success = await self.plugin_settings_repo.set_enabled(db, plugin_id, enabled)
|
|
if success:
|
|
plugin.enabled = enabled
|
|
logger.info(f"Plugin {plugin_id} toggled to {enabled}")
|
|
return success
|
|
|
|
async def get_plugin_config(self, plugin_id: str) -> Optional[dict]:
|
|
"""获取插件当前配置(合并默认值和持久化值)"""
|
|
plugin = registry.get(plugin_id)
|
|
if not plugin:
|
|
return None
|
|
async with get_db() as db:
|
|
saved = await self.plugin_settings_repo.get_config(db, plugin_id)
|
|
config = dict(plugin.default_config)
|
|
if saved:
|
|
config.update(saved)
|
|
return config
|
|
|
|
async def update_plugin_config(self, plugin_id: str, config: dict) -> bool:
|
|
"""更新插件配置(只保存已存在于 default_config 中的键)"""
|
|
plugin = registry.get(plugin_id)
|
|
if not plugin:
|
|
return False
|
|
# 过滤非法键
|
|
safe_config = {k: v for k, v in config.items() if k in plugin.default_config}
|
|
if not safe_config:
|
|
return False
|
|
plugin.update_config(safe_config)
|
|
async with get_db() as db:
|
|
return await self.plugin_settings_repo.set_config(db, plugin_id, plugin.config)
|
|
|
|
def get_plugin(self, plugin_id: str) -> Optional[BaseCrawlerPlugin]:
|
|
return registry.get(plugin_id)
|
|
|
|
async def run_plugin(self, plugin_id: str) -> List[ProxyRaw]:
|
|
"""执行单个插件爬取"""
|
|
plugin = self.get_plugin(plugin_id)
|
|
if not plugin:
|
|
raise ValueError(f"Plugin {plugin_id} not found")
|
|
if not plugin.enabled:
|
|
logger.warning(f"Plugin {plugin_id} is disabled, skip crawl")
|
|
return []
|
|
|
|
try:
|
|
results = await plugin.crawl()
|
|
self._record_stat(plugin_id, success=len(results))
|
|
logger.info(f"Plugin {plugin_id} crawled {len(results)} proxies")
|
|
return results
|
|
except Exception as e:
|
|
self._record_stat(plugin_id, failure=1)
|
|
logger.error(f"Plugin {plugin_id} crawl failed: {e}")
|
|
return []
|
|
finally:
|
|
await self._save_stats(plugin_id)
|
|
|
|
async def run_all_plugins(self) -> List[ProxyRaw]:
|
|
"""执行所有启用插件的爬取,限制并发数以避免触发目标站反爬"""
|
|
all_results: List[ProxyRaw] = []
|
|
semaphore = asyncio.Semaphore(5)
|
|
|
|
async def _run_with_limit(plugin_name: str):
|
|
async with semaphore:
|
|
return await self.run_plugin(plugin_name)
|
|
|
|
tasks = [_run_with_limit(plugin.name) for plugin in registry.list_plugins() if plugin.enabled]
|
|
results_list = await asyncio.gather(*tasks, return_exceptions=True)
|
|
for results in results_list:
|
|
if isinstance(results, Exception):
|
|
logger.error(f"Run all plugins error: {results}")
|
|
continue
|
|
all_results.extend(results)
|
|
# 去重(与数据库 UNIQUE(ip, port) 约束保持一致)
|
|
seen = set()
|
|
unique = []
|
|
for p in all_results:
|
|
key = (p.ip, p.port)
|
|
if key not in seen:
|
|
seen.add(key)
|
|
unique.append(p)
|
|
return unique
|
|
|
|
def _record_stat(self, plugin_id: str, success: int = 0, failure: int = 0):
|
|
if plugin_id not in self._stats:
|
|
self._stats[plugin_id] = {
|
|
"success_count": 0,
|
|
"failure_count": 0,
|
|
"last_run": None,
|
|
}
|
|
self._stats[plugin_id]["success_count"] += success
|
|
self._stats[plugin_id]["failure_count"] += failure
|
|
if success or failure:
|
|
self._stats[plugin_id]["last_run"] = datetime.now()
|
|
|
|
async def _save_stats(self, plugin_id: str):
|
|
"""将内存中的统计持久化到数据库"""
|
|
stats = self._stats.get(plugin_id, {})
|
|
payload = {
|
|
"success_count": stats.get("success_count", 0),
|
|
"failure_count": stats.get("failure_count", 0),
|
|
"last_run": stats.get("last_run").isoformat() if stats.get("last_run") else None,
|
|
}
|
|
async with get_db() as db:
|
|
await self.plugin_settings_repo.set_stats(db, plugin_id, payload)
|