后端重构: - 新增分层架构:API Routes -> Services -> Repositories -> Infrastructure - 彻底移除全局单例,全面采用 FastAPI 依赖注入 - 新增 api/ 目录拆分路由(proxies, plugins, scheduler, settings, stats) - 新增 services/ 业务逻辑层:ProxyService, PluginService, SchedulerService, ValidatorService, SettingsService - 新增 repositories/ 数据访问层:ProxyRepository, SettingsRepository, PluginSettingsRepository - 新增 models/ 层:Pydantic Schemas + Domain Models - 重写 core/config.py:采用 Pydantic Settings 管理配置 - 新增 core/db.py:基于 asynccontextmanager 的连接管理,支持数据库迁移 - 新增 core/exceptions.py:统一业务异常体系 插件系统重构(核心): - 新增 core/plugin_system/:BaseCrawlerPlugin + PluginRegistry - 采用显式注册模式(装饰器 + plugins/__init__.py),类型安全、测试友好 - 新增 plugins/base.py:BaseHTTPPlugin 通用 HTTP 爬虫基类 - 迁移全部 7 个插件到新架构(fate0, proxylist_download, ip3366, ip89, kuaidaili, speedx, yundaili) - 插件状态持久化到 plugin_settings 表 任务调度重构: - 新增 core/tasks/queue.py:ValidationQueue + WorkerPool - 解耦爬取与验证:爬虫只负责爬取,代理提交队列后由 Worker 异步验证 - 调度器定时从数据库拉取存量代理并分批投入验证队列 前端调整: - 新增 frontend/src/services/ 层拆分 API 调用逻辑 - 调整 stores/ 和 views/ 使用 Service 层 - 保持 API 兼容性,页面无需大幅修改 其他: - 新增 main.py 作为新入口 - 新增 DESIGN.md 架构设计文档 - 更新 requirements.txt 增加 pydantic-settings
112 lines
4.1 KiB
Python
112 lines
4.1 KiB
Python
"""插件业务服务"""
|
|
from datetime import datetime
|
|
from typing import List, Optional
|
|
from core.db import get_db
|
|
from core.plugin_system.registry import registry
|
|
from core.plugin_system.base import BaseCrawlerPlugin
|
|
from repositories.settings_repo import PluginSettingsRepository
|
|
from models.domain import PluginInfo, ProxyRaw
|
|
from core.log import logger
|
|
|
|
|
|
class PluginService:
    """Plugin business service: manages plugin state and runs crawls.

    Per-plugin run statistics are kept in memory only (``self._stats``);
    the enabled flag is persisted through ``PluginSettingsRepository``.
    """

    def __init__(self):
        self.plugin_settings_repo = PluginSettingsRepository()
        # plugin name -> {"success_count", "failure_count", "last_run"}
        self._stats: dict[str, dict] = {}

    async def list_plugins(self) -> List[PluginInfo]:
        """Return info for every registered plugin, merged with persisted state.

        If the settings repository holds an entry for a plugin, that entry
        overwrites the in-memory ``enabled`` flag on the plugin object
        before the info record is built.
        """
        async with get_db() as db:
            db_states = await self.plugin_settings_repo.list_all(db)

        result = []
        for plugin in registry.list_plugins():
            # A persisted state, when present, overrides the in-memory state.
            if plugin.name in db_states:
                plugin.enabled = db_states[plugin.name]

            # Plugins that have never been run get zeroed statistics.
            stat = self._stats.get(plugin.name, {
                "success_count": 0,
                "failure_count": 0,
                "last_run": None,
            })
            result.append(PluginInfo(
                id=plugin.name,
                name=plugin.name,
                display_name=plugin.display_name or plugin.name,
                description=plugin.description or f"从 {plugin.name} 爬取代理",
                enabled=plugin.enabled,
                last_run=stat.get("last_run"),
                success_count=stat.get("success_count", 0),
                failure_count=stat.get("failure_count", 0),
            ))
        return result

    async def toggle_plugin(self, plugin_id: str, enabled: bool) -> bool:
        """Enable or disable a plugin and persist the new state.

        Returns ``False`` when the plugin is not registered; otherwise
        returns whatever the repository's ``set_enabled`` call returned.
        The in-memory flag is only updated when persistence succeeded.
        """
        plugin = registry.get(plugin_id)
        if not plugin:
            return False
        async with get_db() as db:
            success = await self.plugin_settings_repo.set_enabled(db, plugin_id, enabled)
        if success:
            plugin.enabled = enabled
            logger.info(f"Plugin {plugin_id} toggled to {enabled}")
        return success

    def get_plugin(self, plugin_id: str) -> Optional[BaseCrawlerPlugin]:
        """Look up a plugin in the registry by its id."""
        return registry.get(plugin_id)

    async def run_plugin(self, plugin_id: str) -> List[ProxyRaw]:
        """Run the crawl of a single plugin.

        Returns the crawled proxies, or an empty list when the plugin is
        disabled or its crawl raised. Crawl exceptions are logged and
        counted as one failure rather than propagated.

        Raises:
            ValueError: if no plugin with ``plugin_id`` is registered.
        """
        plugin = self.get_plugin(plugin_id)
        if not plugin:
            raise ValueError(f"Plugin {plugin_id} not found")
        if not plugin.enabled:
            logger.warning(f"Plugin {plugin_id} is disabled, skip crawl")
            return []

        try:
            results = await plugin.crawl()
            # success_count accumulates the number of proxies crawled.
            self._record_stat(plugin_id, success=len(results))
            logger.info(f"Plugin {plugin_id} crawled {len(results)} proxies")
            return results
        except Exception as e:
            # Best effort: one broken plugin must not break the caller.
            self._record_stat(plugin_id, failure=1)
            logger.error(f"Plugin {plugin_id} crawl failed: {e}")
            return []

    async def run_all_plugins(self) -> List[ProxyRaw]:
        """Run every enabled plugin in turn and return de-duplicated results.

        Proxies are considered duplicates when they share the same
        ``(ip, port, protocol)``; the first occurrence is kept.
        """
        all_results: List[ProxyRaw] = []
        for plugin in registry.list_plugins():
            if not plugin.enabled:
                continue
            try:
                results = await self.run_plugin(plugin.name)
                all_results.extend(results)
            except Exception as e:
                logger.error(f"Run all plugins error at {plugin.name}: {e}")

        # De-duplicate while preserving first-seen order.
        seen = set()
        unique = []
        for p in all_results:
            key = (p.ip, p.port, p.protocol)
            if key not in seen:
                seen.add(key)
                unique.append(p)
        return unique

    def _record_stat(self, plugin_id: str, success: int = 0, failure: int = 0):
        """Accumulate run statistics for a plugin and stamp ``last_run``.

        ``last_run`` is updated on every call, including when both counters
        are zero: a crawl that succeeds but yields no proxies is still a
        run and must be visible as such.
        """
        stat = self._stats.setdefault(plugin_id, {
            "success_count": 0,
            "failure_count": 0,
            "last_run": None,
        })
        stat["success_count"] += success
        stat["failure_count"] += failure
        stat["last_run"] = datetime.now()
|