重构: 迁移后端代码到 app 目录,前端移动到 WebUI,添加完整测试套件

主要变更:
- 后端代码从根目录迁移到 app/ 目录
- 前端代码从 frontend/ 重命名为 WebUI/
- 更新所有导入路径以适配新结构
- 提取公共 API 响应函数到 app/api/common.py
- 精简验证器服务代码
- 更新启动脚本和文档

测试:
- 新增完整测试套件 (tests/)
- 单元测试: 模型、仓库层
- 集成测试: 覆盖所有 22+ API 端点
- E2E 测试: 4个完整工作流场景
- 添加 pytest 配置和测试运行脚本
This commit is contained in:
祀梦
2026-04-04 13:32:36 +08:00
parent df3cc87f88
commit 38bd66128b
109 changed files with 2017 additions and 548 deletions

View File

@@ -0,0 +1,5 @@
"""插件系统包"""
from .base import BaseCrawlerPlugin, ProxyRaw
from .registry import registry
__all__ = ["BaseCrawlerPlugin", "ProxyRaw", "registry"]

View File

@@ -0,0 +1,55 @@
"""插件基类 - 所有爬虫插件必须继承此基类"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Dict, Any
@dataclass
class ProxyRaw:
"""爬虫产出的原始代理数据"""
ip: str
port: int
protocol: str = "http"
def __post_init__(self):
self.protocol = self.protocol.lower().strip()
if self.protocol not in ("http", "https", "socks4", "socks5"):
self.protocol = "http"
class BaseCrawlerPlugin(ABC):
"""爬虫插件基类
添加新爬虫只需:
1. 继承 BaseCrawlerPlugin
2. 实现 crawl() 方法返回 List[ProxyRaw]
3. 用 @registry.register 装饰或在 __init__ 中显式注册
"""
name: str = ""
display_name: str = ""
description: str = ""
enabled: bool = True
default_config: Dict[str, Any] = {}
def __init__(self):
self._config: Dict[str, Any] = dict(self.default_config or {})
@property
def config(self) -> Dict[str, Any]:
return self._config
def update_config(self, updates: Dict[str, Any]) -> None:
"""更新插件配置,只覆盖存在的键"""
for key, value in updates.items():
if key in self._config:
self._config[key] = value
@abstractmethod
async def crawl(self) -> List[ProxyRaw]:
"""爬取代理的核心方法。只负责爬取,不要在这里验证。"""
raise NotImplementedError
async def health_check(self) -> bool:
"""可选:检查插件健康状态"""
return True

View File

@@ -0,0 +1,77 @@
"""插件注册中心 - 显式注册,类型安全,测试友好"""
import importlib
import inspect
import os
from typing import Dict, List, Type, Optional
from app.core.plugin_system.base import BaseCrawlerPlugin
from app.core.log import logger
class PluginRegistry:
"""插件注册中心"""
def __init__(self):
self._plugins: Dict[str, Type[BaseCrawlerPlugin]] = {}
self._instances: Dict[str, BaseCrawlerPlugin] = {}
def register(self, plugin_cls: Type[BaseCrawlerPlugin]) -> Type[BaseCrawlerPlugin]:
"""注册一个插件类。支持装饰器语法。"""
if not inspect.isclass(plugin_cls) or not issubclass(plugin_cls, BaseCrawlerPlugin):
raise ValueError("Plugin must be a subclass of BaseCrawlerPlugin")
if not plugin_cls.name:
raise ValueError(f"Plugin {plugin_cls.__name__} must have a 'name' attribute")
self._plugins[plugin_cls.name] = plugin_cls
logger.info(f"Plugin registered: {plugin_cls.name} ({plugin_cls.__name__})")
return plugin_cls
def get(self, name: str) -> Optional[BaseCrawlerPlugin]:
"""获取插件实例(懒加载)"""
if name not in self._instances:
cls = self._plugins.get(name)
if cls:
self._instances[name] = cls()
return self._instances.get(name)
def list_plugins(self) -> List[BaseCrawlerPlugin]:
"""获取所有已注册插件的实例列表"""
result = []
for name in self._plugins:
instance = self.get(name)
if instance:
result.append(instance)
return result
def get_plugin_names(self) -> List[str]:
return list(self._plugins.keys())
def auto_discover(self, package_name: str):
"""自动扫描指定包下的所有模块并注册其中的插件类。
注意为了类型安全和可控性推荐显式注册。auto_discover 仅作为兼容。"""
try:
package = importlib.import_module(package_name)
package_dir = os.path.dirname(package.__file__)
except Exception as e:
logger.error(f"Auto discover failed for package {package_name}: {e}")
return
for filename in os.listdir(package_dir):
if filename.endswith(".py") and not filename.startswith("__"):
module_name = f"{package_name}.{filename[:-3]}"
try:
module = importlib.import_module(module_name)
for attr_name in dir(module):
obj = getattr(module, attr_name)
if (
inspect.isclass(obj)
and issubclass(obj, BaseCrawlerPlugin)
and obj is not BaseCrawlerPlugin
and obj not in self._plugins.values()
):
self.register(obj)
except Exception as e:
logger.error(f"Failed to load module {module_name}: {e}")
# 全局注册中心实例
registry = PluginRegistry()