全面架构重构:建立分层架构与高度可扩展的插件系统

后端重构:
- 新增分层架构:API Routes -> Services -> Repositories -> Infrastructure
- 彻底移除全局单例,全面采用 FastAPI 依赖注入
- 新增 api/ 目录拆分路由(proxies, plugins, scheduler, settings, stats)
- 新增 services/ 业务逻辑层:ProxyService, PluginService, SchedulerService, ValidatorService, SettingsService
- 新增 repositories/ 数据访问层:ProxyRepository, SettingsRepository, PluginSettingsRepository
- 新增 models/ 层:Pydantic Schemas + Domain Models
- 重写 core/config.py:采用 Pydantic Settings 管理配置
- 新增 core/db.py:基于 asynccontextmanager 的连接管理,支持数据库迁移
- 新增 core/exceptions.py:统一业务异常体系

插件系统重构(核心):
- 新增 core/plugin_system/:BaseCrawlerPlugin + PluginRegistry
- 采用显式注册模式(装饰器 + plugins/__init__.py),类型安全、测试友好
- 新增 plugins/base.py:BaseHTTPPlugin 通用 HTTP 爬虫基类
- 迁移全部 7 个插件到新架构(fate0, proxylist_download, ip3366, ip89, kuaidaili, speedx, yundaili)
- 插件状态持久化到 plugin_settings 表

任务调度重构:
- 新增 core/tasks/queue.py:ValidationQueue + WorkerPool
- 解耦爬取与验证:爬虫只负责爬取,代理提交队列后由 Worker 异步验证
- 调度器定时从数据库拉取存量代理并分批投入验证队列

前端调整:
- 新增 frontend/src/services/ 层拆分 API 调用逻辑
- 调整 stores/ 和 views/ 使用 Service 层
- 保持 API 兼容性,页面无需大幅修改

其他:
- 新增 main.py 作为新入口
- 新增 DESIGN.md 架构设计文档
- 更新 requirements.txt 增加 pydantic-settings
This commit is contained in:
祀梦
2026-04-02 11:55:05 +08:00
parent a79f78b338
commit 209a744d94
56 changed files with 2891 additions and 2095 deletions

3
api/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .main import create_app
__all__ = ["create_app"]

55
api/deps.py Normal file
View File

@@ -0,0 +1,55 @@
"""依赖注入"""
from fastapi import Request
from services.proxy_service import ProxyService
from services.plugin_service import PluginService
from services.settings_service import SettingsService
from services.scheduler_service import SchedulerService
from services.validator_service import ValidatorService
from repositories.proxy_repo import ProxyRepository
from core.tasks.queue import ValidationQueue
from core.config import settings as app_settings
def get_proxy_service() -> ProxyService:
    """Dependency provider: build a new ProxyService for each call."""
    service = ProxyService()
    return service
def get_plugin_service() -> PluginService:
    """Dependency provider: build a new PluginService for each call."""
    service = PluginService()
    return service
def get_settings_service() -> SettingsService:
    """Dependency provider: build a new SettingsService for each call."""
    service = SettingsService()
    return service
def get_scheduler_service(request: Request) -> SchedulerService:
    """Return the scheduler service held on the application state.

    Args:
        request: Current request; only ``request.app.state`` is read.

    Returns:
        The object stored as ``app.state.scheduler_service``.
    """
    app_state = request.app.state
    return app_state.scheduler_service
def get_validation_queue(request: Request) -> ValidationQueue:
    """Return the validation queue held on the application state.

    Args:
        request: Current request; only ``request.app.state`` is read.

    Returns:
        The object stored as ``app.state.validation_queue``.
    """
    app_state = request.app.state
    return app_state.validation_queue
def create_scheduler_service() -> SchedulerService:
    """Create the SchedulerService at application startup (outside a request context).

    A ValidatorService and a ValidationQueue are configured from the
    application settings; the queue and a ProxyRepository are then handed to
    the SchedulerService.

    Returns:
        The newly constructed SchedulerService.
    """
    validator_options = {
        "timeout": app_settings.validator_timeout,
        "connect_timeout": app_settings.validator_connect_timeout,
        "max_concurrency": app_settings.validator_max_concurrency,
    }
    validator = ValidatorService(**validator_options)
    repo = ProxyRepository()

    # The worker count reuses the validator's max-concurrency setting.
    queue_options = {
        "validator": validator,
        "proxy_repo": repo,
        "db_ctx": get_db,
        "worker_count": app_settings.validator_max_concurrency,
        "score_valid": app_settings.score_valid,
        "score_invalid": app_settings.score_invalid,
        "score_min": app_settings.score_min,
        "score_max": app_settings.score_max,
    }
    validation_queue = ValidationQueue(**queue_options)

    return SchedulerService(validation_queue=validation_queue, proxy_repo=repo)
# 避免循环导入
from core.db import get_db

33
api/errors.py Normal file
View File

@@ -0,0 +1,33 @@
"""统一异常处理"""
from fastapi import Request
from fastapi.responses import JSONResponse
from pydantic import ValidationError
from core.exceptions import ProxyPoolException
from core.log import logger
async def proxy_pool_exception_handler(request: Request, exc: ProxyPoolException):
    """Turn a ProxyPoolException into the JSON error envelope.

    ``exc.code`` is used both as the HTTP status code and as the ``code``
    field of the body; ``exc.message`` becomes the ``message`` field.
    """
    body = {"code": exc.code, "message": exc.message, "data": None}
    return JSONResponse(status_code=exc.code, content=body)
async def pydantic_validation_handler(request: Request, exc: ValidationError):
    """Answer a pydantic ValidationError with a 422 JSON envelope.

    Args:
        request: The request being handled (unused).
        exc: The validation error; its ``errors()`` list is returned as ``data``.

    Returns:
        A JSONResponse with status 422 and the standard code/message/data body.
    """
    # Imported here so the block does not depend on a new module-level import.
    from fastapi.encoders import jsonable_encoder

    logger.error(f"Validation error: {exc}")
    return JSONResponse(
        status_code=422,
        content={
            "code": 422,
            "message": "参数验证失败",
            # errors() can carry values json.dumps rejects (for example
            # exception objects inside "ctx"); encode them first so the
            # handler itself cannot fail while rendering the response.
            "data": jsonable_encoder(exc.errors()),
        },
    )
async def general_exception_handler(request: Request, exc: Exception):
    """Fallback handler: log the exception with traceback and answer with HTTP 500."""
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    body = {"code": 500, "message": "服务器内部错误", "data": None}
    return JSONResponse(status_code=500, content=body)

39
api/lifespan.py Normal file
View File

@@ -0,0 +1,39 @@
"""应用生命周期管理"""
from contextlib import asynccontextmanager
from fastapi import FastAPI
from core.db import init_db
from core.config import settings as app_settings
from core.log import logger
from api.deps import create_scheduler_service
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Startup: initialise the database, create the scheduler service and expose
    it (together with its validation queue) on ``app.state``, then load the
    persisted settings to set the scheduler interval and, when
    ``auto_validate`` is enabled, start the scheduler. A failure while loading
    settings is logged and does not abort startup.

    Shutdown: stop the scheduler. This runs in a ``finally`` clause so it also
    happens when an exception or cancellation is raised at the ``yield``.
    """
    # Initialise the database
    await init_db()

    # Create the scheduler and attach it to app.state
    scheduler_service = create_scheduler_service()
    app.state.scheduler_service = scheduler_service
    app.state.validation_queue = scheduler_service.validation_queue

    # Load settings and decide whether to start the scheduler
    from services.settings_service import SettingsService

    settings_service = SettingsService()
    try:
        settings = await settings_service.get_settings()
        # NOTE(review): the fallback for an interval in minutes is the
        # validator *timeout* setting - confirm this is the intended default.
        scheduler_service.interval_minutes = settings.get(
            "validate_interval_minutes", app_settings.validator_timeout
        )
        if settings.get("auto_validate", True):
            await scheduler_service.start()
    except Exception as e:
        logger.error(f"Failed to load settings on startup: {e}")

    logger.info("API server started")

    try:
        yield
    finally:
        # Stop the scheduler on every exit path, not only on a clean shutdown
        await scheduler_service.stop()
        logger.info("API server shutdown")

55
api/main.py Normal file
View File

@@ -0,0 +1,55 @@
"""FastAPI 应用工厂"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from api.lifespan import lifespan
from api.routes import api_router
from api.errors import proxy_pool_exception_handler, pydantic_validation_handler, general_exception_handler
from core.exceptions import ProxyPoolException
from pydantic import ValidationError
from core.config import settings as app_settings
# 导入并注册所有插件(显式注册模式)
import plugins
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Wires the lifespan handler, the CORS middleware, the three exception
    handlers and the API router, then registers the ``/`` and ``/health``
    endpoints.

    Returns:
        The configured FastAPI instance.
    """
    application = FastAPI(title="代理池API", version="2.0.0", lifespan=lifespan)

    # CORS
    cors_options = {
        "allow_origins": app_settings.cors_origins_list,
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    application.add_middleware(CORSMiddleware, **cors_options)

    # Exception handlers (registered in the same order as before)
    handler_table = (
        (ProxyPoolException, proxy_pool_exception_handler),
        (ValidationError, pydantic_validation_handler),
        (Exception, general_exception_handler),
    )
    for exc_type, handler in handler_table:
        application.add_exception_handler(exc_type, handler)

    # Routes
    application.include_router(api_router)

    @application.get("/")
    async def root():
        """Landing endpoint reporting that the API is running."""
        return {"message": "欢迎使用代理池API", "status": "running", "data": None}

    @application.get("/health")
    async def health_check():
        """Report service health, including whether the scheduler is running."""
        from datetime import datetime

        scheduler_service = application.state.scheduler_service
        payload = {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "database": "connected",
        }
        payload["scheduler"] = "running" if scheduler_service.running else "stopped"
        payload["version"] = "2.0.0"
        return payload

    return application

9
api/routes/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
from fastapi import APIRouter
from . import stats, proxies, plugins, scheduler, settings
# Aggregate router: each route module's ``router`` is mounted in this fixed order.
api_router = APIRouter()
for _route_module in (stats, proxies, plugins, scheduler, settings):
    api_router.include_router(_route_module.router)
del _route_module  # keep the package namespace free of the loop variable

139
api/routes/plugins.py Normal file
View File

@@ -0,0 +1,139 @@
"""插件相关路由"""
from fastapi import APIRouter, Depends
from services.plugin_service import PluginService
from services.scheduler_service import SchedulerService
from models.schemas import PluginToggleRequest
from api.deps import get_plugin_service, get_scheduler_service
from core.log import logger
router = APIRouter(prefix="/api/plugins", tags=["plugins"])
def success_response(message: str, data=None):
    """Build the success envelope used by the plugin routes.

    Args:
        message: Human-readable status text.
        data: Optional payload; ``None`` when omitted.

    Returns:
        A dict with the keys ``code`` (always 200), ``message`` and ``data``.
    """
    envelope = {"code": 200}
    envelope["message"] = message
    envelope["data"] = data
    return envelope
def error_response(message: str, code: int = 500):
    """Build the error envelope used by the plugin routes.

    Args:
        message: Human-readable error text.
        code: Error code placed in the body; defaults to 500.

    Returns:
        A dict with the keys ``code``, ``message`` and ``data`` (always ``None``).
    """
    envelope = {"code": code}
    envelope["message"] = message
    envelope["data"] = None
    return envelope
@router.get("")
async def list_plugins(service: PluginService = Depends(get_plugin_service)):
    """Return every plugin reported by the service with its state and counters."""

    def _serialize(plugin):
        # Shape one plugin object into the dict exposed by the API.
        return {
            "id": plugin.id,
            # Kept for compatibility with the old version: "name" is used for display
            "name": plugin.display_name,
            "display_name": plugin.display_name,
            "description": plugin.description,
            "enabled": plugin.enabled,
            "last_run": plugin.last_run.isoformat() if plugin.last_run else None,
            "success_count": plugin.success_count,
            "failure_count": plugin.failure_count,
        }

    found = await service.list_plugins()
    serialized = [_serialize(entry) for entry in found]
    return success_response("获取插件列表成功", {"plugins": serialized})
@router.put("/{plugin_id}/toggle")
async def toggle_plugin(
    plugin_id: str,
    request: PluginToggleRequest,
    service: PluginService = Depends(get_plugin_service),
):
    """Enable or disable a plugin.

    Returns the error envelope with code 404 when the service reports that the
    toggle did not succeed; otherwise echoes the plugin id and its new state.
    """
    toggled = await service.toggle_plugin(plugin_id, request.enabled)
    if toggled:
        action = "启用" if request.enabled else "禁用"
        result = {"plugin_id": plugin_id, "enabled": request.enabled}
        return success_response(f"插件 {plugin_id}{action}", result)
    return error_response("插件不存在", 404)
@router.post("/{plugin_id}/crawl")
async def crawl_plugin(
    plugin_id: str,
    plugin_service: PluginService = Depends(get_plugin_service),
    scheduler_service: SchedulerService = Depends(get_scheduler_service),
):
    """Run a single plugin and push its proxies through the validation queue.

    Args:
        plugin_id: Identifier of the plugin to run.
        plugin_service: Service used to look up and run the plugin.
        scheduler_service: Owner of the validation queue.

    Returns:
        A success envelope with ``plugin_id``, ``proxy_count``, ``valid_count``
        and ``invalid_count``; an error envelope with code 404 when the plugin
        is unknown, or with the default code when crawling raises.
    """
    plugin = plugin_service.get_plugin(plugin_id)
    if not plugin:
        return error_response("插件不存在", 404)

    try:
        results = await plugin_service.run_plugin(plugin_id)
        if not results:
            # invalid_count is included so this payload has the same keys as
            # the normal result below.
            return success_response(
                f"插件 {plugin_id} 爬取完成,未获取到代理",
                {
                    "plugin_id": plugin_id,
                    "proxy_count": 0,
                    "valid_count": 0,
                    "invalid_count": 0,
                },
            )

        logger.info(f"Plugin {plugin_id} crawled {len(results)} proxies, sending to validation queue")
        queue = scheduler_service.validation_queue
        queue.reset_stats()
        await queue.submit(results)

        # Wait for the queue to drain (at most 30 seconds, so the frontend
        # request does not time out).
        try:
            await asyncio.wait_for(queue.drain(), timeout=30.0)
        except asyncio.TimeoutError:
            # Best effort: report the counters gathered so far, but leave a
            # trace that the numbers are partial.
            logger.warning(
                f"Plugin {plugin_id} validation did not finish within 30s; returning partial counts"
            )

        return success_response(
            f"插件 {plugin_id} 爬取并验证完成",
            {
                "plugin_id": plugin_id,
                "proxy_count": len(results),
                "valid_count": queue.valid_count,
                "invalid_count": queue.invalid_count,
            },
        )
    except Exception as e:
        # exc_info keeps the traceback, matching the general exception handler.
        logger.error(f"Crawl plugin {plugin_id} failed: {e}", exc_info=True)
        return error_response(f"插件爬取失败: {str(e)}")
@router.post("/crawl-all")
async def crawl_all(
    plugin_service: PluginService = Depends(get_plugin_service),
    scheduler_service: SchedulerService = Depends(get_scheduler_service),
):
    """Run all plugins and push the collected proxies through the validation queue.

    Args:
        plugin_service: Service used to run the plugins.
        scheduler_service: Owner of the validation queue.

    Returns:
        A success envelope with ``total_crawled``, ``valid_count`` and
        ``invalid_count``, or an error envelope when crawling raises.
    """
    try:
        results = await plugin_service.run_all_plugins()
        if not results:
            return success_response(
                "所有插件爬取完成,未获取到代理",
                {"total_crawled": 0, "valid_count": 0, "invalid_count": 0},
            )

        logger.info(f"All plugins crawled {len(results)} unique proxies, sending to validation queue")
        queue = scheduler_service.validation_queue
        queue.reset_stats()
        await queue.submit(results)

        # Wait for the queue to drain, but never longer than 60 seconds.
        try:
            await asyncio.wait_for(queue.drain(), timeout=60.0)
        except asyncio.TimeoutError:
            # Best effort: report the counters gathered so far, but leave a
            # trace that the numbers are partial.
            logger.warning(
                "Validation after crawl-all did not finish within 60s; returning partial counts"
            )

        return success_response(
            "所有插件爬取并验证完成",
            {
                "total_crawled": len(results),
                "valid_count": queue.valid_count,
                "invalid_count": queue.invalid_count,
            },
        )
    except Exception as e:
        # exc_info keeps the traceback, matching the general exception handler.
        logger.error(f"Crawl all failed: {e}", exc_info=True)
        return error_response(f"批量爬取失败: {str(e)}")
import asyncio

114
api/routes/proxies.py Normal file
View File

@@ -0,0 +1,114 @@
"""代理相关路由"""
from typing import Optional
from fastapi import APIRouter, Depends, Query
from services.proxy_service import ProxyService
from models.schemas import ProxyListRequest, BatchDeleteRequest
from api.deps import get_proxy_service
router = APIRouter(prefix="/api/proxies", tags=["proxies"])
def success_response(message: str, data=None):
    """Build the success envelope used by the proxy routes.

    Args:
        message: Human-readable status text.
        data: Optional payload; ``None`` when omitted.

    Returns:
        A dict with the keys ``code`` (always 200), ``message`` and ``data``.
    """
    return dict(code=200, message=message, data=data)
def error_response(message: str, code: int = 500):
    """Build the error envelope used by the proxy routes.

    Args:
        message: Human-readable error text.
        code: Error code placed in the body; defaults to 500.

    Returns:
        A dict with the keys ``code``, ``message`` and ``data`` (always ``None``).
    """
    return dict(code=code, message=message, data=None)
@router.post("")
async def list_proxies(
    request: ProxyListRequest,
    service: ProxyService = Depends(get_proxy_service),
):
    """Return one page of proxies selected by the filters in the request body."""
    query = {
        "page": request.page,
        "page_size": request.page_size,
        "protocol": request.protocol,
        "min_score": request.min_score,
        "max_score": request.max_score,
        "sort_by": request.sort_by,
        "sort_order": request.sort_order,
    }
    rows, total = await service.list_proxies(**query)

    entries = []
    for row in rows:
        entries.append(
            {
                "ip": row.ip,
                "port": row.port,
                "protocol": row.protocol,
                "score": row.score,
                "last_check": row.last_check.isoformat() if row.last_check else None,
            }
        )

    page_payload = {
        "list": entries,
        "total": total,
        "page": request.page,
        "page_size": request.page_size,
    }
    return success_response("获取代理列表成功", page_payload)
@router.get("/random")
async def get_random_proxy(service: ProxyService = Depends(get_proxy_service)):
    """Return one proxy chosen by the service, or a 404 envelope when none is available."""
    picked = await service.get_random_proxy()
    if picked:
        details = {
            "ip": picked.ip,
            "port": picked.port,
            "protocol": picked.protocol,
            "score": picked.score,
            "last_check": picked.last_check.isoformat() if picked.last_check else None,
        }
        return success_response("获取随机代理成功", details)
    return error_response("没有找到可用的代理", 404)
@router.get("/export/{fmt}")
async def export_proxies(
    fmt: str,
    protocol: Optional[str] = None,
    limit: int = Query(default=10000, ge=1, le=100000),
    service: ProxyService = Depends(get_proxy_service),
):
    """Stream the proxy list as a downloadable file.

    Args:
        fmt: Export format; one of ``csv``, ``txt`` or ``json``.
        protocol: Optional protocol filter passed to the service.
        limit: Maximum number of proxies, constrained to 1..100000.
        service: Service producing the export chunks.

    Returns:
        A StreamingResponse with an attachment header, or an error envelope
        with code 400 for an unsupported format.
    """
    # The supported formats are exactly the keys of this table.
    media_types = {"csv": "text/csv", "txt": "text/plain", "json": "application/json"}
    if fmt not in media_types:
        return error_response("不支持的导出格式", 400)

    from fastapi.responses import StreamingResponse

    async def stream_chunks():
        # Relay the chunks produced by the service one by one.
        async for piece in service.export_proxies(fmt, protocol, limit):
            yield piece

    download_headers = {"Content-Disposition": f"attachment; filename=proxies.{fmt}"}
    return StreamingResponse(
        stream_chunks(),
        media_type=media_types[fmt],
        headers=download_headers,
    )
@router.delete("/{ip}/{port}")
async def delete_proxy(
    ip: str,
    port: int,
    service: ProxyService = Depends(get_proxy_service),
):
    """Delete the proxy identified by ``ip`` and ``port`` and confirm success."""
    await service.delete_proxy(ip, port)
    confirmation = success_response("删除代理成功")
    return confirmation
@router.post("/batch-delete")
async def batch_delete(
    request: BatchDeleteRequest,
    service: ProxyService = Depends(get_proxy_service),
):
    """Delete several proxies at once and report how many were removed."""
    targets = []
    for entry in request.proxies:
        targets.append((entry.ip, entry.port))

    removed = await service.batch_delete(targets)
    return success_response(
        f"批量删除 {removed} 个代理成功",
        {"deleted_count": removed},
    )
@router.delete("/clean-invalid")
async def clean_invalid(service: ProxyService = Depends(get_proxy_service)):
    """Ask the service to remove invalid proxies and report the deleted count."""
    removed = await service.clean_invalid()
    summary = {"deleted_count": removed}
    return success_response(f"清理了 {removed} 个无效代理", summary)

78
api/routes/scheduler.py Normal file
View File

@@ -0,0 +1,78 @@
"""调度器相关路由"""
from fastapi import APIRouter, Depends
from services.scheduler_service import SchedulerService
from services.settings_service import SettingsService
from api.deps import get_scheduler_service
from core.log import logger
router = APIRouter(prefix="/api/scheduler", tags=["scheduler"])
def success_response(message: str, data=None):
    """Build the success envelope used by the scheduler routes.

    Args:
        message: Human-readable status text.
        data: Optional payload; ``None`` when omitted.

    Returns:
        A dict with the keys ``code`` (always 200), ``message`` and ``data``.
    """
    fields = ("code", "message", "data")
    return dict(zip(fields, (200, message, data)))
def error_response(message: str, code: int = 500):
    """Build the error envelope used by the scheduler routes.

    Args:
        message: Human-readable error text.
        code: Error code placed in the body; defaults to 500.

    Returns:
        A dict with the keys ``code``, ``message`` and ``data`` (always ``None``).
    """
    fields = ("code", "message", "data")
    return dict(zip(fields, (code, message, None)))
@router.post("/start")
async def start_scheduler(
    scheduler: SchedulerService = Depends(get_scheduler_service),
):
    """Start the validation scheduler and persist ``auto_validate = True``.

    Returns immediately with a success envelope when the scheduler is already
    running; any exception is logged and converted to an error envelope.
    """
    try:
        if not scheduler.running:
            await scheduler.start()

            # Persist the setting
            store = SettingsService()
            current = await store.get_settings()
            current["auto_validate"] = True
            from models.schemas import SettingsSchema

            await store.save_settings(SettingsSchema(**current))
            return success_response("验证调度器已启动", {"running": True})
        return success_response("验证调度器已在运行", {"running": True})
    except Exception as exc:
        logger.error(f"Start scheduler failed: {exc}")
        detail = str(exc)
        return error_response(f"启动调度器失败: {detail}")
@router.post("/stop")
async def stop_scheduler(
    scheduler: SchedulerService = Depends(get_scheduler_service),
):
    """Stop the validation scheduler and persist ``auto_validate = False``.

    Returns immediately with a success envelope when the scheduler is not
    running; any exception is logged and converted to an error envelope.
    """
    try:
        if scheduler.running:
            await scheduler.stop()

            # Persist the setting
            store = SettingsService()
            current = await store.get_settings()
            current["auto_validate"] = False
            from models.schemas import SettingsSchema

            await store.save_settings(SettingsSchema(**current))
            return success_response("验证调度器已停止", {"running": False})
        return success_response("验证调度器未运行", {"running": False})
    except Exception as exc:
        logger.error(f"Stop scheduler failed: {exc}")
        detail = str(exc)
        return error_response(f"停止调度器失败: {detail}")
@router.post("/validate-now")
async def validate_now(
    scheduler: SchedulerService = Depends(get_scheduler_service),
):
    """Trigger a full validation run and acknowledge that it has started."""
    try:
        # NOTE(review): called without ``await`` - presumably a synchronous
        # method that schedules the work; confirm against SchedulerService.
        scheduler.validate_all_now()
    except Exception as exc:
        logger.error(f"Validate now failed: {exc}")
        detail = str(exc)
        return error_response(f"启动验证失败: {detail}")
    else:
        return success_response("已开始全量验证", {"started": True})
@router.get("/status")
async def scheduler_status(
    scheduler: SchedulerService = Depends(get_scheduler_service),
):
    """Report whether the scheduler is running and its configured interval."""
    snapshot = {
        "running": scheduler.running,
        "interval_minutes": scheduler.interval_minutes,
    }
    return success_response("获取状态成功", snapshot)

41
api/routes/settings.py Normal file
View File

@@ -0,0 +1,41 @@
"""设置相关路由"""
from fastapi import APIRouter, Depends
from services.settings_service import SettingsService
from models.schemas import SettingsSchema
from api.deps import get_settings_service
from core.log import logger
router = APIRouter(prefix="/api/settings", tags=["settings"])
def success_response(message: str, data=None):
    """Build the success envelope used by the settings routes.

    Args:
        message: Human-readable status text.
        data: Optional payload; ``None`` when omitted.

    Returns:
        A dict with the keys ``code`` (always 200), ``message`` and ``data``.
    """
    status_code = 200
    return {"code": status_code, "message": message, "data": data}
def error_response(message: str, code: int = 500):
    """Build the error envelope used by the settings routes.

    Args:
        message: Human-readable error text.
        code: Error code placed in the body; defaults to 500.

    Returns:
        A dict with the keys ``code``, ``message`` and ``data`` (always ``None``).
    """
    payload = None
    return {"code": code, "message": message, "data": payload}
@router.get("")
async def get_settings(service: SettingsService = Depends(get_settings_service)):
    """Return the current settings, or an error envelope when loading fails."""
    try:
        current = await service.get_settings()
    except Exception as exc:
        logger.error(f"Get settings failed: {exc}")
        return error_response("获取设置失败")
    else:
        return success_response("获取设置成功", current)
@router.post("")
async def save_settings(
    request: SettingsSchema,
    service: SettingsService = Depends(get_settings_service),
):
    """Persist the submitted settings and echo them back on success.

    An error envelope is returned when the service reports failure or raises.
    """
    try:
        saved = await service.save_settings(request)
        if saved:
            return success_response("保存设置成功", request.model_dump())
        return error_response("保存设置失败")
    except Exception as exc:
        logger.error(f"Save settings failed: {exc}")
        detail = str(exc)
        return error_response(f"保存设置失败: {detail}")

30
api/routes/stats.py Normal file
View File

@@ -0,0 +1,30 @@
"""统计信息路由"""
from fastapi import APIRouter, Depends
from services.proxy_service import ProxyService
from services.scheduler_service import SchedulerService
from api.deps import get_proxy_service, get_scheduler_service
from core.log import logger
router = APIRouter(prefix="/api/stats", tags=["stats"])
def success_response(message: str, data=None):
    """Build the success envelope used by the stats route.

    Args:
        message: Human-readable status text.
        data: Optional payload; ``None`` when omitted.

    Returns:
        A dict with the keys ``code`` (always 200), ``message`` and ``data``.
    """
    response = {}
    response.update(code=200, message=message, data=data)
    return response
def error_response(message: str, code: int = 500):
    """Build the error envelope used by the stats route.

    Args:
        message: Human-readable error text.
        code: Error code placed in the body; defaults to 500.

    Returns:
        A dict with the keys ``code``, ``message`` and ``data`` (always ``None``).
    """
    response = {}
    response.update(code=code, message=message, data=None)
    return response
@router.get("")
async def get_stats(
    proxy_service: ProxyService = Depends(get_proxy_service),
    scheduler_service: SchedulerService = Depends(get_scheduler_service),
):
    """Return the proxy statistics together with the scheduler's running flag."""
    try:
        summary = await proxy_service.get_stats()
        # Add the scheduler state to the stats returned by the proxy service.
        summary["scheduler_running"] = scheduler_service.running
    except Exception as exc:
        logger.error(f"Get stats failed: {exc}")
        return error_response("获取统计信息失败")
    else:
        return success_response("获取统计信息成功", summary)