后端重构:
- 新增分层架构:API Routes -> Services -> Repositories -> Infrastructure
- 彻底移除全局单例,全面采用 FastAPI 依赖注入
- 新增 api/ 目录拆分路由(proxies, plugins, scheduler, settings, stats)
- 新增 services/ 业务逻辑层:ProxyService, PluginService, SchedulerService, ValidatorService, SettingsService
- 新增 repositories/ 数据访问层:ProxyRepository, SettingsRepository, PluginSettingsRepository
- 新增 models/ 层:Pydantic Schemas + Domain Models
- 重写 core/config.py:采用 Pydantic Settings 管理配置
- 新增 core/db.py:基于 asynccontextmanager 的连接管理,支持数据库迁移
- 新增 core/exceptions.py:统一业务异常体系

插件系统重构(核心):
- 新增 core/plugin_system/:BaseCrawlerPlugin + PluginRegistry
- 采用显式注册模式(装饰器 + plugins/__init__.py),类型安全、测试友好
- 新增 plugins/base.py:BaseHTTPPlugin 通用 HTTP 爬虫基类
- 迁移全部 7 个插件到新架构(fate0, proxylist_download, ip3366, ip89, kuaidaili, speedx, yundaili)
- 插件状态持久化到 plugin_settings 表

任务调度重构:
- 新增 core/tasks/queue.py:ValidationQueue + WorkerPool
- 解耦爬取与验证:爬虫只负责爬取,代理提交队列后由 Worker 异步验证
- 调度器定时从数据库拉取存量代理并分批投入验证队列

前端调整:
- 新增 frontend/src/services/ 层拆分 API 调用逻辑
- 调整 stores/ 和 views/ 使用 Service 层
- 保持 API 兼容性,页面无需大幅修改

其他:
- 新增 main.py 作为新入口
- 新增 DESIGN.md 架构设计文档
- 更新 requirements.txt 增加 pydantic-settings
89 lines
2.9 KiB
Python
89 lines
2.9 KiB
Python
"""调度器服务 - 定时验证存量代理"""
|
|
import asyncio
|
|
from datetime import datetime
|
|
from core.db import get_db
|
|
from repositories.proxy_repo import ProxyRepository
|
|
from core.tasks.queue import ValidationQueue
|
|
from core.config import settings as app_settings
|
|
from core.log import logger
|
|
|
|
|
|
class SchedulerService:
    """Scheduler that periodically re-validates the proxies stored in the database.

    On every cycle the scheduler loads all proxies through ``proxy_repo``,
    submits them to ``validation_queue`` in fixed-size batches and waits for
    each batch to drain before submitting the next one.

    Attributes are plain instance fields:
        validation_queue: queue the proxies are submitted to.
        proxy_repo: repository used to list the stored proxies.
        interval_minutes: pause between two cycles (30 by default).
        running: ``True`` between ``start()`` and ``stop()``.
    """

    def __init__(
        self,
        validation_queue: "ValidationQueue",
        proxy_repo: "ProxyRepository | None" = None,
    ):
        """Create a stopped scheduler.

        Args:
            validation_queue: Queue that receives the proxies to validate.
            proxy_repo: Repository used to read proxies. When omitted, a new
                ``ProxyRepository`` is created for this instance.
        """
        self.validation_queue = validation_queue
        # Build the default per instance: a call expression in the signature
        # would be evaluated once at definition time and shared by everyone.
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()
        self.interval_minutes = 30
        self.running = False
        self._task: asyncio.Task | None = None
        # Strong references to fire-and-forget tasks; the event loop itself
        # only keeps weak references to tasks.
        self._background_tasks: set[asyncio.Task] = set()

    async def start(self):
        """Start the validation queue and the periodic loop.

        Calling ``start()`` on a running scheduler only logs a warning.

        Raises:
            Whatever ``validation_queue.start()`` raises; in that case the
            scheduler is left in the stopped state so it can be started again.
        """
        if self.running:
            logger.warning("Scheduler already running")
            return
        # Set the flag before the first await so a concurrent start() call
        # sees the scheduler as running and returns early.
        self.running = True
        try:
            await self.validation_queue.start()
        except BaseException:
            # Roll back, otherwise every later start() would be a no-op.
            self.running = False
            raise
        self._task = asyncio.create_task(self._run_loop())
        logger.info("Scheduler started")

    async def stop(self):
        """Stop the periodic loop, then stop the validation queue.

        Tasks created by ``validate_all_now()`` are not cancelled here; they
        leave their batch loop the next time they check ``self.running``.
        """
        self.running = False
        loop_task, self._task = self._task, None
        if loop_task:
            loop_task.cancel()
            try:
                await loop_task
            except asyncio.CancelledError:
                pass
        await self.validation_queue.stop()
        logger.info("Scheduler stopped")

    async def validate_all_now(self):
        """Trigger one full validation pass in the background and return at once.

        Note: the pass submits batches only while ``self.running`` is true,
        so it does nothing useful when the scheduler has not been started.
        """
        task = asyncio.create_task(self._do_validate_all())
        # Keep the task referenced until it finishes so it cannot be
        # garbage-collected mid-run, and surface its failure in the log.
        self._background_tasks.add(task)
        task.add_done_callback(self._on_background_task_done)

    def _on_background_task_done(self, task: asyncio.Task) -> None:
        """Forget a finished background task and log its exception, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            logger.error(f"Manual validation failed: {error}")

    async def _run_loop(self):
        """Run validation passes forever, sleeping ``interval_minutes`` between them."""
        while self.running:
            try:
                await self._do_validate_all()
            except Exception as e:
                # A failed pass must not kill the loop; retry after the pause.
                logger.error(f"Scheduler loop error: {e}")
            # Sleep in one-second steps so a cleared ``running`` flag is
            # noticed within a second.
            for _ in range(self.interval_minutes * 60):
                if not self.running:
                    break
                await asyncio.sleep(1)

    async def _do_validate_all(self):
        """Submit every proxy stored in the database to the validation queue.

        Proxies are submitted in batches of 100 and each batch is drained
        before the next one is submitted. The pass stops early as soon as
        ``self.running`` is false.
        """
        logger.info("Starting scheduled validation for all proxies")
        # Only the listing needs the connection; release it before the
        # (potentially long) validation phase.
        async with get_db() as db:
            proxies = await self.proxy_repo.list_all(db)
        if not proxies:
            logger.info("No proxies to validate")
            return

        logger.info(f"Validating {len(proxies)} proxies from database")
        # Function-scope import kept from the original
        # (presumably to avoid an import cycle - TODO confirm).
        from models.domain import ProxyRaw

        batch_size = 100
        total_batches = (len(proxies) - 1) // batch_size + 1
        for batch_no, offset in enumerate(range(0, len(proxies), batch_size), start=1):
            if not self.running:
                logger.warning("Scheduled validation interrupted: scheduler is not running")
                return
            batch = proxies[offset : offset + batch_size]
            await self.validation_queue.submit(
                [ProxyRaw(p.ip, p.port, p.protocol) for p in batch]
            )
            # Wait until the current batch has been processed.
            await self.validation_queue.drain()
            logger.info(f"Validated batch {batch_no}/{total_batches}")

        logger.info("Scheduled validation completed")
|