refactor: 全面重构核心架构,消除反复修改的根因
- 删除 ValidationQueue 双轨持久化队列,替换为纯内存 AsyncWorkerPool
- 引入统一后台任务框架 JobExecutor(Job/CrawlJob/ValidateAllJob)
- 新增 PluginRunner 统一插件执行(超时、重试、健康检查、统计)
- 重构 SchedulerService 职责收敛为仅定时触发 ValidateAllJob
- 使用 AsyncExitStack 重构 lifespan,安全管理长生命周期资源
- 路由层瘦身 50%+,业务异常上抛由全局中间件统一处理
- 实现设置全热更新(WorkerPool 并发、Validator 超时即时生效)
- 前端 Store 强制写后重新拉取,消除乐观更新数据不同步
- 删除 queue.py / task_repo.py / task_service.py
- 新增 execution 单元测试,全部 85 个测试通过
This commit is contained in:
@@ -1,43 +1,42 @@
|
||||
"""调度器服务 - 定时验证存量代理"""
|
||||
"""调度器服务 - 定时触发全量验证"""
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from app.core.db import get_db
|
||||
from app.repositories.proxy_repo import ProxyRepository
|
||||
from app.repositories.task_repo import ValidationTaskRepository
|
||||
from app.core.tasks.queue import ValidationQueue
|
||||
from app.core.config import settings as app_settings
|
||||
from typing import Optional
|
||||
|
||||
from app.core.execution.executor import JobExecutor
|
||||
from app.core.execution.job import ValidateAllJob
|
||||
from app.core.log import logger
|
||||
from app.models.domain import ProxyRaw
|
||||
from app.services.task_service import task_service
|
||||
|
||||
|
||||
class SchedulerService:
|
||||
"""代理验证调度器"""
|
||||
"""代理验证调度器
|
||||
|
||||
职责单一:定时循环,触发 ValidateAllJob。
|
||||
不再直接持有验证队列或 ValidatorService。
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
validation_queue: ValidationQueue,
|
||||
proxy_repo: ProxyRepository = ProxyRepository(),
|
||||
executor: JobExecutor,
|
||||
interval_minutes: int = 30,
|
||||
):
|
||||
self.validation_queue = validation_queue
|
||||
self.proxy_repo = proxy_repo
|
||||
self.interval_minutes = 30
|
||||
self.executor = executor
|
||||
self.interval_minutes = interval_minutes
|
||||
self.running = False
|
||||
self._stop_event = asyncio.Event()
|
||||
self._task: asyncio.Task | None = None
|
||||
self._validate_task: asyncio.Task | None = None
|
||||
self._task: Optional[asyncio.Task] = None
|
||||
|
||||
async def start(self):
|
||||
async def start(self) -> None:
|
||||
if self.running:
|
||||
logger.warning("Scheduler already running")
|
||||
return
|
||||
self._stop_event.clear()
|
||||
self.running = True
|
||||
await self.validation_queue.start()
|
||||
self._task = asyncio.create_task(self._run_loop())
|
||||
logger.info("Scheduler started")
|
||||
|
||||
async def stop(self):
|
||||
async def stop(self) -> None:
|
||||
if not self.running:
|
||||
return
|
||||
self.running = False
|
||||
self._stop_event.set()
|
||||
if self._task:
|
||||
@@ -47,72 +46,26 @@ class SchedulerService:
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._task = None
|
||||
await self.validation_queue.stop()
|
||||
logger.info("Scheduler stopped")
|
||||
|
||||
def cancel_validate_task(self):
    """Cancel the background full-validation task if it is still running.

    Does nothing when no task has been stored in ``self._validate_task`` or
    when the stored task has already finished.
    """
    pending = self._validate_task
    if not pending or pending.done():
        # Nothing scheduled, or it already completed: nothing to cancel.
        return
    pending.cancel()
|
||||
def validate_all_now(self) -> str:
    """Trigger one full validation run immediately and return its job ID.

    A new ``ValidateAllJob`` is handed to ``self.executor.submit_job``; the
    identifier that call returns is logged and passed back to the caller.
    """
    submitted_id = self.executor.submit_job(ValidateAllJob())
    logger.info(f"ValidateAllJob submitted: {submitted_id}")
    return submitted_id
|
||||
|
||||
async def validate_all_now(self):
    """Start one full validation pass in the background without blocking.

    The pass runs as an asyncio task stored in ``self._validate_task``. If a
    previously stored task has not finished yet, this call is a no-op.
    """
    previous = self._validate_task
    if not previous or previous.done():
        # No run in flight: schedule a new one, flagged as not coming from
        # the periodic loop.
        self._validate_task = asyncio.create_task(
            self._do_validate_all(from_loop=False)
        )
|
||||
|
||||
async def _run_loop(self):
|
||||
async def _run_loop(self) -> None:
|
||||
"""定时循环"""
|
||||
while self.running:
|
||||
try:
|
||||
# 清理过期任务,防止内存无限增长
|
||||
task_service.cleanup_old_tasks()
|
||||
await self._do_validate_all(from_loop=True)
|
||||
self.executor.submit_job(ValidateAllJob())
|
||||
except Exception as e:
|
||||
logger.error(f"Scheduler loop error: {e}", exc_info=True)
|
||||
# 等待下一次
|
||||
try:
|
||||
await asyncio.wait_for(self._stop_event.wait(), timeout=self.interval_minutes * 60)
|
||||
await asyncio.wait_for(
|
||||
self._stop_event.wait(),
|
||||
timeout=self.interval_minutes * 60,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
|
||||
async def _do_validate_all(self, from_loop: bool = True):
    """Submit every proxy stored in the database to the validation queue.

    If the validation queue is not running it is started for the duration of
    this call and stopped again in the ``finally`` clause. Validation-task
    records older than 7 days are cleaned up before the proxies are loaded.
    Proxies are submitted in batches of 100; the method does not wait for
    the queue to drain.

    Args:
        from_loop: When true, batch submission stops early once
            ``self.running`` is false.

    Any ``Exception`` raised along the way is logged and not re-raised.
    """
    started_queue_locally = False
    try:
        logger.info("Starting scheduled validation for all proxies")

        # Queue not running (e.g. a manual run while the scheduler is
        # stopped): start it just for this pass.
        if not self.validation_queue._running:
            await self.validation_queue.start()
            started_queue_locally = True

        async with get_db() as db:
            # Remove validation-task records older than 7 days so the table
            # does not grow without bound.
            removed = await ValidationTaskRepository.cleanup_old(db, days=7)
            if removed:
                logger.info(f"Cleaned up {removed} old validation tasks")
            proxies = await self.proxy_repo.list_all(db)
            if not proxies:
                logger.info("No proxies to validate")
                return

        proxy_count = len(proxies)
        logger.info(f"Validating {proxy_count} proxies from database")

        # Submit in fixed-size batches; no blocking wait for the queue to drain.
        chunk = 100
        batch_total = (proxy_count - 1) // chunk + 1
        for batch_no, offset in enumerate(range(0, proxy_count, chunk), start=1):
            if from_loop and not self.running:
                break
            raws = [
                ProxyRaw(p.ip, p.port, p.protocol)
                for p in proxies[offset : offset + chunk]
            ]
            await self.validation_queue.submit(raws)
            logger.info(f"Submitted batch {batch_no}/{batch_total}")

        logger.info("Scheduled validation batches submitted")
    except Exception as e:
        logger.error(f"Scheduled validation error: {e}", exc_info=True)
    finally:
        # Only stop the queue if this call was the one that started it.
        if started_queue_locally:
            await self.validation_queue.stop()
|
||||
|
||||
Reference in New Issue
Block a user