- 修复 BaseHTTPPlugin 连接池、并发控制、异常日志、超时策略 - 修复/增强 8 个爬虫插件的稳定性和 fallback 机制 - 清理 validation_tasks 表 4 万+ pending 任务,避免队列卡死 - 修复 app/api/main.py 缺失全局 app 实例导致的 500 错误 - 提升前端 Axios 超时到 120 秒,避免请求断开 - 修复插件统计持久化和调度器生命周期问题
98 lines
3.4 KiB
Python
98 lines
3.4 KiB
Python
"""调度器服务 - 定时验证存量代理"""
|
|
import asyncio
|
|
from datetime import datetime
|
|
from app.core.db import get_db
|
|
from app.repositories.proxy_repo import ProxyRepository
|
|
from app.core.tasks.queue import ValidationQueue
|
|
from app.core.config import settings as app_settings
|
|
from app.core.log import logger
|
|
|
|
|
|
class SchedulerService:
    """Scheduler that periodically re-validates the proxies stored in the database.

    A background task runs one full validation pass, then sleeps for
    ``interval_minutes`` (or until a stop is requested) and repeats.
    A pass can also be triggered on demand with :meth:`validate_all_now`.
    """

    def __init__(
        self,
        validation_queue: "ValidationQueue",
        proxy_repo: "ProxyRepository | None" = None,
    ):
        """Create a scheduler bound to a validation queue.

        Args:
            validation_queue: Queue that proxies are submitted to for validation.
            proxy_repo: Repository used to list stored proxies. When omitted,
                a new ``ProxyRepository`` is created for this instance.
        """
        self.validation_queue = validation_queue
        # Build the default lazily: a ``ProxyRepository()`` default in the
        # signature is evaluated once at definition time and would be shared
        # by every scheduler instance.
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()
        self.interval_minutes = 30
        self.running = False
        # Set by stop() to wake the loop out of its inter-pass sleep.
        self._stop_event = asyncio.Event()
        # Periodic loop task created by start().
        self._task: asyncio.Task | None = None
        # On-demand pass created by validate_all_now().
        self._validate_task: asyncio.Task | None = None

    async def start(self):
        """Start the validation queue and the periodic loop.

        Does nothing except log a warning when already running.

        Raises:
            Whatever ``validation_queue.start()`` raises; in that case the
            scheduler is left in the stopped state so it can be started again.
        """
        if self.running:
            logger.warning("Scheduler already running")
            return
        self._stop_event.clear()
        self.running = True
        try:
            await self.validation_queue.start()
        except BaseException:
            # Roll back the flag; otherwise a failed start would make every
            # later start() bail out with "already running".
            self.running = False
            raise
        self._task = asyncio.create_task(self._run_loop())
        logger.info("Scheduler started")

    async def stop(self):
        """Stop the periodic loop, any on-demand pass, and the validation queue."""
        self.running = False
        self._stop_event.set()
        loop_task, self._task = self._task, None
        manual_task, self._validate_task = self._validate_task, None
        # Cancel the on-demand pass too, so it is not left awaiting
        # submit()/drain() on a queue that is about to be stopped.
        for task in (loop_task, manual_task):
            await self._cancel_and_wait(task)
        await self.validation_queue.stop()
        logger.info("Scheduler stopped")

    @staticmethod
    async def _cancel_and_wait(task):
        """Cancel ``task`` (if any) and wait until it has finished."""
        if task is None:
            return
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        except Exception as e:
            # Keep shutting down: the queue must still be stopped.
            logger.error(f"Scheduler task ended with error: {e}")

    async def validate_all_now(self):
        """Run one full validation pass in the background without blocking.

        Returns immediately. If a previously requested pass is still in
        progress, no new pass is started. Note that the pass stops before
        submitting any batch while ``running`` is False.
        """
        if self._validate_task and not self._validate_task.done():
            return
        self._validate_task = asyncio.create_task(self._do_validate_all())

    async def _run_loop(self):
        """Periodic loop: validate everything, then wait for the next round."""
        while self.running:
            try:
                await self._do_validate_all()
            except Exception as e:
                logger.error(f"Scheduler loop error: {e}")
            # Sleep until the next round; a stop request ends the wait early.
            try:
                await asyncio.wait_for(self._stop_event.wait(), timeout=self.interval_minutes * 60)
            except asyncio.TimeoutError:
                pass

    async def _do_validate_all(self):
        """Validate every proxy currently stored in the database.

        Loads all proxies through ``proxy_repo.list_all``, submits them to the
        validation queue in batches of 100, and waits for the queue to drain
        after each batch. Stops early once ``running`` becomes False.
        Exceptions are logged and not re-raised.
        """
        try:
            logger.info("Starting scheduled validation for all proxies")
            async with get_db() as db:
                proxies = await self.proxy_repo.list_all(db)
                if not proxies:
                    logger.info("No proxies to validate")
                    return

            logger.info(f"Validating {len(proxies)} proxies from database")
            from app.models.domain import ProxyRaw

            # Submit to the validation queue in batches.
            batch_size = 100
            for i in range(0, len(proxies), batch_size):
                if not self.running:
                    break
                batch = proxies[i : i + batch_size]
                await self.validation_queue.submit([
                    ProxyRaw(p.ip, p.port, p.protocol) for p in batch
                ])
                # Wait for the current batch to be fully processed.
                await self.validation_queue.drain()
                logger.info(f"Validated batch {i//batch_size + 1}/{(len(proxies)-1)//batch_size + 1}")

            logger.info("Scheduled validation completed")
        except Exception as e:
            logger.error(f"Scheduled validation error: {e}")