Files
ProxyPool/app/services/scheduler_service.py
祀梦 f09a8e16c4 fix: 修复爬虫网络层、验证队列卡死及 API 500 错误
- 修复 BaseHTTPPlugin 连接池、并发控制、异常日志、超时策略
- 修复/增强 8 个爬虫插件的稳定性和 fallback 机制
- 清理 validation_tasks 表 4 万+ pending 任务,避免队列卡死
- 修复 app/api/main.py 缺失全局 app 实例导致的 500 错误
- 提升前端 Axios 超时到 120 秒,避免请求断开
- 修复插件统计持久化和调度器生命周期问题
2026-04-04 19:27:36 +08:00

98 lines
3.4 KiB
Python

"""调度器服务 - 定时验证存量代理"""
import asyncio
from datetime import datetime
from app.core.db import get_db
from app.repositories.proxy_repo import ProxyRepository
from app.core.tasks.queue import ValidationQueue
from app.core.config import settings as app_settings
from app.core.log import logger
class SchedulerService:
    """Scheduler that periodically re-validates the proxies stored in the database.

    A background task runs a full validation pass, then sleeps for
    ``interval_minutes`` (or until the scheduler is stopped) before the next
    pass. A one-off pass can also be triggered with :meth:`validate_all_now`.
    """

    def __init__(
        self,
        validation_queue: "ValidationQueue",
        proxy_repo: "ProxyRepository | None" = None,
    ):
        """Create a stopped scheduler.

        Args:
            validation_queue: Queue that receives the proxies to validate; it
                is started and stopped together with the scheduler.
            proxy_repo: Repository used to list the stored proxies. When
                omitted, a new ``ProxyRepository`` is created for this
                instance.
        """
        self.validation_queue = validation_queue
        # Build the default repository per instance: a call expression used as
        # a default argument is evaluated once, at definition time, and would
        # then be shared by every scheduler.
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()
        # Pause between two scheduled validation passes, in minutes.
        self.interval_minutes = 30
        self.running = False
        # Set by stop() to interrupt the sleep between two passes.
        self._stop_event = asyncio.Event()
        # Task running the periodic loop.
        self._task: asyncio.Task | None = None
        # Task running a manually triggered pass (see validate_all_now()).
        self._validate_task: asyncio.Task | None = None

    async def start(self):
        """Start the validation queue and the periodic loop.

        Logs a warning and does nothing when the scheduler is already running.

        Raises:
            Whatever ``validation_queue.start()`` raises; the scheduler is
            left in the stopped state in that case.
        """
        if self.running:
            logger.warning("Scheduler already running")
            return

        self._stop_event.clear()
        # Flip the flag before the first await so a concurrent start() call
        # hits the guard above.
        self.running = True
        try:
            await self.validation_queue.start()
        except BaseException:
            # Roll back, otherwise every later start() would be rejected as
            # "already running" although no loop task exists.
            self.running = False
            raise

        self._task = asyncio.create_task(self._run_loop())
        logger.info("Scheduler started")

    async def stop(self):
        """Stop the periodic loop, any manual pass, and the validation queue."""
        self.running = False
        self._stop_event.set()

        # Cancel the manual pass as well as the periodic loop: either of them
        # may be awaiting the queue that is about to be stopped.
        await self._cancel_and_wait(self._task)
        self._task = None
        await self._cancel_and_wait(self._validate_task)
        self._validate_task = None

        await self.validation_queue.stop()
        logger.info("Scheduler stopped")

    @staticmethod
    async def _cancel_and_wait(task):
        """Cancel *task* if it is still pending and wait until it has finished."""
        if task is None or task.done():
            return
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def validate_all_now(self):
        """Trigger one full validation pass in the background without blocking.

        Does nothing when a manually triggered pass is still in progress.
        Batches are only submitted while ``self.running`` is true, so the pass
        submits nothing when the scheduler has not been started.
        """
        if self._validate_task and not self._validate_task.done():
            return
        self._validate_task = asyncio.create_task(self._do_validate_all())

    async def _run_loop(self):
        """Run validation passes until the scheduler is stopped."""
        while self.running:
            try:
                await self._do_validate_all()
            except Exception as e:
                logger.error(f"Scheduler loop error: {e}")

            # Sleep until the next pass; stop() sets the event to wake us early.
            try:
                await asyncio.wait_for(
                    self._stop_event.wait(),
                    timeout=self.interval_minutes * 60,
                )
            except asyncio.TimeoutError:
                pass

    async def _do_validate_all(self):
        """Submit every stored proxy to the validation queue, batch by batch.

        Errors are logged and swallowed so that one failing pass does not end
        the periodic loop.
        """
        try:
            logger.info("Starting scheduled validation for all proxies")
            # The session is only needed for the query itself.
            async with get_db() as db:
                proxies = await self.proxy_repo.list_all(db)

            if not proxies:
                logger.info("No proxies to validate")
                return

            logger.info(f"Validating {len(proxies)} proxies from database")

            from app.models.domain import ProxyRaw

            # Submit in fixed-size batches and wait for each one to be
            # processed before submitting the next.
            batch_size = 100
            total_batches = (len(proxies) - 1) // batch_size + 1
            for batch_no, offset in enumerate(range(0, len(proxies), batch_size), 1):
                if not self.running:
                    break
                batch = proxies[offset : offset + batch_size]
                await self.validation_queue.submit(
                    [ProxyRaw(p.ip, p.port, p.protocol) for p in batch]
                )
                # Wait for the current batch to be processed.
                await self.validation_queue.drain()
                logger.info(f"Validated batch {batch_no}/{total_batches}")

            logger.info("Scheduled validation completed")
        except Exception as e:
            logger.error(f"Scheduled validation error: {e}")