"""
Proxy validation scheduler.

Periodically validates the proxies stored in the database and updates
their scores.
"""
import asyncio
from datetime import datetime, timedelta
from typing import Optional

from core.sqlite import SQLiteManager
from core.validator import ProxyValidator
from core.log import logger
from config import config


class ValidationScheduler:
    """Periodically re-validate stored proxies and adjust their scores.

    Attributes are plain instance state:

    * ``db`` -- the ``SQLiteManager`` used to read proxies and write scores.
    * ``validator`` -- the ``ProxyValidator`` created by :meth:`start`
      (``None`` until then).
    * ``running`` -- flag checked by the background loop and by the batch
      loop in :meth:`validate_all_proxies`.
    * ``task`` -- the background ``asyncio.Task`` running :meth:`_run_loop`.
    * ``interval_minutes`` -- pause between two validation passes.
    * ``batch_size`` -- number of proxies validated concurrently per batch.
    """

    def __init__(self):
        self.db = SQLiteManager()
        self.validator: Optional[ProxyValidator] = None
        self.running = False
        self.task: Optional[asyncio.Task] = None
        self.interval_minutes = 30  # validate every 30 minutes by default
        self.batch_size = 100  # number of proxies validated per batch

    async def start(self):
        """Start the scheduler; a second call while running only logs a warning."""
        if self.running:
            logger.warning("验证调度器已在运行")
            return

        self.running = True
        self.validator = ProxyValidator(
            max_concurrency=config.VALIDATOR_MAX_CONCURRENCY,
            timeout=config.VALIDATOR_TIMEOUT
        )
        self.task = asyncio.create_task(self._run_loop())
        logger.info("代理验证调度器已启动")

    async def stop(self):
        """Stop the scheduler: cancel the background task and close the validator."""
        self.running = False

        if self.task:
            self.task.cancel()
            try:
                await self.task
            except asyncio.CancelledError:
                # Expected: we cancelled the task ourselves just above.
                pass

        if self.validator:
            # NOTE(review): validate_all_proxies() already enters/exits the
            # validator with ``async with`` on every pass, so this may be a
            # second __aexit__ on the same object -- confirm ProxyValidator
            # tolerates that.
            await self.validator.__aexit__(None, None, None)

        logger.info("代理验证调度器已停止")

    async def _run_loop(self):
        """Run validation passes until ``running`` is cleared or the task is cancelled."""
        while self.running:
            try:
                await self.validate_all_proxies()
            except Exception as e:
                logger.error(f"验证循环出错: {e}")

            # Wait until the next validation pass.
            await asyncio.sleep(self.interval_minutes * 60)

    async def validate_all_proxies(self):
        """Validate every proxy returned by the database, batch by batch.

        Each batch of ``batch_size`` proxies is validated concurrently and
        the per-proxy score is updated by :meth:`_validate_and_update`.
        The batch loop stops early once ``running`` is false. Any exception
        is logged here and not propagated.
        """
        logger.info("开始批量验证代理...")

        try:
            # Fetch every stored proxy.
            proxies = await self.db.get_all_proxies()

            if not proxies:
                logger.info("数据库中没有代理需要验证")
                return

            logger.info(f"需要验证 {len(proxies)} 个代理")

            validated_count = 0
            valid_count = 0
            invalid_count = 0

            async with self.validator:
                for i in range(0, len(proxies), self.batch_size):
                    if not self.running:
                        break

                    batch = proxies[i:i + self.batch_size]
                    tasks = []

                    for proxy in batch:
                        # Rows are unpacked as 5-tuples; only the address
                        # parts are needed for validation.
                        ip, port, protocol, _score, _last_check = proxy
                        tasks.append(self._validate_and_update(ip, port, protocol))

                    # Validate one batch concurrently; exceptions come back
                    # as results instead of aborting the whole batch.
                    results = await asyncio.gather(*tasks, return_exceptions=True)

                    for result in results:
                        validated_count += 1
                        if isinstance(result, Exception):
                            logger.error(f"验证过程出错: {result}")
                            continue

                        if result:
                            valid_count += 1
                        else:
                            invalid_count += 1

                    logger.info(f"已验证 {validated_count}/{len(proxies)} 个代理")

                    # Short pause between batches to avoid overload.
                    if i + self.batch_size < len(proxies):
                        await asyncio.sleep(1)

            logger.info(f"验证完成: 总计 {validated_count}, 有效 {valid_count}, 无效 {invalid_count}")

        except Exception as e:
            logger.error(f"批量验证代理失败: {e}", exc_info=True)

    async def _apply_score(self, ip: str, port: int, passed: bool) -> None:
        """Add the configured score delta for a passed/failed validation.

        A failure while writing the score is logged and swallowed so that it
        can neither flip the validation verdict nor escape to the caller.
        """
        try:
            delta = config.SCORE_VALID if passed else config.SCORE_INVALID
            await self.db.update_score(
                ip, port, delta,
                min_score=config.SCORE_MIN,
                max_score=config.SCORE_MAX
            )
        except Exception as e:
            logger.error(f"验证代理 {ip}:{port} 时出错: {e}")

    async def _validate_and_update(self, ip: str, port: int, protocol: str) -> bool:
        """Validate one proxy and update its score.

        Args:
            ip: Proxy host.
            port: Proxy port.
            protocol: Proxy protocol passed through to the validator.

        Returns:
            ``True`` when the validator reports the proxy as valid,
            ``False`` when it reports it as invalid or validation raised.
        """
        try:
            is_valid, latency = await self.validator.validate(ip, port, protocol)
        except Exception as e:
            logger.error(f"验证代理 {ip}:{port} 时出错: {e}")
            # A validation error counts as a failed check.
            await self._apply_score(ip, port, False)
            return False

        # The score write is deliberately outside the try above: a database
        # error must not turn a proxy that just passed into a penalised one.
        if is_valid:
            await self._apply_score(ip, port, True)
            logger.debug(f"代理验证成功 {ip}:{port} ({protocol}) - 延迟 {latency}ms")
            return True

        await self._apply_score(ip, port, False)
        logger.debug(f"代理验证失败 {ip}:{port} ({protocol})")
        return False

    async def validate_proxies_batch(self, proxies: list) -> tuple:
        """Validate a batch of freshly scraped proxies.

        The proxies are validated concurrently with a dedicated validator
        whose ``max_concurrency`` is capped at 50. Scores are not touched.

        Args:
            proxies: ``[(ip, port, protocol), ...]``

        Returns:
            ``(valid_proxies, invalid_proxies)``, each a list of
            ``(ip, port, protocol)`` tuples in input order. A proxy whose
            validation raised is reported as invalid. If the batch as a
            whole fails, the error is logged and whatever was collected so
            far is returned.
        """
        if not proxies:
            return [], []

        valid_proxies = []
        invalid_proxies = []

        logger.info(f"开始验证 {len(proxies)} 个新抓取代理...")

        try:
            validator = ProxyValidator(
                max_concurrency=min(config.VALIDATOR_MAX_CONCURRENCY, 50),
                timeout=config.VALIDATOR_TIMEOUT
            )

            async with validator:
                # gather() schedules all validations at once so they really
                # run concurrently; awaiting the bare coroutines one after
                # another would execute them strictly serially.
                outcomes = await asyncio.gather(
                    *(
                        validator.validate(ip, port, protocol)
                        for ip, port, protocol in proxies
                    ),
                    return_exceptions=True,
                )

                for (ip, port, protocol), outcome in zip(proxies, outcomes):
                    try:
                        if isinstance(outcome, BaseException):
                            # Ordinary errors are handled just below;
                            # non-Exception ones (e.g. cancellation)
                            # propagate unchanged.
                            raise outcome
                        is_valid, latency = outcome
                    except Exception as e:
                        logger.warning(f"验证新代理 {ip}:{port} 失败: {e}")
                        invalid_proxies.append((ip, port, protocol))
                        continue

                    if is_valid:
                        valid_proxies.append((ip, port, protocol))
                        logger.debug(f"新代理有效: {ip}:{port} ({protocol}) - {latency}ms")
                    else:
                        invalid_proxies.append((ip, port, protocol))

            logger.info(f"新代理验证完成: 有效 {len(valid_proxies)}, 无效 {len(invalid_proxies)}")

        except Exception as e:
            logger.error(f"批量验证新代理失败: {e}")

        return valid_proxies, invalid_proxies


# Global scheduler instance.
scheduler = ValidationScheduler()