Files
ProxyPool/app/core/tasks/queue.py
祀梦 875e61f17e fix: 修复设置系统脱节、队列计数漂移、资源泄露等全量问题
- 统一设置系统:create_scheduler_service 读取 DB 设置覆盖默认值
- 修复 ProxyRepository.update_score 误删所有无效代理的 SQL
- ValidationQueue:修复 Worker 计数漂移与启动恢复任务饿死
- SchedulerService:移除 drain() 阻塞,主循环可正常响应 stop
- TaskService:在调度器周期内自动清理过期任务,防止内存泄漏
- lifespan/conftest:规范关闭顺序,消除 Event loop closed 警告
- Repository:异常日志增加 exc_info,今日新增按 created_at 统计
- ValidatorService:防止 HTTP session 重复关闭,移除 SOCKS 多余 close
- 前端:补全 pluginsStore.isEmpty,ProxyList 最低分数上限改为 100
- 删除 config.py 中冗余的 cors_origins_list property
2026-04-04 20:31:52 +08:00

169 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""验证任务队列 - 解耦爬取与验证,支持背压控制和持久化"""
import asyncio
from typing import Optional
from app.models.domain import ProxyRaw
from app.repositories.task_repo import ValidationTaskRepository
from app.core.db import get_db
from app.core.log import logger
class ValidationQueue:
"""代理验证队列(支持持久化到 SQLite
工作流程:
1. 爬虫将原始代理 submit() 到队列(写入数据库 + 内存信号)
2. Worker 池从数据库消费并验证
3. 验证通过的代理写入数据库
4. 服务重启时自动恢复未完成的 pending 任务
"""
def __init__(
self,
validator,
proxy_repo,
worker_count: int = 50,
score_valid: int = 10,
score_invalid: int = -5,
score_min: int = 0,
score_max: int = 100,
):
self.validator = validator
self.proxy_repo = proxy_repo
self.task_repo = ValidationTaskRepository()
self.worker_count = worker_count
self.score_valid = score_valid
self.score_invalid = score_invalid
self.score_min = score_min
self.score_max = score_max
self._signal: asyncio.Queue[None] = asyncio.Queue()
self._workers: list[asyncio.Task] = []
self._running = False
self._pending_count = 0
self._condition = asyncio.Condition()
# 统计
self.valid_count = 0
self.invalid_count = 0
async def start(self):
if self._running:
return
self._running = True
# 恢复之前中断的 processing 任务
async with get_db() as db:
recovered = await self.task_repo.reset_processing(db)
pending = await self.task_repo.get_pending_count(db)
if pending > 1000:
logger.warning(f"ValidationQueue has {pending} pending tasks, cleaning up all pending tasks...")
await db.execute("DELETE FROM validation_tasks WHERE status = 'pending'")
await db.commit()
pending = await self.task_repo.get_pending_count(db)
logger.info(f"ValidationQueue cleaned up pending tasks, remaining: {pending}")
if recovered:
logger.info(f"ValidationQueue recovered {recovered} interrupted tasks")
if pending:
logger.info(f"ValidationQueue has {pending} pending tasks to process")
async with self._condition:
self._pending_count = pending
for i in range(self.worker_count):
self._workers.append(asyncio.create_task(self._worker_loop(i)))
# 唤醒 Worker 处理恢复的 pending 任务(每个 Worker 一次唤醒即可,内部会循环处理)
if pending:
for _ in range(self.worker_count):
self._signal.put_nowait(None)
logger.info(f"ValidationQueue started with {self.worker_count} workers")
async def stop(self):
if not self._running:
return
self._running = False
for _ in self._workers:
self._signal.put_nowait(None) # sentinel
if self._workers:
await asyncio.gather(*self._workers, return_exceptions=True)
self._workers.clear()
logger.info("ValidationQueue stopped")
async def submit(self, proxies: list[ProxyRaw]):
"""提交代理到验证队列(持久化 + 唤醒 Worker"""
async with get_db() as db:
inserted = await self.task_repo.insert_batch(db, proxies)
if inserted:
async with self._condition:
self._pending_count += inserted
self._condition.notify_all()
for _ in range(min(inserted, self.worker_count)):
self._signal.put_nowait(None)
async def submit_one(self, proxy: ProxyRaw):
await self.submit([proxy])
async def drain(self):
"""等待队列中当前所有 pending 任务处理完毕"""
async with self._condition:
if self._pending_count > 0:
await self._condition.wait_for(lambda: self._pending_count == 0)
async def _worker_loop(self, worker_id: int):
while True:
await self._signal.get()
self._signal.task_done()
if not self._running:
break
# 持续处理任务直到没有 pending 为止,避免信号数不足导致任务饿死
while self._running:
processed = await self._process_one_task(worker_id)
if not processed:
break
async def _process_one_task(self, worker_id: int) -> bool:
"""从数据库取一个任务并验证。返回 True 表示确实处理了一个任务。"""
async with get_db() as db:
task = await self.task_repo.acquire_pending(db)
if not task:
return False
proxy = ProxyRaw(task["ip"], task["port"], task["protocol"])
try:
is_valid, latency = await self.validator.validate(
proxy.ip, proxy.port, proxy.protocol
)
except Exception as e:
logger.error(f"Worker {worker_id} validation error: {e}", exc_info=True)
is_valid, latency = False, 0.0
if is_valid:
await self.proxy_repo.insert_or_update(
db, proxy.ip, proxy.port, proxy.protocol, score=self.score_valid
)
if latency:
await self.proxy_repo.update_response_time(
db, proxy.ip, proxy.port, latency
)
await self.task_repo.complete_task(db, task["id"], True, latency)
self.valid_count += 1
logger.debug(f"ValidationQueue: valid {proxy.ip}:{proxy.port}")
else:
# 对已有代理扣分,分数<=0时自动删除
await self.proxy_repo.update_score(
db, proxy.ip, proxy.port, self.score_invalid,
self.score_min, self.score_max
)
await self.task_repo.complete_task(db, task["id"], False, 0.0)
self.invalid_count += 1
logger.debug(f"ValidationQueue: invalid {proxy.ip}:{proxy.port}")
async with self._condition:
self._pending_count = max(0, self._pending_count - 1)
if self._pending_count == 0:
self._condition.notify_all()
return True
def reset_stats(self):
self.valid_count = 0
self.invalid_count = 0