refactor(backend): optimize database safety, validator performance, and scheduler concurrency

- Fix SQL injection risks in proxy_repo and task_repo
- Make acquire_pending atomic with UPDATE ... RETURNING
- Reuse aiohttp ClientSession in ValidatorService
- Replace polling with asyncio.Event in SchedulerService
- Optimize ValidationQueue.drain with asyncio.Condition
- Crawl plugins concurrently with asyncio.gather
- Unify ProxyRaw model import path
- Fix test baseline and remove tracked __pycache__ files
This commit is contained in:
祀梦
2026-04-04 14:43:31 +08:00
parent abb8b32ed3
commit 635c524a7e
27 changed files with 103 additions and 89 deletions

View File

@@ -1,20 +1,7 @@
"""插件基类 - 所有爬虫插件必须继承此基类"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Dict, Any
@dataclass
class ProxyRaw:
    """Raw proxy record produced by a crawler plugin.

    The ``protocol`` value is normalized when the instance is created:
    it is lower-cased and stripped of surrounding whitespace, and any
    value outside the supported set falls back to ``"http"``.
    """

    # Proxy host address as scraped by the crawler.
    ip: str
    # Proxy port number.
    port: int
    # One of "http", "https", "socks4", "socks5" after normalization.
    protocol: str = "http"

    def __post_init__(self) -> None:
        """Normalize ``protocol``; unsupported values become ``"http"``."""
        self.protocol = self.protocol.lower().strip()
        # Unknown schemes are coerced rather than rejected.
        if self.protocol not in ("http", "https", "socks4", "socks5"):
            self.protocol = "http"
from app.models.domain import ProxyRaw
class BaseCrawlerPlugin(ABC):