refactor(backend): optimize database safety, validator performance, and scheduler concurrency

- Fix SQL injection risks in proxy_repo and task_repo
- Atomic acquire_pending with UPDATE ... RETURNING
- Reuse aiohttp ClientSession in ValidatorService
- Replace polling with asyncio.Event in SchedulerService
- Optimize ValidationQueue.drain with asyncio.Condition
- Concurrent plugin crawling with asyncio.gather
- Unify ProxyRaw model import path
- Fix test baseline and remove tracked __pycache__ files
This commit is contained in:
祀梦
2026-04-04 14:43:31 +08:00
parent abb8b32ed3
commit 635c524a7e
27 changed files with 103 additions and 89 deletions

View File

@@ -1,4 +1,5 @@
"""插件业务服务"""
import asyncio
from datetime import datetime
from typing import List, Optional
from app.core.db import get_db
@@ -108,14 +109,13 @@ class PluginService:
async def run_all_plugins(self) -> List[ProxyRaw]:
"""执行所有启用插件的爬取"""
all_results: List[ProxyRaw] = []
for plugin in registry.list_plugins():
if not plugin.enabled:
tasks = [self.run_plugin(plugin.name) for plugin in registry.list_plugins() if plugin.enabled]
results_list = await asyncio.gather(*tasks, return_exceptions=True)
for results in results_list:
if isinstance(results, Exception):
logger.error(f"Run all plugins error: {results}")
continue
try:
results = await self.run_plugin(plugin.name)
all_results.extend(results)
except Exception as e:
logger.error(f"Run all plugins error at {plugin.name}: {e}")
all_results.extend(results)
# 去重
seen = set()
unique = []

View File

@@ -20,12 +20,14 @@ class SchedulerService:
self.proxy_repo = proxy_repo
self.interval_minutes = 30
self.running = False
self._stop_event = asyncio.Event()
self._task: asyncio.Task | None = None
async def start(self):
    """Start the scheduler: bring up the validation queue, then launch the
    periodic run loop as a background task.

    Idempotent guard: if the scheduler is already running, log a warning and
    return without spawning a second loop task.
    """
    if self.running:
        logger.warning("Scheduler already running")
        return
    # Reset the stop signal so a stop() from a previous run does not make the
    # new loop's wait_for(_stop_event.wait()) return immediately.
    self._stop_event.clear()
    self.running = True
    await self.validation_queue.start()
    # Keep the task handle so stop() can cancel the loop.
    self._task = asyncio.create_task(self._run_loop())
@@ -33,6 +35,7 @@ class SchedulerService:
async def stop(self):
self.running = False
self._stop_event.set()
if self._task:
self._task.cancel()
try:
@@ -55,10 +58,10 @@ class SchedulerService:
except Exception as e:
logger.error(f"Scheduler loop error: {e}")
# 等待下一次
for _ in range(self.interval_minutes * 60):
if not self.running:
break
await asyncio.sleep(1)
try:
await asyncio.wait_for(self._stop_event.wait(), timeout=self.interval_minutes * 60)
except asyncio.TimeoutError:
pass
async def _do_validate_all(self):
"""验证数据库中所有存量代理"""

View File

@@ -25,8 +25,24 @@ class ValidatorService:
):
self.timeout = timeout
self.connect_timeout = connect_timeout
self.max_concurrency = max_concurrency
self.semaphore = asyncio.Semaphore(max_concurrency)
# 共享 HTTP/HTTPS ClientSession
self._http_connector = aiohttp.TCPConnector(
ssl=False,
limit=max_concurrency,
limit_per_host=max_concurrency,
force_close=False,
)
self._timeout = aiohttp.ClientTimeout(
total=timeout, connect=connect_timeout
)
self._http_session = aiohttp.ClientSession(
connector=self._http_connector,
timeout=self._timeout,
)
def _get_test_url(self, protocol: str) -> str:
"""获取测试 URL"""
urls = self.TEST_URLS.get(protocol.lower(), self.TEST_URLS["http"])
@@ -53,20 +69,14 @@ class ValidatorService:
async def _validate_http(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
"""验证 HTTP/HTTPS 代理"""
proxy_url = f"http://{ip}:{port}"
connector = aiohttp.TCPConnector(ssl=False, limit=0, force_close=True)
timeout = aiohttp.ClientTimeout(total=self.timeout, connect=self.connect_timeout)
test_url = self._get_test_url(protocol)
try:
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
async with session.get(test_url, proxy=proxy_url, allow_redirects=True) as response:
if response.status in (200, 301, 302):
latency = round((time.time() - start) * 1000, 2)
logger.info(f"HTTP valid: {ip}:{port} ({protocol}) {latency}ms")
return True, latency
return False, 0.0
finally:
await connector.close()
async with self._http_session.get(test_url, proxy=proxy_url, allow_redirects=True) as response:
if response.status in (200, 301, 302):
latency = round((time.time() - start) * 1000, 2)
logger.info(f"HTTP valid: {ip}:{port} ({protocol}) {latency}ms")
return True, latency
return False, 0.0
async def _validate_socks(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
"""验证 SOCKS4/SOCKS5 代理"""
@@ -95,3 +105,7 @@ class ValidatorService:
return False, 0.0
finally:
await connector.close()
async def close(self):
    """Close the shared HTTP ClientSession created in __init__.

    Must be awaited during service shutdown so the session's pooled
    connections are released cleanly.
    """
    await self._http_session.close()