重构: 迁移后端代码到 app 目录,前端移动到 WebUI,添加完整测试套件

主要变更:
- 后端代码从根目录迁移到 app/ 目录
- 前端代码从 frontend/ 重命名为 WebUI/
- 更新所有导入路径以适配新结构
- 提取公共 API 响应函数到 app/api/common.py
- 精简验证器服务代码
- 更新启动脚本和文档

测试:
- 新增完整测试套件 (tests/)
- 单元测试: 模型、仓库层
- 集成测试: 覆盖所有 22+ API 端点
- E2E 测试: 4个完整工作流场景
- 添加 pytest 配置和测试运行脚本
This commit is contained in:
祀梦
2026-04-04 13:32:36 +08:00
parent df3cc87f88
commit 38bd66128b
109 changed files with 2017 additions and 548 deletions

12
app/services/__init__.py Normal file
View File

@@ -0,0 +1,12 @@
"""业务逻辑层包"""
from .proxy_service import ProxyService
from .plugin_service import PluginService
from .scheduler_service import SchedulerService
from .validator_service import ValidatorService
# Public API of the services package: the four service classes imported
# above are re-exported so callers can import them from the package itself.
__all__ = [
"ProxyService",
"PluginService",
"SchedulerService",
"ValidatorService",
]

View File

@@ -0,0 +1,139 @@
"""插件业务服务"""
from datetime import datetime
from typing import List, Optional
from app.core.db import get_db
from app.core.plugin_system.registry import registry
from app.core.plugin_system.base import BaseCrawlerPlugin
from app.repositories.settings_repo import PluginSettingsRepository
from app.models.domain import PluginInfo, ProxyRaw
from app.core.log import logger
class PluginService:
    """Plugin business service: plugin lifecycle, crawl execution, configuration.

    Enabled flags and configuration are persisted through
    ``PluginSettingsRepository``. Run statistics are kept only in
    ``self._stats`` on this instance.
    """

    def __init__(self):
        self.plugin_settings_repo = PluginSettingsRepository()
        # plugin name -> {"success_count", "failure_count", "last_run"}
        self._stats: dict[str, dict] = {}

    @staticmethod
    def _empty_stat() -> dict:
        """Return a fresh, zeroed statistics record."""
        return {"success_count": 0, "failure_count": 0, "last_run": None}

    async def list_plugins(self) -> List[PluginInfo]:
        """Return info for every registered plugin.

        Persisted ``enabled`` / ``config`` values are applied onto the live
        plugin objects as a side effect, then merged with the in-memory run
        statistics.
        """
        async with get_db() as db:
            db_states = await self.plugin_settings_repo.list_all(db)

        result = []
        for plugin in registry.list_plugins():
            # Apply the persisted state on top of the plugin's current state.
            state = db_states.get(plugin.name, {})
            if "enabled" in state:
                plugin.enabled = state["enabled"]
            if "config" in state and isinstance(state["config"], dict):
                plugin.update_config(state["config"])

            stat = self._stats.get(plugin.name, self._empty_stat())
            result.append(PluginInfo(
                id=plugin.name,
                name=plugin.name,
                display_name=plugin.display_name or plugin.name,
                description=plugin.description or f"{plugin.name} 爬取代理",
                enabled=plugin.enabled,
                last_run=stat.get("last_run"),
                success_count=stat.get("success_count", 0),
                failure_count=stat.get("failure_count", 0),
            ))
        return result

    async def toggle_plugin(self, plugin_id: str, enabled: bool) -> bool:
        """Persist the enabled flag of a plugin and mirror it on the live object.

        Returns False when the plugin is not registered; otherwise returns
        whatever the repository reports for the write.
        """
        plugin = registry.get(plugin_id)
        if not plugin:
            return False
        async with get_db() as db:
            success = await self.plugin_settings_repo.set_enabled(db, plugin_id, enabled)
        if success:
            # Only touch the in-memory plugin once the write went through.
            plugin.enabled = enabled
            logger.info(f"Plugin {plugin_id} toggled to {enabled}")
        return success

    async def get_plugin_config(self, plugin_id: str) -> Optional[dict]:
        """Return the plugin's config: defaults overlaid with persisted values.

        Returns None when the plugin is not registered.
        """
        plugin = registry.get(plugin_id)
        if not plugin:
            return None
        async with get_db() as db:
            saved = await self.plugin_settings_repo.get_config(db, plugin_id)
        config = dict(plugin.default_config)
        if saved:
            config.update(saved)
        return config

    async def update_plugin_config(self, plugin_id: str, config: dict) -> bool:
        """Update a plugin's config, keeping only keys present in ``default_config``.

        Returns False when the plugin is unknown or no supplied key is
        recognised; otherwise returns the repository's result.
        """
        plugin = registry.get(plugin_id)
        if not plugin:
            return False
        # Drop keys the plugin does not declare.
        safe_config = {k: v for k, v in config.items() if k in plugin.default_config}
        if not safe_config:
            return False
        # NOTE(review): the live plugin is updated before the write; if the
        # write fails the in-memory config and the stored one diverge.
        plugin.update_config(safe_config)
        async with get_db() as db:
            return await self.plugin_settings_repo.set_config(db, plugin_id, plugin.config)

    def get_plugin(self, plugin_id: str) -> Optional[BaseCrawlerPlugin]:
        """Look up a plugin in the registry by id."""
        return registry.get(plugin_id)

    async def run_plugin(self, plugin_id: str) -> List[ProxyRaw]:
        """Run a single plugin's crawl.

        Raises:
            ValueError: if the plugin is not registered.

        A disabled plugin is skipped and a failed crawl is logged and counted
        as a failure; both return an empty list.
        """
        plugin = self.get_plugin(plugin_id)
        if not plugin:
            raise ValueError(f"Plugin {plugin_id} not found")
        if not plugin.enabled:
            logger.warning(f"Plugin {plugin_id} is disabled, skip crawl")
            return []
        try:
            results = await plugin.crawl()
            self._record_stat(plugin_id, success=len(results))
            logger.info(f"Plugin {plugin_id} crawled {len(results)} proxies")
            return results
        except Exception as e:
            self._record_stat(plugin_id, failure=1)
            logger.error(f"Plugin {plugin_id} crawl failed: {e}")
            return []

    async def run_all_plugins(self) -> List[ProxyRaw]:
        """Run every enabled plugin and return the de-duplicated results.

        Proxies are de-duplicated on ``(ip, port, protocol)``; the first
        occurrence wins and the original order is preserved.
        """
        all_results: List[ProxyRaw] = []
        for plugin in registry.list_plugins():
            if not plugin.enabled:
                continue
            try:
                all_results.extend(await self.run_plugin(plugin.name))
            except Exception as e:
                logger.error(f"Run all plugins error at {plugin.name}: {e}")

        seen = set()
        unique = []
        for p in all_results:
            key = (p.ip, p.port, p.protocol)
            if key not in seen:
                seen.add(key)
                unique.append(p)
        return unique

    def _record_stat(self, plugin_id: str, success: int = 0, failure: int = 0):
        """Accumulate run statistics for a plugin and stamp ``last_run``.

        Args:
            plugin_id: plugin the run belongs to.
            success: amount added to ``success_count`` (``run_plugin`` passes
                the number of proxies crawled, which may be 0).
            failure: amount added to ``failure_count``.

        Every call represents a run, so ``last_run`` is always updated. A
        crawl that succeeds with zero proxies therefore still counts as a run.
        """
        stat = self._stats.setdefault(plugin_id, self._empty_stat())
        stat["success_count"] += success
        stat["failure_count"] += failure
        stat["last_run"] = datetime.now()

View File

@@ -0,0 +1,93 @@
"""代理业务服务"""
import csv
import json
import io
from datetime import datetime
from typing import List, Optional, Tuple, AsyncIterator
from app.core.db import get_db
from app.repositories.proxy_repo import ProxyRepository
from app.models.domain import Proxy
from app.core.log import logger
class ProxyService:
    """Proxy business service: thin orchestration layer over ``ProxyRepository``.

    Each method opens its own database context via ``get_db()`` and
    delegates to the repository.
    """

    # Header row of the CSV export.
    _CSV_HEADER = ("IP", "Port", "Protocol", "Score", "Last Check")

    def __init__(self, proxy_repo: Optional[ProxyRepository] = None):
        """Create the service.

        Args:
            proxy_repo: repository to use. When omitted, a new
                ``ProxyRepository`` is created per service instance rather
                than one shared instance built at definition time.
        """
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()

    async def get_stats(self) -> dict:
        """Return repository statistics extended with a ``today_new`` count."""
        async with get_db() as db:
            stats = await self.proxy_repo.get_stats(db)
            stats["today_new"] = await self.proxy_repo.get_today_new_count(db)
        return stats

    async def list_proxies(
        self,
        page: int = 1,
        page_size: int = 20,
        protocol: Optional[str] = None,
        min_score: int = 0,
        max_score: Optional[int] = None,
        sort_by: str = "last_check",
        sort_order: str = "DESC",
    ) -> Tuple[List[Proxy], int]:
        """Return one page of proxies plus a count, as given by the repository."""
        async with get_db() as db:
            return await self.proxy_repo.list_paginated(
                db, page, page_size, protocol, min_score, max_score, sort_by, sort_order
            )

    async def get_random_proxy(self) -> Optional[Proxy]:
        """Return a random proxy as chosen by the repository."""
        async with get_db() as db:
            return await self.proxy_repo.get_random(db)

    async def delete_proxy(self, ip: str, port: int) -> None:
        """Delete the proxy identified by ``ip`` and ``port``."""
        async with get_db() as db:
            await self.proxy_repo.delete(db, ip, port)

    async def batch_delete(self, proxies: List[Tuple[str, int]]) -> int:
        """Delete several ``(ip, port)`` proxies; returns the repository's count."""
        async with get_db() as db:
            return await self.proxy_repo.batch_delete(db, proxies)

    async def clean_invalid(self) -> int:
        """Delegate to the repository's ``clean_invalid``; returns its count."""
        async with get_db() as db:
            return await self.proxy_repo.clean_invalid(db)

    async def clean_expired(self, days: int) -> int:
        """Delegate to the repository's ``clean_expired``; returns its count."""
        async with get_db() as db:
            return await self.proxy_repo.clean_expired(db, days)

    async def export_proxies(
        self,
        fmt: str,
        protocol: Optional[str] = None,
        limit: int = 10000,
    ) -> AsyncIterator[str]:
        """Yield the export of stored proxies as text chunks.

        Args:
            fmt: ``"csv"``, ``"txt"`` or ``"json"``. Any other value yields
                nothing (a warning is logged).
            protocol: optional protocol filter passed to the repository.
            limit: maximum number of proxies passed to the repository.
        """
        async with get_db() as db:
            proxies = await self.proxy_repo.list_all(db, protocol=protocol, limit=limit)
        # Rows are already fetched, so the DB context is not held while the
        # consumer drains the stream.
        for chunk in self._render_export(proxies, fmt):
            yield chunk

    @classmethod
    def _render_export(cls, proxies, fmt: str):
        """Generate the export chunks (strings) for ``proxies`` in format ``fmt``."""
        if fmt == "csv":
            yield cls._csv_line(cls._CSV_HEADER)
            for p in proxies:
                yield cls._csv_line(
                    (p.ip, p.port, p.protocol, p.score, cls._fmt_time(p.last_check))
                )
        elif fmt == "txt":
            for p in proxies:
                yield f"{p.ip}:{p.port}\n"
        elif fmt == "json":
            data = [
                {
                    "ip": p.ip,
                    "port": p.port,
                    "protocol": p.protocol,
                    "score": p.score,
                    "last_check": cls._fmt_time(p.last_check),
                }
                for p in proxies
            ]
            yield json.dumps(data, ensure_ascii=False, indent=2)
        else:
            logger.warning(f"Unsupported export format: {fmt}")

    @staticmethod
    def _csv_line(fields) -> str:
        """Encode one CSV record, quoting fields that contain delimiters.

        Fields are stringified first and the line ends with ``"\\n"``, so
        ordinary values come out exactly as plain comma-joined text.
        """
        buffer = io.StringIO()
        csv.writer(buffer, lineterminator="\n").writerow([str(f) for f in fields])
        return buffer.getvalue()

    @staticmethod
    def _fmt_time(dt: Optional[datetime]) -> str:
        """Format a timestamp for export.

        Falsy values become ``""``, strings pass through unchanged, and
        anything else is rendered with ``isoformat()``.
        """
        if not dt:
            return ""
        if isinstance(dt, str):
            return dt
        return dt.isoformat()

View File

@@ -0,0 +1,88 @@
"""调度器服务 - 定时验证存量代理"""
import asyncio
from datetime import datetime
from app.core.db import get_db
from app.repositories.proxy_repo import ProxyRepository
from app.core.tasks.queue import ValidationQueue
from app.core.config import settings as app_settings
from app.core.log import logger
class SchedulerService:
    """Proxy validation scheduler.

    Runs a background loop that re-validates every stored proxy, then waits
    ``interval_minutes`` before the next pass. It can also trigger a one-off
    full validation on demand.
    """

    def __init__(
        self,
        validation_queue: ValidationQueue,
        proxy_repo: "ProxyRepository | None" = None,
    ):
        """Create the scheduler.

        Args:
            validation_queue: queue that receives proxies to validate.
            proxy_repo: repository to read proxies from. When omitted, a new
                ``ProxyRepository`` is created per scheduler rather than one
                shared instance built at definition time.
        """
        self.validation_queue = validation_queue
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()
        self.interval_minutes = 30
        self.running = False
        self._task: asyncio.Task | None = None
        # The event loop keeps only weak references to tasks, so on-demand
        # validation tasks are held here until they finish.
        self._background_tasks: set = set()

    async def start(self):
        """Start the validation queue and the periodic loop (no-op if running)."""
        if self.running:
            logger.warning("Scheduler already running")
            return
        self.running = True
        await self.validation_queue.start()
        self._task = asyncio.create_task(self._run_loop())
        logger.info("Scheduler started")

    async def stop(self):
        """Stop the loop, cancel on-demand validations, then stop the queue."""
        self.running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None

        # Cancel one-off validations so none of them keeps waiting on a queue
        # that is about to be stopped.
        pending = list(self._background_tasks)
        for task in pending:
            task.cancel()
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)

        await self.validation_queue.stop()
        logger.info("Scheduler stopped")

    async def validate_all_now(self):
        """Kick off one full validation pass in the background (non-blocking).

        NOTE(review): ``_do_validate_all`` stops submitting batches while
        ``self.running`` is False, so calling this on a stopped scheduler
        submits nothing.
        """
        task = asyncio.create_task(self._do_validate_all())
        self._background_tasks.add(task)
        task.add_done_callback(self._on_background_done)

    def _on_background_done(self, task: asyncio.Task) -> None:
        """Drop a finished on-demand task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            logger.error(f"On-demand validation failed: {error}")

    async def _run_loop(self):
        """Periodic loop: validate everything, then wait for the next round."""
        while self.running:
            try:
                await self._do_validate_all()
            except Exception as e:
                logger.error(f"Scheduler loop error: {e}")
            # Sleep in one-second slices so a stop request is noticed quickly.
            for _ in range(self.interval_minutes * 60):
                if not self.running:
                    break
                await asyncio.sleep(1)

    async def _do_validate_all(self):
        """Submit every stored proxy to the validation queue in batches."""
        logger.info("Starting scheduled validation for all proxies")
        async with get_db() as db:
            proxies = await self.proxy_repo.list_all(db)
        if not proxies:
            logger.info("No proxies to validate")
            return
        logger.info(f"Validating {len(proxies)} proxies from database")

        from app.models.domain import ProxyRaw

        batch_size = 100
        for i in range(0, len(proxies), batch_size):
            if not self.running:
                break
            batch = proxies[i : i + batch_size]
            await self.validation_queue.submit([
                ProxyRaw(p.ip, p.port, p.protocol) for p in batch
            ])
            # Wait for the current batch to be processed before the next one.
            await self.validation_queue.drain()
            logger.info(f"Validated batch {i//batch_size + 1}/{(len(proxies)-1)//batch_size + 1}")
        logger.info("Scheduled validation completed")

View File

@@ -0,0 +1,97 @@
"""代理验证服务 - 支持 HTTP/HTTPS/SOCKS4/SOCKS5"""
import asyncio
import random
import time
import aiohttp
import aiohttp_socks
from typing import Tuple
from app.core.log import logger
class ValidatorService:
    """Proxy validator for HTTP, HTTPS, SOCKS4 and SOCKS5 proxies.

    A proxy counts as valid when a GET to one of the test URLs through it
    returns an accepted status. Latency is measured with a monotonic clock.
    """

    # Test URLs per protocol; other protocols fall back to the "http" list.
    TEST_URLS = {
        "http": ["http://httpbin.org/ip", "http://api.ipify.org"],
        "https": ["https://httpbin.org/ip", "https://api.ipify.org"],
    }
    # Response statuses that mark the proxy as working.
    _OK_STATUSES = (200, 301, 302)

    def __init__(
        self,
        timeout: float = 5.0,
        connect_timeout: float = 3.0,
        max_concurrency: int = 50,
    ):
        """Create the validator.

        Args:
            timeout: total per-request timeout in seconds.
            connect_timeout: connection timeout in seconds.
            max_concurrency: maximum number of concurrent validations.
        """
        self.timeout = timeout
        self.connect_timeout = connect_timeout
        self.semaphore = asyncio.Semaphore(max_concurrency)

    def _get_test_url(self, protocol: str) -> str:
        """Pick a random test URL for ``protocol`` (case-insensitive)."""
        urls = self.TEST_URLS.get(protocol.lower(), self.TEST_URLS["http"])
        return random.choice(urls)

    async def validate(self, ip: str, port: int, protocol: str = "http") -> Tuple[bool, float]:
        """Validate one proxy.

        Returns:
            ``(is_valid, latency_ms)``. Any failure, including timeouts, is
            logged at debug level and reported as ``(False, 0.0)``.
        """
        protocol = protocol.lower()
        async with self.semaphore:
            # Start the clock after acquiring the semaphore so queueing time
            # is excluded; perf_counter is immune to wall-clock adjustments.
            start = time.perf_counter()
            try:
                if protocol in ("socks4", "socks5"):
                    return await self._validate_socks(ip, port, protocol, start)
                return await self._validate_http(ip, port, protocol, start)
            except asyncio.TimeoutError:
                logger.debug(f"Validation timeout: {ip}:{port} ({protocol})")
                return False, 0.0
            except Exception as e:
                logger.debug(f"Validation error {ip}:{port} ({protocol}): {e}")
                return False, 0.0

    async def _validate_http(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
        """Validate an HTTP/HTTPS proxy; ``start`` is a ``time.perf_counter()`` reading."""
        connector = aiohttp.TCPConnector(ssl=False, limit=0, force_close=True)
        return await self._probe(
            connector,
            self._get_test_url(protocol),
            start,
            f"HTTP valid: {ip}:{port} ({protocol})",
            proxy=f"http://{ip}:{port}",
        )

    async def _validate_socks(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
        """Validate a SOCKS4/SOCKS5 proxy; ``start`` is a ``time.perf_counter()`` reading."""
        proxy_type = (
            aiohttp_socks.ProxyType.SOCKS4
            if protocol == "socks4"
            else aiohttp_socks.ProxyType.SOCKS5
        )
        connector = aiohttp_socks.ProxyConnector(
            proxy_type=proxy_type,
            host=ip,
            port=port,
            rdns=True,
            ssl=False,
        )
        # The connector does the proxying, so the request takes no ``proxy``.
        return await self._probe(
            connector,
            self._get_test_url("http"),
            start,
            f"SOCKS valid: {ip}:{port} ({protocol})",
        )

    async def _probe(self, connector, test_url: str, start: float, success_label: str, proxy=None) -> Tuple[bool, float]:
        """Issue the test GET through ``connector`` and report validity and latency.

        Args:
            connector: aiohttp connector to use; always closed on exit.
            test_url: URL to request.
            start: ``time.perf_counter()`` reading taken when validation began.
            success_label: prefix of the info log line written on success.
            proxy: proxy URL for the request, or None to send no ``proxy``.
        """
        timeout = aiohttp.ClientTimeout(total=self.timeout, connect=self.connect_timeout)
        request_kwargs = {"allow_redirects": True}
        if proxy is not None:
            request_kwargs["proxy"] = proxy
        try:
            async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
                async with session.get(test_url, **request_kwargs) as response:
                    if response.status in self._OK_STATUSES:
                        latency = round((time.perf_counter() - start) * 1000, 2)
                        logger.info(f"{success_label} {latency}ms")
                        return True, latency
                    return False, 0.0
        finally:
            await connector.close()