重构: 迁移后端代码到 app 目录,前端移动到 WebUI,添加完整测试套件
主要变更: - 后端代码从根目录迁移到 app/ 目录 - 前端代码从 frontend/ 重命名为 WebUI/ - 更新所有导入路径以适配新结构 - 提取公共 API 响应函数到 app/api/common.py - 精简验证器服务代码 - 更新启动脚本和文档 测试: - 新增完整测试套件 (tests/) - 单元测试: 模型、仓库层 - 集成测试: 覆盖所有 22+ API 端点 - E2E 测试: 4个完整工作流场景 - 添加 pytest 配置和测试运行脚本
This commit is contained in:
4
app/api/__init__.py
Normal file
4
app/api/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
"""API 包"""
|
||||
from .main import create_app
|
||||
|
||||
__all__ = ["create_app"]
|
||||
41
app/api/common.py
Normal file
41
app/api/common.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""API 通用工具函数"""
|
||||
from typing import Any, Optional
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
|
||||
def success_response(message: str, data: Any = None) -> dict:
    """Build the standard success envelope: code 200, a message, and an optional payload."""
    envelope = {"code": 200, "message": message}
    envelope["data"] = data
    return envelope
|
||||
|
||||
|
||||
def error_response(message: str, code: int = 500) -> JSONResponse:
    """Build an error JSONResponse whose body mirrors the success envelope shape."""
    body = {"code": code, "message": message, "data": None}
    return JSONResponse(status_code=code, content=body)
|
||||
|
||||
|
||||
def format_proxy(proxy) -> dict:
    """Serialize a proxy record into the JSON shape the WebUI consumes."""
    checked = proxy.last_check
    return {
        "ip": proxy.ip,
        "port": proxy.port,
        "protocol": proxy.protocol,
        "score": proxy.score,
        # Truthiness check kept from the original (None -> null in JSON)
        "last_check": checked.isoformat() if checked else None,
    }
|
||||
|
||||
|
||||
def format_plugin(plugin) -> dict:
    """Serialize a plugin record for the API.

    NOTE(review): "name" deliberately repeats display_name here, exactly as the
    original did — confirm the WebUI does not expect the internal plugin.name.
    """
    last_run = plugin.last_run
    return {
        "id": plugin.id,
        "name": plugin.display_name,
        "display_name": plugin.display_name,
        "description": plugin.description,
        "enabled": plugin.enabled,
        "last_run": last_run.isoformat() if last_run else None,
        "success_count": plugin.success_count,
        "failure_count": plugin.failure_count,
    }
|
||||
45
app/api/deps.py
Normal file
45
app/api/deps.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""依赖注入"""
|
||||
from fastapi import Request
|
||||
from app.services.proxy_service import ProxyService
|
||||
from app.services.plugin_service import PluginService
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.services.validator_service import ValidatorService
|
||||
from app.repositories.proxy_repo import ProxyRepository
|
||||
from app.core.tasks.queue import ValidationQueue
|
||||
from app.core.config import settings as app_settings
|
||||
|
||||
|
||||
def get_proxy_service() -> ProxyService:
    """FastAPI dependency: a fresh ProxyService per request."""
    service = ProxyService()
    return service
|
||||
|
||||
|
||||
def get_plugin_service() -> PluginService:
    """FastAPI dependency: a fresh PluginService per request."""
    service = PluginService()
    return service
|
||||
|
||||
|
||||
def get_scheduler_service(request: Request) -> SchedulerService:
    """FastAPI dependency: the scheduler singleton stored on app.state at startup."""
    state = request.app.state
    return state.scheduler_service
|
||||
|
||||
|
||||
def get_validation_queue(request: Request) -> ValidationQueue:
    """FastAPI dependency: the shared validation queue stored on app.state."""
    state = request.app.state
    return state.validation_queue
|
||||
|
||||
|
||||
def create_scheduler_service() -> SchedulerService:
    """Build the SchedulerService at application startup (outside any request context).

    Wires validator -> queue -> scheduler, all sized from application settings.
    """
    cfg = app_settings
    checker = ValidatorService(
        timeout=cfg.validator_timeout,
        connect_timeout=cfg.validator_connect_timeout,
        max_concurrency=cfg.validator_max_concurrency,
    )
    repo = ProxyRepository()
    task_queue = ValidationQueue(
        validator=checker,
        proxy_repo=repo,
        worker_count=cfg.validator_max_concurrency,
        score_valid=cfg.score_valid,
        score_invalid=cfg.score_invalid,
        score_min=cfg.score_min,
        score_max=cfg.score_max,
    )
    return SchedulerService(validation_queue=task_queue, proxy_repo=repo)
|
||||
33
app/api/errors.py
Normal file
33
app/api/errors.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""统一异常处理"""
|
||||
from fastapi import Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import ValidationError
|
||||
from app.core.exceptions import ProxyPoolException
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
async def proxy_pool_exception_handler(request: Request, exc: ProxyPoolException):
    """Map a domain ProxyPoolException onto the standard error envelope."""
    body = {"code": exc.code, "message": exc.message, "data": None}
    return JSONResponse(status_code=exc.code, content=body)
|
||||
|
||||
|
||||
async def pydantic_validation_handler(request: Request, exc: ValidationError):
    """Turn a pydantic ValidationError into a 422 envelope carrying the error list."""
    logger.error(f"Validation error: {exc}")
    body = {"code": 422, "message": "参数验证失败", "data": exc.errors()}
    return JSONResponse(status_code=422, content=body)
|
||||
|
||||
|
||||
async def general_exception_handler(request: Request, exc: Exception):
    """Last-resort handler: log the full traceback, answer with a generic 500."""
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    body = {"code": 500, "message": "服务器内部错误", "data": None}
    return JSONResponse(status_code=500, content=body)
|
||||
41
app/api/lifespan.py
Normal file
41
app/api/lifespan.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""应用生命周期管理"""
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
from app.core.db import init_db, get_db
|
||||
from app.core.config import settings as app_settings
|
||||
from app.core.log import logger
|
||||
from app.api.deps import create_scheduler_service
|
||||
from app.repositories.settings_repo import SettingsRepository
|
||||
|
||||
settings_repo = SettingsRepository()
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application startup/shutdown lifecycle manager."""
    # Initialize the database schema before anything else touches it
    await init_db()

    # Create the scheduler and expose it (plus its queue) on app.state
    scheduler_service = create_scheduler_service()
    app.state.scheduler_service = scheduler_service
    app.state.validation_queue = scheduler_service.validation_queue

    # Load persisted settings and decide whether the scheduler auto-starts
    try:
        async with get_db() as db:
            settings = await settings_repo.get_all(db)
            # NOTE(review): the fallback is app_settings.validator_timeout (seconds)
            # used as an interval in minutes — looks like the wrong default; confirm.
            scheduler_service.interval_minutes = settings.get(
                "validate_interval_minutes", app_settings.validator_timeout
            )
            if settings.get("auto_validate", True):
                await scheduler_service.start()
    except Exception as e:
        # Best-effort: a settings failure must not block the API from starting
        logger.error(f"Failed to load settings on startup: {e}")

    logger.info("API server started")
    yield

    # Shut the scheduler down cleanly on application exit
    await scheduler_service.stop()
    logger.info("API server shutdown")
|
||||
55
app/api/main.py
Normal file
55
app/api/main.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""FastAPI 应用工厂"""
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from app.api.lifespan import lifespan
|
||||
from app.api.routes import api_router
|
||||
from app.api.errors import proxy_pool_exception_handler, pydantic_validation_handler, general_exception_handler
|
||||
from app.core.exceptions import ProxyPoolException
|
||||
from pydantic import ValidationError
|
||||
from app.core.config import settings as app_settings
|
||||
|
||||
# 导入并注册所有插件(显式注册模式)
|
||||
import app.plugins
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
    """Application factory: assemble the FastAPI app with middleware, exception
    handlers, routers, and the built-in root/health endpoints."""
    app = FastAPI(
        title="代理池API",
        version="2.0.0",
        lifespan=lifespan,
    )

    # CORS — allowed origins come from configuration
    app.add_middleware(
        CORSMiddleware,
        allow_origins=app_settings.cors_origins_list,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Exception handlers: domain and validation errors first, Exception as catch-all
    app.add_exception_handler(ProxyPoolException, proxy_pool_exception_handler)
    app.add_exception_handler(ValidationError, pydantic_validation_handler)
    app.add_exception_handler(Exception, general_exception_handler)

    # Business routes
    app.include_router(api_router)

    @app.get("/")
    async def root():
        return {"message": "欢迎使用代理池API", "status": "running", "data": None}

    @app.get("/health")
    async def health_check():
        # Reads the scheduler off app.state, which lifespan() populates at startup
        from datetime import datetime
        scheduler = app.state.scheduler_service
        return {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "database": "connected",
            "scheduler": "running" if scheduler.running else "stopped",
            "version": "2.0.0",
        }

    return app
|
||||
9
app/api/routes/__init__.py
Normal file
9
app/api/routes/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""路由包"""
|
||||
from fastapi import APIRouter
|
||||
from app.api.routes import proxies, plugins, scheduler, settings
|
||||
|
||||
api_router = APIRouter()
|
||||
api_router.include_router(proxies.router)
|
||||
api_router.include_router(plugins.router)
|
||||
api_router.include_router(scheduler.router)
|
||||
api_router.include_router(settings.router)
|
||||
154
app/api/routes/plugins.py
Normal file
154
app/api/routes/plugins.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""插件相关路由"""
|
||||
import asyncio
|
||||
from fastapi import APIRouter, Depends
|
||||
from app.services.plugin_service import PluginService
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.api.deps import get_plugin_service, get_scheduler_service
|
||||
from app.api.common import success_response, error_response, format_plugin
|
||||
from app.core.log import logger
|
||||
|
||||
router = APIRouter(prefix="/api/plugins", tags=["plugins"])
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_plugins(service: PluginService = Depends(get_plugin_service)):
|
||||
try:
|
||||
plugins = await service.list_plugins()
|
||||
return success_response("获取插件列表成功", {"plugins": [format_plugin(p) for p in plugins]})
|
||||
except Exception as e:
|
||||
logger.error(f"List plugins failed: {e}")
|
||||
return error_response("获取插件列表失败", 500)
|
||||
|
||||
|
||||
@router.put("/{plugin_id}/toggle")
|
||||
async def toggle_plugin(
|
||||
plugin_id: str,
|
||||
request: dict,
|
||||
service: PluginService = Depends(get_plugin_service),
|
||||
):
|
||||
enabled = request.get("enabled")
|
||||
if enabled is None:
|
||||
return error_response("缺少 enabled 参数", 400)
|
||||
|
||||
try:
|
||||
success = await service.toggle_plugin(plugin_id, enabled)
|
||||
if not success:
|
||||
return error_response("插件不存在", 404)
|
||||
return success_response(
|
||||
f"插件 {plugin_id} 已{'启用' if enabled else '禁用'}",
|
||||
{"plugin_id": plugin_id, "enabled": enabled},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Toggle plugin failed: {e}")
|
||||
return error_response("切换插件状态失败", 500)
|
||||
|
||||
|
||||
@router.get("/{plugin_id}/config")
|
||||
async def get_plugin_config(
|
||||
plugin_id: str,
|
||||
service: PluginService = Depends(get_plugin_service),
|
||||
):
|
||||
try:
|
||||
config = await service.get_plugin_config(plugin_id)
|
||||
if config is None:
|
||||
return error_response("插件不存在", 404)
|
||||
return success_response("获取插件配置成功", {"plugin_id": plugin_id, "config": config})
|
||||
except Exception as e:
|
||||
logger.error(f"Get plugin config failed: {e}")
|
||||
return error_response("获取插件配置失败", 500)
|
||||
|
||||
|
||||
@router.post("/{plugin_id}/config")
|
||||
async def update_plugin_config(
|
||||
plugin_id: str,
|
||||
request: dict,
|
||||
service: PluginService = Depends(get_plugin_service),
|
||||
):
|
||||
config = request.get("config", {})
|
||||
if not isinstance(config, dict):
|
||||
return error_response("config 必须是对象", 400)
|
||||
|
||||
try:
|
||||
success = await service.update_plugin_config(plugin_id, config)
|
||||
if not success:
|
||||
return error_response("插件不存在或配置无效", 404)
|
||||
return success_response("保存插件配置成功", {"plugin_id": plugin_id, "config": config})
|
||||
except Exception as e:
|
||||
logger.error(f"Update plugin config failed: {e}")
|
||||
return error_response("保存插件配置失败", 500)
|
||||
|
||||
|
||||
@router.post("/{plugin_id}/crawl")
|
||||
async def crawl_plugin(
|
||||
plugin_id: str,
|
||||
plugin_service: PluginService = Depends(get_plugin_service),
|
||||
scheduler_service: SchedulerService = Depends(get_scheduler_service),
|
||||
):
|
||||
plugin = plugin_service.get_plugin(plugin_id)
|
||||
if not plugin:
|
||||
return error_response("插件不存在", 404)
|
||||
|
||||
try:
|
||||
results = await plugin_service.run_plugin(plugin_id)
|
||||
if not results:
|
||||
return success_response(
|
||||
f"插件 {plugin_id} 爬取完成,未获取到代理",
|
||||
{"plugin_id": plugin_id, "proxy_count": 0, "valid_count": 0},
|
||||
)
|
||||
|
||||
logger.info(f"Plugin {plugin_id} crawled {len(results)} proxies")
|
||||
scheduler_service.validation_queue.reset_stats()
|
||||
await scheduler_service.validation_queue.submit(results)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(scheduler_service.validation_queue.drain(), timeout=30.0)
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
|
||||
return success_response(
|
||||
f"插件 {plugin_id} 爬取并验证完成",
|
||||
{
|
||||
"plugin_id": plugin_id,
|
||||
"proxy_count": len(results),
|
||||
"valid_count": scheduler_service.validation_queue.valid_count,
|
||||
"invalid_count": scheduler_service.validation_queue.invalid_count,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Crawl plugin {plugin_id} failed: {e}")
|
||||
return error_response(f"插件爬取失败: {str(e)}", 500)
|
||||
|
||||
|
||||
@router.post("/crawl-all")
|
||||
async def crawl_all(
|
||||
plugin_service: PluginService = Depends(get_plugin_service),
|
||||
scheduler_service: SchedulerService = Depends(get_scheduler_service),
|
||||
):
|
||||
try:
|
||||
results = await plugin_service.run_all_plugins()
|
||||
if not results:
|
||||
return success_response(
|
||||
"所有插件爬取完成,未获取到代理",
|
||||
{"total_crawled": 0, "valid_count": 0, "invalid_count": 0},
|
||||
)
|
||||
|
||||
logger.info(f"All plugins crawled {len(results)} unique proxies")
|
||||
scheduler_service.validation_queue.reset_stats()
|
||||
await scheduler_service.validation_queue.submit(results)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(scheduler_service.validation_queue.drain(), timeout=60.0)
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
|
||||
return success_response(
|
||||
"所有插件爬取并验证完成",
|
||||
{
|
||||
"total_crawled": len(results),
|
||||
"valid_count": scheduler_service.validation_queue.valid_count,
|
||||
"invalid_count": scheduler_service.validation_queue.invalid_count,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Crawl all failed: {e}")
|
||||
return error_response(f"批量爬取失败: {str(e)}", 500)
|
||||
125
app/api/routes/proxies.py
Normal file
125
app/api/routes/proxies.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""代理相关路由(含统计信息)"""
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from app.services.proxy_service import ProxyService
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.models.schemas import ProxyListRequest, BatchDeleteRequest
|
||||
from app.api.deps import get_proxy_service, get_scheduler_service
|
||||
from app.api.common import success_response, error_response, format_proxy
|
||||
from app.core.log import logger
|
||||
|
||||
router = APIRouter(prefix="/api/proxies", tags=["proxies"])
|
||||
|
||||
|
||||
@router.get("/stats")
|
||||
async def get_stats(
|
||||
proxy_service: ProxyService = Depends(get_proxy_service),
|
||||
scheduler_service: SchedulerService = Depends(get_scheduler_service),
|
||||
):
|
||||
try:
|
||||
stats = await proxy_service.get_stats()
|
||||
stats["scheduler_running"] = scheduler_service.running
|
||||
return success_response("获取统计信息成功", stats)
|
||||
except Exception as e:
|
||||
logger.error(f"Get stats failed: {e}")
|
||||
return error_response("获取统计信息失败", 500)
|
||||
|
||||
|
||||
@router.post("")
|
||||
async def list_proxies(
|
||||
request: ProxyListRequest,
|
||||
service: ProxyService = Depends(get_proxy_service),
|
||||
):
|
||||
try:
|
||||
proxies, total = await service.list_proxies(
|
||||
page=request.page,
|
||||
page_size=request.page_size,
|
||||
protocol=request.protocol,
|
||||
min_score=request.min_score,
|
||||
max_score=request.max_score,
|
||||
sort_by=request.sort_by,
|
||||
sort_order=request.sort_order,
|
||||
)
|
||||
return success_response(
|
||||
"获取代理列表成功",
|
||||
{
|
||||
"list": [format_proxy(p) for p in proxies],
|
||||
"total": total,
|
||||
"page": request.page,
|
||||
"page_size": request.page_size,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"List proxies failed: {e}")
|
||||
return error_response("获取代理列表失败", 500)
|
||||
|
||||
|
||||
@router.get("/random")
|
||||
async def get_random_proxy(service: ProxyService = Depends(get_proxy_service)):
|
||||
try:
|
||||
proxy = await service.get_random_proxy()
|
||||
if not proxy:
|
||||
return error_response("没有找到可用的代理", 404)
|
||||
return success_response("获取随机代理成功", format_proxy(proxy))
|
||||
except Exception as e:
|
||||
logger.error(f"Get random proxy failed: {e}")
|
||||
return error_response("获取随机代理失败", 500)
|
||||
|
||||
|
||||
@router.get("/export/{fmt}")
|
||||
async def export_proxies(
|
||||
fmt: str,
|
||||
protocol: Optional[str] = None,
|
||||
limit: int = Query(default=10000, ge=1, le=100000),
|
||||
service: ProxyService = Depends(get_proxy_service),
|
||||
):
|
||||
if fmt not in ("csv", "txt", "json"):
|
||||
return error_response("不支持的导出格式", 400)
|
||||
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
media_types = {"csv": "text/csv", "txt": "text/plain", "json": "application/json"}
|
||||
|
||||
async def generate():
|
||||
async for chunk in service.export_proxies(fmt, protocol, limit):
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
generate(),
|
||||
media_type=media_types[fmt],
|
||||
headers={"Content-Disposition": f"attachment; filename=proxies.{fmt}"},
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{ip}/{port}")
|
||||
async def delete_proxy(ip: str, port: int, service: ProxyService = Depends(get_proxy_service)):
|
||||
try:
|
||||
await service.delete_proxy(ip, port)
|
||||
return success_response("删除代理成功")
|
||||
except Exception as e:
|
||||
logger.error(f"Delete proxy failed: {e}")
|
||||
return error_response("删除代理失败", 500)
|
||||
|
||||
|
||||
@router.post("/batch-delete")
|
||||
async def batch_delete(
|
||||
request: BatchDeleteRequest,
|
||||
service: ProxyService = Depends(get_proxy_service),
|
||||
):
|
||||
try:
|
||||
proxies = [(item.ip, item.port) for item in request.proxies]
|
||||
deleted = await service.batch_delete(proxies)
|
||||
return success_response(f"批量删除 {deleted} 个代理成功", {"deleted_count": deleted})
|
||||
except Exception as e:
|
||||
logger.error(f"Batch delete failed: {e}")
|
||||
return error_response("批量删除失败", 500)
|
||||
|
||||
|
||||
@router.delete("/clean-invalid")
|
||||
async def clean_invalid(service: ProxyService = Depends(get_proxy_service)):
|
||||
try:
|
||||
count = await service.clean_invalid()
|
||||
return success_response(f"清理了 {count} 个无效代理", {"deleted_count": count})
|
||||
except Exception as e:
|
||||
logger.error(f"Clean invalid failed: {e}")
|
||||
return error_response("清理无效代理失败", 500)
|
||||
64
app/api/routes/scheduler.py
Normal file
64
app/api/routes/scheduler.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""调度器相关路由"""
|
||||
from fastapi import APIRouter, Depends
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.repositories.settings_repo import SettingsRepository
|
||||
from app.core.db import get_db
|
||||
from app.api.deps import get_scheduler_service
|
||||
from app.api.common import success_response, error_response
|
||||
from app.core.log import logger
|
||||
|
||||
router = APIRouter(prefix="/api/scheduler", tags=["scheduler"])
|
||||
settings_repo = SettingsRepository()
|
||||
|
||||
|
||||
async def _save_auto_validate_setting(enabled: bool):
    """Persist the auto_validate flag alongside the other stored settings."""
    from app.models.schemas import SettingsSchema

    async with get_db() as db:
        current = await settings_repo.get_all(db)
        current["auto_validate"] = enabled
        # Round-trip through the schema so the persisted shape stays validated
        await settings_repo.save(db, SettingsSchema(**current).model_dump())
|
||||
|
||||
|
||||
@router.post("/start")
|
||||
async def start_scheduler(scheduler: SchedulerService = Depends(get_scheduler_service)):
|
||||
try:
|
||||
if scheduler.running:
|
||||
return success_response("验证调度器已在运行", {"running": True})
|
||||
await scheduler.start()
|
||||
await _save_auto_validate_setting(True)
|
||||
return success_response("验证调度器已启动", {"running": True})
|
||||
except Exception as e:
|
||||
logger.error(f"Start scheduler failed: {e}")
|
||||
return error_response(f"启动调度器失败: {str(e)}", 500)
|
||||
|
||||
|
||||
@router.post("/stop")
|
||||
async def stop_scheduler(scheduler: SchedulerService = Depends(get_scheduler_service)):
|
||||
try:
|
||||
if not scheduler.running:
|
||||
return success_response("验证调度器未运行", {"running": False})
|
||||
await scheduler.stop()
|
||||
await _save_auto_validate_setting(False)
|
||||
return success_response("验证调度器已停止", {"running": False})
|
||||
except Exception as e:
|
||||
logger.error(f"Stop scheduler failed: {e}")
|
||||
return error_response(f"停止调度器失败: {str(e)}", 500)
|
||||
|
||||
|
||||
@router.post("/validate-now")
|
||||
async def validate_now(scheduler: SchedulerService = Depends(get_scheduler_service)):
|
||||
try:
|
||||
scheduler.validate_all_now()
|
||||
return success_response("已开始全量验证", {"started": True})
|
||||
except Exception as e:
|
||||
logger.error(f"Validate now failed: {e}")
|
||||
return error_response(f"启动验证失败: {str(e)}", 500)
|
||||
|
||||
|
||||
@router.get("/status")
|
||||
async def scheduler_status(scheduler: SchedulerService = Depends(get_scheduler_service)):
|
||||
return success_response(
|
||||
"获取状态成功",
|
||||
{"running": scheduler.running, "interval_minutes": scheduler.interval_minutes},
|
||||
)
|
||||
34
app/api/routes/settings.py
Normal file
34
app/api/routes/settings.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""设置相关路由"""
|
||||
from fastapi import APIRouter
|
||||
from app.core.db import get_db
|
||||
from app.repositories.settings_repo import SettingsRepository
|
||||
from app.models.schemas import SettingsSchema
|
||||
from app.api.common import success_response, error_response
|
||||
from app.core.log import logger
|
||||
|
||||
router = APIRouter(prefix="/api/settings", tags=["settings"])
|
||||
settings_repo = SettingsRepository()
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def get_settings():
|
||||
try:
|
||||
async with get_db() as db:
|
||||
settings = await settings_repo.get_all(db)
|
||||
return success_response("获取设置成功", settings)
|
||||
except Exception as e:
|
||||
logger.error(f"Get settings failed: {e}")
|
||||
return error_response("获取设置失败", 500)
|
||||
|
||||
|
||||
@router.post("")
|
||||
async def save_settings(request: SettingsSchema):
|
||||
try:
|
||||
async with get_db() as db:
|
||||
success = await settings_repo.save(db, request.model_dump())
|
||||
if not success:
|
||||
return error_response("保存设置失败", 500)
|
||||
return success_response("保存设置成功", request.model_dump())
|
||||
except Exception as e:
|
||||
logger.error(f"Save settings failed: {e}")
|
||||
return error_response(f"保存设置失败: {str(e)}", 500)
|
||||
13
app/core/__init__.py
Normal file
13
app/core/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""核心基础设施包"""
|
||||
from .config import settings
|
||||
from .log import logger
|
||||
from .exceptions import ProxyPoolException, PluginNotFoundException, ProxyNotFoundException, ValidationException
|
||||
|
||||
__all__ = [
|
||||
"settings",
|
||||
"logger",
|
||||
"ProxyPoolException",
|
||||
"PluginNotFoundException",
|
||||
"ProxyNotFoundException",
|
||||
"ValidationException",
|
||||
]
|
||||
59
app/core/config.py
Normal file
59
app/core/config.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""全局配置 - 使用 Pydantic Settings 支持环境变量和 .env 文件"""
|
||||
import os
|
||||
from typing import List
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Global application settings, loadable from environment variables / .env."""
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Database: SQLite file path, relative to base_dir
    db_path: str = "db/proxies.sqlite"

    # API server bind address
    host: str = "0.0.0.0"
    port: int = 9949

    # Validator: timeouts in seconds, max concurrent checks
    validator_timeout: int = 5
    validator_max_concurrency: int = 200
    validator_connect_timeout: int = 3

    # Crawler worker/queue sizing
    crawler_num_validators: int = 50
    crawler_max_queue_size: int = 500

    # Logging
    log_level: str = "INFO"
    log_dir: str = "logs"

    # Export cap
    export_max_records: int = 10000

    # Proxy scoring: delta on success/failure, clamped to [score_min, score_max]
    score_valid: int = 10
    score_invalid: int = -5
    score_min: int = 0
    score_max: int = 100

    # Plugin directory
    plugins_dir: str = "plugins"

    # CORS: comma-separated origin list (parsed by cors_origins_list)
    cors_origins: str = "http://localhost:8080,http://localhost:5173,http://localhost:9948"

    @property
    def cors_origins_list(self) -> List[str]:
        """Split cors_origins on commas, dropping empty entries."""
        return [origin.strip() for origin in self.cors_origins.split(",") if origin.strip()]

    @property
    def base_dir(self) -> str:
        """Project root: three directory levels above this file (app/core/config.py)."""
        return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
# 全局配置实例(启动时加载一次)
|
||||
settings = Settings()
|
||||
120
app/core/db.py
Normal file
120
app/core/db.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""数据库连接管理 - 使用上下文管理器,避免全局单例连接泄漏"""
|
||||
import os
|
||||
import aiosqlite
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncIterator
|
||||
from app.core.config import settings
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
DB_PATH = os.path.join(settings.base_dir, settings.db_path)
|
||||
|
||||
|
||||
def ensure_db_dir():
    """Create the directory that holds the SQLite file if it does not exist.

    os.makedirs(..., exist_ok=True) already tolerates an existing directory,
    so the previous os.path.exists() pre-check was redundant and race-prone
    (TOCTOU) — dropped.
    """
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)
|
||||
|
||||
|
||||
async def init_db():
    """Initialize the database schema, applying lightweight in-place migrations."""
    ensure_db_dir()
    async with aiosqlite.connect(DB_PATH) as db:
        # Performance pragmas: WAL journal, relaxed sync, 64MB cache, in-memory temps
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA synchronous=NORMAL")
        await db.execute("PRAGMA cache_size=-64000")
        await db.execute("PRAGMA temp_store=MEMORY")

        # Main proxy table; (ip, port) is the natural key
        await db.execute("""
            CREATE TABLE IF NOT EXISTS proxies (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ip TEXT NOT NULL,
                port INTEGER NOT NULL,
                protocol TEXT DEFAULT 'http',
                score INTEGER DEFAULT 10,
                response_time_ms REAL,
                last_check TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(ip, port)
            )
        """)

        # Migration: add response_time_ms when upgrading an older table
        # (probe-by-SELECT: SQLite has no IF NOT EXISTS for columns)
        try:
            await db.execute("SELECT response_time_ms FROM proxies LIMIT 1")
        except Exception:
            await db.execute("ALTER TABLE proxies ADD COLUMN response_time_ms REAL")
            logger.info("Migrated: added response_time_ms column")

        # Migration: add created_at when upgrading an older table
        try:
            await db.execute("SELECT created_at FROM proxies LIMIT 1")
        except Exception:
            await db.execute("ALTER TABLE proxies ADD COLUMN created_at TIMESTAMP")
            await db.execute("UPDATE proxies SET created_at = CURRENT_TIMESTAMP WHERE created_at IS NULL")
            logger.info("Migrated: added created_at column")

        # Indexes backing the common filter/sort paths
        await db.execute("CREATE INDEX IF NOT EXISTS idx_score ON proxies(score)")
        await db.execute("CREATE INDEX IF NOT EXISTS idx_protocol ON proxies(protocol)")
        await db.execute("CREATE INDEX IF NOT EXISTS idx_last_check ON proxies(last_check)")
        await db.execute("CREATE INDEX IF NOT EXISTS idx_ip_port ON proxies(ip, port)")

        # Per-plugin settings (enabled flag + JSON config blob)
        await db.execute("""
            CREATE TABLE IF NOT EXISTS plugin_settings (
                plugin_id TEXT PRIMARY KEY,
                enabled INTEGER DEFAULT 1,
                config_json TEXT DEFAULT '{}',
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Migration: add config_json to an older plugin_settings table
        try:
            await db.execute("SELECT config_json FROM plugin_settings LIMIT 1")
        except Exception:
            await db.execute("ALTER TABLE plugin_settings ADD COLUMN config_json TEXT DEFAULT '{}'")
            logger.info("Migrated: added config_json column to plugin_settings")

        # Validation task queue persistence
        await db.execute("""
            CREATE TABLE IF NOT EXISTS validation_tasks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ip TEXT NOT NULL,
                port INTEGER NOT NULL,
                protocol TEXT DEFAULT 'http',
                status TEXT DEFAULT 'pending',
                result TEXT,
                response_time_ms REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        await db.execute("CREATE INDEX IF NOT EXISTS idx_validation_status ON validation_tasks(status)")
        await db.execute("CREATE INDEX IF NOT EXISTS idx_validation_created ON validation_tasks(created_at)")

        # System-wide key/value settings
        await db.execute("""
            CREATE TABLE IF NOT EXISTS settings (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        await db.commit()
        logger.info("Database initialized")
|
||||
|
||||
|
||||
@asynccontextmanager
async def get_db() -> AsyncIterator[aiosqlite.Connection]:
    """Async context manager yielding a fresh SQLite connection, closed on exit."""
    ensure_db_dir()
    db = await aiosqlite.connect(DB_PATH)
    try:
        # Per-connection pragmas: WAL for concurrent readers, NORMAL sync for speed
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA synchronous=NORMAL")
        yield db
    finally:
        await db.close()
|
||||
24
app/core/exceptions.py
Normal file
24
app/core/exceptions.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""业务异常定义"""
|
||||
|
||||
|
||||
class ProxyPoolException(Exception):
    """Root of the application's exception hierarchy.

    Carries a human-readable message plus an HTTP-style status code that the
    API error handler maps directly onto the response envelope.
    """

    def __init__(self, message: str, code: int = 500):
        super().__init__(message)
        self.message = message
        self.code = code
|
||||
|
||||
|
||||
class PluginNotFoundException(ProxyPoolException):
    """Raised when a plugin id matches no registered plugin (maps to 404)."""

    def __init__(self, plugin_id: str):
        message = f"Plugin '{plugin_id}' not found"
        super().__init__(message, 404)
|
||||
|
||||
|
||||
class ProxyNotFoundException(ProxyPoolException):
    """Raised when an (ip, port) pair matches no stored proxy (maps to 404)."""

    def __init__(self, ip: str, port: int):
        message = f"Proxy {ip}:{port} not found"
        super().__init__(message, 404)
|
||||
|
||||
|
||||
class ValidationException(ProxyPoolException):
    """Raised for invalid input or state in validation paths (maps to 400)."""

    def __init__(self, message: str):
        super().__init__(message, 400)
|
||||
47
app/core/log.py
Normal file
47
app/core/log.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import logging
|
||||
import os
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class LogHandler(logging.Logger):
    """Logger that writes to a per-day rotating file and mirrors to the console."""

    def __init__(self, name='ProxyPool', level=logging.INFO):
        super().__init__(name, level)

        # Resolve the project root and make sure logs/ exists under it
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        log_dir = os.path.join(base_dir, 'logs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        # One log file per calendar day (named by date at construction time)
        log_filename = f"{datetime.now().strftime('%Y-%m-%d')}.log"
        log_file = os.path.join(log_dir, log_filename)

        # Shared record format for both file and console output
        formatter = logging.Formatter(
            '[%(asctime)s] %(name)s [%(levelname)s] %(filename)s[line:%(lineno)d]: %(message)s'
        )

        # Rotating file handler: max 10MB per file, keep 5 backups
        file_handler = RotatingFileHandler(
            log_file,
            maxBytes=10*1024*1024,
            backupCount=5,
            encoding='utf-8'
        )
        file_handler.setFormatter(formatter)
        self.addHandler(file_handler)

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        self.addHandler(console_handler)
|
||||
|
||||
|
||||
# Default module-level logger instance for direct import and use
logger = LogHandler()

if __name__ == '__main__':
    logger.info('这是一条按日期存储的日志测试')
|
||||
5
app/core/plugin_system/__init__.py
Normal file
5
app/core/plugin_system/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""插件系统包"""
|
||||
from .base import BaseCrawlerPlugin, ProxyRaw
|
||||
from .registry import registry
|
||||
|
||||
__all__ = ["BaseCrawlerPlugin", "ProxyRaw", "registry"]
|
||||
55
app/core/plugin_system/base.py
Normal file
55
app/core/plugin_system/base.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""插件基类 - 所有爬虫插件必须继承此基类"""
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Any
|
||||
|
||||
|
||||
@dataclass
class ProxyRaw:
    """Raw proxy record produced by a crawler, prior to validation."""

    ip: str
    port: int
    protocol: str = "http"

    def __post_init__(self):
        # Normalize case/whitespace; anything unrecognized degrades to http.
        normalized = self.protocol.strip().lower()
        if normalized in ("http", "https", "socks4", "socks5"):
            self.protocol = normalized
        else:
            self.protocol = "http"
|
||||
|
||||
|
||||
class BaseCrawlerPlugin(ABC):
    """Base class every crawler plugin must inherit.

    Adding a new crawler takes three steps:
      1. Subclass ``BaseCrawlerPlugin``.
      2. Implement ``crawl()`` returning ``List[ProxyRaw]``.
      3. Register it with ``@registry.register`` (or explicitly at import time).
    """

    # Plugin metadata — subclasses override these class attributes.
    name: str = ""
    display_name: str = ""
    description: str = ""
    enabled: bool = True
    default_config: Dict[str, Any] = {}

    def __init__(self):
        # Per-instance copy so mutating config never touches the class default.
        self._config: Dict[str, Any] = dict(self.default_config or {})

    @property
    def config(self) -> Dict[str, Any]:
        return self._config

    def update_config(self, updates: Dict[str, Any]) -> None:
        """Apply *updates*, silently ignoring keys the plugin does not declare."""
        recognized = {key: value for key, value in updates.items() if key in self._config}
        self._config.update(recognized)

    @abstractmethod
    async def crawl(self) -> List[ProxyRaw]:
        """Fetch proxies. Crawl only — validation happens elsewhere."""
        raise NotImplementedError

    async def health_check(self) -> bool:
        """Optional hook reporting whether the plugin looks healthy."""
        return True
|
||||
77
app/core/plugin_system/registry.py
Normal file
77
app/core/plugin_system/registry.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""插件注册中心 - 显式注册,类型安全,测试友好"""
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
from typing import Dict, List, Type, Optional
|
||||
from app.core.plugin_system.base import BaseCrawlerPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class PluginRegistry:
    """Central registry mapping plugin names to classes and lazily built instances."""

    def __init__(self):
        self._plugins: Dict[str, Type[BaseCrawlerPlugin]] = {}
        self._instances: Dict[str, BaseCrawlerPlugin] = {}

    def register(self, plugin_cls: Type[BaseCrawlerPlugin]) -> Type[BaseCrawlerPlugin]:
        """Register a plugin class; usable as a class decorator."""
        if not (inspect.isclass(plugin_cls) and issubclass(plugin_cls, BaseCrawlerPlugin)):
            raise ValueError("Plugin must be a subclass of BaseCrawlerPlugin")
        if not plugin_cls.name:
            raise ValueError(f"Plugin {plugin_cls.__name__} must have a 'name' attribute")

        self._plugins[plugin_cls.name] = plugin_cls
        logger.info(f"Plugin registered: {plugin_cls.name} ({plugin_cls.__name__})")
        return plugin_cls

    def get(self, name: str) -> Optional[BaseCrawlerPlugin]:
        """Return the instance for *name*, creating it on first use; None if unknown."""
        if name not in self._instances:
            plugin_cls = self._plugins.get(name)
            if plugin_cls:
                self._instances[name] = plugin_cls()
        return self._instances.get(name)

    def list_plugins(self) -> List[BaseCrawlerPlugin]:
        """Instances of every registered plugin, in registration order."""
        return [inst for inst in (self.get(n) for n in self._plugins) if inst]

    def get_plugin_names(self) -> List[str]:
        return list(self._plugins.keys())

    def auto_discover(self, package_name: str):
        """Scan every module of *package_name* and register plugin subclasses found.

        Explicit registration is preferred for type safety; this exists only
        for compatibility.
        """
        try:
            package = importlib.import_module(package_name)
            package_dir = os.path.dirname(package.__file__)
        except Exception as e:
            logger.error(f"Auto discover failed for package {package_name}: {e}")
            return

        for filename in os.listdir(package_dir):
            if not filename.endswith(".py") or filename.startswith("__"):
                continue
            module_name = f"{package_name}.{filename[:-3]}"
            try:
                module = importlib.import_module(module_name)
                for attr_name in dir(module):
                    candidate = getattr(module, attr_name)
                    is_new_plugin = (
                        inspect.isclass(candidate)
                        and issubclass(candidate, BaseCrawlerPlugin)
                        and candidate is not BaseCrawlerPlugin
                        and candidate not in self._plugins.values()
                    )
                    if is_new_plugin:
                        self.register(candidate)
            except Exception as e:
                # Registration errors are treated like import errors: log and move on.
                logger.error(f"Failed to load module {module_name}: {e}")


# Global registry instance shared by the whole application.
registry = PluginRegistry()
|
||||
4
app/core/tasks/__init__.py
Normal file
4
app/core/tasks/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
"""任务队列包"""
|
||||
from .queue import ValidationQueue
|
||||
|
||||
__all__ = ["ValidationQueue"]
|
||||
149
app/core/tasks/queue.py
Normal file
149
app/core/tasks/queue.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""验证任务队列 - 解耦爬取与验证,支持背压控制和持久化"""
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
from app.models.domain import ProxyRaw
|
||||
from app.repositories.task_repo import ValidationTaskRepository
|
||||
from app.core.db import get_db
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class ValidationQueue:
    """Proxy-validation queue persisted to SQLite.

    Decouples crawling from validation:
      1. Crawlers ``submit()`` raw proxies (rows persisted to the DB, then
         workers are woken via an in-memory signal queue).
      2. A pool of worker tasks consumes pending rows and validates them.
      3. Proxies that validate are upserted into the proxy table.
      4. On restart, tasks stuck in ``processing`` are reset to pending so
         nothing is lost.

    Concurrency model: one item on ``self._signal`` wakes exactly one worker,
    and each woken worker processes exactly one task — so exactly one signal
    must be emitted per task.
    """

    def __init__(
        self,
        validator,
        proxy_repo,
        worker_count: int = 50,
        score_valid: int = 10,
        score_invalid: int = -5,
        score_min: int = 0,
        score_max: int = 100,
    ):
        """
        Args:
            validator: object providing ``await validate(ip, port, protocol)``
                returning ``(is_valid, latency)``.
            proxy_repo: repository used to persist validated proxies.
            worker_count: number of concurrent validation workers.
            score_valid: score written when a proxy validates.
            score_invalid / score_min / score_max: scoring bounds kept as
                attributes for other components; not referenced in this class.
        """
        self.validator = validator
        self.proxy_repo = proxy_repo
        self.task_repo = ValidationTaskRepository()
        self.worker_count = worker_count
        self.score_valid = score_valid
        self.score_invalid = score_invalid
        self.score_min = score_min
        self.score_max = score_max

        self._signal: asyncio.Queue[None] = asyncio.Queue()
        self._workers: list[asyncio.Task] = []
        self._running = False
        # Serializes all SQLite access issued by this class.
        self._db_lock = asyncio.Lock()

        # Session statistics.
        self.valid_count = 0
        self.invalid_count = 0

    async def start(self):
        """Spawn the worker pool and recover tasks left over from a previous run."""
        if self._running:
            return
        self._running = True

        # Reset tasks that were mid-flight when the process stopped.
        async with get_db() as db:
            recovered = await self.task_repo.reset_processing(db)
            pending = await self.task_repo.get_pending_count(db)
            if recovered:
                logger.info(f"ValidationQueue recovered {recovered} interrupted tasks")
            if pending:
                logger.info(f"ValidationQueue has {pending} pending tasks to process")

        for i in range(self.worker_count):
            self._workers.append(asyncio.create_task(self._worker_loop(i)))

        # Bug fix: emit one wake-up signal PER pending task. The previous
        # min(pending, worker_count) cap left tasks beyond the pool size
        # unprocessed (each signal drives exactly one task), which also made
        # drain() hang until the next submit.
        for _ in range(pending):
            self._signal.put_nowait(None)

        logger.info(f"ValidationQueue started with {self.worker_count} workers")

    async def stop(self):
        """Stop all workers; each sentinel signal releases one waiting worker."""
        if not self._running:
            return
        self._running = False
        for _ in self._workers:
            self._signal.put_nowait(None)  # sentinel
        if self._workers:
            await asyncio.gather(*self._workers, return_exceptions=True)
        self._workers.clear()
        logger.info("ValidationQueue stopped")

    async def submit(self, proxies: list[ProxyRaw]):
        """Persist raw proxies as pending tasks and wake workers."""
        async with self._db_lock:
            async with get_db() as db:
                inserted = await self.task_repo.insert_batch(db, proxies)
        # Bug fix: one signal per inserted task (see class docstring);
        # min(inserted, worker_count) stranded tasks beyond the pool size.
        if inserted:
            for _ in range(inserted):
                self._signal.put_nowait(None)

    async def submit_one(self, proxy: ProxyRaw):
        await self.submit([proxy])

    async def drain(self):
        """Poll until every currently pending task has been processed."""
        while True:
            async with get_db() as db:
                count = await self.task_repo.get_pending_count(db)
            if count == 0:
                break
            await asyncio.sleep(0.5)

    async def _worker_loop(self, worker_id: int):
        """One signal consumed == one task processed; exits on stop()."""
        while True:
            await self._signal.get()
            self._signal.task_done()
            if not self._running:
                break
            await self._process_one_task(worker_id)

    async def _process_one_task(self, worker_id: int):
        """Acquire one pending task from the DB, validate it, record the outcome."""
        async with self._db_lock:
            async with get_db() as db:
                task = await self.task_repo.acquire_pending(db)
        if not task:
            return

        proxy = ProxyRaw(task["ip"], task["port"], task["protocol"])
        try:
            is_valid, latency = await self.validator.validate(
                proxy.ip, proxy.port, proxy.protocol
            )
        except Exception as e:
            # A validator crash counts as an invalid proxy, not a dead worker.
            logger.error(f"Worker {worker_id} validation error: {e}")
            is_valid, latency = False, 0.0

        async with self._db_lock:
            async with get_db() as db:
                if is_valid:
                    await self.proxy_repo.insert_or_update(
                        db, proxy.ip, proxy.port, proxy.protocol, score=self.score_valid
                    )
                    if latency:
                        await self.proxy_repo.update_response_time(
                            db, proxy.ip, proxy.port, latency
                        )
                    await self.task_repo.complete_task(db, task["id"], True, latency)
                    self.valid_count += 1
                    logger.debug(f"ValidationQueue: valid {proxy.ip}:{proxy.port}")
                else:
                    await self.task_repo.complete_task(db, task["id"], False, 0.0)
                    self.invalid_count += 1
                    logger.debug(f"ValidationQueue: invalid {proxy.ip}:{proxy.port}")

    def reset_stats(self):
        """Zero the session counters."""
        self.valid_count = 0
        self.invalid_count = 0
|
||||
30
app/models/__init__.py
Normal file
30
app/models/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""数据模型包"""
|
||||
from .domain import ProxyRaw, Proxy, PluginInfo
|
||||
from .schemas import (
|
||||
ProxyCreate,
|
||||
ProxyResponse,
|
||||
PluginResponse,
|
||||
SettingsSchema,
|
||||
CrawlResult,
|
||||
ProxyListRequest,
|
||||
ProxyDeleteItem,
|
||||
BatchDeleteRequest,
|
||||
PluginToggleRequest,
|
||||
ExportRequest,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ProxyRaw",
|
||||
"Proxy",
|
||||
"PluginInfo",
|
||||
"ProxyCreate",
|
||||
"ProxyResponse",
|
||||
"PluginResponse",
|
||||
"SettingsSchema",
|
||||
"CrawlResult",
|
||||
"ProxyListRequest",
|
||||
"ProxyDeleteItem",
|
||||
"BatchDeleteRequest",
|
||||
"PluginToggleRequest",
|
||||
"ExportRequest",
|
||||
]
|
||||
42
app/models/domain.py
Normal file
42
app/models/domain.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""领域模型 - 纯数据结构,不依赖任何框架"""
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class ProxyRaw:
    """Raw proxy data as scraped by a crawler, before any validation."""

    ip: str
    port: int
    protocol: str = "http"

    def __post_init__(self):
        # Anything other than the four supported schemes falls back to http.
        cleaned = self.protocol.lower().strip()
        self.protocol = cleaned if cleaned in ("http", "https", "socks4", "socks5") else "http"
|
||||
|
||||
|
||||
@dataclass
class Proxy:
    """Proxy entity as stored in the database."""
    ip: str
    port: int
    protocol: str
    # Quality score maintained by the validation pipeline (higher is better)
    score: int
    # Latency of the last successful check, in milliseconds; None if unknown
    response_time_ms: Optional[float] = None
    # Timestamp of the last validation attempt; None if never checked
    last_check: Optional[datetime] = None
    created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
class PluginInfo:
    """Metadata describing a registered crawler plugin."""
    id: str
    name: str
    display_name: str
    description: str
    # Whether the plugin participates in crawl runs
    enabled: bool
    # Timestamp of the most recent crawl run; None if never run
    last_run: Optional[datetime] = None
    success_count: int = 0
    failure_count: int = 0
|
||||
105
app/models/schemas.py
Normal file
105
app/models/schemas.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Pydantic 模型 - 用于 API 请求/响应校验"""
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing import Optional, List
|
||||
|
||||
|
||||
class ProxyCreate(BaseModel):
    """Payload for manually adding a proxy to the pool."""

    ip: str
    port: int = Field(ge=1, le=65535)
    protocol: str = "http"
    score: int = Field(default=10, ge=0, le=100)

    @field_validator("protocol")
    @classmethod
    def validate_protocol(cls, v: str):
        """Normalize case/whitespace and reject unsupported schemes."""
        normalized = v.lower().strip()
        if normalized in ("http", "https", "socks4", "socks5"):
            return normalized
        raise ValueError("protocol must be http, https, socks4 or socks5")
|
||||
|
||||
|
||||
class ProxyResponse(BaseModel):
    """Single proxy record as returned by the API."""
    ip: str
    port: int
    protocol: str
    score: int
    # ISO-8601 timestamp of the last validation; None if never checked
    last_check: Optional[str] = None
|
||||
|
||||
|
||||
class PluginResponse(BaseModel):
    """Crawler plugin description as returned by the API."""
    id: str
    name: str
    display_name: str
    description: str
    enabled: bool
    # ISO-8601 timestamp of the last crawl run; None if never run
    last_run: Optional[str] = None
    success_count: int = 0
    failure_count: int = 0
|
||||
|
||||
|
||||
class SettingsSchema(BaseModel):
    """Global runtime settings with validated value ranges.

    Timeouts are in seconds; ``validate_interval_minutes`` is in minutes.
    """
    crawl_timeout: int = Field(default=30, ge=5, le=120)
    validation_timeout: int = Field(default=10, ge=3, le=60)
    max_retries: int = Field(default=3, ge=0, le=10)
    default_concurrency: int = Field(default=50, ge=10, le=200)
    # Proxies scoring below this are hidden/ineligible
    min_proxy_score: int = Field(default=0, ge=0, le=100)
    proxy_expiry_days: int = Field(default=7, ge=1, le=30)
    # Periodic re-validation toggle and its interval
    auto_validate: bool = True
    validate_interval_minutes: int = Field(default=30, ge=5, le=1440)
|
||||
|
||||
|
||||
class CrawlResult(BaseModel):
    """Summary of one crawl run for a single plugin."""
    plugin_id: str
    # Total proxies the plugin produced
    proxy_count: int
    valid_count: int
    invalid_count: int = 0
|
||||
|
||||
|
||||
class ProxyListRequest(BaseModel):
    """Query parameters for the paginated proxy listing endpoint."""

    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=20, ge=1, le=100)
    protocol: Optional[str] = None
    min_score: int = Field(default=0, ge=0)
    max_score: Optional[int] = Field(default=None, ge=0)
    sort_by: str = "last_check"
    sort_order: str = "DESC"

    @field_validator("protocol")
    @classmethod
    def validate_protocol(cls, v):
        """Lower-case the protocol filter; reject unsupported schemes."""
        if v is None:
            return v
        lowered = v.lower()
        if lowered not in ("http", "https", "socks4", "socks5"):
            raise ValueError("协议类型必须是 http, https, socks4 或 socks5")
        return lowered

    @field_validator("sort_by")
    @classmethod
    def validate_sort_by(cls, v):
        """Only whitelisted column names may be used for ORDER BY."""
        allowed = ("ip", "port", "protocol", "score", "last_check")
        if v in allowed:
            return v
        raise ValueError("排序字段必须是 ip, port, protocol, score 或 last_check")

    @field_validator("sort_order")
    @classmethod
    def validate_sort_order(cls, v):
        """Normalize to upper case; only ASC/DESC are valid."""
        normalized = v.upper()
        if normalized in ("ASC", "DESC"):
            return normalized
        raise ValueError("排序方式必须是 ASC 或 DESC")
|
||||
|
||||
|
||||
class ProxyDeleteItem(BaseModel):
    """One ip:port pair targeted for deletion."""
    ip: str
    port: int = Field(ge=1, le=65535)
|
||||
|
||||
|
||||
class BatchDeleteRequest(BaseModel):
    """Request body for deleting up to 1000 proxies at once."""
    proxies: List[ProxyDeleteItem] = Field(max_length=1000)
|
||||
|
||||
|
||||
class PluginToggleRequest(BaseModel):
    """Request body for enabling or disabling a crawler plugin."""
    enabled: bool
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
    """Request body for exporting proxies to a downloadable file."""
    # Only csv, txt or json are accepted
    format: str = Field(pattern=r"^(csv|txt|json)$")
    # Optional protocol filter; None exports all protocols
    protocol: Optional[str] = None
    limit: int = Field(default=10000, ge=1, le=100000)
|
||||
21
app/plugins/__init__.py
Normal file
21
app/plugins/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""插件包 - 在这里显式注册所有爬虫插件"""
|
||||
from app.core.plugin_system import registry
|
||||
|
||||
from .fate0 import Fate0Plugin
|
||||
from .proxylist_download import ProxyListDownloadPlugin
|
||||
from .ip3366 import Ip3366Plugin
|
||||
from .ip89 import Ip89Plugin
|
||||
from .kuaidaili import KuaiDaiLiPlugin
|
||||
from .speedx import SpeedXPlugin
|
||||
from .yundaili import YunDaiLiPlugin
|
||||
from .proxyscrape import ProxyScrapePlugin
|
||||
|
||||
# 显式注册所有插件
|
||||
registry.register(Fate0Plugin)
|
||||
registry.register(ProxyListDownloadPlugin)
|
||||
registry.register(Ip3366Plugin)
|
||||
registry.register(Ip89Plugin)
|
||||
registry.register(KuaiDaiLiPlugin)
|
||||
registry.register(SpeedXPlugin)
|
||||
registry.register(YunDaiLiPlugin)
|
||||
registry.register(ProxyScrapePlugin)
|
||||
52
app/plugins/base.py
Normal file
52
app/plugins/base.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""通用 HTTP 爬虫基类 - 为基于 HTTP 请求的插件提供封装"""
|
||||
import random
|
||||
import asyncio
|
||||
import aiohttp
|
||||
from typing import List
|
||||
from app.core.plugin_system import BaseCrawlerPlugin
|
||||
|
||||
|
||||
class BaseHTTPPlugin(BaseCrawlerPlugin):
    """Base class for crawler plugins that fetch pages over HTTP.

    Provides randomized browser-like headers and a retrying ``fetch`` helper.
    """

    def __init__(self):
        super().__init__()
        # Rotated per request to reduce the chance of anti-bot blocking.
        self.user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0",
        ]
        self.urls: List[str] = []
        self.current_url: str = ""

    def get_headers(self) -> dict:
        """Browser-like request headers with a randomly chosen User-Agent."""
        return {
            "User-Agent": random.choice(self.user_agents),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
            "Connection": "keep-alive",
        }

    async def fetch(self, url: str, timeout: float = 10.0, retries: int = 3) -> str:
        """Fetch *url* and return the decoded body, or "" after all retries fail.

        Args:
            url: target address.
            timeout: total per-request timeout in seconds.
            retries: number of attempts before giving up.

        A non-200 status or any request error counts as a failed attempt;
        failures are deliberately swallowed (best-effort crawling).
        """
        headers = self.get_headers()
        async with aiohttp.ClientSession(headers=headers) as session:
            for attempt in range(retries):
                try:
                    async with session.get(
                        url, timeout=aiohttp.ClientTimeout(total=timeout)
                    ) as response:
                        if response.status == 200:
                            content = await response.read()
                            encoding = response.get_encoding()
                            if encoding == "utf-8" or not encoding:
                                try:
                                    return content.decode("utf-8")
                                except UnicodeDecodeError:
                                    # Some Chinese sources mislabel GBK content.
                                    return content.decode("gbk", errors="ignore")
                            return content.decode(encoding, errors="ignore")
                except Exception:
                    # Best-effort: any failure just falls through to a retry.
                    pass
                # Bug fix: only back off BETWEEN attempts — the original also
                # slept 1-3 s after the final failure before returning "".
                if attempt < retries - 1:
                    await asyncio.sleep(random.uniform(1, 3))
        return ""
|
||||
38
app/plugins/fate0.py
Normal file
38
app/plugins/fate0.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import json
|
||||
from typing import List
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class Fate0Plugin(BaseHTTPPlugin):
    """Aggregated proxy feed maintained on GitHub (fate0/proxylist, JSON lines)."""

    name = "fate0"
    display_name = "Fate0聚合源"
    description = "从 GitHub 持续更新的高质量代理聚合列表"

    def __init__(self):
        super().__init__()
        self.urls = ["https://raw.githubusercontent.com/fate0/proxylist/master/proxy.list"]

    async def crawl(self) -> List[ProxyRaw]:
        """Parse each JSON-lines feed into ProxyRaw entries."""
        collected: List[ProxyRaw] = []
        for source in self.urls:
            body = await self.fetch(source, timeout=30)
            if not body:
                continue
            for raw_line in body.split("\n"):
                entry = raw_line.strip()
                if not entry:
                    continue
                try:
                    record = json.loads(entry)
                    host = record.get("host")
                    port_value = record.get("port")
                    scheme = record.get("type", "http")
                    if host and port_value:
                        collected.append(ProxyRaw(host, int(port_value), scheme))
                except Exception:
                    # Malformed line — skip it.
                    continue
        if collected:
            logger.info(f"{self.display_name} 解析完成,获得 {len(collected)} 个潜在代理")
        return collected
|
||||
56
app/plugins/ip3366.py
Normal file
56
app/plugins/ip3366.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import re
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
VALID_PROTOCOLS = ("http", "https", "socks4", "socks5")
|
||||
|
||||
|
||||
class Ip3366Plugin(BaseHTTPPlugin):
    """Scrapes free proxies from the www.ip3366.net listing pages.

    Config:
        max_pages (int): pages fetched per listing type (stype 1 and 2),
            default 5.
    """

    name = "ip3366"
    display_name = "IP3366"
    description = "从 IP3366 网站爬取免费代理"
    default_config = {"max_pages": 5}

    def __init__(self):
        super().__init__()
        self._update_urls()

    def _update_urls(self):
        """Rebuild the page URL list from the current ``max_pages`` config."""
        max_pages = self.config.get("max_pages", 5)
        self.urls = [
            f"http://www.ip3366.net/free/?stype=1&page={i}" for i in range(1, max_pages + 1)
        ] + [
            f"http://www.ip3366.net/free/?stype=2&page={i}" for i in range(1, max_pages + 1)
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch and parse every listing page; unreachable pages are skipped."""
        # Bug fix: urls were only built in __init__, so a later
        # update_config({"max_pages": ...}) silently had no effect.
        # Rebuild them at the start of each crawl.
        self._update_urls()
        results = []
        for url in self.urls:
            html = await self.fetch(url, timeout=15)
            if not html:
                continue
            soup = BeautifulSoup(html, "lxml")
            list_div = soup.find("div", id="list")
            if not list_div:
                continue
            table = list_div.find("table")
            if not table:
                continue

            for row in table.find_all("tr"):
                tds = row.find_all("td")
                if len(tds) >= 5:
                    ip = tds[0].get_text(strip=True)
                    port = tds[1].get_text(strip=True)
                    # Column 5 carries the scheme; unknown values degrade to http.
                    protocol = tds[4].get_text(strip=True).lower()
                    if protocol not in VALID_PROTOCOLS:
                        protocol = "http"
                    if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit():
                        results.append(ProxyRaw(ip, int(port), protocol))

        if results:
            logger.info(f"{self.display_name} 解析完成,获得 {len(results)} 个潜在代理")
        return results
|
||||
39
app/plugins/ip89.py
Normal file
39
app/plugins/ip89.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import re
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class Ip89Plugin(BaseHTTPPlugin):
    """Scrapes free HTTP proxies from the 89ip.cn listing pages (1-5)."""

    name = "ip89"
    display_name = "89免费代理"
    description = "从 89ip.cn 爬取免费代理"

    def __init__(self):
        super().__init__()
        self.urls = [f"https://www.89ip.cn/index_{page}.html" for page in range(1, 6)]

    async def crawl(self) -> List[ProxyRaw]:
        """Parse the layui table on each page into ProxyRaw entries."""
        found: List[ProxyRaw] = []
        for page_url in self.urls:
            page = await self.fetch(page_url, timeout=15)
            if not page:
                continue
            table = BeautifulSoup(page, "lxml").find("table", class_="layui-table")
            if table is None:
                continue

            for row in table.find_all("tr"):
                cells = row.find_all("td")
                if len(cells) < 2:
                    continue
                host = cells[0].get_text(strip=True)
                port_text = cells[1].get_text(strip=True)
                # The site only lists plain HTTP proxies.
                if port_text.isdigit() and re.match(r"^\d+\.\d+\.\d+\.\d+$", host):
                    found.append(ProxyRaw(host, int(port_text), "http"))

        if found:
            logger.info(f"{self.display_name} 解析完成,获得 {len(found)} 个潜在代理")
        return found
|
||||
49
app/plugins/kuaidaili.py
Normal file
49
app/plugins/kuaidaili.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import re
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
VALID_PROTOCOLS = ("http", "https", "socks4", "socks5")
|
||||
|
||||
|
||||
class KuaiDaiLiPlugin(BaseHTTPPlugin):
    """Scrapes the free proxy tables (inha and intr, pages 1-10) from kuaidaili.com."""

    name = "kuaidaili"
    display_name = "快代理"
    description = "从快代理网站爬取免费代理"

    def __init__(self):
        super().__init__()
        self.urls = [
            f"https://www.kuaidaili.com/free/{kind}/{page}/"
            for kind in ("inha", "intr")
            for page in range(1, 11)
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Parse the first table on each page into ProxyRaw entries."""
        harvested: List[ProxyRaw] = []
        for target in self.urls:
            page = await self.fetch(target, timeout=15)
            if not page:
                continue
            table = BeautifulSoup(page, "lxml").find("table")
            if table is None:
                # A missing table usually means the anti-bot page was served.
                logger.warning(f"{self.display_name} 未能找到表格,可能是触发了反爬")
                continue

            for row in table.find_all("tr"):
                cells = row.find_all("td")
                if len(cells) < 5:
                    continue
                host = cells[0].get_text(strip=True)
                port_text = cells[1].get_text(strip=True)
                scheme = cells[4].get_text(strip=True).lower()
                if scheme not in VALID_PROTOCOLS:
                    scheme = "http"
                if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and port_text.isdigit():
                    harvested.append(ProxyRaw(host, int(port_text), scheme))

        if harvested:
            logger.info(f"{self.display_name} 解析完成,获得 {len(harvested)} 个潜在代理")
        return harvested
|
||||
55
app/plugins/proxylist_download.py
Normal file
55
app/plugins/proxylist_download.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from typing import List
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class ProxyListDownloadPlugin(BaseHTTPPlugin):
    """Pulls plain-text ip:port lists from the proxy-list.download API."""

    name = "proxylist_download"
    display_name = "ProxyListDownload"
    description = "从 ProxyListDownload API 获取代理"

    def __init__(self):
        super().__init__()
        self.urls = [
            f"https://www.proxy-list.download/api/v1/get?type={kind}"
            for kind in ("http", "https", "socks4", "socks5")
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch each endpoint and split its ip:port lines into ProxyRaw entries."""
        gathered: List[ProxyRaw] = []
        for api_url in self.urls:
            text = await self.fetch(api_url, timeout=30)
            if not text:
                continue

            # The protocol is encoded in the endpoint's query string.
            if "type=socks4" in api_url:
                scheme = "socks4"
            elif "type=socks5" in api_url:
                scheme = "socks5"
            elif "type=https" in api_url:
                scheme = "https"
            else:
                scheme = "http"

            # The API uses CRLF; fall back to LF if splitting found nothing.
            rows = text.split("\r\n")
            if len(rows) <= 1:
                rows = text.split("\n")

            for row in rows:
                candidate = row.strip()
                if not candidate or ":" not in candidate:
                    continue
                pieces = candidate.split(":")
                if len(pieces) < 2:
                    continue
                host = pieces[0].strip()
                port_text = pieces[1].strip()
                if host and port_text.isdigit():
                    gathered.append(ProxyRaw(host, int(port_text), scheme))

        if gathered:
            logger.info(f"{self.display_name} 解析完成,获得 {len(gathered)} 个潜在代理")
        return gathered
|
||||
75
app/plugins/proxyscrape.py
Normal file
75
app/plugins/proxyscrape.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""ProxyScrape 测试爬虫 - 用于验证架构,支持全协议类型"""
|
||||
from typing import List
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class ProxyScrapePlugin(BaseHTTPPlugin):
    """Architecture-validation source covering every protocol type.

    Pulls http/https/socks4/socks5 lists from public GitHub mirrors; if every
    source is unreachable it falls back to synthetic test proxies so the full
    pipeline can still be exercised.
    """

    name = "proxyscrape"
    display_name = "ProxyScrape测试源"
    description = "从 ProxyScrape API 获取各类型代理(HTTP/HTTPS/SOCKS4/SOCKS5),用于测试架构扩展"
    enabled = True

    def __init__(self):
        super().__init__()
        # Several public GitHub proxy lists — reasonably stable mirrors.
        base = "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies"
        self.urls = [
            (scheme, f"{base}/{scheme}.txt")
            for scheme in ("http", "https", "socks4", "socks5")
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every protocol list; fall back to synthetic data if all fail."""
        harvested: List[ProxyRaw] = []
        for scheme, source in self.urls:
            try:
                text = await self.fetch(source, timeout=30)
                if not text:
                    logger.warning(f"ProxyScrape {scheme.upper()} 返回空内容")
                    continue

                added = 0
                for raw in text.splitlines():
                    entry = raw.strip()
                    if not entry or ":" not in entry:
                        continue
                    pieces = entry.split(":")
                    if len(pieces) < 2:
                        continue
                    host = pieces[0].strip()
                    port_text = pieces[1].strip()
                    if port_text.isdigit():
                        harvested.append(ProxyRaw(host, int(port_text), scheme))
                        added += 1

                logger.info(f"ProxyScrape {scheme.upper()} 获取 {added} 个代理")
            except Exception as e:
                logger.error(f"ProxyScrape {scheme.upper()} 爬取失败: {e}")

        if harvested:
            logger.info(f"ProxyScrape 总计获取 {len(harvested)} 个代理")
            return harvested

        # Fallback: synthesize proxies so the end-to-end flow is still testable.
        logger.warning("ProxyScrape 所有真实源均不可用,生成测试代理用于架构验证")
        return self._generate_test_proxies()

    def _generate_test_proxies(self) -> List[ProxyRaw]:
        """Build 3 random proxies per protocol — test/validation use only."""
        import random
        fakes: List[ProxyRaw] = []
        for scheme in ("http", "https", "socks4", "socks5"):
            for _ in range(3):
                # Random public-looking IPv4 address (pipeline testing only).
                host = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
                fakes.append(ProxyRaw(host, random.randint(1024, 65535), scheme))
        logger.info(f"生成 {len(fakes)} 个测试代理: HTTP/HTTPS/SOCKS4/SOCKS5 各 3 个")
        return fakes
|
||||
51
app/plugins/speedx.py
Normal file
51
app/plugins/speedx.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import re
|
||||
from typing import List
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class SpeedXPlugin(BaseHTTPPlugin):
    """Fetches HTTP/SOCKS lists from the TheSpeedX/SOCKS-List GitHub repository."""

    name = "speedx"
    display_name = "SpeedX代理源"
    description = "从 SpeedX GitHub 仓库获取 SOCKS 代理列表"

    def __init__(self):
        super().__init__()
        repo = "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master"
        self.urls = [f"{repo}/http.txt", f"{repo}/socks4.txt", f"{repo}/socks5.txt"]

    async def crawl(self) -> List[ProxyRaw]:
        """Parse each ip:port list; the protocol is implied by the file name."""
        collected: List[ProxyRaw] = []
        for source in self.urls:
            text = await self.fetch(source, timeout=30)
            if not text:
                continue

            if "socks5" in source:
                scheme = "socks5"
            elif "socks4" in source:
                scheme = "socks4"
            else:
                scheme = "http"

            for raw in text.split("\n"):
                entry = raw.strip()
                if not entry or ":" not in entry:
                    continue
                pieces = entry.split(":")
                if len(pieces) < 2:
                    continue
                host = pieces[0].strip()
                port_text = pieces[1].strip()
                # Keep only dotted-quad hosts with an in-range port.
                if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", host):
                    continue
                if not (port_text.isdigit() and 1 <= int(port_text) <= 65535):
                    continue
                collected.append(ProxyRaw(host, int(port_text), scheme))

        if collected:
            logger.info(f"{self.display_name} 解析完成,获得 {len(collected)} 个潜在代理")
        return collected
|
||||
51
app/plugins/yundaili.py
Normal file
51
app/plugins/yundaili.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import re
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
from app.core.plugin_system import ProxyRaw
|
||||
from app.plugins.base import BaseHTTPPlugin
|
||||
from app.core.log import logger
|
||||
|
||||
VALID_PROTOCOLS = ("http", "https", "socks4", "socks5")
|
||||
|
||||
|
||||
class YunDaiLiPlugin(BaseHTTPPlugin):
    """Scrapes free proxies from the YunDaiLi listing pages.

    NOTE(review): these URLs point at www.ip3366.net — the same site as
    Ip3366Plugin (fixed pages 1-5 here vs. a configurable range there).
    Confirm whether this duplication is intentional.
    """

    name = "yundaili"
    display_name = "云代理"
    description = "从云代理网站爬取免费代理"

    def __init__(self):
        super().__init__()
        pages = range(1, 6)
        self.urls = [
            f"http://www.ip3366.net/free/?stype=1&page={p}" for p in pages
        ] + [
            f"http://www.ip3366.net/free/?stype=2&page={p}" for p in pages
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Parse the table inside div#list on each page into ProxyRaw entries."""
        scraped: List[ProxyRaw] = []
        for target in self.urls:
            page = await self.fetch(target, timeout=15)
            if not page:
                continue
            container = BeautifulSoup(page, "lxml").find("div", id="list")
            if container is None:
                continue
            table = container.find("table")
            if table is None:
                continue

            for row in table.find_all("tr"):
                cells = row.find_all("td")
                if len(cells) < 5:
                    continue
                host = cells[0].get_text(strip=True)
                port_text = cells[1].get_text(strip=True)
                scheme = cells[4].get_text(strip=True).lower()
                if scheme not in VALID_PROTOCOLS:
                    scheme = "http"
                if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and port_text.isdigit():
                    scraped.append(ProxyRaw(host, int(port_text), scheme))

        if scraped:
            logger.info(f"{self.display_name} 解析完成,获得 {len(scraped)} 个潜在代理")
        return scraped
|
||||
11
app/repositories/__init__.py
Normal file
11
app/repositories/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""数据访问层包"""
|
||||
from .proxy_repo import ProxyRepository
|
||||
from .settings_repo import SettingsRepository, PluginSettingsRepository
|
||||
from .task_repo import ValidationTaskRepository
|
||||
|
||||
__all__ = [
|
||||
"ProxyRepository",
|
||||
"SettingsRepository",
|
||||
"PluginSettingsRepository",
|
||||
"ValidationTaskRepository",
|
||||
]
|
||||
277
app/repositories/proxy_repo.py
Normal file
277
app/repositories/proxy_repo.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""代理数据访问层 - 所有 SQL 操作收敛于此"""
|
||||
import aiosqlite
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional, Tuple, Union
|
||||
from app.models.domain import Proxy
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
VALID_PROTOCOLS = ("http", "https", "socks4", "socks5")
|
||||
|
||||
|
||||
def _to_datetime(value: Union[str, datetime, None]) -> Optional[datetime]:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
|
||||
try:
|
||||
return datetime.strptime(value, fmt)
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _row_to_proxy(row: Tuple) -> Proxy:
    """Map a proxies-table row (ip, port, protocol, score, response_time_ms,
    last_check, created_at) onto a Proxy domain object."""
    ip, port, protocol, score, rt_ms, last_check, created_at = row
    return Proxy(
        ip=ip,
        port=port,
        protocol=protocol,
        score=score,
        response_time_ms=rt_ms,
        last_check=_to_datetime(last_check),
        created_at=_to_datetime(created_at),
    )
|
||||
|
||||
|
||||
class ProxyRepository:
    """代理 Repository — every SQL statement touching the `proxies` table.

    All methods are static and take an open aiosqlite connection; callers
    own the connection lifecycle.
    """

    # ORDER BY fragments cannot be bound as SQL parameters, so sortable
    # columns / directions are whitelisted here to keep caller-supplied
    # strings out of interpolated SQL (list_paginated).
    SORTABLE_COLUMNS = frozenset(
        {"ip", "port", "protocol", "score", "response_time_ms", "last_check", "created_at"}
    )

    @staticmethod
    async def insert_or_update(
        db: aiosqlite.Connection,
        ip: str,
        port: int,
        protocol: str = "http",
        score: int = 10,
    ) -> bool:
        """Upsert a proxy; unknown protocols are coerced to "http".

        Returns True on success, False (logged) on any DB error.
        """
        if protocol not in VALID_PROTOCOLS:
            protocol = "http"
        try:
            await db.execute(
                """
                INSERT INTO proxies (ip, port, protocol, score, last_check, created_at)
                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
                ON CONFLICT(ip, port) DO UPDATE SET
                    protocol = excluded.protocol,
                    score = excluded.score,
                    last_check = CURRENT_TIMESTAMP
                """,
                (ip, port, protocol, score),
            )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"insert_or_update proxy failed: {e}")
            return False

    @staticmethod
    async def update_score(
        db: aiosqlite.Connection,
        ip: str,
        port: int,
        delta: int,
        min_score: int = 0,
        max_score: int = 100,
    ) -> bool:
        """Adjust a proxy's score by `delta`, clamped to [min_score, max_score].

        When the new score reaches 0, ALL zero-score rows are swept from the
        table (opportunistic cleanup, not just this proxy).
        Returns False if the proxy does not exist or the update fails.
        """
        try:
            async with db.execute(
                "SELECT score FROM proxies WHERE ip = ? AND port = ?", (ip, port)
            ) as cursor:
                row = await cursor.fetchone()
            if not row:
                return False
            new_score = max(min_score, min(max_score, row[0] + delta))
            await db.execute(
                "UPDATE proxies SET score = ?, last_check = CURRENT_TIMESTAMP WHERE ip = ? AND port = ?",
                (new_score, ip, port),
            )
            if new_score <= 0:
                # Opportunistic sweep of every dead proxy, preserved from the
                # original behavior.
                await db.execute("DELETE FROM proxies WHERE score <= 0")
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"update_score failed: {e}")
            return False

    @staticmethod
    async def update_response_time(
        db: aiosqlite.Connection,
        ip: str,
        port: int,
        response_time_ms: float,
    ) -> bool:
        """Record the latest measured latency (milliseconds) for a proxy."""
        try:
            await db.execute(
                "UPDATE proxies SET response_time_ms = ? WHERE ip = ? AND port = ?",
                (response_time_ms, ip, port),
            )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"update_response_time failed: {e}")
            return False

    @staticmethod
    async def delete(db: aiosqlite.Connection, ip: str, port: int) -> None:
        """Delete a single proxy identified by (ip, port)."""
        await db.execute("DELETE FROM proxies WHERE ip = ? AND port = ?", (ip, port))
        await db.commit()

    @staticmethod
    async def batch_delete(db: aiosqlite.Connection, proxies: List[Tuple[str, int]]) -> int:
        """Delete many (ip, port) pairs in one statement; returns count submitted."""
        if not proxies:
            return 0
        await db.executemany("DELETE FROM proxies WHERE ip = ? AND port = ?", proxies)
        await db.commit()
        return len(proxies)

    @staticmethod
    async def get_by_ip_port(
        db: aiosqlite.Connection, ip: str, port: int
    ) -> Optional[Proxy]:
        """Fetch one proxy by primary key, or None if absent."""
        async with db.execute(
            "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE ip = ? AND port = ?",
            (ip, port),
        ) as cursor:
            row = await cursor.fetchone()
        return _row_to_proxy(row) if row else None

    @staticmethod
    async def get_random(db: aiosqlite.Connection) -> Optional[Proxy]:
        """Return one random proxy with a positive score, or None if none exist."""
        async with db.execute(
            "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE score > 0 ORDER BY RANDOM() LIMIT 1"
        ) as cursor:
            row = await cursor.fetchone()
        return _row_to_proxy(row) if row else None

    @staticmethod
    async def list_all(
        db: aiosqlite.Connection,
        protocol: Optional[str] = None,
        limit: int = 100000,
    ) -> List[Proxy]:
        """List up to `limit` proxies, optionally filtered by protocol."""
        query = "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies"
        params: List = []
        if protocol:
            query += " WHERE protocol = ?"
            params.append(protocol.lower())
        query += " LIMIT ?"
        params.append(limit)

        async with db.execute(query, params) as cursor:
            rows = await cursor.fetchall()
        return [_row_to_proxy(row) for row in rows]

    @staticmethod
    async def list_paginated(
        db: aiosqlite.Connection,
        page: int = 1,
        page_size: int = 20,
        protocol: Optional[str] = None,
        min_score: int = 0,
        max_score: Optional[int] = None,
        sort_by: str = "last_check",
        sort_order: str = "DESC",
    ) -> Tuple[List[Proxy], int]:
        """Return one page of proxies plus the total matching count.

        `sort_by`/`sort_order` are validated against a whitelist before being
        interpolated into the ORDER BY clause (they were previously injected
        verbatim, an SQL-injection vector for caller-supplied values).
        """
        if sort_by not in ProxyRepository.SORTABLE_COLUMNS:
            sort_by = "last_check"
        sort_order = "ASC" if str(sort_order).upper() == "ASC" else "DESC"

        conditions = ["score >= ?"]
        params: List = [min_score]
        if protocol:
            conditions.append("protocol = ?")
            params.append(protocol)
        if max_score is not None:
            conditions.append("score <= ?")
            params.append(max_score)

        where_clause = " AND ".join(conditions)
        offset = (page - 1) * page_size

        count_query = f"SELECT COUNT(*) FROM proxies WHERE {where_clause}"
        async with db.execute(count_query, list(params)) as cursor:
            row = await cursor.fetchone()
            total = row[0] if row else 0

        data_query = f"""
            SELECT ip, port, protocol, score, response_time_ms, last_check, created_at
            FROM proxies
            WHERE {where_clause}
            ORDER BY {sort_by} {sort_order}
            LIMIT ? OFFSET ?
        """
        params.extend([page_size, offset])
        async with db.execute(data_query, params) as cursor:
            rows = await cursor.fetchall()
        return [_row_to_proxy(row) for row in rows], total

    @staticmethod
    async def get_stats(db: aiosqlite.Connection) -> dict:
        """Aggregate counts and average score across the whole table."""
        query = """
            SELECT
                COUNT(*) as total,
                COUNT(CASE WHEN score > 0 THEN 1 END) as available,
                AVG(score) as avg_score,
                COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
                COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
                COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
                COUNT(CASE WHEN protocol = 'socks5' THEN 1 END) as socks5_count
            FROM proxies
        """
        async with db.execute(query) as cursor:
            row = await cursor.fetchone()
        if row:
            return {
                "total": row[0] or 0,
                "available": row[1] or 0,
                "avg_score": round(row[2], 2) if row[2] else 0,
                "http_count": row[3] or 0,
                "https_count": row[4] or 0,
                "socks4_count": row[5] or 0,
                "socks5_count": row[6] or 0,
            }
        return {
            "total": 0,
            "available": 0,
            "avg_score": 0,
            "http_count": 0,
            "https_count": 0,
            "socks4_count": 0,
            "socks5_count": 0,
        }

    @staticmethod
    async def get_today_new_count(db: aiosqlite.Connection) -> int:
        """Count proxies whose last_check falls on today's local date."""
        try:
            async with db.execute(
                "SELECT COUNT(*) FROM proxies WHERE DATE(last_check) = DATE('now', 'localtime')"
            ) as cursor:
                row = await cursor.fetchone()
            return row[0] if row else 0
        except Exception as e:
            logger.error(f"get_today_new_count failed: {e}")
            return 0

    @staticmethod
    async def clean_invalid(db: aiosqlite.Connection) -> int:
        """Delete all zero-score proxies; returns the number deleted.

        Uses the statement cursor's rowcount — db.total_changes is cumulative
        for the whole connection and over-reported here.
        """
        cursor = await db.execute("DELETE FROM proxies WHERE score <= 0")
        await db.commit()
        return cursor.rowcount

    @staticmethod
    async def clean_expired(db: aiosqlite.Connection, days: int) -> int:
        """Delete proxies not checked within `days` days; returns rows deleted.

        The interval is bound as a parameter rather than formatted into the
        SQL string (the old str.format left an injection surface for
        non-integer input).
        """
        try:
            cursor = await db.execute(
                "DELETE FROM proxies WHERE last_check < datetime('now', ?)",
                (f"-{int(days)} days",),
            )
            await db.commit()
            return cursor.rowcount
        except Exception as e:
            logger.error(f"clean_expired failed: {e}")
            return 0
|
||||
140
app/repositories/settings_repo.py
Normal file
140
app/repositories/settings_repo.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""设置数据访问层"""
|
||||
import json
|
||||
import aiosqlite
|
||||
from typing import Optional, Dict, Any
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
# Fallback values used when a key is missing from the `settings` table.
# Stored values are text; SettingsRepository coerces each to the type of
# its default here (bool / int / str).
DEFAULT_SETTINGS = {
    "crawl_timeout": 30,               # presumably seconds per crawl request — TODO confirm unit
    "validation_timeout": 10,          # presumably seconds per validation attempt — TODO confirm unit
    "max_retries": 3,
    "default_concurrency": 50,
    "min_proxy_score": 0,
    "proxy_expiry_days": 7,
    "auto_validate": True,             # enable the periodic re-validation loop
    "validate_interval_minutes": 30,
}
|
||||
|
||||
|
||||
class SettingsRepository:
    """系统设置 Repository — key/value `settings` table access."""

    @staticmethod
    async def get_all(db: aiosqlite.Connection) -> Dict[str, Any]:
        """Return all settings merged over DEFAULT_SETTINGS.

        Each stored text value is coerced to the type of its default.
        A single corrupt value now falls back to its default instead of
        aborting the conversion of every remaining row (previously one bad
        int raised out of the loop and discarded the rest).
        """
        settings = DEFAULT_SETTINGS.copy()
        try:
            async with db.execute("SELECT key, value FROM settings") as cursor:
                rows = await cursor.fetchall()
            for key, value in rows:
                default = DEFAULT_SETTINGS.get(key)
                try:
                    # Type conversion keyed off the default's type.
                    if isinstance(default, bool):
                        settings[key] = value.lower() == "true"
                    elif isinstance(default, int):
                        settings[key] = int(value)
                    else:
                        settings[key] = value
                except (ValueError, TypeError, AttributeError) as e:
                    logger.error(f"setting {key!r} has invalid stored value {value!r}: {e}")
        except Exception as e:
            logger.error(f"get_all settings failed: {e}")
        return settings

    @staticmethod
    async def save(db: aiosqlite.Connection, settings: Dict[str, Any]) -> bool:
        """Upsert each key/value pair (values stored as str); True on success."""
        try:
            for key, value in settings.items():
                await db.execute(
                    """
                    INSERT INTO settings (key, value, updated_at)
                    VALUES (?, ?, CURRENT_TIMESTAMP)
                    ON CONFLICT(key) DO UPDATE SET
                        value = excluded.value,
                        updated_at = CURRENT_TIMESTAMP
                    """,
                    (key, str(value)),
                )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"save settings failed: {e}")
            return False
|
||||
|
||||
|
||||
class PluginSettingsRepository:
    """插件设置 Repository — per-plugin enabled flag and JSON config blob."""

    @staticmethod
    async def get_enabled(db: aiosqlite.Connection, plugin_id: str) -> Optional[bool]:
        """Return the persisted enabled flag, or None if no row exists."""
        async with db.execute(
            "SELECT enabled FROM plugin_settings WHERE plugin_id = ?", (plugin_id,)
        ) as cursor:
            row = await cursor.fetchone()
        return bool(row[0]) if row else None

    @staticmethod
    async def set_enabled(db: aiosqlite.Connection, plugin_id: str, enabled: bool) -> bool:
        """Upsert the enabled flag; True on success."""
        try:
            await db.execute(
                """
                INSERT INTO plugin_settings (plugin_id, enabled, created_at, updated_at)
                VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
                ON CONFLICT(plugin_id) DO UPDATE SET
                    enabled = excluded.enabled,
                    updated_at = CURRENT_TIMESTAMP
                """,
                (plugin_id, int(enabled)),
            )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"set_enabled failed for {plugin_id}: {e}")
            return False

    @staticmethod
    async def get_config(db: aiosqlite.Connection, plugin_id: str) -> Optional[Dict[str, Any]]:
        """Return the plugin's JSON config as a dict, or None if absent/corrupt."""
        async with db.execute(
            "SELECT config_json FROM plugin_settings WHERE plugin_id = ?", (plugin_id,)
        ) as cursor:
            row = await cursor.fetchone()
        if row and row[0]:
            try:
                return json.loads(row[0])
            except json.JSONDecodeError:
                return None
        return None

    @staticmethod
    async def set_config(db: aiosqlite.Connection, plugin_id: str, config: Dict[str, Any]) -> bool:
        """Upsert the plugin's config blob; True on success.

        The INSERT now also populates created_at, matching set_enabled — the
        previous version left created_at unset when the row was first created
        through this path.
        """
        try:
            await db.execute(
                """
                INSERT INTO plugin_settings (plugin_id, config_json, created_at, updated_at)
                VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
                ON CONFLICT(plugin_id) DO UPDATE SET
                    config_json = excluded.config_json,
                    updated_at = CURRENT_TIMESTAMP
                """,
                (plugin_id, json.dumps(config, ensure_ascii=False)),
            )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"set_config failed for {plugin_id}: {e}")
            return False

    @staticmethod
    async def list_all(db: aiosqlite.Connection) -> Dict[str, Dict[str, Any]]:
        """Return {plugin_id: {"enabled": bool, "config": dict}} for every row.

        Corrupt config_json degrades to an empty config rather than failing
        the whole listing.
        """
        result = {}
        async with db.execute("SELECT plugin_id, enabled, config_json FROM plugin_settings") as cursor:
            rows = await cursor.fetchall()
        for plugin_id, enabled, config_json in rows:
            config = {}
            if config_json:
                try:
                    config = json.loads(config_json)
                except json.JSONDecodeError:
                    pass
            result[plugin_id] = {"enabled": bool(enabled), "config": config}
        return result
|
||||
135
app/repositories/task_repo.py
Normal file
135
app/repositories/task_repo.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""验证任务队列持久化层"""
|
||||
import aiosqlite
|
||||
from typing import List, Optional
|
||||
from app.models.domain import ProxyRaw
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class ValidationTaskRepository:
    """验证任务 Repository —— persistent validation-task queue.

    Tasks move pending -> processing -> completed/failed; interrupted
    processing tasks are reset to pending on startup (reset_processing).
    """

    @staticmethod
    async def insert_batch(db: aiosqlite.Connection, proxies: List[ProxyRaw]) -> int:
        """Enqueue a batch of proxies as pending tasks; returns rows inserted."""
        if not proxies:
            return 0
        try:
            rows = [(p.ip, p.port, p.protocol) for p in proxies]
            await db.executemany(
                """
                INSERT INTO validation_tasks (ip, port, protocol, status, created_at)
                VALUES (?, ?, ?, 'pending', CURRENT_TIMESTAMP)
                """,
                rows,
            )
            await db.commit()
            return len(rows)
        except Exception as e:
            logger.error(f"insert_batch validation tasks failed: {e}")
            return 0

    @staticmethod
    async def acquire_pending(db: aiosqlite.Connection) -> Optional[dict]:
        """原子性地获取一个 pending 任务并将其标记为 processing"""
        try:
            async with db.execute(
                """
                SELECT id, ip, port, protocol FROM validation_tasks
                WHERE status = 'pending'
                ORDER BY id ASC
                LIMIT 1
                """
            ) as cursor:
                row = await cursor.fetchone()
            if not row:
                return None
            task_id = row[0]
            await db.execute(
                "UPDATE validation_tasks SET status = 'processing', updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (task_id,),
            )
            await db.commit()
            return {"id": task_id, "ip": row[1], "port": row[2], "protocol": row[3]}
        except Exception as e:
            logger.error(f"acquire_pending failed: {e}")
            return None

    @staticmethod
    async def complete_task(
        db: aiosqlite.Connection,
        task_id: int,
        is_valid: bool,
        response_time_ms: Optional[float] = None,
    ) -> bool:
        """Mark a task completed with its validation verdict; True on success."""
        try:
            await db.execute(
                """
                UPDATE validation_tasks
                SET status = 'completed',
                    result = ?,
                    response_time_ms = ?,
                    updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
                """,
                ("valid" if is_valid else "invalid", response_time_ms, task_id),
            )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"complete_task failed: {e}")
            return False

    @staticmethod
    async def fail_task(db: aiosqlite.Connection, task_id: int) -> bool:
        """Mark a task failed (result recorded as 'invalid'); True on success."""
        try:
            await db.execute(
                """
                UPDATE validation_tasks
                SET status = 'failed',
                    result = 'invalid',
                    updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
                """,
                (task_id,),
            )
            await db.commit()
            return True
        except Exception as e:
            logger.error(f"fail_task failed: {e}")
            return False

    @staticmethod
    async def get_pending_count(db: aiosqlite.Connection) -> int:
        """Return the number of tasks still waiting to be processed."""
        async with db.execute(
            "SELECT COUNT(*) FROM validation_tasks WHERE status = 'pending'"
        ) as cursor:
            row = await cursor.fetchone()
        return row[0] if row else 0

    @staticmethod
    async def reset_processing(db: aiosqlite.Connection) -> int:
        """将异常中断的 processing 任务重置为 pending,用于启动恢复

        Returns the number of tasks reset (statement rowcount — the previous
        db.total_changes was cumulative for the whole connection).
        """
        try:
            cursor = await db.execute(
                """
                UPDATE validation_tasks
                SET status = 'pending', updated_at = CURRENT_TIMESTAMP
                WHERE status = 'processing'
                """
            )
            await db.commit()
            return cursor.rowcount
        except Exception as e:
            logger.error(f"reset_processing failed: {e}")
            return 0

    @staticmethod
    async def cleanup_old(db: aiosqlite.Connection, days: int = 7) -> int:
        """Delete tasks not updated within `days` days; returns rows deleted.

        The interval is bound as a parameter instead of being formatted into
        the SQL string (str.format was an injection surface for non-integer
        input), and the count comes from the statement cursor.
        """
        try:
            cursor = await db.execute(
                "DELETE FROM validation_tasks WHERE updated_at < datetime('now', ?)",
                (f"-{int(days)} days",),
            )
            await db.commit()
            return cursor.rowcount
        except Exception as e:
            logger.error(f"cleanup_old tasks failed: {e}")
            return 0
|
||||
12
app/services/__init__.py
Normal file
12
app/services/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""业务逻辑层包"""
|
||||
from .proxy_service import ProxyService
|
||||
from .plugin_service import PluginService
|
||||
from .scheduler_service import SchedulerService
|
||||
from .validator_service import ValidatorService
|
||||
|
||||
__all__ = [
|
||||
"ProxyService",
|
||||
"PluginService",
|
||||
"SchedulerService",
|
||||
"ValidatorService",
|
||||
]
|
||||
139
app/services/plugin_service.py
Normal file
139
app/services/plugin_service.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""插件业务服务"""
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from app.core.db import get_db
|
||||
from app.core.plugin_system.registry import registry
|
||||
from app.core.plugin_system.base import BaseCrawlerPlugin
|
||||
from app.repositories.settings_repo import PluginSettingsRepository
|
||||
from app.models.domain import PluginInfo, ProxyRaw
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class PluginService:
    """插件业务服务:管理插件生命周期、执行爬取、配置管理"""

    def __init__(self):
        self.plugin_settings_repo = PluginSettingsRepository()
        # In-memory run statistics keyed by plugin name; reset on restart.
        self._stats: dict[str, dict] = {}

    async def list_plugins(self) -> List[PluginInfo]:
        """获取所有插件信息(合并持久化状态和配置)"""
        async with get_db() as db:
            db_states = await self.plugin_settings_repo.list_all(db)

        result = []
        for plugin in registry.list_plugins():
            # 合并持久化状态
            state = db_states.get(plugin.name, {})
            if "enabled" in state:
                plugin.enabled = state["enabled"]
            if "config" in state and isinstance(state["config"], dict):
                plugin.update_config(state["config"])

            stat = self._stats.get(plugin.name, {
                "success_count": 0,
                "failure_count": 0,
                "last_run": None,
            })
            result.append(PluginInfo(
                id=plugin.name,
                name=plugin.name,
                display_name=plugin.display_name or plugin.name,
                description=plugin.description or f"从 {plugin.name} 爬取代理",
                enabled=plugin.enabled,
                last_run=stat.get("last_run"),
                success_count=stat.get("success_count", 0),
                failure_count=stat.get("failure_count", 0),
            ))
        return result

    async def toggle_plugin(self, plugin_id: str, enabled: bool) -> bool:
        """Persist and apply a plugin's enabled flag; False if unknown plugin."""
        plugin = registry.get(plugin_id)
        if not plugin:
            return False
        async with get_db() as db:
            success = await self.plugin_settings_repo.set_enabled(db, plugin_id, enabled)
        if success:
            plugin.enabled = enabled
            logger.info(f"Plugin {plugin_id} toggled to {enabled}")
        return success

    async def get_plugin_config(self, plugin_id: str) -> Optional[dict]:
        """获取插件当前配置(合并默认值和持久化值)"""
        plugin = registry.get(plugin_id)
        if not plugin:
            return None
        async with get_db() as db:
            saved = await self.plugin_settings_repo.get_config(db, plugin_id)
        config = dict(plugin.default_config)
        if saved:
            config.update(saved)
        return config

    async def update_plugin_config(self, plugin_id: str, config: dict) -> bool:
        """更新插件配置(只保存已存在于 default_config 中的键)"""
        plugin = registry.get(plugin_id)
        if not plugin:
            return False
        # 过滤非法键
        safe_config = {k: v for k, v in config.items() if k in plugin.default_config}
        if not safe_config:
            return False
        plugin.update_config(safe_config)
        async with get_db() as db:
            return await self.plugin_settings_repo.set_config(db, plugin_id, plugin.config)

    def get_plugin(self, plugin_id: str) -> Optional[BaseCrawlerPlugin]:
        """Look up a plugin by id in the registry; None if unknown."""
        return registry.get(plugin_id)

    async def run_plugin(self, plugin_id: str) -> List[ProxyRaw]:
        """执行单个插件爬取

        Raises ValueError for an unknown plugin; returns [] for a disabled
        plugin or a failed crawl (the failure is counted and logged).
        """
        plugin = self.get_plugin(plugin_id)
        if not plugin:
            raise ValueError(f"Plugin {plugin_id} not found")
        if not plugin.enabled:
            logger.warning(f"Plugin {plugin_id} is disabled, skip crawl")
            return []

        try:
            results = await plugin.crawl()
            self._record_stat(plugin_id, success=len(results))
            logger.info(f"Plugin {plugin_id} crawled {len(results)} proxies")
            return results
        except Exception as e:
            self._record_stat(plugin_id, failure=1)
            logger.error(f"Plugin {plugin_id} crawl failed: {e}")
            return []

    async def run_all_plugins(self) -> List[ProxyRaw]:
        """执行所有启用插件的爬取 (results de-duplicated by ip/port/protocol)."""
        all_results: List[ProxyRaw] = []
        for plugin in registry.list_plugins():
            if not plugin.enabled:
                continue
            try:
                results = await self.run_plugin(plugin.name)
                all_results.extend(results)
            except Exception as e:
                logger.error(f"Run all plugins error at {plugin.name}: {e}")
        # 去重
        seen = set()
        unique = []
        for p in all_results:
            key = (p.ip, p.port, p.protocol)
            if key not in seen:
                seen.add(key)
                unique.append(p)
        return unique

    def _record_stat(self, plugin_id: str, success: int = 0, failure: int = 0):
        """Record one run attempt.

        last_run is stamped on every call: previously it was only updated
        when success or failure was non-zero, so a successful crawl that
        yielded zero proxies never refreshed last_run.
        """
        stat = self._stats.setdefault(plugin_id, {
            "success_count": 0,
            "failure_count": 0,
            "last_run": None,
        })
        stat["success_count"] += success
        stat["failure_count"] += failure
        stat["last_run"] = datetime.now()
|
||||
93
app/services/proxy_service.py
Normal file
93
app/services/proxy_service.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""代理业务服务"""
|
||||
import csv
|
||||
import json
|
||||
import io
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Tuple, AsyncIterator
|
||||
from app.core.db import get_db
|
||||
from app.repositories.proxy_repo import ProxyRepository
|
||||
from app.models.domain import Proxy
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class ProxyService:
    """代理业务服务 — stats, listing, deletion, cleanup and export."""

    def __init__(self, proxy_repo: Optional[ProxyRepository] = None):
        # None sentinel instead of `= ProxyRepository()`: a default argument
        # is evaluated once at class-definition time and shared by every
        # instance (mutable-default anti-pattern).
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()

    async def get_stats(self) -> dict:
        """Return aggregate proxy stats plus today's new-proxy count."""
        async with get_db() as db:
            stats = await self.proxy_repo.get_stats(db)
            stats["today_new"] = await self.proxy_repo.get_today_new_count(db)
            return stats

    async def list_proxies(
        self,
        page: int = 1,
        page_size: int = 20,
        protocol: Optional[str] = None,
        min_score: int = 0,
        max_score: Optional[int] = None,
        sort_by: str = "last_check",
        sort_order: str = "DESC",
    ) -> Tuple[List[Proxy], int]:
        """Return one page of proxies and the total matching count."""
        async with get_db() as db:
            return await self.proxy_repo.list_paginated(
                db, page, page_size, protocol, min_score, max_score, sort_by, sort_order
            )

    async def get_random_proxy(self) -> Optional[Proxy]:
        """Return one random available proxy, or None if the pool is empty."""
        async with get_db() as db:
            return await self.proxy_repo.get_random(db)

    async def delete_proxy(self, ip: str, port: int) -> None:
        """Delete a single proxy by (ip, port)."""
        async with get_db() as db:
            await self.proxy_repo.delete(db, ip, port)

    async def batch_delete(self, proxies: List[Tuple[str, int]]) -> int:
        """Delete many (ip, port) pairs; returns the count submitted."""
        async with get_db() as db:
            return await self.proxy_repo.batch_delete(db, proxies)

    async def clean_invalid(self) -> int:
        """Remove all zero-score proxies; returns rows removed."""
        async with get_db() as db:
            return await self.proxy_repo.clean_invalid(db)

    async def clean_expired(self, days: int) -> int:
        """Remove proxies unchecked for `days` days; returns rows removed."""
        async with get_db() as db:
            return await self.proxy_repo.clean_expired(db, days)

    async def export_proxies(
        self,
        fmt: str,
        protocol: Optional[str] = None,
        limit: int = 10000,
    ) -> AsyncIterator[str]:
        """Stream proxies in `fmt` ("csv", "txt" or "json").

        Unknown formats yield nothing. csv/txt are streamed row by row;
        json is emitted as one document.
        """
        async with get_db() as db:
            proxies = await self.proxy_repo.list_all(db, protocol=protocol, limit=limit)

        if fmt == "csv":
            yield "IP,Port,Protocol,Score,Last Check\n"
            for p in proxies:
                yield f"{p.ip},{p.port},{p.protocol},{p.score},{self._fmt_time(p.last_check)}\n"
        elif fmt == "txt":
            for p in proxies:
                yield f"{p.ip}:{p.port}\n"
        elif fmt == "json":
            data = [
                {
                    "ip": p.ip,
                    "port": p.port,
                    "protocol": p.protocol,
                    "score": p.score,
                    "last_check": self._fmt_time(p.last_check),
                }
                for p in proxies
            ]
            yield json.dumps(data, ensure_ascii=False, indent=2)

    @staticmethod
    def _fmt_time(dt: Optional[datetime]) -> str:
        """Render a timestamp for export; tolerates str passthrough and None."""
        if not dt:
            return ""
        if isinstance(dt, str):
            return dt
        return dt.isoformat()
|
||||
88
app/services/scheduler_service.py
Normal file
88
app/services/scheduler_service.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""调度器服务 - 定时验证存量代理"""
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from app.core.db import get_db
|
||||
from app.repositories.proxy_repo import ProxyRepository
|
||||
from app.core.tasks.queue import ValidationQueue
|
||||
from app.core.config import settings as app_settings
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class SchedulerService:
    """代理验证调度器 — periodically re-validates every stored proxy."""

    def __init__(
        self,
        validation_queue: ValidationQueue,
        proxy_repo: Optional[ProxyRepository] = None,
    ):
        # None sentinel instead of a default ProxyRepository() evaluated at
        # class-definition time (shared-default anti-pattern).
        self.validation_queue = validation_queue
        self.proxy_repo = proxy_repo if proxy_repo is not None else ProxyRepository()
        self.interval_minutes = 30
        self.running = False
        self._task: asyncio.Task | None = None
        # Strong references to fire-and-forget tasks: asyncio keeps only a
        # weak reference, so an unreferenced task can be garbage-collected
        # before it finishes.
        self._bg_tasks: set[asyncio.Task] = set()

    async def start(self):
        """Start the validation queue and the periodic loop (idempotent)."""
        if self.running:
            logger.warning("Scheduler already running")
            return
        self.running = True
        await self.validation_queue.start()
        self._task = asyncio.create_task(self._run_loop())
        logger.info("Scheduler started")

    async def stop(self):
        """Cancel the loop, drain cancellation, and stop the queue."""
        self.running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None
        await self.validation_queue.stop()
        logger.info("Scheduler stopped")

    async def validate_all_now(self):
        """立即执行一次全量验证(后台运行,不阻塞)"""
        task = asyncio.create_task(self._do_validate_all())
        self._bg_tasks.add(task)
        task.add_done_callback(self._bg_tasks.discard)

    async def _run_loop(self):
        """定时循环: validate everything, then sleep interval_minutes."""
        while self.running:
            try:
                await self._do_validate_all()
            except Exception as e:
                logger.error(f"Scheduler loop error: {e}")
            # 等待下一次 — 1s granularity so stop() is picked up promptly.
            for _ in range(self.interval_minutes * 60):
                if not self.running:
                    break
                await asyncio.sleep(1)

    async def _do_validate_all(self):
        """验证数据库中所有存量代理 (batched through the validation queue)."""
        logger.info("Starting scheduled validation for all proxies")
        async with get_db() as db:
            proxies = await self.proxy_repo.list_all(db)
        if not proxies:
            logger.info("No proxies to validate")
            return

        logger.info(f"Validating {len(proxies)} proxies from database")
        from app.models.domain import ProxyRaw

        # 批量提交到验证队列
        batch_size = 100
        for i in range(0, len(proxies), batch_size):
            if not self.running:
                break
            batch = proxies[i : i + batch_size]
            await self.validation_queue.submit([
                ProxyRaw(p.ip, p.port, p.protocol) for p in batch
            ])
            # 等待当前批次处理完
            await self.validation_queue.drain()
            logger.info(f"Validated batch {i//batch_size + 1}/{(len(proxies)-1)//batch_size + 1}")

        logger.info("Scheduled validation completed")
|
||||
97
app/services/validator_service.py
Normal file
97
app/services/validator_service.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""代理验证服务 - 支持 HTTP/HTTPS/SOCKS4/SOCKS5"""
|
||||
import asyncio
|
||||
import random
|
||||
import time
|
||||
import aiohttp
|
||||
import aiohttp_socks
|
||||
from typing import Tuple
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
class ValidatorService:
|
||||
"""代理验证器"""
|
||||
|
||||
# 测试 URL
|
||||
TEST_URLS = {
|
||||
"http": ["http://httpbin.org/ip", "http://api.ipify.org"],
|
||||
"https": ["https://httpbin.org/ip", "https://api.ipify.org"],
|
||||
}
|
||||
|
||||
def __init__(
    self,
    timeout: float = 5.0,
    connect_timeout: float = 3.0,
    max_concurrency: int = 50,
):
    """Configure per-request timeouts and the global concurrency cap."""
    self.timeout = timeout
    self.connect_timeout = connect_timeout
    # Bounds the number of simultaneously in-flight validations.
    self.semaphore = asyncio.Semaphore(max_concurrency)
|
||||
|
||||
def _get_test_url(self, protocol: str) -> str:
    """Pick a random probe URL for the protocol (falls back to the http set)."""
    candidates = self.TEST_URLS.get(protocol.lower())
    if candidates is None:
        candidates = self.TEST_URLS["http"]
    return random.choice(candidates)
|
||||
|
||||
async def validate(self, ip: str, port: int, protocol: str = "http") -> Tuple[bool, float]:
|
||||
"""验证单个代理,返回 (是否有效, 延迟毫秒)"""
|
||||
protocol = protocol.lower()
|
||||
|
||||
async with self.semaphore:
|
||||
start = time.time()
|
||||
try:
|
||||
if protocol in ("socks4", "socks5"):
|
||||
return await self._validate_socks(ip, port, protocol, start)
|
||||
else:
|
||||
return await self._validate_http(ip, port, protocol, start)
|
||||
except asyncio.TimeoutError:
|
||||
logger.debug(f"Validation timeout: {ip}:{port} ({protocol})")
|
||||
return False, 0.0
|
||||
except Exception as e:
|
||||
logger.debug(f"Validation error {ip}:{port} ({protocol}): {e}")
|
||||
return False, 0.0
|
||||
|
||||
async def _validate_http(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
|
||||
"""验证 HTTP/HTTPS 代理"""
|
||||
proxy_url = f"http://{ip}:{port}"
|
||||
connector = aiohttp.TCPConnector(ssl=False, limit=0, force_close=True)
|
||||
timeout = aiohttp.ClientTimeout(total=self.timeout, connect=self.connect_timeout)
|
||||
test_url = self._get_test_url(protocol)
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
||||
async with session.get(test_url, proxy=proxy_url, allow_redirects=True) as response:
|
||||
if response.status in (200, 301, 302):
|
||||
latency = round((time.time() - start) * 1000, 2)
|
||||
logger.info(f"HTTP valid: {ip}:{port} ({protocol}) {latency}ms")
|
||||
return True, latency
|
||||
return False, 0.0
|
||||
finally:
|
||||
await connector.close()
|
||||
|
||||
async def _validate_socks(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
|
||||
"""验证 SOCKS4/SOCKS5 代理"""
|
||||
proxy_type = (
|
||||
aiohttp_socks.ProxyType.SOCKS4
|
||||
if protocol == "socks4"
|
||||
else aiohttp_socks.ProxyType.SOCKS5
|
||||
)
|
||||
connector = aiohttp_socks.ProxyConnector(
|
||||
proxy_type=proxy_type,
|
||||
host=ip,
|
||||
port=port,
|
||||
rdns=True,
|
||||
ssl=False,
|
||||
)
|
||||
timeout = aiohttp.ClientTimeout(total=self.timeout, connect=self.connect_timeout)
|
||||
test_url = self._get_test_url("http")
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
||||
async with session.get(test_url, allow_redirects=True) as response:
|
||||
if response.status in (200, 301, 302):
|
||||
latency = round((time.time() - start) * 1000, 2)
|
||||
logger.info(f"SOCKS valid: {ip}:{port} ({protocol}) {latency}ms")
|
||||
return True, latency
|
||||
return False, 0.0
|
||||
finally:
|
||||
await connector.close()
|
||||
Reference in New Issue
Block a user