feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses a single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
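For the test-DB isolation, the environment variable has to be set before any app module reads the config. A minimal sketch of how that might be wired up in pytest — the conftest.py placement and the setdefault call are assumptions; only the variable name and file path come from this commit:

```python
# conftest.py -- hypothetical sketch; PROXYPOOL_DB_PATH and
# db/proxies.test.sqlite are from the commit, the rest is assumed.
import os

# Must run before app.core.config is imported anywhere, so the
# settings object picks up the test database instead of the
# production file.
os.environ.setdefault("PROXYPOOL_DB_PATH", "db/proxies.test.sqlite")
```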
@@ -25,6 +25,7 @@ def format_proxy(proxy) -> dict:
         "score": proxy.score,
         "response_time_ms": proxy.response_time_ms,
         "last_check": proxy.last_check.isoformat() if proxy.last_check else None,
+        "validated": getattr(proxy, "validated", 0),
     }
@@ -3,7 +3,7 @@ import asyncio
 from contextlib import AsyncExitStack, asynccontextmanager
 from fastapi import FastAPI

-from app.core.db import init_db, get_db
+from app.core.db import init_db, get_db, get_db_connection
 from app.core.config import settings as app_settings
 from app.core.log import logger
 from app.core.execution import AsyncWorkerPool, JobExecutor
@@ -13,6 +13,8 @@ from app.repositories.settings_repo import SettingsRepository, DEFAULT_SETTINGS
 from app.services.validator_service import ValidatorService
 from app.services.plugin_runner import PluginRunner
 from app.services.scheduler_service import SchedulerService
+from app.api.ws_manager import ConnectionManager
+from app.api.realtime import stats_broadcaster_loop

 settings_repo = SettingsRepository()
 proxy_repo = ProxyRepository()
@@ -46,22 +48,50 @@ async def lifespan(app: FastAPI):

     # Validation WorkerPool
     async def validation_handler(proxy):
-        from app.models.domain import ProxyRaw
-        is_valid, latency = await validator.validate(
-            proxy.ip, proxy.port, proxy.protocol
-        )
-        async with get_db() as db:
-            if is_valid:
-                await proxy_repo.insert_or_update(
-                    db, proxy.ip, proxy.port, proxy.protocol, score=app_settings.score_valid
-                )
-                if latency:
-                    await proxy_repo.update_response_time(db, proxy.ip, proxy.port, latency)
+        async with get_db_connection() as db:
+            existing = await proxy_repo.get_by_ip_port(db, proxy.ip, proxy.port)
+            is_valid, latency = await validator.validate(
+                proxy.ip, proxy.port, proxy.protocol
+            )
+            if not existing:
+                return
+            if existing.validated == 0:
+                if is_valid:
+                    await proxy_repo.insert_or_update(
+                        db,
+                        proxy.ip,
+                        proxy.port,
+                        proxy.protocol,
+                        score=app_settings.score_valid,
+                    )
+                    if latency:
+                        await proxy_repo.update_response_time(
+                            db, proxy.ip, proxy.port, latency
+                        )
+                else:
+                    await proxy_repo.delete(db, proxy.ip, proxy.port)
             else:
-                await proxy_repo.update_score(
-                    db, proxy.ip, proxy.port, app_settings.score_invalid,
-                    app_settings.score_min, app_settings.score_max
-                )
+                if is_valid:
+                    await proxy_repo.insert_or_update(
+                        db,
+                        proxy.ip,
+                        proxy.port,
+                        proxy.protocol,
+                        score=app_settings.score_valid,
+                    )
+                    if latency:
+                        await proxy_repo.update_response_time(
+                            db, proxy.ip, proxy.port, latency
+                        )
+                else:
+                    await proxy_repo.update_score(
+                        db,
+                        proxy.ip,
+                        proxy.port,
+                        app_settings.score_invalid,
+                        app_settings.score_min,
+                        app_settings.score_max,
+                    )

     worker_pool = AsyncWorkerPool(
         worker_count=db_settings.get("default_concurrency", app_settings.validator_max_concurrency),
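The diff only shows the handler side; how `AsyncWorkerPool` accepts work is not part of this hunk. A hedged sketch of the non-blocking enqueue pattern the commit message refers to ("WorkerPool put_nowait"), assuming the pool wraps an asyncio.Queue — class and method names here are illustrative, not the project's actual internals:

```python
import asyncio


class SketchWorkerPool:
    """Minimal sketch; must be constructed inside a running event loop."""

    def __init__(self, handler, worker_count: int, maxsize: int = 10_000):
        self._handler = handler
        self._queue: asyncio.Queue = asyncio.Queue(maxsize=maxsize)
        self._workers = [
            asyncio.create_task(self._worker()) for _ in range(worker_count)
        ]

    def put_nowait(self, item) -> bool:
        # Non-blocking enqueue: report overflow instead of stalling the crawler.
        try:
            self._queue.put_nowait(item)
            return True
        except asyncio.QueueFull:
            return False

    async def _worker(self) -> None:
        while True:
            item = await self._queue.get()
            try:
                await self._handler(item)
            finally:
                self._queue.task_done()
```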
@@ -75,7 +105,7 @@ async def lifespan(app: FastAPI):
     await stack.enter_async_context(executor)

     # Plugin runner
-    plugin_runner = PluginRunner(timeout=db_settings.get("crawl_timeout", 30))
+    plugin_runner = PluginRunner()

     # Scheduler
     scheduler = SchedulerService(
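With the global crawl_timeout setting gone, each plugin presumably gets its own budget inside the runner. A hedged sketch of what a per-plugin cap could look like — `plugin.crawl()` and the empty-result convention are assumptions; the 120-second figure comes from the commit message:

```python
import asyncio

CRAWL_TIMEOUT_SECONDS = 120  # per-plugin budget named in the commit message


async def run_plugin(plugin):
    # Each plugin gets its own deadline, so one slow source can no longer
    # stall the whole crawl the way a single global crawl_timeout did.
    try:
        return await asyncio.wait_for(plugin.crawl(), timeout=CRAWL_TIMEOUT_SECONDS)
    except asyncio.TimeoutError:
        return []  # assumption: a timed-out source is treated as an empty result
```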
@@ -91,6 +121,9 @@ async def lifespan(app: FastAPI):
     app.state.plugin_runner = plugin_runner
     app.state.scheduler = scheduler

+    app.state.ws_manager = ConnectionManager()
+    app.state.stats_broadcaster_task = asyncio.create_task(stats_broadcaster_loop(app))
+
     # Start the scheduler
     if db_settings.get("auto_validate", True):
         try:
@@ -101,6 +134,13 @@ async def lifespan(app: FastAPI):
     logger.info("API server started")
     yield

+    app.state.stats_broadcaster_task.cancel()
+    try:
+        await app.state.stats_broadcaster_task
+    except asyncio.CancelledError:
+        pass
+    await app.state.ws_manager.disconnect_all()
+
     # Stop the scheduler
     await scheduler.stop()
@@ -1,4 +1,11 @@
 """FastAPI application factory"""
+import asyncio
+import sys
+
+# On Windows the default Proactor event loop tends to cause httpx async outbound ConnectTimeout, inconsistent with sync requests
+if sys.platform == "win32":
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from app.api.lifespan import lifespan
app/api/realtime.py (new file, +25)
@@ -0,0 +1,25 @@
+"""Background task that broadcasts real-time stats"""
+import asyncio
+
+from fastapi import FastAPI
+
+from app.core.config import settings
+from app.core.log import logger
+from app.services.dashboard_stats import get_dashboard_stats
+
+
+async def stats_broadcaster_loop(app: FastAPI) -> None:
+    manager = app.state.ws_manager
+    interval = settings.ws_stats_interval_seconds
+    while True:
+        try:
+            await asyncio.sleep(interval)
+            if manager.connection_count == 0:
+                continue
+            scheduler = app.state.scheduler
+            stats = await get_dashboard_stats(scheduler.running)
+            await manager.broadcast_json({"type": "stats", "data": stats})
+        except asyncio.CancelledError:
+            break
+        except Exception:
+            logger.exception("stats broadcaster tick failed")
@@ -1,9 +1,10 @@
 """Route package"""
 from fastapi import APIRouter
-from app.api.routes import proxies, plugins, scheduler, settings, tasks
+from app.api.routes import proxies, plugins, scheduler, settings, tasks, ws

 api_router = APIRouter()
 api_router.include_router(proxies.router)
+api_router.include_router(ws.router)
 api_router.include_router(plugins.router)
 api_router.include_router(scheduler.router)
 api_router.include_router(settings.router)
@@ -113,8 +113,8 @@ def _create_crawl_all_aggregator(job_ids, executor):
     class CrawlAllAggregator(Job):
         async def run(self):
             self._set_running()
-            # Wait for all child jobs to finish (at most 30 seconds)
-            for _ in range(300):
+            # Wait for all child jobs to finish (up to ~5 minutes, matching the frontend polling)
+            for _ in range(3000):
                 if self.is_cancelled:
                     break
                 all_done = all(
@@ -125,15 +125,56 @@ def _create_crawl_all_aggregator(job_ids, executor):
                     break
                 await asyncio.sleep(0.1)
             total = 0
-            valid = 0
-            invalid = 0
+            plugins_failed = 0
+            per_plugin = []
             for jid in job_ids:
                 job = executor.get_job(jid)
-                if job and job.result:
-                    total += job.result.get("proxy_count", 0)
-                    valid += job.result.get("success_count", 0)
-                    invalid += job.result.get("failure_count", 0)
-            result = {"total_crawled": total, "valid_count": valid, "invalid_count": invalid}
+                plugin_id = getattr(job, "plugin_id", "") if job else ""
+                proxy_count = 0
+                crawl_failed = False
+                err_msg = None
+                job_status = job.status.value if job else "missing"
+
+                if not job:
+                    per_plugin.append({
+                        "plugin_id": plugin_id,
+                        "proxy_count": 0,
+                        "crawl_failed": True,
+                        "error": "job not found",
+                        "job_status": job_status,
+                    })
+                    plugins_failed += 1
+                    continue
+
+                if job.status.value == "failed":
+                    crawl_failed = True
+                    plugins_failed += 1
+                    err_msg = job.error or "job failed"
+                elif job.result:
+                    r = job.result
+                    plugin_id = r.get("plugin_id") or plugin_id
+                    proxy_count = r.get("proxy_count", 0)
+                    total += proxy_count
+                    if r.get("crawl_failed") or r.get("failure_count", 0) > 0:
+                        crawl_failed = True
+                        plugins_failed += 1
+                        err_msg = r.get("error")
+                else:
+                    total += 0
+
+                per_plugin.append({
+                    "plugin_id": plugin_id,
+                    "proxy_count": proxy_count,
+                    "crawl_failed": crawl_failed,
+                    "error": err_msg,
+                    "job_status": job_status,
+                })
+
+            result = {
+                "total_crawled": total,
+                "plugins_failed": plugins_failed,
+                "per_plugin": per_plugin,
+            }
             if self.is_cancelled:
                 result["cancelled"] = True
             return result
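Reconstructed from the code above, the aggregate result now carries a per-plugin breakdown rather than flat valid/invalid counters. The shape, with purely illustrative values and hypothetical plugin ids:

```python
result = {
    "total_crawled": 137,
    "plugins_failed": 1,
    "per_plugin": [
        {"plugin_id": "fpw_example_source", "proxy_count": 137,
         "crawl_failed": False, "error": None, "job_status": "completed"},
        {"plugin_id": "fpw_other_source", "proxy_count": 0,
         "crawl_failed": True, "error": "job failed", "job_status": "failed"},
    ],
}
```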
@@ -5,7 +5,8 @@ from fastapi.responses import StreamingResponse

 from app.services.proxy_service import ProxyService
 from app.services.scheduler_service import SchedulerService
-from app.models.schemas import ProxyListRequest, BatchDeleteRequest
+from app.services.dashboard_stats import get_dashboard_stats
+from app.models.schemas import ProxyListRequest, BatchDeleteRequest, ProxyDeleteItem
 from app.api.deps import get_proxy_service, get_scheduler_service
 from app.api.common import success_response, format_proxy
 from app.core.exceptions import ProxyPoolException, ProxyNotFoundException
@@ -15,11 +16,9 @@ router = APIRouter(prefix="/api/proxies", tags=["proxies"])

 @router.get("/stats")
 async def get_stats(
-    proxy_service: ProxyService = Depends(get_proxy_service),
     scheduler_service: SchedulerService = Depends(get_scheduler_service),
 ):
-    stats = await proxy_service.get_stats()
-    stats["scheduler_running"] = scheduler_service.running
+    stats = await get_dashboard_stats(scheduler_service.running)
     return success_response("Stats fetched successfully", stats)
@@ -36,6 +35,7 @@ async def list_proxies(
         max_score=request.max_score,
         sort_by=request.sort_by,
         sort_order=request.sort_order,
+        pool_filter=request.pool_filter,
     )
     return success_response(
         "Proxy list fetched successfully",
@@ -75,6 +75,16 @@ async def export_proxies(
     )


+@router.post("/delete-one")
+async def delete_proxy_one(
+    item: ProxyDeleteItem,
+    service: ProxyService = Depends(get_proxy_service),
+):
+    """JSON-body delete (recommended): IPs containing colons, such as IPv6, are unaffected by path segmentation."""
+    await service.delete_proxy(item.ip, item.port)
+    return success_response("Proxy deleted successfully")
+
+
 @router.delete("/{ip}/{port}")
 async def delete_proxy(ip: str, port: int, service: ProxyService = Depends(get_proxy_service)):
     await service.delete_proxy(ip, port)
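Why a JSON body beats a path segment here: an IPv6 literal such as `2001:db8::1` contains colons that fight with `/{ip}/{port}` routing, but passes through a request body untouched. A hypothetical client call (httpx is not part of this diff):

```python
import httpx

# Delete an IPv6 proxy via the JSON-body endpoint added above.
resp = httpx.post(
    "http://localhost:8000/api/proxies/delete-one",
    json={"ip": "2001:db8::1", "port": 8080},
)
resp.raise_for_status()
```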
@@ -1,10 +1,13 @@
 """Settings routes"""
+import asyncio
+
 from fastapi import APIRouter, Request, Depends
 from app.core.db import get_db
 from app.repositories.settings_repo import SettingsRepository
 from app.models.schemas import SettingsSchema
 from app.api.common import success_response
 from app.api.deps import get_settings_repo
+from app.core.config import settings as app_settings
 from app.core.exceptions import ProxyPoolException
 from app.core.log import logger
@@ -47,17 +50,21 @@ async def save_settings(

     # Hot-update validator timeout and concurrency (takes effect on the next validation)
     if validator:
-        validator._init_timeout = request.validation_timeout
-        validator._init_connect_timeout = request.validation_timeout
+        vt = float(request.validation_timeout)
+        validator._init_timeout = vt
+        # Tighten the connect phase separately: don't equate it with total, or dead proxies burn the whole timeout on connect
+        validator._init_connect_timeout = min(
+            float(app_settings.validator_connect_timeout), vt
+        )
         validator._init_max_concurrency = request.default_concurrency
         if request.validation_targets is not None:
             validator.update_test_urls(request.validation_targets)
         # Close the old session lazily: proxies mid-validation keep using it;
         # new requests get a session with the new config via _ensure_session()
+        await validator.close_socks_sessions()
         old_session = validator._http_session
         validator._http_session = None
         validator._http_connector = None
-        validator._semaphore = None
         if old_session and not old_session.closed:
             asyncio.create_task(old_session.close())
         logger.info(f"Validator config updated: timeout={request.validation_timeout}, concurrency={request.default_concurrency}, targets={request.validation_targets}")
app/api/routes/ws.py (new file, +32)
@@ -0,0 +1,32 @@
+"""WebSocket real-time push"""
+import json
+
+from fastapi import APIRouter, WebSocket
+from starlette.websockets import WebSocketDisconnect
+
+from app.services.dashboard_stats import get_dashboard_stats
+
+router = APIRouter(prefix="/api", tags=["websocket"])
+
+
+@router.websocket("/ws")
+async def websocket_dashboard(websocket: WebSocket):
+    app = websocket.app
+    await websocket.accept()
+    manager = app.state.ws_manager
+    await manager.connect(websocket)
+    try:
+        stats = await get_dashboard_stats(app.state.scheduler.running)
+        await websocket.send_json({"type": "stats", "data": stats})
+        while True:
+            raw = await websocket.receive_text()
+            try:
+                msg = json.loads(raw)
+            except json.JSONDecodeError:
+                continue
+            if msg.get("type") == "ping":
+                await websocket.send_json({"type": "pong"})
+    except WebSocketDisconnect:
+        pass
+    finally:
+        await manager.disconnect(websocket)
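A hypothetical client for the endpoint above, using the third-party `websockets` package (not used anywhere in this commit); it sends a ping and then prints each stats broadcast:

```python
import asyncio
import json

import websockets


async def watch_stats() -> None:
    async with websockets.connect("ws://localhost:8000/api/ws") as ws:
        await ws.send(json.dumps({"type": "ping"}))  # server replies with pong
        async for raw in ws:
            msg = json.loads(raw)
            if msg.get("type") == "stats":
                print(msg["data"])


asyncio.run(watch_stats())
```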
app/api/ws_manager.py (new file, +52)
@@ -0,0 +1,52 @@
+"""WebSocket connection management and broadcasting"""
+import asyncio
+from typing import List
+
+from starlette.websockets import WebSocket, WebSocketState
+
+
+class ConnectionManager:
+    def __init__(self) -> None:
+        self._connections: List[WebSocket] = []
+        self._lock = asyncio.Lock()
+
+    @property
+    def connection_count(self) -> int:
+        return len(self._connections)
+
+    async def connect(self, websocket: WebSocket) -> None:
+        async with self._lock:
+            self._connections.append(websocket)
+
+    async def disconnect(self, websocket: WebSocket) -> None:
+        async with self._lock:
+            if websocket in self._connections:
+                self._connections.remove(websocket)
+
+    async def broadcast_json(self, payload: dict) -> None:
+        async with self._lock:
+            targets = list(self._connections)
+        stale: List[WebSocket] = []
+        for ws in targets:
+            try:
+                if ws.client_state != WebSocketState.CONNECTED:
+                    stale.append(ws)
+                    continue
+                await ws.send_json(payload)
+            except Exception:
+                stale.append(ws)
+        if stale:
+            async with self._lock:
+                for ws in stale:
+                    if ws in self._connections:
+                        self._connections.remove(ws)
+
+    async def disconnect_all(self) -> None:
+        async with self._lock:
+            targets = list(self._connections)
+            self._connections.clear()
+        for ws in targets:
+            try:
+                await ws.close()
+            except Exception:
+                pass
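Note the locking pattern: `broadcast_json` copies the connection list under the lock, sends outside it, then re-acquires the lock only to prune stale sockets, so one slow client cannot block connect/disconnect (asyncio.Lock is not reentrant, so sending while holding it would also risk deadlock on pruning). A minimal usage sketch — the WebSocket would normally come from an accepted FastAPI connection:

```python
manager = ConnectionManager()


async def on_connect(ws) -> None:
    # After websocket.accept(): register, then fan out a payload to everyone.
    await manager.connect(ws)
    await manager.broadcast_json(
        {"type": "stats", "data": {"online": manager.connection_count}}
    )
```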