Round 3 fixes: cancelled polling, aggregator invalid_count, filter state, scheduler atomicity, HTTP exception handler, tests
This commit is contained in:
@@ -5,6 +5,7 @@ from app.services.plugin_service import PluginService
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.services.plugin_runner import PluginRunner
|
||||
from app.core.execution import JobExecutor, AsyncWorkerPool
|
||||
from app.repositories.settings_repo import SettingsRepository
|
||||
|
||||
|
||||
def get_proxy_service() -> ProxyService:
|
||||
@@ -15,6 +16,10 @@ def get_plugin_service() -> PluginService:
|
||||
return PluginService()
|
||||
|
||||
|
||||
def get_settings_repo() -> SettingsRepository:
    """FastAPI dependency: provide a fresh SettingsRepository per request."""
    repo = SettingsRepository()
    return repo
|
||||
|
||||
|
||||
def get_scheduler_service(request: Request) -> SchedulerService:
    """FastAPI dependency: return the app-wide scheduler kept on ``app.state``.

    The instance is created once during application lifespan startup and
    shared across requests, so no new object is constructed here.
    """
    app_state = request.app.state
    return app_state.scheduler
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""统一异常处理"""
|
||||
from fastapi import Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
from pydantic import ValidationError
|
||||
from app.core.exceptions import ProxyPoolException
|
||||
from app.core.log import logger
|
||||
@@ -13,6 +14,13 @@ async def proxy_pool_exception_handler(request: Request, exc: ProxyPoolException
|
||||
)
|
||||
|
||||
|
||||
async def http_exception_handler(request: Request, exc: StarletteHTTPException):
    """Convert a Starlette/FastAPI HTTPException into the unified JSON envelope.

    Mirrors the project-wide response shape ``{"code", "message", "data"}``,
    reusing the exception's HTTP status for both the response status and the
    ``code`` field; ``data`` is always null for error responses.
    """
    status = exc.status_code
    payload = {"code": status, "message": exc.detail, "data": None}
    return JSONResponse(status_code=status, content=payload)
|
||||
|
||||
|
||||
async def pydantic_validation_handler(request: Request, exc: ValidationError):
|
||||
logger.error(f"Validation error: {exc}")
|
||||
return JSONResponse(
|
||||
|
||||
@@ -80,6 +80,7 @@ async def lifespan(app: FastAPI):
|
||||
# 调度器
|
||||
scheduler = SchedulerService(
|
||||
executor=executor,
|
||||
worker_pool=worker_pool,
|
||||
interval_minutes=db_settings.get("validate_interval_minutes", 30),
|
||||
)
|
||||
|
||||
|
||||
@@ -3,9 +3,10 @@ from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from app.api.lifespan import lifespan
|
||||
from app.api.routes import api_router
|
||||
from app.api.errors import proxy_pool_exception_handler, pydantic_validation_handler, general_exception_handler
|
||||
from app.api.errors import proxy_pool_exception_handler, http_exception_handler, pydantic_validation_handler, general_exception_handler
|
||||
from app.core.exceptions import ProxyPoolException
|
||||
from pydantic import ValidationError
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
from app.core.config import settings as app_settings
|
||||
|
||||
# 导入并注册所有插件(显式注册模式)
|
||||
@@ -30,6 +31,7 @@ def create_app() -> FastAPI:
|
||||
|
||||
# 异常处理
|
||||
app.add_exception_handler(ProxyPoolException, proxy_pool_exception_handler)
|
||||
app.add_exception_handler(StarletteHTTPException, http_exception_handler)
|
||||
app.add_exception_handler(ValidationError, pydantic_validation_handler)
|
||||
app.add_exception_handler(Exception, general_exception_handler)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from app.services.plugin_runner import PluginRunner
|
||||
from app.core.execution import JobExecutor, CrawlJob
|
||||
from app.core.exceptions import PluginNotFoundException
|
||||
from app.api.deps import get_plugin_service, get_plugin_runner, get_executor
|
||||
from app.api.common import success_response
|
||||
from app.api.common import success_response, format_plugin
|
||||
|
||||
router = APIRouter(prefix="/api/plugins", tags=["plugins"])
|
||||
|
||||
@@ -114,8 +114,11 @@ def _create_crawl_all_aggregator(job_ids, executor):
|
||||
|
||||
class CrawlAllAggregator(Job):
|
||||
async def run(self):
|
||||
self._set_running()
|
||||
# 等待所有子 job 完成(最多等 30 秒)
|
||||
for _ in range(300):
|
||||
if self.is_cancelled:
|
||||
break
|
||||
all_done = all(
|
||||
executor.get_job(jid) and executor.get_job(jid).status.value in ("completed", "failed", "cancelled")
|
||||
for jid in job_ids
|
||||
@@ -125,24 +128,18 @@ def _create_crawl_all_aggregator(job_ids, executor):
|
||||
await asyncio.sleep(0.1)
|
||||
total = 0
|
||||
valid = 0
|
||||
invalid = 0
|
||||
for jid in job_ids:
|
||||
job = executor.get_job(jid)
|
||||
if job and job.result:
|
||||
total += job.result.get("proxy_count", 0)
|
||||
valid += job.result.get("success_count", 0)
|
||||
return {"total_crawled": total, "valid_count": valid, "invalid_count": 0}
|
||||
invalid += job.result.get("failure_count", 0)
|
||||
result = {"total_crawled": total, "valid_count": valid, "invalid_count": invalid}
|
||||
if self.is_cancelled:
|
||||
result["cancelled"] = True
|
||||
return result
|
||||
|
||||
return CrawlAllAggregator()
|
||||
|
||||
|
||||
def format_plugin(plugin) -> dict:
    """Serialize a plugin record into the API response dict.

    Notes:
        - ``name`` intentionally mirrors ``display_name`` (both fields carry
          the display name in the API payload).
        - ``last_run`` is rendered as an ISO-8601 string, or ``None`` when
          the plugin has never run.
    """
    last_run = plugin.last_run
    payload = {
        "id": plugin.id,
        "name": plugin.display_name,
        "display_name": plugin.display_name,
        "description": plugin.description,
        "enabled": plugin.enabled,
        # Truthiness check kept deliberately: falsy last_run -> null.
        "last_run": last_run.isoformat() if last_run else None,
        "success_count": plugin.success_count,
        "failure_count": plugin.failure_count,
    }
    return payload
|
||||
|
||||
@@ -52,7 +52,7 @@ async def list_proxies(
|
||||
async def get_random_proxy(service: ProxyService = Depends(get_proxy_service)):
|
||||
proxy = await service.get_random_proxy()
|
||||
if not proxy:
|
||||
raise ProxyNotFoundException("", 0)
|
||||
raise ProxyPoolException("暂无可用代理", 404)
|
||||
return success_response("获取随机代理成功", format_proxy(proxy))
|
||||
|
||||
|
||||
@@ -68,12 +68,8 @@ async def export_proxies(
|
||||
|
||||
media_types = {"csv": "text/csv", "txt": "text/plain", "json": "application/json"}
|
||||
|
||||
async def generate():
|
||||
async for chunk in service.export_proxies(fmt, protocol, limit):
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(
|
||||
generate(),
|
||||
service.export_proxies(fmt, protocol, limit),
|
||||
media_type=media_types[fmt],
|
||||
headers={"Content-Disposition": f"attachment; filename=proxies.{fmt}"},
|
||||
)
|
||||
|
||||
@@ -4,37 +4,45 @@ from fastapi import APIRouter, Depends
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.repositories.settings_repo import SettingsRepository
|
||||
from app.core.db import get_db
|
||||
from app.api.deps import get_scheduler_service
|
||||
from app.api.deps import get_scheduler_service, get_settings_repo
|
||||
from app.api.common import success_response
|
||||
|
||||
router = APIRouter(prefix="/api/scheduler", tags=["scheduler"])
|
||||
settings_repo = SettingsRepository()
|
||||
|
||||
|
||||
async def _save_auto_validate_setting(enabled: bool):
|
||||
async def _save_auto_validate_setting(enabled: bool, settings_repo: SettingsRepository):
|
||||
"""保存自动验证设置"""
|
||||
async with get_db() as db:
|
||||
settings = await settings_repo.get_all(db)
|
||||
settings["auto_validate"] = enabled
|
||||
from app.models.schemas import SettingsSchema
|
||||
await settings_repo.save(db, SettingsSchema(**settings).model_dump())
|
||||
await settings_repo.save(db, {"auto_validate": enabled})
|
||||
|
||||
|
||||
@router.post("/start")
|
||||
async def start_scheduler(scheduler: SchedulerService = Depends(get_scheduler_service)):
|
||||
async def start_scheduler(
|
||||
scheduler: SchedulerService = Depends(get_scheduler_service),
|
||||
settings_repo: SettingsRepository = Depends(get_settings_repo),
|
||||
):
|
||||
if scheduler.running:
|
||||
return success_response("验证调度器已在运行", {"running": True})
|
||||
try:
|
||||
await _save_auto_validate_setting(True, settings_repo)
|
||||
except Exception:
|
||||
return success_response("启动调度器失败(设置保存异常)", {"running": False})
|
||||
await scheduler.start()
|
||||
await _save_auto_validate_setting(True)
|
||||
return success_response("验证调度器已启动", {"running": True})
|
||||
|
||||
|
||||
@router.post("/stop")
|
||||
async def stop_scheduler(scheduler: SchedulerService = Depends(get_scheduler_service)):
|
||||
async def stop_scheduler(
|
||||
scheduler: SchedulerService = Depends(get_scheduler_service),
|
||||
settings_repo: SettingsRepository = Depends(get_settings_repo),
|
||||
):
|
||||
if not scheduler.running:
|
||||
return success_response("验证调度器未运行", {"running": False})
|
||||
try:
|
||||
await _save_auto_validate_setting(False, settings_repo)
|
||||
except Exception:
|
||||
return success_response("停止调度器失败(设置保存异常)", {"running": True})
|
||||
await scheduler.stop()
|
||||
await _save_auto_validate_setting(False)
|
||||
return success_response("验证调度器已停止", {"running": False})
|
||||
|
||||
|
||||
|
||||
@@ -1,28 +1,33 @@
|
||||
"""设置相关路由"""
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi import APIRouter, Request, Depends
|
||||
from app.core.db import get_db
|
||||
from app.repositories.settings_repo import SettingsRepository
|
||||
from app.models.schemas import SettingsSchema
|
||||
from app.api.common import success_response
|
||||
from app.api.deps import get_settings_repo
|
||||
from app.core.exceptions import ProxyPoolException
|
||||
from app.core.log import logger
|
||||
|
||||
router = APIRouter(prefix="/api/settings", tags=["settings"])
|
||||
settings_repo = SettingsRepository()
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def get_settings():
|
||||
async def get_settings(settings_repo: SettingsRepository = Depends(get_settings_repo)):
|
||||
async with get_db() as db:
|
||||
settings = await settings_repo.get_all(db)
|
||||
return success_response("获取设置成功", settings)
|
||||
|
||||
|
||||
@router.post("")
|
||||
async def save_settings(request: SettingsSchema, http_request: Request):
|
||||
async def save_settings(
|
||||
request: SettingsSchema,
|
||||
http_request: Request,
|
||||
settings_repo: SettingsRepository = Depends(get_settings_repo),
|
||||
):
|
||||
async with get_db() as db:
|
||||
success = await settings_repo.save(db, request.model_dump())
|
||||
if not success:
|
||||
raise RuntimeError("保存设置失败")
|
||||
raise ProxyPoolException("保存设置失败", 500)
|
||||
|
||||
# 热更新运行中调度器的间隔时间
|
||||
scheduler = getattr(http_request.app.state, "scheduler", None)
|
||||
@@ -47,9 +52,9 @@ async def save_settings(request: SettingsSchema, http_request: Request):
|
||||
validator._init_max_concurrency = request.default_concurrency
|
||||
if request.validation_targets:
|
||||
validator.update_test_urls(request.validation_targets)
|
||||
# 重新创建 semaphore 和 session
|
||||
validator._semaphore = None
|
||||
# 先关闭现有 session,再重置 semaphore,避免竞态窗口
|
||||
await validator.close()
|
||||
validator._semaphore = None
|
||||
logger.info(f"Validator config updated: timeout={request.validation_timeout}, concurrency={request.default_concurrency}, targets={request.validation_targets}")
|
||||
|
||||
return success_response("保存设置成功", request.model_dump())
|
||||
|
||||
Reference in New Issue
Block a user