refactor: 全面重构核心架构,消除反复修改的根因
- 删除 ValidationQueue 双轨持久化队列,替换为纯内存 AsyncWorkerPool
- 引入统一后台任务框架 JobExecutor(Job/CrawlJob/ValidateAllJob)
- 新增 PluginRunner 统一插件执行(超时、重试、健康检查、统计)
- 重构 SchedulerService 职责收敛为仅定时触发 ValidateAllJob
- 使用 AsyncExitStack 重构 lifespan,安全管理长生命周期资源
- 路由层瘦身 50%+,业务异常上抛由全局中间件统一处理
- 实现设置全热更新(WorkerPool 并发、Validator 超时即时生效)
- 前端 Store 强制写后重新拉取,消除乐观更新数据不同步
- 删除 queue.py / task_repo.py / task_service.py
- 新增 execution 单元测试,全部 85 个测试通过
This commit is contained in:
@@ -1,16 +1,25 @@
|
||||
"""插件相关路由"""
|
||||
import asyncio
|
||||
from fastapi import APIRouter, Depends
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.services.plugin_service import PluginService
|
||||
from app.services.scheduler_service import SchedulerService
|
||||
from app.services.task_service import task_service
|
||||
from app.api.deps import get_plugin_service, get_scheduler_service
|
||||
from app.api.common import success_response, error_response, format_plugin
|
||||
from app.core.log import logger
|
||||
from app.services.plugin_runner import PluginRunner
|
||||
from app.core.execution import JobExecutor, CrawlJob
|
||||
from app.core.exceptions import PluginNotFoundException
|
||||
from app.api.deps import get_plugin_service, get_plugin_runner, get_executor
|
||||
from app.api.common import success_response
|
||||
|
||||
router = APIRouter(prefix="/api/plugins", tags=["plugins"])
|
||||
|
||||
|
||||
class ToggleRequest(BaseModel):
    """Request body for the plugin toggle endpoint."""

    # Desired state of the plugin: True to enable it, False to disable it.
    enabled: bool
|
||||
|
||||
|
||||
class ConfigRequest(BaseModel):
    """Request body for the plugin configuration update endpoint."""

    # New configuration for the plugin; the accepted keys are decided by the
    # service layer, not by this model.
    config: dict
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_plugins(service: PluginService = Depends(get_plugin_service)):
|
||||
plugins = await service.list_plugins()
|
||||
@@ -20,19 +29,13 @@ async def list_plugins(service: PluginService = Depends(get_plugin_service)):
|
||||
@router.put("/{plugin_id}/toggle")
async def toggle_plugin(
    plugin_id: str,
    request: ToggleRequest,
    service: PluginService = Depends(get_plugin_service),
):
    """Enable or disable the plugin identified by ``plugin_id``.

    The body is validated by ``ToggleRequest``, so ``enabled`` is always
    present here and no manual "missing parameter" check is needed.

    Returns:
        A success response echoing the plugin id and its new enabled state.
    """
    # The service's return value is intentionally not inspected here.
    # NOTE(review): presumably an unknown plugin is reported by the service
    # raising an exception that the global handler converts - confirm.
    await service.toggle_plugin(plugin_id, request.enabled)
    return success_response(
        f"插件 {plugin_id} 已{'启用' if request.enabled else '禁用'}",
        {"plugin_id": plugin_id, "enabled": request.enabled},
    )
|
||||
|
||||
|
||||
@@ -42,128 +45,104 @@ async def get_plugin_config(
|
||||
service: PluginService = Depends(get_plugin_service),
|
||||
):
|
||||
config = await service.get_plugin_config(plugin_id)
|
||||
if config is None:
|
||||
return error_response("插件不存在", 404)
|
||||
return success_response("获取插件配置成功", {"plugin_id": plugin_id, "config": config})
|
||||
|
||||
|
||||
@router.post("/{plugin_id}/config")
async def update_plugin_config(
    plugin_id: str,
    request: ConfigRequest,
    service: PluginService = Depends(get_plugin_service),
):
    """Persist a new configuration for the plugin identified by ``plugin_id``.

    The body is validated by ``ConfigRequest``, which guarantees ``config``
    is a dict before this handler runs.

    Returns:
        A success response echoing the plugin id and the submitted config.

    Raises:
        PluginNotFoundException: when the service reports the update as
            unsuccessful.
    """
    success = await service.update_plugin_config(plugin_id, request.config)
    if not success:
        # NOTE(review): a falsy result may also mean "invalid config" rather
        # than "unknown plugin" - confirm against PluginService.
        raise PluginNotFoundException(plugin_id)
    return success_response(
        "保存插件配置成功",
        {"plugin_id": plugin_id, "config": request.config},
    )
|
||||
|
||||
|
||||
@router.post("/{plugin_id}/crawl")
async def crawl_plugin(
    plugin_id: str,
    plugin_service: PluginService = Depends(get_plugin_service),
    plugin_runner: PluginRunner = Depends(get_plugin_runner),
    executor: JobExecutor = Depends(get_executor),
):
    """Start a background crawl for a single plugin.

    The handler only submits a ``CrawlJob`` to the executor and returns
    immediately; it does not wait for the crawl to finish.

    Returns:
        A success response whose ``task_id`` is the id returned by
        ``executor.submit_job``.
    """
    # Called only for its validation effect; the returned plugin is not used.
    # NOTE(review): presumably raises for an unknown plugin id - confirm.
    plugin_service.get_plugin_or_raise(plugin_id)

    job = CrawlJob(
        plugin_id=plugin_id,
        plugin_runner=plugin_runner,
        # NOTE(review): a PluginService is passed as ``proxy_service``;
        # verify this is what CrawlJob expects.
        proxy_service=plugin_service,
        validator_pool=executor.worker_pool,
    )
    job_id = executor.submit_job(job)
    return success_response(
        "爬取任务已启动", {"task_id": job_id, "plugin_id": plugin_id}
    )
|
||||
|
||||
|
||||
@router.post("/crawl-all")
async def crawl_all(
    plugin_service: PluginService = Depends(get_plugin_service),
    plugin_runner: PluginRunner = Depends(get_plugin_runner),
    executor: JobExecutor = Depends(get_executor),
):
    """Submit one CrawlJob per enabled plugin and return a single task id.

    For API compatibility (the frontend and tests expect one ``task_id``),
    an aggregating job is submitted as well and its id is returned.
    """
    from app.core.plugin_system.registry import registry

    job_ids = []
    for plugin in registry.list_plugins():
        if not plugin.enabled:
            continue
        job = CrawlJob(
            plugin_id=plugin.name,
            plugin_runner=plugin_runner,
            # NOTE(review): a PluginService is passed as ``proxy_service``;
            # verify this is what CrawlJob expects.
            proxy_service=plugin_service,
            validator_pool=executor.worker_pool,
        )
        job_ids.append(executor.submit_job(job))

    # The aggregator job waits for the crawl jobs submitted above and
    # exposes their combined result under one id for status queries.
    aggregator = _create_crawl_all_aggregator(job_ids, executor)
    agg_id = executor.submit_job(aggregator)
    return success_response("爬取任务已启动", {"task_id": agg_id})
|
||||
|
||||
def _create_crawl_all_aggregator(job_ids, executor):
    """Create a job that waits for the given crawl jobs and sums their results.

    Args:
        job_ids: Ids of the crawl jobs to wait for, as returned by
            ``executor.submit_job``.
        executor: Executor used to look the jobs up via ``get_job``.

    Returns:
        A ``Job`` instance whose ``run`` coroutine returns a dict with
        ``total_crawled``, ``valid_count`` and ``invalid_count``.
    """
    from app.core.execution.job import Job
    import asyncio

    terminal_states = ("completed", "failed", "cancelled")
    max_polls = 300  # 300 polls x 0.1 s sleep: wait roughly 30 s at most
    poll_interval = 0.1

    def _is_finished(job_id):
        # Single lookup per job and poll. A job the executor does not
        # return counts as unfinished.
        job = executor.get_job(job_id)
        return bool(job and job.status.value in terminal_states)

    class CrawlAllAggregator(Job):
        async def run(self):
            # Poll until every child job reached a terminal state or the
            # poll budget is used up; afterwards aggregate whatever results
            # are available, even if some children are still running.
            for _ in range(max_polls):
                if all(_is_finished(jid) for jid in job_ids):
                    break
                await asyncio.sleep(poll_interval)

            total = 0
            valid = 0
            for jid in job_ids:
                job = executor.get_job(jid)
                if job and job.result:
                    total += job.result.get("proxy_count", 0)
                    valid += job.result.get("success_count", 0)
            # NOTE(review): invalid_count is hard-coded to 0; child results
            # are only read for proxy_count and success_count.
            return {"total_crawled": total, "valid_count": valid, "invalid_count": 0}

    return CrawlAllAggregator()
|
||||
def format_plugin(plugin) -> dict:
    """Convert a plugin record into the dict shape returned by the API.

    Args:
        plugin: Object exposing ``id``, ``display_name``, ``description``,
            ``enabled``, ``last_run``, ``success_count`` and
            ``failure_count`` attributes.

    Returns:
        A dict with those values; ``last_run`` is rendered with
        ``isoformat()`` or ``None`` when it is unset.
    """
    last_run = plugin.last_run
    return {
        "id": plugin.id,
        # Both "name" and "display_name" carry the display name.
        "name": plugin.display_name,
        "display_name": plugin.display_name,
        "description": plugin.description,
        "enabled": plugin.enabled,
        "last_run": last_run.isoformat() if last_run else None,
        "success_count": plugin.success_count,
        "failure_count": plugin.failure_count,
    }
|
||||
|
||||
Reference in New Issue
Block a user