- 删除 ValidationQueue 双轨持久化队列,替换为纯内存 AsyncWorkerPool - 引入统一后台任务框架 JobExecutor(Job/CrawlJob/ValidateAllJob) - 新增 PluginRunner 统一插件执行(超时、重试、健康检查、统计) - 重构 SchedulerService 职责收敛为仅定时触发 ValidateAllJob - 使用 AsyncExitStack 重构 lifespan,安全管理长生命周期资源 - 路由层瘦身 50%+,业务异常上抛由全局中间件统一处理 - 实现设置全热更新(WorkerPool 并发、Validator 超时即时生效) - 前端 Store 强制写后重新拉取,消除乐观更新数据不同步 - 删除 queue.py / task_repo.py / task_service.py - 新增 execution 单元测试,全部 85 个测试通过
139 lines
4.6 KiB
Python
139 lines
4.6 KiB
Python
"""Job 执行器 - 统一管理所有后台 Job 的生命周期"""
|
|
import asyncio
|
|
from contextlib import asynccontextmanager
|
|
from datetime import datetime, timedelta
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from app.core.execution.job import Job, JobStatus
|
|
from app.core.execution.worker_pool import AsyncWorkerPool
|
|
from app.core.log import logger
|
|
|
|
|
|
class JobExecutor:
    """Background job executor.

    - Keeps an in-memory table of submitted jobs, keyed by job id.
    - Limits how many jobs run at the same time with a semaphore.
    - Periodically evicts finished jobs that have not been updated recently.

    Can be used as an async context manager: entering calls ``start()`` and
    leaving calls ``stop()``.
    """

    def __init__(
        self,
        worker_pool: Optional[AsyncWorkerPool] = None,
        max_concurrent_jobs: int = 10,
        cleanup_interval_seconds: int = 300,
    ):
        """Create an executor.

        Args:
            worker_pool: Optional worker pool stored on the executor; this
                class only keeps the reference and never calls it itself.
            max_concurrent_jobs: Maximum number of jobs allowed to run at once.
            cleanup_interval_seconds: Seconds between cleanup passes. The same
                value is used as the retention period for finished jobs.
        """
        self.worker_pool = worker_pool
        self.max_concurrent_jobs = max_concurrent_jobs
        self._jobs: Dict[str, Job] = {}
        # Strong references to in-flight job tasks. The event loop only keeps
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._tasks = set()
        self._running = False
        self._semaphore = asyncio.Semaphore(max_concurrent_jobs)
        self._cleanup_interval = cleanup_interval_seconds
        self._cleanup_task: Optional[asyncio.Task] = None

    async def __aenter__(self):
        """Start the executor and return it."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop the executor; exceptions from the ``with`` body propagate."""
        await self.stop()

    async def start(self) -> None:
        """Start the periodic cleanup loop. Does nothing if already started."""
        if self._running:
            return
        self._running = True
        self._cleanup_task = asyncio.create_task(self._cleanup_loop())
        logger.info("JobExecutor started")

    async def stop(self) -> None:
        """Cancel all jobs, wait for their tasks, and stop the cleanup loop.

        Does nothing if the executor is not running.
        """
        if not self._running:
            return
        self._running = False
        await self.cancel_all()

        # job.cancel() is only a request to the job; also cancel the asyncio
        # tasks and wait for them so none outlives the executor.
        pending = list(self._tasks)
        for task in pending:
            task.cancel()
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)

        if self._cleanup_task:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass
            self._cleanup_task = None
        logger.info("JobExecutor stopped")

    def submit_job(self, job: Job) -> str:
        """Register a job and schedule it to run in the background.

        Must be called while an event loop is running, because it creates an
        asyncio task.

        Returns:
            The id of the submitted job.
        """
        self._jobs[job.id] = job
        task = asyncio.create_task(self._run_job(job))
        # Keep the task alive until it is done, then drop the reference.
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
        return job.id

    async def _run_job(self, job: Job) -> None:
        """Run one job under the concurrency limit and record its outcome."""
        async with self._semaphore:
            try:
                if job.is_cancelled:
                    logger.info(f"Job {job.id} was cancelled before running")
                    return
                result = await job.run()
                # If the job did not set a final status itself, mark it as
                # completed with the returned result.
                if job.status not in (
                    JobStatus.COMPLETED,
                    JobStatus.FAILED,
                    JobStatus.CANCELLED,
                ):
                    job._set_completed(result)
                    logger.info(f"Job {job.id} completed: {result}")
            except asyncio.CancelledError:
                job.status = JobStatus.CANCELLED
                job._touch()
                logger.info(f"Job {job.id} cancelled during execution")
            except Exception as e:
                # Top-level boundary of a background task: record the failure
                # on the job instead of letting the exception go unobserved.
                job._set_failed(str(e))
                logger.error(f"Job {job.id} failed: {e}", exc_info=True)

    def get_job(self, job_id: str) -> Optional[Job]:
        """Return the job with the given id, or ``None`` if it is unknown."""
        return self._jobs.get(job_id)

    def list_jobs(
        self,
        status: Optional[JobStatus] = None,
        limit: int = 100,
    ) -> List[Job]:
        """Return jobs, newest first.

        Args:
            status: When not ``None``, only jobs with this status are returned.
            limit: Maximum number of jobs to return.
        """
        jobs = list(self._jobs.values())
        # Compare against None explicitly so a falsy status value (e.g. an
        # int-based enum member equal to 0) still filters.
        if status is not None:
            jobs = [j for j in jobs if j.status == status]
        jobs.sort(key=lambda j: j.created_at, reverse=True)
        return jobs[:limit]

    async def cancel_job(self, job_id: str) -> bool:
        """Request cancellation of one job.

        Returns:
            ``False`` if the id is unknown, ``True`` once ``job.cancel()`` has
            been called.
        """
        job = self._jobs.get(job_id)
        if not job:
            return False
        job.cancel()
        return True

    async def cancel_all(self) -> int:
        """Request cancellation of every pending or running job.

        Returns:
            The number of jobs that were asked to cancel.
        """
        cancelled = 0
        # Iterate over a snapshot of the job table.
        for job in list(self._jobs.values()):
            if job.status in (JobStatus.PENDING, JobStatus.RUNNING):
                job.cancel()
                cancelled += 1
        return cancelled

    async def _cleanup_loop(self) -> None:
        """Periodically remove finished jobs that have expired."""
        while self._running:
            try:
                await asyncio.sleep(self._cleanup_interval)
                self._cleanup_old_jobs()
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive on unexpected errors; log the traceback.
                logger.error(f"JobExecutor cleanup error: {e}", exc_info=True)

    def _cleanup_old_jobs(self) -> int:
        """Drop finished jobs last updated more than one interval ago.

        Returns:
            The number of jobs removed.
        """
        # NOTE(review): assumes job.updated_at is a naive local datetime,
        # comparable with datetime.now() - confirm against Job.
        cutoff = datetime.now() - timedelta(seconds=self._cleanup_interval)
        to_remove = [
            job_id
            for job_id, job in self._jobs.items()
            if job.status
            in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)
            and job.updated_at < cutoff
        ]
        for job_id in to_remove:
            del self._jobs[job_id]
        if to_remove:
            logger.info(f"JobExecutor cleaned up {len(to_remove)} old jobs")
        return len(to_remove)
|