"""Job executor - manages the lifecycle of all background jobs in one place."""

import asyncio
from contextlib import asynccontextmanager
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from app.core.execution.job import Job, JobStatus
from app.core.execution.worker_pool import AsyncWorkerPool
from app.core.log import logger


class JobExecutor:
    """Background job executor.

    - Keeps an in-memory table of submitted jobs, keyed by job id.
    - Limits how many jobs run concurrently (via a semaphore).
    - Periodically removes finished jobs that have not been updated recently.

    Can be used as an async context manager: entering calls ``start()`` and
    exiting calls ``stop()``.
    """

    # Statuses after which a job is never executed again.
    _TERMINAL_STATUSES = (
        JobStatus.COMPLETED,
        JobStatus.FAILED,
        JobStatus.CANCELLED,
    )
    # Statuses that ``cancel_all`` treats as still cancellable.
    _ACTIVE_STATUSES = (JobStatus.PENDING, JobStatus.RUNNING)

    def __init__(
        self,
        worker_pool: Optional[AsyncWorkerPool] = None,
        max_concurrent_jobs: int = 10,
        cleanup_interval_seconds: int = 300,
    ):
        """Create an executor.

        Args:
            worker_pool: Optional worker pool; stored on the instance but not
                used by any method of this class.
            max_concurrent_jobs: Maximum number of jobs allowed to run at the
                same time.
            cleanup_interval_seconds: Seconds between cleanup passes; also the
                minimum age (since last update) before a finished job is
                removed.
        """
        self.worker_pool = worker_pool
        self.max_concurrent_jobs = max_concurrent_jobs
        self._jobs: Dict[str, Job] = {}
        self._tasks: Dict[str, asyncio.Task] = {}
        self._running = False
        self._semaphore = asyncio.Semaphore(max_concurrent_jobs)
        self._cleanup_interval = cleanup_interval_seconds
        self._cleanup_task: Optional[asyncio.Task] = None

    async def __aenter__(self):
        """Start the executor and return it."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop the executor; exceptions from the ``with`` body propagate."""
        await self.stop()

    async def start(self) -> None:
        """Start the periodic cleanup loop. Does nothing if already started."""
        if self._running:
            return
        self._running = True
        self._cleanup_task = asyncio.create_task(self._cleanup_loop())
        logger.info("JobExecutor started")

    async def stop(self) -> None:
        """Cancel all active jobs, wait for them to unwind, stop the cleanup loop.

        Does nothing if the executor is not running.
        """
        if not self._running:
            return
        self._running = False

        # Snapshot before cancelling: tasks remove themselves from
        # ``self._tasks`` as they finish. The current task is excluded so that
        # calling stop() from inside a job cannot end up awaiting itself.
        current = asyncio.current_task()
        in_flight = [
            task
            for task in self._tasks.values()
            if task is not current and not task.done()
        ]
        await self.cancel_all()
        if in_flight:
            # Let the tasks finish unwinding so none is left pending when the
            # loop shuts down; their outcomes are deliberately ignored here.
            await asyncio.gather(*in_flight, return_exceptions=True)

        if self._cleanup_task:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass
            self._cleanup_task = None
        logger.info("JobExecutor stopped")

    def submit_job(self, job: Job) -> str:
        """Submit a job for background execution.

        Must be called while an event loop is running.

        Args:
            job: The job to run.

        Returns:
            The id of the submitted job.

        Raises:
            RuntimeError: If there is no running event loop.
        """
        coro = self._run_job(job)
        try:
            task = asyncio.create_task(coro)
        except RuntimeError:
            # No running loop: do not leave a never-awaited coroutine behind,
            # and do not register a job that can never be executed.
            coro.close()
            raise
        # The task does not start running before this function returns, so
        # registering after creation is safe.
        self._jobs[job.id] = job
        self._tasks[job.id] = task
        return job.id

    async def _run_job(self, job: Job) -> None:
        """Run one job under the concurrency limit and record its outcome."""
        try:
            async with self._semaphore:
                try:
                    if job.is_cancelled:
                        logger.info(f"Job {job.id} was cancelled before running")
                        return
                    result = await job.run()
                    # If the job did not set a final status itself, mark it
                    # as completed automatically.
                    if job.status not in self._TERMINAL_STATUSES:
                        job._set_completed(result)
                    logger.info(f"Job {job.id} completed: {result}")
                except asyncio.CancelledError:
                    job.status = JobStatus.CANCELLED
                    job._touch()
                    logger.info(f"Job {job.id} cancelled during execution")
                except Exception as e:
                    job._set_failed(str(e))
                    logger.error(f"Job {job.id} failed: {e}", exc_info=True)
        except asyncio.CancelledError:
            # Cancelled while still waiting for a free slot. Without this the
            # job could stay PENDING forever and never be cleaned up.
            if job.status not in self._TERMINAL_STATUSES:
                job.status = JobStatus.CANCELLED
                job._touch()
            logger.info(f"Job {job.id} cancelled while waiting for a free slot")
            raise
        finally:
            self._tasks.pop(job.id, None)

    def get_job(self, job_id: str) -> Optional[Job]:
        """Return the job with the given id, or ``None`` if it is unknown."""
        return self._jobs.get(job_id)

    def list_jobs(
        self,
        status: Optional[JobStatus] = None,
        limit: int = 100,
    ) -> List[Job]:
        """Return known jobs, newest first.

        Args:
            status: If given, only jobs with exactly this status are returned.
            limit: Maximum number of jobs to return.

        Returns:
            Jobs sorted by ``created_at`` in descending order.
        """
        jobs = list(self._jobs.values())
        # ``is not None`` so that a falsy status member still filters.
        if status is not None:
            jobs = [j for j in jobs if j.status == status]
        jobs.sort(key=lambda j: j.created_at, reverse=True)
        return jobs[:limit]

    async def cancel_job(self, job_id: str) -> bool:
        """Cancel a single job.

        Args:
            job_id: Id of the job to cancel.

        Returns:
            ``False`` if the job id is unknown, ``True`` otherwise.
        """
        job = self._jobs.get(job_id)
        if not job:
            return False
        task = self._tasks.get(job_id)
        if task and not task.done():
            task.cancel()
        job.cancel()
        return True

    async def cancel_all(self) -> int:
        """Cancel every pending or running job.

        Returns:
            The number of jobs that were cancelled.
        """
        cancelled = 0
        # Iterate over a snapshot; the job table may change while cancelling.
        for job in list(self._jobs.values()):
            if job.status in self._ACTIVE_STATUSES:
                task = self._tasks.get(job.id)
                if task and not task.done():
                    task.cancel()
                job.cancel()
                cancelled += 1
        return cancelled

    async def _cleanup_loop(self) -> None:
        """Periodically remove finished jobs that are old enough."""
        while self._running:
            try:
                await asyncio.sleep(self._cleanup_interval)
                self._cleanup_old_jobs()
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive; a failed pass is retried next interval.
                logger.error(f"JobExecutor cleanup error: {e}")

    def _cleanup_old_jobs(self) -> int:
        """Drop finished jobs last updated more than one interval ago.

        Returns:
            The number of jobs removed.
        """
        cutoff = datetime.now() - timedelta(seconds=self._cleanup_interval)
        to_remove = [
            job_id
            for job_id, job in self._jobs.items()
            if job.status in self._TERMINAL_STATUSES and job.updated_at < cutoff
        ]
        for job_id in to_remove:
            del self._jobs[job_id]
        if to_remove:
            logger.info(f"JobExecutor cleaned up {len(to_remove)} old jobs")
        return len(to_remove)