Round 3 fixes: cancelled polling, aggregator invalid_count, filter state, scheduler atomicity, HTTP exception handler, tests

This commit is contained in:
祀梦
2026-04-05 10:20:23 +08:00
parent 49e440cb41
commit dc5f050683
32 changed files with 321 additions and 163 deletions

View File

@@ -26,6 +26,7 @@ class JobExecutor:
self.worker_pool = worker_pool
self.max_concurrent_jobs = max_concurrent_jobs
self._jobs: Dict[str, Job] = {}
self._tasks: Dict[str, asyncio.Task] = {}
self._running = False
self._semaphore = asyncio.Semaphore(max_concurrent_jobs)
self._cleanup_interval = cleanup_interval_seconds
@@ -61,27 +62,31 @@ class JobExecutor:
def submit_job(self, job: Job) -> str:
    """Submit a job for background execution.

    Registers the job, schedules its coroutine on the running event loop,
    and records the created Task so that cancellation can interrupt it.

    Args:
        job: the Job to run; must expose a unique ``id``.

    Returns:
        The job's id, usable for later lookup or cancellation.
    """
    self._jobs[job.id] = job
    # Keep the Task handle: without it, cancel_job() could only flip the
    # job's cancelled flag but never interrupt an in-flight coroutine.
    # (The stripped diff showed a stale pre-change line creating a second,
    # untracked task here; only this tracked task must remain.)
    task = asyncio.create_task(self._run_job(job))
    self._tasks[job.id] = task
    return job.id
async def _run_job(self, job: Job) -> None:
    """Drive one job to completion under the concurrency semaphore.

    Covers the full lifecycle: a pre-run cancellation check, normal
    completion, cancellation mid-flight (``asyncio.CancelledError``), and
    failure.  The ``finally`` clause always removes the task handle from
    the registry so ``self._tasks`` cannot leak entries.
    """
    try:
        async with self._semaphore:
            try:
                if job.is_cancelled:
                    logger.info(f"Job {job.id} was cancelled before running")
                    return
                result = await job.run()
                # If the subclass did not explicitly set a terminal state,
                # mark the job completed automatically.
                if job.status not in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED):
                    job._set_completed(result)
                logger.info(f"Job {job.id} completed: {result}")
            except asyncio.CancelledError:
                # Task.cancel() raised inside job.run(): record the state
                # rather than letting the cancellation propagate unrecorded.
                job.status = JobStatus.CANCELLED
                job._touch()
                logger.info(f"Job {job.id} cancelled during execution")
            except Exception as e:
                job._set_failed(str(e))
                logger.error(f"Job {job.id} failed: {e}", exc_info=True)
    finally:
        # Drop the handle whether we completed, failed, or were cancelled.
        self._tasks.pop(job.id, None)
def get_job(self, job_id: str) -> Optional[Job]:
    """Look up a tracked job by id; None when the id is unknown."""
    registry = self._jobs
    return registry[job_id] if job_id in registry else None
@@ -101,6 +106,9 @@ class JobExecutor:
job = self._jobs.get(job_id)
if not job:
return False
task = self._tasks.get(job_id)
if task and not task.done():
task.cancel()
job.cancel()
return True
@@ -108,6 +116,9 @@ class JobExecutor:
cancelled = 0
for job in list(self._jobs.values()):
if job.status in (JobStatus.PENDING, JobStatus.RUNNING):
task = self._tasks.get(job.id)
if task and not task.done():
task.cancel()
job.cancel()
cancelled += 1
return cancelled

View File

@@ -154,6 +154,10 @@ class ValidateAllJob(Job):
self._touch()
payload = {"total": total, "submitted": submitted}
self._set_completed(payload)
if self.is_cancelled:
self.status = JobStatus.CANCELLED
self._touch()
else:
self._set_completed(payload)
logger.info(f"ValidateAllJob {self.id}: submitted {submitted}/{total} proxies")
return payload

View File

@@ -74,21 +74,36 @@ class AsyncWorkerPool:
await self._queue.join()
async def resize(self, new_worker_count: int) -> None:
    """Resize the worker pool without losing items already in the queue.

    Grows by starting additional worker tasks in place; shrinks by
    enqueueing one ``None`` sentinel per surplus worker so that whichever
    workers dequeue them exit their loops.  Unlike the previous
    stop-then-start approach, queued items are never dropped.

    NOTE(review): reconstructed from a diff whose +/- markers were
    stripped — confirm against the committed version, in particular that
    the trailing ``await self.start()`` was removed along with ``stop()``.

    Args:
        new_worker_count: desired number of concurrent workers.
    """
    if new_worker_count == self.worker_count:
        return
    logger.info(f"{self.name} resizing from {self.worker_count} to {new_worker_count}")
    if new_worker_count > self.worker_count:
        # Grow: just launch the extra workers alongside the existing ones.
        for i in range(self.worker_count, new_worker_count):
            self._workers.append(
                asyncio.create_task(self._worker_loop(i), name=f"{self.name}-worker-{i}")
            )
    elif new_worker_count < self.worker_count:
        # Shrink: one sentinel per surplus worker; a worker that picks one
        # up breaks out of its loop.
        for _ in range(self.worker_count - new_worker_count):
            await self._queue.put(None)
        # Yield once so workers get a chance to observe the sentinels.
        await asyncio.sleep(0)
        still_running = []
        for w in self._workers:
            if w.done():
                try:
                    # Await finished tasks to surface any stored exception.
                    await w
                except asyncio.CancelledError:
                    pass
            else:
                still_running.append(w)
        self._workers = still_running
    self.worker_count = new_worker_count
async def _worker_loop(self, worker_id: int) -> None:
while True:
item = await self._queue.get()
try:
if item is None or not self._running:
self._queue.task_done()
break
await self.handler(item)
except Exception as e: