Round 3 fixes: cancelled polling, aggregator invalid_count, filter state, scheduler atomicity, HTTP exception handler, tests
This commit is contained in:
@@ -26,6 +26,7 @@ class JobExecutor:
|
||||
self.worker_pool = worker_pool
|
||||
self.max_concurrent_jobs = max_concurrent_jobs
|
||||
self._jobs: Dict[str, Job] = {}
|
||||
self._tasks: Dict[str, asyncio.Task] = {}
|
||||
self._running = False
|
||||
self._semaphore = asyncio.Semaphore(max_concurrent_jobs)
|
||||
self._cleanup_interval = cleanup_interval_seconds
|
||||
@@ -61,27 +62,31 @@ class JobExecutor:
|
||||
def submit_job(self, job: Job) -> str:
    """Register a job and schedule it for background execution.

    The job is stored in ``self._jobs`` under its id, and exactly one
    asyncio task running ``self._run_job(job)`` is created for it.

    Args:
        job: The job to run; its ``id`` is used as the lookup key.

    Returns:
        The id of the submitted job.

    Note:
        Must be called while an event loop is running, because
        ``asyncio.create_task`` needs one.
    """
    self._jobs[job.id] = job
    # Schedule the job exactly once and keep the task handle in
    # ``self._tasks`` so it can be looked up by job id later (for example
    # to call ``task.cancel()``). Holding the reference also keeps the task
    # from being garbage-collected before it finishes.
    self._tasks[job.id] = asyncio.create_task(self._run_job(job))
    return job.id
|
||||
|
||||
async def _run_job(self, job: Job) -> None:
    """Run one job under the concurrency semaphore and record its outcome.

    The job body runs at most once. Whatever happens (success, failure,
    cancellation, or cancellation while still waiting for the semaphore),
    the job's entry in ``self._tasks`` is removed on exit.

    Args:
        job: The job to execute.
    """
    try:
        async with self._semaphore:
            try:
                if job.is_cancelled:
                    logger.info(f"Job {job.id} was cancelled before running")
                    return
                result = await job.run()
                # If the job did not put itself into a terminal state,
                # mark it completed with the value returned by run().
                if job.status not in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED):
                    job._set_completed(result)
                logger.info(f"Job {job.id} completed: {result}")
            except asyncio.CancelledError:
                # NOTE(review): the cancellation is recorded on the job and
                # then swallowed, so the task itself finishes normally
                # instead of ending in the cancelled state. Confirm that no
                # caller relies on task.cancelled() before changing this.
                job.status = JobStatus.CANCELLED
                job._touch()
                logger.info(f"Job {job.id} cancelled during execution")
            except Exception as e:
                job._set_failed(str(e))
                logger.error(f"Job {job.id} failed: {e}", exc_info=True)
    finally:
        # Drop the task handle on every exit path so ``self._tasks`` only
        # holds tasks that are still in flight.
        self._tasks.pop(job.id, None)
|
||||
|
||||
def get_job(self, job_id: str) -> Optional[Job]:
    """Return the job registered under ``job_id``, or ``None`` if unknown.

    Args:
        job_id: The id of the job to look up in ``self._jobs``.

    Returns:
        The matching job, or ``None`` when no job has that id.
    """
    return self._jobs.get(job_id)
|
||||
@@ -101,6 +106,9 @@ class JobExecutor:
|
||||
job = self._jobs.get(job_id)
|
||||
if not job:
|
||||
return False
|
||||
task = self._tasks.get(job_id)
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
job.cancel()
|
||||
return True
|
||||
|
||||
@@ -108,6 +116,9 @@ class JobExecutor:
|
||||
cancelled = 0
|
||||
for job in list(self._jobs.values()):
|
||||
if job.status in (JobStatus.PENDING, JobStatus.RUNNING):
|
||||
task = self._tasks.get(job.id)
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
job.cancel()
|
||||
cancelled += 1
|
||||
return cancelled
|
||||
|
||||
@@ -154,6 +154,10 @@ class ValidateAllJob(Job):
|
||||
self._touch()
|
||||
|
||||
payload = {"total": total, "submitted": submitted}
|
||||
self._set_completed(payload)
|
||||
if self.is_cancelled:
|
||||
self.status = JobStatus.CANCELLED
|
||||
self._touch()
|
||||
else:
|
||||
self._set_completed(payload)
|
||||
logger.info(f"ValidateAllJob {self.id}: submitted {submitted}/{total} proxies")
|
||||
return payload
|
||||
|
||||
@@ -74,21 +74,36 @@ class AsyncWorkerPool:
|
||||
await self._queue.join()
|
||||
|
||||
async def resize(self, new_worker_count: int) -> None:
    """Change the number of workers without dropping queued items.

    Growing starts additional worker tasks next to the existing ones.
    Shrinking enqueues one ``None`` sentinel per surplus worker; each
    sentinel makes the worker that dequeues it leave its loop. Sentinels
    are queued behind any items already waiting in the queue. The pool is
    never stopped and restarted as a whole.

    Args:
        new_worker_count: Desired number of workers; must not be negative.

    Raises:
        ValueError: If ``new_worker_count`` is negative.
    """
    if new_worker_count < 0:
        # A negative target would enqueue more sentinels than there are
        # workers; the leftovers would never be consumed and would keep
        # ``self._queue.join()`` from ever completing.
        raise ValueError(f"new_worker_count must be >= 0, got {new_worker_count}")
    if new_worker_count == self.worker_count:
        return
    logger.info(f"{self.name} resizing from {self.worker_count} to {new_worker_count}")
    if new_worker_count > self.worker_count:
        # New workers continue the existing id sequence.
        for i in range(self.worker_count, new_worker_count):
            self._workers.append(
                asyncio.create_task(self._worker_loop(i), name=f"{self.name}-worker-{i}")
            )
    else:
        for _ in range(self.worker_count - new_worker_count):
            await self._queue.put(None)
        # Yield once so idle workers get a chance to pick up a sentinel.
        await asyncio.sleep(0)
        still_running = []
        for w in self._workers:
            if w.done():
                # Reap the finished worker. Only cancellation is
                # suppressed; any other exception stored on the task
                # propagates to the caller.
                try:
                    await w
                except asyncio.CancelledError:
                    pass
            else:
                # Workers that have not exited yet stay tracked; they
                # leave once they dequeue a sentinel.
                still_running.append(w)
        self._workers = still_running
    self.worker_count = new_worker_count
|
||||
|
||||
async def _worker_loop(self, worker_id: int) -> None:
|
||||
while True:
|
||||
item = await self._queue.get()
|
||||
try:
|
||||
if item is None or not self._running:
|
||||
self._queue.task_done()
|
||||
break
|
||||
await self.handler(item)
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user