refactor: 全面重构核心架构,消除反复修改的根因
- 删除 ValidationQueue 双轨持久化队列,替换为纯内存 AsyncWorkerPool - 引入统一后台任务框架 JobExecutor(Job/CrawlJob/ValidateAllJob) - 新增 PluginRunner 统一插件执行(超时、重试、健康检查、统计) - 重构 SchedulerService 职责收敛为仅定时触发 ValidateAllJob - 使用 AsyncExitStack 重构 lifespan,安全管理长生命周期资源 - 路由层瘦身 50%+,业务异常上抛由全局中间件统一处理 - 实现设置全热更新(WorkerPool 并发、Validator 超时即时生效) - 前端 Store 强制写后重新拉取,消除乐观更新数据不同步 - 删除 queue.py / task_repo.py / task_service.py - 新增 execution 单元测试,全部 85 个测试通过
This commit is contained in:
159
app/core/execution/job.py
Normal file
159
app/core/execution/job.py
Normal file
@@ -0,0 +1,159 @@
|
||||
"""Job 定义 - 所有后台异步任务的统一抽象"""
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from app.models.domain import ProxyRaw
|
||||
|
||||
|
||||
class JobStatus(Enum):
    """Lifecycle states of a background :class:`Job`."""

    PENDING = "pending"      # created, run() not yet started
    RUNNING = "running"      # run() is currently executing
    COMPLETED = "completed"  # finished successfully (result set, progress 100)
    FAILED = "failed"        # run() failed; error message recorded
    CANCELLED = "cancelled"  # cancel() was requested while pending/running
|
||||
|
||||
|
||||
@dataclass
class Job(ABC):
    """Abstract base for all background tasks.

    Holds the shared lifecycle state (status, progress, result, error,
    timestamps) and the transition helpers subclasses use from ``run()``.
    """

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    status: JobStatus = JobStatus.PENDING
    progress: float = 0.0  # percentage in the range 0-100
    result: Any = None
    error: Optional[str] = None
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    _cancelled: bool = field(default=False, repr=False)

    def cancel(self) -> None:
        """Request cancellation of this job.

        Sets the cancellation flag; a job that has not yet finished is
        moved straight to CANCELLED. Terminal jobs keep their status.
        """
        self._cancelled = True
        if self.status not in (JobStatus.PENDING, JobStatus.RUNNING):
            return
        self._transition(JobStatus.CANCELLED)

    def _touch(self) -> None:
        """Refresh the last-modified timestamp."""
        self.updated_at = datetime.now()

    def _transition(self, new_status: JobStatus) -> None:
        """Switch to *new_status* and refresh the timestamp."""
        self.status = new_status
        self._touch()

    def _set_running(self) -> None:
        """Mark the job as actively executing."""
        self._transition(JobStatus.RUNNING)

    def _set_completed(self, result: Any = None) -> None:
        """Record a successful finish: store *result*, pin progress at 100."""
        self.result = result
        self.progress = 100.0
        self._transition(JobStatus.COMPLETED)

    def _set_failed(self, error: str) -> None:
        """Record a failure with the given *error* message."""
        self.error = error
        self._transition(JobStatus.FAILED)

    @property
    def is_cancelled(self) -> bool:
        """Whether cancellation has been requested for this job."""
        return self._cancelled

    @abstractmethod
    async def run(self) -> Any:
        """Execute the job's core logic; subclasses must implement this."""
        raise NotImplementedError

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the job's public state into a JSON-friendly dict."""
        return dict(
            id=self.id,
            status=self.status.value,
            progress=round(self.progress, 2),
            result=self.result,
            error=self.error,
            created_at=self.created_at.isoformat(),
            updated_at=self.updated_at.isoformat(),
        )
|
||||
|
||||
|
||||
@dataclass
class CrawlJob(Job):
    """Plugin crawl job: run one crawler plugin and hand its proxies to validation."""

    plugin_id: str = ""  # id of the plugin to execute
    plugin_runner: Any = field(repr=False, default=None)  # required; raises if unset
    # NOTE(review): despite its name, this field is consumed as a *plugin*
    # service (the fallback below is PluginService()) — confirm intent / rename.
    proxy_service: Any = field(repr=False, default=None)
    validator_pool: Any = field(repr=False, default=None)  # optional; skipped if None

    async def run(self) -> Dict[str, Any]:
        """Crawl via the plugin runner and submit found proxies for validation.

        Returns a payload dict with ``plugin_id``, ``proxy_count`` and — when
        the runner produced a result — ``success_count``/``failure_count``.

        Raises:
            RuntimeError: if no ``plugin_runner`` was injected.
            ValueError: if ``plugin_id`` does not resolve to a plugin.

        NOTE(review): a raise here leaves status at RUNNING; presumably the
        surrounding JobExecutor converts that into _set_failed — confirm.
        """
        # Local imports keep module import light and avoid circular imports.
        from app.services.plugin_service import PluginService
        from app.core.log import logger

        self._set_running()
        if not self.plugin_runner:
            raise RuntimeError("plugin_runner is not set")

        # Fall back to a freshly constructed PluginService when none injected.
        plugin_service = self.proxy_service or PluginService()
        plugin = plugin_service.get_plugin(self.plugin_id)
        if not plugin:
            raise ValueError(f"Plugin '{self.plugin_id}' not found")

        result = await self.plugin_runner.run(plugin)
        proxies: List[ProxyRaw] = result.proxies if result else []

        # Validation is best-effort: only when both proxies and a pool exist.
        if proxies and self.validator_pool:
            await self.validator_pool.submit(proxies)
            logger.info(f"CrawlJob {self.id}: submitted {len(proxies)} proxies for validation")

        payload = {
            "plugin_id": self.plugin_id,
            "proxy_count": len(proxies),
        }
        if result:
            payload["success_count"] = result.success_count
            payload["failure_count"] = result.failure_count
        self._set_completed(payload)
        return payload
|
||||
|
||||
|
||||
@dataclass
class ValidateAllJob(Job):
    """Full revalidation job: submit every stored proxy to the validator pool."""

    proxy_repo: Any = field(repr=False, default=None)  # injectable ProxyRepository
    validator_pool: Any = field(repr=False, default=None)  # optional; skipped if None
    batch_size: int = 100  # proxies submitted per batch

    async def run(self) -> Dict[str, Any]:
        """Load all proxies and submit them to the validator pool in batches.

        Checks ``is_cancelled`` between batches and stops early on cancel.

        Returns:
            ``{"total": <proxies found>, "submitted": <proxies submitted>}``.
        """
        # Local imports keep module import light and avoid circular imports.
        from app.repositories.proxy_repo import ProxyRepository
        from app.core.db import get_db
        from app.core.log import logger

        self._set_running()
        repo = self.proxy_repo or ProxyRepository()

        async with get_db() as db:
            proxies = await repo.list_all(db)

        if not proxies:
            self._set_completed({"total": 0, "submitted": 0})
            return self.result

        total = len(proxies)
        submitted = 0
        for i in range(0, total, self.batch_size):
            if self.is_cancelled:
                logger.info(f"ValidateAllJob {self.id}: cancelled")
                break
            batch = proxies[i : i + self.batch_size]
            raws = [ProxyRaw(p.ip, p.port, p.protocol) for p in batch]
            if self.validator_pool:
                await self.validator_pool.submit(raws)
                submitted += len(raws)
            self.progress = min(100.0, (submitted / total) * 100)
            self._touch()

        payload = {"total": total, "submitted": submitted}
        if self.is_cancelled:
            # BUGFIX: previously _set_completed() also ran after a cancel,
            # flipping status from CANCELLED back to COMPLETED. Record the
            # partial result but keep the CANCELLED status set by cancel().
            self.result = payload
            self._touch()
        else:
            self._set_completed(payload)
        logger.info(f"ValidateAllJob {self.id}: submitted {submitted}/{total} proxies")
        return payload
|
||||
Reference in New Issue
Block a user