# Files
# ProxyPool/app/core/execution/job.py

# 164 lines
# 5.1 KiB
# Python

"""Job 定义 - 所有后台异步任务的统一抽象"""
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from app.models.domain import ProxyRaw
class JobStatus(Enum):
    """Lifecycle states of a background job.

    Every member's value is a lowercase string, suitable for emitting in
    serialized job snapshots.
    """

    # Not started yet.
    PENDING = "pending"

    # Currently executing.
    RUNNING = "running"

    # Finished successfully.
    COMPLETED = "completed"

    # Finished with an error.
    FAILED = "failed"

    # Stopped on request.
    CANCELLED = "cancelled"
@dataclass
class Job(ABC):
    """Base class for background jobs.

    Holds the bookkeeping shared by every job (identifier, status, progress,
    result, error and timestamps). Subclasses implement :meth:`run`.

    Cancellation is cooperative: :meth:`cancel` raises a flag that ``run``
    implementations can poll through :attr:`is_cancelled`. Once the status is
    ``CANCELLED`` the ``_set_*`` helpers never overwrite it, so a job whose
    ``run`` finishes after the cancel request cannot flip back to
    ``RUNNING``, ``COMPLETED`` or ``FAILED``.
    """

    # Random UUID4 string generated for each instance.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    status: JobStatus = JobStatus.PENDING
    progress: float = 0.0  # percentage, 0-100
    result: Any = None
    error: Optional[str] = None
    # Naive local timestamps taken from datetime.now().
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    # Cancellation flag; hidden from repr() and read via `is_cancelled`.
    _cancelled: bool = field(default=False, repr=False)

    def cancel(self) -> None:
        """Request cancellation of the job.

        The flag is always raised. The status only changes to ``CANCELLED``
        when the job is still ``PENDING`` or ``RUNNING``; a job that already
        completed or failed keeps its status.
        """
        self._cancelled = True
        if self.status in (JobStatus.PENDING, JobStatus.RUNNING):
            self.status = JobStatus.CANCELLED
        self._touch()

    def _touch(self) -> None:
        """Refresh ``updated_at`` to the current time."""
        self.updated_at = datetime.now()

    def _set_running(self) -> None:
        """Mark the job as running, unless it has already been cancelled."""
        if self.status is JobStatus.CANCELLED:
            # A cancel that arrived while the job was queued must stay visible.
            return
        self.status = JobStatus.RUNNING
        self._touch()

    def _set_completed(self, result: Any = None) -> None:
        """Store ``result`` and mark the job as completed.

        A cancelled job still records ``result`` but keeps its ``CANCELLED``
        status and its current progress.
        """
        self.result = result
        if self.status is not JobStatus.CANCELLED:
            self.status = JobStatus.COMPLETED
            self.progress = 100.0
        self._touch()

    def _set_failed(self, error: str) -> None:
        """Store ``error`` and mark the job as failed.

        A cancelled job still records ``error`` but keeps its ``CANCELLED``
        status.
        """
        self.error = error
        if self.status is not JobStatus.CANCELLED:
            self.status = JobStatus.FAILED
        self._touch()

    @property
    def is_cancelled(self) -> bool:
        """Whether :meth:`cancel` has been requested for this job."""
        return self._cancelled

    @abstractmethod
    async def run(self) -> Any:
        """Execute the job's core logic; subclasses must implement this."""
        raise NotImplementedError

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict snapshot of the job's bookkeeping fields.

        ``status`` is emitted as the enum's string value, ``progress`` is
        rounded to two decimals, and the timestamps are ISO-8601 strings.
        ``result`` is passed through unchanged.
        """
        return {
            "id": self.id,
            "status": self.status.value,
            "progress": round(self.progress, 2),
            "result": self.result,
            "error": self.error,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }
@dataclass
class CrawlJob(Job):
    """Job that runs one crawler plugin and hands its proxies to validation.

    ``run`` looks up the plugin by ``plugin_id``, executes it through
    ``plugin_runner`` and, when a ``validator_pool`` is configured, submits
    the crawled proxies to it.
    """

    plugin_id: str = ""
    plugin_runner: Any = field(repr=False, default=None)
    # NOTE(review): despite its name, this object is used as the plugin lookup
    # service (``get_plugin`` is called on it) - confirm what callers pass.
    proxy_service: Any = field(repr=False, default=None)
    validator_pool: Any = field(repr=False, default=None)

    async def run(self) -> Dict[str, Any]:
        """Crawl with the configured plugin and submit results for validation.

        Returns:
            A dict with ``plugin_id`` and ``proxy_count``, plus
            ``success_count`` / ``failure_count`` when the runner returned a
            result object.

        Raises:
            RuntimeError: ``plugin_runner`` is not set.
            ValueError: no plugin is registered under ``plugin_id``.

        Any exception raised while running is recorded through
        ``_set_failed`` before being re-raised, so the job does not stay in
        ``RUNNING``. A cancel request is honoured before the crawl starts and
        again before proxies are submitted; a cancelled job ends ``CANCELLED``.
        """
        # Imported at call time rather than at module import time
        # (presumably to avoid circular imports - confirm).
        from app.services.plugin_service import PluginService
        from app.core.log import logger

        if self.is_cancelled:
            # Cancelled while still queued: do no work at all.
            logger.info(f"CrawlJob {self.id}: cancelled before start")
            self.status = JobStatus.CANCELLED
            self._touch()
            return {"plugin_id": self.plugin_id, "proxy_count": 0}

        self._set_running()
        try:
            if not self.plugin_runner:
                raise RuntimeError("plugin_runner is not set")

            plugin_service = self.proxy_service or PluginService()
            plugin = plugin_service.get_plugin(self.plugin_id)
            if not plugin:
                raise ValueError(f"Plugin '{self.plugin_id}' not found")

            result = await self.plugin_runner.run(plugin)
            proxies: List[ProxyRaw] = result.proxies if result else []

            if self.is_cancelled:
                # The cancel arrived during the crawl: skip validation.
                logger.info(f"CrawlJob {self.id}: cancelled, skipping validation")
            elif proxies and self.validator_pool:
                await self.validator_pool.submit(proxies)
                logger.info(f"CrawlJob {self.id}: submitted {len(proxies)} proxies for validation")
        except Exception as exc:
            # Record the failure, then let the caller see the original error.
            self._set_failed(str(exc) or type(exc).__name__)
            raise

        payload: Dict[str, Any] = {
            "plugin_id": self.plugin_id,
            "proxy_count": len(proxies),
        }
        if result:
            payload["success_count"] = result.success_count
            payload["failure_count"] = result.failure_count

        if self.is_cancelled:
            self.status = JobStatus.CANCELLED
            self._touch()
        else:
            self._set_completed(payload)
        return payload
@dataclass
class ValidateAllJob(Job):
    """Full re-validation job: submits every stored proxy for validation.

    Proxies are read through ``proxy_repo`` (or a default ``ProxyRepository``)
    and handed to ``validator_pool`` in slices of ``batch_size``.
    """

    proxy_repo: Any = field(repr=False, default=None)
    validator_pool: Any = field(repr=False, default=None)
    batch_size: int = 100

    def _finish(self, payload: Dict[str, Any]) -> None:
        """Set the final status: CANCELLED if requested, else COMPLETED."""
        if self.is_cancelled:
            self.status = JobStatus.CANCELLED
            self._touch()
        else:
            self._set_completed(payload)

    async def run(self) -> Dict[str, Any]:
        """Load all stored proxies and submit them for validation in batches.

        Returns:
            ``{"total": <proxies loaded>, "submitted": <proxies processed>}``.

        Raises:
            ValueError: ``batch_size`` is not positive while there are
                proxies to process.

        The cancel flag is checked before each batch. Any exception raised
        while running is recorded through ``_set_failed`` before being
        re-raised, so the job does not stay in ``RUNNING``.
        """
        # Imported at call time rather than at module import time
        # (presumably to avoid circular imports - confirm).
        from app.repositories.proxy_repo import ProxyRepository
        from app.core.db import get_db
        from app.core.log import logger

        if not self.is_cancelled:
            # Do not overwrite a cancel that arrived while the job was queued.
            self._set_running()

        try:
            repo = self.proxy_repo or ProxyRepository()
            # Hold the DB handle only for the read, not for the whole run.
            async with get_db() as db:
                proxies = await repo.list_all(db)

            if not proxies:
                payload = {"total": 0, "submitted": 0}
                self._finish(payload)
                return payload

            if self.batch_size <= 0:
                # range() would raise on 0 and silently skip all work on a
                # negative step, so reject both explicitly.
                raise ValueError(
                    f"batch_size must be a positive integer, got {self.batch_size!r}"
                )

            total = len(proxies)
            submitted = 0
            for start in range(0, total, self.batch_size):
                if self.is_cancelled:
                    logger.info(f"ValidateAllJob {self.id}: cancelled")
                    break
                batch = proxies[start : start + self.batch_size]
                raws = [ProxyRaw(p.ip, p.port, p.protocol) for p in batch]
                if self.validator_pool:
                    await self.validator_pool.submit(raws)
                # Counted per processed batch, so progress also advances when
                # no validator pool is configured.
                submitted += len(raws)
                self.progress = min(100.0, (submitted / total) * 100)
                self._touch()
        except Exception as exc:
            # Record the failure, then let the caller see the original error.
            self._set_failed(str(exc) or type(exc).__name__)
            raise

        payload = {"total": total, "submitted": submitted}
        self._finish(payload)
        logger.info(f"ValidateAllJob {self.id}: submitted {submitted}/{total} proxies")
        return payload