# ProxyPool/app/services/plugin_runner.py

"""插件统一执行器 - 封装超时、重试、健康检查、错误捕获"""
import asyncio
from datetime import datetime
from typing import Optional
from app.core.plugin_system.base import BaseCrawlerPlugin
from app.core.config import settings as app_settings
from app.core.log import logger
from app.models.domain import CrawlResult, ProxyRaw

class PluginRunner:
    """Unified plugin runner.

    - Timeout control (reads crawler_timeout from settings)
    - Exception capture and stats updates
    - Optional health check before crawling
    - Result deduplication
    """

    def __init__(self, timeout: Optional[float] = None):
        # Fall back to settings.crawler_timeout, defaulting to 30 seconds.
        self.timeout = timeout if timeout is not None else getattr(app_settings, "crawler_timeout", 30)

    async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
        """Run a single plugin's crawl."""
        result = CrawlResult(plugin_name=plugin.name)

        # Optional health check before crawling.
        try:
            healthy = await asyncio.wait_for(
                plugin.health_check(), timeout=5.0
            )
            if not healthy:
                result.error = "health check failed"
                result.failure_count = 1
                await self._save_stats(plugin, result)
                return result
        except Exception as e:
            logger.warning(f"Plugin {plugin.name} health check error: {e}")
            result.error = f"health check error: {e}"
            result.failure_count = 1
            await self._save_stats(plugin, result)
            return result

        # Execute the crawl.
        try:
            proxies = await asyncio.wait_for(
                plugin.crawl(),
                timeout=self.timeout,
            )
            result.proxies = self._dedup(proxies)
            result.success_count = 1 if result.proxies else 0
            logger.info(
                f"Plugin {plugin.name} crawled {len(result.proxies)} unique proxies"
            )
        except asyncio.TimeoutError:
            result.error = f"crawl timeout after {self.timeout}s"
            result.failure_count = 1
            logger.error(f"Plugin {plugin.name} crawl timeout")
        except Exception as e:
            result.error = str(e)
            result.failure_count = 1
            logger.error(f"Plugin {plugin.name} crawl failed: {e}", exc_info=True)

        await self._save_stats(plugin, result)
        return result

    @staticmethod
    def _dedup(proxies: list[ProxyRaw]) -> list[ProxyRaw]:
        """Deduplicate proxies by (ip, port, protocol), preserving order."""
        seen = set()
        unique = []
        for p in proxies:
            key = (p.ip, p.port, p.protocol)
            if key not in seen:
                seen.add(key)
                unique.append(p)
        return unique
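
    # Illustrative only: assuming ProxyRaw can be constructed with just these
    # fields (the real model may require more), _dedup collapses entries that
    # share (ip, port, protocol), so two records like
    # ProxyRaw(ip="1.2.3.4", port=8080, protocol="http") yield a single entry.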

    async def _save_stats(self, plugin: BaseCrawlerPlugin, result: CrawlResult) -> None:
        """Persist crawl statistics to the database."""
        # Local imports defer the DB dependency until stats are actually saved.
        from app.core.db import get_db
        from app.repositories.settings_repo import PluginSettingsRepository

        repo = PluginSettingsRepository()
        payload = {
            "success_count": result.success_count,
            "failure_count": result.failure_count,
            "last_run": datetime.now().isoformat(),
        }
        try:
            async with get_db() as db:
                await repo.set_stats(db, plugin.name, payload)
        except Exception as e:
            logger.error(f"Failed to save stats for {plugin.name}: {e}")