"""插件基类 - 所有爬虫插件必须继承此基类""" from abc import ABC, abstractmethod from dataclasses import dataclass from typing import List, Dict, Any @dataclass class ProxyRaw: """爬虫产出的原始代理数据""" ip: str port: int protocol: str = "http" def __post_init__(self): self.protocol = self.protocol.lower().strip() if self.protocol not in ("http", "https", "socks4", "socks5"): self.protocol = "http" class BaseCrawlerPlugin(ABC): """爬虫插件基类 添加新爬虫只需: 1. 继承 BaseCrawlerPlugin 2. 实现 crawl() 方法返回 List[ProxyRaw] 3. 用 @registry.register 装饰或在 __init__ 中显式注册 """ name: str = "" display_name: str = "" description: str = "" enabled: bool = True default_config: Dict[str, Any] = {} def __init__(self): self._config: Dict[str, Any] = dict(self.default_config or {}) @property def config(self) -> Dict[str, Any]: return self._config def update_config(self, updates: Dict[str, Any]) -> None: """更新插件配置,只覆盖存在的键""" for key, value in updates.items(): if key in self._config: self._config[key] = value @abstractmethod async def crawl(self) -> List[ProxyRaw]: """爬取代理的核心方法。只负责爬取,不要在这里验证。""" raise NotImplementedError async def health_check(self) -> bool: """可选:检查插件健康状态""" return True