全面架构重构:建立分层架构与高度可扩展的插件系统
后端重构: - 新增分层架构:API Routes -> Services -> Repositories -> Infrastructure - 彻底移除全局单例,全面采用 FastAPI 依赖注入 - 新增 api/ 目录拆分路由(proxies, plugins, scheduler, settings, stats) - 新增 services/ 业务逻辑层:ProxyService, PluginService, SchedulerService, ValidatorService, SettingsService - 新增 repositories/ 数据访问层:ProxyRepository, SettingsRepository, PluginSettingsRepository - 新增 models/ 层:Pydantic Schemas + Domain Models - 重写 core/config.py:采用 Pydantic Settings 管理配置 - 新增 core/db.py:基于 asynccontextmanager 的连接管理,支持数据库迁移 - 新增 core/exceptions.py:统一业务异常体系 插件系统重构(核心): - 新增 core/plugin_system/:BaseCrawlerPlugin + PluginRegistry - 采用显式注册模式(装饰器 + plugins/__init__.py),类型安全、测试友好 - 新增 plugins/base.py:BaseHTTPPlugin 通用 HTTP 爬虫基类 - 迁移全部 7 个插件到新架构(fate0, proxylist_download, ip3366, ip89, kuaidaili, speedx, yundaili) - 插件状态持久化到 plugin_settings 表 任务调度重构: - 新增 core/tasks/queue.py:ValidationQueue + WorkerPool - 解耦爬取与验证:爬虫只负责爬取,代理提交队列后由 Worker 异步验证 - 调度器定时从数据库拉取存量代理并分批投入验证队列 前端调整: - 新增 frontend/src/services/ 层拆分 API 调用逻辑 - 调整 stores/ 和 views/ 使用 Service 层 - 保持 API 兼容性,页面无需大幅修改 其他: - 新增 main.py 作为新入口 - 新增 DESIGN.md 架构设计文档 - 更新 requirements.txt 增加 pydantic-settings
This commit is contained in:
13
models/__init__.py
Normal file
13
models/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from .domain import ProxyRaw, Proxy, PluginInfo
|
||||
from .schemas import ProxyCreate, ProxyResponse, PluginResponse, SettingsSchema, CrawlResult
|
||||
|
||||
# Names re-exported by this package, in the same order as the two
# relative imports at the top of the file (.domain first, then .schemas).
__all__ = [
    # from .domain
    "ProxyRaw",
    "Proxy",
    "PluginInfo",
    # from .schemas
    "ProxyCreate",
    "ProxyResponse",
    "PluginResponse",
    "SettingsSchema",
    "CrawlResult",
]
|
||||
42
models/domain.py
Normal file
42
models/domain.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""领域模型 - 纯数据结构,不依赖任何框架"""
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class ProxyRaw:
    """Raw proxy record as produced by a crawler, before validation.

    Attributes:
        ip: Proxy host address, stored as given.
        port: Proxy port, stored as given.
        protocol: Proxy scheme. Normalized on construction to one of
            ``"http"``, ``"https"``, ``"socks4"`` or ``"socks5"``.
    """

    ip: str
    port: int
    protocol: str = "http"

    def __post_init__(self):
        """Normalize ``protocol`` to a supported lowercase scheme.

        Surrounding whitespace and letter case are ignored. Anything that
        is not a supported scheme falls back to ``"http"``. A missing
        (``None``) or non-string value takes the same fallback rather than
        raising ``AttributeError``.
        """
        raw = self.protocol if isinstance(self.protocol, str) else ""
        normalized = raw.lower().strip()
        if normalized not in ("http", "https", "socks4", "socks5"):
            normalized = "http"
        self.protocol = normalized
|
||||
|
||||
|
||||
@dataclass
class Proxy:
    """Proxy entity as stored in the database.

    The first four fields are required; the remaining fields are
    optional and default to ``None``.
    """

    # Required fields.
    ip: str
    port: int
    protocol: str
    score: int

    # Optional fields; None when no value is available.
    response_time_ms: Optional[float] = None
    last_check: Optional[datetime] = None
    created_at: Optional[datetime] = None
|
||||
|
||||
|
||||
@dataclass
class PluginInfo:
    """Plugin metadata.

    The identity fields and the ``enabled`` flag are required; the run
    timestamp and the two counters have defaults.
    """

    # Required fields.
    id: str
    name: str
    display_name: str
    description: str
    enabled: bool

    # Defaulted fields: no recorded run, both counters at zero.
    last_run: Optional[datetime] = None
    success_count: int = 0
    failure_count: int = 0
|
||||
105
models/schemas.py
Normal file
105
models/schemas.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Pydantic 模型 - 用于 API 请求/响应校验"""
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing import Optional, List
|
||||
|
||||
|
||||
class ProxyCreate(BaseModel):
    """Payload describing a proxy to create.

    ``port`` must be in 1..65535, ``score`` in 0..100 (default 10), and
    ``protocol`` is normalized to a supported lowercase scheme.
    """

    ip: str
    port: int = Field(ge=1, le=65535)
    protocol: str = "http"
    score: int = Field(default=10, ge=0, le=100)

    @field_validator("protocol")
    @classmethod
    def validate_protocol(cls, v: str):
        """Return ``v`` lowercased and stripped if it is a supported scheme.

        Raises:
            ValueError: if the normalized value is not one of http,
                https, socks4 or socks5.
        """
        normalized = v.lower().strip()
        if normalized in ("http", "https", "socks4", "socks5"):
            return normalized
        raise ValueError("protocol must be http, https, socks4 or socks5")
|
||||
|
||||
|
||||
class ProxyResponse(BaseModel):
    """Response shape for a single proxy."""

    ip: str
    port: int
    protocol: str
    score: int
    # Optional string; None by default.
    # NOTE(review): presumably a formatted timestamp -- confirm with the producer.
    last_check: Optional[str] = None
|
||||
|
||||
|
||||
class PluginResponse(BaseModel):
    """Response shape for a single plugin."""

    # Required fields.
    id: str
    name: str
    display_name: str
    description: str
    enabled: bool

    # Defaulted fields: last_run is an optional string, counters start at zero.
    last_run: Optional[str] = None
    success_count: int = 0
    failure_count: int = 0
|
||||
|
||||
|
||||
class SettingsSchema(BaseModel):
    """Settings payload with per-field defaults and inclusive bounds.

    Time units are only stated where the field name carries them
    (``proxy_expiry_days``, ``validate_interval_minutes``).
    """

    # Timeouts. NOTE(review): presumably seconds -- confirm against the consumers.
    crawl_timeout: int = Field(default=30, ge=5, le=120)
    validation_timeout: int = Field(default=10, ge=3, le=60)

    # Retry and concurrency limits.
    max_retries: int = Field(default=3, ge=0, le=10)
    default_concurrency: int = Field(default=50, ge=10, le=200)

    # Score threshold (0..100) and expiry (1..30 days).
    min_proxy_score: int = Field(default=0, ge=0, le=100)
    proxy_expiry_days: int = Field(default=7, ge=1, le=30)

    # Automatic validation switch and its interval (5..1440 minutes).
    auto_validate: bool = True
    validate_interval_minutes: int = Field(default=30, ge=5, le=1440)
|
||||
|
||||
|
||||
class CrawlResult(BaseModel):
    """Result counters for one plugin, identified by ``plugin_id``."""

    plugin_id: str
    proxy_count: int
    valid_count: int
    # Defaults to zero when not supplied.
    invalid_count: int = 0
|
||||
|
||||
|
||||
class ProxyListRequest(BaseModel):
    """Paging, filtering and sorting parameters for listing proxies.

    ``page`` starts at 1 and ``page_size`` is limited to 1..100.
    ``protocol`` and ``max_score`` are optional filters; ``min_score``
    defaults to 0. ``sort_by`` and ``sort_order`` are restricted to the
    values checked by their validators.
    """

    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=20, ge=1, le=100)
    protocol: Optional[str] = None
    min_score: int = Field(default=0, ge=0)
    max_score: Optional[int] = Field(default=None, ge=0)
    sort_by: str = "last_check"
    sort_order: str = "DESC"

    @field_validator("protocol")
    @classmethod
    def validate_protocol(cls, v):
        """Normalize the optional protocol filter.

        ``None`` means "no filter" and is passed through unchanged. Any
        other value is lowercased and stripped of surrounding whitespace,
        using the same normalization as ``ProxyCreate.validate_protocol``,
        so a value accepted when creating a proxy is also accepted here.

        Raises:
            ValueError: if the normalized value is not one of http,
                https, socks4 or socks5 (this includes the empty string).
        """
        if v is None:
            return v
        normalized = v.lower().strip()
        if normalized not in ("http", "https", "socks4", "socks5"):
            raise ValueError("协议类型必须是 http, https, socks4 或 socks5")
        return normalized

    @field_validator("sort_by")
    @classmethod
    def validate_sort_by(cls, v):
        """Allow only the whitelisted sort fields, matched exactly.

        Raises:
            ValueError: if ``v`` is not ip, port, protocol, score or
                last_check.
        """
        if v not in ("ip", "port", "protocol", "score", "last_check"):
            raise ValueError("排序字段必须是 ip, port, protocol, score 或 last_check")
        return v

    @field_validator("sort_order")
    @classmethod
    def validate_sort_order(cls, v):
        """Return the sort direction uppercased.

        Raises:
            ValueError: if the uppercased value is neither ASC nor DESC.
        """
        direction = v.upper()
        if direction not in ("ASC", "DESC"):
            raise ValueError("排序方式必须是 ASC 或 DESC")
        return direction
|
||||
|
||||
|
||||
class ProxyDeleteItem(BaseModel):
    """One proxy to delete, given as ip plus a port in 1..65535."""

    ip: str
    port: int = Field(ge=1, le=65535)
|
||||
|
||||
|
||||
class BatchDeleteRequest(BaseModel):
    """Batch delete payload: a list of at most 1000 ``ProxyDeleteItem``."""

    proxies: List[ProxyDeleteItem] = Field(max_length=1000)
|
||||
|
||||
|
||||
class PluginToggleRequest(BaseModel):
    """Payload carrying the required ``enabled`` flag for a plugin."""

    enabled: bool
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
    """Export parameters.

    ``format`` must be exactly ``csv``, ``txt`` or ``json``; ``protocol``
    is an optional string that is not validated here; ``limit`` is in
    1..100000 and defaults to 10000.
    """

    format: str = Field(pattern=r"^(csv|txt|json)$")
    protocol: Optional[str] = None
    limit: int = Field(default=10000, ge=1, le=100000)
|
||||
Reference in New Issue
Block a user