feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions

View File

@@ -22,6 +22,7 @@ class ProxyRaw:
@dataclass
class Proxy:
"""数据库中的代理实体"""
ip: str
port: int
protocol: str
@@ -29,6 +30,7 @@ class Proxy:
response_time_ms: Optional[float] = None
last_check: Optional[datetime] = None
created_at: Optional[datetime] = None
validated: int = 0 # 0 待验证 1 已验证(可参与分数与对外取用)
@dataclass
@@ -46,7 +48,12 @@ class PluginInfo:
@dataclass
class CrawlResult:
"""插件爬取结果"""
"""插件爬取结果
success_count: 最近一轮成功爬取到的代理条数(去重后),非「验证通过数」
failure_count: 最近一轮是否爬取失败(健康检查/超时/异常为 1,否则为 0)
"""
plugin_name: str
proxies: List[ProxyRaw] = field(default_factory=list)
success_count: int = 0

View File

@@ -1,5 +1,5 @@
"""Pydantic 模型 - 用于 API 请求/响应校验"""
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, ConfigDict
from typing import Optional, List
@@ -25,6 +25,7 @@ class ProxyResponse(BaseModel):
score: int
response_time_ms: Optional[float] = None
last_check: Optional[str] = None
validated: int = 0
class PluginResponse(BaseModel):
@@ -39,13 +40,14 @@ class PluginResponse(BaseModel):
class SettingsSchema(BaseModel):
crawl_timeout: int = Field(default=30, ge=5, le=120)
validation_timeout: int = Field(default=10, ge=3, le=60)
max_retries: int = Field(default=3, ge=0, le=10)
default_concurrency: int = Field(default=50, ge=10, le=200)
model_config = ConfigDict(extra="ignore")
validation_timeout: int = Field(default=6, ge=3, le=60)
default_concurrency: int = Field(default=120, ge=10, le=400)
min_proxy_score: int = Field(default=0, ge=0, le=100)
proxy_expiry_days: int = Field(default=7, ge=1, le=30)
auto_validate: bool = True
auto_validate_after_crawl: bool = False
validate_interval_minutes: int = Field(default=30, ge=5, le=1440)
validation_targets: List[str] = Field(
default=[
@@ -60,10 +62,14 @@ class SettingsSchema(BaseModel):
class CrawlSummarySchema(BaseModel):
"""单次爬取任务结果(与 CrawlJob 返回的 result 对齐)"""
plugin_id: str
proxy_count: int
valid_count: int
invalid_count: int = 0
crawl_failed: bool = False
error: Optional[str] = None
success_count: int = 0 # 与 proxy_count 相同,兼容旧前端
failure_count: int = 0
class ProxyListRequest(BaseModel):
@@ -74,6 +80,20 @@ class ProxyListRequest(BaseModel):
max_score: Optional[int] = Field(default=None, ge=0)
sort_by: str = "last_check"
sort_order: str = "DESC"
pool_filter: Optional[str] = Field(
default=None,
description="all 或不传=全部;pending=待验证;available=已验证且可用",
)
@field_validator("pool_filter")
@classmethod
def validate_pool_filter(cls, v: Optional[str]):
    """Normalize the pool_filter value.

    None / "" / "all" all mean "no filtering" and collapse to None;
    only "pending" and "available" are accepted as real filters.
    """
    # Treat the three "no filter" spellings uniformly.
    if v in (None, "", "all"):
        return None
    if v in ("pending", "available"):
        return v
    # Message rendered identically to the original f-string with
    # allowed = ("pending", "available").
    raise ValueError("pool_filter 必须是 ('pending', 'available') 之一或 all")
@field_validator("protocol")
@classmethod