feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions

View File

@@ -1,5 +1,5 @@
"""Pydantic 模型 - 用于 API 请求/响应校验"""
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, ConfigDict
from typing import Optional, List
@@ -25,6 +25,7 @@ class ProxyResponse(BaseModel):
score: int
response_time_ms: Optional[float] = None
last_check: Optional[str] = None
validated: int = 0
class PluginResponse(BaseModel):
@@ -39,13 +40,14 @@ class PluginResponse(BaseModel):
class SettingsSchema(BaseModel):
crawl_timeout: int = Field(default=30, ge=5, le=120)
validation_timeout: int = Field(default=10, ge=3, le=60)
max_retries: int = Field(default=3, ge=0, le=10)
default_concurrency: int = Field(default=50, ge=10, le=200)
model_config = ConfigDict(extra="ignore")
validation_timeout: int = Field(default=6, ge=3, le=60)
default_concurrency: int = Field(default=120, ge=10, le=400)
min_proxy_score: int = Field(default=0, ge=0, le=100)
proxy_expiry_days: int = Field(default=7, ge=1, le=30)
auto_validate: bool = True
auto_validate_after_crawl: bool = False
validate_interval_minutes: int = Field(default=30, ge=5, le=1440)
validation_targets: List[str] = Field(
default=[
@@ -60,10 +62,14 @@ class SettingsSchema(BaseModel):
class CrawlSummarySchema(BaseModel):
    """Result of a single crawl task (aligned with the `result` returned by CrawlJob)."""
    plugin_id: str  # identifier of the crawl plugin that produced this summary
    proxy_count: int  # number of proxies the plugin yielded
    valid_count: int
    invalid_count: int = 0
    crawl_failed: bool = False  # True when the crawl itself failed (see `error`)
    error: Optional[str] = None  # error message when crawl_failed; None otherwise
    success_count: int = 0  # same as proxy_count; kept for legacy front-end compatibility
    failure_count: int = 0
class ProxyListRequest(BaseModel):
@@ -74,6 +80,20 @@ class ProxyListRequest(BaseModel):
max_score: Optional[int] = Field(default=None, ge=0)
sort_by: str = "last_check"
sort_order: str = "DESC"
pool_filter: Optional[str] = Field(
default=None,
description="all 或不传=全部pending=待验证available=已验证且可用",
)
@field_validator("pool_filter")
@classmethod
def validate_pool_filter(cls, v: Optional[str]):
    """Normalize pool_filter: None/empty/"all" mean "no filter" (returned as None);
    otherwise the value must be one of the allowed pool names."""
    allowed = ("pending", "available")
    # None, "" and the explicit "all" sentinel all collapse to "no filtering".
    if v in (None, "", "all"):
        return None
    if v in allowed:
        return v
    raise ValueError(f"pool_filter 必须是 {allowed} 之一或 all")
@field_validator("protocol")
@classmethod