feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB (see the conftest sketch after this list)
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import
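
The test-DB isolation in the bullet above hinges on exporting PROXYPOOL_DB_PATH before the app's config module is imported. A minimal sketch of how a conftest.py could wire this up — only the variable name and the db/proxies.test.sqlite path come from this commit; the fixture and the import-time assumption are illustrative:

    # conftest.py (hypothetical) -- repoint the pool at a throwaway SQLite file.
    import os

    # Assumption: app.core.config reads PROXYPOOL_DB_PATH at import time, so it
    # must be set before any application module is imported by the test session.
    os.environ.setdefault("PROXYPOOL_DB_PATH", "db/proxies.test.sqlite")

    import pathlib

    import pytest


    @pytest.fixture(autouse=True)
    def isolated_test_db():
        """Delete the test database after each test so runs stay independent."""
        yield
        pathlib.Path(os.environ["PROXYPOOL_DB_PATH"]).unlink(missing_ok=True)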

Made-with: Cursor
Author: 祀梦
Date: 2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions


@@ -4,7 +4,6 @@ from datetime import datetime
 from typing import Optional
 
 from app.core.plugin_system.base import BaseCrawlerPlugin
-from app.core.config import settings as app_settings
 from app.core.log import logger
 from app.models.domain import CrawlResult, ProxyRaw
@@ -12,14 +11,13 @@ from app.models.domain import CrawlResult, ProxyRaw
 class PluginRunner:
     """Unified plugin runner
 
-    - Timeout control (reads crawl_timeout from settings)
-    - Exception capture and stats updates
-    - Optional pre-crawl health check
-    - Result deduplication
+    - Timeout: independent per plugin, via plugin.crawl_timeout_seconds (default 120s)
+    - Optional crawl_timeout_override (overrides the plugin's own limit, e.g. in tests)
+    - Exception capture and stats updates, pre-crawl health check, result deduplication
     """
 
-    def __init__(self, timeout: Optional[float] = None):
-        self.timeout = timeout if timeout is not None else getattr(app_settings, "crawler_timeout", 30)
+    def __init__(self, crawl_timeout_override: Optional[float] = None):
+        self.crawl_timeout_override = crawl_timeout_override
 
     async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
         """Run a single plugin crawl"""
@@ -42,19 +40,22 @@
             await self._save_stats(plugin, result)
             return result
 
         # Run the crawl
+        crawl_limit = float(getattr(plugin, "crawl_timeout_seconds", 120.0))
+        if self.crawl_timeout_override is not None:
+            crawl_limit = float(self.crawl_timeout_override)
         try:
             proxies = await asyncio.wait_for(
                 plugin.crawl(),
-                timeout=self.timeout,
+                timeout=crawl_limit,
             )
             result.proxies = self._dedup(proxies)
-            result.success_count = 1 if result.proxies else 0
+            result.success_count = len(result.proxies)
             logger.info(
                 f"Plugin {plugin.name} crawled {len(result.proxies)} unique proxies"
             )
         except asyncio.TimeoutError:
-            result.error = f"crawl timeout after {self.timeout}s"
+            result.error = f"crawl timeout after {crawl_limit}s"
             result.failure_count = 1
             logger.error(f"Plugin {plugin.name} crawl timeout")
         except Exception as e:
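
The new runner resolves its limit from the plugin and lets the constructor argument win only when explicitly given. A minimal usage sketch under stated assumptions — that BaseCrawlerPlugin subclasses can be instantiated bare, that the optional health check passes, and that PluginRunner lives at app.core.plugin_system.runner (a module path not shown in this diff):

    import asyncio

    from app.core.plugin_system.base import BaseCrawlerPlugin
    from app.core.plugin_system.runner import PluginRunner  # assumed module path


    class SlowPlugin(BaseCrawlerPlugin):
        """Hypothetical plugin whose crawl always outlives its own limit."""
        name = "slow_plugin"
        crawl_timeout_seconds = 5.0  # per-plugin limit; runner defaults to 120.0

        async def crawl(self):
            await asyncio.sleep(60)  # deliberately exceeds every limit below
            return []


    async def main():
        # Normal path: the plugin's own crawl_timeout_seconds (5.0s) applies.
        result = await PluginRunner().run(SlowPlugin())
        print(result.error)  # crawl timeout after 5.0s

        # Test path: crawl_timeout_override trumps the plugin's own limit.
        result = await PluginRunner(crawl_timeout_override=0.1).run(SlowPlugin())
        print(result.error)  # crawl timeout after 0.1s


    asyncio.run(main())

Keeping the override on the runner rather than mutating the plugin means production code never sees a global timeout knob: each plugin declares its own budget, and only callers that construct a runner explicitly (e.g. tests) can shorten it.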