feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses a single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes (sketched after this list); task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe the production DB (conftest sketch after this list)
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
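The test-isolation change hinges on pointing the app at a throwaway SQLite file before any application module opens a connection. Below is a minimal conftest.py sketch, assuming pytest and an app that resolves PROXYPOOL_DB_PATH from the environment at startup; only the variable name and the db/proxies.test.sqlite path come from the commit, the fixture itself is illustrative:

# conftest.py -- hypothetical sketch; cleanup behavior is an assumption,
# not this repo's actual test setup.
import os
import pathlib

import pytest

# Set the variable at import time: pytest loads conftest.py before the test
# modules, so application code never sees the production DB path.
os.environ["PROXYPOOL_DB_PATH"] = "db/proxies.test.sqlite"


@pytest.fixture(scope="session", autouse=True)
def isolated_test_db():
    """Create the db/ directory up front and drop the test DB afterwards."""
    path = pathlib.Path(os.environ["PROXYPOOL_DB_PATH"])
    path.parent.mkdir(parents=True, exist_ok=True)
    yield
    path.unlink(missing_ok=True)  # no state leaks into the next run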
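On the delete-one endpoint: an IPv6 literal such as 2001:db8::1 is full of colons, so packing ip:port into a path segment (e.g. DELETE /api/proxies/{address}) is ambiguous or needs escaping that clients routinely get wrong; a POST with a JSON body sidesteps URL parsing entirely. A sketch assuming FastAPI (which the app/ layout suggests but the diff does not show), with an in-memory set standing in for the real proxy store:

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

router = APIRouter()

# Illustrative stand-in for the real proxy store.
_PROXIES: set[tuple[str, int]] = {("2001:db8::1", 1080), ("203.0.113.7", 8080)}


class DeleteProxyRequest(BaseModel):
    ip: str   # may be an IPv6 literal; it never has to survive a URL path
    port: int


@router.post("/api/proxies/delete-one")
async def delete_one(req: DeleteProxyRequest) -> dict:
    # Look up by the structured fields from the body, not by parsing a URL.
    if (req.ip, req.port) not in _PROXIES:
        raise HTTPException(status_code=404, detail="proxy not found")
    _PROXIES.remove((req.ip, req.port))
    return {"deleted": True}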
@@ -4,7 +4,6 @@ from datetime import datetime
 from typing import Optional
 
 from app.core.plugin_system.base import BaseCrawlerPlugin
-from app.core.config import settings as app_settings
 from app.core.log import logger
 from app.models.domain import CrawlResult, ProxyRaw
 
@@ -12,14 +11,13 @@ from app.models.domain import CrawlResult, ProxyRaw
 class PluginRunner:
     """Unified plugin runner
 
-    - Timeout control (crawl_timeout read from settings)
-    - Exception capture and stats updates
-    - Optional health-check pre-step
-    - Result deduplication
+    - Timeout: independent per plugin, via plugin.crawl_timeout_seconds (default 120s)
+    - Optional crawl_timeout_override: overrides the plugin's own limit, for tests and similar scenarios only
+    - Exception capture and stats updates, health-check pre-step, result deduplication
     """
 
-    def __init__(self, timeout: Optional[float] = None):
-        self.timeout = timeout if timeout is not None else getattr(app_settings, "crawler_timeout", 30)
+    def __init__(self, crawl_timeout_override: Optional[float] = None):
+        self.crawl_timeout_override = crawl_timeout_override
 
     async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
         """Run a single plugin crawl"""
@@ -42,19 +40,22 @@ class PluginRunner:
             await self._save_stats(plugin, result)
             return result
 
-        # Run the crawl
+        crawl_limit = float(getattr(plugin, "crawl_timeout_seconds", 120.0))
+        if self.crawl_timeout_override is not None:
+            crawl_limit = float(self.crawl_timeout_override)
+
         try:
             proxies = await asyncio.wait_for(
                 plugin.crawl(),
-                timeout=self.timeout,
+                timeout=crawl_limit,
             )
             result.proxies = self._dedup(proxies)
-            result.success_count = 1 if result.proxies else 0
+            result.success_count = len(result.proxies)
             logger.info(
                 f"Plugin {plugin.name} crawled {len(result.proxies)} unique proxies"
             )
         except asyncio.TimeoutError:
-            result.error = f"crawl timeout after {self.timeout}s"
+            result.error = f"crawl timeout after {crawl_limit}s"
             result.failure_count = 1
             logger.error(f"Plugin {plugin.name} crawl timeout")
         except Exception as e:
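Stripped of the class and the stats plumbing, the timeout flow in the last hunk reduces to the sketch below. FakePlugin stands in for BaseCrawlerPlugin, dedup is simplified to an order-preserving dict trick, and printing replaces the logger; this illustrates the diff's logic rather than reproducing the repo's code:

import asyncio
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakePlugin:
    name: str = "fpw_example"
    crawl_timeout_seconds: float = 120.0  # per-plugin limit, default 120s

    async def crawl(self) -> list[str]:
        await asyncio.sleep(0.1)  # pretend to fetch a proxy list page
        return ["1.2.3.4:8080", "1.2.3.4:8080", "5.6.7.8:3128"]


async def run(plugin: FakePlugin, crawl_timeout_override: Optional[float] = None) -> list[str]:
    # Per-plugin limit, unless a caller (e.g. a test) overrides it.
    crawl_limit = float(getattr(plugin, "crawl_timeout_seconds", 120.0))
    if crawl_timeout_override is not None:
        crawl_limit = float(crawl_timeout_override)
    try:
        proxies = await asyncio.wait_for(plugin.crawl(), timeout=crawl_limit)
    except asyncio.TimeoutError:
        print(f"Plugin {plugin.name} crawl timeout after {crawl_limit}s")
        return []
    unique = list(dict.fromkeys(proxies))  # order-preserving dedup
    print(f"Plugin {plugin.name} crawled {len(unique)} unique proxies")
    return unique


if __name__ == "__main__":
    assert len(asyncio.run(run(FakePlugin()))) == 2   # duplicate collapsed
    assert asyncio.run(run(FakePlugin(), 0.01)) == [] # override trips the timeout

Note the behavioral fix folded into the same hunk: success_count was previously 1 if result.proxies else 0, i.e. a boolean success flag; counting len(result.proxies) makes the per-plugin stats reflect actual yield.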