Round 3 fixes: cancelled polling, aggregator invalid_count, filter state, scheduler atomicity, HTTP exception handler, tests

This commit is contained in:
祀梦
2026-04-05 10:20:23 +08:00
parent 49e440cb41
commit dc5f050683
32 changed files with 321 additions and 163 deletions

View File

@@ -19,7 +19,7 @@ class PluginRunner:
"""
def __init__(self, timeout: Optional[float] = None):
self.timeout = timeout or getattr(app_settings, "crawler_timeout", 30)
self.timeout = timeout if timeout is not None else getattr(app_settings, "crawler_timeout", 30)
async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
"""执行单个插件爬取"""
@@ -37,6 +37,10 @@ class PluginRunner:
return result
except Exception as e:
logger.warning(f"Plugin {plugin.name} health check error: {e}")
result.error = f"health check error: {e}"
result.failure_count = 1
await self._save_stats(plugin, result)
return result
# 执行爬取
try:
@@ -66,7 +70,7 @@ class PluginRunner:
seen = set()
unique = []
for p in proxies:
key = (p.ip, p.port)
key = (p.ip, p.port, p.protocol)
if key not in seen:
seen.add(key)
unique.append(p)