Round 3 fixes: cancelled polling, aggregator invalid_count, filter state, scheduler atomicity, HTTP exception handler, tests
This commit is contained in:
@@ -19,7 +19,7 @@ class PluginRunner:
 """

 def __init__(self, timeout: Optional[float] = None):
-self.timeout = timeout or getattr(app_settings, "crawler_timeout", 30)
+self.timeout = timeout if timeout is not None else getattr(app_settings, "crawler_timeout", 30)

 async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
 """执行单个插件爬取"""
@@ -37,6 +37,10 @@ class PluginRunner:
 return result
 except Exception as e:
 logger.warning(f"Plugin {plugin.name} health check error: {e}")
+result.error = f"health check error: {e}"
+result.failure_count = 1
+await self._save_stats(plugin, result)
+return result

 # 执行爬取
 try:
@@ -66,7 +70,7 @@ class PluginRunner:
 seen = set()
 unique = []
 for p in proxies:
-key = (p.ip, p.port)
+key = (p.ip, p.port, p.protocol)
 if key not in seen:
 seen.add(key)
 unique.append(p)
Reference in New Issue
Block a user