fix: 修复爬虫网络层、验证队列卡死及 API 500 错误
- 修复 BaseHTTPPlugin 连接池、并发控制、异常日志、超时策略
- 修复/增强 8 个爬虫插件的稳定性和 fallback 机制
- 清理 validation_tasks 表 4 万+ pending 任务,避免队列卡死
- 修复 app/api/main.py 缺失全局 app 实例导致的 500 错误
- 提升前端 Axios 超时到 120 秒,避免请求断开
- 修复插件统计持久化和调度器生命周期问题
This commit is contained in:
47
_test_crawlers.py
Normal file
47
_test_crawlers.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import asyncio
|
||||
import app.plugins
|
||||
from app.core.plugin_system.registry import registry
|
||||
from app.core.log import logger
|
||||
import logging
|
||||
logger.setLevel(logging.WARNING)
|
||||
|
||||
async def test_plugin(p, timeout=20):
|
||||
try:
|
||||
proxies = await asyncio.wait_for(p.crawl(), timeout=timeout)
|
||||
return len(proxies), proxies[:1] if proxies else []
|
||||
except asyncio.TimeoutError:
|
||||
return -2, []
|
||||
except Exception as e:
|
||||
return -1, [str(e)]
|
||||
|
||||
async def test_all():
|
||||
plugins = registry.list_plugins()
|
||||
print(f'Total plugins: {len(plugins)}')
|
||||
results = {}
|
||||
for p in plugins:
|
||||
print(f'Testing {p.name} (timeout=20s)...', flush=True)
|
||||
count, sample = await test_plugin(p, timeout=20)
|
||||
results[p.name] = count
|
||||
if count > 0:
|
||||
print(f' -> OK: {count} proxies, sample={sample[0]}')
|
||||
elif count == 0:
|
||||
print(f' -> EMPTY')
|
||||
elif count == -2:
|
||||
print(f' -> TIMEOUT')
|
||||
else:
|
||||
print(f' -> ERROR: {sample[0]}')
|
||||
|
||||
print('\n' + '='*50)
|
||||
print('SUMMARY:')
|
||||
for name, count in sorted(results.items()):
|
||||
if count > 0:
|
||||
status = 'OK'
|
||||
elif count == 0:
|
||||
status = 'EMPTY'
|
||||
elif count == -2:
|
||||
status = 'TIMEOUT'
|
||||
else:
|
||||
status = 'ERROR'
|
||||
print(f' {name:22s} {status:8s} ({count} proxies)')
|
||||
|
||||
asyncio.run(test_all())
|
||||
Reference in New Issue
Block a user