feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions

View File

@@ -1,6 +1,8 @@
"""插件 API 集成测试 - 测试 /api/plugins/* 所有接口"""
import pytest
from tests.task_utils import poll_task_until_terminal
class TestPluginsAPI:
"""测试插件相关 API"""
@@ -116,10 +118,11 @@ class TestPluginsAPI:
data = response.json()
assert data["code"] == 200
@pytest.mark.network
@pytest.mark.slow
@pytest.mark.asyncio
async def test_crawl_plugin(self, client):
"""测试 POST /api/plugins/{id}/crawl - 异步任务模式"""
import asyncio
response = await client.get("/api/plugins")
plugins = response.json()["data"]["plugins"]
if not plugins:
@@ -133,18 +136,11 @@ class TestPluginsAPI:
assert "task_id" in data["data"]
task_id = data["data"]["task_id"]
# 轮询任务状态
task_data = None
for _ in range(10):
await asyncio.sleep(0.3)
res = await client.get(f"/api/tasks/{task_id}")
assert res.status_code == 200
task_data = res.json()["data"]
if task_data["status"] in ("completed", "failed", "cancelled"):
break
task_data = await poll_task_until_terminal(
client, task_id, max_rounds=140, interval=0.5
)
assert task_data is not None
assert task_data["status"] in ("completed", "cancelled")
assert task_data["status"] in ("completed", "failed", "cancelled")
@pytest.mark.asyncio
async def test_crawl_nonexistent_plugin(self, client):
@@ -152,10 +148,11 @@ class TestPluginsAPI:
response = await client.post("/api/plugins/nonexistent_plugin/crawl")
assert response.status_code == 404
@pytest.mark.network
@pytest.mark.slow
@pytest.mark.asyncio
async def test_crawl_all_plugins(self, client):
"""测试 POST /api/plugins/crawl-all - 异步任务模式"""
import asyncio
response = await client.post("/api/plugins/crawl-all")
assert response.status_code == 200
data = response.json()
@@ -163,15 +160,8 @@ class TestPluginsAPI:
assert "task_id" in data["data"]
task_id = data["data"]["task_id"]
# 轮询任务状态
task_data = None
for _ in range(10):
await asyncio.sleep(0.3)
res = await client.get(f"/api/tasks/{task_id}")
assert res.status_code == 200
task_data = res.json()["data"]
if task_data["status"] in ("completed", "failed", "cancelled"):
break
task_data = await poll_task_until_terminal(
client, task_id, max_rounds=400, interval=0.5
)
assert task_data is not None
assert task_data["status"] in ("completed", "cancelled")
assert task_data["status"] in ("completed", "failed", "cancelled")

View File

@@ -14,6 +14,7 @@ class TestProxiesAPI:
assert data["code"] == 200
assert "data" in data
assert "total" in data["data"]
assert "pending" in data["data"]
assert "available" in data["data"]
assert "scheduler_running" in data["data"]
@@ -68,6 +69,17 @@ class TestProxiesAPI:
# 可能返回 200(有数据) 或 404(无数据)
assert response.status_code in [200, 404]
@pytest.mark.asyncio
async def test_delete_proxy_post_json(self, client, sample_proxy):
"""测试 POST /api/proxies/delete-one(前端默认路径,兼容 IPv6)"""
response = await client.post(
"/api/proxies/delete-one",
json={"ip": sample_proxy["ip"], "port": sample_proxy["port"]},
)
assert response.status_code == 200
data = response.json()
assert data["code"] == 200
@pytest.mark.asyncio
async def test_delete_proxy(self, client, sample_proxy):
"""测试 DELETE /api/proxies/{ip}/{port}"""
@@ -76,6 +88,19 @@ class TestProxiesAPI:
data = response.json()
assert data["code"] == 200
@pytest.mark.asyncio
async def test_delete_one_ipv6(self, client, db, proxy_repo):
"""POST delete-one 可删除含冒号的 IP(路径 DELETE 无法可靠表达)"""
await proxy_repo.insert_or_update(db, "2001:db8::1", 18080, "http", 40)
r = await client.post(
"/api/proxies/delete-one",
json={"ip": "2001:db8::1", "port": 18080},
)
assert r.status_code == 200
assert r.json()["code"] == 200
left = await proxy_repo.get_by_ip_port(db, "2001:db8::1", 18080)
assert left is None
@pytest.mark.asyncio
async def test_delete_nonexistent_proxy(self, client):
"""测试 DELETE /api/proxies/{ip}/{port} - 不存在的代理"""

View File

@@ -1,6 +1,17 @@
"""调度器 API 集成测试 - 测试 /api/scheduler/* 所有接口"""
import pytest
from app.api.deps import get_settings_repo
from app.repositories.settings_repo import SettingsRepository
class FailingSettingsRepository(SettingsRepository):
"""save 恒为 False,用于覆盖「设置保存失败」分支(非 MagicMock)。"""
@staticmethod
async def save(db, settings):
return False
class TestSchedulerAPI:
"""测试调度器相关 API"""
@@ -93,18 +104,17 @@ class TestSchedulerAPI:
assert job is not None
@pytest.mark.asyncio
async def test_start_scheduler_db_save_failure(self, client, monkeypatch):
async def test_start_scheduler_db_save_failure(self, client, app):
"""测试启动调度器时数据库保存失败应返回 running=False"""
from app.repositories.settings_repo import SettingsRepository
# lifespan 启动时调度器可能已自动启动,先停止它
await client.post("/api/scheduler/stop")
async def mock_save(*args, **kwargs):
return False
app.dependency_overrides[get_settings_repo] = lambda: FailingSettingsRepository()
try:
response = await client.post("/api/scheduler/start")
finally:
app.dependency_overrides.pop(get_settings_repo, None)
monkeypatch.setattr(SettingsRepository, "save", mock_save)
response = await client.post("/api/scheduler/start")
assert response.status_code == 200
data = response.json()
assert data["code"] == 200

View File

@@ -12,7 +12,7 @@ class TestSettingsAPI:
assert response.status_code == 200
data = response.json()
assert data["code"] == 200
assert "crawl_timeout" in data["data"]
assert "crawl_timeout" not in data["data"]
assert "validation_timeout" in data["data"]
assert "auto_validate" in data["data"]
@@ -21,17 +21,16 @@ class TestSettingsAPI:
"""测试 GET /api/settings 返回结构"""
response = await client.get("/api/settings")
data = response.json()["data"]
# 验证所有预期的设置项
expected_keys = [
"crawl_timeout",
"validation_timeout",
"max_retries",
"default_concurrency",
"min_proxy_score",
"proxy_expiry_days",
"auto_validate",
"auto_validate_after_crawl",
"validate_interval_minutes",
"validation_targets",
]
for key in expected_keys:
assert key in data, f"缺少设置项: {key}"
@@ -40,65 +39,45 @@ class TestSettingsAPI:
async def test_save_settings(self, client):
"""测试 POST /api/settings"""
settings = {
"crawl_timeout": 45,
"validation_timeout": 15,
"max_retries": 5,
"default_concurrency": 100,
"min_proxy_score": 10,
"proxy_expiry_days": 14,
"auto_validate": True,
"auto_validate_after_crawl": False,
"validate_interval_minutes": 60,
"validation_targets": [
"http://httpbin.org/ip",
],
}
response = await client.post("/api/settings", json=settings)
assert response.status_code == 200
data = response.json()
assert data["code"] == 200
# 验证返回的数据与提交的一致
for key, value in settings.items():
assert data["data"][key] == value
@pytest.mark.asyncio
async def test_save_settings_partial(self, client):
"""测试 POST /api/settings - 部分更新(实际上会替换所有)"""
# 先获取当前设置
response = await client.get("/api/settings")
current_settings = response.json()["data"]
# 修改部分设置
new_settings = current_settings.copy()
new_settings["crawl_timeout"] = 60
new_settings["validation_timeout"] = 25
new_settings["auto_validate"] = False
response = await client.post("/api/settings", json=new_settings)
assert response.status_code == 200
data = response.json()
assert data["data"]["crawl_timeout"] == 60
assert data["data"]["validation_timeout"] == 25
assert data["data"]["auto_validate"] is False
@pytest.mark.asyncio
async def test_save_settings_validation_error(self, client):
"""测试 POST /api/settings - 验证错误"""
# crawl_timeout 必须在 5-120 之间
invalid_settings = {
"crawl_timeout": 200, # 超出范围
"validation_timeout": 10,
"max_retries": 3,
"default_concurrency": 50,
"min_proxy_score": 0,
"proxy_expiry_days": 7,
"auto_validate": True,
"validate_interval_minutes": 30,
}
response = await client.post("/api/settings", json=invalid_settings)
assert response.status_code == 422 # 验证错误
@pytest.mark.asyncio
async def test_save_settings_invalid_type(self, client):
"""测试 POST /api/settings - 无效类型"""
invalid_settings = {
"crawl_timeout": "invalid", # 应该是整数
"validation_timeout": 10,
"max_retries": 3,
"validation_timeout": 100,
"default_concurrency": 50,
"min_proxy_score": 0,
"proxy_expiry_days": 7,
@@ -108,31 +87,62 @@ class TestSettingsAPI:
response = await client.post("/api/settings", json=invalid_settings)
assert response.status_code == 422
@pytest.mark.asyncio
async def test_save_settings_invalid_type(self, client):
"""测试 POST /api/settings - 无效类型"""
invalid_settings = {
"validation_timeout": 10,
"default_concurrency": "invalid",
"min_proxy_score": 0,
"proxy_expiry_days": 7,
"auto_validate": True,
"validate_interval_minutes": 30,
}
response = await client.post("/api/settings", json=invalid_settings)
assert response.status_code == 422
@pytest.mark.asyncio
async def test_save_settings_ignores_deprecated_crawl_timeout(self, client):
"""旧客户端若仍提交 crawl_timeout,应忽略且保存成功"""
response = await client.get("/api/settings")
base = response.json()["data"]
payload = {**base, "crawl_timeout": 999}
response = await client.post("/api/settings", json=payload)
assert response.status_code == 200
again = (await client.get("/api/settings")).json()["data"]
assert "crawl_timeout" not in again
@pytest.mark.asyncio
async def test_save_settings_ignores_obsolete_max_retries(self, client):
"""已移除的 max_retries 键若仍被提交,应忽略。"""
response = await client.get("/api/settings")
base = response.json()["data"]
payload = {**base, "max_retries": 9}
response = await client.post("/api/settings", json=payload)
assert response.status_code == 200
again = (await client.get("/api/settings")).json()["data"]
assert "max_retries" not in again
@pytest.mark.asyncio
async def test_settings_roundtrip(self, client):
"""测试设置读写一致性"""
# 生成随机但有效的设置
import random
test_settings = {
"crawl_timeout": random.randint(10, 60),
"validation_timeout": random.randint(5, 30),
"max_retries": random.randint(1, 5),
"default_concurrency": random.randint(20, 100),
"min_proxy_score": random.randint(0, 50),
"proxy_expiry_days": random.randint(1, 14),
"auto_validate": random.choice([True, False]),
"validate_interval_minutes": random.randint(10, 120),
}
# 写入设置
response = await client.post("/api/settings", json=test_settings)
assert response.status_code == 200
# 读取设置
response = await client.get("/api/settings")
saved_settings = response.json()["data"]
# 验证一致性
for key, value in test_settings.items():
assert saved_settings[key] == value, f"设置项 {key} 不一致"
@@ -140,9 +150,7 @@ class TestSettingsAPI:
async def test_settings_roundtrip_with_validation_targets(self, client):
"""测试设置读写一致性 - 包含数组类型的 validation_targets"""
test_settings = {
"crawl_timeout": 30,
"validation_timeout": 10,
"max_retries": 3,
"default_concurrency": 50,
"min_proxy_score": 0,
"proxy_expiry_days": 7,
@@ -154,13 +162,11 @@ class TestSettingsAPI:
],
}
# 写入设置
response = await client.post("/api/settings", json=test_settings)
assert response.status_code == 200
data = response.json()
assert data["data"]["validation_targets"] == test_settings["validation_targets"]
# 读取设置
response = await client.get("/api/settings")
saved_settings = response.json()["data"]
assert saved_settings["validation_targets"] == test_settings["validation_targets"]
@@ -179,7 +185,6 @@ class TestSettingsAPI:
data = response.json()
assert data["data"]["validation_targets"] == []
# 读取确认
response = await client.get("/api/settings")
saved_settings = response.json()["data"]
assert saved_settings["validation_targets"] == []