feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions

View File

@@ -5,6 +5,8 @@
```
tests/
├── conftest.py # pytest 配置和 fixtures
├── task_utils.py # 异步任务轮询(集成/E2E 共用)
├── support/ # 测试专用插件类等(非 mock
├── README.md # 本文件
├── unit/ # 单元测试
│ ├── test_models.py # 模型测试
@@ -12,6 +14,7 @@ tests/
├── integration/ # 集成测试
│ ├── test_proxies_api.py # 代理 API 测试
│ ├── test_plugins_api.py # 插件 API 测试
│ ├── test_plugins_live_crawl.py # 各插件真实爬取验收(须外网)
│ ├── test_scheduler_api.py # 调度器 API 测试
│ ├── test_settings_api.py # 设置 API 测试
│ └── test_health_api.py # 健康检查测试
@@ -19,6 +22,25 @@ tests/
└── test_full_workflow.py # 完整工作流测试
```
## 网络与真实调用
集成测试与 E2E **不再 mock** `PluginRunner` / `ValidatorService`:会发起真实 HTTP 爬取与代理验证(视设置而定)。运行全量 `pytest` 需要 **可用的出站网络**,且含 `network` / `slow` 标记的用例可能耗时数分钟。
跳过需外网的用例(例如离线快速检查):
```bash
pytest -m "not network"
```
**插件爬取验收**`test_plugins_live_crawl.py`
- 核心 8 插件:必须至少 1 条代理且无 Runner 失败。
- `fpw_*`:对照 [Free_Proxy_Website](https://github.com/cyubuchen/Free_Proxy_Website) 的公开源,允许 0 条(国际网络差异),使用更长超时。
```bash
pytest tests/integration/test_plugins_live_crawl.py -v
```
## 运行测试
### 安装测试依赖

View File

@@ -1,5 +1,15 @@
"""pytest 配置文件和 fixtures"""
# 必须在任何 app.* 导入之前:下方 app fixture 会清空表,不可与生产共用 db/proxies.sqlite
import os
os.environ["PROXYPOOL_DB_PATH"] = "db/proxies.test.sqlite"
import asyncio
import sys
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
import pytest
import pytest_asyncio
from typing import AsyncGenerator
@@ -17,22 +27,28 @@ from app.plugins import (
SpeedXPlugin,
YunDaiLiPlugin,
ProxyScrapePlugin,
FpwProxyListDownloadPlugin,
FpwSocksSslProxyPlugin,
FpwSpysOnePlugin,
FpwProxynovaPlugin,
FpwHidemyPlugin,
FpwPremproxyPlugin,
FpwFreeproxylistsPlugin,
FpwGatherproxyPlugin,
FpwCheckerproxyPlugin,
)
from app.repositories.proxy_repo import ProxyRepository
from app.models.domain import ProxyRaw
@pytest_asyncio.fixture(scope="function")
async def app():
"""创建应用实例"""
# 初始化测试数据库并清空历史数据
await init_db()
async with get_db() as db:
await db.execute("DELETE FROM proxies")
await db.execute("DELETE FROM settings")
await db.commit()
# 清理并重新注册插件,防止跨测试污染
registry.clear()
for plugin_cls in [
Fate0Plugin,
@@ -43,14 +59,22 @@ async def app():
SpeedXPlugin,
YunDaiLiPlugin,
ProxyScrapePlugin,
FpwProxyListDownloadPlugin,
FpwSocksSslProxyPlugin,
FpwSpysOnePlugin,
FpwProxynovaPlugin,
FpwHidemyPlugin,
FpwPremproxyPlugin,
FpwFreeproxylistsPlugin,
FpwGatherproxyPlugin,
FpwCheckerproxyPlugin,
]:
registry.register(plugin_cls)
test_app = create_app()
async with test_app.router.lifespan_context(test_app):
yield test_app
# 给 aiosqlite / aiohttp 后台线程留出收尾时间
await asyncio.sleep(0.1)
@@ -80,32 +104,4 @@ async def sample_proxy(db, proxy_repo):
"""创建一个测试代理"""
await proxy_repo.insert_or_update(db, "192.168.1.1", 8080, "http", 50)
yield {"ip": "192.168.1.1", "port": 8080, "protocol": "http", "score": 50}
# 清理
await proxy_repo.delete(db, "192.168.1.1", 8080)
@pytest_asyncio.fixture(autouse=True)
async def mock_external_requests(monkeypatch, request):
"""
自动在集成/E2E 测试中 mock 外部网络请求:
1. 插件爬取返回固定测试代理,避免真实 HTTP 请求
2. 代理验证瞬间成功,避免连接超时等待
"""
if "/unit/" in request.node.nodeid:
return
from app.services.plugin_runner import PluginRunner
from app.services.validator_service import ValidatorService
async def _mock_run(self, plugin):
from app.models.domain import CrawlResult
return CrawlResult(
plugin_name=plugin.name,
proxies=[ProxyRaw("192.168.100.10", 8080, "http")],
success_count=1,
)
async def _mock_validate(self, ip: str, port: int, protocol: str = "http"):
return True, 1.23
monkeypatch.setattr(PluginRunner, "run", _mock_run)
monkeypatch.setattr(ValidatorService, "validate", _mock_validate)

View File

@@ -4,10 +4,14 @@
"""
import pytest
from tests.task_utils import poll_task_until_terminal
class TestFullWorkflow:
"""测试完整工作流"""
@pytest.mark.network
@pytest.mark.slow
@pytest.mark.asyncio
async def test_proxy_management_workflow(self, client):
"""测试代理管理完整工作流
@@ -35,12 +39,18 @@ class TestFullWorkflow:
# 3. 触发所有插件爬取
response = await client.post("/api/plugins/crawl-all")
assert response.status_code == 200
crawl_result = response.json()["data"]
task_id = response.json()["data"]["task_id"]
task_data = await poll_task_until_terminal(
client, task_id, max_rounds=400, interval=0.5
)
assert task_data is not None
assert task_data["status"] in ("completed", "failed", "cancelled")
# 4. 获取更新后的统计
response = await client.get("/api/proxies/stats")
updated_stats = response.json()["data"]
assert "total" in initial_stats and "total" in updated_stats
# 5. 导出代理(所有格式)
for fmt in ["csv", "txt", "json"]:
response = await client.get(f"/api/proxies/export/{fmt}")
@@ -50,6 +60,8 @@ class TestFullWorkflow:
response = await client.delete("/api/proxies/clean-invalid")
assert response.status_code == 200
@pytest.mark.network
@pytest.mark.slow
@pytest.mark.asyncio
async def test_plugin_management_workflow(self, client):
"""测试插件管理完整工作流
@@ -93,6 +105,12 @@ class TestFullWorkflow:
# 6. 触发爬取
response = await client.post(f"/api/plugins/{plugin_id}/crawl")
assert response.status_code == 200
crawl_task_id = response.json()["data"]["task_id"]
crawl_task = await poll_task_until_terminal(
client, crawl_task_id, max_rounds=140, interval=0.5
)
assert crawl_task is not None
assert crawl_task["status"] in ("completed", "failed", "cancelled")
@pytest.mark.asyncio
async def test_scheduler_workflow(self, client):

View File

@@ -1,6 +1,8 @@
"""插件 API 集成测试 - 测试 /api/plugins/* 所有接口"""
import pytest
from tests.task_utils import poll_task_until_terminal
class TestPluginsAPI:
"""测试插件相关 API"""
@@ -116,10 +118,11 @@ class TestPluginsAPI:
data = response.json()
assert data["code"] == 200
@pytest.mark.network
@pytest.mark.slow
@pytest.mark.asyncio
async def test_crawl_plugin(self, client):
"""测试 POST /api/plugins/{id}/crawl - 异步任务模式"""
import asyncio
response = await client.get("/api/plugins")
plugins = response.json()["data"]["plugins"]
if not plugins:
@@ -133,18 +136,11 @@ class TestPluginsAPI:
assert "task_id" in data["data"]
task_id = data["data"]["task_id"]
# 轮询任务状态
task_data = None
for _ in range(10):
await asyncio.sleep(0.3)
res = await client.get(f"/api/tasks/{task_id}")
assert res.status_code == 200
task_data = res.json()["data"]
if task_data["status"] in ("completed", "failed", "cancelled"):
break
task_data = await poll_task_until_terminal(
client, task_id, max_rounds=140, interval=0.5
)
assert task_data is not None
assert task_data["status"] in ("completed", "cancelled")
assert task_data["status"] in ("completed", "failed", "cancelled")
@pytest.mark.asyncio
async def test_crawl_nonexistent_plugin(self, client):
@@ -152,10 +148,11 @@ class TestPluginsAPI:
response = await client.post("/api/plugins/nonexistent_plugin/crawl")
assert response.status_code == 404
@pytest.mark.network
@pytest.mark.slow
@pytest.mark.asyncio
async def test_crawl_all_plugins(self, client):
"""测试 POST /api/plugins/crawl-all - 异步任务模式"""
import asyncio
response = await client.post("/api/plugins/crawl-all")
assert response.status_code == 200
data = response.json()
@@ -163,15 +160,8 @@ class TestPluginsAPI:
assert "task_id" in data["data"]
task_id = data["data"]["task_id"]
# 轮询任务状态
task_data = None
for _ in range(10):
await asyncio.sleep(0.3)
res = await client.get(f"/api/tasks/{task_id}")
assert res.status_code == 200
task_data = res.json()["data"]
if task_data["status"] in ("completed", "failed", "cancelled"):
break
task_data = await poll_task_until_terminal(
client, task_id, max_rounds=400, interval=0.5
)
assert task_data is not None
assert task_data["status"] in ("completed", "cancelled")
assert task_data["status"] in ("completed", "failed", "cancelled")

View File

@@ -14,6 +14,7 @@ class TestProxiesAPI:
assert data["code"] == 200
assert "data" in data
assert "total" in data["data"]
assert "pending" in data["data"]
assert "available" in data["data"]
assert "scheduler_running" in data["data"]
@@ -68,6 +69,17 @@ class TestProxiesAPI:
# 可能返回 200(有数据) 或 404(无数据)
assert response.status_code in [200, 404]
@pytest.mark.asyncio
async def test_delete_proxy_post_json(self, client, sample_proxy):
"""测试 POST /api/proxies/delete-one前端默认路径兼容 IPv6"""
response = await client.post(
"/api/proxies/delete-one",
json={"ip": sample_proxy["ip"], "port": sample_proxy["port"]},
)
assert response.status_code == 200
data = response.json()
assert data["code"] == 200
@pytest.mark.asyncio
async def test_delete_proxy(self, client, sample_proxy):
"""测试 DELETE /api/proxies/{ip}/{port}"""
@@ -76,6 +88,19 @@ class TestProxiesAPI:
data = response.json()
assert data["code"] == 200
@pytest.mark.asyncio
async def test_delete_one_ipv6(self, client, db, proxy_repo):
"""POST delete-one 可删除含冒号的 IP路径 DELETE 无法可靠表达)"""
await proxy_repo.insert_or_update(db, "2001:db8::1", 18080, "http", 40)
r = await client.post(
"/api/proxies/delete-one",
json={"ip": "2001:db8::1", "port": 18080},
)
assert r.status_code == 200
assert r.json()["code"] == 200
left = await proxy_repo.get_by_ip_port(db, "2001:db8::1", 18080)
assert left is None
@pytest.mark.asyncio
async def test_delete_nonexistent_proxy(self, client):
"""测试 DELETE /api/proxies/{ip}/{port} - 不存在的代理"""

View File

@@ -1,6 +1,17 @@
"""调度器 API 集成测试 - 测试 /api/scheduler/* 所有接口"""
import pytest
from app.api.deps import get_settings_repo
from app.repositories.settings_repo import SettingsRepository
class FailingSettingsRepository(SettingsRepository):
"""save 恒为 False用于覆盖「设置保存失败」分支非 MagicMock。"""
@staticmethod
async def save(db, settings):
return False
class TestSchedulerAPI:
"""测试调度器相关 API"""
@@ -93,18 +104,17 @@ class TestSchedulerAPI:
assert job is not None
@pytest.mark.asyncio
async def test_start_scheduler_db_save_failure(self, client, monkeypatch):
async def test_start_scheduler_db_save_failure(self, client, app):
"""测试启动调度器时数据库保存失败应返回 running=False"""
from app.repositories.settings_repo import SettingsRepository
# lifespan 启动时调度器可能已自动启动,先停止它
await client.post("/api/scheduler/stop")
async def mock_save(*args, **kwargs):
return False
app.dependency_overrides[get_settings_repo] = lambda: FailingSettingsRepository()
try:
response = await client.post("/api/scheduler/start")
finally:
app.dependency_overrides.pop(get_settings_repo, None)
monkeypatch.setattr(SettingsRepository, "save", mock_save)
response = await client.post("/api/scheduler/start")
assert response.status_code == 200
data = response.json()
assert data["code"] == 200

View File

@@ -12,7 +12,7 @@ class TestSettingsAPI:
assert response.status_code == 200
data = response.json()
assert data["code"] == 200
assert "crawl_timeout" in data["data"]
assert "crawl_timeout" not in data["data"]
assert "validation_timeout" in data["data"]
assert "auto_validate" in data["data"]
@@ -21,17 +21,16 @@ class TestSettingsAPI:
"""测试 GET /api/settings 返回结构"""
response = await client.get("/api/settings")
data = response.json()["data"]
# 验证所有预期的设置项
expected_keys = [
"crawl_timeout",
"validation_timeout",
"max_retries",
"default_concurrency",
"min_proxy_score",
"proxy_expiry_days",
"auto_validate",
"auto_validate_after_crawl",
"validate_interval_minutes",
"validation_targets",
]
for key in expected_keys:
assert key in data, f"缺少设置项: {key}"
@@ -40,65 +39,45 @@ class TestSettingsAPI:
async def test_save_settings(self, client):
"""测试 POST /api/settings"""
settings = {
"crawl_timeout": 45,
"validation_timeout": 15,
"max_retries": 5,
"default_concurrency": 100,
"min_proxy_score": 10,
"proxy_expiry_days": 14,
"auto_validate": True,
"auto_validate_after_crawl": False,
"validate_interval_minutes": 60,
"validation_targets": [
"http://httpbin.org/ip",
],
}
response = await client.post("/api/settings", json=settings)
assert response.status_code == 200
data = response.json()
assert data["code"] == 200
# 验证返回的数据与提交的一致
for key, value in settings.items():
assert data["data"][key] == value
@pytest.mark.asyncio
async def test_save_settings_partial(self, client):
"""测试 POST /api/settings - 部分更新(实际上会替换所有)"""
# 先获取当前设置
response = await client.get("/api/settings")
current_settings = response.json()["data"]
# 修改部分设置
new_settings = current_settings.copy()
new_settings["crawl_timeout"] = 60
new_settings["validation_timeout"] = 25
new_settings["auto_validate"] = False
response = await client.post("/api/settings", json=new_settings)
assert response.status_code == 200
data = response.json()
assert data["data"]["crawl_timeout"] == 60
assert data["data"]["validation_timeout"] == 25
assert data["data"]["auto_validate"] is False
@pytest.mark.asyncio
async def test_save_settings_validation_error(self, client):
"""测试 POST /api/settings - 验证错误"""
# crawl_timeout 必须在 5-120 之间
invalid_settings = {
"crawl_timeout": 200, # 超出范围
"validation_timeout": 10,
"max_retries": 3,
"default_concurrency": 50,
"min_proxy_score": 0,
"proxy_expiry_days": 7,
"auto_validate": True,
"validate_interval_minutes": 30,
}
response = await client.post("/api/settings", json=invalid_settings)
assert response.status_code == 422 # 验证错误
@pytest.mark.asyncio
async def test_save_settings_invalid_type(self, client):
"""测试 POST /api/settings - 无效类型"""
invalid_settings = {
"crawl_timeout": "invalid", # 应该是整数
"validation_timeout": 10,
"max_retries": 3,
"validation_timeout": 100,
"default_concurrency": 50,
"min_proxy_score": 0,
"proxy_expiry_days": 7,
@@ -108,31 +87,62 @@ class TestSettingsAPI:
response = await client.post("/api/settings", json=invalid_settings)
assert response.status_code == 422
@pytest.mark.asyncio
async def test_save_settings_invalid_type(self, client):
"""测试 POST /api/settings - 无效类型"""
invalid_settings = {
"validation_timeout": 10,
"default_concurrency": "invalid",
"min_proxy_score": 0,
"proxy_expiry_days": 7,
"auto_validate": True,
"validate_interval_minutes": 30,
}
response = await client.post("/api/settings", json=invalid_settings)
assert response.status_code == 422
@pytest.mark.asyncio
async def test_save_settings_ignores_deprecated_crawl_timeout(self, client):
"""旧客户端若仍提交 crawl_timeout应忽略且保存成功"""
response = await client.get("/api/settings")
base = response.json()["data"]
payload = {**base, "crawl_timeout": 999}
response = await client.post("/api/settings", json=payload)
assert response.status_code == 200
again = (await client.get("/api/settings")).json()["data"]
assert "crawl_timeout" not in again
@pytest.mark.asyncio
async def test_save_settings_ignores_obsolete_max_retries(self, client):
"""已移除的 max_retries 键若仍被提交,应忽略。"""
response = await client.get("/api/settings")
base = response.json()["data"]
payload = {**base, "max_retries": 9}
response = await client.post("/api/settings", json=payload)
assert response.status_code == 200
again = (await client.get("/api/settings")).json()["data"]
assert "max_retries" not in again
@pytest.mark.asyncio
async def test_settings_roundtrip(self, client):
"""测试设置读写一致性"""
# 生成随机但有效的设置
import random
test_settings = {
"crawl_timeout": random.randint(10, 60),
"validation_timeout": random.randint(5, 30),
"max_retries": random.randint(1, 5),
"default_concurrency": random.randint(20, 100),
"min_proxy_score": random.randint(0, 50),
"proxy_expiry_days": random.randint(1, 14),
"auto_validate": random.choice([True, False]),
"validate_interval_minutes": random.randint(10, 120),
}
# 写入设置
response = await client.post("/api/settings", json=test_settings)
assert response.status_code == 200
# 读取设置
response = await client.get("/api/settings")
saved_settings = response.json()["data"]
# 验证一致性
for key, value in test_settings.items():
assert saved_settings[key] == value, f"设置项 {key} 不一致"
@@ -140,9 +150,7 @@ class TestSettingsAPI:
async def test_settings_roundtrip_with_validation_targets(self, client):
"""测试设置读写一致性 - 包含数组类型的 validation_targets"""
test_settings = {
"crawl_timeout": 30,
"validation_timeout": 10,
"max_retries": 3,
"default_concurrency": 50,
"min_proxy_score": 0,
"proxy_expiry_days": 7,
@@ -154,13 +162,11 @@ class TestSettingsAPI:
],
}
# 写入设置
response = await client.post("/api/settings", json=test_settings)
assert response.status_code == 200
data = response.json()
assert data["data"]["validation_targets"] == test_settings["validation_targets"]
# 读取设置
response = await client.get("/api/settings")
saved_settings = response.json()["data"]
assert saved_settings["validation_targets"] == test_settings["validation_targets"]
@@ -179,7 +185,6 @@ class TestSettingsAPI:
data = response.json()
assert data["data"]["validation_targets"] == []
# 读取确认
response = await client.get("/api/settings")
saved_settings = response.json()["data"]
assert saved_settings["validation_targets"] == []

View File

@@ -0,0 +1 @@
# Test support package (non-mock plugin doubles, etc.)

View File

@@ -0,0 +1,19 @@
"""供 PluginRunner 等测试使用的真实插件子类(非 unittest.mock"""
from typing import List
from app.core.plugin_system.base import BaseCrawlerPlugin
from app.models.domain import ProxyRaw
class UnhealthyPlugin(BaseCrawlerPlugin):
"""health_check 抛错,用于验证 Runner 对异常的统计与落库。"""
name = "test_unhealthy_runner"
display_name = "TestUnhealthy"
description = "PluginRunner health_check failure test double"
async def crawl(self) -> List[ProxyRaw]:
return []
async def health_check(self) -> bool:
raise RuntimeError("network down")

22
tests/task_utils.py Normal file
View File

@@ -0,0 +1,22 @@
"""测试用异步任务轮询工具"""
import asyncio
from typing import Any, Dict, Optional
async def poll_task_until_terminal(
client,
task_id: str,
*,
max_rounds: int,
interval: float,
) -> Optional[Dict[str, Any]]:
"""轮询任务直到终态或超时。返回最后一次 task data。"""
task_data = None
for _ in range(max_rounds):
await asyncio.sleep(interval)
res = await client.get(f"/api/tasks/{task_id}")
assert res.status_code == 200
task_data = res.json()["data"]
if task_data["status"] in ("completed", "failed", "cancelled"):
break
return task_data

View File

@@ -106,6 +106,14 @@ class TestProxyListRequest:
assert request.page_size == 50
assert request.protocol == "https"
def test_pool_filter_pending_available(self):
r1 = ProxyListRequest(pool_filter="pending")
assert r1.pool_filter == "pending"
r2 = ProxyListRequest(pool_filter="all")
assert r2.pool_filter is None
with pytest.raises(Exception):
ProxyListRequest(pool_filter="invalid")
class TestSettingsSchema:
"""测试 SettingsSchema"""
@@ -113,16 +121,22 @@ class TestSettingsSchema:
def test_default_settings(self):
"""测试默认设置"""
settings = SettingsSchema()
assert settings.crawl_timeout == 30
assert settings.validation_timeout == 10
assert settings.validation_timeout == 6
assert settings.default_concurrency == 120
assert settings.auto_validate is True
assert settings.auto_validate_after_crawl is False
def test_custom_settings(self):
"""测试自定义设置"""
settings = SettingsSchema(crawl_timeout=60, auto_validate=False)
assert settings.crawl_timeout == 60
settings = SettingsSchema(validation_timeout=25, auto_validate=False)
assert settings.validation_timeout == 25
assert settings.auto_validate is False
def test_settings_schema_ignores_unknown_fields(self):
s = SettingsSchema.model_validate({"validation_timeout": 10, "crawl_timeout": 99})
assert "crawl_timeout" not in s.model_dump()
assert s.validation_timeout == 10
class TestBatchDeleteRequest:
"""测试 BatchDeleteRequest"""

View File

@@ -81,23 +81,25 @@ class TestProxyRepository:
@pytest.mark.asyncio
async def test_iter_batches(self, db, proxy_repo):
"""测试流式分批读取"""
# 插入 5 条测试数据
"""测试流式分批读取(与库内已有数据共存,只校验增量与分批形状)"""
async with db.execute("SELECT COUNT(*) FROM proxies") as c:
before = (await c.fetchone())[0]
for i in range(5):
await proxy_repo.insert_or_update(db, f"192.168.1.{i}", 8000 + i, "http", 10)
await proxy_repo.insert_or_update(db, f"192.168.99.{i}", 8000 + i, "http", 10)
async with db.execute("SELECT COUNT(*) FROM proxies") as c:
after = (await c.fetchone())[0]
assert after == before + 5
batches = []
async for batch in proxy_repo.iter_batches(db, batch_size=2):
batches.append(batch)
assert len(batches) == 3
assert len(batches[0]) == 2
assert len(batches[1]) == 2
assert len(batches[2]) == 1
# 清理
assert sum(len(b) for b in batches) == after
assert len(batches[-1]) in (1, 2)
assert all(len(b) <= 2 for b in batches)
for i in range(5):
await proxy_repo.delete(db, f"192.168.1.{i}", 8000 + i)
await proxy_repo.delete(db, f"192.168.99.{i}", 8000 + i)
@pytest.mark.asyncio
async def test_batch_delete(self, db, proxy_repo):
@@ -121,6 +123,38 @@ class TestProxyRepository:
"""测试获取统计信息"""
stats = await proxy_repo.get_stats(db)
assert "total" in stats
assert "pending" in stats
assert "available" in stats
assert "avg_score" in stats
assert "http_count" in stats
@pytest.mark.asyncio
async def test_get_today_new_count_only_validated_available(self, db, proxy_repo):
"""今日新增不计待验证;仅今日创建且 validated=1、score>0"""
base = await proxy_repo.get_today_new_count(db)
await proxy_repo.upsert_from_crawl(db, "192.168.88.20", 9020, "http", 0)
assert await proxy_repo.get_today_new_count(db) == base
await proxy_repo.insert_or_update(db, "192.168.88.21", 9021, "http", 55)
assert await proxy_repo.get_today_new_count(db) == base + 1
await proxy_repo.delete(db, "192.168.88.20", 9020)
await proxy_repo.delete(db, "192.168.88.21", 9021)
@pytest.mark.asyncio
async def test_upsert_many_from_crawl(self, db, proxy_repo):
from app.models.domain import ProxyRaw
raws = [
ProxyRaw("10.0.0.1", 18080, "http"),
ProxyRaw("10.0.0.2", 18081, "socks5"),
]
await proxy_repo.upsert_many_from_crawl(db, raws, 0)
await db.commit()
p1 = await proxy_repo.get_by_ip_port(db, "10.0.0.1", 18080)
assert p1 is not None
assert p1.validated == 0
p2 = await proxy_repo.get_by_ip_port(db, "10.0.0.2", 18081)
assert p2.protocol == "socks5"
await proxy_repo.delete(db, "10.0.0.1", 18080)
await proxy_repo.delete(db, "10.0.0.2", 18081)