feat: JSON config, quality scoring and dashboard; settings and crawl flow

- Backend now reads config/app.json; pytest uses config/app.test.json via set_config_file and no longer relies on environment variables; pydantic-settings removed.

- Frontend API/WebSocket endpoints are injected from config/webui.json via Vite define.

- Proxy score is now computed from latency and random-pick count; added use_count and proxy_scoring; saving settings also starts/stops the scheduler to match.

- Dashboard gains two pie charts (available vs. pending, by protocol); the settings page drops the scheduler start/stop buttons and relocates "validate now"; once crawl-all fully finishes, a full validation is submitted automatically.

- Removed script/settings_maintain.py (previously marked for deletion).

Made-with: Cursor
Author: 祀梦
Date:   2026-04-05 16:08:32 +08:00
Parent: 07248ff4ee
Commit: 7bc6d4e4de

31 changed files with 643 additions and 280 deletions


@@ -26,6 +26,7 @@ def format_proxy(proxy) -> dict:
"response_time_ms": proxy.response_time_ms,
"last_check": proxy.last_check.isoformat() if proxy.last_check else None,
"validated": getattr(proxy, "validated", 0),
"use_count": int(getattr(proxy, "use_count", 0) or 0),
}


@@ -11,6 +11,7 @@ from app.core.plugin_system.registry import registry
from app.repositories.proxy_repo import ProxyRepository
from app.repositories.settings_repo import SettingsRepository, DEFAULT_SETTINGS
from app.services.validator_service import ValidatorService
from app.services.proxy_scoring import compute_proxy_quality_score
from app.services.plugin_runner import PluginRunner
from app.services.scheduler_service import SchedulerService
from app.api.ws_manager import ConnectionManager
@@ -63,12 +64,21 @@ async def lifespan(app: FastAPI):
return
if existing.validated == 0:
if is_valid:
lat_ms = (
float(latency)
if latency is not None and float(latency) > 0
else None
)
uc = int(getattr(existing, "use_count", 0) or 0)
q_score = compute_proxy_quality_score(
lat_ms, uc, app_settings
)
await proxy_repo.insert_or_update(
db,
proxy.ip,
proxy.port,
proxy.protocol,
score=app_settings.score_valid,
score=q_score,
)
if latency:
await proxy_repo.update_response_time(
@@ -78,12 +88,21 @@ async def lifespan(app: FastAPI):
await proxy_repo.delete(db, proxy.ip, proxy.port)
else:
if is_valid:
lat_ms = (
float(latency)
if latency is not None and float(latency) > 0
else None
)
uc = int(getattr(existing, "use_count", 0) or 0)
q_score = compute_proxy_quality_score(
lat_ms, uc, app_settings
)
await proxy_repo.insert_or_update(
db,
proxy.ip,
proxy.port,
proxy.protocol,
score=app_settings.score_valid,
score=q_score,
)
if latency:
await proxy_repo.update_response_time(


@@ -4,7 +4,8 @@ from pydantic import BaseModel
from app.services.plugin_service import PluginService
from app.services.plugin_runner import PluginRunner
from app.core.execution import JobExecutor, CrawlJob
from app.core.execution import JobExecutor, CrawlJob, ValidateAllJob
from app.core.log import logger
from app.core.exceptions import PluginNotFoundException
from app.api.deps import get_plugin_service, get_plugin_runner, get_executor
from app.api.common import success_response, format_plugin
@@ -106,7 +107,7 @@ async def crawl_all(
def _create_crawl_all_aggregator(job_ids, executor):
"""创建一个简单的聚合 Job查询所有子 Job 的状态汇总"""
"""创建一个简单的聚合 Job查询所有子 Job 的状态汇总;正常结束时自动提交一次全量验证"""
from app.core.execution.job import Job
import asyncio
@@ -177,6 +178,13 @@ def _create_crawl_all_aggregator(job_ids, executor):
}
if self.is_cancelled:
result["cancelled"] = True
else:
v_job = ValidateAllJob(validator_pool=executor.worker_pool)
result["validate_all_task_id"] = executor.submit_job(v_job)
logger.info(
"Crawl-all finished; submitted ValidateAllJob %s",
result["validate_all_task_id"],
)
return result
return CrawlAllAggregator()
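
Illustration (not part of the diff): a minimal client-side sketch of chaining the two tasks. The /api/tasks/{id} route and the status values below are assumptions for illustration, not the project's confirmed API:

import asyncio
import httpx  # assumed HTTP client; any async client works

async def wait_for_validate_all_id(base_url: str, crawl_all_task_id: str):
    """Poll the aggregate crawl-all task until it ends; on a normal finish its
    result carries validate_all_task_id for the auto-submitted ValidateAllJob."""
    async with httpx.AsyncClient(base_url=base_url) as client:
        while True:
            task = (await client.get(f"/api/tasks/{crawl_all_task_id}")).json()
            if task.get("status") in ("finished", "failed", "cancelled"):  # assumed states
                result = task.get("result") or {}
                return result.get("validate_all_task_id")  # absent when cancelled
            await asyncio.sleep(1.0)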


@@ -43,6 +43,18 @@ async def save_settings(
scheduler.interval_minutes = new_interval
logger.info(f"Scheduler interval updated to {new_interval} minutes")
want_run = bool(request.auto_validate)
if want_run and not scheduler.running:
try:
await scheduler.start()
except Exception as e:
logger.error(f"Failed to start scheduler after settings save: {e}")
elif not want_run and scheduler.running:
try:
await scheduler.stop()
except Exception as e:
logger.error(f"Failed to stop scheduler after settings save: {e}")
# Hot-resize the worker pool
if worker_pool and worker_pool.worker_count != request.default_concurrency:
await worker_pool.resize(request.default_concurrency)


@@ -1,11 +1,18 @@
"""核心基础设施包"""
from .config import settings
from .log import logger
from .exceptions import ProxyPoolException, PluginNotFoundException, ProxyNotFoundException, ValidationException
"""核心基础设施包
注意:不在此模块导入 config / log以免测试在 conftest 中调用 set_config_file 之前
就把配置定死。请使用:
from app.core.config import settings
from app.core.log import logger
"""
from app.core.exceptions import (
PluginNotFoundException,
ProxyNotFoundException,
ProxyPoolException,
ValidationException,
)
__all__ = [
"settings",
"logger",
"ProxyPoolException",
"PluginNotFoundException",
"ProxyNotFoundException",


@@ -1,77 +1,111 @@
"""全局配置 - 使用 Pydantic Settings 支持环境变量和 .env 文件"""
import os
from typing import List
from pydantic import AliasChoices, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
"""全局配置:仅从 JSON 文件加载,不使用环境变量。"""
from __future__ import annotations
import json
import logging
from typing import Any, Dict, List
class Settings(BaseSettings):
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
)
from pydantic import BaseModel, ConfigDict
# Database config (env var PROXYPOOL_DB_PATH takes precedence, isolating pytest from production)
db_path: str = Field(
default="db/proxies.sqlite",
validation_alias=AliasChoices("PROXYPOOL_DB_PATH", "DB_PATH", "db_path"),
)
from app.core.config_paths import project_root, resolved_config_path
# API server config
host: str = "127.0.0.1"
port: int = 18080
logger = logging.getLogger("ProxyPool")
# Validator config
validator_timeout: int = 5
validator_max_concurrency: int = 200
validator_connect_timeout: int = 3
# Crawler config
crawler_num_validators: int = 50
crawler_max_queue_size: int = 500
# Logging config
log_level: str = "INFO"
log_dir: str = "logs"
# WebSocket stats broadcast interval (no DB queries when there are no connections)
ws_stats_interval_seconds: int = 1
# Export config
export_max_records: int = 10000
# Proxy scoring config
score_valid: int = 10
score_invalid: int = -5
score_min: int = 0
score_max: int = 100
# Validation target config
validator_test_urls: List[str] = [
_DEFAULTS: Dict[str, Any] = {
"db_path": "db/proxies.sqlite",
"host": "127.0.0.1",
"port": 18080,
"validator_timeout": 5,
"validator_max_concurrency": 200,
"validator_connect_timeout": 3,
"crawler_num_validators": 50,
"crawler_max_queue_size": 500,
"log_level": "INFO",
"log_dir": "logs",
"ws_stats_interval_seconds": 1,
"export_max_records": 10000,
"score_valid": 10,
"score_invalid": -5,
"score_min": 0,
"score_max": 100,
"score_latency_ref_ms": 500.0,
"score_use_penalty_per_pick": 2.5,
"score_max_use_penalty": 70.0,
"score_default_latency_ms": 1500.0,
"validator_test_urls": [
"http://httpbin.org/ip",
"https://httpbin.org/ip",
"http://api.ipify.org",
"https://api.ipify.org",
"http://www.baidu.com",
"http://www.qq.com",
]
# Plugin config
plugins_dir: str = "plugins"
# CORS config - Pydantic v2 automatically parses a comma-separated string into List[str]
cors_origins: List[str] = [
],
"plugins_dir": "plugins",
"cors_origins": [
"http://localhost:8080",
"http://localhost:5173",
"http://127.0.0.1:18081",
"http://localhost:18081",
]
],
"run_network_tests": False,
}
def _load_merged_dict() -> Dict[str, Any]:
data = dict(_DEFAULTS)
path = resolved_config_path()
if not path.is_file():
logger.warning("配置文件不存在,使用内置默认项: %s", path)
return data
try:
with path.open(encoding="utf-8") as f:
file_data = json.load(f)
if not isinstance(file_data, dict):
logger.error("配置文件须为 JSON 对象,已忽略: %s", path)
return data
data.update(file_data)
except (json.JSONDecodeError, OSError) as e:
logger.error("读取配置文件失败,使用内置默认项: %s (%s)", path, e)
return data
class AppSettings(BaseModel):
"""应用配置(与 config/app.json 字段一致)"""
model_config = ConfigDict(extra="ignore")
db_path: str
host: str
port: int
validator_timeout: int
validator_max_concurrency: int
validator_connect_timeout: int
crawler_num_validators: int
crawler_max_queue_size: int
log_level: str
log_dir: str
ws_stats_interval_seconds: int
export_max_records: int
score_valid: int
score_invalid: int
score_min: int
score_max: int
score_latency_ref_ms: float
score_use_penalty_per_pick: float
score_max_use_penalty: float
score_default_latency_ms: float
validator_test_urls: List[str]
plugins_dir: str
cors_origins: List[str]
run_network_tests: bool = False
@property
def base_dir(self) -> str:
return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
return str(project_root())
# Global settings instance (loaded once at startup)
settings = Settings()
# Global singleton (loaded at first import in the process, using the current resolved_config_path())
settings = AppSettings.model_validate(_load_merged_dict())
# Alias for legacy code
Settings = AppSettings
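
Illustration (not part of the diff): a config/app.json the loader above would accept. Every key mirrors _DEFAULTS; any key left out falls back to the built-in default:

{
  "db_path": "db/proxies.sqlite",
  "host": "127.0.0.1",
  "port": 18080,
  "score_latency_ref_ms": 500.0,
  "score_use_penalty_per_pick": 2.5,
  "score_max_use_penalty": 70.0,
  "score_default_latency_ms": 1500.0,
  "run_network_tests": false
}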

app/core/config_paths.py (new file, +24)

@@ -0,0 +1,24 @@
"""配置文件路径解析(先于 config 加载,供测试在导入应用前切换配置文件)"""
from __future__ import annotations
from pathlib import Path
from typing import Optional
_CONFIG_FILE: Optional[Path] = None
def project_root() -> Path:
"""项目根目录(含 config/、app/ 的目录)"""
return Path(__file__).resolve().parents[2]
def set_config_file(path: Path) -> None:
"""指定使用的应用配置文件(仅测试应在导入 app.core.config 之前调用)"""
global _CONFIG_FILE
_CONFIG_FILE = Path(path)
def resolved_config_path() -> Path:
if _CONFIG_FILE is not None:
return _CONFIG_FILE
return project_root() / "config" / "app.json"
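
Illustration (not part of the diff): a minimal conftest.py sketch of the intended test flow, assuming config/app.test.json exists as described in the commit message:

# conftest.py (sketch)
from app.core.config_paths import project_root, set_config_file

# Must run before anything imports app.core.config; otherwise the module-level
# `settings` singleton is already pinned to config/app.json.
set_config_file(project_root() / "config" / "app.test.json")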


@@ -75,6 +75,14 @@ async def init_db():
)
logger.info("Migrated: added validated column")
try:
await db.execute("SELECT use_count FROM proxies LIMIT 1")
except Exception:
await db.execute(
"ALTER TABLE proxies ADD COLUMN use_count INTEGER NOT NULL DEFAULT 0"
)
logger.info("Migrated: added use_count column")
await db.execute("CREATE INDEX IF NOT EXISTS idx_score ON proxies(score)")
await db.execute("CREATE INDEX IF NOT EXISTS idx_protocol ON proxies(protocol)")
await db.execute("CREATE INDEX IF NOT EXISTS idx_last_check ON proxies(last_check)")


@@ -31,6 +31,7 @@ class Proxy:
last_check: Optional[datetime] = None
created_at: Optional[datetime] = None
validated: int = 0 # 0 = pending, 1 = validated (eligible for scoring and external pick-up)
use_count: int = 0 # cumulative times picked via the random API (used for down-weighting)
@dataclass


@@ -26,6 +26,7 @@ class ProxyResponse(BaseModel):
response_time_ms: Optional[float] = None
last_check: Optional[str] = None
validated: int = 0
use_count: int = 0
class PluginResponse(BaseModel):


@@ -25,6 +25,8 @@ def _to_datetime(value: Union[str, datetime, None]) -> Optional[datetime]:
def _row_to_proxy(row: Tuple) -> Proxy:
validated = int(row[7]) if len(row) > 7 and row[7] is not None else 0
use_count = int(row[8]) if len(row) > 8 and row[8] is not None else 0
return Proxy(
ip=row[0],
port=row[1],
@@ -33,12 +35,13 @@ def _row_to_proxy(row: Tuple) -> Proxy:
response_time_ms=row[4],
last_check=_to_datetime(row[5]),
created_at=_to_datetime(row[6]),
validated=int(row[7]) if len(row) > 7 and row[7] is not None else 0,
validated=validated,
use_count=use_count,
)
_SELECT_PROXY_COLS = (
"ip, port, protocol, score, response_time_ms, last_check, created_at, validated"
"ip, port, protocol, score, response_time_ms, last_check, created_at, validated, use_count"
)
@@ -58,8 +61,8 @@ class ProxyRepository:
try:
await db.execute(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1)
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated, use_count)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1, 0)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
@@ -87,13 +90,14 @@ class ProxyRepository:
protocol = "http"
await db.execute(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated, use_count)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0, 0)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
last_check = CURRENT_TIMESTAMP,
validated = 0
validated = 0,
use_count = 0
""",
(ip, port, protocol, initial_score),
)
@@ -113,13 +117,14 @@ class ProxyRepository:
rows.append((p.ip, p.port, proto, initial_score))
await db.executemany(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated, use_count)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0, 0)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
last_check = CURRENT_TIMESTAMP,
validated = 0
validated = 0,
use_count = 0
""",
rows,
)
@@ -176,6 +181,29 @@ class ProxyRepository:
logger.error(f"update_response_time failed: {e}", exc_info=True)
return False
@staticmethod
async def set_use_count_and_score(
db: aiosqlite.Connection,
ip: str,
port: int,
use_count: int,
score: int,
) -> bool:
try:
await db.execute(
"""
UPDATE proxies
SET use_count = ?, score = ?, last_check = CURRENT_TIMESTAMP
WHERE ip = ? AND port = ? AND validated = 1
""",
(use_count, score, ip, port),
)
await db.commit()
return db.total_changes > 0
except Exception as e:
logger.error(f"set_use_count_and_score failed: {e}", exc_info=True)
return False
@staticmethod
async def delete(db: aiosqlite.Connection, ip: str, port: int) -> None:
await db.execute("DELETE FROM proxies WHERE ip = ? AND port = ?", (ip, port))
@@ -369,21 +397,34 @@ class ProxyRepository:
@staticmethod
async def get_stats(db: aiosqlite.Connection) -> dict:
"""统计快照。
协议计数http/https/socks*)仅含已验证且 score>0 的可用代理,供首页图表与「可用」口径一致。
pending_* 为待验证池validated=0按协议分布。
"""
query = """
SELECT
COUNT(*) as total,
COUNT(CASE WHEN validated = 0 THEN 1 END) as pending,
COUNT(CASE WHEN validated = 1 AND score > 0 THEN 1 END) as available,
(SELECT AVG(score) FROM proxies WHERE validated = 1 AND score > 0) as avg_score,
COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
COUNT(CASE WHEN protocol = 'socks5' THEN 1 END) as socks5_count
COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'http' THEN 1 END) as http_count,
COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'https' THEN 1 END) as https_count,
COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'socks4' THEN 1 END) as socks4_count,
COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'socks5' THEN 1 END) as socks5_count,
COUNT(CASE WHEN validated = 0 AND protocol = 'http' THEN 1 END) as pending_http_count,
COUNT(CASE WHEN validated = 0 AND protocol = 'https' THEN 1 END) as pending_https_count,
COUNT(CASE WHEN validated = 0 AND protocol = 'socks4' THEN 1 END) as pending_socks4_count,
COUNT(CASE WHEN validated = 0 AND protocol = 'socks5' THEN 1 END) as pending_socks5_count,
COUNT(CASE WHEN validated = 1 AND score <= 0 THEN 1 END) as invalid_count,
(SELECT AVG(response_time_ms) FROM proxies WHERE validated = 1 AND score > 0
AND response_time_ms IS NOT NULL AND response_time_ms > 0) as avg_response_ms
FROM proxies
"""
async with db.execute(query) as cursor:
row = await cursor.fetchone()
if row:
avg_lat = row[13]
return {
"total": row[0] or 0,
"pending": row[1] or 0,
@@ -393,6 +434,12 @@ class ProxyRepository:
"https_count": row[5] or 0,
"socks4_count": row[6] or 0,
"socks5_count": row[7] or 0,
"pending_http_count": row[8] or 0,
"pending_https_count": row[9] or 0,
"pending_socks4_count": row[10] or 0,
"pending_socks5_count": row[11] or 0,
"invalid_count": row[12] or 0,
"avg_response_ms": round(avg_lat, 2) if avg_lat is not None else None,
}
return {
"total": 0,
@@ -403,6 +450,12 @@ class ProxyRepository:
"https_count": 0,
"socks4_count": 0,
"socks5_count": 0,
"pending_http_count": 0,
"pending_https_count": 0,
"pending_socks4_count": 0,
"pending_socks5_count": 0,
"invalid_count": 0,
"avg_response_ms": None,
}
@staticmethod
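
Illustration (not part of the diff): the shape get_stats now returns, with made-up numbers. The dashboard's "available" pie reads the *_count fields; the "pending" pie reads the pending_*_count fields:

{
    "total": 120, "pending": 30, "available": 75, "avg_score": 62.4,
    "http_count": 40, "https_count": 20, "socks4_count": 5, "socks5_count": 10,
    "pending_http_count": 18, "pending_https_count": 6,
    "pending_socks4_count": 2, "pending_socks5_count": 4,
    "invalid_count": 15, "avg_response_ms": 843.27,
}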


@@ -0,0 +1,54 @@
"""代理质量分:延迟越低越高,被取用次数越多越低。
设计要点
--------
1. **延迟项**0100用平滑倒数把毫秒映射到质量避免线性过于极端。
``latency_quality = 100 / (1 + latency_ms / latency_ref_ms)``
在 ``latency_ref_ms`` 处约为 50 分;越快越接近 100。
2. **使用惩罚**:每次通过 API 随机取出代理视为一次「使用」,``use_count`` 递增;
惩罚 ``min(max_use_penalty, use_count * use_penalty_per_pick)`` 从延迟项上扣除。
3. **未知延迟**:尚无 ``response_time_ms`` 时用 ``default_latency_ms`` 代替,避免给满分。
验证失败仍走 ``update_score`` 扣分;验证成功则用本函数**覆盖**分数(与当前延迟、使用次数一致)。
"""
from __future__ import annotations
from typing import Optional
from app.core.config import Settings
def compute_proxy_quality_score(
latency_ms: Optional[float],
use_count: int,
settings: Settings,
) -> int:
"""根据延迟与累计使用次数计算 0100 的整数分。"""
ref = float(settings.score_latency_ref_ms)
penalty_per = float(settings.score_use_penalty_per_pick)
cap = float(settings.score_max_use_penalty)
default_lat = float(settings.score_default_latency_ms)
lo = int(settings.score_min)
hi = int(settings.score_max)
if ref <= 0:
ref = 500.0
if penalty_per < 0:
penalty_per = 0.0
if cap < 0:
cap = 0.0
if default_lat <= 0:
default_lat = 1500.0
ms = latency_ms
if ms is None or ms <= 0:
ms = default_lat
latency_quality = 100.0 / (1.0 + float(ms) / ref)
uses = max(0, int(use_count))
usage_penalty = min(cap, uses * penalty_per)
raw = latency_quality - usage_penalty
score = int(round(raw))
return max(lo, min(hi, score))
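
Illustration (not part of the diff): a worked example with the defaults above (ref = 500 ms, 2.5 points per pick capped at 70, unknown latency treated as 1500 ms):

# 250 ms latency, never picked:
#   latency_quality = 100 / (1 + 250/500) = 66.67, penalty = 0    -> score 67
# 250 ms latency, picked 10 times:
#   penalty = min(70, 10 * 2.5) = 25, 66.67 - 25 = 41.67          -> score 42
# unknown latency, never picked:
#   ms = 1500, 100 / (1 + 1500/500) = 25                          -> score 25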


@@ -9,6 +9,8 @@ from app.core.db import get_db
from app.repositories.proxy_repo import ProxyRepository
from app.models.domain import Proxy
from app.core.log import logger
from app.core.config import settings as app_settings
from app.services.proxy_scoring import compute_proxy_quality_score
class ProxyService:
@@ -47,7 +49,19 @@ class ProxyService:
async def get_random_proxy(self) -> Optional[Proxy]:
async with get_db() as db:
return await self.proxy_repo.get_random(db)
p = await self.proxy_repo.get_random(db)
if not p:
return None
new_uc = int(getattr(p, "use_count", 0) or 0) + 1
q_score = compute_proxy_quality_score(
p.response_time_ms, new_uc, app_settings
)
await self.proxy_repo.set_use_count_and_score(
db, p.ip, p.port, new_uc, q_score
)
p.use_count = new_uc
p.score = q_score
return p
async def delete_proxy(self, ip: str, port: int) -> None:
async with get_db() as db: