diff --git a/.env.example b/.env.example index 80721c4..43869b5 100644 --- a/.env.example +++ b/.env.example @@ -1,38 +1,6 @@ -# 代理池系统配置文件示例 -# 复制此文件为 .env 并根据实际情况修改配置 - -# ==================== 数据库配置 ==================== -DB_PATH=db/proxies.sqlite - -# ==================== API服务配置 ==================== -HOST=0.0.0.0 -PORT=9949 - -# ==================== 验证器配置 ==================== -VALIDATOR_TIMEOUT=5 -VALIDATOR_MAX_CONCURRENCY=200 -VALIDATOR_CONNECT_TIMEOUT=3 - -# ==================== 爬虫配置 ==================== -CRAWLER_NUM_VALIDATORS=50 -CRAWLER_MAX_QUEUE_SIZE=500 - -# ==================== 日志配置 ==================== -LOG_LEVEL=INFO -LOG_DIR=logs - -# ==================== 导出配置 ==================== -EXPORT_MAX_RECORDS=10000 - -# ==================== 代理评分配置 ==================== -SCORE_VALID=10 -SCORE_INVALID=-5 -SCORE_MIN=0 -SCORE_MAX=100 - -# ==================== 插件配置 ==================== -PLUGINS_DIR=plugins - -# ==================== CORS配置 ==================== -# 允许的来源域名,用逗号分隔 -CORS_ORIGINS=http://localhost:8080,http://localhost:5173,http://localhost:9948 +# 本项目的运行参数已改为由 JSON 配置文件提供,不再使用环境变量。 +# +# 后端:编辑项目根目录下的 config/app.json +# 前端 dev/build:编辑项目根目录下的 config/webui.json(与 WebUI 同级的 config 目录) +# +# 测试专用配置:config/app.test.json(pytest 会自动选用,勿与生产库共用 db_path) diff --git a/.gitignore b/.gitignore index 08a3889..e5f5d28 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,7 @@ env/ *.sqlite *.sqlite3 *.db -# pytest 隔离库(PROXYPOOL_DB_PATH=db/proxies.test.sqlite),勿提交 +# pytest 隔离库(见 config/app.test.json 的 db_path),勿提交 **/proxies.test.sqlite proxies.test.sqlite *.db-shm diff --git a/WebUI/src/api/index.js b/WebUI/src/api/index.js index eda2f0f..3563cca 100644 --- a/WebUI/src/api/index.js +++ b/WebUI/src/api/index.js @@ -1,14 +1,15 @@ import axios from 'axios' import { showError } from '../utils/message' -/** @type {string} 默认 API 基础 URL */ -export const DEFAULT_API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:18080' +/** @type {string} 由项目根目录 
config/webui.json 注入(见 vite.config.js) */ +export const DEFAULT_API_BASE_URL = + typeof __WEBUI_API_BASE_URL__ !== 'undefined' ? __WEBUI_API_BASE_URL__ : 'http://127.0.0.1:18080' /** @type {number} 请求超时时间(毫秒) */ export const REQUEST_TIMEOUT = 120000 const api = axios.create({ - baseURL: import.meta.env.VITE_API_BASE_URL || DEFAULT_API_BASE_URL, + baseURL: DEFAULT_API_BASE_URL, timeout: REQUEST_TIMEOUT }) diff --git a/WebUI/src/components/ProtocolChart.vue b/WebUI/src/components/ProtocolChart.vue index 26781c3..e8c4f17 100644 --- a/WebUI/src/components/ProtocolChart.vue +++ b/WebUI/src/components/ProtocolChart.vue @@ -1,18 +1,18 @@ @@ -27,29 +27,73 @@ const props = defineProps({ data: { type: Object, default: () => ({}) + }, + /** available:仅已验证可用;pending:仅待验证池 */ + variant: { + type: String, + default: 'available', + validator: (v) => ['available', 'pending'].includes(v) + }, + /** 并排展示时略压低高度 */ + compact: { + type: Boolean, + default: false } }) +const titleText = computed(() => + props.variant === 'pending' ? '待验证 · 协议分布' : '可用代理 · 协议分布' +) + +const helpText = computed(() => + props.variant === 'pending' + ? '仅统计 validated=0 的待验证代理,与各协议在队列中的占比' + : '仅统计已验证且分数大于 0 的可用代理,不含待验证与低分条目' +) + +const emptyText = computed(() => + props.variant === 'pending' ? 
'暂无待验证代理' : '暂无可用代理' +) + const chartRef = ref(null) let chartInstance = null let resizeTimer = null const cachedColors = ref(null) // ==================== 计算属性 ==================== +const counts = computed(() => { + const d = props.data || {} + if (props.variant === 'pending') { + return { + http: d.pending_http_count || 0, + https: d.pending_https_count || 0, + socks4: d.pending_socks4_count || 0, + socks5: d.pending_socks5_count || 0 + } + } + return { + http: d.http_count || 0, + https: d.https_count || 0, + socks4: d.socks4_count || 0, + socks5: d.socks5_count || 0 + } +}) + const hasData = computed(() => { - const { http_count, https_count, socks4_count, socks5_count } = props.data - return (http_count || 0) + (https_count || 0) + (socks4_count || 0) + (socks5_count || 0) > 0 + const c = counts.value + return c.http + c.https + c.socks4 + c.socks5 > 0 }) const chartData = computed(() => { if (!cachedColors.value) return [] const colors = cachedColors.value + const c = counts.value return [ - { value: props.data.http_count || 0, name: 'HTTP', itemStyle: { color: colors.info } }, - { value: props.data.https_count || 0, name: 'HTTPS', itemStyle: { color: colors.success } }, - { value: props.data.socks4_count || 0, name: 'SOCKS4', itemStyle: { color: colors.primary } }, - { value: props.data.socks5_count || 0, name: 'SOCKS5', itemStyle: { color: colors.warning } } - ].filter(item => item.value > 0) + { value: c.http, name: 'HTTP', itemStyle: { color: colors.info } }, + { value: c.https, name: 'HTTPS', itemStyle: { color: colors.success } }, + { value: c.socks4, name: 'SOCKS4', itemStyle: { color: colors.primary } }, + { value: c.socks5, name: 'SOCKS5', itemStyle: { color: colors.warning } } + ].filter((item) => item.value > 0) }) const total = computed(() => @@ -141,11 +185,16 @@ function getChartOption() { function initChart() { if (!chartRef.value || !hasData.value) return - + loadColors() + if (chartInstance) { + updateChart() + return + } + chartInstance = 
echarts.init(chartRef.value) updateChart() - + window.addEventListener('resize', handleResize) } @@ -172,13 +221,21 @@ function destroyChart() { } // ==================== 监听 ==================== -watch(() => props.data, () => { - if (!chartInstance && hasData.value) { - initChart() - } else { - updateChart() - } -}, { deep: true }) +watch( + () => [props.data, props.variant, props.compact], + () => { + if (!hasData.value) { + destroyChart() + return + } + if (!chartInstance) { + initChart() + } else { + updateChart() + } + }, + { deep: true } +) // ==================== 生命周期 ==================== onMounted(() => { @@ -200,6 +257,14 @@ onUnmounted(() => { border: 1px solid var(--border); } +.chart-card--compact { + min-height: 340px; +} + +.chart-card--compact .chart-container { + height: 300px; +} + .chart-card:hover { border-color: var(--border-light); } diff --git a/WebUI/src/components/StatCard.vue b/WebUI/src/components/StatCard.vue index 811a226..a8702ef 100644 --- a/WebUI/src/components/StatCard.vue +++ b/WebUI/src/components/StatCard.vue @@ -25,7 +25,16 @@ const props = defineProps({ type: String, default: 'default', validator: (value) => - ['default', 'total', 'pending', 'available', 'new', 'score'].includes(value) + [ + 'default', + 'total', + 'pending', + 'available', + 'new', + 'score', + 'invalid', + 'latency' + ].includes(value) }, /** 图标组件 */ icon: { @@ -45,6 +54,9 @@ const props = defineProps({ }) const displayValue = computed(() => { + if (props.value === '—' || props.value === '-') { + return props.value + } const num = Number(props.value) if (!isNaN(num) && num > 9999) { return (num / 10000).toFixed(1) + 'w' @@ -95,6 +107,16 @@ const displayValue = computed(() => { filter: drop-shadow(0 0 8px rgba(146, 124, 255, 0.4)); } +.stat-card.invalid .stat-icon { + color: var(--danger, #f56c6c); + filter: drop-shadow(0 0 8px rgba(245, 108, 108, 0.35)); +} + +.stat-card.latency .stat-icon { + color: var(--info); + filter: drop-shadow(0 0 8px rgba(56, 189, 248, 
0.35)); +} + .stat-content { display: flex; align-items: center; diff --git a/WebUI/src/composables/useStatsWebSocket.js b/WebUI/src/composables/useStatsWebSocket.js index 7f9c7ba..0428236 100644 --- a/WebUI/src/composables/useStatsWebSocket.js +++ b/WebUI/src/composables/useStatsWebSocket.js @@ -9,12 +9,16 @@ const INITIAL_DELAY_MS = 1000 * @returns {string} */ export function resolveWebSocketStatsUrl() { - const explicit = import.meta.env.VITE_WS_URL + const explicit = + typeof __WEBUI_WS_URL__ !== 'undefined' ? String(__WEBUI_WS_URL__).trim() : '' if (explicit) { - const t = String(explicit).trim().replace(/\/$/, '') + const t = explicit.replace(/\/$/, '') return t.endsWith('/api/ws') ? t : `${t}/api/ws` } - const api = import.meta.env.VITE_API_BASE_URL || 'http://localhost:18080' + const api = + typeof __WEBUI_API_BASE_URL__ !== 'undefined' + ? __WEBUI_API_BASE_URL__ + : 'http://127.0.0.1:18080' const u = new URL(api) u.protocol = u.protocol === 'https:' ? 'wss:' : 'ws:' u.pathname = '/api/ws' diff --git a/WebUI/src/views/Dashboard.vue b/WebUI/src/views/Dashboard.vue index 5eb75a0..a3f8361 100644 --- a/WebUI/src/views/Dashboard.vue +++ b/WebUI/src/views/Dashboard.vue @@ -31,13 +31,32 @@ type="score" :icon="StarFilled" :value="avgScore" - label="平均分数" + label="平均分数(可用)" + /> + + - + + + + + + + +
- HTTP 代理 + HTTP(可用) {{ stats.http_count || 0 }}
- HTTPS 代理 + HTTPS(可用) {{ stats.https_count || 0 }}
- SOCKS 代理 + SOCKS(可用) {{ (stats.socks4_count || 0) + (stats.socks5_count || 0) }}
+
+ 低分(可清理) + {{ stats.invalid_count }} +
@@ -95,7 +118,9 @@ import { Timer, StarFilled, InfoFilled, - Clock + Clock, + Odometer, + WarningFilled } from '@element-plus/icons-vue' import { useProxyStore } from '../stores/proxy' import { formatNumber } from '../utils/format' @@ -113,6 +138,14 @@ const { start: startStatsWs } = useStatsWebSocket() const stats = computed(() => proxyStore.stats) const avgScore = computed(() => formatNumber(stats.value.avg_score || 0, 1)) +const latencyLabel = computed(() => { + const ms = stats.value.avg_response_ms + if (ms == null || ms === '' || Number(ms) <= 0) { + return '—' + } + return `${formatNumber(Number(ms), 1)} ms` +}) + async function refreshData() { await proxyStore.fetchStats() } @@ -170,6 +203,10 @@ onMounted(async () => { margin-bottom: 20px; } +.charts-inner { + height: 100%; +} + .status-row { margin-bottom: 20px; } @@ -214,6 +251,10 @@ onMounted(async () => { color: var(--primary); } +.status-value.warn { + color: var(--danger, #f56c6c); +} + @media (max-width: 768px) { .status-list { flex-direction: column; diff --git a/WebUI/src/views/Plugins.vue b/WebUI/src/views/Plugins.vue index dd59f15..53416a7 100644 --- a/WebUI/src/views/Plugins.vue +++ b/WebUI/src/views/Plugins.vue @@ -359,7 +359,7 @@ async function handleCrawlAll() { } await ElMessageBox.confirm( - `确定要运行所有 ${enabledPlugins.length} 个启用的插件吗?代理将先以「待验证」入库,需再执行「全部验证」后才会变为可用(除非已开启「爬取后立即验证」)。`, + `确定要运行所有 ${enabledPlugins.length} 个启用的插件吗?代理将先以「待验证」入库;全部插件爬取结束后会自动执行一次「全部验证」(若已开启「爬取后立即验证」,新入库条目也会在爬取时提前排队验证)。`, '批量爬取确认', { confirmButtonText: '开始爬取', @@ -405,7 +405,11 @@ async function handleCrawlAll() { crawlResults.value = merged } if (!data.cancelled) { - ElMessage.success('批量爬取完成') + ElMessage.success( + data.validate_all_task_id + ? 
'批量爬取完成,已自动启动全部验证' + : '批量爬取完成' + ) } await pluginsStore.fetchPlugins() } else { diff --git a/WebUI/src/views/ProxyList.vue b/WebUI/src/views/ProxyList.vue index 6325c93..c56bff5 100644 --- a/WebUI/src/views/ProxyList.vue +++ b/WebUI/src/views/ProxyList.vue @@ -113,9 +113,21 @@ + + + diff --git a/WebUI/src/views/Settings.vue b/WebUI/src/views/Settings.vue index 9bfe9ae..4f7c1aa 100644 --- a/WebUI/src/views/Settings.vue +++ b/WebUI/src/views/Settings.vue @@ -2,7 +2,7 @@
- + -
- - - 启动自动验证 - - - - - 停止自动验证 - - - - - 立即验证全部 - -
-
基础配置 - - - 保存配置 - +
+ + + 立即验证全部 + + + + 保存配置 + +
@@ -199,8 +178,6 @@ import { DocumentChecked, Tools, Timer, - VideoPlay, - VideoPause, Refresh } from '@element-plus/icons-vue' import { settingService } from '../services/settingService' @@ -210,11 +187,8 @@ import PageHeader from '../components/PageHeader.vue' // ==================== Composables ==================== const { schedulerRunning, - schedulerLoading, validating, fetchStatus, - startScheduler, - stopScheduler, validateNow } = useScheduler() @@ -248,7 +222,7 @@ const schedulerInfo = computed(() => { if (schedulerRunning.value) { return `验证调度器正在运行,每 ${settings.validate_interval_minutes} 分钟执行一次:优先验证待验证代理,再按检查时间复检已入库代理` } - return '验证调度器已停止,待验证代理不会自动检查;可在下方开启自动验证或点击「立即验证全部」' + return '验证调度器当前未运行。请在下方打开「启用自动验证」并保存配置以恢复定时任务;需要时可使用「基础配置」标题栏中的「立即验证全部」手动执行一轮全量验证。' }) // ==================== 表单验证规则 ==================== @@ -276,21 +250,6 @@ async function fetchSettings() { } } -// ==================== 调度器控制 ==================== -async function handleStartScheduler() { - await startScheduler( - (msg) => ElMessage.success(msg), - (msg) => ElMessage.error(msg) - ) -} - -async function handleStopScheduler() { - await stopScheduler( - (msg) => ElMessage.success(msg), - (msg) => ElMessage.error(msg) - ) -} - async function handleValidateNow() { try { await ElMessageBox.confirm( @@ -372,6 +331,13 @@ onMounted(() => { align-items: center; } +.header-actions { + display: flex; + align-items: center; + gap: 12px; + flex-wrap: wrap; +} + .card-title { font-size: 16px; font-weight: 600; @@ -403,15 +369,8 @@ onMounted(() => { color: var(--text-secondary); } -.scheduler-actions { - display: flex; - gap: 12px; - flex-wrap: wrap; - margin-bottom: 16px; -} - .scheduler-info { - margin-top: 8px; + margin-top: 0; } .settings-form { diff --git a/WebUI/src/vite-globals.d.ts b/WebUI/src/vite-globals.d.ts new file mode 100644 index 0000000..a1bce37 --- /dev/null +++ b/WebUI/src/vite-globals.d.ts @@ -0,0 +1,3 @@ +/** 由 vite.config.js define 注入(值来自项目根目录 config/webui.json) */ +declare 
const __WEBUI_API_BASE_URL__: string +declare const __WEBUI_WS_URL__: string diff --git a/WebUI/vite.config.js b/WebUI/vite.config.js index a04684c..85792e1 100644 --- a/WebUI/vite.config.js +++ b/WebUI/vite.config.js @@ -1,9 +1,25 @@ +import fs from 'node:fs' +import path from 'node:path' +import { fileURLToPath } from 'node:url' import { defineConfig } from 'vite' import vue from '@vitejs/plugin-vue' +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const webuiConfigPath = path.resolve(__dirname, '../config/webui.json') +let webui = { api_base_url: 'http://127.0.0.1:18080', ws_url: '' } +try { + webui = { ...webui, ...JSON.parse(fs.readFileSync(webuiConfigPath, 'utf-8')) } +} catch { + console.warn('[vite] 未读取 config/webui.json,使用默认 API 地址') +} + // https://vite.dev/config/ export default defineConfig({ plugins: [vue()], + define: { + __WEBUI_API_BASE_URL__: JSON.stringify(String(webui.api_base_url || '').trim() || 'http://127.0.0.1:18080'), + __WEBUI_WS_URL__: JSON.stringify(webui.ws_url != null ? 
String(webui.ws_url) : ''), + }, server: { port: 18081, // 支持 Vue Router 的 history 模式 diff --git a/app/api/common.py b/app/api/common.py index a0e93c5..a45dd91 100644 --- a/app/api/common.py +++ b/app/api/common.py @@ -26,6 +26,7 @@ def format_proxy(proxy) -> dict: "response_time_ms": proxy.response_time_ms, "last_check": proxy.last_check.isoformat() if proxy.last_check else None, "validated": getattr(proxy, "validated", 0), + "use_count": int(getattr(proxy, "use_count", 0) or 0), } diff --git a/app/api/lifespan.py b/app/api/lifespan.py index d2794bf..4de40d9 100644 --- a/app/api/lifespan.py +++ b/app/api/lifespan.py @@ -11,6 +11,7 @@ from app.core.plugin_system.registry import registry from app.repositories.proxy_repo import ProxyRepository from app.repositories.settings_repo import SettingsRepository, DEFAULT_SETTINGS from app.services.validator_service import ValidatorService +from app.services.proxy_scoring import compute_proxy_quality_score from app.services.plugin_runner import PluginRunner from app.services.scheduler_service import SchedulerService from app.api.ws_manager import ConnectionManager @@ -63,12 +64,21 @@ async def lifespan(app: FastAPI): return if existing.validated == 0: if is_valid: + lat_ms = ( + float(latency) + if latency is not None and float(latency) > 0 + else None + ) + uc = int(getattr(existing, "use_count", 0) or 0) + q_score = compute_proxy_quality_score( + lat_ms, uc, app_settings + ) await proxy_repo.insert_or_update( db, proxy.ip, proxy.port, proxy.protocol, - score=app_settings.score_valid, + score=q_score, ) if latency: await proxy_repo.update_response_time( @@ -78,12 +88,21 @@ async def lifespan(app: FastAPI): await proxy_repo.delete(db, proxy.ip, proxy.port) else: if is_valid: + lat_ms = ( + float(latency) + if latency is not None and float(latency) > 0 + else None + ) + uc = int(getattr(existing, "use_count", 0) or 0) + q_score = compute_proxy_quality_score( + lat_ms, uc, app_settings + ) await proxy_repo.insert_or_update( db, 
proxy.ip, proxy.port, proxy.protocol, - score=app_settings.score_valid, + score=q_score, ) if latency: await proxy_repo.update_response_time( diff --git a/app/api/routes/plugins.py b/app/api/routes/plugins.py index 07226ed..a470058 100644 --- a/app/api/routes/plugins.py +++ b/app/api/routes/plugins.py @@ -4,7 +4,8 @@ from pydantic import BaseModel from app.services.plugin_service import PluginService from app.services.plugin_runner import PluginRunner -from app.core.execution import JobExecutor, CrawlJob +from app.core.execution import JobExecutor, CrawlJob, ValidateAllJob +from app.core.log import logger from app.core.exceptions import PluginNotFoundException from app.api.deps import get_plugin_service, get_plugin_runner, get_executor from app.api.common import success_response, format_plugin @@ -106,7 +107,7 @@ async def crawl_all( def _create_crawl_all_aggregator(job_ids, executor): - """创建一个简单的聚合 Job,查询所有子 Job 的状态汇总""" + """创建一个简单的聚合 Job,查询所有子 Job 的状态汇总;正常结束时自动提交一次全量验证""" from app.core.execution.job import Job import asyncio @@ -177,6 +178,13 @@ def _create_crawl_all_aggregator(job_ids, executor): } if self.is_cancelled: result["cancelled"] = True + else: + v_job = ValidateAllJob(validator_pool=executor.worker_pool) + result["validate_all_task_id"] = executor.submit_job(v_job) + logger.info( + "Crawl-all finished; submitted ValidateAllJob %s", + result["validate_all_task_id"], + ) return result return CrawlAllAggregator() diff --git a/app/api/routes/settings.py b/app/api/routes/settings.py index b69f511..5a8b16d 100644 --- a/app/api/routes/settings.py +++ b/app/api/routes/settings.py @@ -43,6 +43,18 @@ async def save_settings( scheduler.interval_minutes = new_interval logger.info(f"Scheduler interval updated to {new_interval} minutes") + want_run = bool(request.auto_validate) + if want_run and not scheduler.running: + try: + await scheduler.start() + except Exception as e: + logger.error(f"Failed to start scheduler after settings save: {e}") + elif not want_run 
and scheduler.running: + try: + await scheduler.stop() + except Exception as e: + logger.error(f"Failed to stop scheduler after settings save: {e}") + # 热更新 Worker 池大小 if worker_pool and worker_pool.worker_count != request.default_concurrency: await worker_pool.resize(request.default_concurrency) diff --git a/app/core/__init__.py b/app/core/__init__.py index ccae523..c48d547 100644 --- a/app/core/__init__.py +++ b/app/core/__init__.py @@ -1,11 +1,18 @@ -"""核心基础设施包""" -from .config import settings -from .log import logger -from .exceptions import ProxyPoolException, PluginNotFoundException, ProxyNotFoundException, ValidationException +"""核心基础设施包 + +注意:不在此模块导入 config / log,以免测试在 conftest 中调用 set_config_file 之前 +就把配置定死。请使用: + from app.core.config import settings + from app.core.log import logger +""" +from app.core.exceptions import ( + PluginNotFoundException, + ProxyNotFoundException, + ProxyPoolException, + ValidationException, +) __all__ = [ - "settings", - "logger", "ProxyPoolException", "PluginNotFoundException", "ProxyNotFoundException", diff --git a/app/core/config.py b/app/core/config.py index 1f50b59..884b1ec 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -1,77 +1,111 @@ -"""全局配置 - 使用 Pydantic Settings 支持环境变量和 .env 文件""" -import os -from typing import List -from pydantic import AliasChoices, Field -from pydantic_settings import BaseSettings, SettingsConfigDict +"""全局配置:仅从 JSON 文件加载,不使用环境变量。""" +from __future__ import annotations +import json +import logging +from typing import Any, Dict, List -class Settings(BaseSettings): - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - extra="ignore", - ) +from pydantic import BaseModel, ConfigDict - # 数据库配置(环境变量 PROXYPOOL_DB_PATH 优先,供 pytest 与生产隔离) - db_path: str = Field( - default="db/proxies.sqlite", - validation_alias=AliasChoices("PROXYPOOL_DB_PATH", "DB_PATH", "db_path"), - ) +from app.core.config_paths import project_root, resolved_config_path - # API 服务配置 - host: 
str = "127.0.0.1" - port: int = 18080 +logger = logging.getLogger("ProxyPool") - # 验证器配置 - validator_timeout: int = 5 - validator_max_concurrency: int = 200 - validator_connect_timeout: int = 3 - - # 爬虫配置 - crawler_num_validators: int = 50 - crawler_max_queue_size: int = 500 - - # 日志配置 - log_level: str = "INFO" - log_dir: str = "logs" - - # WebSocket:统计广播间隔(秒);无连接时不查库 - ws_stats_interval_seconds: int = 1 - - # 导出配置 - export_max_records: int = 10000 - - # 代理评分配置 - score_valid: int = 10 - score_invalid: int = -5 - score_min: int = 0 - score_max: int = 100 - - # 验证目标配置 - validator_test_urls: List[str] = [ +_DEFAULTS: Dict[str, Any] = { + "db_path": "db/proxies.sqlite", + "host": "127.0.0.1", + "port": 18080, + "validator_timeout": 5, + "validator_max_concurrency": 200, + "validator_connect_timeout": 3, + "crawler_num_validators": 50, + "crawler_max_queue_size": 500, + "log_level": "INFO", + "log_dir": "logs", + "ws_stats_interval_seconds": 1, + "export_max_records": 10000, + "score_valid": 10, + "score_invalid": -5, + "score_min": 0, + "score_max": 100, + "score_latency_ref_ms": 500.0, + "score_use_penalty_per_pick": 2.5, + "score_max_use_penalty": 70.0, + "score_default_latency_ms": 1500.0, + "validator_test_urls": [ "http://httpbin.org/ip", "https://httpbin.org/ip", "http://api.ipify.org", "https://api.ipify.org", "http://www.baidu.com", "http://www.qq.com", - ] - - # 插件配置 - plugins_dir: str = "plugins" - - # CORS 配置 - Pydantic v2 会自动将逗号分隔的字符串解析为 List[str] - cors_origins: List[str] = [ + ], + "plugins_dir": "plugins", + "cors_origins": [ "http://localhost:8080", "http://localhost:5173", "http://127.0.0.1:18081", "http://localhost:18081", - ] + ], + "run_network_tests": False, +} + + +def _load_merged_dict() -> Dict[str, Any]: + data = dict(_DEFAULTS) + path = resolved_config_path() + if not path.is_file(): + logger.warning("配置文件不存在,使用内置默认项: %s", path) + return data + try: + with path.open(encoding="utf-8") as f: + file_data = json.load(f) + if not 
isinstance(file_data, dict): + logger.error("配置文件须为 JSON 对象,已忽略: %s", path) + return data + data.update(file_data) + except (json.JSONDecodeError, OSError) as e: + logger.error("读取配置文件失败,使用内置默认项: %s (%s)", path, e) + return data + + +class AppSettings(BaseModel): + """应用配置(与 config/app.json 字段一致)""" + + model_config = ConfigDict(extra="ignore") + + db_path: str + host: str + port: int + validator_timeout: int + validator_max_concurrency: int + validator_connect_timeout: int + crawler_num_validators: int + crawler_max_queue_size: int + log_level: str + log_dir: str + ws_stats_interval_seconds: int + export_max_records: int + score_valid: int + score_invalid: int + score_min: int + score_max: int + score_latency_ref_ms: float + score_use_penalty_per_pick: float + score_max_use_penalty: float + score_default_latency_ms: float + validator_test_urls: List[str] + plugins_dir: str + cors_origins: List[str] + run_network_tests: bool = False @property def base_dir(self) -> str: - return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + return str(project_root()) -# 全局配置实例(启动时加载一次) -settings = Settings() +# 全局单例(进程内首次导入时按当前 resolved_config_path() 加载) +settings = AppSettings.model_validate(_load_merged_dict()) + +# 历史代码别名 +Settings = AppSettings diff --git a/app/core/config_paths.py b/app/core/config_paths.py new file mode 100644 index 0000000..aaf4f1f --- /dev/null +++ b/app/core/config_paths.py @@ -0,0 +1,24 @@ +"""配置文件路径解析(先于 config 加载,供测试在导入应用前切换配置文件)""" +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +_CONFIG_FILE: Optional[Path] = None + + +def project_root() -> Path: + """项目根目录(含 config/、app/ 的目录)""" + return Path(__file__).resolve().parents[2] + + +def set_config_file(path: Path) -> None: + """指定使用的应用配置文件(仅测试应在导入 app.core.config 之前调用)""" + global _CONFIG_FILE + _CONFIG_FILE = Path(path) + + +def resolved_config_path() -> Path: + if _CONFIG_FILE is not None: + return _CONFIG_FILE + return 
project_root() / "config" / "app.json" diff --git a/app/core/db.py b/app/core/db.py index 8afd324..94dcf20 100644 --- a/app/core/db.py +++ b/app/core/db.py @@ -75,6 +75,14 @@ async def init_db(): ) logger.info("Migrated: added validated column") + try: + await db.execute("SELECT use_count FROM proxies LIMIT 1") + except Exception: + await db.execute( + "ALTER TABLE proxies ADD COLUMN use_count INTEGER NOT NULL DEFAULT 0" + ) + logger.info("Migrated: added use_count column") + await db.execute("CREATE INDEX IF NOT EXISTS idx_score ON proxies(score)") await db.execute("CREATE INDEX IF NOT EXISTS idx_protocol ON proxies(protocol)") await db.execute("CREATE INDEX IF NOT EXISTS idx_last_check ON proxies(last_check)") diff --git a/app/models/domain.py b/app/models/domain.py index eea9751..7d3df43 100644 --- a/app/models/domain.py +++ b/app/models/domain.py @@ -31,6 +31,7 @@ class Proxy: last_check: Optional[datetime] = None created_at: Optional[datetime] = None validated: int = 0 # 0 待验证 1 已验证(可参与分数与对外取用) + use_count: int = 0 # 被随机 API 取用的累计次数(用于降权) @dataclass diff --git a/app/models/schemas.py b/app/models/schemas.py index ad66194..ef5aabd 100644 --- a/app/models/schemas.py +++ b/app/models/schemas.py @@ -26,6 +26,7 @@ class ProxyResponse(BaseModel): response_time_ms: Optional[float] = None last_check: Optional[str] = None validated: int = 0 + use_count: int = 0 class PluginResponse(BaseModel): diff --git a/app/repositories/proxy_repo.py b/app/repositories/proxy_repo.py index c70a239..139ae2c 100644 --- a/app/repositories/proxy_repo.py +++ b/app/repositories/proxy_repo.py @@ -25,6 +25,8 @@ def _to_datetime(value: Union[str, datetime, None]) -> Optional[datetime]: def _row_to_proxy(row: Tuple) -> Proxy: + validated = int(row[7]) if len(row) > 7 and row[7] is not None else 0 + use_count = int(row[8]) if len(row) > 8 and row[8] is not None else 0 return Proxy( ip=row[0], port=row[1], @@ -33,12 +35,13 @@ def _row_to_proxy(row: Tuple) -> Proxy: response_time_ms=row[4], 
last_check=_to_datetime(row[5]), created_at=_to_datetime(row[6]), - validated=int(row[7]) if len(row) > 7 and row[7] is not None else 0, + validated=validated, + use_count=use_count, ) _SELECT_PROXY_COLS = ( - "ip, port, protocol, score, response_time_ms, last_check, created_at, validated" + "ip, port, protocol, score, response_time_ms, last_check, created_at, validated, use_count" ) @@ -58,8 +61,8 @@ class ProxyRepository: try: await db.execute( """ - INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated) - VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1) + INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated, use_count) + VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1, 0) ON CONFLICT(ip, port) DO UPDATE SET protocol = excluded.protocol, score = excluded.score, @@ -87,13 +90,14 @@ class ProxyRepository: protocol = "http" await db.execute( """ - INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated) - VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0) + INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated, use_count) + VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0, 0) ON CONFLICT(ip, port) DO UPDATE SET protocol = excluded.protocol, score = excluded.score, last_check = CURRENT_TIMESTAMP, - validated = 0 + validated = 0, + use_count = 0 """, (ip, port, protocol, initial_score), ) @@ -113,13 +117,14 @@ class ProxyRepository: rows.append((p.ip, p.port, proto, initial_score)) await db.executemany( """ - INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated) - VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0) + INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated, use_count) + VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0, 0) ON CONFLICT(ip, port) DO UPDATE SET protocol = excluded.protocol, score = excluded.score, last_check = 
CURRENT_TIMESTAMP, - validated = 0 + validated = 0, + use_count = 0 """, rows, ) @@ -176,6 +181,29 @@ class ProxyRepository: logger.error(f"update_response_time failed: {e}", exc_info=True) return False + @staticmethod + async def set_use_count_and_score( + db: aiosqlite.Connection, + ip: str, + port: int, + use_count: int, + score: int, + ) -> bool: + try: + await db.execute( + """ + UPDATE proxies + SET use_count = ?, score = ?, last_check = CURRENT_TIMESTAMP + WHERE ip = ? AND port = ? AND validated = 1 + """, + (use_count, score, ip, port), + ) + await db.commit() + return db.total_changes > 0 + except Exception as e: + logger.error(f"set_use_count_and_score failed: {e}", exc_info=True) + return False + @staticmethod async def delete(db: aiosqlite.Connection, ip: str, port: int) -> None: await db.execute("DELETE FROM proxies WHERE ip = ? AND port = ?", (ip, port)) @@ -369,21 +397,34 @@ class ProxyRepository: @staticmethod async def get_stats(db: aiosqlite.Connection) -> dict: + """统计快照。 + + 协议计数(http/https/socks*)仅含已验证且 score>0 的可用代理,供首页图表与「可用」口径一致。 + pending_* 为待验证池(validated=0)按协议分布。 + """ query = """ SELECT COUNT(*) as total, COUNT(CASE WHEN validated = 0 THEN 1 END) as pending, COUNT(CASE WHEN validated = 1 AND score > 0 THEN 1 END) as available, (SELECT AVG(score) FROM proxies WHERE validated = 1 AND score > 0) as avg_score, - COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count, - COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count, - COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count, - COUNT(CASE WHEN protocol = 'socks5' THEN 1 END) as socks5_count + COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'http' THEN 1 END) as http_count, + COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'https' THEN 1 END) as https_count, + COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'socks4' THEN 1 END) as socks4_count, + COUNT(CASE WHEN validated = 1 AND score > 0 AND protocol = 'socks5' THEN 1 END) as 
socks5_count, + COUNT(CASE WHEN validated = 0 AND protocol = 'http' THEN 1 END) as pending_http_count, + COUNT(CASE WHEN validated = 0 AND protocol = 'https' THEN 1 END) as pending_https_count, + COUNT(CASE WHEN validated = 0 AND protocol = 'socks4' THEN 1 END) as pending_socks4_count, + COUNT(CASE WHEN validated = 0 AND protocol = 'socks5' THEN 1 END) as pending_socks5_count, + COUNT(CASE WHEN validated = 1 AND score <= 0 THEN 1 END) as invalid_count, + (SELECT AVG(response_time_ms) FROM proxies WHERE validated = 1 AND score > 0 + AND response_time_ms IS NOT NULL AND response_time_ms > 0) as avg_response_ms FROM proxies """ async with db.execute(query) as cursor: row = await cursor.fetchone() if row: + avg_lat = row[13] return { "total": row[0] or 0, "pending": row[1] or 0, @@ -393,6 +434,12 @@ class ProxyRepository: "https_count": row[5] or 0, "socks4_count": row[6] or 0, "socks5_count": row[7] or 0, + "pending_http_count": row[8] or 0, + "pending_https_count": row[9] or 0, + "pending_socks4_count": row[10] or 0, + "pending_socks5_count": row[11] or 0, + "invalid_count": row[12] or 0, + "avg_response_ms": round(avg_lat, 2) if avg_lat is not None else None, } return { "total": 0, @@ -403,6 +450,12 @@ class ProxyRepository: "https_count": 0, "socks4_count": 0, "socks5_count": 0, + "pending_http_count": 0, + "pending_https_count": 0, + "pending_socks4_count": 0, + "pending_socks5_count": 0, + "invalid_count": 0, + "avg_response_ms": None, } @staticmethod diff --git a/app/services/proxy_scoring.py b/app/services/proxy_scoring.py new file mode 100644 index 0000000..651935a --- /dev/null +++ b/app/services/proxy_scoring.py @@ -0,0 +1,54 @@ +"""代理质量分:延迟越低越高,被取用次数越多越低。 + +设计要点 +-------- +1. **延迟项**(0~100):用平滑倒数把毫秒映射到质量,避免线性过于极端。 + ``latency_quality = 100 / (1 + latency_ms / latency_ref_ms)`` + 在 ``latency_ref_ms`` 处约为 50 分;越快越接近 100。 + +2. **使用惩罚**:每次通过 API 随机取出代理视为一次「使用」,``use_count`` 递增; + 惩罚 ``min(max_use_penalty, use_count * use_penalty_per_pick)`` 从延迟项上扣除。 + +3. 
def compute_proxy_quality_score(
    latency_ms: Optional[float],
    use_count: int,
    settings: Settings,
) -> int:
    """Compute an integer quality score from latency and cumulative use count.

    The score is ``100 / (1 + latency_ms / ref)`` minus a capped usage
    penalty of ``use_count * penalty_per_pick``, rounded to the nearest
    integer and clamped to ``[settings.score_min, settings.score_max]``.

    Args:
        latency_ms: Measured latency in milliseconds. ``None``, NaN, zero
            and negative values are treated as "unknown" and replaced by
            ``settings.score_default_latency_ms``.
        use_count: Number of times the proxy has been handed out; negative
            values are treated as zero.
        settings: Object exposing ``score_latency_ref_ms``,
            ``score_use_penalty_per_pick``, ``score_max_use_penalty``,
            ``score_default_latency_ms``, ``score_min`` and ``score_max``.

    Returns:
        The clamped integer score.
    """
    ref = float(settings.score_latency_ref_ms)
    penalty_per = float(settings.score_use_penalty_per_pick)
    cap = float(settings.score_max_use_penalty)
    default_lat = float(settings.score_default_latency_ms)
    lo = int(settings.score_min)
    hi = int(settings.score_max)

    # Fall back to built-in values when the configuration is nonsensical.
    if ref <= 0:
        ref = 500.0
    if penalty_per < 0:
        penalty_per = 0.0
    if cap < 0:
        cap = 0.0
    if default_lat <= 0:
        default_lat = 1500.0

    ms = latency_ms
    # `not ms > 0` is deliberately used instead of `ms <= 0`: every
    # comparison with NaN is False, so NaN would slip past `<= 0` and make
    # round() raise ValueError further down.
    if ms is None or not ms > 0:
        ms = default_lat

    latency_quality = 100.0 / (1.0 + float(ms) / ref)
    uses = max(0, int(use_count))
    usage_penalty = min(cap, uses * penalty_per)
    raw = latency_quality - usage_penalty
    score = int(round(raw))
    return max(lo, min(hi, score))
async def get_random_proxy(self) -> Optional[Proxy]:
    """Pick a random proxy and record the pick as one use.

    The proxy's use counter is incremented and its score recomputed from
    its stored latency and the new counter, then both are written back
    through the repository.

    Returns:
        The picked proxy, or ``None`` when the repository returns nothing.
        The returned object's ``use_count`` and ``score`` are updated only
        when the repository reports that the new values were stored, so
        the caller never sees values that were not persisted.
    """
    async with get_db() as db:
        picked = await self.proxy_repo.get_random(db)
        if not picked:
            return None

        # NOTE(review): this is a read-then-write on use_count; concurrent
        # picks of the same proxy may lose an increment. Confirm whether
        # that is acceptable.
        next_use_count = int(getattr(picked, "use_count", 0) or 0) + 1
        new_score = compute_proxy_quality_score(
            picked.response_time_ms, next_use_count, app_settings
        )
        stored = await self.proxy_repo.set_use_count_and_score(
            db, picked.ip, picked.port, next_use_count, new_score
        )
        if stored:
            picked.use_count = next_use_count
            picked.score = new_score
        else:
            # Still hand the proxy out (best effort), but do not pretend the
            # counter/score change happened.
            logger.warning(
                f"get_random_proxy: use_count/score not persisted for "
                f"{picked.ip}:{picked.port}"
            )
        return picked
"score_default_latency_ms": 1500.0, + "validator_test_urls": [ + "http://httpbin.org/ip", + "https://httpbin.org/ip" + ], + "plugins_dir": "plugins", + "cors_origins": [ + "http://localhost:8080", + "http://127.0.0.1:18081", + "http://localhost:18081" + ], + "run_network_tests": false +} diff --git a/config/webui.json b/config/webui.json new file mode 100644 index 0000000..dbb7077 --- /dev/null +++ b/config/webui.json @@ -0,0 +1,4 @@ +{ + "api_base_url": "http://127.0.0.1:18080", + "ws_url": "" +} diff --git a/requirements.txt b/requirements.txt index 2806dfa..2437e96 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,5 @@ aiohttp==3.9.1 aiohttp-socks==0.9.1 beautifulsoup4==4.12.3 lxml==5.1.0 -pydantic-settings==2.8.1 httpx[http2]==0.27.0 curl-cffi>=0.7.0 diff --git a/script/settings_maintain.py b/script/settings_maintain.py deleted file mode 100644 index ff428a1..0000000 --- a/script/settings_maintain.py +++ /dev/null @@ -1,44 +0,0 @@ -"""维护 SQLite settings 表:删除废弃键并写入推荐验证参数。 - -请在项目根目录执行(与 start.bat 同级的上一级): - python script/settings_maintain.py - -改库后需重启应用或在 WebUI 保存一次设置,WorkerPool / Validator 才会重载并发与超时。 -""" -import asyncio -import os -import sys - -_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -if _ROOT not in sys.path: - sys.path.insert(0, _ROOT) - -_SETTINGS_MAINTENANCE_SQL = """ -DELETE FROM settings WHERE key = 'crawl_timeout'; -DELETE FROM settings WHERE key = 'max_retries'; -INSERT INTO settings (key, value, updated_at) VALUES ('validation_timeout', '6', CURRENT_TIMESTAMP) -ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = CURRENT_TIMESTAMP; -INSERT INTO settings (key, value, updated_at) VALUES ('default_concurrency', '120', CURRENT_TIMESTAMP) -ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = CURRENT_TIMESTAMP; -""" - - -async def _run() -> None: - import aiosqlite - - from app.core.db import DB_PATH, ensure_db_dir - - ensure_db_dir() - if not os.path.isfile(DB_PATH): - 
def _network_tests_enabled() -> bool:
    """Tell whether network-dependent tests are enabled in the loaded settings.

    Reads the ``run_network_tests`` attribute of the application settings
    and treats a missing attribute as disabled. The settings module is
    imported at call time rather than at module import, presumably so the
    test config file selected at the top of this conftest is already in
    effect when the settings load.
    """
    from app.core.config import settings as loaded_settings

    flag = getattr(loaded_settings, "run_network_tests", False)
    return bool(flag)