fix: 全面修复代码问题并优化架构

修复问题:
- 添加缺失的 httpx 依赖到 requirements.txt
- 修复前端批量删除参数格式与后端不匹配(数组->对象数组)
- 移除 app/api/main.py 中重复创建 app 的冗余代码
- 修复 Plugins.vue v-model 直接修改 store 状态的 Vue 警告
- 修复 README 端口/启动命令文档与实际配置不一致
- 修正 pytest.ini 过时配置 (asyncio_default_fixture_loop_scope)
- 修复 WebUI index.html 语言设置为 zh-CN
- 修复 .gitignore 错误忽略 tests/ 目录

后端优化:
- 修复调度器默认间隔从 5 秒改为 30 分钟,避免无节制验证
- 修复 validate_all_now 在调度器停止时无法执行的 bug
- 设置保存后热更新运行中调度器的验证间隔
- 将 update_score 优化为原子单事务 SQL,消除并发竞态
- 导出功能改为真正的流式分批读取(iter_batches),降低大导出内存占用
- ProxyResponse Schema 补齐 response_time_ms 字段
- 日志级别改为从配置动态读取,不再硬编码 INFO
- 清理 validator_service 中的冗余 try/finally 代码

插件健壮性:
- 修复 ip3366/ip89/kuaidaili/proxylist_download/speedx/yundaili/proxyscrape
  的端口范围检查和 IPv6 地址解析问题(改用 rsplit + 1-65535 校验)
- 修复 PluginService.list_plugins 并发竞争条件
- 修复 run_all_plugins 去重逻辑与数据库 UNIQUE 约束保持一致
- 修复 proxyscrape 异常时错误跳过 fallback 的 bug

测试:
- 新增 7 个插件解析单元测试
- 新增 update_score 自动删除和 iter_batches 流式读取测试
- 全部 74 个测试通过
This commit is contained in:
祀梦
2026-04-04 21:03:43 +08:00
parent 875e61f17e
commit 4ef7931941
27 changed files with 212 additions and 113 deletions

View File

@@ -48,6 +48,6 @@ def create_scheduler_service(db_settings: dict | None = None) -> SchedulerServic
)
svc = SchedulerService(validation_queue=queue, proxy_repo=proxy_repo)
svc.interval_minutes = db_settings.get(
"validate_interval_minutes", app_settings.validator_timeout
"validate_interval_minutes", 30
)
return svc

View File

@@ -53,6 +53,3 @@ def create_app() -> FastAPI:
}
return app
app = create_app()

View File

@@ -1,9 +1,10 @@
"""设置相关路由"""
from fastapi import APIRouter
from fastapi import APIRouter, Request
from app.core.db import get_db
from app.repositories.settings_repo import SettingsRepository
from app.models.schemas import SettingsSchema
from app.api.common import success_response, error_response
from app.core.log import logger
router = APIRouter(prefix="/api/settings", tags=["settings"])
settings_repo = SettingsRepository()
@@ -17,9 +18,18 @@ async def get_settings():
@router.post("")
async def save_settings(request: SettingsSchema):
async def save_settings(request: SettingsSchema, http_request: Request):
async with get_db() as db:
success = await settings_repo.save(db, request.model_dump())
if not success:
return error_response("保存设置失败", 500)
# 热更新运行中调度器的间隔时间
scheduler = getattr(http_request.app.state, "scheduler_service", None)
if scheduler and scheduler.running:
new_interval = request.validate_interval_minutes
if scheduler.interval_minutes != new_interval:
scheduler.interval_minutes = new_interval
logger.info(f"Scheduler interval updated to {new_interval} minutes")
return success_response("保存设置成功", request.model_dump())

View File

@@ -29,7 +29,16 @@ console_handler.setFormatter(formatter)
# 获取标准 logger
logger = logging.getLogger('ProxyPool')
logger.setLevel(logging.INFO)
# 尝试从配置读取日志级别,默认 INFO
try:
from app.core.config import settings
log_level = getattr(logging, settings.log_level.upper(), logging.INFO)
except Exception:
log_level = logging.INFO
logger.setLevel(log_level)
file_handler.setLevel(log_level)
console_handler.setLevel(log_level)
# 防止重复添加 handler(如模块重导入)
if not logger.handlers:

View File

@@ -23,6 +23,7 @@ class ProxyResponse(BaseModel):
port: int
protocol: str
score: int
response_time_ms: Optional[float] = None
last_check: Optional[str] = None

View File

@@ -53,8 +53,11 @@ class Ip3366Plugin(BaseHTTPPlugin):
protocol = tds[4].get_text(strip=True).lower() if len(tds) > 4 else "http"
if protocol not in VALID_PROTOCOLS:
protocol = "http"
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit():
results.append(ProxyRaw(ip, int(port), protocol))
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit() and 1 <= int(port) <= 65535:
try:
results.append(ProxyRaw(ip, int(port), protocol))
except ValueError:
continue
if results:
logger.info(f"{self.display_name} 解析完成,获得 {len(results)} 个潜在代理")

View File

@@ -34,8 +34,11 @@ class Ip89Plugin(BaseHTTPPlugin):
if len(tds) >= 2:
ip = tds[0].get_text(strip=True)
port = tds[1].get_text(strip=True)
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit():
results.append(ProxyRaw(ip, int(port), "http"))
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit() and 1 <= int(port) <= 65535:
try:
results.append(ProxyRaw(ip, int(port), "http"))
except ValueError:
continue
await asyncio.sleep(random.uniform(1, 2))

View File

@@ -61,8 +61,11 @@ class KuaiDaiLiPlugin(BaseHTTPPlugin):
protocol = tds[4].get_text(strip=True).lower() if len(tds) > 4 else "http"
if protocol not in VALID_PROTOCOLS:
protocol = "http"
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit():
results.append(ProxyRaw(ip, int(port), protocol))
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit() and 1 <= int(port) <= 65535:
try:
results.append(ProxyRaw(ip, int(port), protocol))
except ValueError:
continue
await asyncio.sleep(random.uniform(5, 8))
if results:

View File

@@ -59,12 +59,14 @@ class ProxyListDownloadPlugin(BaseHTTPPlugin):
line = line.strip()
if not line or ":" not in line:
continue
parts = line.split(":")
if len(parts) >= 2:
ip = parts[0].strip()
port = parts[1].strip()
if ip and port.isdigit():
ip, _, port = line.rpartition(":")
ip = ip.strip()
port = port.strip()
if ip and port.isdigit() and 1 <= int(port) <= 65535:
try:
results.append(ProxyRaw(ip, int(port), protocol))
except ValueError:
continue
return results
async def crawl(self) -> List[ProxyRaw]:

View File

@@ -42,12 +42,14 @@ class ProxyScrapePlugin(BaseHTTPPlugin):
line = line.strip()
if not line or ":" not in line:
continue
parts = line.split(":")
if len(parts) >= 2:
ip = parts[0].strip()
port_str = parts[1].strip()
if port_str.isdigit():
ip, _, port_str = line.rpartition(":")
ip = ip.strip()
port_str = port_str.strip()
if port_str.isdigit() and 1 <= int(port_str) <= 65535:
try:
proxies.append(ProxyRaw(ip, int(port_str), protocol))
except ValueError:
continue
return proxies
async def crawl(self) -> List[ProxyRaw]:
@@ -71,7 +73,7 @@ class ProxyScrapePlugin(BaseHTTPPlugin):
htmls.append("")
except Exception:
htmls.append("")
done_protocols.add(protocols[i])
# 异常时不加入 done_protocols,以便触发 API fallback
fallback_protocols = []
for protocol, html in zip(protocols, htmls):

View File

@@ -42,15 +42,17 @@ class SpeedXPlugin(BaseHTTPPlugin):
line = line.strip()
if not line or ":" not in line:
continue
parts = line.split(":")
if len(parts) >= 2:
ip = parts[0].strip()
port = parts[1].strip()
if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip):
continue
if not port.isdigit() or not (1 <= int(port) <= 65535):
continue
ip, _, port = line.rpartition(":")
ip = ip.strip()
port = port.strip()
if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip):
continue
if not port.isdigit() or not (1 <= int(port) <= 65535):
continue
try:
results.append(ProxyRaw(ip, int(port), protocol))
except ValueError:
continue
return results
async def crawl(self) -> List[ProxyRaw]:

View File

@@ -40,17 +40,18 @@ class YunDaiLiPlugin(BaseHTTPPlugin):
line = line.strip()
if not line or ":" not in line:
continue
parts = line.split(":")
if len(parts) < 2:
continue
ip = parts[0].strip()
port_str = parts[1].strip()
ip, _, port_str = line.rpartition(":")
ip = ip.strip()
port_str = port_str.strip()
if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip):
continue
if not port_str.isdigit() or not (1 <= int(port_str) <= 65535):
continue
final_protocol = protocol if protocol in VALID_PROTOCOLS else "http"
results.append(ProxyRaw(ip, int(port_str), final_protocol))
try:
results.append(ProxyRaw(ip, int(port_str), final_protocol))
except ValueError:
continue
count += 1
if count:

View File

@@ -76,23 +76,23 @@ class ProxyRepository:
max_score: int = 100,
) -> bool:
try:
async with db.execute(
"SELECT score FROM proxies WHERE ip = ? AND port = ?", (ip, port)
) as cursor:
row = await cursor.fetchone()
if not row:
return False
current_score = row[0]
new_score = max(min_score, min(max_score, current_score + delta))
await db.execute(
"UPDATE proxies SET score = ?, last_check = CURRENT_TIMESTAMP WHERE ip = ? AND port = ?",
(new_score, ip, port),
)
if new_score <= 0:
# 只删除当前代理,避免误删其他无效代理
await db.execute("DELETE FROM proxies WHERE ip = ? AND port = ?", (ip, port))
await db.commit()
return True
# 原子更新:计算新分数并直接更新
await db.execute(
"""
UPDATE proxies
SET score = MAX(?, MIN(?, score + ?)),
last_check = CURRENT_TIMESTAMP
WHERE ip = ? AND port = ?
""",
(min_score, max_score, delta, ip, port),
)
# 删除分数已降至 0 及以下的代理
await db.execute(
"DELETE FROM proxies WHERE ip = ? AND port = ? AND score <= ?",
(ip, port, min_score),
)
await db.commit()
return db.total_changes > 0
except Exception as e:
logger.error(f"update_score failed: {e}", exc_info=True)
return False
@@ -156,19 +156,35 @@ class ProxyRepository:
db: aiosqlite.Connection,
protocol: Optional[str] = None,
limit: int = 100000,
offset: int = 0,
) -> List[Proxy]:
query = "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies"
params: List = []
if protocol:
query += " WHERE protocol = ?"
params.append(protocol.lower())
query += " LIMIT ?"
params.append(limit)
query += " LIMIT ? OFFSET ?"
params.extend([limit, offset])
async with db.execute(query, params) as cursor:
rows = await cursor.fetchall()
return [_row_to_proxy(row) for row in rows]
@staticmethod
async def iter_batches(
db: aiosqlite.Connection,
protocol: Optional[str] = None,
batch_size: int = 1000,
):
"""流式分批读取代理,避免一次性加载大量数据到内存"""
offset = 0
while True:
batch = await ProxyRepository.list_all(db, protocol, batch_size, offset)
if not batch:
break
yield batch
offset += batch_size
@staticmethod
async def list_paginated(
db: aiosqlite.Connection,

View File

@@ -24,10 +24,9 @@ class PluginService:
result = []
for plugin in registry.list_plugins():
# 合并持久化状态
# 合并持久化状态(不修改全局实例,避免并发竞争)
state = db_states.get(plugin.name, {})
if "enabled" in state:
plugin.enabled = state["enabled"]
enabled = state.get("enabled", plugin.enabled)
if "config" in state and isinstance(state["config"], dict):
plugin.update_config(state["config"])
@@ -50,7 +49,7 @@ class PluginService:
name=plugin.name,
display_name=plugin.display_name or plugin.name,
description=plugin.description or f"{plugin.name} 爬取代理",
enabled=plugin.enabled,
enabled=enabled,
last_run=stat.get("last_run"),
success_count=stat.get("success_count", 0),
failure_count=stat.get("failure_count", 0),
@@ -133,11 +132,11 @@ class PluginService:
logger.error(f"Run all plugins error: {results}")
continue
all_results.extend(results)
# 去重
# 去重(与数据库 UNIQUE(ip, port) 约束保持一致)
seen = set()
unique = []
for p in all_results:
key = (p.ip, p.port, p.protocol)
key = (p.ip, p.port)
if key not in seen:
seen.add(key)
unique.append(p)

View File

@@ -61,28 +61,41 @@ class ProxyService:
protocol: Optional[str] = None,
limit: int = 10000,
) -> AsyncIterator[str]:
async with get_db() as db:
proxies = await self.proxy_repo.list_all(db, protocol=protocol, limit=limit)
if fmt == "csv":
yield "IP,Port,Protocol,Score,Last Check\n"
for p in proxies:
yield f"{p.ip},{p.port},{p.protocol},{p.score},{self._fmt_time(p.last_check)}\n"
yield "\ufeffIP,Port,Protocol,Score,Last Check\n"
elif fmt == "txt":
for p in proxies:
yield f"{p.ip}:{p.port}\n"
pass
elif fmt == "json":
data = [
{
"ip": p.ip,
"port": p.port,
"protocol": p.protocol,
"score": p.score,
"last_check": self._fmt_time(p.last_check),
}
for p in proxies
]
yield json.dumps(data, ensure_ascii=False, indent=2)
yield "["
first = True
exported = 0
async with get_db() as db:
async for batch in self.proxy_repo.iter_batches(db, protocol=protocol, batch_size=1000):
for p in batch:
if exported >= limit:
break
if fmt == "csv":
yield f"{p.ip},{p.port},{p.protocol},{p.score},{self._fmt_time(p.last_check)}\n"
elif fmt == "txt":
yield f"{p.ip}:{p.port}\n"
elif fmt == "json":
item = {
"ip": p.ip,
"port": p.port,
"protocol": p.protocol,
"score": p.score,
"last_check": self._fmt_time(p.last_check),
}
prefix = "" if first else ","
yield prefix + json.dumps(item, ensure_ascii=False)
first = False
exported += 1
if exported >= limit:
break
if fmt == "json":
yield "]"
@staticmethod
def _fmt_time(dt: Optional[datetime]) -> str:

View File

@@ -8,6 +8,7 @@ from app.core.tasks.queue import ValidationQueue
from app.core.config import settings as app_settings
from app.core.log import logger
from app.models.domain import ProxyRaw
from app.services.task_service import task_service
class SchedulerService:
@@ -58,16 +59,15 @@ class SchedulerService:
"""立即执行一次全量验证(后台运行,不阻塞)"""
if self._validate_task and not self._validate_task.done():
return
self._validate_task = asyncio.create_task(self._do_validate_all())
self._validate_task = asyncio.create_task(self._do_validate_all(from_loop=False))
async def _run_loop(self):
"""定时循环"""
while self.running:
try:
# 清理过期任务,防止内存无限增长
from app.services.task_service import task_service
task_service.cleanup_old_tasks()
await self._do_validate_all()
await self._do_validate_all(from_loop=True)
except Exception as e:
logger.error(f"Scheduler loop error: {e}", exc_info=True)
# 等待下一次
@@ -76,10 +76,17 @@ class SchedulerService:
except asyncio.TimeoutError:
pass
async def _do_validate_all(self):
async def _do_validate_all(self, from_loop: bool = True):
"""验证数据库中所有存量代理"""
queue_started_here = False
try:
logger.info("Starting scheduled validation for all proxies")
# 如果队列未运行,临时启动它(适用于 validate_all_now 在调度器停止时调用)
if not self.validation_queue._running:
await self.validation_queue.start()
queue_started_here = True
async with get_db() as db:
# 清理 7 天前的验证任务记录,防止表无限增长
cleaned = await ValidationTaskRepository.cleanup_old(db, days=7)
@@ -95,7 +102,7 @@ class SchedulerService:
batch_size = 100
total_batches = (len(proxies) - 1) // batch_size + 1
for i in range(0, len(proxies), batch_size):
if not self.running:
if from_loop and not self.running:
break
batch = proxies[i : i + batch_size]
await self.validation_queue.submit([
@@ -106,3 +113,6 @@ class SchedulerService:
logger.info("Scheduled validation batches submitted")
except Exception as e:
logger.error(f"Scheduled validation error: {e}", exc_info=True)
finally:
if queue_started_here:
await self.validation_queue.stop()

View File

@@ -95,17 +95,13 @@ class ValidatorService:
timeout = aiohttp.ClientTimeout(total=self.timeout, connect=self.connect_timeout)
test_url = self._get_test_url("http")
try:
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
async with session.get(test_url, allow_redirects=True) as response:
if response.status in (200, 301, 302):
latency = round((time.time() - start) * 1000, 2)
logger.info(f"SOCKS valid: {ip}:{port} ({protocol}) {latency}ms")
return True, latency
return False, 0.0
finally:
# ClientSession 的 async with 退出时会自动关闭 connector,无需手动重复关闭
pass
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
async with session.get(test_url, allow_redirects=True) as response:
if response.status in (200, 301, 302):
latency = round((time.time() - start) * 1000, 2)
logger.info(f"SOCKS valid: {ip}:{port} ({protocol}) {latency}ms")
return True, latency
return False, 0.0
async def close(self):
"""关闭共享的 HTTP ClientSession"""