feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation

- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
祀梦
2026-04-05 13:39:19 +08:00
parent 92c7fa19e2
commit 0131c8b408
63 changed files with 2331 additions and 531 deletions

View File

@@ -2,7 +2,8 @@
import aiosqlite
from datetime import datetime, timedelta
from typing import List, Optional, Tuple, Union
from app.models.domain import Proxy
from app.models.domain import Proxy, ProxyRaw
from app.core.log import logger
@@ -32,9 +33,15 @@ def _row_to_proxy(row: Tuple) -> Proxy:
response_time_ms=row[4],
last_check=_to_datetime(row[5]),
created_at=_to_datetime(row[6]),
validated=int(row[7]) if len(row) > 7 and row[7] is not None else 0,
)
# Column list shared by the SELECT statements in this module. The order is
# significant: _row_to_proxy reads rows positionally (for example row[4] is
# response_time_ms and row[7] is validated), so keep both in sync.
_SELECT_PROXY_COLS = (
    "ip, port, protocol, score, response_time_ms, last_check, created_at, validated"
)
class ProxyRepository:
"""代理 Repository"""
@@ -51,12 +58,13 @@ class ProxyRepository:
try:
await db.execute(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
last_check = CURRENT_TIMESTAMP
last_check = CURRENT_TIMESTAMP,
validated = 1
""",
(ip, port, protocol, score),
)
@@ -66,6 +74,56 @@ class ProxyRepository:
logger.error(f"insert_or_update proxy failed: {e}", exc_info=True)
return False
@staticmethod
async def upsert_from_crawl(
    db: aiosqlite.Connection,
    ip: str,
    port: int,
    protocol: str = "http",
    initial_score: int = 0,
) -> None:
    """Store a single crawled proxy in the pending-validation state.

    The row is written with ``validated = 0`` and ``score = initial_score``.
    If ``(ip, port)`` already exists, its protocol and score are overwritten,
    ``last_check`` is refreshed and the row is reset to ``validated = 0``,
    i.e. crawling the same proxy again puts it back in the pending state.

    A ``protocol`` that is not in ``VALID_PROTOCOLS`` is stored as ``"http"``.
    This method does not commit.
    """
    stored_protocol = protocol if protocol in VALID_PROTOCOLS else "http"
    upsert_sql = """
        INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
        ON CONFLICT(ip, port) DO UPDATE SET
        protocol = excluded.protocol,
        score = excluded.score,
        last_check = CURRENT_TIMESTAMP,
        validated = 0
        """
    await db.execute(upsert_sql, (ip, port, stored_protocol, initial_score))
@staticmethod
async def upsert_many_from_crawl(
    db: aiosqlite.Connection,
    proxies: List[ProxyRaw],
    initial_score: int = 0,
) -> None:
    """Bulk version of the crawl upsert; every row ends up pending validation.

    Each proxy is written with ``validated = 0`` and ``score = initial_score``;
    an existing ``(ip, port)`` row is overwritten and reset to pending.
    Protocols outside ``VALID_PROTOCOLS`` are stored as ``"http"``.

    Does nothing for an empty ``proxies`` list. This method does not commit;
    the surrounding transaction is expected to do so.
    """
    if not proxies:
        return

    params = [
        (
            item.ip,
            item.port,
            item.protocol if item.protocol in VALID_PROTOCOLS else "http",
            initial_score,
        )
        for item in proxies
    ]
    upsert_sql = """
        INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
        ON CONFLICT(ip, port) DO UPDATE SET
        protocol = excluded.protocol,
        score = excluded.score,
        last_check = CURRENT_TIMESTAMP,
        validated = 0
        """
    await db.executemany(upsert_sql, params)
@staticmethod
async def update_score(
db: aiosqlite.Connection,
@@ -86,9 +144,12 @@ class ProxyRepository:
""",
(min_score, max_score, delta, ip, port),
)
# 删除分数已降至 0 及以下的代理
# 删除已入池且分数耗尽者;待验证(score=0)不经过此路径
await db.execute(
"DELETE FROM proxies WHERE ip = ? AND port = ? AND score <= ?",
"""
DELETE FROM proxies
WHERE ip = ? AND port = ? AND score <= ? AND validated = 1
""",
(ip, port, min_score),
)
await db.commit()
@@ -134,7 +195,7 @@ class ProxyRepository:
db: aiosqlite.Connection, ip: str, port: int
) -> Optional[Proxy]:
async with db.execute(
"SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE ip = ? AND port = ?",
f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE ip = ? AND port = ?",
(ip, port),
) as cursor:
row = await cursor.fetchone()
@@ -145,7 +206,11 @@ class ProxyRepository:
@staticmethod
async def get_random(db: aiosqlite.Connection) -> Optional[Proxy]:
async with db.execute(
"SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE score > 0 ORDER BY RANDOM() LIMIT 1"
f"""
SELECT {_SELECT_PROXY_COLS} FROM proxies
WHERE validated = 1 AND score > 0
ORDER BY RANDOM() LIMIT 1
"""
) as cursor:
row = await cursor.fetchone()
if row:
@@ -158,12 +223,19 @@ class ProxyRepository:
protocol: Optional[str] = None,
limit: int = 100000,
offset: int = 0,
validated: Optional[int] = None,
) -> List[Proxy]:
query = "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies"
query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
params: List = []
clauses = []
if protocol:
query += " WHERE protocol = ?"
clauses.append("protocol = ?")
params.append(protocol.lower())
if validated is not None:
clauses.append("validated = ?")
params.append(int(validated))
if clauses:
query += " WHERE " + " AND ".join(clauses)
query += " LIMIT ? OFFSET ?"
params.extend([limit, offset])
@@ -171,21 +243,77 @@ class ProxyRepository:
rows = await cursor.fetchall()
return [_row_to_proxy(row) for row in rows]
@staticmethod
async def list_for_validation(
    db: aiosqlite.Connection,
    protocol: Optional[str] = None,
) -> List[Proxy]:
    """Return proxies in the order they should be (re)validated.

    Pending rows (``validated = 0``) come first, oldest ``created_at`` first,
    followed by already validated rows (``validated = 1``) ordered by
    ``last_check`` ascending. When ``protocol`` is given, both groups are
    restricted to that protocol (compared in lower case).
    """
    protocol_clause = " AND protocol = ?" if protocol else ""
    protocol_args: List = [protocol.lower()] if protocol else []

    ordered: List[Proxy] = []
    # One query per group: (validated flag, column that orders the group).
    for flag, order_column in ((0, "created_at"), (1, "last_check")):
        sql = (
            f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE validated = {flag}"
            f"{protocol_clause} ORDER BY {order_column} ASC"
        )
        async with db.execute(sql, protocol_args) as cursor:
            fetched = await cursor.fetchall()
        ordered.extend(_row_to_proxy(record) for record in fetched)
    return ordered
@staticmethod
async def iter_batches(
    db: aiosqlite.Connection,
    protocol: Optional[str] = None,
    batch_size: int = 1000,
    only_usable: bool = False,
):
    """Stream proxies in batches instead of loading them all into memory.

    Async generator: each iteration yields a list of at most ``batch_size``
    ``Proxy`` objects, fetched page by page via ``_list_batch_offset``.
    Iteration stops at the first empty page.

    Args:
        db: Open database connection.
        protocol: Optional protocol filter.
        batch_size: Page size; also the step by which the offset advances.
        only_usable: When true, only rows with ``validated = 1`` and
            ``score > 0`` are returned.
    """
    offset = 0
    while True:
        # Exactly one query per page. The previous extra ``list_all`` call
        # was overwritten immediately and ignored ``only_usable``.
        batch = await ProxyRepository._list_batch_offset(
            db, protocol, batch_size, offset, only_usable=only_usable
        )
        if not batch:
            break
        yield batch
        offset += batch_size
@staticmethod
async def _list_batch_offset(
    db: aiosqlite.Connection,
    protocol: Optional[str],
    batch_size: int,
    offset: int,
    only_usable: bool,
) -> List[Proxy]:
    """Fetch one ``LIMIT``/``OFFSET`` page of proxies.

    Args:
        db: Open database connection.
        protocol: Optional protocol filter (compared in lower case).
        batch_size: Maximum number of rows in the page.
        offset: Number of rows to skip.
        only_usable: When true, restrict to ``validated = 1 AND score > 0``.

    Returns:
        The rows of the requested page converted with ``_row_to_proxy``.
    """
    clauses = []
    params: List = []
    if only_usable:
        clauses.append("validated = 1 AND score > 0")
    if protocol:
        clauses.append("protocol = ?")
        params.append(protocol.lower())

    query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
    if clauses:
        query += " WHERE " + " AND ".join(clauses)
    # Without ORDER BY the row order is undefined, so consecutive OFFSET pages
    # could overlap or skip rows. (ip, port) is the unique key used by the
    # ON CONFLICT upserts, which makes it a stable total order.
    query += " ORDER BY ip, port LIMIT ? OFFSET ?"
    params.extend([batch_size, offset])

    async with db.execute(query, params) as cursor:
        rows = await cursor.fetchall()
    return [_row_to_proxy(row) for row in rows]
@staticmethod
async def list_paginated(
db: aiosqlite.Connection,
@@ -196,6 +324,7 @@ class ProxyRepository:
max_score: Optional[int] = None,
sort_by: str = "last_check",
sort_order: str = "DESC",
pool_filter: Optional[str] = None,
) -> Tuple[List[Proxy], int]:
conditions = ["score >= ?"]
params: List = [min_score]
@@ -206,6 +335,10 @@ class ProxyRepository:
if max_score is not None:
conditions.append("score <= ?")
params.append(max_score)
if pool_filter == "pending":
conditions.append("validated = 0")
elif pool_filter == "available":
conditions.append("validated = 1 AND score > 0")
where_clause = " AND ".join(conditions)
allowed_sort_by = {"ip", "port", "protocol", "score", "last_check"}
@@ -222,7 +355,7 @@ class ProxyRepository:
total = row[0] if row else 0
data_query = f"""
SELECT ip, port, protocol, score, response_time_ms, last_check, created_at
SELECT {_SELECT_PROXY_COLS}
FROM proxies
WHERE {where_clause}
ORDER BY {order_clause}
@@ -239,8 +372,9 @@ class ProxyRepository:
query = """
SELECT
COUNT(*) as total,
COUNT(CASE WHEN score > 0 THEN 1 END) as available,
AVG(score) as avg_score,
COUNT(CASE WHEN validated = 0 THEN 1 END) as pending,
COUNT(CASE WHEN validated = 1 AND score > 0 THEN 1 END) as available,
(SELECT AVG(score) FROM proxies WHERE validated = 1 AND score > 0) as avg_score,
COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
@@ -252,15 +386,17 @@ class ProxyRepository:
if row:
return {
"total": row[0] or 0,
"available": row[1] or 0,
"avg_score": round(row[2], 2) if row[2] else 0,
"http_count": row[3] or 0,
"https_count": row[4] or 0,
"socks4_count": row[5] or 0,
"socks5_count": row[6] or 0,
"pending": row[1] or 0,
"available": row[2] or 0,
"avg_score": round(row[3], 2) if row[3] is not None else 0,
"http_count": row[4] or 0,
"https_count": row[5] or 0,
"socks4_count": row[6] or 0,
"socks5_count": row[7] or 0,
}
return {
"total": 0,
"pending": 0,
"available": 0,
"avg_score": 0,
"http_count": 0,
@@ -271,9 +407,15 @@ class ProxyRepository:
@staticmethod
async def get_today_new_count(db: aiosqlite.Connection) -> int:
"""今日新增:仅统计今日入库且已验证可用(与 get_stats.available 语义一致)。"""
try:
async with db.execute(
"SELECT COUNT(*) FROM proxies WHERE DATE(created_at) = DATE('now', 'localtime')"
"""
SELECT COUNT(*) FROM proxies
WHERE DATE(created_at) = DATE('now', 'localtime')
AND validated = 1
AND score > 0
"""
) as cursor:
row = await cursor.fetchone()
return row[0] if row else 0
@@ -283,7 +425,9 @@ class ProxyRepository:
@staticmethod
async def clean_invalid(db: aiosqlite.Connection) -> int:
    """Delete validated proxies whose score is exhausted and commit.

    Only rows with ``validated = 1 AND score <= 0`` are removed; pending rows
    (``validated = 0``) are kept even though their score may be 0.

    Returns:
        The number of rows removed by this call.
    """
    # Use the statement's own rowcount: ``db.total_changes`` counts every row
    # modified since the connection was opened, not just this DELETE.
    async with db.execute(
        "DELETE FROM proxies WHERE validated = 1 AND score <= 0"
    ) as cursor:
        deleted = cursor.rowcount
    await db.commit()
    return deleted