feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
@@ -2,7 +2,8 @@
|
||||
import aiosqlite
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional, Tuple, Union
|
||||
from app.models.domain import Proxy
|
||||
|
||||
from app.models.domain import Proxy, ProxyRaw
|
||||
from app.core.log import logger
|
||||
|
||||
|
||||
@@ -32,9 +33,15 @@ def _row_to_proxy(row: Tuple) -> Proxy:
|
||||
response_time_ms=row[4],
|
||||
last_check=_to_datetime(row[5]),
|
||||
created_at=_to_datetime(row[6]),
|
||||
validated=int(row[7]) if len(row) > 7 and row[7] is not None else 0,
|
||||
)
|
||||
|
||||
|
||||
_SELECT_PROXY_COLS = (
|
||||
"ip, port, protocol, score, response_time_ms, last_check, created_at, validated"
|
||||
)
|
||||
|
||||
|
||||
class ProxyRepository:
|
||||
"""代理 Repository"""
|
||||
|
||||
@@ -51,12 +58,13 @@ class ProxyRepository:
|
||||
try:
|
||||
await db.execute(
|
||||
"""
|
||||
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at)
|
||||
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
||||
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
|
||||
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1)
|
||||
ON CONFLICT(ip, port) DO UPDATE SET
|
||||
protocol = excluded.protocol,
|
||||
score = excluded.score,
|
||||
last_check = CURRENT_TIMESTAMP
|
||||
last_check = CURRENT_TIMESTAMP,
|
||||
validated = 1
|
||||
""",
|
||||
(ip, port, protocol, score),
|
||||
)
|
||||
@@ -66,6 +74,56 @@ class ProxyRepository:
|
||||
logger.error(f"insert_or_update proxy failed: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
@staticmethod
async def upsert_from_crawl(
    db: aiosqlite.Connection,
    ip: str,
    port: int,
    protocol: str = "http",
    initial_score: int = 0,
) -> None:
    """Store one crawled proxy in the pending-validation state.

    The row is written with ``validated = 0`` and ``score = initial_score``.
    When a row with the same ``(ip, port)`` already exists, its protocol,
    score and ``last_check`` are overwritten and it is reset to
    ``validated = 0``, i.e. crawling a proxy again puts it back in the
    pending state. No commit is issued here.

    Args:
        db: Connection the statement is executed on.
        ip: Proxy host address.
        port: Proxy port.
        protocol: Proxy protocol; replaced by ``"http"`` when it is not in
            ``VALID_PROTOCOLS``.
        initial_score: Score stored on insert and on conflict.
    """
    normalized = protocol if protocol in VALID_PROTOCOLS else "http"
    statement = """
        INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
        ON CONFLICT(ip, port) DO UPDATE SET
            protocol = excluded.protocol,
            score = excluded.score,
            last_check = CURRENT_TIMESTAMP,
            validated = 0
        """
    await db.execute(statement, (ip, port, normalized, initial_score))
|
||||
|
||||
@staticmethod
async def upsert_many_from_crawl(
    db: aiosqlite.Connection,
    proxies: List[ProxyRaw],
    initial_score: int = 0,
) -> None:
    """Store a batch of crawled proxies in the pending-validation state.

    Each entry is written with ``validated = 0`` and
    ``score = initial_score``; an existing ``(ip, port)`` row has its
    protocol, score and ``last_check`` overwritten and is reset to
    ``validated = 0``. Does nothing when ``proxies`` is empty. No commit is
    issued here; the surrounding transaction is expected to commit.

    Args:
        db: Connection the statement is executed on.
        proxies: Crawled entries; ``ip``, ``port`` and ``protocol`` are read
            from each one. A protocol outside ``VALID_PROTOCOLS`` is stored
            as ``"http"``.
        initial_score: Score stored for every entry.
    """
    if not proxies:
        return

    params = [
        (
            item.ip,
            item.port,
            item.protocol if item.protocol in VALID_PROTOCOLS else "http",
            initial_score,
        )
        for item in proxies
    ]
    statement = """
        INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
        ON CONFLICT(ip, port) DO UPDATE SET
            protocol = excluded.protocol,
            score = excluded.score,
            last_check = CURRENT_TIMESTAMP,
            validated = 0
        """
    await db.executemany(statement, params)
|
||||
|
||||
@staticmethod
|
||||
async def update_score(
|
||||
db: aiosqlite.Connection,
|
||||
@@ -86,9 +144,12 @@ class ProxyRepository:
|
||||
""",
|
||||
(min_score, max_score, delta, ip, port),
|
||||
)
|
||||
# 删除分数已降至 0 及以下的代理
|
||||
# 仅删除已入池且分数耗尽者;待验证(score=0)不经过此路径
|
||||
await db.execute(
|
||||
"DELETE FROM proxies WHERE ip = ? AND port = ? AND score <= ?",
|
||||
"""
|
||||
DELETE FROM proxies
|
||||
WHERE ip = ? AND port = ? AND score <= ? AND validated = 1
|
||||
""",
|
||||
(ip, port, min_score),
|
||||
)
|
||||
await db.commit()
|
||||
@@ -134,7 +195,7 @@ class ProxyRepository:
|
||||
db: aiosqlite.Connection, ip: str, port: int
|
||||
) -> Optional[Proxy]:
|
||||
async with db.execute(
|
||||
"SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE ip = ? AND port = ?",
|
||||
f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE ip = ? AND port = ?",
|
||||
(ip, port),
|
||||
) as cursor:
|
||||
row = await cursor.fetchone()
|
||||
@@ -145,7 +206,11 @@ class ProxyRepository:
|
||||
@staticmethod
|
||||
async def get_random(db: aiosqlite.Connection) -> Optional[Proxy]:
|
||||
async with db.execute(
|
||||
"SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE score > 0 ORDER BY RANDOM() LIMIT 1"
|
||||
f"""
|
||||
SELECT {_SELECT_PROXY_COLS} FROM proxies
|
||||
WHERE validated = 1 AND score > 0
|
||||
ORDER BY RANDOM() LIMIT 1
|
||||
"""
|
||||
) as cursor:
|
||||
row = await cursor.fetchone()
|
||||
if row:
|
||||
@@ -158,12 +223,19 @@ class ProxyRepository:
|
||||
protocol: Optional[str] = None,
|
||||
limit: int = 100000,
|
||||
offset: int = 0,
|
||||
validated: Optional[int] = None,
|
||||
) -> List[Proxy]:
|
||||
query = "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies"
|
||||
query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
|
||||
params: List = []
|
||||
clauses = []
|
||||
if protocol:
|
||||
query += " WHERE protocol = ?"
|
||||
clauses.append("protocol = ?")
|
||||
params.append(protocol.lower())
|
||||
if validated is not None:
|
||||
clauses.append("validated = ?")
|
||||
params.append(int(validated))
|
||||
if clauses:
|
||||
query += " WHERE " + " AND ".join(clauses)
|
||||
query += " LIMIT ? OFFSET ?"
|
||||
params.extend([limit, offset])
|
||||
|
||||
@@ -171,21 +243,77 @@ class ProxyRepository:
|
||||
rows = await cursor.fetchall()
|
||||
return [_row_to_proxy(row) for row in rows]
|
||||
|
||||
@staticmethod
async def list_for_validation(
    db: aiosqlite.Connection,
    protocol: Optional[str] = None,
) -> List[Proxy]:
    """Return every proxy in the order it should be (re)validated.

    Pending rows (``validated = 0``) come first, ordered by ascending
    ``created_at``; rows with ``validated = 1`` follow, ordered by ascending
    ``last_check``.

    Args:
        db: Connection the queries are executed on.
        protocol: When given, only rows whose protocol equals the
            lower-cased value are returned.

    Returns:
        Pending proxies followed by validated proxies.
    """
    protocol_clause = ""
    protocol_args: List = []
    if protocol:
        protocol_clause = " AND protocol = ?"
        protocol_args = [protocol.lower()]

    ordered: List[Proxy] = []
    # One pass per group: (validated flag, column that orders the group).
    for flag, order_column in ((0, "created_at"), (1, "last_check")):
        sql = (
            f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE validated = {flag}"
            f"{protocol_clause} ORDER BY {order_column} ASC"
        )
        async with db.execute(sql, list(protocol_args)) as cursor:
            fetched = await cursor.fetchall()
        ordered.extend(_row_to_proxy(record) for record in fetched)

    return ordered
|
||||
|
||||
@staticmethod
async def iter_batches(
    db: aiosqlite.Connection,
    protocol: Optional[str] = None,
    batch_size: int = 1000,
    only_usable: bool = False,
):
    """Yield proxies in successive lists of at most ``batch_size`` rows.

    Streams the table page by page so that callers never hold the whole
    table in memory at once. Iteration stops at the first empty page.

    Args:
        db: Connection the page queries are executed on.
        protocol: Optional protocol filter, forwarded to the page query.
        batch_size: Maximum number of rows per yielded list. ``0`` yields
            nothing.
        only_usable: When true, only rows with ``validated = 1`` and
            ``score > 0`` are returned.

    Yields:
        Non-empty lists of ``Proxy`` objects.

    Raises:
        ValueError: When iteration starts with a negative ``batch_size``.
    """
    # SQLite treats a negative LIMIT as "no limit" and a negative OFFSET as
    # 0, so a negative batch size would return the same rows forever.
    if batch_size < 0:
        raise ValueError("batch_size must not be negative")

    offset = 0
    while True:
        # Single page query per iteration; the page helper is the only one
        # that honours only_usable.
        batch = await ProxyRepository._list_batch_offset(
            db, protocol, batch_size, offset, only_usable=only_usable
        )
        if not batch:
            break
        yield batch
        offset += batch_size
|
||||
|
||||
@staticmethod
async def _list_batch_offset(
    db: aiosqlite.Connection,
    protocol: Optional[str],
    batch_size: int,
    offset: int,
    only_usable: bool,
) -> List[Proxy]:
    """Fetch one LIMIT/OFFSET page of proxies.

    Args:
        db: Connection the query is executed on.
        protocol: When truthy, only rows whose protocol equals the
            lower-cased value are returned.
        batch_size: Value bound to ``LIMIT``.
        offset: Value bound to ``OFFSET``.
        only_usable: When true, restrict to rows with ``validated = 1`` and
            ``score > 0``.

    Returns:
        The rows of the requested page converted with ``_row_to_proxy``.
    """
    clauses = []
    params: List = []
    if only_usable:
        clauses.append("validated = 1 AND score > 0")
    if protocol:
        clauses.append("protocol = ?")
        params.append(protocol.lower())

    query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
    if clauses:
        query += " WHERE " + " AND ".join(clauses)
    # Without ORDER BY the row order of a SELECT is undefined, so successive
    # OFFSET pages could repeat or skip rows. (ip, port) is the table's
    # conflict key, which makes it a total order over the rows.
    query += " ORDER BY ip, port LIMIT ? OFFSET ?"
    params.extend([batch_size, offset])

    async with db.execute(query, params) as cursor:
        rows = await cursor.fetchall()
    return [_row_to_proxy(row) for row in rows]
|
||||
|
||||
@staticmethod
|
||||
async def list_paginated(
|
||||
db: aiosqlite.Connection,
|
||||
@@ -196,6 +324,7 @@ class ProxyRepository:
|
||||
max_score: Optional[int] = None,
|
||||
sort_by: str = "last_check",
|
||||
sort_order: str = "DESC",
|
||||
pool_filter: Optional[str] = None,
|
||||
) -> Tuple[List[Proxy], int]:
|
||||
conditions = ["score >= ?"]
|
||||
params: List = [min_score]
|
||||
@@ -206,6 +335,10 @@ class ProxyRepository:
|
||||
if max_score is not None:
|
||||
conditions.append("score <= ?")
|
||||
params.append(max_score)
|
||||
if pool_filter == "pending":
|
||||
conditions.append("validated = 0")
|
||||
elif pool_filter == "available":
|
||||
conditions.append("validated = 1 AND score > 0")
|
||||
|
||||
where_clause = " AND ".join(conditions)
|
||||
allowed_sort_by = {"ip", "port", "protocol", "score", "last_check"}
|
||||
@@ -222,7 +355,7 @@ class ProxyRepository:
|
||||
total = row[0] if row else 0
|
||||
|
||||
data_query = f"""
|
||||
SELECT ip, port, protocol, score, response_time_ms, last_check, created_at
|
||||
SELECT {_SELECT_PROXY_COLS}
|
||||
FROM proxies
|
||||
WHERE {where_clause}
|
||||
ORDER BY {order_clause}
|
||||
@@ -239,8 +372,9 @@ class ProxyRepository:
|
||||
query = """
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
COUNT(CASE WHEN score > 0 THEN 1 END) as available,
|
||||
AVG(score) as avg_score,
|
||||
COUNT(CASE WHEN validated = 0 THEN 1 END) as pending,
|
||||
COUNT(CASE WHEN validated = 1 AND score > 0 THEN 1 END) as available,
|
||||
(SELECT AVG(score) FROM proxies WHERE validated = 1 AND score > 0) as avg_score,
|
||||
COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
|
||||
COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
|
||||
COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
|
||||
@@ -252,15 +386,17 @@ class ProxyRepository:
|
||||
if row:
|
||||
return {
|
||||
"total": row[0] or 0,
|
||||
"available": row[1] or 0,
|
||||
"avg_score": round(row[2], 2) if row[2] else 0,
|
||||
"http_count": row[3] or 0,
|
||||
"https_count": row[4] or 0,
|
||||
"socks4_count": row[5] or 0,
|
||||
"socks5_count": row[6] or 0,
|
||||
"pending": row[1] or 0,
|
||||
"available": row[2] or 0,
|
||||
"avg_score": round(row[3], 2) if row[3] is not None else 0,
|
||||
"http_count": row[4] or 0,
|
||||
"https_count": row[5] or 0,
|
||||
"socks4_count": row[6] or 0,
|
||||
"socks5_count": row[7] or 0,
|
||||
}
|
||||
return {
|
||||
"total": 0,
|
||||
"pending": 0,
|
||||
"available": 0,
|
||||
"avg_score": 0,
|
||||
"http_count": 0,
|
||||
@@ -271,9 +407,15 @@ class ProxyRepository:
|
||||
|
||||
@staticmethod
|
||||
async def get_today_new_count(db: aiosqlite.Connection) -> int:
|
||||
"""今日新增:仅统计今日入库且已验证可用(与 get_stats.available 语义一致)。"""
|
||||
try:
|
||||
async with db.execute(
|
||||
"SELECT COUNT(*) FROM proxies WHERE DATE(created_at) = DATE('now', 'localtime')"
|
||||
"""
|
||||
SELECT COUNT(*) FROM proxies
|
||||
WHERE DATE(created_at) = DATE('now', 'localtime')
|
||||
AND validated = 1
|
||||
AND score > 0
|
||||
"""
|
||||
) as cursor:
|
||||
row = await cursor.fetchone()
|
||||
return row[0] if row else 0
|
||||
@@ -283,7 +425,9 @@ class ProxyRepository:
|
||||
|
||||
@staticmethod
async def clean_invalid(db: aiosqlite.Connection) -> int:
    """Delete validated proxies whose score is exhausted and commit.

    Only rows with ``validated = 1`` and ``score <= 0`` are removed. Rows
    still pending validation (``validated = 0``) are kept even though they
    are stored with a score of 0.

    Args:
        db: Connection the delete is executed and committed on.

    Returns:
        The number of rows removed by this call.
    """
    async with db.execute(
        "DELETE FROM proxies WHERE validated = 1 AND score <= 0"
    ) as cursor:
        # rowcount covers this statement only; Connection.total_changes
        # counts every row changed since the connection was opened.
        removed = cursor.rowcount
    await db.commit()
    return removed
|
||||
|
||||
|
||||
@@ -6,13 +6,12 @@ from app.core.log import logger
|
||||
|
||||
|
||||
DEFAULT_SETTINGS = {
|
||||
"crawl_timeout": 30,
|
||||
"validation_timeout": 10,
|
||||
"max_retries": 3,
|
||||
"default_concurrency": 50,
|
||||
"validation_timeout": 6,
|
||||
"default_concurrency": 120,
|
||||
"min_proxy_score": 0,
|
||||
"proxy_expiry_days": 7,
|
||||
"auto_validate": True,
|
||||
"auto_validate_after_crawl": False,
|
||||
"validate_interval_minutes": 30,
|
||||
"validation_targets": [
|
||||
"http://httpbin.org/ip",
|
||||
@@ -50,6 +49,8 @@ class SettingsRepository:
|
||||
settings[key] = value
|
||||
except Exception as e:
|
||||
logger.error(f"get_all settings failed: {e}")
|
||||
# 已废弃:爬取限时改为每插件 crawl_timeout_seconds,不再存全局项
|
||||
settings.pop("crawl_timeout", None)
|
||||
return settings
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user