Files
ProxyPool/app/repositories/proxy_repo.py
祀梦 0131c8b408 feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
2026-04-05 13:39:19 +08:00

446 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""代理数据访问层 - 所有 SQL 操作收敛于此"""
import aiosqlite
from datetime import datetime, timedelta
from typing import List, Optional, Tuple, Union
from app.models.domain import Proxy, ProxyRaw
from app.core.log import logger
VALID_PROTOCOLS = ("http", "https", "socks4", "socks5")
def _to_datetime(value: Union[str, datetime, None]) -> Optional[datetime]:
if value is None:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
try:
return datetime.strptime(value, fmt)
except ValueError:
continue
return None
def _row_to_proxy(row: Tuple) -> Proxy:
    """Map one row selected via _SELECT_PROXY_COLS onto a Proxy domain object.

    The `validated` column may be absent (short row) or NULL; both default to 0.
    """
    has_validated = len(row) > 7 and row[7] is not None
    return Proxy(
        ip=row[0],
        port=row[1],
        protocol=row[2],
        score=row[3],
        response_time_ms=row[4],
        last_check=_to_datetime(row[5]),
        created_at=_to_datetime(row[6]),
        validated=int(row[7]) if has_validated else 0,
    )
_SELECT_PROXY_COLS = (
"ip, port, protocol, score, response_time_ms, last_check, created_at, validated"
)
class ProxyRepository:
"""代理 Repository"""
@staticmethod
async def insert_or_update(
db: aiosqlite.Connection,
ip: str,
port: int,
protocol: str = "http",
score: int = 10,
) -> bool:
if protocol not in VALID_PROTOCOLS:
protocol = "http"
try:
await db.execute(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
last_check = CURRENT_TIMESTAMP,
validated = 1
""",
(ip, port, protocol, score),
)
await db.commit()
return True
except Exception as e:
logger.error(f"insert_or_update proxy failed: {e}", exc_info=True)
return False
@staticmethod
async def upsert_from_crawl(
db: aiosqlite.Connection,
ip: str,
port: int,
protocol: str = "http",
initial_score: int = 0,
) -> None:
"""爬取入库待验证状态validated=0, score=0再次爬取同一条则重置为待验证。"""
if protocol not in VALID_PROTOCOLS:
protocol = "http"
await db.execute(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
last_check = CURRENT_TIMESTAMP,
validated = 0
""",
(ip, port, protocol, initial_score),
)
@staticmethod
async def upsert_many_from_crawl(
db: aiosqlite.Connection,
proxies: List[ProxyRaw],
initial_score: int = 0,
) -> None:
"""批量爬取入库;不 commit由外层 transaction 提交。"""
if not proxies:
return
rows = []
for p in proxies:
proto = p.protocol if p.protocol in VALID_PROTOCOLS else "http"
rows.append((p.ip, p.port, proto, initial_score))
await db.executemany(
"""
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
ON CONFLICT(ip, port) DO UPDATE SET
protocol = excluded.protocol,
score = excluded.score,
last_check = CURRENT_TIMESTAMP,
validated = 0
""",
rows,
)
@staticmethod
async def update_score(
db: aiosqlite.Connection,
ip: str,
port: int,
delta: int,
min_score: int = 0,
max_score: int = 100,
) -> bool:
try:
# 原子更新:计算新分数并直接更新
await db.execute(
"""
UPDATE proxies
SET score = MAX(?, MIN(?, score + ?)),
last_check = CURRENT_TIMESTAMP
WHERE ip = ? AND port = ?
""",
(min_score, max_score, delta, ip, port),
)
# 仅删除已入池且分数耗尽者;待验证(score=0)不经过此路径
await db.execute(
"""
DELETE FROM proxies
WHERE ip = ? AND port = ? AND score <= ? AND validated = 1
""",
(ip, port, min_score),
)
await db.commit()
return db.total_changes > 0
except Exception as e:
logger.error(f"update_score failed: {e}", exc_info=True)
return False
@staticmethod
async def update_response_time(
db: aiosqlite.Connection,
ip: str,
port: int,
response_time_ms: float,
) -> bool:
try:
await db.execute(
"UPDATE proxies SET response_time_ms = ? WHERE ip = ? AND port = ?",
(response_time_ms, ip, port),
)
await db.commit()
return True
except Exception as e:
logger.error(f"update_response_time failed: {e}", exc_info=True)
return False
@staticmethod
async def delete(db: aiosqlite.Connection, ip: str, port: int) -> None:
await db.execute("DELETE FROM proxies WHERE ip = ? AND port = ?", (ip, port))
await db.commit()
@staticmethod
async def batch_delete(db: aiosqlite.Connection, proxies: List[Tuple[str, int]]) -> int:
if not proxies:
return 0
changes_before = db.total_changes
await db.executemany("DELETE FROM proxies WHERE ip = ? AND port = ?", proxies)
await db.commit()
return db.total_changes - changes_before
@staticmethod
async def get_by_ip_port(
db: aiosqlite.Connection, ip: str, port: int
) -> Optional[Proxy]:
async with db.execute(
f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE ip = ? AND port = ?",
(ip, port),
) as cursor:
row = await cursor.fetchone()
if row:
return _row_to_proxy(row)
return None
@staticmethod
async def get_random(db: aiosqlite.Connection) -> Optional[Proxy]:
async with db.execute(
f"""
SELECT {_SELECT_PROXY_COLS} FROM proxies
WHERE validated = 1 AND score > 0
ORDER BY RANDOM() LIMIT 1
"""
) as cursor:
row = await cursor.fetchone()
if row:
return _row_to_proxy(row)
return None
@staticmethod
async def list_all(
db: aiosqlite.Connection,
protocol: Optional[str] = None,
limit: int = 100000,
offset: int = 0,
validated: Optional[int] = None,
) -> List[Proxy]:
query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
params: List = []
clauses = []
if protocol:
clauses.append("protocol = ?")
params.append(protocol.lower())
if validated is not None:
clauses.append("validated = ?")
params.append(int(validated))
if clauses:
query += " WHERE " + " AND ".join(clauses)
query += " LIMIT ? OFFSET ?"
params.extend([limit, offset])
async with db.execute(query, params) as cursor:
rows = await cursor.fetchall()
return [_row_to_proxy(row) for row in rows]
@staticmethod
async def list_for_validation(
db: aiosqlite.Connection,
protocol: Optional[str] = None,
) -> List[Proxy]:
"""待验证优先,其次已验证按 last_check 升序(用于全量/调度复检)。"""
pending: List[Proxy] = []
q = f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE validated = 0"
params: List = []
if protocol:
q += " AND protocol = ?"
params.append(protocol.lower())
q += " ORDER BY created_at ASC"
async with db.execute(q, params) as cursor:
rows_p = await cursor.fetchall()
pending = [_row_to_proxy(r) for r in rows_p]
rest_q = f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE validated = 1"
rparams: List = []
if protocol:
rest_q += " AND protocol = ?"
rparams.append(protocol.lower())
rest_q += " ORDER BY last_check ASC"
async with db.execute(rest_q, rparams) as cursor:
rows_r = await cursor.fetchall()
rest = [_row_to_proxy(r) for r in rows_r]
return pending + rest
@staticmethod
async def iter_batches(
db: aiosqlite.Connection,
protocol: Optional[str] = None,
batch_size: int = 1000,
only_usable: bool = False,
):
"""流式分批读取代理,避免一次性加载大量数据到内存"""
offset = 0
while True:
batch = await ProxyRepository._list_batch_offset(
db, protocol, batch_size, offset, only_usable=only_usable
)
if not batch:
break
yield batch
offset += batch_size
@staticmethod
async def _list_batch_offset(
db: aiosqlite.Connection,
protocol: Optional[str],
batch_size: int,
offset: int,
only_usable: bool,
) -> List[Proxy]:
query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
params: List = []
clauses = []
if only_usable:
clauses.append("validated = 1 AND score > 0")
if protocol:
clauses.append("protocol = ?")
params.append(protocol.lower())
if clauses:
query += " WHERE " + " AND ".join(clauses)
query += " LIMIT ? OFFSET ?"
params.extend([batch_size, offset])
async with db.execute(query, params) as cursor:
rows = await cursor.fetchall()
return [_row_to_proxy(row) for row in rows]
@staticmethod
async def list_paginated(
db: aiosqlite.Connection,
page: int = 1,
page_size: int = 20,
protocol: Optional[str] = None,
min_score: int = 0,
max_score: Optional[int] = None,
sort_by: str = "last_check",
sort_order: str = "DESC",
pool_filter: Optional[str] = None,
) -> Tuple[List[Proxy], int]:
conditions = ["score >= ?"]
params: List = [min_score]
if protocol:
conditions.append("protocol = ?")
params.append(protocol)
if max_score is not None:
conditions.append("score <= ?")
params.append(max_score)
if pool_filter == "pending":
conditions.append("validated = 0")
elif pool_filter == "available":
conditions.append("validated = 1 AND score > 0")
where_clause = " AND ".join(conditions)
allowed_sort_by = {"ip", "port", "protocol", "score", "last_check"}
allowed_sort_order = {"ASC", "DESC"}
if sort_by not in allowed_sort_by or sort_order.upper() not in allowed_sort_order:
order_clause = "last_check DESC"
else:
order_clause = f"{sort_by} {sort_order.upper()}"
offset = (page - 1) * page_size
count_query = f"SELECT COUNT(*) FROM proxies WHERE {where_clause}"
async with db.execute(count_query, list(params)) as cursor:
row = await cursor.fetchone()
total = row[0] if row else 0
data_query = f"""
SELECT {_SELECT_PROXY_COLS}
FROM proxies
WHERE {where_clause}
ORDER BY {order_clause}
LIMIT ? OFFSET ?
"""
params.extend([page_size, offset])
async with db.execute(data_query, params) as cursor:
rows = await cursor.fetchall()
proxies = [_row_to_proxy(row) for row in rows]
return proxies, total
@staticmethod
async def get_stats(db: aiosqlite.Connection) -> dict:
query = """
SELECT
COUNT(*) as total,
COUNT(CASE WHEN validated = 0 THEN 1 END) as pending,
COUNT(CASE WHEN validated = 1 AND score > 0 THEN 1 END) as available,
(SELECT AVG(score) FROM proxies WHERE validated = 1 AND score > 0) as avg_score,
COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
COUNT(CASE WHEN protocol = 'socks5' THEN 1 END) as socks5_count
FROM proxies
"""
async with db.execute(query) as cursor:
row = await cursor.fetchone()
if row:
return {
"total": row[0] or 0,
"pending": row[1] or 0,
"available": row[2] or 0,
"avg_score": round(row[3], 2) if row[3] is not None else 0,
"http_count": row[4] or 0,
"https_count": row[5] or 0,
"socks4_count": row[6] or 0,
"socks5_count": row[7] or 0,
}
return {
"total": 0,
"pending": 0,
"available": 0,
"avg_score": 0,
"http_count": 0,
"https_count": 0,
"socks4_count": 0,
"socks5_count": 0,
}
@staticmethod
async def get_today_new_count(db: aiosqlite.Connection) -> int:
"""今日新增:仅统计今日入库且已验证可用(与 get_stats.available 语义一致)。"""
try:
async with db.execute(
"""
SELECT COUNT(*) FROM proxies
WHERE DATE(created_at) = DATE('now', 'localtime')
AND validated = 1
AND score > 0
"""
) as cursor:
row = await cursor.fetchone()
return row[0] if row else 0
except Exception as e:
logger.error(f"get_today_new_count failed: {e}", exc_info=True)
return 0
@staticmethod
async def clean_invalid(db: aiosqlite.Connection) -> int:
await db.execute(
"DELETE FROM proxies WHERE validated = 1 AND score <= 0"
)
await db.commit()
return db.total_changes
@staticmethod
async def clean_expired(db: aiosqlite.Connection, days: int) -> int:
try:
await db.execute(
"DELETE FROM proxies WHERE last_check < datetime('now', '-' || ? || ' days')",
(days,),
)
await db.commit()
return db.total_changes
except Exception as e:
logger.error(f"clean_expired failed: {e}", exc_info=True)
return 0