first commit
This commit is contained in:
89
core/auth.py
Normal file
89
core/auth.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from fastapi import HTTPException, Depends, Header, status
|
||||
from typing import Optional
|
||||
from config import Config
|
||||
from core.log import logger
|
||||
|
||||
class PermissionLevel:
    """String constants naming the API permission tiers."""

    # Read-only access: may query data but not modify it.
    READ_ONLY = "read_only"
    # Full administrative access.
    ADMIN = "admin"
|
||||
def _keys_match(provided, expected) -> bool:
    """Constant-time comparison of a caller-supplied key against a configured key.

    Returns False when the configured key is unset/empty, so a missing config
    value can never be matched by an empty or arbitrary input.
    """
    import hmac  # local import keeps the module's top-level imports unchanged

    if not expected:
        return False
    # hmac.compare_digest avoids leaking key contents through timing.
    return hmac.compare_digest(str(provided).encode("utf-8"),
                               str(expected).encode("utf-8"))


def verify_api_key(
    x_api_key: Optional[str] = Header(None, alias="X-API-Key"),
    authorization: Optional[str] = Header(None)
) -> str:
    """Validate the request's API key and return its permission level.

    Args:
        x_api_key: API key from the ``X-API-Key`` header.
        authorization: ``Bearer <key>`` token from the Authorization header;
            when present it takes precedence over ``X-API-Key``.

    Returns:
        str: one of the ``PermissionLevel`` constants.

    Raises:
        HTTPException: 401 when the key is missing or invalid.
    """
    api_key = x_api_key

    if authorization and authorization.startswith("Bearer "):
        # Fix: strip only the leading scheme. str.replace() removed EVERY
        # occurrence of "Bearer ", corrupting keys containing that substring.
        api_key = authorization[len("Bearer "):]

    if not api_key:
        logger.warning("API请求缺少API Key")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="缺少API Key,请在请求头中添加 X-API-Key 或 Authorization: Bearer <key>",
            headers={"WWW-Authenticate": "Bearer"},
        )

    # Constant-time checks; admin key is tried first.
    if _keys_match(api_key, Config.ADMIN_API_KEY):
        logger.info(f"管理员API认证成功: {api_key[:8]}...")
        return PermissionLevel.ADMIN
    elif _keys_match(api_key, Config.API_KEY):
        logger.info(f"普通用户API认证成功: {api_key[:8]}...")
        return PermissionLevel.READ_ONLY
    else:
        logger.warning(f"无效的API Key尝试: {api_key[:8]}...")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="无效的API Key",
            headers={"WWW-Authenticate": "Bearer"},
        )
|
||||
|
||||
def require_admin(permission_level: str = Depends(verify_api_key)) -> str:
    """Dependency that restricts an endpoint to admin callers.

    Args:
        permission_level: level resolved by ``verify_api_key``.

    Returns:
        str: the permission level, passed through unchanged.

    Raises:
        HTTPException: 403 when the caller is not an admin.
    """
    # Guard-clause form: admins fall straight through.
    if permission_level == PermissionLevel.ADMIN:
        return permission_level

    logger.warning("非管理员用户尝试访问管理接口")
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="需要管理员权限才能执行此操作"
    )
|
||||
|
||||
def skip_auth_for_dev() -> Optional[str]:
    """Bypass authentication in development mode.

    Returns:
        Optional[str]: admin permission level when the ``SKIP_AUTH`` env var
        is ``true`` (case-insensitive); otherwise ``None``.

    Warning:
        Development only — production must use real authentication.
    """
    import os

    # Anything other than an explicit "true" keeps authentication on.
    if os.getenv("SKIP_AUTH", "false").lower() != "true":
        return None

    logger.warning("开发模式:跳过API Key认证")
    return PermissionLevel.ADMIN
|
||||
86
core/crawler.py
Normal file
86
core/crawler.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import random
|
||||
from core.log import logger
|
||||
|
||||
class BaseCrawler:
    """Minimal async HTTP crawler with User-Agent rotation and retries."""

    def __init__(self):
        # Pool of desktop/mobile user agents; one is picked per request.
        self.user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Mobile/15E148 Safari/604.1"
        ]

    def get_headers(self):
        """Build browser-like request headers with a random User-Agent."""
        return {
            'User-Agent': random.choice(self.user_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Connection': 'keep-alive',
        }

    async def fetch(self, url, method='GET', params=None, data=None, proxies=None, timeout=10, retry_count=3):
        """Fetch *url* asynchronously, retrying up to *retry_count* times.

        Args:
            url: target URL.
            method: HTTP method, default GET.
            params / data: query parameters / request body, passed through.
            proxies: aiohttp proxy URL (e.g. http://user:pass@host:port).
            timeout: total timeout in seconds per attempt.
            retry_count: number of attempts before giving up.

        Returns:
            The decoded response body on success, otherwise None.
        """
        # Fix: use the rotating header set instead of a hard-coded User-Agent,
        # so the user_agents pool defined above is actually exercised.
        async with aiohttp.ClientSession(headers=self.get_headers()) as session:
            for attempt in range(retry_count):
                try:
                    # NOTE: aiohttp's proxy format differs from requests';
                    # it expects a single URL like http://user:pass@host:port.
                    async with session.request(
                        method=method,
                        url=url,
                        params=params,
                        data=data,
                        proxy=proxies,
                        timeout=aiohttp.ClientTimeout(total=timeout)
                    ) as response:
                        if response.status == 200:
                            # Read raw bytes first, then sort out the encoding.
                            content = await response.read()
                            encoding = response.get_encoding()
                            if encoding == 'utf-8' or not encoding:
                                try:
                                    return content.decode('utf-8')
                                except UnicodeDecodeError:
                                    # Fall back to gbk (common on Chinese sites).
                                    return content.decode('gbk', errors='ignore')
                            return content.decode(encoding, errors='ignore')
                        else:
                            logger.warning(f"请求失败 [{response.status}]: {url}, 正在进行第 {attempt+1} 次重试...")
                except Exception as e:
                    logger.error(f"请求异常: {url}, 错误: {e}, 正在进行第 {attempt+1} 次重试...")

                # Random back-off between attempts; skip it after the last one.
                if attempt < retry_count - 1:
                    await asyncio.sleep(random.uniform(1, 3))

        return None
|
||||
|
||||
class BasePlugin(BaseCrawler):
    """Base class for crawl plugins; subclasses implement parse()."""

    def __init__(self):
        super().__init__()
        self.name = "BasePlugin"  # plugin display name
        self.urls = []            # pages this plugin scrapes
        self.enabled = True       # disabled plugins are skipped by the manager

    async def parse(self, html):
        """Parse page content asynchronously; must be implemented by subclasses."""
        raise NotImplementedError("Please implement parse method")

    async def run(self):
        """Fetch every configured URL and collect the parsed results."""
        logger.info(f"正在运行插件: {self.name}")
        collected = []
        for page_url in self.urls:
            # Remember the URL being fetched so parse() can refer to it.
            self.current_url = page_url
            page = await self.fetch(page_url)
            if page:
                async for item in self.parse(page):
                    collected.append(item)
            # Short random pause between pages.
            await asyncio.sleep(random.uniform(1, 2))
        return collected
|
||||
38
core/log.py
Normal file
38
core/log.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
class LogHandler(logging.Logger):
    """Logger that writes to the console and to a per-day file under logs/."""

    def __init__(self, name='ProxyPool', level=logging.INFO):
        super().__init__(name, level)

        # Resolve <project root>/logs and create it on first use.
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        log_dir = os.path.join(project_root, 'logs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        # One file per calendar day, named by date only.
        log_path = os.path.join(log_dir, f"{datetime.now().strftime('%Y-%m-%d')}.log")

        fmt = logging.Formatter(
            '[%(asctime)s] %(name)s [%(levelname)s] %(filename)s[line:%(lineno)d]: %(message)s'
        )

        # File handler first, then console handler — same format for both.
        for handler in (logging.FileHandler(log_path, encoding='utf-8'),
                        logging.StreamHandler()):
            handler.setFormatter(fmt)
            self.addHandler(handler)
|
||||
|
||||
# Default module-level logger instance for direct external use.
logger = LogHandler()

if __name__ == '__main__':
    # Smoke test: emits one line to the console and to today's log file.
    logger.info('这是一条按日期存储的日志测试')
|
||||
125
core/plugin_manager.py
Normal file
125
core/plugin_manager.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import os
|
||||
import importlib
|
||||
import inspect
|
||||
import asyncio
|
||||
from typing import List, Dict, Optional
|
||||
from core.crawler import BasePlugin
|
||||
from core.log import logger
|
||||
|
||||
class PluginManager:
    """Discovers, tracks, and runs crawl plugins found in *plugin_dir*.

    Plugins are concrete BasePlugin subclasses defined in .py files under the
    plugin directory; plugins whose ``enabled`` flag is False at import time
    are skipped entirely.
    """

    def __init__(self, plugin_dir='plugins'):
        self.plugin_dir = plugin_dir
        self.plugins = []        # loaded, enabled plugin instances
        self.plugin_stats = {}   # plugin name -> run counters + last_run stamp
        self._load_plugins()
        self._init_stats()

    @staticmethod
    def _empty_stats():
        """Fresh per-plugin counter record."""
        return {'success_count': 0, 'failure_count': 0, 'last_run': None}

    def _init_stats(self):
        """Seed a zeroed stats record for every loaded plugin."""
        for plugin in self.plugins:
            self.plugin_stats[plugin.name] = self._empty_stats()

    def _load_plugins(self):
        """Import every plugin module and instantiate enabled BasePlugin subclasses."""
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        full_plugin_path = os.path.join(base_dir, self.plugin_dir)

        if not os.path.exists(full_plugin_path):
            logger.error(f"插件目录不存在: {full_plugin_path}")
            return

        for filename in os.listdir(full_plugin_path):
            if filename.endswith('.py') and not filename.startswith('__'):
                module_name = f"{self.plugin_dir}.{filename[:-3]}"
                try:
                    module = importlib.import_module(module_name)
                    for name, obj in inspect.getmembers(module):
                        # Only concrete subclasses — the base class itself is skipped.
                        if inspect.isclass(obj) and issubclass(obj, BasePlugin) and obj is not BasePlugin:
                            plugin_instance = obj()
                            if plugin_instance.enabled:
                                logger.info(f"成功加载插件: {name} 来自 {module_name}")
                                self.plugins.append(plugin_instance)
                            else:
                                logger.info(f"插件已禁用,跳过加载: {name} 来自 {module_name}")
                except Exception as e:
                    logger.error(f"加载插件失败 {module_name}: {e}")

    def get_plugin_by_name(self, plugin_name: str) -> Optional['BasePlugin']:
        """Return the loaded plugin named *plugin_name*, or None if unknown."""
        for plugin in self.plugins:
            if plugin.name == plugin_name:
                return plugin
        return None

    def get_all_plugin_info(self) -> List[Dict]:
        """Summarize every loaded plugin together with its run statistics."""
        plugins_info = []
        for plugin in self.plugins:
            stats = self.plugin_stats.get(plugin.name, self._empty_stats())
            plugins_info.append({
                'id': plugin.name,
                'name': plugin.name,
                'enabled': plugin.enabled,
                'description': getattr(plugin, 'description', f'从{plugin.name}网站爬取代理'),
                'last_run': stats['last_run'],
                'success_count': stats['success_count'],
                'failure_count': stats['failure_count']
            })
        return plugins_info

    def toggle_plugin(self, plugin_name: str, enabled: bool) -> bool:
        """Flip a loaded plugin's enabled flag; returns False when it is unknown.

        NOTE(review): plugins disabled at import time were never loaded, so
        they cannot be re-enabled here — confirm whether that is intended.
        """
        plugin = self.get_plugin_by_name(plugin_name)
        if plugin:
            plugin.enabled = enabled
            logger.info(f"插件 {plugin_name} 已{'启用' if enabled else '禁用'}")
            return True
        return False

    def _record_run(self, plugin_name: str, successes: int, failures: int):
        """Accumulate run counters and stamp last_run for one plugin."""
        from datetime import datetime  # mirrors the original's function-scope import

        # setdefault guards against a plugin missing from the stats map.
        stats = self.plugin_stats.setdefault(plugin_name, self._empty_stats())
        stats['success_count'] += successes
        stats['failure_count'] += failures
        stats['last_run'] = datetime.now().isoformat()

    async def run_plugin(self, plugin_name: str):
        """Run a single plugin by name; returns its results, or [] on error."""
        plugin = self.get_plugin_by_name(plugin_name)
        if not plugin:
            logger.error(f"插件不存在: {plugin_name}")
            return []

        if not plugin.enabled:
            logger.warning(f"插件已禁用: {plugin_name}")
            return []

        try:
            results = await plugin.run()
            self._record_run(plugin.name, len(results), 0)
            logger.info(f"插件 {plugin_name} 执行完成,成功: {len(results)}")
            return results
        except Exception as e:
            logger.error(f"插件 {plugin_name} 执行失败: {e}")
            self._record_run(plugin.name, 0, 1)
            return []

    async def run_all(self):
        """Run every plugin concurrently and yield proxies as one flat stream."""
        tasks = [plugin.run() for plugin in self.plugins]
        # Run all plugins concurrently and collect their result lists.
        results_list = await asyncio.gather(*tasks)

        # Flatten the nested lists and yield each proxy.
        for results in results_list:
            for proxy in results:
                yield proxy
|
||||
334
core/sqlite.py
Normal file
334
core/sqlite.py
Normal file
@@ -0,0 +1,334 @@
|
||||
import aiosqlite
|
||||
import os
|
||||
import asyncio
|
||||
from core.log import logger
|
||||
|
||||
VALID_PROTOCOLS = ['http', 'https', 'socks4', 'socks5']
|
||||
|
||||
class SQLiteManager:
    """Async singleton wrapper around the proxies SQLite database.

    One aiosqlite connection is shared process-wide, guarded by an asyncio
    lock; WAL journal mode keeps concurrent readers cheap.
    """

    # Columns/directions callers may sort by. These values are interpolated
    # into SQL text, so restrict them to a fixed whitelist (injection guard).
    _SORTABLE_COLUMNS = {'id', 'ip', 'port', 'protocol', 'score', 'last_check'}
    _SORT_ORDERS = {'ASC', 'DESC'}

    _instance = None
    _connection = None
    _lock = asyncio.Lock()

    def __new__(cls, *args, **kwargs):
        # Classic singleton: every construction returns the same object.
        if cls._instance is None:
            cls._instance = super(SQLiteManager, cls).__new__(cls)
        return cls._instance

    def __init__(self, db_path=None):
        # __init__ runs on every construction of the singleton; only the
        # first call actually initializes state.
        if hasattr(self, 'initialized') and self.initialized:
            return

        if db_path is None:
            # Default to <project root>/db/proxies.sqlite, creating db/ as needed.
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            db_dir = os.path.join(base_dir, 'db')
            if not os.path.exists(db_dir):
                os.makedirs(db_dir)
            self.db_path = os.path.join(db_dir, 'proxies.sqlite')
        else:
            self.db_path = db_path

        self.initialized = True

    async def get_connection(self):
        """Return the shared connection, opening and tuning it on first use."""
        async with self._lock:
            if self._connection is None:
                self._connection = await aiosqlite.connect(self.db_path)
                await self._connection.execute("PRAGMA journal_mode=WAL")
                await self._connection.execute("PRAGMA synchronous=NORMAL")
                await self._connection.execute("PRAGMA cache_size=-64000")
                await self._connection.execute("PRAGMA temp_store=MEMORY")
            return self._connection

    async def close_connection(self):
        """Close and forget the shared connection (it reopens lazily later)."""
        async with self._lock:
            if self._connection is not None:
                await self._connection.close()
                self._connection = None

    async def init_db(self):
        """Create the proxies table and its indexes if they do not exist."""
        db = await self.get_connection()
        await db.execute('''
            CREATE TABLE IF NOT EXISTS proxies (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ip TEXT NOT NULL,
                port INTEGER NOT NULL,
                protocol TEXT DEFAULT 'http',
                score INTEGER DEFAULT 10,
                last_check TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(ip, port)
            )
        ''')

        await db.execute('CREATE INDEX IF NOT EXISTS idx_score ON proxies(score)')
        await db.execute('CREATE INDEX IF NOT EXISTS idx_protocol ON proxies(protocol)')
        await db.execute('CREATE INDEX IF NOT EXISTS idx_last_check ON proxies(last_check)')
        await db.execute('CREATE INDEX IF NOT EXISTS idx_ip_port ON proxies(ip, port)')

        await db.commit()

    async def insert_proxy(self, ip, port, protocol='http', score=10):
        """Insert a proxy, or refresh score/protocol/last_check if it exists.

        Returns:
            bool: True on success, False on a database error.
        """
        try:
            if protocol not in VALID_PROTOCOLS:
                # Fix: log BEFORE overwriting, so the warning reports the
                # actual invalid value instead of always printing 'http'.
                logger.warning(f"无效的协议类型 {protocol},默认使用 http")
                protocol = 'http'

            db = await self.get_connection()
            # Check for an existing row first (UNIQUE on ip+port).
            async with db.execute('SELECT score FROM proxies WHERE ip = ? AND port = ?', (ip, port)) as cursor:
                row = await cursor.fetchone()
            if row:
                # Existing proxy: refresh its score, protocol, and check time.
                await db.execute('''
                    UPDATE proxies SET last_check = CURRENT_TIMESTAMP, score = ?, protocol = ? WHERE ip = ? AND port = ?
                ''', (score, protocol, ip, port))
            else:
                await db.execute('''
                    INSERT INTO proxies (ip, port, protocol, score, last_check)
                    VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
                ''', (ip, port, protocol, score))
            await db.commit()
            return True
        except aiosqlite.IntegrityError as e:
            # Race fallback: another writer inserted the row between our
            # SELECT and INSERT — treat it as an update.
            if "UNIQUE" in str(e):
                if protocol not in VALID_PROTOCOLS:
                    protocol = 'http'
                db = await self.get_connection()
                await db.execute('''
                    UPDATE proxies SET last_check = CURRENT_TIMESTAMP, score = ?, protocol = ? WHERE ip = ? AND port = ?
                ''', (score, protocol, ip, port))
                await db.commit()
                return True
            else:
                logger.error(f"数据库完整性错误: {e}")
                return False
        except Exception as e:
            logger.error(f"插入代理失败 {ip}:{port} - {e}")
            return False

    async def get_all_proxies(self):
        """Return every proxy row as (ip, port, protocol, score, last_check)."""
        db = await self.get_connection()
        async with db.execute('SELECT ip, port, protocol, score, last_check FROM proxies') as cursor:
            return await cursor.fetchall()

    async def get_random_proxy(self):
        """Return one random proxy with a positive score, or None."""
        db = await self.get_connection()
        async with db.execute('SELECT ip, port, protocol, score, last_check FROM proxies WHERE score > 0 ORDER BY RANDOM() LIMIT 1') as cursor:
            return await cursor.fetchone()

    async def update_score(self, ip, port, delta, min_score=0, max_score=100):
        """Adjust a proxy's score by *delta*, clamped to [min_score, max_score].

        Returns:
            bool: True when the proxy existed and was updated.
        """
        try:
            db = await self.get_connection()
            async with db.execute('SELECT score FROM proxies WHERE ip = ? AND port = ?', (ip, port)) as cursor:
                row = await cursor.fetchone()
            if row:
                current_score = row[0]
                new_score = max(min_score, min(max_score, current_score + delta))
                await db.execute('''
                    UPDATE proxies SET score = ?, last_check = CURRENT_TIMESTAMP WHERE ip = ? AND port = ?
                ''', (new_score, ip, port))
                if new_score <= 0:
                    # NOTE(review): this purges EVERY zero-score proxy, not
                    # just this one — preserved as-is; confirm it is intended.
                    await db.execute('DELETE FROM proxies WHERE score <= 0')
                await db.commit()
                return True
            return False
        except Exception as e:
            logger.error(f"更新代理分数失败 {ip}:{port} - {e}")
            return False

    async def delete_proxy(self, ip, port):
        """Delete one proxy identified by ip and port."""
        db = await self.get_connection()
        await db.execute('DELETE FROM proxies WHERE ip = ? AND port = ?', (ip, port))
        await db.commit()

    async def count_proxies(self):
        """Return the total number of stored proxies."""
        db = await self.get_connection()
        async with db.execute('SELECT COUNT(*) FROM proxies') as cursor:
            row = await cursor.fetchone()
            return row[0] if row else 0

    def _sanitize_sort(self, sort_by, sort_order):
        """Clamp caller-supplied sort column/direction to the whitelist.

        Prevents SQL injection through the sort parameters, which must be
        interpolated into the query text (placeholders cannot name columns).
        """
        column = sort_by if sort_by in self._SORTABLE_COLUMNS else 'last_check'
        direction = sort_order.upper() if isinstance(sort_order, str) else 'DESC'
        if direction not in self._SORT_ORDERS:
            direction = 'DESC'
        return f'{column} {direction}'

    def _build_filters(self, protocol, min_score, max_score):
        """Build the WHERE clause and parameter list shared by proxy queries."""
        conditions = ['score >= ?']
        params = [min_score]

        if protocol:
            conditions.append('protocol = ?')
            params.append(protocol)

        if max_score is not None:
            conditions.append('score <= ?')
            params.append(max_score)

        return ' AND '.join(conditions), params

    async def get_proxies_paginated_with_total(self, page: int = 1, page_size: int = 20,
                                               protocol: str = None, min_score: int = 0,
                                               max_score: int = None,
                                               sort_by: str = 'last_check',
                                               sort_order: str = 'DESC'):
        """Page through proxies, returning (rows, total) from a single query."""
        db = await self.get_connection()
        where_clause, params = self._build_filters(protocol, min_score, max_score)
        order_by_clause = self._sanitize_sort(sort_by, sort_order)

        offset = (page - 1) * page_size
        # COUNT(*) OVER() attaches the filtered total to every returned row.
        query = f'''
            SELECT ip, port, protocol, score, last_check,
                   COUNT(*) OVER() as total_count
            FROM proxies
            WHERE {where_clause}
            ORDER BY {order_by_clause}
            LIMIT ? OFFSET ?
        '''
        params.extend([page_size, offset])

        async with db.execute(query, params) as cursor:
            rows = await cursor.fetchall()
            total = rows[0][5] if rows else 0
            proxies = [(row[0], row[1], row[2], row[3], row[4]) for row in rows]
            return proxies, total

    async def get_proxies_paginated(self, page: int = 1, page_size: int = 20,
                                    protocol: str = None, min_score: int = 0,
                                    max_score: int = None,
                                    sort_by: str = 'last_check',
                                    sort_order: str = 'DESC'):
        """Page through proxies matching the given filters."""
        db = await self.get_connection()
        where_clause, params = self._build_filters(protocol, min_score, max_score)
        order_by_clause = self._sanitize_sort(sort_by, sort_order)

        offset = (page - 1) * page_size
        query = f'''
            SELECT ip, port, protocol, score, last_check
            FROM proxies
            WHERE {where_clause}
            ORDER BY {order_by_clause}
            LIMIT ? OFFSET ?
        '''
        params.extend([page_size, offset])

        async with db.execute(query, params) as cursor:
            return await cursor.fetchall()

    async def get_proxies_total(self, protocol: str = None, min_score: int = 0, max_score: int = None):
        """Count proxies matching the given filters."""
        db = await self.get_connection()
        where_clause, params = self._build_filters(protocol, min_score, max_score)

        query = f'SELECT COUNT(*) FROM proxies WHERE {where_clause}'

        async with db.execute(query, params) as cursor:
            row = await cursor.fetchone()
            return row[0] if row else 0

    async def get_proxy_detail(self, ip: str, port: int):
        """Return one proxy's row, or None when it does not exist."""
        db = await self.get_connection()
        async with db.execute(
            'SELECT ip, port, protocol, score, last_check FROM proxies WHERE ip = ? AND port = ?',
            (ip, port)
        ) as cursor:
            row = await cursor.fetchone()
            return row

    async def batch_delete_proxies(self, proxy_list: list):
        """Delete each (ip, port) pair; returns how many rows were removed."""
        deleted_count = 0
        db = await self.get_connection()
        for ip, port in proxy_list:
            cursor = await db.execute('DELETE FROM proxies WHERE ip = ? AND port = ?', (ip, port))
            deleted_count += cursor.rowcount
        await db.commit()
        return deleted_count

    async def get_stats(self):
        """Collect aggregate counters: totals, per-protocol counts, average score."""
        db = await self.get_connection()
        stats = {}

        async with db.execute('SELECT COUNT(*) FROM proxies') as cursor:
            row = await cursor.fetchone()
            stats['total'] = row[0] if row else 0

        async with db.execute('SELECT COUNT(*) FROM proxies WHERE score > 0') as cursor:
            row = await cursor.fetchone()
            stats['available'] = row[0] if row else 0

        # One parameterized query per protocol replaces four copy-pasted
        # blocks that used fragile double-quoted SQL "string" literals.
        for proto in VALID_PROTOCOLS:
            async with db.execute('SELECT COUNT(*) FROM proxies WHERE protocol = ?', (proto,)) as cursor:
                row = await cursor.fetchone()
                stats[f'{proto}_count'] = row[0] if row else 0

        async with db.execute('SELECT AVG(score) FROM proxies') as cursor:
            row = await cursor.fetchone()
            stats['avg_score'] = row[0] if row and row[0] else 0

        return stats

    async def get_today_new_count(self):
        """Count proxies whose last_check falls on today's (local) date."""
        try:
            db = await self.get_connection()
            query = '''
                SELECT COUNT(*) FROM proxies
                WHERE DATE(last_check) = DATE('now', 'localtime')
            '''
            async with db.execute(query) as cursor:
                row = await cursor.fetchone()
                return row[0] if row else 0
        except Exception as e:
            logger.error(f"获取今日新增数量失败: {e}")
            return 0

    async def clean_invalid_proxies(self):
        """Delete proxies with score <= 0; returns how many were removed."""
        db = await self.get_connection()
        async with db.execute('DELETE FROM proxies WHERE score <= 0') as cursor:
            deleted_count = cursor.rowcount
        await db.commit()
        return deleted_count
|
||||
76
core/validator.py
Normal file
76
core/validator.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import random
|
||||
import time
|
||||
from core.log import logger
|
||||
|
||||
class ProxyValidator:
    """Concurrency-limited async proxy checker against public IP-echo services."""

    def __init__(self, max_concurrency=50, timeout=5):
        # Validation targets: services that simply echo the caller's IP.
        self.http_sources = [
            "http://httpbin.org/ip",
            "http://api.ipify.org"
        ]
        self.https_sources = [
            "https://httpbin.org/ip",
            "https://api.ipify.org"
        ]
        self.semaphore = asyncio.Semaphore(max_concurrency)  # caps in-flight checks
        self.timeout = timeout
        self.session = None  # created in __aenter__; use `async with` to manage it

    async def __aenter__(self):
        # Lazily create the shared session when entering the context.
        if not self.session:
            self.session = aiohttp.ClientSession(
                connector=aiohttp.TCPConnector(ssl=False, limit=0, force_close=True),
                timeout=aiohttp.ClientTimeout(total=self.timeout, connect=3)
            )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def validate(self, ip, port, protocol='http'):
        """Check whether one proxy works.

        Must be called inside ``async with`` so that self.session exists.

        Returns:
            tuple[bool, float]: (usable, latency in ms — 0 on failure).
        """
        protocol = protocol.lower()
        sources = self.https_sources if protocol == 'https' else self.http_sources
        test_url = random.choice(sources)

        # aiohttp proxy URL format. NOTE(review): this is always http://, so
        # socks4/socks5 proxies are not actually tested over SOCKS — confirm.
        proxy_url = f"http://{ip}:{port}"

        async with self.semaphore:
            start_time = time.time()
            try:
                # Reuse the shared session for every check.
                async with self.session.get(
                    test_url,
                    proxy=proxy_url,
                    allow_redirects=True,
                    timeout=aiohttp.ClientTimeout(total=self.timeout, connect=3)
                ) as response:
                    if response.status in [200, 301, 302]:
                        try:
                            content = await response.text()
                            # Body must look like an IP-echo payload.
                            if 'ip' in content.lower() or 'origin' in content.lower():
                                latency = round((time.time() - start_time) * 1000, 2)
                                logger.info(f"验证成功: {ip}:{port} ({protocol}) - 延迟: {latency}ms")
                                return True, latency
                        except Exception:
                            # Fix: narrowed from a bare `except:` so task
                            # cancellation is no longer swallowed here. A good
                            # status with an unreadable body still counts.
                            latency = round((time.time() - start_time) * 1000, 2)
                            logger.info(f"验证成功: {ip}:{port} ({protocol}) - 延迟: {latency}ms")
                            return True, latency
                    return False, 0
            except asyncio.TimeoutError:
                logger.warning(f"验证超时: {ip}:{port} ({protocol})")
                return False, 0
            except Exception as e:
                logger.warning(f"验证失败: {ip}:{port} ({protocol}) - {e}")
                return False, 0
|
||||
Reference in New Issue
Block a user