feat: fpw plugins, validation/crawl perf, WS stats, test DB isolation
- Add Free_Proxy_Website-style fpw_* plugins and register them
- Per-plugin crawl timeout (crawl_timeout_seconds=120); remove global crawl_timeout setting
- Validator: fix connect vs total timeout on save; SOCKS session LRU cache; drop redundant semaphore
- Validation handler uses single DB connection; batch upsert after crawl; WorkerPool put_nowait
- Remove unused max_retries from settings API/UI; settings maintenance SQL + init_db cleanup of deprecated keys
- WebSocket dashboard stats; ProxyList pool_filter and API alignment
- POST /api/proxies/delete-one for IPv6-safe deletes; task poll stops on 404
- pytest uses PROXYPOOL_DB_PATH=db/proxies.test.sqlite so tests do not wipe production DB
- .gitignore: explicit proxies.test.sqlite patterns; fix plugin_service ValidationException import

Made-with: Cursor
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -30,6 +30,9 @@ env/
|
|||||||
*.sqlite
|
*.sqlite
|
||||||
*.sqlite3
|
*.sqlite3
|
||||||
*.db
|
*.db
|
||||||
|
# pytest 隔离库(PROXYPOOL_DB_PATH=db/proxies.test.sqlite),勿提交
|
||||||
|
**/proxies.test.sqlite
|
||||||
|
proxies.test.sqlite
|
||||||
*.db-shm
|
*.db-shm
|
||||||
*.db-wal
|
*.db-wal
|
||||||
|
|
||||||
|
|||||||
@@ -221,10 +221,15 @@ POST /api/settings
|
|||||||
- **验证超时**: 3-30秒,默认 5秒
|
- **验证超时**: 3-30秒,默认 5秒
|
||||||
- **验证并发数**: 10-200,默认 50
|
- **验证并发数**: 10-200,默认 50
|
||||||
|
|
||||||
### 评分机制
|
### 待验证与可用
|
||||||
|
- **爬取**:代理默认以「待验证」入库(`validated=0`,分数为 0),不会立刻参与随机/导出。
|
||||||
|
- **验证**:在设置页「立即验证全部」或开启自动验证后,会**先验证待验证队列**,再按检查时间**复检已入库代理**;通过后标记为已验证并赋予分数。
|
||||||
|
- **设置**:「爬取后立即验证」默认关闭;开启后爬取完成会像旧版一样立刻排队验证。
|
||||||
|
|
||||||
|
### 评分机制(仅针对已验证入池的代理)
|
||||||
- **验证成功**: +10 分
|
- **验证成功**: +10 分
|
||||||
- **验证失败**: -5 分
|
- **验证失败**: -5 分
|
||||||
- **分数为 0**: 自动删除
|
- **分数为 0**: 自动删除(待验证阶段验证失败则直接丢弃该条)
|
||||||
|
|
||||||
## 🔧 常见问题
|
## 🔧 常见问题
|
||||||
|
|
||||||
|
|||||||
@@ -64,7 +64,8 @@ export const proxiesAPI = {
|
|||||||
getProxies: (params, signal) =>
|
getProxies: (params, signal) =>
|
||||||
api.post('/api/proxies', cleanParams(params), { signal }),
|
api.post('/api/proxies', cleanParams(params), { signal }),
|
||||||
|
|
||||||
deleteProxy: (ip, port) => api.delete(`/api/proxies/${ip}/${port}`),
|
deleteProxy: (ip, port) =>
|
||||||
|
api.post('/api/proxies/delete-one', { ip, port }),
|
||||||
|
|
||||||
batchDeleteProxies: (proxies) => api.post('/api/proxies/batch-delete', { proxies }),
|
batchDeleteProxies: (proxies) => api.post('/api/proxies/batch-delete', { proxies }),
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,8 @@ const props = defineProps({
|
|||||||
type: {
|
type: {
|
||||||
type: String,
|
type: String,
|
||||||
default: 'default',
|
default: 'default',
|
||||||
validator: (value) => ['default', 'total', 'available', 'new', 'score'].includes(value)
|
validator: (value) =>
|
||||||
|
['default', 'total', 'pending', 'available', 'new', 'score'].includes(value)
|
||||||
},
|
},
|
||||||
/** 图标组件 */
|
/** 图标组件 */
|
||||||
icon: {
|
icon: {
|
||||||
@@ -79,6 +80,11 @@ const displayValue = computed(() => {
|
|||||||
filter: drop-shadow(0 0 8px rgba(34, 197, 94, 0.4));
|
filter: drop-shadow(0 0 8px rgba(34, 197, 94, 0.4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.stat-card.pending .stat-icon {
|
||||||
|
color: var(--warning);
|
||||||
|
filter: drop-shadow(0 0 8px rgba(250, 204, 21, 0.45));
|
||||||
|
}
|
||||||
|
|
||||||
.stat-card.new .stat-icon {
|
.stat-card.new .stat-icon {
|
||||||
color: var(--warning);
|
color: var(--warning);
|
||||||
filter: drop-shadow(0 0 8px rgba(245, 158, 11, 0.4));
|
filter: drop-shadow(0 0 8px rgba(245, 158, 11, 0.4));
|
||||||
|
|||||||
134
WebUI/src/composables/useStatsWebSocket.js
Normal file
134
WebUI/src/composables/useStatsWebSocket.js
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
import { onUnmounted } from 'vue'
|
||||||
|
import { useProxyStore } from '../stores/proxy'
|
||||||
|
|
||||||
|
const MAX_DELAY_MS = 30000
|
||||||
|
const INITIAL_DELAY_MS = 1000
|
||||||
|
|
||||||
|
/**
 * Derives the statistics WebSocket URL (`/api/ws`).
 *
 * Resolution order:
 * 1. `VITE_WS_URL`, when set to a non-blank value: trailing slashes are
 *    stripped and `/api/ws` is appended unless the value already ends with it.
 * 2. Otherwise `VITE_API_BASE_URL` (default `http://localhost:18080`): the
 *    scheme is mapped `https:` -> `wss:` (anything else -> `ws:`), the path is
 *    replaced by `/api/ws`, and query string and fragment are dropped.
 *
 * A relative `VITE_API_BASE_URL` (for example `/`) is resolved against the
 * current page location instead of throwing `TypeError: Invalid URL`.
 *
 * @returns {string}
 */
export function resolveWebSocketStatsUrl() {
  const env = import.meta.env

  // Trim before testing so a whitespace-only value counts as "not set".
  const explicit = String(env.VITE_WS_URL ?? '').trim()
  if (explicit) {
    const t = explicit.replace(/\/+$/, '')
    return t.endsWith('/api/ws') ? t : `${t}/api/ws`
  }

  const fallbackBase = 'http://localhost:18080'
  const api = env.VITE_API_BASE_URL || fallbackBase
  // The second argument is only consulted when `api` is relative.
  const u = new URL(api, globalThis.location?.href ?? fallbackBase)
  u.protocol = u.protocol === 'https:' ? 'wss:' : 'ws:'
  u.pathname = '/api/ws'
  u.search = ''
  u.hash = ''
  return u.toString()
}
|
||||||
|
|
||||||
|
/**
 * Connects to the backend WebSocket and pushes every `stats` message into the
 * proxy store.
 *
 * - Reconnects with exponential backoff: `INITIAL_DELAY_MS` doubled per failed
 *   attempt, capped at `MAX_DELAY_MS`; the counter resets once a socket opens.
 * - Keeps the connection closed while the tab is hidden and reconnects as soon
 *   as it becomes visible again.
 * - Registers `disconnect` with `onUnmounted`, so the socket, the pending
 *   reconnect timer and the visibility listener are released with the caller.
 *
 * At most one socket is alive at a time: every handler is bound to the socket
 * it was created for, and a socket is fully detached before it is replaced.
 *
 * @returns {{ start: () => void, disconnect: () => void }}
 */
export function useStatsWebSocket() {
  const store = useProxyStore()
  let ws = null
  let reconnectTimer = null
  let attempt = 0
  let stopped = false
  let paused = false

  /** Delay before the next reconnect attempt, in milliseconds. */
  function backoffDelayMs() {
    return Math.min(INITIAL_DELAY_MS * 2 ** attempt, MAX_DELAY_MS)
  }

  /** Cancels a scheduled reconnect, if any. */
  function clearReconnectTimer() {
    if (reconnectTimer) {
      clearTimeout(reconnectTimer)
      reconnectTimer = null
    }
  }

  /**
   * Detaches and closes the current socket, if any.
   * All handlers are removed first so that late events from the discarded
   * socket can neither schedule a reconnect nor touch its replacement.
   */
  function closeSocket() {
    const socket = ws
    if (!socket) return
    ws = null
    socket.onopen = null
    socket.onmessage = null
    socket.onclose = null
    socket.onerror = null
    try {
      socket.close()
    } catch {
      // ignore
    }
  }

  /** Opens a fresh socket unless the composable is stopped or paused. */
  function connect() {
    if (stopped || paused) return
    clearReconnectTimer()
    // Never leave a previous socket dangling when connect() is re-entered.
    closeSocket()

    const socket = new WebSocket(resolveWebSocketStatsUrl())
    ws = socket

    socket.onopen = () => {
      attempt = 0
    }

    socket.onmessage = (ev) => {
      try {
        const msg = JSON.parse(ev.data)
        if (msg.type === 'stats' && msg.data) {
          store.applyStats(msg.data)
        }
        // 'pong' messages are an optional heartbeat and need no handling.
      } catch {
        // ignore malformed
      }
    }

    socket.onclose = () => {
      // A socket that is no longer current must not affect reconnect state.
      if (ws !== socket) return
      ws = null
      if (stopped || paused) return
      attempt += 1
      reconnectTimer = setTimeout(connect, backoffDelayMs())
    }

    socket.onerror = () => {
      // Close the socket that actually failed; its onclose handles the retry.
      try {
        socket.close()
      } catch {
        // ignore
      }
    }
  }

  /** Pauses the connection while the tab is hidden, resumes when visible. */
  function handleVisibility() {
    if (document.hidden) {
      paused = true
      clearReconnectTimer()
      closeSocket()
      return
    }
    paused = false
    if (stopped) return
    attempt = 0
    connect()
  }

  /** Starts (or restarts) the subscription. */
  function start() {
    stopped = false
    // Honour the current visibility so a hidden tab does not open a socket
    // that the next visibilitychange event would duplicate.
    paused = document.hidden
    attempt = 0
    document.addEventListener('visibilitychange', handleVisibility)
    connect()
  }

  /** Stops the subscription and releases every resource it holds. */
  function disconnect() {
    stopped = true
    paused = false
    document.removeEventListener('visibilitychange', handleVisibility)
    clearReconnectTimer()
    closeSocket()
  }

  onUnmounted(disconnect)

  return { start, disconnect }
}
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
import { tasksAPI } from '../api'
|
import { tasksAPI } from '../api'
|
||||||
|
|
||||||
const POLL_INTERVAL = 1000
|
const POLL_INTERVAL = 1000
|
||||||
const MAX_POLL_ATTEMPTS = 30
|
/** 大批量爬取可能超过 30s,适当放宽避免误报「任务进行中」 */
|
||||||
|
const MAX_POLL_ATTEMPTS = 300
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 轮询任务状态直到完成或失败
|
* 轮询任务状态直到完成或失败
|
||||||
@@ -21,7 +22,14 @@ export async function pollTaskStatus(taskId) {
|
|||||||
return response
|
return response
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// 网络异常时继续轮询,不中断
|
const status = error.response?.status
|
||||||
|
if (status === 404) {
|
||||||
|
return {
|
||||||
|
code: 404,
|
||||||
|
message: error.response?.data?.message || '任务不存在',
|
||||||
|
data: { task_id: taskId, status: 'failed', error: 'not_found' }
|
||||||
|
}
|
||||||
|
}
|
||||||
console.warn('轮询任务状态失败:', error)
|
console.warn('轮询任务状态失败:', error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,6 +32,12 @@ export const useProxyStore = defineStore('proxy', () => {
|
|||||||
* 获取统计信息
|
* 获取统计信息
|
||||||
* @returns {Promise<boolean>}
|
* @returns {Promise<boolean>}
|
||||||
*/
|
*/
|
||||||
|
/**
 * Replaces the store's statistics with a shallow copy of `data`.
 * Anything that is not a plain, non-array object is ignored, so a malformed
 * payload leaves the previous statistics in place.
 *
 * @param {object} data - statistics payload, e.g. the `data` field of a
 *   `stats` WebSocket message
 */
function applyStats(data) {
  // Arrays are objects too; spreading one would produce index-keyed stats.
  if (!data || typeof data !== 'object' || Array.isArray(data)) {
    return
  }
  stats.value = { ...data }
}
|
||||||
|
|
||||||
async function fetchStats() {
|
async function fetchStats() {
|
||||||
try {
|
try {
|
||||||
const response = await proxyService.getStats()
|
const response = await proxyService.getStats()
|
||||||
@@ -174,6 +180,7 @@ export const useProxyStore = defineStore('proxy', () => {
|
|||||||
isEmpty,
|
isEmpty,
|
||||||
// Actions
|
// Actions
|
||||||
fetchStats,
|
fetchStats,
|
||||||
|
applyStats,
|
||||||
fetchProxies,
|
fetchProxies,
|
||||||
deleteProxy,
|
deleteProxy,
|
||||||
batchDeleteProxies,
|
batchDeleteProxies,
|
||||||
|
|||||||
@@ -2,40 +2,38 @@
|
|||||||
<div class="page-container">
|
<div class="page-container">
|
||||||
<PageHeader title="代理池管理系统" :icon="MagicStick" />
|
<PageHeader title="代理池管理系统" :icon="MagicStick" />
|
||||||
|
|
||||||
<el-row :gutter="20" class="stats-row">
|
<div class="stats-grid">
|
||||||
<el-col :xs="24" :sm="12" :md="12" :lg="6" :xl="6">
|
<StatCard
|
||||||
<StatCard
|
type="total"
|
||||||
type="total"
|
:icon="DataLine"
|
||||||
:icon="DataLine"
|
:value="stats.total || 0"
|
||||||
:value="stats.total || 0"
|
label="总代理数"
|
||||||
label="总代理数"
|
/>
|
||||||
/>
|
<StatCard
|
||||||
</el-col>
|
type="pending"
|
||||||
<el-col :xs="24" :sm="12" :md="12" :lg="6" :xl="6">
|
:icon="Clock"
|
||||||
<StatCard
|
:value="stats.pending || 0"
|
||||||
type="available"
|
label="待验证"
|
||||||
:icon="CircleCheck"
|
/>
|
||||||
:value="stats.available || 0"
|
<StatCard
|
||||||
label="可用数量"
|
type="available"
|
||||||
/>
|
:icon="CircleCheck"
|
||||||
</el-col>
|
:value="stats.available || 0"
|
||||||
<el-col :xs="24" :sm="12" :md="12" :lg="6" :xl="6">
|
label="可用数量"
|
||||||
<StatCard
|
/>
|
||||||
type="new"
|
<StatCard
|
||||||
:icon="Timer"
|
type="new"
|
||||||
:value="stats.today_new || 0"
|
:icon="Timer"
|
||||||
label="今日新增"
|
:value="stats.today_new || 0"
|
||||||
/>
|
label="今日新增"
|
||||||
</el-col>
|
/>
|
||||||
<el-col :xs="24" :sm="12" :md="12" :lg="6" :xl="6">
|
<StatCard
|
||||||
<StatCard
|
type="score"
|
||||||
type="score"
|
:icon="StarFilled"
|
||||||
:icon="StarFilled"
|
:value="avgScore"
|
||||||
:value="avgScore"
|
label="平均分数"
|
||||||
label="平均分数"
|
/>
|
||||||
/>
|
</div>
|
||||||
</el-col>
|
|
||||||
</el-row>
|
|
||||||
|
|
||||||
<el-row :gutter="20" class="charts-row">
|
<el-row :gutter="20" class="charts-row">
|
||||||
<el-col :xs="24" :lg="16">
|
<el-col :xs="24" :lg="16">
|
||||||
@@ -88,7 +86,7 @@
|
|||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup>
|
<script setup>
|
||||||
import { computed, onMounted, onUnmounted } from 'vue'
|
import { computed, onMounted } from 'vue'
|
||||||
import { ElMessage, ElMessageBox } from 'element-plus'
|
import { ElMessage, ElMessageBox } from 'element-plus'
|
||||||
import {
|
import {
|
||||||
MagicStick,
|
MagicStick,
|
||||||
@@ -96,7 +94,8 @@ import {
|
|||||||
CircleCheck,
|
CircleCheck,
|
||||||
Timer,
|
Timer,
|
||||||
StarFilled,
|
StarFilled,
|
||||||
InfoFilled
|
InfoFilled,
|
||||||
|
Clock
|
||||||
} from '@element-plus/icons-vue'
|
} from '@element-plus/icons-vue'
|
||||||
import { useProxyStore } from '../stores/proxy'
|
import { useProxyStore } from '../stores/proxy'
|
||||||
import { formatNumber } from '../utils/format'
|
import { formatNumber } from '../utils/format'
|
||||||
@@ -104,26 +103,16 @@ import StatCard from '../components/StatCard.vue'
|
|||||||
import ProtocolChart from '../components/ProtocolChart.vue'
|
import ProtocolChart from '../components/ProtocolChart.vue'
|
||||||
import QuickActions from '../components/QuickActions.vue'
|
import QuickActions from '../components/QuickActions.vue'
|
||||||
import PageHeader from '../components/PageHeader.vue'
|
import PageHeader from '../components/PageHeader.vue'
|
||||||
|
import { useStatsWebSocket } from '../composables/useStatsWebSocket'
|
||||||
|
|
||||||
// ==================== Store ====================
|
// ==================== Store ====================
|
||||||
const proxyStore = useProxyStore()
|
const proxyStore = useProxyStore()
|
||||||
|
const { start: startStatsWs } = useStatsWebSocket()
|
||||||
|
|
||||||
// ==================== 计算属性 ====================
|
// ==================== 计算属性 ====================
|
||||||
const stats = computed(() => proxyStore.stats)
|
const stats = computed(() => proxyStore.stats)
|
||||||
const avgScore = computed(() => formatNumber(stats.value.avg_score || 0, 1))
|
const avgScore = computed(() => formatNumber(stats.value.avg_score || 0, 1))
|
||||||
|
|
||||||
// ==================== 定时刷新 ====================
|
|
||||||
const REFRESH_INTERVAL = 5000
|
|
||||||
let refreshTimer = null
|
|
||||||
let isPageVisible = true
|
|
||||||
|
|
||||||
function handleVisibilityChange() {
|
|
||||||
isPageVisible = !document.hidden
|
|
||||||
if (isPageVisible) {
|
|
||||||
refreshData()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function refreshData() {
|
async function refreshData() {
|
||||||
await proxyStore.fetchStats()
|
await proxyStore.fetchStats()
|
||||||
}
|
}
|
||||||
@@ -165,26 +154,15 @@ async function handleClean() {
|
|||||||
// ==================== 生命周期 ====================
|
// ==================== 生命周期 ====================
|
||||||
onMounted(async () => {
|
onMounted(async () => {
|
||||||
await refreshData()
|
await refreshData()
|
||||||
|
startStatsWs()
|
||||||
document.addEventListener('visibilitychange', handleVisibilityChange)
|
|
||||||
refreshTimer = setInterval(() => {
|
|
||||||
if (isPageVisible) {
|
|
||||||
refreshData()
|
|
||||||
}
|
|
||||||
}, REFRESH_INTERVAL)
|
|
||||||
})
|
|
||||||
|
|
||||||
onUnmounted(() => {
|
|
||||||
if (refreshTimer) {
|
|
||||||
clearInterval(refreshTimer)
|
|
||||||
refreshTimer = null
|
|
||||||
}
|
|
||||||
document.removeEventListener('visibilitychange', handleVisibilityChange)
|
|
||||||
})
|
})
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style scoped>
|
<style scoped>
|
||||||
.stats-row {
|
.stats-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||||
|
gap: 20px;
|
||||||
margin-bottom: 20px;
|
margin-bottom: 20px;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -237,14 +215,6 @@ onUnmounted(() => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@media (max-width: 768px) {
|
@media (max-width: 768px) {
|
||||||
.stats-row .el-col {
|
|
||||||
margin-bottom: 16px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.stats-row .el-col:last-child {
|
|
||||||
margin-bottom: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.status-list {
|
.status-list {
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 16px;
|
gap: 16px;
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
</el-tag>
|
</el-tag>
|
||||||
</div>
|
</div>
|
||||||
<div class="header-actions">
|
<div class="header-actions">
|
||||||
<el-button type="success" @click="handleCrawlAll" size="large" :loading="crawlingAll">
|
<el-button type="success" @click="handleCrawlAll" size="large" :loading="crawlAllMask">
|
||||||
<el-icon class="btn-icon"><Promotion /></el-icon>
|
<el-icon class="btn-icon"><Promotion /></el-icon>
|
||||||
全部爬取
|
全部爬取
|
||||||
</el-button>
|
</el-button>
|
||||||
@@ -53,12 +53,12 @@
|
|||||||
</template>
|
</template>
|
||||||
</el-table-column>
|
</el-table-column>
|
||||||
|
|
||||||
<el-table-column label="统计" width="180">
|
<el-table-column label="上次爬取" width="200">
|
||||||
<template #default="{ row }">
|
<template #default="{ row }">
|
||||||
<div class="plugin-stats">
|
<div class="plugin-stats" title="绿色为最近一轮爬到的代理条数;红色为最近一轮是否失败(0 成功 / 1 失败),不是验证通过数">
|
||||||
<div class="stat-item">
|
<div class="stat-item">
|
||||||
<el-icon class="stat-icon success"><CircleCheck /></el-icon>
|
<el-icon class="stat-icon success"><CircleCheck /></el-icon>
|
||||||
<span class="stat-value success">{{ row.success_count || 0 }}</span>
|
<span class="stat-value success">{{ row.success_count || 0 }} 条</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="stat-item">
|
<div class="stat-item">
|
||||||
<el-icon class="stat-icon failed"><CircleClose /></el-icon>
|
<el-icon class="stat-icon failed"><CircleClose /></el-icon>
|
||||||
@@ -74,7 +74,35 @@
|
|||||||
</template>
|
</template>
|
||||||
</el-table-column>
|
</el-table-column>
|
||||||
|
|
||||||
<el-table-column label="操作" width="220" fixed="right" align="center">
|
<el-table-column label="最近爬取" min-width="340" align="left">
|
||||||
|
<template #default="{ row }">
|
||||||
|
<div v-if="crawlAllMask && row.enabled" class="crawl-running-row">
|
||||||
|
<el-icon class="is-loading crawl-spin"><Loading /></el-icon>
|
||||||
|
<span>正在爬取…</span>
|
||||||
|
</div>
|
||||||
|
<div v-else-if="crawlResults[row.id]" class="result-panel" :class="crawlResults[row.id].type">
|
||||||
|
<div class="result-panel-head">
|
||||||
|
<el-icon v-if="crawlResults[row.id].type === 'success'" class="result-head-icon success"><CircleCheck /></el-icon>
|
||||||
|
<el-icon v-else class="result-head-icon failed"><CircleClose /></el-icon>
|
||||||
|
<span class="result-panel-title">{{ crawlResults[row.id].message }}</span>
|
||||||
|
<el-icon class="result-close" @click="clearCrawlResult(row.id)"><Close /></el-icon>
|
||||||
|
</div>
|
||||||
|
<div class="result-panel-body">
|
||||||
|
<template v-if="crawlResults[row.id].data && crawlResults[row.id].data.proxy_count !== undefined">
|
||||||
|
<span class="result-pill fetched">爬取 {{ crawlResults[row.id].data.proxy_count }} 条</span>
|
||||||
|
</template>
|
||||||
|
<template v-if="crawlResults[row.id].data?.crawl_failed">
|
||||||
|
<div class="result-error-block" :title="crawlResults[row.id].data.error || ''">
|
||||||
|
{{ crawlResults[row.id].data.error || '爬取失败' }}
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<span v-else class="result-placeholder">—</span>
|
||||||
|
</template>
|
||||||
|
</el-table-column>
|
||||||
|
|
||||||
|
<el-table-column label="操作" width="200" fixed="right" align="center">
|
||||||
<template #default="{ row }">
|
<template #default="{ row }">
|
||||||
<div class="plugin-actions">
|
<div class="plugin-actions">
|
||||||
<el-button
|
<el-button
|
||||||
@@ -89,27 +117,13 @@
|
|||||||
type="success"
|
type="success"
|
||||||
size="small"
|
size="small"
|
||||||
@click="handleCrawl(row.id)"
|
@click="handleCrawl(row.id)"
|
||||||
:loading="crawlingPlugins.has(row.id)"
|
:loading="crawlingPlugins.has(row.id) || (crawlAllMask && row.enabled)"
|
||||||
:disabled="!row.enabled"
|
:disabled="!row.enabled"
|
||||||
>
|
>
|
||||||
<el-icon class="btn-icon"><Promotion /></el-icon>
|
<el-icon class="btn-icon"><Promotion /></el-icon>
|
||||||
爬取
|
爬取
|
||||||
</el-button>
|
</el-button>
|
||||||
</div>
|
</div>
|
||||||
<div v-if="crawlResults[row.id]" class="plugin-crawl-result">
|
|
||||||
<div class="result-mini" :class="crawlResults[row.id].type">
|
|
||||||
<el-icon v-if="crawlResults[row.id].type === 'success'" class="result-icon success"><CircleCheck /></el-icon>
|
|
||||||
<el-icon v-else class="result-icon failed"><CircleClose /></el-icon>
|
|
||||||
<span class="result-text">{{ crawlResults[row.id].message }}</span>
|
|
||||||
<span v-if="crawlResults[row.id].data?.success_count !== undefined" class="result-count valid">
|
|
||||||
有效 {{ crawlResults[row.id].data.success_count }}
|
|
||||||
</span>
|
|
||||||
<span v-if="crawlResults[row.id].data?.failure_count !== undefined" class="result-count invalid">
|
|
||||||
无效 {{ crawlResults[row.id].data.failure_count }}
|
|
||||||
</span>
|
|
||||||
<el-icon class="result-close" @click="clearCrawlResult(row.id)"><Close /></el-icon>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</template>
|
</template>
|
||||||
</el-table-column>
|
</el-table-column>
|
||||||
</el-table>
|
</el-table>
|
||||||
@@ -130,18 +144,37 @@
|
|||||||
@close="allCrawlResult = null"
|
@close="allCrawlResult = null"
|
||||||
>
|
>
|
||||||
<template v-if="allCrawlResult.data">
|
<template v-if="allCrawlResult.data">
|
||||||
<div class="crawl-stats">
|
<div class="crawl-stats crawl-stats-summary">
|
||||||
<span v-if="allCrawlResult.data.total_crawled !== undefined">
|
<span v-if="allCrawlResult.data.total_crawled !== undefined">
|
||||||
爬取: {{ allCrawlResult.data.total_crawled }}
|
合计爬取: <strong>{{ allCrawlResult.data.total_crawled }}</strong> 条
|
||||||
</span>
|
</span>
|
||||||
|
<span
|
||||||
<span v-if="allCrawlResult.data.valid_count !== undefined" class="valid-count">
|
v-if="allCrawlResult.data.plugins_failed !== undefined"
|
||||||
有效: {{ allCrawlResult.data.valid_count }}
|
class="invalid-count"
|
||||||
</span>
|
>
|
||||||
<span v-if="allCrawlResult.data.invalid_count !== undefined" class="invalid-count">
|
失败插件: <strong>{{ allCrawlResult.data.plugins_failed }}</strong> 个
|
||||||
无效: {{ allCrawlResult.data.invalid_count }}
|
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
<ul
|
||||||
|
v-if="allCrawlResult.data.per_plugin?.length"
|
||||||
|
class="per-plugin-breakdown"
|
||||||
|
>
|
||||||
|
<li
|
||||||
|
v-for="(item, idx) in allCrawlResult.data.per_plugin"
|
||||||
|
:key="item.plugin_id || `pp-${idx}`"
|
||||||
|
class="per-plugin-line"
|
||||||
|
>
|
||||||
|
<span class="pp-name">{{ pluginDisplayName(item.plugin_id) }}</span>
|
||||||
|
<template v-if="item.crawl_failed">
|
||||||
|
<el-tag type="danger" size="small" effect="light">失败</el-tag>
|
||||||
|
<span class="pp-detail err">{{ item.error || '未知错误' }}</span>
|
||||||
|
</template>
|
||||||
|
<template v-else>
|
||||||
|
<el-tag type="success" size="small" effect="light">完成</el-tag>
|
||||||
|
<span class="pp-detail">爬取 <strong>{{ item.proxy_count }}</strong> 条</span>
|
||||||
|
</template>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
</template>
|
</template>
|
||||||
</el-alert>
|
</el-alert>
|
||||||
</el-card>
|
</el-card>
|
||||||
@@ -198,7 +231,8 @@ import {
|
|||||||
CircleClose,
|
CircleClose,
|
||||||
Box,
|
Box,
|
||||||
Setting,
|
Setting,
|
||||||
Close
|
Close,
|
||||||
|
Loading
|
||||||
} from '@element-plus/icons-vue'
|
} from '@element-plus/icons-vue'
|
||||||
import { usePluginsStore } from '../stores/plugins'
|
import { usePluginsStore } from '../stores/plugins'
|
||||||
import { pluginService } from '../services/pluginService'
|
import { pluginService } from '../services/pluginService'
|
||||||
@@ -207,10 +241,17 @@ import PageHeader from '../components/PageHeader.vue'
|
|||||||
|
|
||||||
const pluginsStore = usePluginsStore()
|
const pluginsStore = usePluginsStore()
|
||||||
const crawlingPlugins = ref(new Set())
|
const crawlingPlugins = ref(new Set())
|
||||||
const crawlingAll = ref(false)
|
/** 全部爬取进行中:各启用插件行显示「正在爬取」与按钮 loading */
|
||||||
|
const crawlAllMask = ref(false)
|
||||||
const crawlResults = ref({})
|
const crawlResults = ref({})
|
||||||
const allCrawlResult = ref(null)
|
const allCrawlResult = ref(null)
|
||||||
|
|
||||||
|
/**
 * Maps a plugin id to the label shown in the crawl result list.
 * Returns a placeholder when no id is given, and falls back to the raw id
 * when the plugin is not in the store or has no name.
 *
 * @param {string} pluginId
 * @returns {string}
 */
function pluginDisplayName(pluginId) {
  if (!pluginId) {
    return '(未知插件)'
  }
  const match = pluginsStore.plugins.find(({ id }) => id === pluginId)
  const label = match?.name
  return label || pluginId
}
|
||||||
|
|
||||||
// 配置对话框
|
// 配置对话框
|
||||||
const configDialogVisible = ref(false)
|
const configDialogVisible = ref(false)
|
||||||
const currentPlugin = ref(null)
|
const currentPlugin = ref(null)
|
||||||
@@ -273,21 +314,30 @@ async function handleCrawl(pluginId) {
|
|||||||
const response = await pluginService.crawlPlugin(pluginId)
|
const response = await pluginService.crawlPlugin(pluginId)
|
||||||
|
|
||||||
if (response.code === 200) {
|
if (response.code === 200) {
|
||||||
crawlResults.value[pluginId] = {
|
crawlResults.value = {
|
||||||
type: 'success',
|
...crawlResults.value,
|
||||||
message: response.message,
|
[pluginId]: {
|
||||||
data: response.data
|
type: 'success',
|
||||||
|
message: response.message,
|
||||||
|
data: response.data
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
crawlResults.value[pluginId] = {
|
crawlResults.value = {
|
||||||
type: 'error',
|
...crawlResults.value,
|
||||||
message: response.message || '爬取失败'
|
[pluginId]: {
|
||||||
|
type: 'error',
|
||||||
|
message: response.message || '爬取失败'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
crawlResults.value[pluginId] = {
|
crawlResults.value = {
|
||||||
type: 'error',
|
...crawlResults.value,
|
||||||
message: '爬取过程出错'
|
[pluginId]: {
|
||||||
|
type: 'error',
|
||||||
|
message: '爬取过程出错'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
crawlingPlugins.value.delete(pluginId)
|
crawlingPlugins.value.delete(pluginId)
|
||||||
@@ -295,7 +345,9 @@ async function handleCrawl(pluginId) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function clearCrawlResult(pluginId) {
|
function clearCrawlResult(pluginId) {
|
||||||
delete crawlResults.value[pluginId]
|
const next = { ...crawlResults.value }
|
||||||
|
delete next[pluginId]
|
||||||
|
crawlResults.value = next
|
||||||
}
|
}
|
||||||
|
|
||||||
async function handleCrawlAll() {
|
async function handleCrawlAll() {
|
||||||
@@ -307,7 +359,7 @@ async function handleCrawlAll() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
await ElMessageBox.confirm(
|
await ElMessageBox.confirm(
|
||||||
`确定要运行所有 ${enabledPlugins.length} 个启用的插件吗?这将爬取并验证所有代理。`,
|
`确定要运行所有 ${enabledPlugins.length} 个启用的插件吗?代理将先以「待验证」入库,需再执行「全部验证」后才会变为可用(除非已开启「爬取后立即验证」)。`,
|
||||||
'批量爬取确认',
|
'批量爬取确认',
|
||||||
{
|
{
|
||||||
confirmButtonText: '开始爬取',
|
confirmButtonText: '开始爬取',
|
||||||
@@ -316,20 +368,46 @@ async function handleCrawlAll() {
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
crawlingAll.value = true
|
|
||||||
allCrawlResult.value = null
|
allCrawlResult.value = null
|
||||||
|
{
|
||||||
|
const cleared = { ...crawlResults.value }
|
||||||
|
for (const p of enabledPlugins) {
|
||||||
|
delete cleared[p.id]
|
||||||
|
}
|
||||||
|
crawlResults.value = cleared
|
||||||
|
}
|
||||||
|
|
||||||
|
crawlAllMask.value = true
|
||||||
|
|
||||||
const response = await pluginService.crawlAll()
|
const response = await pluginService.crawlAll()
|
||||||
|
|
||||||
if (response.code === 200) {
|
if (response.code === 200) {
|
||||||
|
const data = response.data || {}
|
||||||
allCrawlResult.value = {
|
allCrawlResult.value = {
|
||||||
type: response.data?.cancelled ? 'info' : 'success',
|
type: data.cancelled ? 'info' : 'success',
|
||||||
message: response.message,
|
message: response.message,
|
||||||
data: response.data
|
data
|
||||||
}
|
}
|
||||||
if (!response.data?.cancelled) {
|
if (Array.isArray(data.per_plugin) && data.per_plugin.length) {
|
||||||
|
const merged = { ...crawlResults.value }
|
||||||
|
for (const item of data.per_plugin) {
|
||||||
|
if (!item.plugin_id) continue
|
||||||
|
merged[item.plugin_id] = {
|
||||||
|
type: item.crawl_failed ? 'error' : 'success',
|
||||||
|
message: '获取任务状态成功',
|
||||||
|
data: {
|
||||||
|
proxy_count: item.proxy_count,
|
||||||
|
crawl_failed: item.crawl_failed,
|
||||||
|
error: item.error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
crawlResults.value = merged
|
||||||
|
}
|
||||||
|
if (!data.cancelled) {
|
||||||
ElMessage.success('批量爬取完成')
|
ElMessage.success('批量爬取完成')
|
||||||
}
|
}
|
||||||
|
await pluginsStore.fetchPlugins()
|
||||||
} else {
|
} else {
|
||||||
allCrawlResult.value = {
|
allCrawlResult.value = {
|
||||||
type: 'error',
|
type: 'error',
|
||||||
@@ -345,7 +423,7 @@ async function handleCrawlAll() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
crawlingAll.value = false
|
crawlAllMask.value = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -487,66 +565,167 @@ onMounted(async () => {
|
|||||||
.plugin-actions {
|
.plugin-actions {
|
||||||
display: flex;
|
display: flex;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
gap: 8px;
|
gap: 8px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.plugin-crawl-result {
|
.crawl-running-row {
|
||||||
margin-top: 8px;
|
display: flex;
|
||||||
}
|
|
||||||
|
|
||||||
.result-mini {
|
|
||||||
display: inline-flex;
|
|
||||||
align-items: center;
|
align-items: center;
|
||||||
gap: 6px;
|
gap: 8px;
|
||||||
padding: 4px 8px;
|
padding: 10px 12px;
|
||||||
border-radius: 4px;
|
font-size: 14px;
|
||||||
font-size: 12px;
|
color: var(--primary);
|
||||||
line-height: 1.4;
|
background: var(--surface-2);
|
||||||
|
border-radius: var(--radius-md, 8px);
|
||||||
|
border: 1px solid var(--border);
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-mini.success {
|
.crawl-spin {
|
||||||
background: rgba(103, 194, 58, 0.15);
|
font-size: 18px;
|
||||||
|
animation: plugin-crawl-spin 1s linear infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes plugin-crawl-spin {
|
||||||
|
to {
|
||||||
|
transform: rotate(360deg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-placeholder {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-panel {
|
||||||
|
padding: 12px 14px;
|
||||||
|
border-radius: var(--radius-md, 8px);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: var(--surface-2);
|
||||||
|
min-height: 72px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-panel.success {
|
||||||
|
border-color: rgba(103, 194, 58, 0.35);
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-panel.error {
|
||||||
|
border-color: rgba(245, 108, 108, 0.35);
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-panel-head {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-head-icon {
|
||||||
|
font-size: 18px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-head-icon.success {
|
||||||
color: var(--success);
|
color: var(--success);
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-mini.error {
|
.result-head-icon.failed {
|
||||||
background: rgba(245, 108, 108, 0.15);
|
|
||||||
color: var(--danger);
|
color: var(--danger);
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-icon {
|
.result-panel-title {
|
||||||
|
flex: 1;
|
||||||
font-size: 13px;
|
font-size: 13px;
|
||||||
}
|
|
||||||
|
|
||||||
.result-text {
|
|
||||||
font-weight: 500;
|
|
||||||
}
|
|
||||||
|
|
||||||
.result-count {
|
|
||||||
font-weight: 600;
|
font-weight: 600;
|
||||||
padding: 0 4px;
|
color: var(--text-secondary);
|
||||||
border-radius: 3px;
|
line-height: 1.4;
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-count.valid {
|
.result-panel-body {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
align-items: flex-start;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-pill {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 4px 12px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 14px;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.result-pill.fetched {
|
||||||
background: rgba(103, 194, 58, 0.2);
|
background: rgba(103, 194, 58, 0.2);
|
||||||
color: var(--success);
|
color: var(--success);
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-count.invalid {
|
.result-error-block {
|
||||||
background: rgba(245, 108, 108, 0.2);
|
font-size: 13px;
|
||||||
|
line-height: 1.5;
|
||||||
color: var(--danger);
|
color: var(--danger);
|
||||||
|
word-break: break-word;
|
||||||
|
white-space: pre-wrap;
|
||||||
|
max-width: 100%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-close {
|
.result-close {
|
||||||
margin-left: 4px;
|
margin-left: auto;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 12px;
|
font-size: 16px;
|
||||||
opacity: 0.7;
|
opacity: 0.55;
|
||||||
|
flex-shrink: 0;
|
||||||
transition: opacity 0.2s;
|
transition: opacity 0.2s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.result-close:hover {
|
.result-close:hover {
|
||||||
opacity: 1;
|
opacity: 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.crawl-stats-summary {
|
||||||
|
flex-wrap: wrap;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.per-plugin-breakdown {
|
||||||
|
list-style: none;
|
||||||
|
margin: 12px 0 0;
|
||||||
|
padding: 0;
|
||||||
|
max-height: 360px;
|
||||||
|
overflow-y: auto;
|
||||||
|
border-top: 1px solid var(--border);
|
||||||
|
padding-top: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.per-plugin-line {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px 12px;
|
||||||
|
padding: 8px 0;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.per-plugin-line:last-child {
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.pp-name {
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--text-primary);
|
||||||
|
min-width: 140px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.pp-detail {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.pp-detail.err {
|
||||||
|
color: var(--danger);
|
||||||
|
flex: 1;
|
||||||
|
min-width: 120px;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
@@ -4,6 +4,18 @@
|
|||||||
|
|
||||||
<el-card class="filter-card" shadow="hover">
|
<el-card class="filter-card" shadow="hover">
|
||||||
<el-form :inline="true" :model="filterForm" class="form-row">
|
<el-form :inline="true" :model="filterForm" class="form-row">
|
||||||
|
<el-form-item label="池范围">
|
||||||
|
<el-select
|
||||||
|
v-model="filterForm.poolFilter"
|
||||||
|
placeholder="全部"
|
||||||
|
style="width: 140px"
|
||||||
|
@change="handleSearch"
|
||||||
|
>
|
||||||
|
<el-option label="全部" value="all" />
|
||||||
|
<el-option label="待验证" value="pending" />
|
||||||
|
<el-option label="已验证可用" value="available" />
|
||||||
|
</el-select>
|
||||||
|
</el-form-item>
|
||||||
<el-form-item label="协议类型">
|
<el-form-item label="协议类型">
|
||||||
<el-select
|
<el-select
|
||||||
v-model="filterForm.protocol"
|
v-model="filterForm.protocol"
|
||||||
@@ -84,6 +96,16 @@
|
|||||||
<el-table-column type="selection" width="55" />
|
<el-table-column type="selection" width="55" />
|
||||||
<el-table-column prop="ip" label="IP地址" width="150" />
|
<el-table-column prop="ip" label="IP地址" width="150" />
|
||||||
<el-table-column prop="port" label="端口" width="100" />
|
<el-table-column prop="port" label="端口" width="100" />
|
||||||
|
<el-table-column label="状态" width="100">
|
||||||
|
<template #default="{ row }">
|
||||||
|
<el-tag v-if="row.validated === 0" type="warning" effect="light" size="small">
|
||||||
|
待验证
|
||||||
|
</el-tag>
|
||||||
|
<el-tag v-else type="success" effect="light" size="small">
|
||||||
|
已验证
|
||||||
|
</el-tag>
|
||||||
|
</template>
|
||||||
|
</el-table-column>
|
||||||
<el-table-column prop="protocol" label="协议" width="100">
|
<el-table-column prop="protocol" label="协议" width="100">
|
||||||
<template #default="{ row }">
|
<template #default="{ row }">
|
||||||
<el-tag :type="getProtocolType(row.protocol)" effect="light" size="small">
|
<el-tag :type="getProtocolType(row.protocol)" effect="light" size="small">
|
||||||
@@ -164,6 +186,7 @@ const selectedProxies = ref([])
|
|||||||
let abortController = null
|
let abortController = null
|
||||||
|
|
||||||
const filterForm = reactive({
|
const filterForm = reactive({
|
||||||
|
poolFilter: 'all',
|
||||||
protocol: '',
|
protocol: '',
|
||||||
minScore: 0,
|
minScore: 0,
|
||||||
sortBy: 'last_check',
|
sortBy: 'last_check',
|
||||||
@@ -194,6 +217,7 @@ async function fetchProxies() {
|
|||||||
const success = await proxyStore.fetchProxies({
|
const success = await proxyStore.fetchProxies({
|
||||||
page: currentPage.value,
|
page: currentPage.value,
|
||||||
page_size: pageSize.value,
|
page_size: pageSize.value,
|
||||||
|
pool_filter: filterForm.poolFilter === 'all' ? null : filterForm.poolFilter,
|
||||||
protocol: filterForm.protocol || null,
|
protocol: filterForm.protocol || null,
|
||||||
min_score: filterForm.minScore,
|
min_score: filterForm.minScore,
|
||||||
sort_by: filterForm.sortBy,
|
sort_by: filterForm.sortBy,
|
||||||
@@ -237,6 +261,7 @@ async function handleDelete(proxy) {
|
|||||||
if (!confirmed) return
|
if (!confirmed) return
|
||||||
|
|
||||||
const filters = {
|
const filters = {
|
||||||
|
pool_filter: filterForm.poolFilter === 'all' ? null : filterForm.poolFilter,
|
||||||
protocol: filterForm.protocol || null,
|
protocol: filterForm.protocol || null,
|
||||||
min_score: filterForm.minScore,
|
min_score: filterForm.minScore,
|
||||||
sort_by: filterForm.sortBy,
|
sort_by: filterForm.sortBy,
|
||||||
@@ -256,6 +281,7 @@ async function handleBatchDelete() {
|
|||||||
if (!confirmed) return
|
if (!confirmed) return
|
||||||
|
|
||||||
const filters = {
|
const filters = {
|
||||||
|
pool_filter: filterForm.poolFilter === 'all' ? null : filterForm.poolFilter,
|
||||||
protocol: filterForm.protocol || null,
|
protocol: filterForm.protocol || null,
|
||||||
min_score: filterForm.minScore,
|
min_score: filterForm.minScore,
|
||||||
sort_by: filterForm.sortBy,
|
sort_by: filterForm.sortBy,
|
||||||
|
|||||||
@@ -86,26 +86,9 @@
|
|||||||
ref="formRef"
|
ref="formRef"
|
||||||
>
|
>
|
||||||
<el-divider content-position="left">爬虫配置</el-divider>
|
<el-divider content-position="left">爬虫配置</el-divider>
|
||||||
|
<p class="setting-hint" style="margin: -8px 0 16px 0">
|
||||||
<el-form-item label="爬取超时" prop="crawl_timeout">
|
每个爬虫插件单独限时 120 秒,互不影响;此处不再配置全局爬取超时。
|
||||||
<el-input-number
|
</p>
|
||||||
v-model="settings.crawl_timeout"
|
|
||||||
:min="5"
|
|
||||||
:max="120"
|
|
||||||
:step="5"
|
|
||||||
class="setting-input"
|
|
||||||
/>
|
|
||||||
<span class="setting-suffix">秒</span>
|
|
||||||
</el-form-item>
|
|
||||||
|
|
||||||
<el-form-item label="最大重试次数" prop="max_retries">
|
|
||||||
<el-input-number
|
|
||||||
v-model="settings.max_retries"
|
|
||||||
:min="0"
|
|
||||||
:max="10"
|
|
||||||
class="setting-input"
|
|
||||||
/>
|
|
||||||
</el-form-item>
|
|
||||||
|
|
||||||
<el-divider content-position="left">验证配置</el-divider>
|
<el-divider content-position="left">验证配置</el-divider>
|
||||||
|
|
||||||
@@ -124,7 +107,7 @@
|
|||||||
<el-input-number
|
<el-input-number
|
||||||
v-model="settings.default_concurrency"
|
v-model="settings.default_concurrency"
|
||||||
:min="10"
|
:min="10"
|
||||||
:max="200"
|
:max="400"
|
||||||
:step="10"
|
:step="10"
|
||||||
class="setting-input"
|
class="setting-input"
|
||||||
/>
|
/>
|
||||||
@@ -170,6 +153,15 @@
|
|||||||
/>
|
/>
|
||||||
</el-form-item>
|
</el-form-item>
|
||||||
|
|
||||||
|
<el-form-item label="爬取后立即验证" prop="auto_validate_after_crawl">
|
||||||
|
<el-switch
|
||||||
|
v-model="settings.auto_validate_after_crawl"
|
||||||
|
active-text="开启"
|
||||||
|
inactive-text="关闭"
|
||||||
|
/>
|
||||||
|
<span class="setting-hint">关闭时爬取仅入库为「待验证」,需手动或定时「全部验证」消化队列(推荐)</span>
|
||||||
|
</el-form-item>
|
||||||
|
|
||||||
<el-divider content-position="left">代理评分配置</el-divider>
|
<el-divider content-position="left">代理评分配置</el-divider>
|
||||||
|
|
||||||
<el-form-item label="最低代理分数" prop="min_proxy_score">
|
<el-form-item label="最低代理分数" prop="min_proxy_score">
|
||||||
@@ -232,13 +224,12 @@ const saving = ref(false)
|
|||||||
const formRef = ref(null)
|
const formRef = ref(null)
|
||||||
|
|
||||||
const settings = reactive({
|
const settings = reactive({
|
||||||
crawl_timeout: 30,
|
validation_timeout: 6,
|
||||||
validation_timeout: 10,
|
default_concurrency: 120,
|
||||||
max_retries: 3,
|
|
||||||
default_concurrency: 50,
|
|
||||||
min_proxy_score: 0,
|
min_proxy_score: 0,
|
||||||
proxy_expiry_days: 7,
|
proxy_expiry_days: 7,
|
||||||
auto_validate: true,
|
auto_validate: true,
|
||||||
|
auto_validate_after_crawl: false,
|
||||||
validate_interval_minutes: 30,
|
validate_interval_minutes: 30,
|
||||||
validation_targets: []
|
validation_targets: []
|
||||||
})
|
})
|
||||||
@@ -255,18 +246,15 @@ const defaultValidationTargets = [
|
|||||||
// ==================== 计算属性 ====================
|
// ==================== 计算属性 ====================
|
||||||
const schedulerInfo = computed(() => {
|
const schedulerInfo = computed(() => {
|
||||||
if (schedulerRunning.value) {
|
if (schedulerRunning.value) {
|
||||||
return `验证调度器正在运行,每 ${settings.validate_interval_minutes} 分钟自动验证一次所有代理`
|
return `验证调度器正在运行,每 ${settings.validate_interval_minutes} 分钟执行一次:优先验证待验证代理,再按检查时间复检已入库代理`
|
||||||
} else {
|
|
||||||
return '验证调度器已停止,代理不会自动验证,建议定期手动验证或开启自动验证'
|
|
||||||
}
|
}
|
||||||
|
return '验证调度器已停止,待验证代理不会自动检查;可在下方开启自动验证或点击「立即验证全部」'
|
||||||
})
|
})
|
||||||
|
|
||||||
// ==================== 表单验证规则 ====================
|
// ==================== 表单验证规则 ====================
|
||||||
const formRules = {
|
const formRules = {
|
||||||
crawl_timeout: [{ type: 'number', min: 5, max: 120, message: '范围 5-120 秒', trigger: 'blur' }],
|
|
||||||
validation_timeout: [{ type: 'number', min: 3, max: 60, message: '范围 3-60 秒', trigger: 'blur' }],
|
validation_timeout: [{ type: 'number', min: 3, max: 60, message: '范围 3-60 秒', trigger: 'blur' }],
|
||||||
max_retries: [{ type: 'number', min: 0, max: 10, message: '范围 0-10', trigger: 'blur' }],
|
default_concurrency: [{ type: 'number', min: 10, max: 400, message: '范围 10-400', trigger: 'blur' }],
|
||||||
default_concurrency: [{ type: 'number', min: 10, max: 200, message: '范围 10-200', trigger: 'blur' }],
|
|
||||||
validate_interval_minutes: [{ type: 'number', min: 5, max: 1440, message: '范围 5-1440 分钟', trigger: 'blur' }],
|
validate_interval_minutes: [{ type: 'number', min: 5, max: 1440, message: '范围 5-1440 分钟', trigger: 'blur' }],
|
||||||
min_proxy_score: [{ type: 'number', min: 0, max: 100, message: '范围 0-100', trigger: 'blur' }],
|
min_proxy_score: [{ type: 'number', min: 0, max: 100, message: '范围 0-100', trigger: 'blur' }],
|
||||||
proxy_expiry_days: [{ type: 'number', min: 1, max: 30, message: '范围 1-30 天', trigger: 'blur' }]
|
proxy_expiry_days: [{ type: 'number', min: 1, max: 30, message: '范围 1-30 天', trigger: 'blur' }]
|
||||||
@@ -306,7 +294,7 @@ async function handleStopScheduler() {
|
|||||||
async function handleValidateNow() {
|
async function handleValidateNow() {
|
||||||
try {
|
try {
|
||||||
await ElMessageBox.confirm(
|
await ElMessageBox.confirm(
|
||||||
'确定要立即验证所有代理吗?这可能需要一些时间。',
|
'将按顺序验证:先处理「待验证」代理,再复检已入库代理。任务在后台执行,可能需要较长时间。',
|
||||||
'确认验证',
|
'确认验证',
|
||||||
{
|
{
|
||||||
confirmButtonText: '开始验证',
|
confirmButtonText: '开始验证',
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ def format_proxy(proxy) -> dict:
|
|||||||
"score": proxy.score,
|
"score": proxy.score,
|
||||||
"response_time_ms": proxy.response_time_ms,
|
"response_time_ms": proxy.response_time_ms,
|
||||||
"last_check": proxy.last_check.isoformat() if proxy.last_check else None,
|
"last_check": proxy.last_check.isoformat() if proxy.last_check else None,
|
||||||
|
"validated": getattr(proxy, "validated", 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import asyncio
|
|||||||
from contextlib import AsyncExitStack, asynccontextmanager
|
from contextlib import AsyncExitStack, asynccontextmanager
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
from app.core.db import init_db, get_db
|
from app.core.db import init_db, get_db, get_db_connection
|
||||||
from app.core.config import settings as app_settings
|
from app.core.config import settings as app_settings
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
from app.core.execution import AsyncWorkerPool, JobExecutor
|
from app.core.execution import AsyncWorkerPool, JobExecutor
|
||||||
@@ -13,6 +13,8 @@ from app.repositories.settings_repo import SettingsRepository, DEFAULT_SETTINGS
|
|||||||
from app.services.validator_service import ValidatorService
|
from app.services.validator_service import ValidatorService
|
||||||
from app.services.plugin_runner import PluginRunner
|
from app.services.plugin_runner import PluginRunner
|
||||||
from app.services.scheduler_service import SchedulerService
|
from app.services.scheduler_service import SchedulerService
|
||||||
|
from app.api.ws_manager import ConnectionManager
|
||||||
|
from app.api.realtime import stats_broadcaster_loop
|
||||||
|
|
||||||
settings_repo = SettingsRepository()
|
settings_repo = SettingsRepository()
|
||||||
proxy_repo = ProxyRepository()
|
proxy_repo = ProxyRepository()
|
||||||
@@ -46,22 +48,50 @@ async def lifespan(app: FastAPI):
|
|||||||
|
|
||||||
# 验证 WorkerPool
|
# 验证 WorkerPool
|
||||||
async def validation_handler(proxy):
|
async def validation_handler(proxy):
|
||||||
from app.models.domain import ProxyRaw
|
async with get_db_connection() as db:
|
||||||
is_valid, latency = await validator.validate(
|
existing = await proxy_repo.get_by_ip_port(db, proxy.ip, proxy.port)
|
||||||
proxy.ip, proxy.port, proxy.protocol
|
is_valid, latency = await validator.validate(
|
||||||
)
|
proxy.ip, proxy.port, proxy.protocol
|
||||||
async with get_db() as db:
|
)
|
||||||
if is_valid:
|
if not existing:
|
||||||
await proxy_repo.insert_or_update(
|
return
|
||||||
db, proxy.ip, proxy.port, proxy.protocol, score=app_settings.score_valid
|
if existing.validated == 0:
|
||||||
)
|
if is_valid:
|
||||||
if latency:
|
await proxy_repo.insert_or_update(
|
||||||
await proxy_repo.update_response_time(db, proxy.ip, proxy.port, latency)
|
db,
|
||||||
|
proxy.ip,
|
||||||
|
proxy.port,
|
||||||
|
proxy.protocol,
|
||||||
|
score=app_settings.score_valid,
|
||||||
|
)
|
||||||
|
if latency:
|
||||||
|
await proxy_repo.update_response_time(
|
||||||
|
db, proxy.ip, proxy.port, latency
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await proxy_repo.delete(db, proxy.ip, proxy.port)
|
||||||
else:
|
else:
|
||||||
await proxy_repo.update_score(
|
if is_valid:
|
||||||
db, proxy.ip, proxy.port, app_settings.score_invalid,
|
await proxy_repo.insert_or_update(
|
||||||
app_settings.score_min, app_settings.score_max
|
db,
|
||||||
)
|
proxy.ip,
|
||||||
|
proxy.port,
|
||||||
|
proxy.protocol,
|
||||||
|
score=app_settings.score_valid,
|
||||||
|
)
|
||||||
|
if latency:
|
||||||
|
await proxy_repo.update_response_time(
|
||||||
|
db, proxy.ip, proxy.port, latency
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await proxy_repo.update_score(
|
||||||
|
db,
|
||||||
|
proxy.ip,
|
||||||
|
proxy.port,
|
||||||
|
app_settings.score_invalid,
|
||||||
|
app_settings.score_min,
|
||||||
|
app_settings.score_max,
|
||||||
|
)
|
||||||
|
|
||||||
worker_pool = AsyncWorkerPool(
|
worker_pool = AsyncWorkerPool(
|
||||||
worker_count=db_settings.get("default_concurrency", app_settings.validator_max_concurrency),
|
worker_count=db_settings.get("default_concurrency", app_settings.validator_max_concurrency),
|
||||||
@@ -75,7 +105,7 @@ async def lifespan(app: FastAPI):
|
|||||||
await stack.enter_async_context(executor)
|
await stack.enter_async_context(executor)
|
||||||
|
|
||||||
# 插件运行器
|
# 插件运行器
|
||||||
plugin_runner = PluginRunner(timeout=db_settings.get("crawl_timeout", 30))
|
plugin_runner = PluginRunner()
|
||||||
|
|
||||||
# 调度器
|
# 调度器
|
||||||
scheduler = SchedulerService(
|
scheduler = SchedulerService(
|
||||||
@@ -91,6 +121,9 @@ async def lifespan(app: FastAPI):
|
|||||||
app.state.plugin_runner = plugin_runner
|
app.state.plugin_runner = plugin_runner
|
||||||
app.state.scheduler = scheduler
|
app.state.scheduler = scheduler
|
||||||
|
|
||||||
|
app.state.ws_manager = ConnectionManager()
|
||||||
|
app.state.stats_broadcaster_task = asyncio.create_task(stats_broadcaster_loop(app))
|
||||||
|
|
||||||
# 启动调度器
|
# 启动调度器
|
||||||
if db_settings.get("auto_validate", True):
|
if db_settings.get("auto_validate", True):
|
||||||
try:
|
try:
|
||||||
@@ -101,6 +134,13 @@ async def lifespan(app: FastAPI):
|
|||||||
logger.info("API server started")
|
logger.info("API server started")
|
||||||
yield
|
yield
|
||||||
|
|
||||||
|
app.state.stats_broadcaster_task.cancel()
|
||||||
|
try:
|
||||||
|
await app.state.stats_broadcaster_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
await app.state.ws_manager.disconnect_all()
|
||||||
|
|
||||||
# 停止调度器
|
# 停止调度器
|
||||||
await scheduler.stop()
|
await scheduler.stop()
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,11 @@
|
|||||||
"""FastAPI 应用工厂"""
|
"""FastAPI 应用工厂"""
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Windows 上默认 Proactor 事件循环易导致 httpx 异步出站 ConnectTimeout,与同步请求表现不一致
|
||||||
|
if sys.platform == "win32":
|
||||||
|
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from app.api.lifespan import lifespan
|
from app.api.lifespan import lifespan
|
||||||
|
|||||||
25
app/api/realtime.py
Normal file
25
app/api/realtime.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
"""实时统计广播后台任务"""
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
from app.core.config import settings
|
||||||
|
from app.core.log import logger
|
||||||
|
from app.services.dashboard_stats import get_dashboard_stats
|
||||||
|
|
||||||
|
|
||||||
|
async def stats_broadcaster_loop(app: FastAPI) -> None:
|
||||||
|
manager = app.state.ws_manager
|
||||||
|
interval = settings.ws_stats_interval_seconds
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
await asyncio.sleep(interval)
|
||||||
|
if manager.connection_count == 0:
|
||||||
|
continue
|
||||||
|
scheduler = app.state.scheduler
|
||||||
|
stats = await get_dashboard_stats(scheduler.running)
|
||||||
|
await manager.broadcast_json({"type": "stats", "data": stats})
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
logger.exception("stats broadcaster tick failed")
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
"""路由包"""
|
"""路由包"""
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
from app.api.routes import proxies, plugins, scheduler, settings, tasks
|
from app.api.routes import proxies, plugins, scheduler, settings, tasks, ws
|
||||||
|
|
||||||
api_router = APIRouter()
|
api_router = APIRouter()
|
||||||
api_router.include_router(proxies.router)
|
api_router.include_router(proxies.router)
|
||||||
|
api_router.include_router(ws.router)
|
||||||
api_router.include_router(plugins.router)
|
api_router.include_router(plugins.router)
|
||||||
api_router.include_router(scheduler.router)
|
api_router.include_router(scheduler.router)
|
||||||
api_router.include_router(settings.router)
|
api_router.include_router(settings.router)
|
||||||
|
|||||||
@@ -113,8 +113,8 @@ def _create_crawl_all_aggregator(job_ids, executor):
|
|||||||
class CrawlAllAggregator(Job):
|
class CrawlAllAggregator(Job):
|
||||||
async def run(self):
|
async def run(self):
|
||||||
self._set_running()
|
self._set_running()
|
||||||
# 等待所有子 job 完成(最多等 30 秒)
|
# 等待所有子 job 完成(最多约 5 分钟,与前端轮询一致)
|
||||||
for _ in range(300):
|
for _ in range(3000):
|
||||||
if self.is_cancelled:
|
if self.is_cancelled:
|
||||||
break
|
break
|
||||||
all_done = all(
|
all_done = all(
|
||||||
@@ -125,15 +125,56 @@ def _create_crawl_all_aggregator(job_ids, executor):
|
|||||||
break
|
break
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
total = 0
|
total = 0
|
||||||
valid = 0
|
plugins_failed = 0
|
||||||
invalid = 0
|
per_plugin = []
|
||||||
for jid in job_ids:
|
for jid in job_ids:
|
||||||
job = executor.get_job(jid)
|
job = executor.get_job(jid)
|
||||||
if job and job.result:
|
plugin_id = getattr(job, "plugin_id", "") if job else ""
|
||||||
total += job.result.get("proxy_count", 0)
|
proxy_count = 0
|
||||||
valid += job.result.get("success_count", 0)
|
crawl_failed = False
|
||||||
invalid += job.result.get("failure_count", 0)
|
err_msg = None
|
||||||
result = {"total_crawled": total, "valid_count": valid, "invalid_count": invalid}
|
job_status = job.status.value if job else "missing"
|
||||||
|
|
||||||
|
if not job:
|
||||||
|
per_plugin.append({
|
||||||
|
"plugin_id": plugin_id,
|
||||||
|
"proxy_count": 0,
|
||||||
|
"crawl_failed": True,
|
||||||
|
"error": "任务不存在",
|
||||||
|
"job_status": job_status,
|
||||||
|
})
|
||||||
|
plugins_failed += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if job.status.value == "failed":
|
||||||
|
crawl_failed = True
|
||||||
|
plugins_failed += 1
|
||||||
|
err_msg = job.error or "任务失败"
|
||||||
|
elif job.result:
|
||||||
|
r = job.result
|
||||||
|
plugin_id = r.get("plugin_id") or plugin_id
|
||||||
|
proxy_count = r.get("proxy_count", 0)
|
||||||
|
total += proxy_count
|
||||||
|
if r.get("crawl_failed") or r.get("failure_count", 0) > 0:
|
||||||
|
crawl_failed = True
|
||||||
|
plugins_failed += 1
|
||||||
|
err_msg = r.get("error")
|
||||||
|
else:
|
||||||
|
total += 0
|
||||||
|
|
||||||
|
per_plugin.append({
|
||||||
|
"plugin_id": plugin_id,
|
||||||
|
"proxy_count": proxy_count,
|
||||||
|
"crawl_failed": crawl_failed,
|
||||||
|
"error": err_msg,
|
||||||
|
"job_status": job_status,
|
||||||
|
})
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"total_crawled": total,
|
||||||
|
"plugins_failed": plugins_failed,
|
||||||
|
"per_plugin": per_plugin,
|
||||||
|
}
|
||||||
if self.is_cancelled:
|
if self.is_cancelled:
|
||||||
result["cancelled"] = True
|
result["cancelled"] = True
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -5,7 +5,8 @@ from fastapi.responses import StreamingResponse
|
|||||||
|
|
||||||
from app.services.proxy_service import ProxyService
|
from app.services.proxy_service import ProxyService
|
||||||
from app.services.scheduler_service import SchedulerService
|
from app.services.scheduler_service import SchedulerService
|
||||||
from app.models.schemas import ProxyListRequest, BatchDeleteRequest
|
from app.services.dashboard_stats import get_dashboard_stats
|
||||||
|
from app.models.schemas import ProxyListRequest, BatchDeleteRequest, ProxyDeleteItem
|
||||||
from app.api.deps import get_proxy_service, get_scheduler_service
|
from app.api.deps import get_proxy_service, get_scheduler_service
|
||||||
from app.api.common import success_response, format_proxy
|
from app.api.common import success_response, format_proxy
|
||||||
from app.core.exceptions import ProxyPoolException, ProxyNotFoundException
|
from app.core.exceptions import ProxyPoolException, ProxyNotFoundException
|
||||||
@@ -15,11 +16,9 @@ router = APIRouter(prefix="/api/proxies", tags=["proxies"])
|
|||||||
|
|
||||||
@router.get("/stats")
|
@router.get("/stats")
|
||||||
async def get_stats(
|
async def get_stats(
|
||||||
proxy_service: ProxyService = Depends(get_proxy_service),
|
|
||||||
scheduler_service: SchedulerService = Depends(get_scheduler_service),
|
scheduler_service: SchedulerService = Depends(get_scheduler_service),
|
||||||
):
|
):
|
||||||
stats = await proxy_service.get_stats()
|
stats = await get_dashboard_stats(scheduler_service.running)
|
||||||
stats["scheduler_running"] = scheduler_service.running
|
|
||||||
return success_response("获取统计信息成功", stats)
|
return success_response("获取统计信息成功", stats)
|
||||||
|
|
||||||
|
|
||||||
@@ -36,6 +35,7 @@ async def list_proxies(
|
|||||||
max_score=request.max_score,
|
max_score=request.max_score,
|
||||||
sort_by=request.sort_by,
|
sort_by=request.sort_by,
|
||||||
sort_order=request.sort_order,
|
sort_order=request.sort_order,
|
||||||
|
pool_filter=request.pool_filter,
|
||||||
)
|
)
|
||||||
return success_response(
|
return success_response(
|
||||||
"获取代理列表成功",
|
"获取代理列表成功",
|
||||||
@@ -75,6 +75,16 @@ async def export_proxies(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/delete-one")
|
||||||
|
async def delete_proxy_one(
|
||||||
|
item: ProxyDeleteItem,
|
||||||
|
service: ProxyService = Depends(get_proxy_service),
|
||||||
|
):
|
||||||
|
"""JSON 删除(推荐):IPv6 等含冒号 IP 不受路径分段影响。"""
|
||||||
|
await service.delete_proxy(item.ip, item.port)
|
||||||
|
return success_response("删除代理成功")
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/{ip}/{port}")
|
@router.delete("/{ip}/{port}")
|
||||||
async def delete_proxy(ip: str, port: int, service: ProxyService = Depends(get_proxy_service)):
|
async def delete_proxy(ip: str, port: int, service: ProxyService = Depends(get_proxy_service)):
|
||||||
await service.delete_proxy(ip, port)
|
await service.delete_proxy(ip, port)
|
||||||
|
|||||||
@@ -1,10 +1,13 @@
|
|||||||
"""设置相关路由"""
|
"""设置相关路由"""
|
||||||
|
import asyncio
|
||||||
|
|
||||||
from fastapi import APIRouter, Request, Depends
|
from fastapi import APIRouter, Request, Depends
|
||||||
from app.core.db import get_db
|
from app.core.db import get_db
|
||||||
from app.repositories.settings_repo import SettingsRepository
|
from app.repositories.settings_repo import SettingsRepository
|
||||||
from app.models.schemas import SettingsSchema
|
from app.models.schemas import SettingsSchema
|
||||||
from app.api.common import success_response
|
from app.api.common import success_response
|
||||||
from app.api.deps import get_settings_repo
|
from app.api.deps import get_settings_repo
|
||||||
|
from app.core.config import settings as app_settings
|
||||||
from app.core.exceptions import ProxyPoolException
|
from app.core.exceptions import ProxyPoolException
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
|
|
||||||
@@ -47,17 +50,21 @@ async def save_settings(
|
|||||||
|
|
||||||
# 热更新验证器超时和并发(下次验证时生效)
|
# 热更新验证器超时和并发(下次验证时生效)
|
||||||
if validator:
|
if validator:
|
||||||
validator._init_timeout = request.validation_timeout
|
vt = float(request.validation_timeout)
|
||||||
validator._init_connect_timeout = request.validation_timeout
|
validator._init_timeout = vt
|
||||||
|
# 连接阶段单独收紧:勿与 total 等同,否则死代理会在 connect 上耗满整段超时
|
||||||
|
validator._init_connect_timeout = min(
|
||||||
|
float(app_settings.validator_connect_timeout), vt
|
||||||
|
)
|
||||||
validator._init_max_concurrency = request.default_concurrency
|
validator._init_max_concurrency = request.default_concurrency
|
||||||
if request.validation_targets is not None:
|
if request.validation_targets is not None:
|
||||||
validator.update_test_urls(request.validation_targets)
|
validator.update_test_urls(request.validation_targets)
|
||||||
# 延迟关闭旧 session:让正在验证的代理继续使用旧 session,
|
# 延迟关闭旧 session:让正在验证的代理继续使用旧 session,
|
||||||
# 新请求会通过 _ensure_session() 自动创建使用新配置的 session
|
# 新请求会通过 _ensure_session() 自动创建使用新配置的 session
|
||||||
|
await validator.close_socks_sessions()
|
||||||
old_session = validator._http_session
|
old_session = validator._http_session
|
||||||
validator._http_session = None
|
validator._http_session = None
|
||||||
validator._http_connector = None
|
validator._http_connector = None
|
||||||
validator._semaphore = None
|
|
||||||
if old_session and not old_session.closed:
|
if old_session and not old_session.closed:
|
||||||
asyncio.create_task(old_session.close())
|
asyncio.create_task(old_session.close())
|
||||||
logger.info(f"Validator config updated: timeout={request.validation_timeout}, concurrency={request.default_concurrency}, targets={request.validation_targets}")
|
logger.info(f"Validator config updated: timeout={request.validation_timeout}, concurrency={request.default_concurrency}, targets={request.validation_targets}")
|
||||||
|
|||||||
32
app/api/routes/ws.py
Normal file
32
app/api/routes/ws.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
"""WebSocket 实时推送"""
|
||||||
|
import json
|
||||||
|
|
||||||
|
from fastapi import APIRouter, WebSocket
|
||||||
|
from starlette.websockets import WebSocketDisconnect
|
||||||
|
|
||||||
|
from app.services.dashboard_stats import get_dashboard_stats
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api", tags=["websocket"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.websocket("/ws")
|
||||||
|
async def websocket_dashboard(websocket: WebSocket):
|
||||||
|
app = websocket.app
|
||||||
|
await websocket.accept()
|
||||||
|
manager = app.state.ws_manager
|
||||||
|
await manager.connect(websocket)
|
||||||
|
try:
|
||||||
|
stats = await get_dashboard_stats(app.state.scheduler.running)
|
||||||
|
await websocket.send_json({"type": "stats", "data": stats})
|
||||||
|
while True:
|
||||||
|
raw = await websocket.receive_text()
|
||||||
|
try:
|
||||||
|
msg = json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
if msg.get("type") == "ping":
|
||||||
|
await websocket.send_json({"type": "pong"})
|
||||||
|
except WebSocketDisconnect:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
await manager.disconnect(websocket)
|
||||||
52
app/api/ws_manager.py
Normal file
52
app/api/ws_manager.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
"""WebSocket 连接管理与广播"""
|
||||||
|
import asyncio
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from starlette.websockets import WebSocket, WebSocketState
|
||||||
|
|
||||||
|
|
||||||
|
class ConnectionManager:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._connections: List[WebSocket] = []
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def connection_count(self) -> int:
|
||||||
|
return len(self._connections)
|
||||||
|
|
||||||
|
async def connect(self, websocket: WebSocket) -> None:
|
||||||
|
async with self._lock:
|
||||||
|
self._connections.append(websocket)
|
||||||
|
|
||||||
|
async def disconnect(self, websocket: WebSocket) -> None:
|
||||||
|
async with self._lock:
|
||||||
|
if websocket in self._connections:
|
||||||
|
self._connections.remove(websocket)
|
||||||
|
|
||||||
|
async def broadcast_json(self, payload: dict) -> None:
|
||||||
|
async with self._lock:
|
||||||
|
targets = list(self._connections)
|
||||||
|
stale: List[WebSocket] = []
|
||||||
|
for ws in targets:
|
||||||
|
try:
|
||||||
|
if ws.client_state != WebSocketState.CONNECTED:
|
||||||
|
stale.append(ws)
|
||||||
|
continue
|
||||||
|
await ws.send_json(payload)
|
||||||
|
except Exception:
|
||||||
|
stale.append(ws)
|
||||||
|
if stale:
|
||||||
|
async with self._lock:
|
||||||
|
for ws in stale:
|
||||||
|
if ws in self._connections:
|
||||||
|
self._connections.remove(ws)
|
||||||
|
|
||||||
|
async def disconnect_all(self) -> None:
|
||||||
|
async with self._lock:
|
||||||
|
targets = list(self._connections)
|
||||||
|
self._connections.clear()
|
||||||
|
for ws in targets:
|
||||||
|
try:
|
||||||
|
await ws.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
"""全局配置 - 使用 Pydantic Settings 支持环境变量和 .env 文件"""
|
"""全局配置 - 使用 Pydantic Settings 支持环境变量和 .env 文件"""
|
||||||
import os
|
import os
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from pydantic import AliasChoices, Field
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
||||||
@@ -11,8 +12,11 @@ class Settings(BaseSettings):
|
|||||||
extra="ignore",
|
extra="ignore",
|
||||||
)
|
)
|
||||||
|
|
||||||
# 数据库配置
|
# 数据库配置(环境变量 PROXYPOOL_DB_PATH 优先,供 pytest 与生产隔离)
|
||||||
db_path: str = "db/proxies.sqlite"
|
db_path: str = Field(
|
||||||
|
default="db/proxies.sqlite",
|
||||||
|
validation_alias=AliasChoices("PROXYPOOL_DB_PATH", "DB_PATH", "db_path"),
|
||||||
|
)
|
||||||
|
|
||||||
# API 服务配置
|
# API 服务配置
|
||||||
host: str = "127.0.0.1"
|
host: str = "127.0.0.1"
|
||||||
@@ -31,6 +35,9 @@ class Settings(BaseSettings):
|
|||||||
log_level: str = "INFO"
|
log_level: str = "INFO"
|
||||||
log_dir: str = "logs"
|
log_dir: str = "logs"
|
||||||
|
|
||||||
|
# WebSocket:统计广播间隔(秒);无连接时不查库
|
||||||
|
ws_stats_interval_seconds: int = 1
|
||||||
|
|
||||||
# 导出配置
|
# 导出配置
|
||||||
export_max_records: int = 10000
|
export_max_records: int = 10000
|
||||||
|
|
||||||
|
|||||||
@@ -54,10 +54,23 @@ async def init_db():
|
|||||||
await db.execute("UPDATE proxies SET created_at = CURRENT_TIMESTAMP WHERE created_at IS NULL")
|
await db.execute("UPDATE proxies SET created_at = CURRENT_TIMESTAMP WHERE created_at IS NULL")
|
||||||
logger.info("Migrated: added created_at column")
|
logger.info("Migrated: added created_at column")
|
||||||
|
|
||||||
|
# 迁移:validated 0=待验证 1=已验证入池(参与分数维护)
|
||||||
|
try:
|
||||||
|
await db.execute("SELECT validated FROM proxies LIMIT 1")
|
||||||
|
except Exception:
|
||||||
|
await db.execute(
|
||||||
|
"ALTER TABLE proxies ADD COLUMN validated INTEGER NOT NULL DEFAULT 0"
|
||||||
|
)
|
||||||
|
await db.execute(
|
||||||
|
"UPDATE proxies SET validated = 1 WHERE score > 0"
|
||||||
|
)
|
||||||
|
logger.info("Migrated: added validated column")
|
||||||
|
|
||||||
await db.execute("CREATE INDEX IF NOT EXISTS idx_score ON proxies(score)")
|
await db.execute("CREATE INDEX IF NOT EXISTS idx_score ON proxies(score)")
|
||||||
await db.execute("CREATE INDEX IF NOT EXISTS idx_protocol ON proxies(protocol)")
|
await db.execute("CREATE INDEX IF NOT EXISTS idx_protocol ON proxies(protocol)")
|
||||||
await db.execute("CREATE INDEX IF NOT EXISTS idx_last_check ON proxies(last_check)")
|
await db.execute("CREATE INDEX IF NOT EXISTS idx_last_check ON proxies(last_check)")
|
||||||
await db.execute("CREATE INDEX IF NOT EXISTS idx_ip_port ON proxies(ip, port)")
|
await db.execute("CREATE INDEX IF NOT EXISTS idx_ip_port ON proxies(ip, port)")
|
||||||
|
await db.execute("CREATE INDEX IF NOT EXISTS idx_validated ON proxies(validated)")
|
||||||
|
|
||||||
# 插件设置表
|
# 插件设置表
|
||||||
await db.execute("""
|
await db.execute("""
|
||||||
@@ -94,6 +107,10 @@ async def init_db():
|
|||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
# 仅移除已废弃设置键,不碰 proxies 表数据
|
||||||
|
await db.execute(
|
||||||
|
"DELETE FROM settings WHERE key IN ('crawl_timeout', 'max_retries')"
|
||||||
|
)
|
||||||
|
|
||||||
await db.commit()
|
await db.commit()
|
||||||
logger.info("Database initialized")
|
logger.info("Database initialized")
|
||||||
@@ -112,6 +129,19 @@ async def get_db() -> AsyncIterator[aiosqlite.Connection]:
|
|||||||
await db.close()
|
await db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def get_db_connection() -> AsyncIterator[aiosqlite.Connection]:
    """Yield one aiosqlite connection that the caller keeps for a whole unit of work.

    A single connection spans "read DB -> await network I/O -> write DB", which
    spares a validation worker from connecting twice per proxy.

    The connection is opened against ``DB_PATH`` after ``ensure_db_dir()`` has
    run, is switched to WAL journaling with ``synchronous=NORMAL``, and is
    always closed when the ``async with`` body exits (normally or by exception).
    """
    ensure_db_dir()
    conn = await aiosqlite.connect(DB_PATH)
    try:
        # Apply the per-connection pragmas in the same order as before.
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA synchronous=NORMAL"):
            await conn.execute(pragma)
        yield conn
    finally:
        await conn.close()
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def transaction() -> AsyncIterator[aiosqlite.Connection]:
|
async def transaction() -> AsyncIterator[aiosqlite.Connection]:
|
||||||
"""获取带有显式事务控制的数据库连接
|
"""获取带有显式事务控制的数据库连接
|
||||||
|
|||||||
@@ -101,17 +101,51 @@ class CrawlJob(Job):
|
|||||||
result = await self.plugin_runner.run(plugin)
|
result = await self.plugin_runner.run(plugin)
|
||||||
proxies: List[ProxyRaw] = result.proxies if result else []
|
proxies: List[ProxyRaw] = result.proxies if result else []
|
||||||
|
|
||||||
if proxies and self.validator_pool:
|
if proxies:
|
||||||
await self.validator_pool.submit(proxies)
|
from app.core.db import transaction
|
||||||
logger.info(f"CrawlJob {self.id}: submitted {len(proxies)} proxies for validation")
|
from app.repositories.proxy_repo import ProxyRepository
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with transaction() as db:
|
||||||
|
await ProxyRepository.upsert_many_from_crawl(db, proxies, 0)
|
||||||
|
logger.info(
|
||||||
|
f"CrawlJob {self.id}: persisted {len(proxies)} crawled proxies as pending"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"CrawlJob {self.id}: failed to persist crawled proxies: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
if proxies and self.validator_pool:
|
||||||
|
from app.core.db import get_db as _get_db
|
||||||
|
from app.repositories.settings_repo import (
|
||||||
|
SettingsRepository,
|
||||||
|
DEFAULT_SETTINGS,
|
||||||
|
)
|
||||||
|
|
||||||
|
async with _get_db() as db:
|
||||||
|
db_settings = await SettingsRepository.get_all(db)
|
||||||
|
if db_settings.get(
|
||||||
|
"auto_validate_after_crawl",
|
||||||
|
DEFAULT_SETTINGS["auto_validate_after_crawl"],
|
||||||
|
):
|
||||||
|
await self.validator_pool.submit(proxies)
|
||||||
|
logger.info(
|
||||||
|
f"CrawlJob {self.id}: submitted {len(proxies)} proxies for immediate validation"
|
||||||
|
)
|
||||||
|
|
||||||
|
crawl_failed = bool(result and (result.failure_count > 0 or result.error))
|
||||||
payload = {
|
payload = {
|
||||||
"plugin_id": self.plugin_id,
|
"plugin_id": self.plugin_id,
|
||||||
"proxy_count": len(proxies),
|
"proxy_count": len(proxies),
|
||||||
|
"crawl_failed": crawl_failed,
|
||||||
|
"error": result.error if result else None,
|
||||||
|
# 与持久化统计一致:success_count=本次爬到的条数,failure_count=是否失败(0/1)
|
||||||
|
"success_count": len(proxies),
|
||||||
|
"failure_count": result.failure_count if result else 0,
|
||||||
}
|
}
|
||||||
if result:
|
|
||||||
payload["success_count"] = result.success_count
|
|
||||||
payload["failure_count"] = result.failure_count
|
|
||||||
self._set_completed(payload)
|
self._set_completed(payload)
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
@@ -133,7 +167,7 @@ class ValidateAllJob(Job):
|
|||||||
repo = self.proxy_repo or ProxyRepository()
|
repo = self.proxy_repo or ProxyRepository()
|
||||||
|
|
||||||
async with get_db() as db:
|
async with get_db() as db:
|
||||||
proxies = await repo.list_all(db)
|
proxies = await repo.list_for_validation(db)
|
||||||
|
|
||||||
if not proxies:
|
if not proxies:
|
||||||
self._set_completed({"total": 0, "submitted": 0})
|
self._set_completed({"total": 0, "submitted": 0})
|
||||||
|
|||||||
@@ -65,9 +65,12 @@ class AsyncWorkerPool:
|
|||||||
logger.info(f"{self.name} stopped")
|
logger.info(f"{self.name} stopped")
|
||||||
|
|
||||||
async def submit(self, items: List[T]) -> None:
|
async def submit(self, items: List[T]) -> None:
|
||||||
"""提交一批任务到队列(阻塞直到有空位,天然背压)"""
|
"""提交一批任务到队列(优先 put_nowait,队列满时再 await put)"""
|
||||||
for item in items:
|
for item in items:
|
||||||
await self._queue.put(item)
|
try:
|
||||||
|
self._queue.put_nowait(item)
|
||||||
|
except asyncio.QueueFull:
|
||||||
|
await self._queue.put(item)
|
||||||
|
|
||||||
async def drain(self) -> None:
|
async def drain(self) -> None:
|
||||||
"""等待队列中所有任务被消费完毕"""
|
"""等待队列中所有任务被消费完毕"""
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ class BaseCrawlerPlugin(ABC):
|
|||||||
description: str = ""
|
description: str = ""
|
||||||
enabled: bool = True
|
enabled: bool = True
|
||||||
default_config: Dict[str, Any] = {}
|
default_config: Dict[str, Any] = {}
|
||||||
|
#: 单插件整段 crawl() 的 asyncio.wait_for 上限(秒),彼此独立、互不影响
|
||||||
|
crawl_timeout_seconds: float = 120.0
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._config: Dict[str, Any] = dict(self.default_config or {})
|
self._config: Dict[str, Any] = dict(self.default_config or {})
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ class ProxyRaw:
|
|||||||
@dataclass
|
@dataclass
|
||||||
class Proxy:
|
class Proxy:
|
||||||
"""数据库中的代理实体"""
|
"""数据库中的代理实体"""
|
||||||
|
|
||||||
ip: str
|
ip: str
|
||||||
port: int
|
port: int
|
||||||
protocol: str
|
protocol: str
|
||||||
@@ -29,6 +30,7 @@ class Proxy:
|
|||||||
response_time_ms: Optional[float] = None
|
response_time_ms: Optional[float] = None
|
||||||
last_check: Optional[datetime] = None
|
last_check: Optional[datetime] = None
|
||||||
created_at: Optional[datetime] = None
|
created_at: Optional[datetime] = None
|
||||||
|
validated: int = 0 # 0 待验证 1 已验证(可参与分数与对外取用)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -46,7 +48,12 @@ class PluginInfo:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CrawlResult:
|
class CrawlResult:
|
||||||
"""插件爬取结果"""
|
"""插件爬取结果
|
||||||
|
|
||||||
|
success_count: 最近一轮成功爬取到的代理条数(去重后),非「验证通过数」
|
||||||
|
failure_count: 最近一轮是否爬取失败(健康检查/超时/异常为 1,否则为 0)
|
||||||
|
"""
|
||||||
|
|
||||||
plugin_name: str
|
plugin_name: str
|
||||||
proxies: List[ProxyRaw] = field(default_factory=list)
|
proxies: List[ProxyRaw] = field(default_factory=list)
|
||||||
success_count: int = 0
|
success_count: int = 0
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
"""Pydantic 模型 - 用于 API 请求/响应校验"""
|
"""Pydantic 模型 - 用于 API 请求/响应校验"""
|
||||||
from pydantic import BaseModel, Field, field_validator
|
from pydantic import BaseModel, Field, field_validator, ConfigDict
|
||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
|
|
||||||
|
|
||||||
@@ -25,6 +25,7 @@ class ProxyResponse(BaseModel):
|
|||||||
score: int
|
score: int
|
||||||
response_time_ms: Optional[float] = None
|
response_time_ms: Optional[float] = None
|
||||||
last_check: Optional[str] = None
|
last_check: Optional[str] = None
|
||||||
|
validated: int = 0
|
||||||
|
|
||||||
|
|
||||||
class PluginResponse(BaseModel):
|
class PluginResponse(BaseModel):
|
||||||
@@ -39,13 +40,14 @@ class PluginResponse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class SettingsSchema(BaseModel):
|
class SettingsSchema(BaseModel):
|
||||||
crawl_timeout: int = Field(default=30, ge=5, le=120)
|
model_config = ConfigDict(extra="ignore")
|
||||||
validation_timeout: int = Field(default=10, ge=3, le=60)
|
|
||||||
max_retries: int = Field(default=3, ge=0, le=10)
|
validation_timeout: int = Field(default=6, ge=3, le=60)
|
||||||
default_concurrency: int = Field(default=50, ge=10, le=200)
|
default_concurrency: int = Field(default=120, ge=10, le=400)
|
||||||
min_proxy_score: int = Field(default=0, ge=0, le=100)
|
min_proxy_score: int = Field(default=0, ge=0, le=100)
|
||||||
proxy_expiry_days: int = Field(default=7, ge=1, le=30)
|
proxy_expiry_days: int = Field(default=7, ge=1, le=30)
|
||||||
auto_validate: bool = True
|
auto_validate: bool = True
|
||||||
|
auto_validate_after_crawl: bool = False
|
||||||
validate_interval_minutes: int = Field(default=30, ge=5, le=1440)
|
validate_interval_minutes: int = Field(default=30, ge=5, le=1440)
|
||||||
validation_targets: List[str] = Field(
|
validation_targets: List[str] = Field(
|
||||||
default=[
|
default=[
|
||||||
@@ -60,10 +62,14 @@ class SettingsSchema(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class CrawlSummarySchema(BaseModel):
|
class CrawlSummarySchema(BaseModel):
|
||||||
|
"""单次爬取任务结果(与 CrawlJob 返回的 result 对齐)"""
|
||||||
|
|
||||||
plugin_id: str
|
plugin_id: str
|
||||||
proxy_count: int
|
proxy_count: int
|
||||||
valid_count: int
|
crawl_failed: bool = False
|
||||||
invalid_count: int = 0
|
error: Optional[str] = None
|
||||||
|
success_count: int = 0 # 与 proxy_count 相同,兼容旧前端
|
||||||
|
failure_count: int = 0
|
||||||
|
|
||||||
|
|
||||||
class ProxyListRequest(BaseModel):
|
class ProxyListRequest(BaseModel):
|
||||||
@@ -74,6 +80,20 @@ class ProxyListRequest(BaseModel):
|
|||||||
max_score: Optional[int] = Field(default=None, ge=0)
|
max_score: Optional[int] = Field(default=None, ge=0)
|
||||||
sort_by: str = "last_check"
|
sort_by: str = "last_check"
|
||||||
sort_order: str = "DESC"
|
sort_order: str = "DESC"
|
||||||
|
pool_filter: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="all 或不传=全部;pending=待验证;available=已验证且可用",
|
||||||
|
)
|
||||||
|
|
||||||
|
@field_validator("pool_filter")
@classmethod
def validate_pool_filter(cls, v: Optional[str]):
    """Normalise ``pool_filter``.

    ``None``, the empty string and ``"all"`` all mean "no filter" and are
    returned as ``None``. ``"pending"`` and ``"available"`` are returned
    unchanged. Any other value raises ``ValueError``.
    """
    if v in (None, "", "all"):
        return None
    allowed = ("pending", "available")
    if v in allowed:
        return v
    raise ValueError(f"pool_filter 必须是 {allowed} 之一或 all")
|
||||||
|
|
||||||
@field_validator("protocol")
|
@field_validator("protocol")
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -9,6 +9,15 @@ from .kuaidaili import KuaiDaiLiPlugin
|
|||||||
from .speedx import SpeedXPlugin
|
from .speedx import SpeedXPlugin
|
||||||
from .yundaili import YunDaiLiPlugin
|
from .yundaili import YunDaiLiPlugin
|
||||||
from .proxyscrape import ProxyScrapePlugin
|
from .proxyscrape import ProxyScrapePlugin
|
||||||
|
from .fpw_proxy_list_download import FpwProxyListDownloadPlugin
|
||||||
|
from .fpw_socks_ssl_proxy import FpwSocksSslProxyPlugin
|
||||||
|
from .fpw_spys_one import FpwSpysOnePlugin
|
||||||
|
from .fpw_proxynova import FpwProxynovaPlugin
|
||||||
|
from .fpw_hidemy import FpwHidemyPlugin
|
||||||
|
from .fpw_premproxy import FpwPremproxyPlugin
|
||||||
|
from .fpw_freeproxylists import FpwFreeproxylistsPlugin
|
||||||
|
from .fpw_gatherproxy import FpwGatherproxyPlugin
|
||||||
|
from .fpw_checkerproxy import FpwCheckerproxyPlugin
|
||||||
|
|
||||||
# 显式注册所有插件
|
# 显式注册所有插件
|
||||||
registry.register(Fate0Plugin)
|
registry.register(Fate0Plugin)
|
||||||
@@ -19,3 +28,12 @@ registry.register(KuaiDaiLiPlugin)
|
|||||||
registry.register(SpeedXPlugin)
|
registry.register(SpeedXPlugin)
|
||||||
registry.register(YunDaiLiPlugin)
|
registry.register(YunDaiLiPlugin)
|
||||||
registry.register(ProxyScrapePlugin)
|
registry.register(ProxyScrapePlugin)
|
||||||
|
registry.register(FpwProxyListDownloadPlugin)
|
||||||
|
registry.register(FpwSocksSslProxyPlugin)
|
||||||
|
registry.register(FpwSpysOnePlugin)
|
||||||
|
registry.register(FpwProxynovaPlugin)
|
||||||
|
registry.register(FpwHidemyPlugin)
|
||||||
|
registry.register(FpwPremproxyPlugin)
|
||||||
|
registry.register(FpwFreeproxylistsPlugin)
|
||||||
|
registry.register(FpwGatherproxyPlugin)
|
||||||
|
registry.register(FpwCheckerproxyPlugin)
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import re
|
|||||||
import random
|
import random
|
||||||
import asyncio
|
import asyncio
|
||||||
import httpx
|
import httpx
|
||||||
from typing import List, Optional
|
from typing import Dict, List, Optional
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from app.core.plugin_system import BaseCrawlerPlugin
|
from app.core.plugin_system import BaseCrawlerPlugin
|
||||||
from app.models.domain import ProxyRaw
|
from app.models.domain import ProxyRaw
|
||||||
@@ -43,9 +43,56 @@ class BaseHTTPPlugin(BaseCrawlerPlugin):
|
|||||||
self._client = httpx.AsyncClient(
|
self._client = httpx.AsyncClient(
|
||||||
transport=transport,
|
transport=transport,
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
|
# 忽略系统 HTTP(S)_PROXY,避免误配导致列表站全部连接失败
|
||||||
|
trust_env=False,
|
||||||
)
|
)
|
||||||
return self._client
|
return self._client
|
||||||
|
|
||||||
|
@staticmethod
def _http_timeout(seconds: float) -> httpx.Timeout:
    """Build an ``httpx.Timeout`` whose connect phase is capped separately.

    The connect limit is tightened on its own so that AsyncClient does not
    sit in ``connect`` for a long time in some environments.

    Args:
        seconds: Requested overall timeout; values below 2 seconds are raised to 2.

    Returns:
        A timeout using the (floored) overall value, with ``connect`` set to
        35% of it, clamped to the range 3-6 seconds.
    """
    total = max(2.0, float(seconds))
    connect_budget = max(3.0, total * 0.35)
    connect_budget = min(6.0, connect_budget)
    return httpx.Timeout(total, connect=connect_budget)
|
||||||
|
|
||||||
|
@staticmethod
def _decode_response_body(response: httpx.Response) -> str:
    """Decode the raw response bytes into text.

    When the response reports a non-UTF-8 encoding, that encoding is used and
    undecodable bytes are dropped. Otherwise (UTF-8 or no encoding reported)
    strict UTF-8 is tried first and GBK, ignoring errors, is the fallback.
    """
    raw = response.content
    charset = response.encoding
    if charset and charset != "utf-8":
        return raw.decode(charset, errors="ignore")
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        # Pages mislabelled as UTF-8 are retried as GBK.
        return raw.decode("gbk", errors="ignore")
|
||||||
|
|
||||||
|
def _sync_get(self, url: str, timeout: float, headers: dict) -> str:
    """Blocking GET used as a fallback for the async client.

    On Windows, AsyncClient is prone to ConnectTimeout for some sites while
    the synchronous Client works.

    Args:
        url: Address to request.
        timeout: Overall timeout in seconds, passed through ``_http_timeout``.
        headers: Request headers to send.

    Returns:
        The decoded body for a 200 response, otherwise an empty string.
    """
    limits = BaseHTTPPlugin._http_timeout(timeout)
    # A throwaway client: no transport retries, no environment proxy settings.
    with httpx.Client(
        transport=httpx.HTTPTransport(retries=0),
        follow_redirects=True,
        trust_env=False,
    ) as client:
        resp = client.get(url, headers=headers, timeout=limits)
        return self._decode_response_body(resp) if resp.status_code == 200 else ""
|
||||||
|
|
||||||
|
def _sync_post(
    self, url: str, data: Dict[str, str], timeout: float, headers: dict
) -> str:
    """Blocking form POST, the synchronous counterpart of ``_sync_get``.

    Args:
        url: Address to request.
        data: Form fields sent as the request body.
        timeout: Overall timeout in seconds, passed through ``_http_timeout``.
        headers: Request headers to send.

    Returns:
        The decoded body for a 200 response, otherwise an empty string.
    """
    limits = BaseHTTPPlugin._http_timeout(timeout)
    # A throwaway client: no transport retries, no environment proxy settings.
    with httpx.Client(
        transport=httpx.HTTPTransport(retries=0),
        follow_redirects=True,
        trust_env=False,
    ) as client:
        resp = client.post(url, headers=headers, data=data, timeout=limits)
        return self._decode_response_body(resp) if resp.status_code == 200 else ""
|
||||||
|
|
||||||
async def fetch(
|
async def fetch(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
@@ -56,35 +103,81 @@ class BaseHTTPPlugin(BaseCrawlerPlugin):
|
|||||||
"""异步抓取指定 URL 的 HTML 内容"""
|
"""异步抓取指定 URL 的 HTML 内容"""
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
to = self._http_timeout(timeout)
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=self.get_headers(), timeout=timeout)
|
response = await client.get(url, headers=self.get_headers(), timeout=to)
|
||||||
if raise_for_status:
|
if raise_for_status:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
content = response.content
|
return self._decode_response_body(response)
|
||||||
encoding = response.encoding
|
logger.warning(f"Fetch {url} returned status {response.status_code}")
|
||||||
if encoding == "utf-8" or not encoding:
|
|
||||||
try:
|
|
||||||
return content.decode("utf-8")
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
return content.decode("gbk", errors="ignore")
|
|
||||||
return content.decode(encoding, errors="ignore")
|
|
||||||
else:
|
|
||||||
logger.warning(f"Fetch {url} returned status {response.status_code}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Fetch {url} failed (attempt {attempt + 1}/{retries}): {e}")
|
logger.warning(f"Fetch {url} failed (attempt {attempt + 1}/{retries}): {e}")
|
||||||
if attempt < retries - 1:
|
if attempt < retries - 1:
|
||||||
await asyncio.sleep(random.uniform(1, 3))
|
await asyncio.sleep(random.uniform(1, 3))
|
||||||
|
try:
|
||||||
|
text = await asyncio.to_thread(
|
||||||
|
self._sync_get, url, timeout, self.get_headers()
|
||||||
|
)
|
||||||
|
if text:
|
||||||
|
logger.info(f"Fetch {url} 使用同步回退成功")
|
||||||
|
return text
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Fetch {url} 同步回退失败: {e}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
async def fetch_all(self, urls: List[str], timeout: float = 15.0) -> List[str]:
|
async def fetch_post(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
data: Optional[Dict[str, str]] = None,
|
||||||
|
timeout: float = 15.0,
|
||||||
|
retries: int = 2,
|
||||||
|
) -> str:
|
||||||
|
"""POST application/x-www-form-urlencoded,用于 spys.one 等表单页。"""
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
client = self._get_client()
|
||||||
|
payload = data or {}
|
||||||
|
to = self._http_timeout(timeout)
|
||||||
|
for attempt in range(retries):
|
||||||
|
try:
|
||||||
|
response = await client.post(
|
||||||
|
url,
|
||||||
|
headers=self.get_headers(),
|
||||||
|
data=payload,
|
||||||
|
timeout=to,
|
||||||
|
)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return self._decode_response_body(response)
|
||||||
|
logger.warning(f"POST {url} returned status {response.status_code}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"POST {url} failed (attempt {attempt + 1}/{retries}): {e}")
|
||||||
|
if attempt < retries - 1:
|
||||||
|
await asyncio.sleep(random.uniform(1, 3))
|
||||||
|
try:
|
||||||
|
text = await asyncio.to_thread(
|
||||||
|
self._sync_post, url, payload, timeout, self.get_headers()
|
||||||
|
)
|
||||||
|
if text:
|
||||||
|
logger.info(f"POST {url} 使用同步回退成功")
|
||||||
|
return text
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"POST {url} 同步回退失败: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
async def fetch_all(
|
||||||
|
self,
|
||||||
|
urls: List[str],
|
||||||
|
timeout: float = 15.0,
|
||||||
|
retries: int = 2,
|
||||||
|
) -> List[str]:
|
||||||
"""并发抓取多个 URL,限制单个插件内部并发"""
|
"""并发抓取多个 URL,限制单个插件内部并发"""
|
||||||
semaphore = asyncio.Semaphore(self.max_concurrency)
|
semaphore = asyncio.Semaphore(self.max_concurrency)
|
||||||
|
|
||||||
async def _fetch_limited(url: str):
|
async def _fetch_limited(url: str):
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
return await self.fetch(url, timeout=timeout)
|
return await self.fetch(url, timeout=timeout, retries=retries)
|
||||||
|
|
||||||
tasks = [_fetch_limited(url) for url in urls]
|
tasks = [_fetch_limited(url) for url in urls]
|
||||||
return await asyncio.gather(*tasks)
|
return await asyncio.gather(*tasks)
|
||||||
|
|||||||
65
app/plugins/fpw_checkerproxy.py
Normal file
65
app/plugins/fpw_checkerproxy.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
"""checkerproxy.net:尝试常见导出路径 + 正文中的 ip:port(排除示例占位)。"""
|
||||||
|
import re
|
||||||
|
from typing import List, Set, Tuple
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwCheckerproxyPlugin(BaseHTTPPlugin):
    """Crawler for checkerproxy.net: fetches several candidate paths and scrapes ip:port pairs."""

    name = "fpw_checkerproxy"
    display_name = "CheckerProxy.net"
    description = "checkerproxy.net(无稳定公开 API 时可能为空;多路径尝试)"

    def __init__(self):
        super().__init__()
        # Landing page plus two guessed export endpoints.
        self.urls = [
            "https://checkerproxy.net/",
            "https://checkerproxy.net/export",
            "https://checkerproxy.net/api/export",
        ]

    @staticmethod
    def _parse_ip_ports(text: str) -> List[ProxyRaw]:
        """Extract unique ``ip:port`` pairs from free text as HTTP proxies.

        Placeholder addresses are skipped, ports must be within 1-65535, and a
        ``ValueError`` raised while building a ``ProxyRaw`` drops that entry.
        """
        placeholders = {"123.123.123.123", "127.0.0.1", "0.0.0.0"}
        known: Set[Tuple[str, int]] = set()
        found: List[ProxyRaw] = []
        pattern = r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})\b"
        for match in re.finditer(pattern, text):
            host, port_text = match.groups()
            if host in placeholders or not port_text.isdigit():
                continue
            port = int(port_text)
            if port < 1 or port > 65535 or (host, port) in known:
                continue
            # Recorded before construction, so a rejected pair is not retried.
            known.add((host, port))
            try:
                found.append(ProxyRaw(host, port, "http"))
            except ValueError:
                continue
        return found

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch all candidate URLs and return the de-duplicated proxies found."""
        collected: List[ProxyRaw] = []
        known: Set[Tuple[str, int, str]] = set()
        pages = await self.fetch_all(self.urls, timeout=12, retries=1)
        for page in pages:
            # Very short bodies are treated as empty or error pages.
            if not page or len(page) < 200:
                continue
            for proxy in self._parse_ip_ports(page):
                ident = (proxy.ip, proxy.port, proxy.protocol)
                if ident in known:
                    continue
                known.add(ident)
                collected.append(proxy)
            # NOTE(review): indentation was lost in this view; the cut-off is
            # read as "stop visiting further pages once 50 are collected" — confirm.
            if len(collected) >= 50:
                break
        if not collected:
            logger.warning(f"{self.display_name} 未解析到代理(站点可能仅提供在线检测)")
        else:
            logger.info(f"{self.display_name} 解析 {len(collected)} 条")
        return collected
|
||||||
69
app/plugins/fpw_freeproxylists.py
Normal file
69
app/plugins/fpw_freeproxylists.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
"""freeproxylists.net 及常见镜像路径(表格 / 纯文本)。"""
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwFreeproxylistsPlugin(BaseHTTPPlugin):
    """Crawler for freeproxylists.net pages, handling plain-text and HTML-table layouts."""

    name = "fpw_freeproxylists"
    display_name = "FreeProxyLists"
    description = "freeproxylists.net 系列页面(易被 403,多 URL 尝试)"

    def __init__(self):
        super().__init__()
        self.urls = [
            "http://www.freeproxylists.net/",
            "http://freeproxylists.net/",
            "http://www.freeproxylists.net/en/http-txt.html",
        ]

    def _parse_any(self, html: str) -> List[ProxyRaw]:
        """Parse proxies from a page body.

        If at least five ``ip:port`` pairs appear anywhere in the text, only
        those are used. Otherwise the page is parsed as an HTML table whose
        first two cells of each row hold the IP and the port. Every result is
        tagged as ``http``.
        """
        pairs = re.findall(
            r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})\b",
            html,
        )
        if len(pairs) >= 5:
            from_text: List[ProxyRaw] = []
            for host, port_text in pairs:
                if not port_text.isdigit():
                    continue
                port = int(port_text)
                if not 1 <= port <= 65535:
                    continue
                try:
                    from_text.append(ProxyRaw(host, port, "http"))
                except ValueError:
                    pass
            return from_text

        # Table fallback: first cell is the IP, second cell is the port.
        from_table: List[ProxyRaw] = []
        for row in BeautifulSoup(html, "lxml").find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 2:
                continue
            host = cells[0].get_text(strip=True)
            port_text = cells[1].get_text(strip=True)
            if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", host):
                continue
            if not port_text.isdigit():
                continue
            port = int(port_text)
            if not 1 <= port <= 65535:
                continue
            try:
                from_table.append(ProxyRaw(host, port, "http"))
            except ValueError:
                pass
        return from_table

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every configured URL and return proxies de-duplicated by (ip, port, protocol)."""
        known = set()
        collected: List[ProxyRaw] = []
        pages = await self.fetch_all(self.urls, timeout=10, retries=1)
        for source_url, page in zip(self.urls, pages):
            if not page:
                continue
            for proxy in self._parse_any(page):
                ident = (proxy.ip, proxy.port, proxy.protocol)
                if ident in known:
                    continue
                known.add(ident)
                collected.append(proxy)
            # NOTE(review): indentation was lost in this view; this running-total
            # log is read as per-URL because it names the loop's URL — confirm.
            if collected:
                logger.info(f"{self.display_name} 自 {source_url} 累计 {len(collected)} 条")
        return collected
|
||||||
61
app/plugins/fpw_gatherproxy.py
Normal file
61
app/plugins/fpw_gatherproxy.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""gatherproxy.com 页面内嵌 JSON(PROXY_IP / PROXY_PORT)。"""
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwGatherproxyPlugin(BaseHTTPPlugin):
    """Crawler for gatherproxy.com, which embeds PROXY_IP / PROXY_PORT records in the page."""

    name = "fpw_gatherproxy"
    display_name = "GatherProxy"
    description = "gatherproxy.com 内嵌代理 JSON(站点常有限流)"

    def __init__(self):
        super().__init__()
        self.urls = [
            "http://www.gatherproxy.com/proxylist/anonymity/?t=Elite",
            "http://www.gatherproxy.com/proxylist/country/?c=United%20States",
        ]

    def _extract_from_text(self, text: str) -> List[ProxyRaw]:
        """Pull PROXY_IP / PROXY_PORT pairs out of the page text as HTTP proxies.

        Two patterns are applied one after the other: a loose one tolerating
        either quote style, then a strict one for a flat JSON object. The
        result is not de-duplicated here, so one record may appear twice.
        """
        passes = (
            (
                r"PROXY_IP['\"]?\s*:\s*['\"]([\d.]+)['\"].{0,120}?PROXY_PORT['\"]?\s*:\s*['\"](\d+)['\"]",
                re.DOTALL | re.IGNORECASE,
            ),
            (
                r"\{[^{}]*\"PROXY_IP\"\s*:\s*\"([\d.]+)\"[^{}]*\"PROXY_PORT\"\s*:\s*\"(\d+)\"[^{}]*\}",
                0,
            ),
        )
        found: List[ProxyRaw] = []
        for pattern, flags in passes:
            for hit in re.finditer(pattern, text, flags):
                host, port_text = hit.group(1), hit.group(2)
                if not (port_text.isdigit() and 1 <= int(port_text) <= 65535):
                    continue
                try:
                    found.append(ProxyRaw(host, int(port_text), "http"))
                except ValueError:
                    continue
        return found

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every configured URL and return proxies de-duplicated by (ip, port)."""
        known = set()
        collected: List[ProxyRaw] = []
        pages = await self.fetch_all(self.urls, timeout=10, retries=1)
        for source_url, page in zip(self.urls, pages):
            if not page:
                continue
            for proxy in self._extract_from_text(page):
                ident = (proxy.ip, proxy.port)
                if ident in known:
                    continue
                known.add(ident)
                collected.append(proxy)
            # NOTE(review): indentation was lost in this view; this running-total
            # log is read as per-URL because it names the loop's URL — confirm.
            if collected:
                logger.info(f"{self.display_name} 自 {source_url} 累计 {len(collected)} 条")
        return collected
|
||||||
38
app/plugins/fpw_hidemy.py
Normal file
38
app/plugins/fpw_hidemy.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""hidemyna.me 免费代理列表表格。"""
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwHidemyPlugin(BaseHTTPPlugin):
    """Crawler for the hidemyna.me free proxy list tables."""

    name = "fpw_hidemy"
    display_name = "HideMy.name"
    description = "hidemyna.me 英文代理列表(HTTP/HTTPS/SOCKS)"

    def __init__(self):
        super().__init__()
        self.urls = [
            "https://hidemyna.me/en/proxy-list/",
            "https://hidemyna.me/en/proxy-list/?type=hs",
            "https://hidemyna.me/en/proxy-list/?type=socks4",
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every list page and return all parsed rows (no de-duplication here)."""
        collected: List[ProxyRaw] = []
        pages = await self.fetch_all(self.urls, timeout=12, retries=1)
        for source_url, page in zip(self.urls, pages):
            if not page:
                continue
            # parse_html_table is not defined in this class; presumably inherited
            # from BaseHTTPPlugin. Columns: 0 = ip, 1 = port, 4 = protocol.
            rows = self.parse_html_table(
                page,
                column_map={"ip": 0, "port": 1, "protocol": 4},
                protocol="http",
            )
            if not rows:
                continue
            collected.extend(rows)
            logger.info(f"{self.display_name} {source_url}: {len(rows)} 条")
        if collected:
            logger.info(f"{self.display_name} 合计 {len(collected)} 条")
        return collected
|
||||||
64
app/plugins/fpw_premproxy.py
Normal file
64
app/plugins/fpw_premproxy.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
"""premproxy.com 列表页表格。"""
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwPremproxyPlugin(BaseHTTPPlugin):
    """Crawler for the premproxy.com HTTP and SOCKS list pages."""

    name = "fpw_premproxy"
    display_name = "PremProxy"
    description = "premproxy.com HTTP/SOCKS 列表页"

    def __init__(self):
        super().__init__()
        self.urls = [
            "https://premproxy.com/list/",
            "https://premproxy.com/socks-list/",
        ]

    def _parse_html(self, html: str) -> List[ProxyRaw]:
        """Parse table rows whose first two cells hold an IPv4 address and a port.

        The protocol is guessed from the lower-cased text of the whole row:
        ``socks5`` wins, any other mention of ``socks`` means ``socks4``, then
        ``https``, and ``http`` is the default.
        """
        # Checked in order; "socks5" must come before the generic "socks".
        markers = (("socks5", "socks5"), ("socks", "socks4"), ("https", "https"))
        ipv4 = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"
        parsed: List[ProxyRaw] = []
        for row in BeautifulSoup(html, "lxml").find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 2:
                continue
            host = cells[0].get_text(strip=True)
            port_text = cells[1].get_text(strip=True)
            if re.match(ipv4, host) is None or not port_text.isdigit():
                continue
            port = int(port_text)
            if not 1 <= port <= 65535:
                continue
            row_text = row.get_text(" ", strip=True).lower()
            scheme = next(
                (proto for marker, proto in markers if marker in row_text),
                "http",
            )
            try:
                parsed.append(ProxyRaw(host, port, scheme))
            except ValueError:
                continue
        return parsed

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch every list page and return all parsed rows (no de-duplication here)."""
        collected: List[ProxyRaw] = []
        pages = await self.fetch_all(self.urls, timeout=12, retries=1)
        for source_url, page in zip(self.urls, pages):
            if not page:
                continue
            rows = self._parse_html(page)
            if not rows:
                continue
            collected.extend(rows)
            logger.info(f"{self.display_name} {source_url}: {len(rows)} 条")
        if collected:
            logger.info(f"{self.display_name} 合计 {len(collected)} 条")
        return collected
|
||||||
54
app/plugins/fpw_proxy_list_download.py
Normal file
54
app/plugins/fpw_proxy_list_download.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
"""www.proxy-list.download 公开 API(README: Free_Proxy_Website)。"""
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwProxyListDownloadPlugin(BaseHTTPPlugin):
    """Crawler for the proxy-list.download API, with ProxyScrape as a fallback source."""

    name = "fpw_proxy_list_download"
    display_name = "Proxy-List.download"
    description = "proxy-list.download 官方 API(http/https/socks4/socks5)"

    def __init__(self):
        super().__init__()
        self.max_concurrency = 8
        # (protocol, url) pairs for the primary API.
        self.api_pairs = [
            ("http", "https://www.proxy-list.download/api/v1/get?type=http"),
            ("https", "https://www.proxy-list.download/api/v1/get?type=https"),
            ("socks4", "https://www.proxy-list.download/api/v1/get?type=socks4"),
            ("socks5", "https://www.proxy-list.download/api/v1/get?type=socks5"),
        ]
        # (protocol, url) pairs tried only when the primary API yields nothing.
        self.fallback_pairs = [
            ("http", "https://api.proxyscrape.com/v2/?request=get&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("https", "https://api.proxyscrape.com/v2/?request=get&protocol=https&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("socks4", "https://api.proxyscrape.com/v2/?request=get&protocol=socks4&timeout=10000&country=all&ssl=all&anonymity=all"),
            ("socks5", "https://api.proxyscrape.com/v2/?request=get&protocol=socks5&timeout=10000&country=all&ssl=all&anonymity=all"),
        ]

    async def crawl(self) -> List[ProxyRaw]:
        """Query the primary API; if it returns no proxies at all, query the fallback pairs."""

        async def gather_from(pairs, tag: str) -> List[ProxyRaw]:
            # Fetch every URL of the group, then parse each body under its protocol.
            # parse_text_proxies is not defined in this class; presumably inherited
            # from BaseHTTPPlugin.
            bodies = await self.fetch_all(
                [link for _, link in pairs], timeout=10, retries=1
            )
            gathered: List[ProxyRaw] = []
            for (scheme, _), body in zip(pairs, bodies):
                if not body:
                    continue
                parsed = self.parse_text_proxies(body, scheme)
                if parsed:
                    gathered.extend(parsed)
                    logger.info(f"{self.display_name} {tag}{scheme}: {len(parsed)} 条")
            return gathered

        found = await gather_from(self.api_pairs, "")
        if not found:
            logger.warning(f"{self.display_name} 主 API 无数据,尝试 ProxyScrape 备用")
            found = await gather_from(self.fallback_pairs, "fallback ")
        if found:
            logger.info(f"{self.display_name} 合计 {len(found)} 条")
        return found
|
||||||
74
app/plugins/fpw_proxynova.py
Normal file
74
app/plugins/fpw_proxynova.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
"""proxynova.com 表格内 JS 混淆 IP + 明文端口。"""
|
||||||
|
import re
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwProxynovaPlugin(BaseHTTPPlugin):
    """Crawler for the proxynova.com proxy table.

    The IP cell of each row holds a ``<script>`` that writes the address via
    ``document.write("<reversed part>".split("").reverse()...concat("<rest>"))``;
    the port is plain text in the second cell.
    """

    name = "fpw_proxynova"
    display_name = "ProxyNova"
    description = "proxynova.com 代理列表(解析 document.write 混淆 IP)"

    # Checked in order against the upper-cased row text; first hit wins,
    # anything else is treated as plain HTTP.
    _PROTOCOL_MARKERS = (
        ("SOCKS5", "socks5"),
        ("SOCKS4", "socks4"),
        ("HTTPS", "https"),
    )

    def __init__(self):
        super().__init__()
        self.urls = ["https://www.proxynova.com/proxy-server-list/"]

    @staticmethod
    def _decode_proxynova_ip(script_inner: str) -> Optional[str]:
        """Rebuild the IP string from the obfuscating inline script.

        Args:
            script_inner: text content of the ``<script>`` element.

        Returns:
            The reversed first literal followed by the ``concat`` literal,
            or ``None`` when either literal is missing.
        """
        reversed_part = re.search(r'document\.write\("([^"]+)"\.split', script_inner)
        plain_part = re.search(r'\.concat\("([^"]+)"', script_inner)
        if reversed_part is None or plain_part is None:
            return None
        return reversed_part.group(1)[::-1] + plain_part.group(1)

    def _parse_rows(self, html: str) -> List[ProxyRaw]:
        """Extract proxies from the first ``<tbody>`` of the page.

        Rows without a script in the first cell, without a decodable IP, or
        with a port outside 1..65535 are skipped. Rows rejected by
        ``ProxyRaw`` with ``ValueError`` are skipped as well.
        """
        tbody = BeautifulSoup(html, "lxml").find("tbody")
        if not tbody:
            return []

        out: List[ProxyRaw] = []
        for tr in tbody.find_all("tr"):
            cells = tr.find_all("td")
            if len(cells) < 2:
                continue
            script = cells[0].find("script")
            if not script or not script.string:
                continue

            ip = self._decode_proxynova_ip(script.string)
            port_txt = cells[1].get_text(strip=True)
            # isdecimal(), not isdigit(): isdigit() also accepts characters
            # such as superscript digits that int() rejects, which would
            # raise an uncaught ValueError and abort the whole crawl.
            if not ip or not port_txt.isdecimal():
                continue
            port = int(port_txt)
            if not (1 <= port <= 65535):
                continue

            row_text = tr.get_text(" ", strip=True).upper()
            proto = "http"
            for marker, label in self._PROTOCOL_MARKERS:
                if marker in row_text:
                    proto = label
                    break

            try:
                out.append(ProxyRaw(ip, port, proto))
            except ValueError:
                continue
        return out

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch the list page and return the proxies parsed from it."""
        html = await self.fetch(self.urls[0], timeout=14, retries=1)
        if not html:
            return []
        results = self._parse_rows(html)
        if results:
            logger.info(f"{self.display_name} 解析 {len(results)} 条")
        return results
|
||||||
56
app/plugins/fpw_socks_ssl_proxy.py
Normal file
56
app/plugins/fpw_socks_ssl_proxy.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
"""socks-proxy.net / sslproxies.org 表格(README 参考 GetProxyFromSocks-proxy.py)。"""
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwSocksSslProxyPlugin(BaseHTTPPlugin):
    """Crawler for sslproxies.org-style table pages and socks-proxy.net.

    Each page is scanned with a regex for an IPv4 address closing one table
    cell followed by a numeric port opening the next cell.
    """

    name = "fpw_socks_ssl_proxy"
    display_name = "Socks-Proxy / SSLProxies"
    description = "socks-proxy.net 与 sslproxies.org 首页表格(HTTP/HTTPS 列表)"

    # "<ip></td> <td ...> <port>" as it appears in the raw table markup.
    _ROW_RE = re.compile(
        r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>\s*<td[^>]*>\s*(\d+)",
        re.I,
    )

    def __init__(self):
        super().__init__()
        self.max_concurrency = 6
        # Several mirror sites share the sslproxies template, and socks-proxy
        # is unreliable on some networks; using multiple sources raises the
        # success rate.
        self.urls = [
            "https://www.sslproxies.org/",
            "https://free-proxy-list.net/",
            "https://www.us-proxy.org/",
            "https://www.socks-proxy.net/",
        ]

    def _parse_page(self, html: str, default_protocol: str) -> List[ProxyRaw]:
        """Return every ip/port pair found in ``html`` tagged with ``default_protocol``.

        Ports outside 1..65535 and entries rejected by ``ProxyRaw`` with
        ``ValueError`` are dropped.
        """
        found = []
        for ip, port_text in self._ROW_RE.findall(html):
            if not port_text.isdigit():
                continue
            port = int(port_text)
            if port < 1 or port > 65535:
                continue
            try:
                found.append(ProxyRaw(ip, port, default_protocol))
            except ValueError:
                continue
        return found

    async def crawl(self) -> List[ProxyRaw]:
        """Fetch all source pages and merge the proxies parsed from each."""
        pages = await self.fetch_all(self.urls, timeout=12, retries=1)
        results: List[ProxyRaw] = []
        for url, html in zip(self.urls, pages):
            if not html:
                continue
            # Pages from socks-proxy hosts are labelled socks4, all others http.
            proto = "socks4" if "socks-proxy" in url else "http"
            batch = self._parse_page(html, proto)
            results.extend(batch)
            if batch:
                logger.info(f"{self.display_name} {url}: {len(batch)} 条")
        if results:
            logger.info(f"{self.display_name} 合计 {len(results)} 条")
        return results
|
||||||
148
app/plugins/fpw_spys_one.py
Normal file
148
app/plugins/fpw_spys_one.py
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
"""spys.one 表单 POST + 端口 XOR 解码(README: GetProxyFromSPYSONE.py)。"""
|
||||||
|
import asyncio
|
||||||
|
import re
|
||||||
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
|
from app.core.plugin_system import ProxyRaw
|
||||||
|
from app.plugins.base import BaseHTTPPlugin
|
||||||
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
|
class FpwSpysOnePlugin(BaseHTTPPlugin):
    """Crawler for the spys.one HTTP and SOCKS lists.

    Pages are requested with a form POST. A page-level script defines integer
    variables (literals and XORs of other variables); each row then writes
    its port as a concatenation of ``(a^b)`` terms over those variables.
    """

    name = "fpw_spys_one"
    display_name = "Spys.one"
    description = "spys.one HTTP/SOCKS 列表(POST 筛选 + XOR 端口解码)"

    # IP cell followed by the inline script that writes the port.
    _IP_CELL_RE = re.compile(
        r"class=spy14>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})<script[^>]*>([\s\S]+?)</script>",
        re.IGNORECASE,
    )
    # Upper bound, in characters, of the text inspected after an IP cell
    # when looking for the row's protocol label.
    _PROTOCOL_WINDOW = 2000

    def __init__(self):
        super().__init__()
        # (default protocol, list URL, value posted as form field "xf5").
        self.pages: List[Tuple[str, str, str]] = [
            ("http", "http://spys.one/en/http-proxy-list/", "1"),
            ("socks5", "http://spys.one/en/socks-proxy-list/", "2"),
        ]

    @staticmethod
    def _exec_spys_decoder(body: str) -> Dict[str, int]:
        """Evaluate the variable-definition script into a name -> int table.

        Supported statements are ``name=<int>`` and ``name=<x>^<y>`` where
        each operand is an integer literal or an already resolved name.
        Statements mentioning ``document`` are ignored. Evaluation repeats
        until a full pass resolves nothing new, so dependency chains of any
        depth and any statement order are handled; statements that can never
        be resolved are left out of the result.

        Args:
            body: raw script text.

        Returns:
            Mapping of every resolvable variable to its value.
        """
        compact = re.sub(r"\s+", "", body)
        stmts = [s for s in compact.split(";") if s and "document" not in s]
        env: Dict[str, int] = {}

        def operand(token: str) -> Optional[int]:
            # Literal, resolved variable, or None when not (yet) known.
            if token.isdecimal():
                return int(token)
            return env.get(token)

        # Each productive pass adds at least one new name, so the loop is
        # bounded by the number of statements.
        progressed = True
        while progressed:
            progressed = False
            for stmt in stmts:
                if "=" not in stmt:
                    continue
                lhs, rhs = stmt.split("=", 1)
                if lhs in env:
                    continue
                if "^" in rhs:
                    left_tok, right_tok = rhs.split("^", 1)
                    left, right = operand(left_tok), operand(right_tok)
                    if left is None or right is None:
                        continue
                    env[lhs] = left ^ right
                elif rhs.isdecimal():
                    env[lhs] = int(rhs)
                else:
                    continue
                progressed = True
        return env

    def _decoder_env_from_html(self, html: str) -> Dict[str, int]:
        """Pick the script block that decodes into the largest variable table.

        Scripts containing ``document.write`` and scripts with fewer than
        four ``name=<int>^name`` assignments are not considered.
        """
        best: Dict[str, int] = {}
        for m in re.finditer(r"<script[^>]*>([\s\S]*?)</script>", html, re.IGNORECASE):
            chunk = m.group(1).strip()
            if "document.write" in chunk:
                continue
            if len(re.findall(r"\w+=\d+\^\w+", chunk)) < 4:
                continue
            env = self._exec_spys_decoder(chunk)
            if len(env) > len(best):
                best = env
        return best

    def _parse_page(self, html: str, default_protocol: str) -> List[ProxyRaw]:
        """Extract proxies from one list page.

        Args:
            html: page markup.
            default_protocol: protocol used when no protocol label is found
                in the text following a row's IP cell.

        Returns:
            Proxies whose port could be decoded and lies in 1..65535.
        """
        env = self._decoder_env_from_html(html)
        if not env:
            logger.warning(f"{self.display_name} 未解析到 XOR 变量表")
            return []

        results: List[ProxyRaw] = []
        cells = list(self._IP_CELL_RE.finditer(html))
        for idx, m in enumerate(cells):
            ip = m.group(1)
            dw = re.search(
                r'document\.write\("[^"]*"\+((?:\(\w+\^\w+\)\+?)+)\)',
                m.group(2),
            )
            if not dw:
                continue
            pairs = re.findall(r"\((\w+)\^(\w+)\)", dw.group(1))
            if not pairs:
                continue
            try:
                # Each (a^b) term yields one piece of the decimal port text.
                port = int("".join(str(env[a] ^ env[b]) for a, b in pairs))
            except (KeyError, ValueError):
                continue
            if not (1 <= port <= 65535):
                continue

            # Look for the protocol label after the IP cell, but never past
            # the next IP cell: otherwise a label belonging to a following
            # row could be attributed to this one.
            window_end = m.end() + self._PROTOCOL_WINDOW
            if idx + 1 < len(cells):
                window_end = min(window_end, cells[idx + 1].start())
            tail = html[m.end():window_end].upper()

            if "SOCKS5" in tail:
                proto = "socks5"
            elif "SOCKS4" in tail:
                proto = "socks4"
            elif "HTTPS" in tail:
                proto = "https"
            elif "HTTP" in tail:
                proto = "http"
            else:
                proto = default_protocol

            try:
                results.append(ProxyRaw(ip, port, proto))
            except ValueError:
                continue
        return results

    async def crawl(self) -> List[ProxyRaw]:
        """POST the filter form to every configured page and parse the replies."""
        form_base = {
            "xpp": "3",
            "xf1": "0",
            "xf2": "0",
            "xf4": "0",
        }

        async def _one(proto: str, url: str, xf5: str) -> Tuple[str, str]:
            data = {**form_base, "xf5": xf5}
            html = await self.fetch_post(url, data=data, timeout=14, retries=1)
            return proto, html or ""

        fetched = await asyncio.gather(
            *(_one(proto, url, xf5) for proto, url, xf5 in self.pages)
        )

        results: List[ProxyRaw] = []
        for proto, html in fetched:
            if not html:
                continue
            batch = self._parse_page(html, proto)
            if batch:
                results.extend(batch)
                logger.info(f"{self.display_name} ({proto}): {len(batch)} 条")
        if results:
            logger.info(f"{self.display_name} 合计 {len(results)} 条")
        return results
|
||||||
@@ -18,17 +18,19 @@ class KuaiDaiLiPlugin(BaseHTTPPlugin):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
# 减少页数,降低被反爬概率,确保至少能拿到数据
|
# fps/dps 列表页目前仍可 200;inha/intr 常返回 567(反爬),作末位兜底
|
||||||
self.urls = [
|
self.urls = [
|
||||||
|
"https://www.kuaidaili.com/free/fps/",
|
||||||
|
"https://www.kuaidaili.com/free/dps/",
|
||||||
"https://www.kuaidaili.com/free/inha/1/",
|
"https://www.kuaidaili.com/free/inha/1/",
|
||||||
"https://www.kuaidaili.com/free/intr/1/",
|
"https://www.kuaidaili.com/free/intr/1/",
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_headers(self) -> dict:
|
def get_headers(self) -> dict:
|
||||||
headers = super().get_headers()
|
headers = super().get_headers()
|
||||||
headers["Referer"] = "https://www.kuaidaili.com/free/inha/"
|
headers["Referer"] = "https://www.kuaidaili.com/free/"
|
||||||
headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
|
headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
|
||||||
headers["Accept-Encoding"] = "gzip, deflate, br"
|
headers["Accept-Encoding"] = "gzip, deflate"
|
||||||
headers["Accept-Language"] = "zh-CN,zh;q=0.9,en;q=0.8"
|
headers["Accept-Language"] = "zh-CN,zh;q=0.9,en;q=0.8"
|
||||||
headers["Sec-Fetch-Dest"] = "document"
|
headers["Sec-Fetch-Dest"] = "document"
|
||||||
headers["Sec-Fetch-Mode"] = "navigate"
|
headers["Sec-Fetch-Mode"] = "navigate"
|
||||||
@@ -36,15 +38,56 @@ class KuaiDaiLiPlugin(BaseHTTPPlugin):
|
|||||||
headers["Upgrade-Insecure-Requests"] = "1"
|
headers["Upgrade-Insecure-Requests"] = "1"
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
|
@staticmethod
def _infer_protocol(texts: List[str]) -> str:
    """Infer the proxy protocol from the cell texts of one table row.

    Cells from the third one onward are examined in order; the first cell
    that names a protocol decides the result. If none does, the fifth cell
    is checked once more after trimming, and ``"http"`` is the final default.

    Args:
        texts: stripped text of every ``<td>`` in the row.

    Returns:
        The inferred protocol name.
    """
    for cell in texts[2:]:
        token = cell.lower().replace(" ", "")
        if token in VALID_PROTOCOLS:
            return token
        # Combined labels are treated as plain HTTP.
        if "http(s)" in token or token == "http/https":
            return "http"
        for family in ("socks5", "socks4"):
            if family in token:
                return family
        if token == "https":
            return "https"

    if len(texts) >= 5:
        fifth = texts[4].lower().strip()
        if fifth in VALID_PROTOCOLS:
            return fifth
    return "http"
|
||||||
|
|
||||||
|
def _parse_table(self, table) -> List[ProxyRaw]:
    """Convert the rows of a parsed proxy table into ``ProxyRaw`` entries.

    A row is used when it has at least two cells, the first cell looks like
    a dotted-quad address and the second is a port in 1..65535. The protocol
    comes from ``_infer_protocol`` and falls back to ``"http"`` when it is
    not in ``VALID_PROTOCOLS``. Rows rejected by ``ProxyRaw`` with
    ``ValueError`` are skipped.

    Args:
        table: parsed table element exposing ``find_all``.

    Returns:
        The proxies extracted from the table, in row order.
    """
    parsed: List[ProxyRaw] = []
    for row in table.find_all("tr"):
        cells = [td.get_text(strip=True) for td in row.find_all("td")]
        if len(cells) < 2:
            continue
        ip, port_s = cells[0], cells[1]
        if not re.match(r"^\d+\.\d+\.\d+\.\d+$", ip):
            continue
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript digits that int() rejects, and the resulting
        # ValueError would escape and abort parsing of the whole table.
        if not port_s.isdecimal():
            continue
        port = int(port_s)
        if not (1 <= port <= 65535):
            continue

        protocol = self._infer_protocol(cells)
        if protocol not in VALID_PROTOCOLS:
            protocol = "http"
        try:
            parsed.append(ProxyRaw(ip, port, protocol))
        except ValueError:
            continue
    return parsed
|
||||||
|
|
||||||
async def crawl(self) -> List[ProxyRaw]:
|
async def crawl(self) -> List[ProxyRaw]:
|
||||||
results = []
|
results = []
|
||||||
# 先访问首页预热会话,获取 cookie,降低被反爬概率
|
await self.fetch("https://www.kuaidaili.com/free/", timeout=10)
|
||||||
await self.fetch("https://www.kuaidaili.com/", timeout=10)
|
await asyncio.sleep(random.uniform(1, 2))
|
||||||
await asyncio.sleep(random.uniform(2, 4))
|
|
||||||
|
|
||||||
# 顺序请求免费代理页面
|
|
||||||
for url in self.urls:
|
for url in self.urls:
|
||||||
html = await self.fetch(url, timeout=10)
|
html = await self.fetch(url, timeout=15)
|
||||||
if not html:
|
if not html:
|
||||||
continue
|
continue
|
||||||
soup = BeautifulSoup(html, "lxml")
|
soup = BeautifulSoup(html, "lxml")
|
||||||
@@ -53,20 +96,11 @@ class KuaiDaiLiPlugin(BaseHTTPPlugin):
|
|||||||
logger.warning(f"{self.display_name} 未能找到表格,可能是触发了反爬: {url}")
|
logger.warning(f"{self.display_name} 未能找到表格,可能是触发了反爬: {url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for row in table.find_all("tr"):
|
batch = self._parse_table(table)
|
||||||
tds = row.find_all("td")
|
if batch:
|
||||||
if len(tds) >= 5:
|
results.extend(batch)
|
||||||
ip = tds[0].get_text(strip=True)
|
logger.info(f"{self.display_name} {url} 解析 {len(batch)} 条")
|
||||||
port = tds[1].get_text(strip=True)
|
await asyncio.sleep(random.uniform(1, 2))
|
||||||
protocol = tds[4].get_text(strip=True).lower() if len(tds) > 4 else "http"
|
|
||||||
if protocol not in VALID_PROTOCOLS:
|
|
||||||
protocol = "http"
|
|
||||||
if re.match(r"^\d+\.\d+\.\d+\.\d+$", ip) and port.isdigit() and 1 <= int(port) <= 65535:
|
|
||||||
try:
|
|
||||||
results.append(ProxyRaw(ip, int(port), protocol))
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
await asyncio.sleep(random.uniform(5, 8))
|
|
||||||
|
|
||||||
if results:
|
if results:
|
||||||
logger.info(f"{self.display_name} 解析完成,获取 {len(results)} 个潜在代理")
|
logger.info(f"{self.display_name} 解析完成,获取 {len(results)} 个潜在代理")
|
||||||
|
|||||||
@@ -109,21 +109,5 @@ class ProxyScrapePlugin(BaseHTTPPlugin):
|
|||||||
if results:
|
if results:
|
||||||
logger.info(f"ProxyScrape 总计获取 {len(results)} 个代理")
|
logger.info(f"ProxyScrape 总计获取 {len(results)} 个代理")
|
||||||
else:
|
else:
|
||||||
# Fallback:生成测试代理,确保在测试环境也能验证完整流程
|
logger.warning("ProxyScrape 所有真实源均不可用,返回空列表")
|
||||||
logger.warning("ProxyScrape 所有真实源均不可用,生成测试代理用于架构验证")
|
|
||||||
results = self._generate_test_proxies()
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _generate_test_proxies(self) -> List[ProxyRaw]:
|
|
||||||
"""生成测试代理数据,覆盖全协议类型,用于验证插件系统"""
|
|
||||||
import random
|
|
||||||
test_proxies = []
|
|
||||||
protocols = ["http", "https", "socks4", "socks5"]
|
|
||||||
for protocol in protocols:
|
|
||||||
for _ in range(3):
|
|
||||||
# 生成随机公网格式 IP(仅用于测试流程)
|
|
||||||
ip = f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
|
|
||||||
port = random.randint(1024, 65535)
|
|
||||||
test_proxies.append(ProxyRaw(ip, port, protocol))
|
|
||||||
logger.info(f"生成 {len(test_proxies)} 个测试代理 HTTP/HTTPS/SOCKS4/SOCKS5 各 3 个")
|
|
||||||
return test_proxies
|
|
||||||
|
|||||||
@@ -2,7 +2,8 @@
|
|||||||
import aiosqlite
|
import aiosqlite
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import List, Optional, Tuple, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
from app.models.domain import Proxy
|
|
||||||
|
from app.models.domain import Proxy, ProxyRaw
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
|
|
||||||
|
|
||||||
@@ -32,9 +33,15 @@ def _row_to_proxy(row: Tuple) -> Proxy:
|
|||||||
response_time_ms=row[4],
|
response_time_ms=row[4],
|
||||||
last_check=_to_datetime(row[5]),
|
last_check=_to_datetime(row[5]),
|
||||||
created_at=_to_datetime(row[6]),
|
created_at=_to_datetime(row[6]),
|
||||||
|
validated=int(row[7]) if len(row) > 7 and row[7] is not None else 0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_SELECT_PROXY_COLS = (
|
||||||
|
"ip, port, protocol, score, response_time_ms, last_check, created_at, validated"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ProxyRepository:
|
class ProxyRepository:
|
||||||
"""代理 Repository"""
|
"""代理 Repository"""
|
||||||
|
|
||||||
@@ -51,12 +58,13 @@ class ProxyRepository:
|
|||||||
try:
|
try:
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at)
|
INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
|
||||||
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 1)
|
||||||
ON CONFLICT(ip, port) DO UPDATE SET
|
ON CONFLICT(ip, port) DO UPDATE SET
|
||||||
protocol = excluded.protocol,
|
protocol = excluded.protocol,
|
||||||
score = excluded.score,
|
score = excluded.score,
|
||||||
last_check = CURRENT_TIMESTAMP
|
last_check = CURRENT_TIMESTAMP,
|
||||||
|
validated = 1
|
||||||
""",
|
""",
|
||||||
(ip, port, protocol, score),
|
(ip, port, protocol, score),
|
||||||
)
|
)
|
||||||
@@ -66,6 +74,56 @@ class ProxyRepository:
|
|||||||
logger.error(f"insert_or_update proxy failed: {e}", exc_info=True)
|
logger.error(f"insert_or_update proxy failed: {e}", exc_info=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@staticmethod
async def upsert_from_crawl(
    db: aiosqlite.Connection,
    ip: str,
    port: int,
    protocol: str = "http",
    initial_score: int = 0,
) -> None:
    """Store one crawled proxy as pending validation.

    Inserts the row with ``validated = 0`` and ``score = initial_score``.
    If ``(ip, port)`` already exists, its protocol, score, ``last_check`` and
    ``validated`` flag are overwritten, i.e. the proxy is reset to pending.
    A protocol outside ``VALID_PROTOCOLS`` is stored as ``"http"``.
    This method does not commit.
    """
    stored_protocol = protocol if protocol in VALID_PROTOCOLS else "http"
    upsert_sql = """
        INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
        ON CONFLICT(ip, port) DO UPDATE SET
            protocol = excluded.protocol,
            score = excluded.score,
            last_check = CURRENT_TIMESTAMP,
            validated = 0
        """
    await db.execute(upsert_sql, (ip, port, stored_protocol, initial_score))
|
||||||
|
|
||||||
|
@staticmethod
async def upsert_many_from_crawl(
    db: aiosqlite.Connection,
    proxies: List[ProxyRaw],
    initial_score: int = 0,
) -> None:
    """Store a batch of crawled proxies as pending validation.

    Every proxy is written with ``validated = 0`` and
    ``score = initial_score``; existing ``(ip, port)`` rows are reset the
    same way. Protocols outside ``VALID_PROTOCOLS`` are stored as ``"http"``.
    Nothing is committed here; the enclosing transaction is expected to
    commit. An empty ``proxies`` list is a no-op.
    """
    if not proxies:
        return

    rows = [
        (
            item.ip,
            item.port,
            item.protocol if item.protocol in VALID_PROTOCOLS else "http",
            initial_score,
        )
        for item in proxies
    ]
    upsert_sql = """
        INSERT INTO proxies (ip, port, protocol, score, last_check, created_at, validated)
        VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, 0)
        ON CONFLICT(ip, port) DO UPDATE SET
            protocol = excluded.protocol,
            score = excluded.score,
            last_check = CURRENT_TIMESTAMP,
            validated = 0
        """
    await db.executemany(upsert_sql, rows)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def update_score(
|
async def update_score(
|
||||||
db: aiosqlite.Connection,
|
db: aiosqlite.Connection,
|
||||||
@@ -86,9 +144,12 @@ class ProxyRepository:
|
|||||||
""",
|
""",
|
||||||
(min_score, max_score, delta, ip, port),
|
(min_score, max_score, delta, ip, port),
|
||||||
)
|
)
|
||||||
# 删除分数已降至 0 及以下的代理
|
# 仅删除已入池且分数耗尽者;待验证(score=0)不经过此路径
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"DELETE FROM proxies WHERE ip = ? AND port = ? AND score <= ?",
|
"""
|
||||||
|
DELETE FROM proxies
|
||||||
|
WHERE ip = ? AND port = ? AND score <= ? AND validated = 1
|
||||||
|
""",
|
||||||
(ip, port, min_score),
|
(ip, port, min_score),
|
||||||
)
|
)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
@@ -134,7 +195,7 @@ class ProxyRepository:
|
|||||||
db: aiosqlite.Connection, ip: str, port: int
|
db: aiosqlite.Connection, ip: str, port: int
|
||||||
) -> Optional[Proxy]:
|
) -> Optional[Proxy]:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE ip = ? AND port = ?",
|
f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE ip = ? AND port = ?",
|
||||||
(ip, port),
|
(ip, port),
|
||||||
) as cursor:
|
) as cursor:
|
||||||
row = await cursor.fetchone()
|
row = await cursor.fetchone()
|
||||||
@@ -145,7 +206,11 @@ class ProxyRepository:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
async def get_random(db: aiosqlite.Connection) -> Optional[Proxy]:
|
async def get_random(db: aiosqlite.Connection) -> Optional[Proxy]:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies WHERE score > 0 ORDER BY RANDOM() LIMIT 1"
|
f"""
|
||||||
|
SELECT {_SELECT_PROXY_COLS} FROM proxies
|
||||||
|
WHERE validated = 1 AND score > 0
|
||||||
|
ORDER BY RANDOM() LIMIT 1
|
||||||
|
"""
|
||||||
) as cursor:
|
) as cursor:
|
||||||
row = await cursor.fetchone()
|
row = await cursor.fetchone()
|
||||||
if row:
|
if row:
|
||||||
@@ -158,12 +223,19 @@ class ProxyRepository:
|
|||||||
protocol: Optional[str] = None,
|
protocol: Optional[str] = None,
|
||||||
limit: int = 100000,
|
limit: int = 100000,
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
|
validated: Optional[int] = None,
|
||||||
) -> List[Proxy]:
|
) -> List[Proxy]:
|
||||||
query = "SELECT ip, port, protocol, score, response_time_ms, last_check, created_at FROM proxies"
|
query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
|
||||||
params: List = []
|
params: List = []
|
||||||
|
clauses = []
|
||||||
if protocol:
|
if protocol:
|
||||||
query += " WHERE protocol = ?"
|
clauses.append("protocol = ?")
|
||||||
params.append(protocol.lower())
|
params.append(protocol.lower())
|
||||||
|
if validated is not None:
|
||||||
|
clauses.append("validated = ?")
|
||||||
|
params.append(int(validated))
|
||||||
|
if clauses:
|
||||||
|
query += " WHERE " + " AND ".join(clauses)
|
||||||
query += " LIMIT ? OFFSET ?"
|
query += " LIMIT ? OFFSET ?"
|
||||||
params.extend([limit, offset])
|
params.extend([limit, offset])
|
||||||
|
|
||||||
@@ -171,21 +243,77 @@ class ProxyRepository:
|
|||||||
rows = await cursor.fetchall()
|
rows = await cursor.fetchall()
|
||||||
return [_row_to_proxy(row) for row in rows]
|
return [_row_to_proxy(row) for row in rows]
|
||||||
|
|
||||||
|
@staticmethod
async def list_for_validation(
    db: aiosqlite.Connection,
    protocol: Optional[str] = None,
) -> List[Proxy]:
    """List proxies in the order they should be validated.

    Pending proxies (``validated = 0``) come first, oldest ``created_at``
    first; validated proxies follow, oldest ``last_check`` first.

    Args:
        db: open database connection.
        protocol: when given, only proxies of this protocol (lower-cased)
            are returned.

    Returns:
        Pending proxies followed by already validated ones.
    """
    ordered: List[Proxy] = []
    # (validated flag, column that orders the group), pending group first.
    for flag, order_column in ((0, "created_at"), (1, "last_check")):
        sql = f"SELECT {_SELECT_PROXY_COLS} FROM proxies WHERE validated = {flag}"
        args: List = []
        if protocol:
            sql += " AND protocol = ?"
            args.append(protocol.lower())
        sql += f" ORDER BY {order_column} ASC"
        async with db.execute(sql, args) as cursor:
            fetched = await cursor.fetchall()
        ordered.extend(_row_to_proxy(record) for record in fetched)
    return ordered
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def iter_batches(
|
async def iter_batches(
|
||||||
db: aiosqlite.Connection,
|
db: aiosqlite.Connection,
|
||||||
protocol: Optional[str] = None,
|
protocol: Optional[str] = None,
|
||||||
batch_size: int = 1000,
|
batch_size: int = 1000,
|
||||||
|
only_usable: bool = False,
|
||||||
):
|
):
|
||||||
"""流式分批读取代理,避免一次性加载大量数据到内存"""
|
"""流式分批读取代理,避免一次性加载大量数据到内存"""
|
||||||
offset = 0
|
offset = 0
|
||||||
while True:
|
while True:
|
||||||
batch = await ProxyRepository.list_all(db, protocol, batch_size, offset)
|
batch = await ProxyRepository._list_batch_offset(
|
||||||
|
db, protocol, batch_size, offset, only_usable=only_usable
|
||||||
|
)
|
||||||
if not batch:
|
if not batch:
|
||||||
break
|
break
|
||||||
yield batch
|
yield batch
|
||||||
offset += batch_size
|
offset += batch_size
|
||||||
|
|
||||||
|
@staticmethod
async def _list_batch_offset(
    db: aiosqlite.Connection,
    protocol: Optional[str],
    batch_size: int,
    offset: int,
    only_usable: bool,
) -> List[Proxy]:
    """Read one LIMIT/OFFSET page of proxies.

    Args:
        db: open database connection.
        protocol: when truthy, restrict to this protocol (lower-cased).
        batch_size: maximum number of rows to return.
        offset: number of rows to skip.
        only_usable: when true, restrict to ``validated = 1 AND score > 0``.

    Returns:
        Up to ``batch_size`` proxies ordered by ``(ip, port)``.
    """
    clauses = []
    params: List = []
    if only_usable:
        clauses.append("validated = 1 AND score > 0")
    if protocol:
        clauses.append("protocol = ?")
        params.append(protocol.lower())

    query = f"SELECT {_SELECT_PROXY_COLS} FROM proxies"
    if clauses:
        query += " WHERE " + " AND ".join(clauses)
    # OFFSET paging needs a fixed order: without ORDER BY the row order is
    # undefined, so consecutive pages could repeat or miss rows. (ip, port)
    # is the conflict key used by the upserts on this table.
    query += " ORDER BY ip, port LIMIT ? OFFSET ?"
    params.extend([batch_size, offset])

    async with db.execute(query, params) as cursor:
        rows = await cursor.fetchall()
    return [_row_to_proxy(row) for row in rows]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def list_paginated(
|
async def list_paginated(
|
||||||
db: aiosqlite.Connection,
|
db: aiosqlite.Connection,
|
||||||
@@ -196,6 +324,7 @@ class ProxyRepository:
|
|||||||
max_score: Optional[int] = None,
|
max_score: Optional[int] = None,
|
||||||
sort_by: str = "last_check",
|
sort_by: str = "last_check",
|
||||||
sort_order: str = "DESC",
|
sort_order: str = "DESC",
|
||||||
|
pool_filter: Optional[str] = None,
|
||||||
) -> Tuple[List[Proxy], int]:
|
) -> Tuple[List[Proxy], int]:
|
||||||
conditions = ["score >= ?"]
|
conditions = ["score >= ?"]
|
||||||
params: List = [min_score]
|
params: List = [min_score]
|
||||||
@@ -206,6 +335,10 @@ class ProxyRepository:
|
|||||||
if max_score is not None:
|
if max_score is not None:
|
||||||
conditions.append("score <= ?")
|
conditions.append("score <= ?")
|
||||||
params.append(max_score)
|
params.append(max_score)
|
||||||
|
if pool_filter == "pending":
|
||||||
|
conditions.append("validated = 0")
|
||||||
|
elif pool_filter == "available":
|
||||||
|
conditions.append("validated = 1 AND score > 0")
|
||||||
|
|
||||||
where_clause = " AND ".join(conditions)
|
where_clause = " AND ".join(conditions)
|
||||||
allowed_sort_by = {"ip", "port", "protocol", "score", "last_check"}
|
allowed_sort_by = {"ip", "port", "protocol", "score", "last_check"}
|
||||||
@@ -222,7 +355,7 @@ class ProxyRepository:
|
|||||||
total = row[0] if row else 0
|
total = row[0] if row else 0
|
||||||
|
|
||||||
data_query = f"""
|
data_query = f"""
|
||||||
SELECT ip, port, protocol, score, response_time_ms, last_check, created_at
|
SELECT {_SELECT_PROXY_COLS}
|
||||||
FROM proxies
|
FROM proxies
|
||||||
WHERE {where_clause}
|
WHERE {where_clause}
|
||||||
ORDER BY {order_clause}
|
ORDER BY {order_clause}
|
||||||
@@ -239,8 +372,9 @@ class ProxyRepository:
|
|||||||
query = """
|
query = """
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(*) as total,
|
COUNT(*) as total,
|
||||||
COUNT(CASE WHEN score > 0 THEN 1 END) as available,
|
COUNT(CASE WHEN validated = 0 THEN 1 END) as pending,
|
||||||
AVG(score) as avg_score,
|
COUNT(CASE WHEN validated = 1 AND score > 0 THEN 1 END) as available,
|
||||||
|
(SELECT AVG(score) FROM proxies WHERE validated = 1 AND score > 0) as avg_score,
|
||||||
COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
|
COUNT(CASE WHEN protocol = 'http' THEN 1 END) as http_count,
|
||||||
COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
|
COUNT(CASE WHEN protocol = 'https' THEN 1 END) as https_count,
|
||||||
COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
|
COUNT(CASE WHEN protocol = 'socks4' THEN 1 END) as socks4_count,
|
||||||
@@ -252,15 +386,17 @@ class ProxyRepository:
|
|||||||
if row:
|
if row:
|
||||||
return {
|
return {
|
||||||
"total": row[0] or 0,
|
"total": row[0] or 0,
|
||||||
"available": row[1] or 0,
|
"pending": row[1] or 0,
|
||||||
"avg_score": round(row[2], 2) if row[2] else 0,
|
"available": row[2] or 0,
|
||||||
"http_count": row[3] or 0,
|
"avg_score": round(row[3], 2) if row[3] is not None else 0,
|
||||||
"https_count": row[4] or 0,
|
"http_count": row[4] or 0,
|
||||||
"socks4_count": row[5] or 0,
|
"https_count": row[5] or 0,
|
||||||
"socks5_count": row[6] or 0,
|
"socks4_count": row[6] or 0,
|
||||||
|
"socks5_count": row[7] or 0,
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
"total": 0,
|
"total": 0,
|
||||||
|
"pending": 0,
|
||||||
"available": 0,
|
"available": 0,
|
||||||
"avg_score": 0,
|
"avg_score": 0,
|
||||||
"http_count": 0,
|
"http_count": 0,
|
||||||
@@ -271,9 +407,15 @@ class ProxyRepository:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def get_today_new_count(db: aiosqlite.Connection) -> int:
|
async def get_today_new_count(db: aiosqlite.Connection) -> int:
|
||||||
|
"""今日新增:仅统计今日入库且已验证可用(与 get_stats.available 语义一致)。"""
|
||||||
try:
|
try:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT COUNT(*) FROM proxies WHERE DATE(created_at) = DATE('now', 'localtime')"
|
"""
|
||||||
|
SELECT COUNT(*) FROM proxies
|
||||||
|
WHERE DATE(created_at) = DATE('now', 'localtime')
|
||||||
|
AND validated = 1
|
||||||
|
AND score > 0
|
||||||
|
"""
|
||||||
) as cursor:
|
) as cursor:
|
||||||
row = await cursor.fetchone()
|
row = await cursor.fetchone()
|
||||||
return row[0] if row else 0
|
return row[0] if row else 0
|
||||||
@@ -283,7 +425,9 @@ class ProxyRepository:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def clean_invalid(db: aiosqlite.Connection) -> int:
|
async def clean_invalid(db: aiosqlite.Connection) -> int:
|
||||||
await db.execute("DELETE FROM proxies WHERE score <= 0")
|
await db.execute(
|
||||||
|
"DELETE FROM proxies WHERE validated = 1 AND score <= 0"
|
||||||
|
)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
return db.total_changes
|
return db.total_changes
|
||||||
|
|
||||||
|
|||||||
@@ -6,13 +6,12 @@ from app.core.log import logger
|
|||||||
|
|
||||||
|
|
||||||
DEFAULT_SETTINGS = {
|
DEFAULT_SETTINGS = {
|
||||||
"crawl_timeout": 30,
|
"validation_timeout": 6,
|
||||||
"validation_timeout": 10,
|
"default_concurrency": 120,
|
||||||
"max_retries": 3,
|
|
||||||
"default_concurrency": 50,
|
|
||||||
"min_proxy_score": 0,
|
"min_proxy_score": 0,
|
||||||
"proxy_expiry_days": 7,
|
"proxy_expiry_days": 7,
|
||||||
"auto_validate": True,
|
"auto_validate": True,
|
||||||
|
"auto_validate_after_crawl": False,
|
||||||
"validate_interval_minutes": 30,
|
"validate_interval_minutes": 30,
|
||||||
"validation_targets": [
|
"validation_targets": [
|
||||||
"http://httpbin.org/ip",
|
"http://httpbin.org/ip",
|
||||||
@@ -50,6 +49,8 @@ class SettingsRepository:
|
|||||||
settings[key] = value
|
settings[key] = value
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"get_all settings failed: {e}")
|
logger.error(f"get_all settings failed: {e}")
|
||||||
|
# 已废弃:爬取限时改为每插件 crawl_timeout_seconds,不再存全局项
|
||||||
|
settings.pop("crawl_timeout", None)
|
||||||
return settings
|
return settings
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
9
app/services/dashboard_stats.py
Normal file
9
app/services/dashboard_stats.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
"""首页 / 仪表盘统计快照(供 REST 与 WebSocket 复用)"""
|
||||||
|
from app.services.proxy_service import ProxyService
|
||||||
|
|
||||||
|
|
||||||
|
async def get_dashboard_stats(scheduler_running: bool) -> dict:
|
||||||
|
proxy_service = ProxyService()
|
||||||
|
stats = await proxy_service.get_stats()
|
||||||
|
stats["scheduler_running"] = scheduler_running
|
||||||
|
return stats
|
||||||
@@ -4,7 +4,6 @@ from datetime import datetime
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from app.core.plugin_system.base import BaseCrawlerPlugin
|
from app.core.plugin_system.base import BaseCrawlerPlugin
|
||||||
from app.core.config import settings as app_settings
|
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
from app.models.domain import CrawlResult, ProxyRaw
|
from app.models.domain import CrawlResult, ProxyRaw
|
||||||
|
|
||||||
@@ -12,14 +11,13 @@ from app.models.domain import CrawlResult, ProxyRaw
|
|||||||
class PluginRunner:
|
class PluginRunner:
|
||||||
"""统一插件执行器
|
"""统一插件执行器
|
||||||
|
|
||||||
- 超时控制(从 settings 读取 crawl_timeout)
|
- 超时:每插件独立,使用 plugin.crawl_timeout_seconds(默认 120s)
|
||||||
- 异常捕获和统计更新
|
- 可选 crawl_timeout_override:仅用于测试等场景覆盖插件自身限时
|
||||||
- 可选的健康检查前置
|
- 异常捕获和统计更新、健康检查前置、结果去重
|
||||||
- 结果去重
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, timeout: Optional[float] = None):
|
def __init__(self, crawl_timeout_override: Optional[float] = None):
|
||||||
self.timeout = timeout if timeout is not None else getattr(app_settings, "crawler_timeout", 30)
|
self.crawl_timeout_override = crawl_timeout_override
|
||||||
|
|
||||||
async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
|
async def run(self, plugin: BaseCrawlerPlugin) -> CrawlResult:
|
||||||
"""执行单个插件爬取"""
|
"""执行单个插件爬取"""
|
||||||
@@ -42,19 +40,22 @@ class PluginRunner:
|
|||||||
await self._save_stats(plugin, result)
|
await self._save_stats(plugin, result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# 执行爬取
|
crawl_limit = float(getattr(plugin, "crawl_timeout_seconds", 120.0))
|
||||||
|
if self.crawl_timeout_override is not None:
|
||||||
|
crawl_limit = float(self.crawl_timeout_override)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
proxies = await asyncio.wait_for(
|
proxies = await asyncio.wait_for(
|
||||||
plugin.crawl(),
|
plugin.crawl(),
|
||||||
timeout=self.timeout,
|
timeout=crawl_limit,
|
||||||
)
|
)
|
||||||
result.proxies = self._dedup(proxies)
|
result.proxies = self._dedup(proxies)
|
||||||
result.success_count = 1 if result.proxies else 0
|
result.success_count = len(result.proxies)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Plugin {plugin.name} crawled {len(result.proxies)} unique proxies"
|
f"Plugin {plugin.name} crawled {len(result.proxies)} unique proxies"
|
||||||
)
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
result.error = f"crawl timeout after {self.timeout}s"
|
result.error = f"crawl timeout after {crawl_limit}s"
|
||||||
result.failure_count = 1
|
result.failure_count = 1
|
||||||
logger.error(f"Plugin {plugin.name} crawl timeout")
|
logger.error(f"Plugin {plugin.name} crawl timeout")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import List, Optional
|
|||||||
from app.core.db import get_db
|
from app.core.db import get_db
|
||||||
from app.core.plugin_system.registry import registry
|
from app.core.plugin_system.registry import registry
|
||||||
from app.core.plugin_system.base import BaseCrawlerPlugin
|
from app.core.plugin_system.base import BaseCrawlerPlugin
|
||||||
from app.core.exceptions import PluginNotFoundException
|
from app.core.exceptions import PluginNotFoundException, ValidationException
|
||||||
from app.repositories.settings_repo import PluginSettingsRepository
|
from app.repositories.settings_repo import PluginSettingsRepository
|
||||||
from app.models.domain import PluginInfo, ProxyRaw, CrawlResult
|
from app.models.domain import PluginInfo, ProxyRaw, CrawlResult
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
|
|||||||
@@ -30,10 +30,19 @@ class ProxyService:
|
|||||||
max_score: Optional[int] = None,
|
max_score: Optional[int] = None,
|
||||||
sort_by: str = "last_check",
|
sort_by: str = "last_check",
|
||||||
sort_order: str = "DESC",
|
sort_order: str = "DESC",
|
||||||
|
pool_filter: Optional[str] = None,
|
||||||
) -> Tuple[List[Proxy], int]:
|
) -> Tuple[List[Proxy], int]:
|
||||||
async with get_db() as db:
|
async with get_db() as db:
|
||||||
return await self.proxy_repo.list_paginated(
|
return await self.proxy_repo.list_paginated(
|
||||||
db, page, page_size, protocol, min_score, max_score, sort_by, sort_order
|
db,
|
||||||
|
page,
|
||||||
|
page_size,
|
||||||
|
protocol,
|
||||||
|
min_score,
|
||||||
|
max_score,
|
||||||
|
sort_by,
|
||||||
|
sort_order,
|
||||||
|
pool_filter=pool_filter,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_random_proxy(self) -> Optional[Proxy]:
|
async def get_random_proxy(self) -> Optional[Proxy]:
|
||||||
@@ -72,7 +81,9 @@ class ProxyService:
|
|||||||
|
|
||||||
exported = 0
|
exported = 0
|
||||||
async with get_db() as db:
|
async with get_db() as db:
|
||||||
async for batch in self.proxy_repo.iter_batches(db, protocol=protocol, batch_size=1000):
|
async for batch in self.proxy_repo.iter_batches(
|
||||||
|
db, protocol=protocol, batch_size=1000, only_usable=True
|
||||||
|
):
|
||||||
for p in batch:
|
for p in batch:
|
||||||
if exported >= limit:
|
if exported >= limit:
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -2,9 +2,11 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
from collections import OrderedDict
|
||||||
|
from typing import Tuple, Optional, List
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import aiohttp_socks
|
import aiohttp_socks
|
||||||
from typing import Tuple, Optional, List
|
|
||||||
|
|
||||||
from app.core.config import settings as app_settings
|
from app.core.config import settings as app_settings
|
||||||
from app.core.log import logger
|
from app.core.log import logger
|
||||||
@@ -14,6 +16,7 @@ class ValidatorService:
|
|||||||
"""代理验证器
|
"""代理验证器
|
||||||
|
|
||||||
支持动态读取配置,实现设置热更新。
|
支持动态读取配置,实现设置热更新。
|
||||||
|
并发由 AsyncWorkerPool.worker_count 限制,此处不再套 Semaphore。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 测试 URL 默认池
|
# 测试 URL 默认池
|
||||||
@@ -32,23 +35,30 @@ class ValidatorService:
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_SOCKS_CACHE_CAP = 128
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
timeout: Optional[float] = None,
|
timeout: Optional[float] = None,
|
||||||
connect_timeout: Optional[float] = None,
|
connect_timeout: Optional[float] = None,
|
||||||
max_concurrency: Optional[int] = None,
|
max_concurrency: Optional[int] = None,
|
||||||
):
|
):
|
||||||
# 初始化时使用传入值或默认值,但运行期会动态读取 settings
|
|
||||||
self._init_timeout = timeout if timeout is not None else app_settings.validator_timeout
|
self._init_timeout = timeout if timeout is not None else app_settings.validator_timeout
|
||||||
self._init_connect_timeout = connect_timeout if connect_timeout is not None else app_settings.validator_connect_timeout
|
self._init_connect_timeout = (
|
||||||
self._init_max_concurrency = max_concurrency if max_concurrency is not None else app_settings.validator_max_concurrency
|
connect_timeout if connect_timeout is not None else app_settings.validator_connect_timeout
|
||||||
|
)
|
||||||
|
self._init_max_concurrency = (
|
||||||
|
max_concurrency if max_concurrency is not None else app_settings.validator_max_concurrency
|
||||||
|
)
|
||||||
|
|
||||||
self._http_connector: Optional[aiohttp.TCPConnector] = None
|
self._http_connector: Optional[aiohttp.TCPConnector] = None
|
||||||
self._http_session: Optional[aiohttp.ClientSession] = None
|
self._http_session: Optional[aiohttp.ClientSession] = None
|
||||||
self._semaphore: Optional[asyncio.Semaphore] = None
|
|
||||||
self._lock = asyncio.Lock()
|
self._lock = asyncio.Lock()
|
||||||
self._test_urls: Optional[List[str]] = None
|
self._test_urls: Optional[List[str]] = None
|
||||||
|
|
||||||
|
self._socks_sessions: "OrderedDict[Tuple[str, str, int], aiohttp.ClientSession]" = OrderedDict()
|
||||||
|
self._socks_lock = asyncio.Lock()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def timeout(self) -> float:
|
def timeout(self) -> float:
|
||||||
return float(self._init_timeout)
|
return float(self._init_timeout)
|
||||||
@@ -61,11 +71,16 @@ class ValidatorService:
|
|||||||
def max_concurrency(self) -> int:
|
def max_concurrency(self) -> int:
|
||||||
return int(self._init_max_concurrency)
|
return int(self._init_max_concurrency)
|
||||||
|
|
||||||
|
def _client_timeout(self) -> aiohttp.ClientTimeout:
|
||||||
|
t = float(self.timeout)
|
||||||
|
c = min(float(self.connect_timeout), t)
|
||||||
|
sock_read = min(t, max(2.0, t * 0.85))
|
||||||
|
return aiohttp.ClientTimeout(total=t, connect=c, sock_read=sock_read)
|
||||||
|
|
||||||
async def _ensure_session(self) -> aiohttp.ClientSession:
|
async def _ensure_session(self) -> aiohttp.ClientSession:
|
||||||
"""懒加载共享 HTTP session"""
|
"""懒加载共享 HTTP session"""
|
||||||
if self._http_session is None or self._http_session.closed:
|
if self._http_session is None or self._http_session.closed:
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
# 双重检查,避免多个协程在获取锁后重复创建
|
|
||||||
if self._http_session is None or self._http_session.closed:
|
if self._http_session is None or self._http_session.closed:
|
||||||
connector = aiohttp.TCPConnector(
|
connector = aiohttp.TCPConnector(
|
||||||
ssl=False,
|
ssl=False,
|
||||||
@@ -73,28 +88,18 @@ class ValidatorService:
|
|||||||
limit_per_host=self.max_concurrency,
|
limit_per_host=self.max_concurrency,
|
||||||
force_close=False,
|
force_close=False,
|
||||||
)
|
)
|
||||||
timeout = aiohttp.ClientTimeout(
|
|
||||||
total=self.timeout, connect=self.connect_timeout
|
|
||||||
)
|
|
||||||
self._http_connector = connector
|
self._http_connector = connector
|
||||||
self._http_session = aiohttp.ClientSession(
|
self._http_session = aiohttp.ClientSession(
|
||||||
connector=connector,
|
connector=connector,
|
||||||
timeout=timeout,
|
timeout=self._client_timeout(),
|
||||||
)
|
)
|
||||||
return self._http_session
|
return self._http_session
|
||||||
|
|
||||||
def _ensure_semaphore(self) -> asyncio.Semaphore:
|
|
||||||
if self._semaphore is None:
|
|
||||||
self._semaphore = asyncio.Semaphore(self.max_concurrency)
|
|
||||||
return self._semaphore
|
|
||||||
|
|
||||||
def _get_test_url(self, protocol: str) -> str:
|
def _get_test_url(self, protocol: str) -> str:
|
||||||
custom_urls = self._test_urls
|
custom_urls = self._test_urls
|
||||||
if not custom_urls:
|
if not custom_urls:
|
||||||
from app.core.config import settings as app_settings
|
|
||||||
custom_urls = getattr(app_settings, "validator_test_urls", None)
|
custom_urls = getattr(app_settings, "validator_test_urls", None)
|
||||||
if custom_urls and isinstance(custom_urls, list) and len(custom_urls) > 0:
|
if custom_urls and isinstance(custom_urls, list) and len(custom_urls) > 0:
|
||||||
# 按协议过滤自定义 URL,如果没有匹配的则使用全部
|
|
||||||
filtered = [u for u in custom_urls if u.lower().startswith(protocol.lower())]
|
filtered = [u for u in custom_urls if u.lower().startswith(protocol.lower())]
|
||||||
if filtered:
|
if filtered:
|
||||||
return random.choice(filtered)
|
return random.choice(filtered)
|
||||||
@@ -105,21 +110,17 @@ class ValidatorService:
|
|||||||
async def validate(self, ip: str, port: int, protocol: str = "http") -> Tuple[bool, float]:
|
async def validate(self, ip: str, port: int, protocol: str = "http") -> Tuple[bool, float]:
|
||||||
"""验证单个代理,返回 (是否有效, 延迟毫秒)"""
|
"""验证单个代理,返回 (是否有效, 延迟毫秒)"""
|
||||||
protocol = protocol.lower()
|
protocol = protocol.lower()
|
||||||
semaphore = self._ensure_semaphore()
|
start = time.time()
|
||||||
|
try:
|
||||||
async with semaphore:
|
if protocol in ("socks4", "socks5"):
|
||||||
start = time.time()
|
return await self._validate_socks(ip, port, protocol, start)
|
||||||
try:
|
return await self._validate_http(ip, port, protocol, start)
|
||||||
if protocol in ("socks4", "socks5"):
|
except asyncio.TimeoutError:
|
||||||
return await self._validate_socks(ip, port, protocol, start)
|
logger.debug(f"Validation timeout: {ip}:{port} ({protocol})")
|
||||||
else:
|
return False, 0.0
|
||||||
return await self._validate_http(ip, port, protocol, start)
|
except Exception as e:
|
||||||
except asyncio.TimeoutError:
|
logger.debug(f"Validation error {ip}:{port} ({protocol}): {e}")
|
||||||
logger.debug(f"Validation timeout: {ip}:{port} ({protocol})")
|
return False, 0.0
|
||||||
return False, 0.0
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Validation error {ip}:{port} ({protocol}): {e}")
|
|
||||||
return False, 0.0
|
|
||||||
|
|
||||||
async def _validate_http(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
|
async def _validate_http(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
|
||||||
proxy_url = f"{protocol}://{ip}:{port}"
|
proxy_url = f"{protocol}://{ip}:{port}"
|
||||||
@@ -129,40 +130,63 @@ class ValidatorService:
|
|||||||
async with session.get(test_url, proxy=proxy_url, allow_redirects=True) as response:
|
async with session.get(test_url, proxy=proxy_url, allow_redirects=True) as response:
|
||||||
if response.status in (200, 301, 302):
|
if response.status in (200, 301, 302):
|
||||||
latency = round((time.time() - start) * 1000, 2)
|
latency = round((time.time() - start) * 1000, 2)
|
||||||
logger.info(f"HTTP valid: {ip}:{port} ({protocol}) {latency}ms")
|
logger.debug(f"HTTP valid: {ip}:{port} ({protocol}) {latency}ms")
|
||||||
return True, latency
|
return True, latency
|
||||||
return False, 0.0
|
return False, 0.0
|
||||||
|
|
||||||
async def _validate_socks(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
|
async def _get_socks_session(self, protocol: str, ip: str, port: int) -> aiohttp.ClientSession:
|
||||||
proxy_type = (
|
key = (protocol, ip, port)
|
||||||
aiohttp_socks.ProxyType.SOCKS4
|
async with self._socks_lock:
|
||||||
if protocol == "socks4"
|
sess = self._socks_sessions.get(key)
|
||||||
else aiohttp_socks.ProxyType.SOCKS5
|
if sess is not None:
|
||||||
)
|
if sess.closed:
|
||||||
connector = aiohttp_socks.ProxyConnector(
|
del self._socks_sessions[key]
|
||||||
proxy_type=proxy_type,
|
else:
|
||||||
host=ip,
|
self._socks_sessions.move_to_end(key)
|
||||||
port=port,
|
return sess
|
||||||
rdns=True,
|
while len(self._socks_sessions) >= self._SOCKS_CACHE_CAP:
|
||||||
ssl=False,
|
_, old = self._socks_sessions.popitem(last=False)
|
||||||
)
|
if old is not None and not old.closed:
|
||||||
timeout = aiohttp.ClientTimeout(total=self.timeout, connect=self.connect_timeout)
|
await old.close()
|
||||||
test_url = self._get_test_url("http")
|
proxy_type = (
|
||||||
|
aiohttp_socks.ProxyType.SOCKS4
|
||||||
|
if protocol == "socks4"
|
||||||
|
else aiohttp_socks.ProxyType.SOCKS5
|
||||||
|
)
|
||||||
|
connector = aiohttp_socks.ProxyConnector(
|
||||||
|
proxy_type=proxy_type,
|
||||||
|
host=ip,
|
||||||
|
port=port,
|
||||||
|
rdns=True,
|
||||||
|
ssl=False,
|
||||||
|
)
|
||||||
|
sess = aiohttp.ClientSession(connector=connector, timeout=self._client_timeout())
|
||||||
|
self._socks_sessions[key] = sess
|
||||||
|
return sess
|
||||||
|
|
||||||
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
async def _validate_socks(self, ip: str, port: int, protocol: str, start: float) -> Tuple[bool, float]:
|
||||||
async with session.get(test_url, allow_redirects=True) as response:
|
test_url = self._get_test_url("http")
|
||||||
if response.status in (200, 301, 302):
|
session = await self._get_socks_session(protocol, ip, port)
|
||||||
latency = round((time.time() - start) * 1000, 2)
|
async with session.get(test_url, allow_redirects=True) as response:
|
||||||
logger.info(f"SOCKS valid: {ip}:{port} ({protocol}) {latency}ms")
|
if response.status in (200, 301, 302):
|
||||||
return True, latency
|
latency = round((time.time() - start) * 1000, 2)
|
||||||
return False, 0.0
|
logger.debug(f"SOCKS valid: {ip}:{port} ({protocol}) {latency}ms")
|
||||||
|
return True, latency
|
||||||
|
return False, 0.0
|
||||||
|
|
||||||
|
async def close_socks_sessions(self) -> None:
|
||||||
|
"""关闭 SOCKS 会话缓存(设置热更新或进程退出时调用)。"""
|
||||||
|
async with self._socks_lock:
|
||||||
|
for s in list(self._socks_sessions.values()):
|
||||||
|
if not s.closed:
|
||||||
|
await s.close()
|
||||||
|
self._socks_sessions.clear()
|
||||||
|
|
||||||
def update_test_urls(self, urls: List[str]) -> None:
|
def update_test_urls(self, urls: List[str]) -> None:
|
||||||
"""运行时更新验证目标 URL 列表"""
|
|
||||||
self._test_urls = list(urls) if urls else None
|
self._test_urls = list(urls) if urls else None
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
"""关闭共享的 HTTP ClientSession"""
|
await self.close_socks_sessions()
|
||||||
if self._http_session and not self._http_session.closed:
|
if self._http_session and not self._http_session.closed:
|
||||||
await self._http_session.close()
|
await self._http_session.close()
|
||||||
self._http_session = None
|
self._http_session = None
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
[tool:pytest]
|
[pytest]
|
||||||
testpaths = tests
|
testpaths = tests
|
||||||
python_files = test_*.py
|
python_files = test_*.py
|
||||||
python_classes = Test*
|
python_classes = Test*
|
||||||
@@ -13,5 +13,6 @@ markers =
|
|||||||
integration: 集成测试
|
integration: 集成测试
|
||||||
e2e: 端到端测试
|
e2e: 端到端测试
|
||||||
slow: 慢速测试
|
slow: 慢速测试
|
||||||
|
network: 需要出站网络(真实爬取/验证)
|
||||||
async_test: 异步测试
|
async_test: 异步测试
|
||||||
asyncio_default_fixture_loop_scope = function
|
asyncio_default_fixture_loop_scope = function
|
||||||
|
|||||||
35
scripts/apply_settings_maintenance.py
Normal file
35
scripts/apply_settings_maintenance.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
"""对 SQLite settings 表执行维护 SQL(见 db_optimize_settings.sql)。
|
||||||
|
|
||||||
|
使用当前应用配置的数据库路径(app.core.db.DB_PATH)。pytest 使用 PROXYPOOL_DB_PATH
|
||||||
|
指向 db/proxies.test.sqlite,勿在生产库路径上误跑测试夹具。
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# 保证可 import app
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
|
||||||
|
from app.core.db import DB_PATH, ensure_db_dir
|
||||||
|
import aiosqlite
|
||||||
|
|
||||||
|
sql_path = os.path.join(os.path.dirname(__file__), "db_optimize_settings.sql")
|
||||||
|
with open(sql_path, encoding="utf-8") as f:
|
||||||
|
script = f.read()
|
||||||
|
|
||||||
|
ensure_db_dir()
|
||||||
|
if not os.path.isfile(DB_PATH):
|
||||||
|
print(f"数据库不存在,跳过: {DB_PATH}")
|
||||||
|
return
|
||||||
|
|
||||||
|
async with aiosqlite.connect(DB_PATH) as db:
|
||||||
|
await db.executescript(script)
|
||||||
|
await db.commit()
|
||||||
|
print(f"已执行设置维护: {DB_PATH}")
|
||||||
|
print("请重启应用或在 WebUI 保存一次设置以使并发/超时生效。")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
14
scripts/db_optimize_settings.sql
Normal file
14
scripts/db_optimize_settings.sql
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
-- ProxyPool:设置表维护(负优化清理 + 推荐验证参数)
|
||||||
|
-- 用法:在停服或确认无并发写入时执行;或运行 python scripts/apply_settings_maintenance.py
|
||||||
|
-- 注意:改库后需「重启应用」或在 WebUI「保存设置」才会让运行中的 WorkerPool / Validator 重载并发与超时。
|
||||||
|
|
||||||
|
-- 废弃键
|
||||||
|
DELETE FROM settings WHERE key = 'crawl_timeout';
|
||||||
|
DELETE FROM settings WHERE key = 'max_retries';
|
||||||
|
|
||||||
|
-- 推荐验证参数(可按机器与网络再调大 default_concurrency)
|
||||||
|
INSERT INTO settings (key, value, updated_at) VALUES ('validation_timeout', '6', CURRENT_TIMESTAMP)
|
||||||
|
ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = CURRENT_TIMESTAMP;
|
||||||
|
|
||||||
|
INSERT INTO settings (key, value, updated_at) VALUES ('default_concurrency', '120', CURRENT_TIMESTAMP)
|
||||||
|
ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = CURRENT_TIMESTAMP;
|
||||||
@@ -5,6 +5,8 @@
|
|||||||
```
|
```
|
||||||
tests/
|
tests/
|
||||||
├── conftest.py # pytest 配置和 fixtures
|
├── conftest.py # pytest 配置和 fixtures
|
||||||
|
├── task_utils.py # 异步任务轮询(集成/E2E 共用)
|
||||||
|
├── support/ # 测试专用插件类等(非 mock)
|
||||||
├── README.md # 本文件
|
├── README.md # 本文件
|
||||||
├── unit/ # 单元测试
|
├── unit/ # 单元测试
|
||||||
│ ├── test_models.py # 模型测试
|
│ ├── test_models.py # 模型测试
|
||||||
@@ -12,6 +14,7 @@ tests/
|
|||||||
├── integration/ # 集成测试
|
├── integration/ # 集成测试
|
||||||
│ ├── test_proxies_api.py # 代理 API 测试
|
│ ├── test_proxies_api.py # 代理 API 测试
|
||||||
│ ├── test_plugins_api.py # 插件 API 测试
|
│ ├── test_plugins_api.py # 插件 API 测试
|
||||||
|
│ ├── test_plugins_live_crawl.py # 各插件真实爬取验收(须外网)
|
||||||
│ ├── test_scheduler_api.py # 调度器 API 测试
|
│ ├── test_scheduler_api.py # 调度器 API 测试
|
||||||
│ ├── test_settings_api.py # 设置 API 测试
|
│ ├── test_settings_api.py # 设置 API 测试
|
||||||
│ └── test_health_api.py # 健康检查测试
|
│ └── test_health_api.py # 健康检查测试
|
||||||
@@ -19,6 +22,25 @@ tests/
|
|||||||
└── test_full_workflow.py # 完整工作流测试
|
└── test_full_workflow.py # 完整工作流测试
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 网络与真实调用
|
||||||
|
|
||||||
|
集成测试与 E2E **不再 mock** `PluginRunner` / `ValidatorService`:会发起真实 HTTP 爬取与代理验证(视设置而定)。运行全量 `pytest` 需要 **可用的出站网络**,且含 `network` / `slow` 标记的用例可能耗时数分钟。
|
||||||
|
|
||||||
|
跳过需外网的用例(例如离线快速检查):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest -m "not network"
|
||||||
|
```
|
||||||
|
|
||||||
|
**插件爬取验收**(`test_plugins_live_crawl.py`):
|
||||||
|
|
||||||
|
- 核心 8 插件:必须至少 1 条代理且无 Runner 失败。
|
||||||
|
- `fpw_*`:对照 [Free_Proxy_Website](https://github.com/cyubuchen/Free_Proxy_Website) 的公开源,允许 0 条(国际网络差异),使用更长超时。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/integration/test_plugins_live_crawl.py -v
|
||||||
|
```
|
||||||
|
|
||||||
## 运行测试
|
## 运行测试
|
||||||
|
|
||||||
### 安装测试依赖
|
### 安装测试依赖
|
||||||
|
|||||||
@@ -1,5 +1,15 @@
|
|||||||
"""pytest 配置文件和 fixtures"""
|
"""pytest 配置文件和 fixtures"""
|
||||||
|
# 必须在任何 app.* 导入之前:下方 app fixture 会清空表,不可与生产共用 db/proxies.sqlite
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["PROXYPOOL_DB_PATH"] = "db/proxies.test.sqlite"
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
if sys.platform == "win32":
|
||||||
|
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
@@ -17,22 +27,28 @@ from app.plugins import (
|
|||||||
SpeedXPlugin,
|
SpeedXPlugin,
|
||||||
YunDaiLiPlugin,
|
YunDaiLiPlugin,
|
||||||
ProxyScrapePlugin,
|
ProxyScrapePlugin,
|
||||||
|
FpwProxyListDownloadPlugin,
|
||||||
|
FpwSocksSslProxyPlugin,
|
||||||
|
FpwSpysOnePlugin,
|
||||||
|
FpwProxynovaPlugin,
|
||||||
|
FpwHidemyPlugin,
|
||||||
|
FpwPremproxyPlugin,
|
||||||
|
FpwFreeproxylistsPlugin,
|
||||||
|
FpwGatherproxyPlugin,
|
||||||
|
FpwCheckerproxyPlugin,
|
||||||
)
|
)
|
||||||
from app.repositories.proxy_repo import ProxyRepository
|
from app.repositories.proxy_repo import ProxyRepository
|
||||||
from app.models.domain import ProxyRaw
|
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture(scope="function")
|
@pytest_asyncio.fixture(scope="function")
|
||||||
async def app():
|
async def app():
|
||||||
"""创建应用实例"""
|
"""创建应用实例"""
|
||||||
# 初始化测试数据库并清空历史数据
|
|
||||||
await init_db()
|
await init_db()
|
||||||
async with get_db() as db:
|
async with get_db() as db:
|
||||||
await db.execute("DELETE FROM proxies")
|
await db.execute("DELETE FROM proxies")
|
||||||
await db.execute("DELETE FROM settings")
|
await db.execute("DELETE FROM settings")
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
# 清理并重新注册插件,防止跨测试污染
|
|
||||||
registry.clear()
|
registry.clear()
|
||||||
for plugin_cls in [
|
for plugin_cls in [
|
||||||
Fate0Plugin,
|
Fate0Plugin,
|
||||||
@@ -43,6 +59,15 @@ async def app():
|
|||||||
SpeedXPlugin,
|
SpeedXPlugin,
|
||||||
YunDaiLiPlugin,
|
YunDaiLiPlugin,
|
||||||
ProxyScrapePlugin,
|
ProxyScrapePlugin,
|
||||||
|
FpwProxyListDownloadPlugin,
|
||||||
|
FpwSocksSslProxyPlugin,
|
||||||
|
FpwSpysOnePlugin,
|
||||||
|
FpwProxynovaPlugin,
|
||||||
|
FpwHidemyPlugin,
|
||||||
|
FpwPremproxyPlugin,
|
||||||
|
FpwFreeproxylistsPlugin,
|
||||||
|
FpwGatherproxyPlugin,
|
||||||
|
FpwCheckerproxyPlugin,
|
||||||
]:
|
]:
|
||||||
registry.register(plugin_cls)
|
registry.register(plugin_cls)
|
||||||
|
|
||||||
@@ -50,7 +75,6 @@ async def app():
|
|||||||
async with test_app.router.lifespan_context(test_app):
|
async with test_app.router.lifespan_context(test_app):
|
||||||
yield test_app
|
yield test_app
|
||||||
|
|
||||||
# 给 aiosqlite / aiohttp 后台线程留出收尾时间
|
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
|
|
||||||
|
|
||||||
@@ -80,32 +104,4 @@ async def sample_proxy(db, proxy_repo):
|
|||||||
"""创建一个测试代理"""
|
"""创建一个测试代理"""
|
||||||
await proxy_repo.insert_or_update(db, "192.168.1.1", 8080, "http", 50)
|
await proxy_repo.insert_or_update(db, "192.168.1.1", 8080, "http", 50)
|
||||||
yield {"ip": "192.168.1.1", "port": 8080, "protocol": "http", "score": 50}
|
yield {"ip": "192.168.1.1", "port": 8080, "protocol": "http", "score": 50}
|
||||||
# 清理
|
|
||||||
await proxy_repo.delete(db, "192.168.1.1", 8080)
|
await proxy_repo.delete(db, "192.168.1.1", 8080)
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture(autouse=True)
|
|
||||||
async def mock_external_requests(monkeypatch, request):
|
|
||||||
"""
|
|
||||||
自动在集成/E2E 测试中 mock 外部网络请求:
|
|
||||||
1. 插件爬取返回固定测试代理,避免真实 HTTP 请求
|
|
||||||
2. 代理验证瞬间成功,避免连接超时等待
|
|
||||||
"""
|
|
||||||
if "/unit/" in request.node.nodeid:
|
|
||||||
return
|
|
||||||
from app.services.plugin_runner import PluginRunner
|
|
||||||
from app.services.validator_service import ValidatorService
|
|
||||||
|
|
||||||
async def _mock_run(self, plugin):
|
|
||||||
from app.models.domain import CrawlResult
|
|
||||||
return CrawlResult(
|
|
||||||
plugin_name=plugin.name,
|
|
||||||
proxies=[ProxyRaw("192.168.100.10", 8080, "http")],
|
|
||||||
success_count=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _mock_validate(self, ip: str, port: int, protocol: str = "http"):
|
|
||||||
return True, 1.23
|
|
||||||
|
|
||||||
monkeypatch.setattr(PluginRunner, "run", _mock_run)
|
|
||||||
monkeypatch.setattr(ValidatorService, "validate", _mock_validate)
|
|
||||||
|
|||||||
@@ -4,10 +4,14 @@
|
|||||||
"""
|
"""
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from tests.task_utils import poll_task_until_terminal
|
||||||
|
|
||||||
|
|
||||||
class TestFullWorkflow:
|
class TestFullWorkflow:
|
||||||
"""测试完整工作流"""
|
"""测试完整工作流"""
|
||||||
|
|
||||||
|
@pytest.mark.network
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_proxy_management_workflow(self, client):
|
async def test_proxy_management_workflow(self, client):
|
||||||
"""测试代理管理完整工作流
|
"""测试代理管理完整工作流
|
||||||
@@ -35,11 +39,17 @@ class TestFullWorkflow:
|
|||||||
# 3. 触发所有插件爬取
|
# 3. 触发所有插件爬取
|
||||||
response = await client.post("/api/plugins/crawl-all")
|
response = await client.post("/api/plugins/crawl-all")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
crawl_result = response.json()["data"]
|
task_id = response.json()["data"]["task_id"]
|
||||||
|
task_data = await poll_task_until_terminal(
|
||||||
|
client, task_id, max_rounds=400, interval=0.5
|
||||||
|
)
|
||||||
|
assert task_data is not None
|
||||||
|
assert task_data["status"] in ("completed", "failed", "cancelled")
|
||||||
|
|
||||||
# 4. 获取更新后的统计
|
# 4. 获取更新后的统计
|
||||||
response = await client.get("/api/proxies/stats")
|
response = await client.get("/api/proxies/stats")
|
||||||
updated_stats = response.json()["data"]
|
updated_stats = response.json()["data"]
|
||||||
|
assert "total" in initial_stats and "total" in updated_stats
|
||||||
|
|
||||||
# 5. 导出代理(所有格式)
|
# 5. 导出代理(所有格式)
|
||||||
for fmt in ["csv", "txt", "json"]:
|
for fmt in ["csv", "txt", "json"]:
|
||||||
@@ -50,6 +60,8 @@ class TestFullWorkflow:
|
|||||||
response = await client.delete("/api/proxies/clean-invalid")
|
response = await client.delete("/api/proxies/clean-invalid")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
|
|
||||||
|
@pytest.mark.network
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_plugin_management_workflow(self, client):
|
async def test_plugin_management_workflow(self, client):
|
||||||
"""测试插件管理完整工作流
|
"""测试插件管理完整工作流
|
||||||
@@ -93,6 +105,12 @@ class TestFullWorkflow:
|
|||||||
# 6. 触发爬取
|
# 6. 触发爬取
|
||||||
response = await client.post(f"/api/plugins/{plugin_id}/crawl")
|
response = await client.post(f"/api/plugins/{plugin_id}/crawl")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
|
crawl_task_id = response.json()["data"]["task_id"]
|
||||||
|
crawl_task = await poll_task_until_terminal(
|
||||||
|
client, crawl_task_id, max_rounds=140, interval=0.5
|
||||||
|
)
|
||||||
|
assert crawl_task is not None
|
||||||
|
assert crawl_task["status"] in ("completed", "failed", "cancelled")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_scheduler_workflow(self, client):
|
async def test_scheduler_workflow(self, client):
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
"""插件 API 集成测试 - 测试 /api/plugins/* 所有接口"""
|
"""插件 API 集成测试 - 测试 /api/plugins/* 所有接口"""
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from tests.task_utils import poll_task_until_terminal
|
||||||
|
|
||||||
|
|
||||||
class TestPluginsAPI:
|
class TestPluginsAPI:
|
||||||
"""测试插件相关 API"""
|
"""测试插件相关 API"""
|
||||||
@@ -116,10 +118,11 @@ class TestPluginsAPI:
|
|||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["code"] == 200
|
assert data["code"] == 200
|
||||||
|
|
||||||
|
@pytest.mark.network
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_crawl_plugin(self, client):
|
async def test_crawl_plugin(self, client):
|
||||||
"""测试 POST /api/plugins/{id}/crawl - 异步任务模式"""
|
"""测试 POST /api/plugins/{id}/crawl - 异步任务模式"""
|
||||||
import asyncio
|
|
||||||
response = await client.get("/api/plugins")
|
response = await client.get("/api/plugins")
|
||||||
plugins = response.json()["data"]["plugins"]
|
plugins = response.json()["data"]["plugins"]
|
||||||
if not plugins:
|
if not plugins:
|
||||||
@@ -133,18 +136,11 @@ class TestPluginsAPI:
|
|||||||
assert "task_id" in data["data"]
|
assert "task_id" in data["data"]
|
||||||
|
|
||||||
task_id = data["data"]["task_id"]
|
task_id = data["data"]["task_id"]
|
||||||
# 轮询任务状态
|
task_data = await poll_task_until_terminal(
|
||||||
task_data = None
|
client, task_id, max_rounds=140, interval=0.5
|
||||||
for _ in range(10):
|
)
|
||||||
await asyncio.sleep(0.3)
|
|
||||||
res = await client.get(f"/api/tasks/{task_id}")
|
|
||||||
assert res.status_code == 200
|
|
||||||
task_data = res.json()["data"]
|
|
||||||
if task_data["status"] in ("completed", "failed", "cancelled"):
|
|
||||||
break
|
|
||||||
|
|
||||||
assert task_data is not None
|
assert task_data is not None
|
||||||
assert task_data["status"] in ("completed", "cancelled")
|
assert task_data["status"] in ("completed", "failed", "cancelled")
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_crawl_nonexistent_plugin(self, client):
|
async def test_crawl_nonexistent_plugin(self, client):
|
||||||
@@ -152,10 +148,11 @@ class TestPluginsAPI:
|
|||||||
response = await client.post("/api/plugins/nonexistent_plugin/crawl")
|
response = await client.post("/api/plugins/nonexistent_plugin/crawl")
|
||||||
assert response.status_code == 404
|
assert response.status_code == 404
|
||||||
|
|
||||||
|
@pytest.mark.network
|
||||||
|
@pytest.mark.slow
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_crawl_all_plugins(self, client):
|
async def test_crawl_all_plugins(self, client):
|
||||||
"""测试 POST /api/plugins/crawl-all - 异步任务模式"""
|
"""测试 POST /api/plugins/crawl-all - 异步任务模式"""
|
||||||
import asyncio
|
|
||||||
response = await client.post("/api/plugins/crawl-all")
|
response = await client.post("/api/plugins/crawl-all")
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
data = response.json()
|
data = response.json()
|
||||||
@@ -163,15 +160,8 @@ class TestPluginsAPI:
|
|||||||
assert "task_id" in data["data"]
|
assert "task_id" in data["data"]
|
||||||
|
|
||||||
task_id = data["data"]["task_id"]
|
task_id = data["data"]["task_id"]
|
||||||
# 轮询任务状态
|
task_data = await poll_task_until_terminal(
|
||||||
task_data = None
|
client, task_id, max_rounds=400, interval=0.5
|
||||||
for _ in range(10):
|
)
|
||||||
await asyncio.sleep(0.3)
|
|
||||||
res = await client.get(f"/api/tasks/{task_id}")
|
|
||||||
assert res.status_code == 200
|
|
||||||
task_data = res.json()["data"]
|
|
||||||
if task_data["status"] in ("completed", "failed", "cancelled"):
|
|
||||||
break
|
|
||||||
|
|
||||||
assert task_data is not None
|
assert task_data is not None
|
||||||
assert task_data["status"] in ("completed", "cancelled")
|
assert task_data["status"] in ("completed", "failed", "cancelled")
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ class TestProxiesAPI:
|
|||||||
assert data["code"] == 200
|
assert data["code"] == 200
|
||||||
assert "data" in data
|
assert "data" in data
|
||||||
assert "total" in data["data"]
|
assert "total" in data["data"]
|
||||||
|
assert "pending" in data["data"]
|
||||||
assert "available" in data["data"]
|
assert "available" in data["data"]
|
||||||
assert "scheduler_running" in data["data"]
|
assert "scheduler_running" in data["data"]
|
||||||
|
|
||||||
@@ -68,6 +69,17 @@ class TestProxiesAPI:
|
|||||||
# 可能返回 200(有数据) 或 404(无数据)
|
# 可能返回 200(有数据) 或 404(无数据)
|
||||||
assert response.status_code in [200, 404]
|
assert response.status_code in [200, 404]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_delete_proxy_post_json(self, client, sample_proxy):
|
||||||
|
"""测试 POST /api/proxies/delete-one(前端默认路径,兼容 IPv6)"""
|
||||||
|
response = await client.post(
|
||||||
|
"/api/proxies/delete-one",
|
||||||
|
json={"ip": sample_proxy["ip"], "port": sample_proxy["port"]},
|
||||||
|
)
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
assert data["code"] == 200
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_delete_proxy(self, client, sample_proxy):
|
async def test_delete_proxy(self, client, sample_proxy):
|
||||||
"""测试 DELETE /api/proxies/{ip}/{port}"""
|
"""测试 DELETE /api/proxies/{ip}/{port}"""
|
||||||
@@ -76,6 +88,19 @@ class TestProxiesAPI:
|
|||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["code"] == 200
|
assert data["code"] == 200
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_delete_one_ipv6(self, client, db, proxy_repo):
|
||||||
|
"""POST delete-one 可删除含冒号的 IP(路径 DELETE 无法可靠表达)"""
|
||||||
|
await proxy_repo.insert_or_update(db, "2001:db8::1", 18080, "http", 40)
|
||||||
|
r = await client.post(
|
||||||
|
"/api/proxies/delete-one",
|
||||||
|
json={"ip": "2001:db8::1", "port": 18080},
|
||||||
|
)
|
||||||
|
assert r.status_code == 200
|
||||||
|
assert r.json()["code"] == 200
|
||||||
|
left = await proxy_repo.get_by_ip_port(db, "2001:db8::1", 18080)
|
||||||
|
assert left is None
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_delete_nonexistent_proxy(self, client):
|
async def test_delete_nonexistent_proxy(self, client):
|
||||||
"""测试 DELETE /api/proxies/{ip}/{port} - 不存在的代理"""
|
"""测试 DELETE /api/proxies/{ip}/{port} - 不存在的代理"""
|
||||||
|
|||||||
@@ -1,6 +1,17 @@
|
|||||||
"""调度器 API 集成测试 - 测试 /api/scheduler/* 所有接口"""
|
"""调度器 API 集成测试 - 测试 /api/scheduler/* 所有接口"""
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from app.api.deps import get_settings_repo
|
||||||
|
from app.repositories.settings_repo import SettingsRepository
|
||||||
|
|
||||||
|
|
||||||
|
class FailingSettingsRepository(SettingsRepository):
|
||||||
|
"""save 恒为 False,用于覆盖「设置保存失败」分支,非 MagicMock。"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def save(db, settings):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class TestSchedulerAPI:
|
class TestSchedulerAPI:
|
||||||
"""测试调度器相关 API"""
|
"""测试调度器相关 API"""
|
||||||
@@ -93,18 +104,17 @@ class TestSchedulerAPI:
|
|||||||
assert job is not None
|
assert job is not None
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_start_scheduler_db_save_failure(self, client, monkeypatch):
|
async def test_start_scheduler_db_save_failure(self, client, app):
|
||||||
"""测试启动调度器时数据库保存失败应返回 running=False"""
|
"""测试启动调度器时数据库保存失败应返回 running=False"""
|
||||||
from app.repositories.settings_repo import SettingsRepository
|
|
||||||
|
|
||||||
# lifespan 启动时调度器可能已自动启动,先停止它
|
# lifespan 启动时调度器可能已自动启动,先停止它
|
||||||
await client.post("/api/scheduler/stop")
|
await client.post("/api/scheduler/stop")
|
||||||
|
|
||||||
async def mock_save(*args, **kwargs):
|
app.dependency_overrides[get_settings_repo] = lambda: FailingSettingsRepository()
|
||||||
return False
|
try:
|
||||||
|
response = await client.post("/api/scheduler/start")
|
||||||
|
finally:
|
||||||
|
app.dependency_overrides.pop(get_settings_repo, None)
|
||||||
|
|
||||||
monkeypatch.setattr(SettingsRepository, "save", mock_save)
|
|
||||||
response = await client.post("/api/scheduler/start")
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["code"] == 200
|
assert data["code"] == 200
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ class TestSettingsAPI:
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["code"] == 200
|
assert data["code"] == 200
|
||||||
assert "crawl_timeout" in data["data"]
|
assert "crawl_timeout" not in data["data"]
|
||||||
assert "validation_timeout" in data["data"]
|
assert "validation_timeout" in data["data"]
|
||||||
assert "auto_validate" in data["data"]
|
assert "auto_validate" in data["data"]
|
||||||
|
|
||||||
@@ -22,16 +22,15 @@ class TestSettingsAPI:
|
|||||||
response = await client.get("/api/settings")
|
response = await client.get("/api/settings")
|
||||||
data = response.json()["data"]
|
data = response.json()["data"]
|
||||||
|
|
||||||
# 验证所有预期的设置项
|
|
||||||
expected_keys = [
|
expected_keys = [
|
||||||
"crawl_timeout",
|
|
||||||
"validation_timeout",
|
"validation_timeout",
|
||||||
"max_retries",
|
|
||||||
"default_concurrency",
|
"default_concurrency",
|
||||||
"min_proxy_score",
|
"min_proxy_score",
|
||||||
"proxy_expiry_days",
|
"proxy_expiry_days",
|
||||||
"auto_validate",
|
"auto_validate",
|
||||||
|
"auto_validate_after_crawl",
|
||||||
"validate_interval_minutes",
|
"validate_interval_minutes",
|
||||||
|
"validation_targets",
|
||||||
]
|
]
|
||||||
for key in expected_keys:
|
for key in expected_keys:
|
||||||
assert key in data, f"缺少设置项: {key}"
|
assert key in data, f"缺少设置项: {key}"
|
||||||
@@ -40,65 +39,45 @@ class TestSettingsAPI:
|
|||||||
async def test_save_settings(self, client):
|
async def test_save_settings(self, client):
|
||||||
"""测试 POST /api/settings"""
|
"""测试 POST /api/settings"""
|
||||||
settings = {
|
settings = {
|
||||||
"crawl_timeout": 45,
|
|
||||||
"validation_timeout": 15,
|
"validation_timeout": 15,
|
||||||
"max_retries": 5,
|
|
||||||
"default_concurrency": 100,
|
"default_concurrency": 100,
|
||||||
"min_proxy_score": 10,
|
"min_proxy_score": 10,
|
||||||
"proxy_expiry_days": 14,
|
"proxy_expiry_days": 14,
|
||||||
"auto_validate": True,
|
"auto_validate": True,
|
||||||
|
"auto_validate_after_crawl": False,
|
||||||
"validate_interval_minutes": 60,
|
"validate_interval_minutes": 60,
|
||||||
|
"validation_targets": [
|
||||||
|
"http://httpbin.org/ip",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
response = await client.post("/api/settings", json=settings)
|
response = await client.post("/api/settings", json=settings)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["code"] == 200
|
assert data["code"] == 200
|
||||||
# 验证返回的数据与提交的一致
|
|
||||||
for key, value in settings.items():
|
for key, value in settings.items():
|
||||||
assert data["data"][key] == value
|
assert data["data"][key] == value
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_save_settings_partial(self, client):
|
async def test_save_settings_partial(self, client):
|
||||||
"""测试 POST /api/settings - 部分更新(实际上会替换所有)"""
|
"""测试 POST /api/settings - 部分更新(实际上会替换所有)"""
|
||||||
# 先获取当前设置
|
|
||||||
response = await client.get("/api/settings")
|
response = await client.get("/api/settings")
|
||||||
current_settings = response.json()["data"]
|
current_settings = response.json()["data"]
|
||||||
|
|
||||||
# 修改部分设置
|
|
||||||
new_settings = current_settings.copy()
|
new_settings = current_settings.copy()
|
||||||
new_settings["crawl_timeout"] = 60
|
new_settings["validation_timeout"] = 25
|
||||||
new_settings["auto_validate"] = False
|
new_settings["auto_validate"] = False
|
||||||
|
|
||||||
response = await client.post("/api/settings", json=new_settings)
|
response = await client.post("/api/settings", json=new_settings)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["data"]["crawl_timeout"] == 60
|
assert data["data"]["validation_timeout"] == 25
|
||||||
assert data["data"]["auto_validate"] is False
|
assert data["data"]["auto_validate"] is False
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_save_settings_validation_error(self, client):
|
async def test_save_settings_validation_error(self, client):
|
||||||
"""测试 POST /api/settings - 验证错误"""
|
"""测试 POST /api/settings - 验证错误"""
|
||||||
# crawl_timeout 必须在 5-120 之间
|
|
||||||
invalid_settings = {
|
invalid_settings = {
|
||||||
"crawl_timeout": 200, # 超出范围
|
"validation_timeout": 100,
|
||||||
"validation_timeout": 10,
|
|
||||||
"max_retries": 3,
|
|
||||||
"default_concurrency": 50,
|
|
||||||
"min_proxy_score": 0,
|
|
||||||
"proxy_expiry_days": 7,
|
|
||||||
"auto_validate": True,
|
|
||||||
"validate_interval_minutes": 30,
|
|
||||||
}
|
|
||||||
response = await client.post("/api/settings", json=invalid_settings)
|
|
||||||
assert response.status_code == 422 # 验证错误
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_save_settings_invalid_type(self, client):
|
|
||||||
"""测试 POST /api/settings - 无效类型"""
|
|
||||||
invalid_settings = {
|
|
||||||
"crawl_timeout": "invalid", # 应该是整数
|
|
||||||
"validation_timeout": 10,
|
|
||||||
"max_retries": 3,
|
|
||||||
"default_concurrency": 50,
|
"default_concurrency": 50,
|
||||||
"min_proxy_score": 0,
|
"min_proxy_score": 0,
|
||||||
"proxy_expiry_days": 7,
|
"proxy_expiry_days": 7,
|
||||||
@@ -108,15 +87,49 @@ class TestSettingsAPI:
|
|||||||
response = await client.post("/api/settings", json=invalid_settings)
|
response = await client.post("/api/settings", json=invalid_settings)
|
||||||
assert response.status_code == 422
|
assert response.status_code == 422
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_save_settings_invalid_type(self, client):
|
||||||
|
"""测试 POST /api/settings - 无效类型"""
|
||||||
|
invalid_settings = {
|
||||||
|
"validation_timeout": 10,
|
||||||
|
"default_concurrency": "invalid",
|
||||||
|
"min_proxy_score": 0,
|
||||||
|
"proxy_expiry_days": 7,
|
||||||
|
"auto_validate": True,
|
||||||
|
"validate_interval_minutes": 30,
|
||||||
|
}
|
||||||
|
response = await client.post("/api/settings", json=invalid_settings)
|
||||||
|
assert response.status_code == 422
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_save_settings_ignores_deprecated_crawl_timeout(self, client):
|
||||||
|
"""旧客户端若仍提交 crawl_timeout,应忽略且保存成功"""
|
||||||
|
response = await client.get("/api/settings")
|
||||||
|
base = response.json()["data"]
|
||||||
|
payload = {**base, "crawl_timeout": 999}
|
||||||
|
response = await client.post("/api/settings", json=payload)
|
||||||
|
assert response.status_code == 200
|
||||||
|
again = (await client.get("/api/settings")).json()["data"]
|
||||||
|
assert "crawl_timeout" not in again
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_save_settings_ignores_obsolete_max_retries(self, client):
|
||||||
|
"""已移除的 max_retries 键若仍被提交,应忽略。"""
|
||||||
|
response = await client.get("/api/settings")
|
||||||
|
base = response.json()["data"]
|
||||||
|
payload = {**base, "max_retries": 9}
|
||||||
|
response = await client.post("/api/settings", json=payload)
|
||||||
|
assert response.status_code == 200
|
||||||
|
again = (await client.get("/api/settings")).json()["data"]
|
||||||
|
assert "max_retries" not in again
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_settings_roundtrip(self, client):
|
async def test_settings_roundtrip(self, client):
|
||||||
"""测试设置读写一致性"""
|
"""测试设置读写一致性"""
|
||||||
# 生成随机但有效的设置
|
|
||||||
import random
|
import random
|
||||||
|
|
||||||
test_settings = {
|
test_settings = {
|
||||||
"crawl_timeout": random.randint(10, 60),
|
|
||||||
"validation_timeout": random.randint(5, 30),
|
"validation_timeout": random.randint(5, 30),
|
||||||
"max_retries": random.randint(1, 5),
|
|
||||||
"default_concurrency": random.randint(20, 100),
|
"default_concurrency": random.randint(20, 100),
|
||||||
"min_proxy_score": random.randint(0, 50),
|
"min_proxy_score": random.randint(0, 50),
|
||||||
"proxy_expiry_days": random.randint(1, 14),
|
"proxy_expiry_days": random.randint(1, 14),
|
||||||
@@ -124,15 +137,12 @@ class TestSettingsAPI:
|
|||||||
"validate_interval_minutes": random.randint(10, 120),
|
"validate_interval_minutes": random.randint(10, 120),
|
||||||
}
|
}
|
||||||
|
|
||||||
# 写入设置
|
|
||||||
response = await client.post("/api/settings", json=test_settings)
|
response = await client.post("/api/settings", json=test_settings)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
|
|
||||||
# 读取设置
|
|
||||||
response = await client.get("/api/settings")
|
response = await client.get("/api/settings")
|
||||||
saved_settings = response.json()["data"]
|
saved_settings = response.json()["data"]
|
||||||
|
|
||||||
# 验证一致性
|
|
||||||
for key, value in test_settings.items():
|
for key, value in test_settings.items():
|
||||||
assert saved_settings[key] == value, f"设置项 {key} 不一致"
|
assert saved_settings[key] == value, f"设置项 {key} 不一致"
|
||||||
|
|
||||||
@@ -140,9 +150,7 @@ class TestSettingsAPI:
|
|||||||
async def test_settings_roundtrip_with_validation_targets(self, client):
|
async def test_settings_roundtrip_with_validation_targets(self, client):
|
||||||
"""测试设置读写一致性 - 包含数组类型的 validation_targets"""
|
"""测试设置读写一致性 - 包含数组类型的 validation_targets"""
|
||||||
test_settings = {
|
test_settings = {
|
||||||
"crawl_timeout": 30,
|
|
||||||
"validation_timeout": 10,
|
"validation_timeout": 10,
|
||||||
"max_retries": 3,
|
|
||||||
"default_concurrency": 50,
|
"default_concurrency": 50,
|
||||||
"min_proxy_score": 0,
|
"min_proxy_score": 0,
|
||||||
"proxy_expiry_days": 7,
|
"proxy_expiry_days": 7,
|
||||||
@@ -154,13 +162,11 @@ class TestSettingsAPI:
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
# 写入设置
|
|
||||||
response = await client.post("/api/settings", json=test_settings)
|
response = await client.post("/api/settings", json=test_settings)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["data"]["validation_targets"] == test_settings["validation_targets"]
|
assert data["data"]["validation_targets"] == test_settings["validation_targets"]
|
||||||
|
|
||||||
# 读取设置
|
|
||||||
response = await client.get("/api/settings")
|
response = await client.get("/api/settings")
|
||||||
saved_settings = response.json()["data"]
|
saved_settings = response.json()["data"]
|
||||||
assert saved_settings["validation_targets"] == test_settings["validation_targets"]
|
assert saved_settings["validation_targets"] == test_settings["validation_targets"]
|
||||||
@@ -179,7 +185,6 @@ class TestSettingsAPI:
|
|||||||
data = response.json()
|
data = response.json()
|
||||||
assert data["data"]["validation_targets"] == []
|
assert data["data"]["validation_targets"] == []
|
||||||
|
|
||||||
# 读取确认
|
|
||||||
response = await client.get("/api/settings")
|
response = await client.get("/api/settings")
|
||||||
saved_settings = response.json()["data"]
|
saved_settings = response.json()["data"]
|
||||||
assert saved_settings["validation_targets"] == []
|
assert saved_settings["validation_targets"] == []
|
||||||
|
|||||||
1
tests/support/__init__.py
Normal file
1
tests/support/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Test support package (non-mock plugin doubles, etc.)
|
||||||
19
tests/support/plugins_for_runner.py
Normal file
19
tests/support/plugins_for_runner.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
"""供 PluginRunner 等测试使用的真实插件子类(非 unittest.mock)。"""
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from app.core.plugin_system.base import BaseCrawlerPlugin
|
||||||
|
from app.models.domain import ProxyRaw
|
||||||
|
|
||||||
|
|
||||||
|
class UnhealthyPlugin(BaseCrawlerPlugin):
|
||||||
|
"""health_check 抛错,用于验证 Runner 对异常的统计与落库。"""
|
||||||
|
|
||||||
|
name = "test_unhealthy_runner"
|
||||||
|
display_name = "TestUnhealthy"
|
||||||
|
description = "PluginRunner health_check failure test double"
|
||||||
|
|
||||||
|
async def crawl(self) -> List[ProxyRaw]:
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def health_check(self) -> bool:
|
||||||
|
raise RuntimeError("network down")
|
||||||
22
tests/task_utils.py
Normal file
22
tests/task_utils.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
"""测试用异步任务轮询工具"""
|
||||||
|
import asyncio
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
|
async def poll_task_until_terminal(
|
||||||
|
client,
|
||||||
|
task_id: str,
|
||||||
|
*,
|
||||||
|
max_rounds: int,
|
||||||
|
interval: float,
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""轮询任务直到终态或超时。返回最后一次 task data。"""
|
||||||
|
task_data = None
|
||||||
|
for _ in range(max_rounds):
|
||||||
|
await asyncio.sleep(interval)
|
||||||
|
res = await client.get(f"/api/tasks/{task_id}")
|
||||||
|
assert res.status_code == 200
|
||||||
|
task_data = res.json()["data"]
|
||||||
|
if task_data["status"] in ("completed", "failed", "cancelled"):
|
||||||
|
break
|
||||||
|
return task_data
|
||||||
@@ -106,6 +106,14 @@ class TestProxyListRequest:
|
|||||||
assert request.page_size == 50
|
assert request.page_size == 50
|
||||||
assert request.protocol == "https"
|
assert request.protocol == "https"
|
||||||
|
|
||||||
|
def test_pool_filter_pending_available(self):
|
||||||
|
r1 = ProxyListRequest(pool_filter="pending")
|
||||||
|
assert r1.pool_filter == "pending"
|
||||||
|
r2 = ProxyListRequest(pool_filter="all")
|
||||||
|
assert r2.pool_filter is None
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
ProxyListRequest(pool_filter="invalid")
|
||||||
|
|
||||||
|
|
||||||
class TestSettingsSchema:
|
class TestSettingsSchema:
|
||||||
"""测试 SettingsSchema"""
|
"""测试 SettingsSchema"""
|
||||||
@@ -113,16 +121,22 @@ class TestSettingsSchema:
|
|||||||
def test_default_settings(self):
|
def test_default_settings(self):
|
||||||
"""测试默认设置"""
|
"""测试默认设置"""
|
||||||
settings = SettingsSchema()
|
settings = SettingsSchema()
|
||||||
assert settings.crawl_timeout == 30
|
assert settings.validation_timeout == 6
|
||||||
assert settings.validation_timeout == 10
|
assert settings.default_concurrency == 120
|
||||||
assert settings.auto_validate is True
|
assert settings.auto_validate is True
|
||||||
|
assert settings.auto_validate_after_crawl is False
|
||||||
|
|
||||||
def test_custom_settings(self):
|
def test_custom_settings(self):
|
||||||
"""测试自定义设置"""
|
"""测试自定义设置"""
|
||||||
settings = SettingsSchema(crawl_timeout=60, auto_validate=False)
|
settings = SettingsSchema(validation_timeout=25, auto_validate=False)
|
||||||
assert settings.crawl_timeout == 60
|
assert settings.validation_timeout == 25
|
||||||
assert settings.auto_validate is False
|
assert settings.auto_validate is False
|
||||||
|
|
||||||
|
def test_settings_schema_ignores_unknown_fields(self):
|
||||||
|
s = SettingsSchema.model_validate({"validation_timeout": 10, "crawl_timeout": 99})
|
||||||
|
assert "crawl_timeout" not in s.model_dump()
|
||||||
|
assert s.validation_timeout == 10
|
||||||
|
|
||||||
|
|
||||||
class TestBatchDeleteRequest:
|
class TestBatchDeleteRequest:
|
||||||
"""测试 BatchDeleteRequest"""
|
"""测试 BatchDeleteRequest"""
|
||||||
|
|||||||
@@ -81,23 +81,25 @@ class TestProxyRepository:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_iter_batches(self, db, proxy_repo):
|
async def test_iter_batches(self, db, proxy_repo):
|
||||||
"""测试流式分批读取"""
|
"""测试流式分批读取(与库内已有数据共存,只校验增量与分批形状)"""
|
||||||
# 插入 5 条测试数据
|
async with db.execute("SELECT COUNT(*) FROM proxies") as c:
|
||||||
|
before = (await c.fetchone())[0]
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
await proxy_repo.insert_or_update(db, f"192.168.1.{i}", 8000 + i, "http", 10)
|
await proxy_repo.insert_or_update(db, f"192.168.99.{i}", 8000 + i, "http", 10)
|
||||||
|
async with db.execute("SELECT COUNT(*) FROM proxies") as c:
|
||||||
|
after = (await c.fetchone())[0]
|
||||||
|
assert after == before + 5
|
||||||
|
|
||||||
batches = []
|
batches = []
|
||||||
async for batch in proxy_repo.iter_batches(db, batch_size=2):
|
async for batch in proxy_repo.iter_batches(db, batch_size=2):
|
||||||
batches.append(batch)
|
batches.append(batch)
|
||||||
|
|
||||||
assert len(batches) == 3
|
assert sum(len(b) for b in batches) == after
|
||||||
assert len(batches[0]) == 2
|
assert len(batches[-1]) in (1, 2)
|
||||||
assert len(batches[1]) == 2
|
assert all(len(b) <= 2 for b in batches)
|
||||||
assert len(batches[2]) == 1
|
|
||||||
|
|
||||||
# 清理
|
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
await proxy_repo.delete(db, f"192.168.1.{i}", 8000 + i)
|
await proxy_repo.delete(db, f"192.168.99.{i}", 8000 + i)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_batch_delete(self, db, proxy_repo):
|
async def test_batch_delete(self, db, proxy_repo):
|
||||||
@@ -121,6 +123,38 @@ class TestProxyRepository:
|
|||||||
"""测试获取统计信息"""
|
"""测试获取统计信息"""
|
||||||
stats = await proxy_repo.get_stats(db)
|
stats = await proxy_repo.get_stats(db)
|
||||||
assert "total" in stats
|
assert "total" in stats
|
||||||
|
assert "pending" in stats
|
||||||
assert "available" in stats
|
assert "available" in stats
|
||||||
assert "avg_score" in stats
|
assert "avg_score" in stats
|
||||||
assert "http_count" in stats
|
assert "http_count" in stats
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_today_new_count_only_validated_available(self, db, proxy_repo):
|
||||||
|
"""今日新增不计待验证;仅今日创建且 validated=1、score>0"""
|
||||||
|
base = await proxy_repo.get_today_new_count(db)
|
||||||
|
await proxy_repo.upsert_from_crawl(db, "192.168.88.20", 9020, "http", 0)
|
||||||
|
assert await proxy_repo.get_today_new_count(db) == base
|
||||||
|
|
||||||
|
await proxy_repo.insert_or_update(db, "192.168.88.21", 9021, "http", 55)
|
||||||
|
assert await proxy_repo.get_today_new_count(db) == base + 1
|
||||||
|
|
||||||
|
await proxy_repo.delete(db, "192.168.88.20", 9020)
|
||||||
|
await proxy_repo.delete(db, "192.168.88.21", 9021)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_upsert_many_from_crawl(self, db, proxy_repo):
|
||||||
|
from app.models.domain import ProxyRaw
|
||||||
|
|
||||||
|
raws = [
|
||||||
|
ProxyRaw("10.0.0.1", 18080, "http"),
|
||||||
|
ProxyRaw("10.0.0.2", 18081, "socks5"),
|
||||||
|
]
|
||||||
|
await proxy_repo.upsert_many_from_crawl(db, raws, 0)
|
||||||
|
await db.commit()
|
||||||
|
p1 = await proxy_repo.get_by_ip_port(db, "10.0.0.1", 18080)
|
||||||
|
assert p1 is not None
|
||||||
|
assert p1.validated == 0
|
||||||
|
p2 = await proxy_repo.get_by_ip_port(db, "10.0.0.2", 18081)
|
||||||
|
assert p2.protocol == "socks5"
|
||||||
|
await proxy_repo.delete(db, "10.0.0.1", 18080)
|
||||||
|
await proxy_repo.delete(db, "10.0.0.2", 18081)
|
||||||
|
|||||||
Reference in New Issue
Block a user