fix(plugins): fpw parsers for JSON API, mirrors, and looser HTML
- fpw_proxy_list_download: parse JSON list/proxies bodies; jsDelivr monosans tier; crawl timeout 300s
- fpw_socks_ssl: try parse_html_table before regex
- fpw_hidemy: loose row scan when fixed columns fail
- fpw_proxynova: plain IP/port row fallback
- fpw_spys_one: HTTPS endpoints; crawl timeout 180s
- fpw_gatherproxy: HTTPS + extra JSON key patterns
- fpw_checkerproxy: lower min HTML length for parse
- fpw_premproxy: ip:port regex fallback when few table rows

Made-with: Cursor
This commit is contained in:
@@ -21,6 +21,21 @@ class FpwPremproxyPlugin(BaseHTTPPlugin):
|
||||
"https://premproxy.com/socks-list/",
|
||||
]
|
||||
|
||||
def _parse_ipport_embedded(self, html: str) -> List[ProxyRaw]:
    """Extract ``ip:port`` pairs embedded anywhere in raw HTML.

    Fallback parser used when the structured table parse yields too few
    rows. Every match is emitted as a ProxyRaw with protocol "http"
    (the protocol is not recoverable from a bare ip:port string).

    :param html: raw page body to scan
    :return: list of ProxyRaw entries, possibly containing duplicates
        (duplicates are preserved, matching the table parser's behavior)
    """
    # Port pattern is \d{1,5} (the old \d{2,5} silently dropped valid
    # single-digit ports 1-9 even though the range check below
    # explicitly allows them); the range check still rejects 0 and
    # anything above 65535.
    found = re.findall(
        r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})\b",
        html,
    )
    out: List[ProxyRaw] = []
    for ip, ps in found:
        # The regex guarantees ps is all digits, so int() cannot raise;
        # the old isdigit() guard and try/except ValueError were dead code.
        port = int(ps)
        if not (1 <= port <= 65535):
            continue
        # Reject strings that match the pattern but are not valid IPv4
        # addresses (any octet > 255, e.g. "999.1.2.3") — they can
        # never be reachable proxies.
        if any(int(octet) > 255 for octet in ip.split(".")):
            continue
        out.append(ProxyRaw(ip, port, "http"))
    return out
|
||||
|
||||
def _parse_html(self, html: str) -> List[ProxyRaw]:
|
||||
soup = BeautifulSoup(html, "lxml")
|
||||
results: List[ProxyRaw] = []
|
||||
@@ -47,6 +62,8 @@ class FpwPremproxyPlugin(BaseHTTPPlugin):
|
||||
results.append(ProxyRaw(ip, int(port), proto))
|
||||
except ValueError:
|
||||
continue
|
||||
if len(results) < 5:
|
||||
results.extend(self._parse_ipport_embedded(html))
|
||||
return results
|
||||
|
||||
async def crawl(self) -> List[ProxyRaw]:
|
||||
|
||||
Reference in New Issue
Block a user