Files
Cyrene/scripts/voice/cdp_download.py
T
AskaEth 6ef9e082a6 feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST)
- Gateway: VoiceStreamManager代理WS流式STT到voice-service
- Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码
- 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端)
- 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并
- 文档: 完善gateway-api.md和voice-service.md语音API文档
- 工具: scripts/voice/ 语音转换脚本集

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 11:50:40 +08:00

176 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""
通过 Chrome DevTools Protocol 下载文件。
需要 Chrome 以 --remote-debugging-port=9222 启动。
用法:
python cdp_download.py <url> <output_path>
python cdp_download.py https://github.com/.../vgmstream-win.zip tools/vgmstream.zip
"""
import json
import os
import sys
import time
import threading
from websocket import create_connection
class CDPClient:
    """Minimal synchronous Chrome DevTools Protocol client over one WebSocket.

    A daemon thread reads every incoming frame. Frames that carry an ``id``
    are stored as command responses; every other frame is buffered as an
    event until a caller collects it with :meth:`wait_for_event`. Events that
    nobody asks for stay in the buffer for the lifetime of the client.
    """

    def __init__(self, ws_url: str):
        """Connect to *ws_url* and start the background reader thread."""
        self.ws = create_connection(ws_url, origin="http://127.0.0.1:9222")
        self._id = 0
        self._lock = threading.Lock()
        # Waiters block on this condition (which wraps ``_lock``) and are
        # woken by the reader as soon as a frame arrives, instead of polling.
        self._cond = threading.Condition(self._lock)
        self._pending = {}
        self._events = []
        self._running = True
        # Start background reader
        self._reader_thread = threading.Thread(target=self._read_loop, daemon=True)
        self._reader_thread.start()

    def _read_loop(self):
        """Receive frames until the client is closed or the socket fails."""
        while self._running:
            try:
                raw = self.ws.recv()
            except Exception:
                # Any failure of recv() is treated as a dead transport:
                # retrying would only spin, so mark the client as stopped.
                self._running = False
                break
            try:
                msg = json.loads(raw)
            except (TypeError, ValueError):
                # Malformed frame: skip it and keep reading.
                continue
            if not isinstance(msg, dict):
                continue
            with self._cond:
                msg_id = msg.get('id')
                if msg_id is not None:
                    self._pending[msg_id] = msg
                else:
                    self._events.append(msg)
                self._cond.notify_all()

    def send(self, method: str, params: dict = None, timeout: float = 60) -> dict:
        """Send a CDP command and block until its response arrives.

        Args:
            method: CDP method name, e.g. ``"Page.enable"``.
            params: Command parameters; an empty object is sent when omitted.
            timeout: Maximum number of seconds to wait for the response.

        Returns:
            The ``result`` object of the response (``{}`` when absent).

        Raises:
            TimeoutError: No response arrived within *timeout* seconds.
            RuntimeError: The response carried an ``error`` member.
        """
        with self._cond:
            self._id += 1
            msg_id = self._id
        payload = json.dumps({
            'id': msg_id,
            'method': method,
            'params': params or {},
        })
        self.ws.send(payload)
        # Wait for response; a monotonic clock keeps the deadline immune to
        # wall-clock adjustments.
        deadline = time.monotonic() + timeout
        with self._cond:
            while msg_id not in self._pending:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    raise TimeoutError(f"CDP command {method} timed out")
                self._cond.wait(remaining)
            result = self._pending.pop(msg_id)
        if 'error' in result:
            raise RuntimeError(f"CDP Error: {result['error']}")
        return result.get('result', {})

    def wait_for_event(self, event_type: str, timeout: float = 60) -> dict:
        """Remove and return the params of the oldest buffered *event_type* event.

        Args:
            event_type: CDP event name, e.g. ``"Browser.downloadProgress"``.
            timeout: Maximum number of seconds to wait for such an event.

        Returns:
            The event's ``params`` object (``{}`` when the event has none).

        Raises:
            TimeoutError: No matching event arrived within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        with self._cond:
            while True:
                for index, evt in enumerate(self._events):
                    if evt.get('method') == event_type:
                        return self._events.pop(index).get('params', {})
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    raise TimeoutError(
                        f"Event {event_type} not received within {timeout}s")
                self._cond.wait(remaining)

    def close(self):
        """Stop the reader loop and close the WebSocket."""
        self._running = False
        self.ws.close()
def _move_download(downloaded_file: str, output_path: str) -> None:
    """Move a finished download to *output_path*, replacing any existing file."""
    source = os.path.abspath(downloaded_file)
    target = os.path.abspath(output_path)
    if not os.path.exists(source):
        raise FileNotFoundError(f"下载的文件未找到: {source}")
    # When the browser already saved the file under the requested name there
    # is nothing to move; deleting the "old" target would delete the download.
    if os.path.exists(target) and os.path.samefile(source, target):
        return
    os.replace(source, target)


def download_via_cdp(url: str, output_path: str, cdp_url: str = "http://localhost:9222"):
    """Download *url* through a running Chrome instance and save it to *output_path*.

    A new tab is opened via the HTTP endpoint at *cdp_url*, downloads are
    directed to the directory of *output_path*, the tab navigates to *url*,
    and the finished file is moved to *output_path*. The tab is closed again
    on exit.

    Args:
        url: Address that triggers the download when navigated to.
        output_path: Destination file path; its directory is created if needed.
        cdp_url: Base URL of Chrome's remote-debugging HTTP endpoint.

    Returns:
        *output_path*.

    Raises:
        TimeoutError: The download did not start within 30 s, or no progress
            event arrived within 120 s.
        FileNotFoundError: The browser reported completion but the file is
            not in the download directory under its suggested name.
        Exception: The download was canceled or a CDP command failed.
    """
    import urllib.request

    # 1. Create a new tab
    print("[CDP] 创建标签页...")
    req = urllib.request.Request(f"{cdp_url}/json/new", method='PUT')
    with urllib.request.urlopen(req) as resp:
        tab = json.loads(resp.read())
    ws_url = tab['webSocketDebuggerUrl']
    print(f"[CDP] 标签页: {tab['id']}")

    client = None
    try:
        # Connect inside the try so the tab is closed even if this fails.
        client = CDPClient(ws_url)

        # 2. Enable the required domain
        print("[CDP] 启用 Page...")
        client.send('Page.enable')

        # 3. Configure the download directory
        download_dir = os.path.abspath(os.path.dirname(output_path))
        os.makedirs(download_dir, exist_ok=True)
        print(f"[CDP] 下载目录: {download_dir}")
        client.send('Browser.setDownloadBehavior', {
            'behavior': 'allow',
            'downloadPath': download_dir,
            # Browser.downloadWillBegin / downloadProgress are only emitted
            # when events are explicitly enabled (documented default: false).
            'eventsEnabled': True,
        })

        # 4. Navigate to the download URL
        print(f"[CDP] 导航到: {url}")
        client.send('Page.navigate', {'url': url})

        # 5. Wait for the download to start and finish
        print("[CDP] 等待下载开始...")
        will_begin = client.wait_for_event('Browser.downloadWillBegin', timeout=30)
        guid = will_begin['guid']
        suggested_name = will_begin.get('suggestedFilename', 'unknown')
        print(f"[CDP] 下载开始: {suggested_name} (guid={guid})")

        print("[CDP] 等待下载完成...")
        while True:
            progress = client.wait_for_event('Browser.downloadProgress', timeout=120)
            # Ignore progress reports that belong to a different download.
            if progress.get('guid', guid) != guid:
                continue
            state = progress.get('state', '')
            if state == 'completed':
                print("[CDP] 下载完成")
                break
            elif state == 'canceled':
                raise Exception("下载被取消")
            elif state == 'interrupted':
                # NOTE(review): the published CDP Browser domain lists only
                # inProgress/completed/canceled as states and no
                # Browser.resumeDownload method - confirm this branch is
                # reachable before relying on it.
                print("[CDP] 下载中断, 重试...")
                client.send('Browser.resumeDownload', {'guid': guid})
            else:
                received = progress.get('receivedBytes', 0)
                total = progress.get('totalBytes', 0)
                if total > 0:
                    print(f"[CDP] 进度: {received}/{total} ({100*received//total}%)")

        # 6. Move the file to the requested path
        _move_download(os.path.join(download_dir, suggested_name), output_path)
        print(f"[CDP] 文件保存到: {output_path}")
        return output_path
    finally:
        if client is not None:
            client.close()
        # Close the tab. This is cleanup only: a failure here must neither
        # mask the real error nor fail an already completed download.
        close_req = urllib.request.Request(
            f"{cdp_url}/json/close/{tab['id']}", method='PUT')
        try:
            with urllib.request.urlopen(close_req):
                pass
        except OSError as exc:
            print(f"[CDP] 关闭标签页失败: {exc}")
def main():
    """Command-line entry point: ``cdp_download.py <url> <output_path>``.

    Prints the usage text and exits with status 1 when fewer than two
    arguments are supplied; otherwise starts the download.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        for usage_line in (
            "用法: python cdp_download.py <url> <output_path>",
            "示例: python cdp_download.py https://example.com/file.zip tools/file.zip",
        ):
            print(usage_line)
        sys.exit(1)
    # Extra arguments beyond the first two are ignored.
    target_url, destination = cli_args[0], cli_args[1]
    download_via_cdp(target_url, destination)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()