Files
Cyrene/scripts/voice/batch_convert.py
T
AskaEth 6ef9e082a6 feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST)
- Gateway: VoiceStreamManager代理WS流式STT到voice-service
- Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码
- 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端)
- 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并
- 文档: 完善gateway-api.md和voice-service.md语音API文档
- 工具: scripts/voice/ 语音转换脚本集

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 11:50:40 +08:00

101 lines
2.8 KiB
Python

#!/usr/bin/env python3
"""
批量 WEM → WAV 转换,使用 vgmstream-cli + ffmpeg 标准化。
输出: 22050Hz, mono, 16-bit PCM WAV
"""
import os
import subprocess
import sys
from pathlib import Path
# Absolute path to the vgmstream command-line decoder used for the WEM step.
VGMSTREAM = r"D:\Project\Code\Uni\Cyrene\scripts\voice\tools\vgmstream\vgmstream-cli.exe"

# Root folder holding the raw input banks (one sub-folder per bank).
RAW_DIR = r"D:\Project\Code\Uni\Cyrene-Voice-Model\data\raw"

# Root folder receiving the converted WAV files (same sub-folder names).
CLEANED_DIR = r"D:\Project\Code\Uni\Cyrene-Voice-Model\data\cleaned"

# Sub-directories to convert, listed in priority order.
TARGETS = [
    "VoBanks27",
    "VoBanks28",
    "VoBanks29",
    "VoBanks30",
    "VoBanks31",
]
def _last_output_line(raw) -> str:
    """Return the last non-empty line of captured tool output, or a placeholder."""
    text = raw.decode("utf-8", errors="replace") if raw else ""
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    return lines[-1] if lines else "no error output"


def convert_wem_to_wav(wem_path: str, wav_path: str) -> bool:
    """Convert one WEM file to a normalized WAV (22050 Hz, mono, s16).

    The conversion runs in two steps: vgmstream decodes the WEM into a
    temporary WAV, then ffmpeg resamples that file. ffmpeg writes to a
    second temporary file which is moved onto ``wav_path`` only after it
    finished successfully, so an interrupted or timed-out run never leaves a
    truncated ``wav_path`` that a later run would skip as already converted.

    Args:
        wem_path: Path of the source ``.wem`` file.
        wav_path: Path of the WAV file to produce. Its parent directory is
            created when missing.

    Returns:
        True when ``wav_path`` already existed with more than 100 bytes, or
        when the conversion produced a file of more than 100 bytes.
        False when either tool failed, timed out, or raised; a ``FAIL`` line
        is printed for tool failures and exceptions.
    """
    out_dir = os.path.dirname(wav_path)
    # A bare filename has an empty dirname, and os.makedirs("") raises.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Skip outputs that already exist and are non-trivially sized.
    if os.path.exists(wav_path) and os.path.getsize(wav_path) > 100:
        return True

    label = os.path.basename(wem_path)
    tmp_path = wav_path + ".tmp.wav"    # vgmstream output
    part_path = wav_path + ".part.wav"  # ffmpeg output, published on success
    try:
        # Step 1: vgmstream -> temporary WAV.
        result = subprocess.run(
            [VGMSTREAM, "-o", tmp_path, wem_path],
            capture_output=True, timeout=30,
        )
        if result.returncode != 0 or not os.path.exists(tmp_path):
            detail = _last_output_line(result.stderr or result.stdout)
            print(f" FAIL [{label}]: vgmstream: {detail}")
            return False

        # Step 2: ffmpeg -> 22050 Hz, mono, s16, written to the staging file.
        result = subprocess.run(
            ["ffmpeg", "-y", "-i", tmp_path,
             "-ar", "22050", "-ac", "1", "-sample_fmt", "s16",
             part_path],
            capture_output=True, timeout=30,
        )
        if result.returncode != 0:
            detail = _last_output_line(result.stderr or result.stdout)
            print(f" FAIL [{label}]: ffmpeg: {detail}")
            return False

        if not os.path.exists(part_path) or os.path.getsize(part_path) <= 100:
            return False

        # Publish only a complete file; os.replace overwrites the target.
        os.replace(part_path, wav_path)
        return True
    except Exception as e:
        # Per-file boundary of a batch job: report and let the batch go on.
        print(f" FAIL [{label}]: {e}")
        return False
    finally:
        # Best-effort cleanup; a leftover temp file must not abort the batch.
        for leftover in (tmp_path, part_path):
            try:
                os.remove(leftover)
            except OSError:
                pass
def main():
    """Convert every ``*.wem`` file found in the configured target folders.

    For each name in ``TARGETS`` the files directly under
    ``RAW_DIR/<name>`` are converted into ``CLEANED_DIR/<name>`` with the
    same stem and a ``.wav`` suffix. Missing or empty source folders are
    reported and skipped. Progress is printed every 50 files; the ok/fail
    numbers shown are running totals across all targets.
    """
    print("=== 批量 WEM → WAV 转换 (VoBanks) ===\n")

    converted = 0
    failed = 0
    processed = 0

    for target in TARGETS:
        source_root = os.path.join(RAW_DIR, target)
        output_root = os.path.join(CLEANED_DIR, target)

        if not os.path.isdir(source_root):
            print(f"SKIP: {target} (not found)")
            continue

        inputs = sorted(Path(source_root).glob("*.wem"))
        count = len(inputs)
        if count == 0:
            print(f"SKIP: {target} (empty)")
            continue

        print(f"[{target}] {count} files...")
        for position, wem_file in enumerate(inputs, start=1):
            destination = os.path.join(output_root, wem_file.stem + ".wav")
            succeeded = convert_wem_to_wav(str(wem_file), destination)
            if succeeded:
                converted += 1
            else:
                failed += 1
            processed += 1

            # Periodic progress line, once per 50 files of this target.
            if position % 50 == 0:
                print(f" {position}/{count} (ok:{converted} fail:{failed})")
        print(f" Done: {count} files\n")

    print(f"=== 转换完成: {converted} ok, {failed} fail, {processed} total ===")


if __name__ == "__main__":
    main()