Files
Cyrene/scripts/voice/batch_convert.sh
T
AskaEth 6ef9e082a6 feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST)
- Gateway: VoiceStreamManager代理WS流式STT到voice-service
- Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码
- 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端)
- 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并
- 文档: 完善gateway-api.md和voice-service.md语音API文档
- 工具: scripts/voice/ 语音转换脚本集

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 11:50:40 +08:00

74 lines
1.9 KiB
Bash

#!/bin/bash
# Batch WEM -> WAV conversion (using vgmstream-cli).
#
# Environment overrides (each falls back to the path that used to be
# hard-coded, so running the script with no environment set is unchanged):
#   RAW_DIR      directory tree searched for *.wem input files
#   CLEANED_DIR  directory tree that receives the converted *.wav files
#   VGMSTREAM    path to the vgmstream-cli executable
set -e

RAW_DIR="${RAW_DIR:-D:/Project/Code/Uni/Cyrene-Voice-Model/data/raw}"
CLEANED_DIR="${CLEANED_DIR:-D:/Project/Code/Uni/Cyrene-Voice-Model/data/cleaned}"
VGMSTREAM="${VGMSTREAM:-D:/Project/Code/Uni/Cyrene/scripts/voice/tools/vgmstream/vgmstream-cli.exe}"

# Abort early when the converter binary is missing; the diagnostic goes to
# stderr so it is not mixed into captured stdout.
if [ ! -f "$VGMSTREAM" ]; then
  echo "错误: 找不到 vgmstream-cli.exe" >&2
  exit 1
fi

echo "=== 批量 WEM → WAV 转换 ==="
echo ""

# Running tallies updated by the conversion loop.
TOTAL=0
SUCCESS=0
FAILED=0
#######################################
# Map a .wem path under RAW_DIR to its .wav destination under CLEANED_DIR,
# keeping the same relative sub-directory layout.
# Globals:   RAW_DIR (read), CLEANED_DIR (read)
# Arguments: $1 - path of the .wem file as produced by find "$RAW_DIR"
# Outputs:   destination .wav path on stdout
#######################################
wem_to_wav_path() {
  # RAW_DIR is quoted inside the expansion so it is stripped as a literal
  # prefix; unquoted it would be interpreted as a glob pattern and fail to
  # match directories containing characters such as [ ] * ?.
  local rel_path="${1#"$RAW_DIR"/}"
  printf '%s\n' "${CLEANED_DIR}/${rel_path%.wem}.wav"
}

# Convert every *.wem found under RAW_DIR. The file list is NUL-delimited
# and fed through process substitution so the counters stay in this shell.
while IFS= read -r -d '' wem_file; do
  TOTAL=$((TOTAL + 1))

  # Output path: same sub-directory structure under cleaned/, .wem -> .wav
  wav_file="$(wem_to_wav_path "$wem_file")"
  wav_dir="${wav_file%/*}"
  mkdir -p "$wav_dir"

  # Skip files that were already converted: an existing .wav larger than
  # 100 bytes is tallied as a success. Note that `continue` also bypasses
  # the progress line below.
  if [ -f "$wav_file" ] && [ "$(stat -c%s "$wav_file" 2>/dev/null || echo 0)" -gt 100 ]; then
    SUCCESS=$((SUCCESS + 1))
    continue
  fi

  # Convert. The tool is launched through cmd.exe; the doubled slash in
  # //c presumably keeps MSYS/Git Bash from rewriting /c as a path (TODO
  # confirm). stdin is pointed at /dev/null so the child process cannot
  # consume the file list that feeds this loop.
  if cmd.exe //c "$VGMSTREAM -o \"$wav_file\" \"$wem_file\"" 2>/dev/null </dev/null; then
    SUCCESS=$((SUCCESS + 1))
  else
    FAILED=$((FAILED + 1))
    # The converter's own stderr is discarded above, so at least name the
    # file that failed.
    echo " 转换失败: $wem_file" >&2
  fi

  # Progress report every 100 files
  if [ $((TOTAL % 100)) -eq 0 ]; then
    echo " 进度: $TOTAL 文件 (成功: $SUCCESS, 失败: $FAILED)"
  fi
done < <(find "$RAW_DIR" -name "*.wem" -print0)

echo ""
echo "=== 转换完成 ==="
echo "总计: $TOTAL | 成功: $SUCCESS | 失败: $FAILED"
#######################################
# Bucket clip durations into a histogram.
# Reads one duration in seconds per line on stdin; a line that is empty or
# not numeric is treated as 0. Each duration is counted in exactly one
# bucket: [0,1), [1,3), [3,10), [10,30), and 30 or longer (printed under
# the "> 30s" label).
# Outputs:   five labelled counts on stdout
#######################################
print_duration_histogram() {
  awk '
    BEGIN { lt1 = lt3 = lt10 = lt30 = gt30 = 0 }
    {
      d = $1 + 0
      # else-if chain keeps the buckets mutually exclusive; independent
      # "d < N" rules would count a short clip in every larger bucket too.
      if (d < 1)       lt1++
      else if (d < 3)  lt3++
      else if (d < 10) lt10++
      else if (d < 30) lt30++
      else             gt30++
    }
    END {
      printf " < 1s: %d\n", lt1
      printf " 1-3s: %d\n", lt3
      printf " 3-10s: %d\n", lt10
      printf " 10-30s: %d\n", lt30
      printf " > 30s: %d\n", gt30
    }'
}

# Duration statistics for everything under CLEANED_DIR
echo ""
echo "音频时长分布:"
# NUL-delimited names with IFS= / read -r keep paths containing spaces or
# backslashes intact. A failed ffprobe call contributes a duration of 0;
# its stdin is /dev/null so it cannot read from the file list.
find "$CLEANED_DIR" -name "*.wav" -print0 |
  while IFS= read -r -d '' wav; do
    dur=$(ffprobe -v quiet -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$wav" 2>/dev/null </dev/null || echo "0")
    echo "$dur"
  done |
  print_duration_histogram