Files
Cyrene/scripts/voice/extract_cyrene_voice.sh
T
AskaEth 6ef9e082a6 feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST)
- Gateway: VoiceStreamManager代理WS流式STT到voice-service
- Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码
- 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端)
- 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并
- 文档: 完善gateway-api.md和voice-service.md语音API文档
- 工具: scripts/voice/ 语音转换脚本集

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 11:50:40 +08:00

162 lines
5.7 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# ============================================================
# Cyrene voice extraction pipeline
#
# Steps:
#   1. Extract .wem files from the HSR audio packages (this script)
#   2. Convert .wem -> .wav with ww2ogg/vgmstream (tools must be installed
#      manually)
#   3. Normalize the audio format with ffmpeg
#
# Usage:
#   <script>            run stage 1 and print the tool instructions
#   <script> --convert  additionally convert, normalize and sort the audio
#
# Environment overrides (each falls back to the original hard-coded path):
#   HSR_AUDIO_DIR  directory holding the game's .pck audio packages
#   RAW_DIR        where extracted .wem / converted .wav files are written
#   CLEANED_DIR    where normalized, categorized .wav files are written
# ============================================================
set -e
HSR_AUDIO_DIR="${HSR_AUDIO_DIR:-D:/MeowG/HonkaiStar_Rail/StarRail_Data/Persistent/Audio/AudioPackage/Windows/Chinese(PRC)}"
RAW_DIR="${RAW_DIR:-D:/Project/Code/Uni/Cyrene-Voice-Model/data/raw}"
CLEANED_DIR="${CLEANED_DIR:-D:/Project/Code/Uni/Cyrene-Voice-Model/data/cleaned}"
# Absolute directory of this script; helper scripts and tools/ are resolved
# relative to it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
echo "=== 昔涟语音提取管线 ==="
echo ""
# ---- Stage 1: extract .wem files from the .pck packages ----
echo "[阶段 1/4] 提取候选 .pck 文件..."
# Cyrene is a 3.x character; the voice lines are expected in:
# - VoBanks 27-31 (the newest character voice banks)
# - External_del_4.0_chapter5_* (3.0 main story, where Cyrene appears)
# - External_del_4.1_chapter_* (3.1 main story, where Cyrene appears)
TARGETS=(
  VoBanks{27..31}.pck
  External_del_4.0_chapter5_{0..2}.pck
  External_del_4.1_chapter_{0..2}.pck
)
for pck_name in "${TARGETS[@]}"; do
  pck_file="${HSR_AUDIO_DIR}/${pck_name}"
  # A package that is not present is reported and skipped, not treated as
  # an error.
  if [[ ! -f "$pck_file" ]]; then
    echo " 跳过: $pck_name (文件不存在)"
    continue
  fi
  pck_size="$(du -h "$pck_file" | cut -f1)"
  echo " 提取: $pck_name (${pck_size})"
  # The output directory handed to the extractor is
  # RAW_DIR/<package name without .pck>/.
  python3 "${SCRIPT_DIR}/extract_pck.py" "$pck_file" "${RAW_DIR}/${pck_name%.pck}/"
done
echo ""
echo "[阶段 1/4] 完成: .wem 文件已提取到 ${RAW_DIR}/"
# ---- Stage 2: .wem -> .wav conversion (instructions only) ----
# Nothing is converted here; this stage only prints how to install one of
# the supported decoders and how to re-run the script with --convert.
# The here-document is unquoted so that $0 expands to the invoked script path.
cat <<TOOL_HELP

[阶段 2/4] 需要转换 .wem → .wav

 HSR 使用 Wwise 专有编码 (0xFFFF)ffmpeg 无法直接解码。
 请使用以下工具之一进行转换:

 方案 A — vgmstream CLI (推荐, 最简单):
 下载: https://github.com/vgmstream/vgmstream/releases
 解压后将 vgmstream-cli.exe 放入 scripts/voice/tools/
 然后运行本脚本的 --convert 模式

 方案 B — AnimeWwise GUI (最强大, 保留原始文件名):
 下载: https://github.com/Escartem/AnimeWwise/releases
 直接打开 GUI, 选择 HSR 目录, 导出昔涟语音

 方案 C — ww2ogg + revorb (传统方案):
 下载 ww2ogg.exe + revorb.exe + packed_codebooks.bin
 放入 scripts/voice/tools/

 安装工具后, 运行: $0 --convert

TOOL_HELP
# ---- Stage 3 (conditional): batch conversion ----
# Stages 3 and 4 only run when the script is invoked with --convert.
# The helpers below are defined unconditionally; they read the globals
# TOOLS_DIR, RAW_DIR and CLEANED_DIR at call time.
#
# Every external tool is run with stdin detached (</dev/null, plus -nostdin
# for ffmpeg). The calling loops read their file list from stdin, and a tool
# that reads stdin would otherwise swallow the remaining entries.
# Tool stderr is discarded on purpose (the decoders are very chatty);
# failures are detected from the exit status and reported per file.

#######################################
# Convert one .wem to .wav with vgmstream-cli.
# Globals:   TOOLS_DIR (read)
# Arguments: $1 - source .wem path, $2 - destination .wav path
# Returns:   exit status of vgmstream-cli
#######################################
wem_to_wav_vgmstream() {
  local wem=$1 wav=$2
  "${TOOLS_DIR}/vgmstream-cli.exe" -o "$wav" "$wem" </dev/null 2>/dev/null
}

#######################################
# Convert one .wem to .wav via ww2ogg (-> .ogg) and ffmpeg (-> .wav,
# 22.05 kHz mono 16-bit). The intermediate .ogg is always removed.
# Globals:   TOOLS_DIR (read)
# Arguments: $1 - source .wem path, $2 - destination .wav path
# Returns:   0 on success, otherwise the status of the step that failed
#######################################
wem_to_wav_ww2ogg() {
  local wem=$1 wav=$2
  local ogg="${wem%.wem}.ogg"
  local status=0
  if "${TOOLS_DIR}/ww2ogg.exe" "$wem" -o "$ogg" \
    --pcb "${TOOLS_DIR}/packed_codebooks.bin" </dev/null 2>/dev/null; then
    ffmpeg -nostdin -y -i "$ogg" -ar 22050 -ac 1 -sample_fmt s16 "$wav" \
      </dev/null 2>/dev/null || status=$?
  else
    status=$?
  fi
  rm -f -- "$ogg"
  return "$status"
}

#######################################
# Convert every .wem under RAW_DIR that has no sibling .wav yet.
# A file that fails to convert is reported and skipped instead of aborting
# the whole run; any partial output is removed so a later run retries it.
# Globals:   RAW_DIR (read)
# Arguments: $1 - name of a converter function taking <wem> <wav>
# Outputs:   one warning per failed file on stderr
# Returns:   0
#######################################
convert_wem_tree() {
  local converter=$1
  local wem wav
  while IFS= read -r -d '' wem; do
    wav="${wem%.wem}.wav"
    if [ -f "$wav" ]; then
      continue
    fi
    if ! "$converter" "$wem" "$wav"; then
      rm -f -- "$wav"
      echo " 警告: 转换失败, 已跳过: $wem" >&2
    fi
  done < <(find "${RAW_DIR}" -name "*.wem" -print0)
  return 0
}

#######################################
# Print the duration of an audio file in whole milliseconds.
# Prints 0 when ffprobe fails or reports no numeric duration.
# Arguments: $1 - audio file path
# Outputs:   integer milliseconds on stdout
#######################################
clip_duration_ms() {
  local wav=$1
  local seconds
  seconds=$(ffprobe -v quiet -show_entries format=duration \
    -of default=noprint_wrappers=1:nokey=1 "$wav" </dev/null 2>/dev/null) \
    || seconds=0
  awk -v s="$seconds" 'BEGIN { print int(s * 1000) }'
}

#######################################
# Pick the category sub-directory for a clip.
# Rules, first match wins:
#   shorter than 0.5 s            -> battle (likely short combat lines or
#                                    interjections)
#   longer than 15 s              -> story  (likely long story dialogue)
#   parent dir contains "chapter" -> story
#   anything else                 -> daily  (VoBanks mix combat and daily
#                                    lines and need manual review)
# Arguments: $1 - duration in milliseconds, $2 - parent directory name
# Outputs:   battle | story | daily on stdout
#######################################
classify_clip() {
  local duration_ms=${1:-0} parent=$2
  if [ "$duration_ms" -lt 500 ]; then
    echo "battle"
  elif [ "$duration_ms" -gt 15000 ]; then
    echo "story"
  elif grep -qi "chapter" <<<"$parent"; then
    echo "story"
  else
    echo "daily"
  fi
}

#######################################
# Normalize every .wav under RAW_DIR to 22.05 kHz mono 16-bit and write it
# to CLEANED_DIR/<category>/<parent>_<name>.wav. Prints one "." per file
# written. A file ffmpeg cannot process is reported and skipped.
# Globals:   RAW_DIR (read), CLEANED_DIR (read)
# Outputs:   progress dots on stdout, warnings on stderr
# Returns:   0
#######################################
normalize_wav_tree() {
  local wav duration_ms stem parent category dest
  while IFS= read -r -d '' wav; do
    duration_ms=$(clip_duration_ms "$wav")
    stem=$(basename "$wav" .wav)
    parent=$(basename "$(dirname "$wav")")
    category=$(classify_clip "$duration_ms" "$parent")
    dest="${CLEANED_DIR}/${category}/${parent}_${stem}.wav"
    if ffmpeg -nostdin -y -i "$wav" -ar 22050 -ac 1 -sample_fmt s16 \
      "$dest" </dev/null 2>/dev/null; then
      printf '.'
    else
      rm -f -- "$dest"
      echo " 警告: 标准化失败, 已跳过: $wav" >&2
    fi
  done < <(find "${RAW_DIR}" -name "*.wav" -print0)
  return 0
}

#######################################
# Count the non-hidden entries directly inside a directory.
# Prints 0 for a missing directory.
# Arguments: $1 - directory path
# Outputs:   entry count on stdout
#######################################
count_entries() {
  local dir=$1
  find "$dir" -mindepth 1 -maxdepth 1 ! -name '.*' 2>/dev/null | wc -l
}

if [ "${1:-}" = "--convert" ]; then
  echo "[阶段 3/4] 转换 .wem → .wav..."
  TOOLS_DIR="${SCRIPT_DIR}/tools"
  # Prefer vgmstream; fall back to ww2ogg + ffmpeg.
  if [ -f "${TOOLS_DIR}/vgmstream-cli.exe" ]; then
    echo " 使用 vgmstream-cli..."
    convert_wem_tree wem_to_wav_vgmstream
  elif [ -f "${TOOLS_DIR}/ww2ogg.exe" ]; then
    echo " 使用 ww2ogg + ffmpeg..."
    # ww2ogg is always invoked with --pcb pointing at this file, so fail
    # once up front instead of once per .wem.
    if [ ! -f "${TOOLS_DIR}/packed_codebooks.bin" ]; then
      echo " 错误: 未找到 packed_codebooks.bin, 请放入 ${TOOLS_DIR}/"
      exit 1
    fi
    convert_wem_tree wem_to_wav_ww2ogg
  else
    echo " 错误: 未找到转换工具, 请先安装 vgmstream 或 ww2ogg"
    exit 1
  fi
  echo "[阶段 3/4] 完成"
  # ---- Stage 4: audio normalization + categorization ----
  echo ""
  echo "[阶段 4/4] 标准化 + 分类..."
  # First-pass categorization by duration and source package. Note that
  # "emotional" is created and counted but nothing is routed into it here.
  mkdir -p "${CLEANED_DIR}/daily" "${CLEANED_DIR}/battle" \
    "${CLEANED_DIR}/emotional" "${CLEANED_DIR}/story"
  normalize_wav_tree
  echo ""
  echo "[阶段 4/4] 完成: 音频已分类到 ${CLEANED_DIR}/"
  echo ""
  echo "文件分布:"
  echo " 日常对话: $(count_entries "${CLEANED_DIR}/daily")"
  echo " 战斗语音: $(count_entries "${CLEANED_DIR}/battle")"
  echo " 情感表达: $(count_entries "${CLEANED_DIR}/emotional")"
  echo " 剧情对话: $(count_entries "${CLEANED_DIR}/story")"
fi
# Closing banner: a blank line followed by the "pipeline finished" marker.
printf '\n%s\n' "=== 管线完成 ==="