6ef9e082a6
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST) - Gateway: VoiceStreamManager代理WS流式STT到voice-service - Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码 - 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端) - 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并 - 文档: 完善gateway-api.md和voice-service.md语音API文档 - 工具: scripts/voice/ 语音转换脚本集 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
162 lines
5.7 KiB
Bash
162 lines
5.7 KiB
Bash
#!/bin/bash
# ============================================================
# Cyrene (昔涟) voice extraction pipeline
#
# Steps:
#   1. Extract .wem files from the HSR audio packages (this script)
#   2. Convert .wem -> .wav with ww2ogg/vgmstream (tools must be
#      installed manually)
#   3. Normalize the audio format with ffmpeg
#
# Usage:
#   $0             extract only, then print conversion instructions
#   $0 --convert   extract, convert, normalize and sort by category
#
# Environment overrides (each falls back to the default below):
#   HSR_AUDIO_DIR, RAW_DIR, CLEANED_DIR
# ============================================================
set -euo pipefail

HSR_AUDIO_DIR="${HSR_AUDIO_DIR:-D:/MeowG/Honkai:Star_Rail/StarRail_Data/Persistent/Audio/AudioPackage/Windows/Chinese(PRC)}"
RAW_DIR="${RAW_DIR:-D:/Project/Code/Uni/Cyrene-Voice-Model/data/raw}"
CLEANED_DIR="${CLEANED_DIR:-D:/Project/Code/Uni/Cyrene-Voice-Model/data/cleaned}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TOOLS_DIR="${SCRIPT_DIR}/tools"

# Cyrene is a 3.x character; her voice lines live in these packages:
#   - VoBanks 27-31                (newest character voice banks)
#   - External_del_4.0_chapter5_*  (3.0 main story, Cyrene appears)
#   - External_del_4.1_chapter_*   (3.1 main story, Cyrene appears)
TARGETS=(
  "VoBanks27.pck"
  "VoBanks28.pck"
  "VoBanks29.pck"
  "VoBanks30.pck"
  "VoBanks31.pck"
  "External_del_4.0_chapter5_0.pck"
  "External_del_4.0_chapter5_1.pck"
  "External_del_4.0_chapter5_2.pck"
  "External_del_4.1_chapter_0.pck"
  "External_del_4.1_chapter_1.pck"
  "External_del_4.1_chapter_2.pck"
)

#######################################
# Print a warning to stderr.
# Arguments: $* - message text
#######################################
warn() {
  printf ' 警告: %s\n' "$*" >&2
}

#######################################
# Convert a duration in seconds to whole milliseconds.
# Non-numeric or empty input (e.g. ffprobe's "N/A") yields 0.
# Arguments: $1 - duration in seconds, may be fractional
# Outputs:   integer milliseconds on stdout
#######################################
seconds_to_ms() {
  awk 'NR == 1 { ms = int($1 * 1000) } END { print ms + 0 }' <<<"${1:-}"
}

#######################################
# Choose the output category for one clip.
# Arguments: $1 - clip duration in milliseconds (integer)
#            $2 - name of the directory the clip was extracted into
# Outputs:   one of: battle, story, daily
#######################################
classify_clip() {
  local -r dur_ms=$1 parent=$2

  if (( dur_ms < 500 )); then
    # < 0.5s: probably a short battle shout / interjection
    printf 'battle\n'
  elif (( dur_ms > 15000 )); then
    # > 15s: probably a long story dialogue
    printf 'story\n'
  elif grep -qi "chapter" <<<"$parent"; then
    printf 'story\n'
  else
    # VoBanks mix battle and daily lines and need manual review, so they
    # land in "daily" together with everything else.
    printf 'daily\n'
  fi
}

#######################################
# Count the non-hidden entries directly inside a directory.
# Arguments: $1 - directory path (a missing directory counts as 0)
# Outputs:   the count on stdout
#######################################
count_entries() {
  local -r dir=$1
  local entry
  local n=0

  for entry in "$dir"/*; do
    # An unmatched glob stays literal; -e filters that case out.
    if [[ -e "$entry" ]]; then
      n=$((n + 1))
    fi
  done
  printf '%s\n' "$n"
}

#######################################
# Stage 1: extract .wem files from every candidate .pck that exists.
# Globals: HSR_AUDIO_DIR, RAW_DIR, SCRIPT_DIR, TARGETS (read)
#######################################
extract_packages() {
  local target pck_path

  echo "[阶段 1/4] 提取候选 .pck 文件..."
  for target in "${TARGETS[@]}"; do
    pck_path="${HSR_AUDIO_DIR}/${target}"
    if [[ -f "$pck_path" ]]; then
      echo " 提取: $target ($(du -h "$pck_path" | cut -f1))"
      python3 "${SCRIPT_DIR}/extract_pck.py" "$pck_path" \
        "${RAW_DIR}/${target%.pck}/"
    else
      echo " 跳过: $target (文件不存在)"
    fi
  done

  echo ""
  echo "[阶段 1/4] 完成: .wem 文件已提取到 ${RAW_DIR}/"
}

#######################################
# Stage 2: explain which external tools can convert .wem to .wav.
#######################################
print_conversion_guide() {
  printf '%s\n' \
    "" \
    "[阶段 2/4] 需要转换 .wem → .wav" \
    "" \
    " HSR 使用 Wwise 专有编码 (0xFFFF),ffmpeg 无法直接解码。" \
    " 请使用以下工具之一进行转换:" \
    "" \
    " 方案 A — vgmstream CLI (推荐, 最简单):" \
    " 下载: https://github.com/vgmstream/vgmstream/releases" \
    " 解压后将 vgmstream-cli.exe 放入 scripts/voice/tools/" \
    " 然后运行本脚本的 --convert 模式" \
    "" \
    " 方案 B — AnimeWwise GUI (最强大, 保留原始文件名):" \
    " 下载: https://github.com/Escartem/AnimeWwise/releases" \
    " 直接打开 GUI, 选择 HSR 目录, 导出昔涟语音" \
    "" \
    " 方案 C — ww2ogg + revorb (传统方案):" \
    " 下载 ww2ogg.exe + revorb.exe + packed_codebooks.bin" \
    " 放入 scripts/voice/tools/" \
    "" \
    " 安装工具后, 运行: $0 --convert" \
    ""
}

#######################################
# Stage 3: convert every .wem under RAW_DIR to a sibling .wav.
# Files that already have a .wav are skipped. A file that fails to
# convert is reported and skipped instead of aborting the whole run.
# Globals: RAW_DIR, TOOLS_DIR (read)
# Returns: exits 1 when no converter is installed
#######################################
convert_wem_to_wav() {
  local wem ogg wav

  echo "[阶段 3/4] 转换 .wem → .wav..."

  # Prefer vgmstream when it is available.
  if [[ -f "${TOOLS_DIR}/vgmstream-cli.exe" ]]; then
    echo " 使用 vgmstream-cli..."
    # The file list arrives NUL-delimited on fd 3 so that odd file names
    # survive and child processes cannot consume it through stdin.
    while IFS= read -r -d '' wem <&3; do
      wav="${wem%.wem}.wav"
      if [[ -f "$wav" ]]; then
        continue
      fi
      # Tool chatter on stderr is suppressed on purpose; failures are
      # reported through the exit status below.
      if ! "${TOOLS_DIR}/vgmstream-cli.exe" -o "$wav" "$wem" 2>/dev/null; then
        rm -f -- "$wav"
        warn "转换失败, 已跳过: $wem"
      fi
    done 3< <(find "${RAW_DIR}" -name "*.wem" -print0)
  elif [[ -f "${TOOLS_DIR}/ww2ogg.exe" ]]; then
    echo " 使用 ww2ogg + ffmpeg..."
    while IFS= read -r -d '' wem <&3; do
      ogg="${wem%.wem}.ogg"
      wav="${wem%.wem}.wav"
      if [[ -f "$wav" ]]; then
        continue
      fi
      if ! "${TOOLS_DIR}/ww2ogg.exe" "$wem" -o "$ogg" \
        --pcb "${TOOLS_DIR}/packed_codebooks.bin" 2>/dev/null; then
        rm -f -- "$ogg"
        warn "转换失败, 已跳过: $wem"
        continue
      fi
      # -nostdin keeps ffmpeg from reading the terminal or the loop input.
      if ! ffmpeg -nostdin -hide_banner -loglevel error -y -i "$ogg" \
        -ar 22050 -ac 1 -sample_fmt s16 "$wav"; then
        # Drop the partial .wav so the next run retries this file.
        rm -f -- "$wav"
        warn "转换失败, 已跳过: $wem"
      fi
      rm -f -- "$ogg"
    done 3< <(find "${RAW_DIR}" -name "*.wem" -print0)
  else
    echo " 错误: 未找到转换工具, 请先安装 vgmstream 或 ww2ogg" >&2
    exit 1
  fi

  echo "[阶段 3/4] 完成"
}

#######################################
# Stage 4: normalize every .wav under RAW_DIR to 22.05 kHz mono 16-bit
# and sort it into a category directory under CLEANED_DIR.
# Globals: RAW_DIR, CLEANED_DIR (read)
#######################################
normalize_and_classify() {
  local wav duration dur_ms stem parent category dest

  echo ""
  echo "[阶段 4/4] 标准化 + 分类..."

  # First-pass sorting by clip length (voice lines are usually 1-15 s).
  # Nothing below writes to "emotional"; it is created for manual sorting.
  mkdir -p "${CLEANED_DIR}/daily" "${CLEANED_DIR}/battle" \
    "${CLEANED_DIR}/emotional" "${CLEANED_DIR}/story"

  while IFS= read -r -d '' wav <&3; do
    # Clip duration in seconds; an unreadable file is treated as 0.
    duration=$(ffprobe -v quiet -show_entries format=duration \
      -of default=noprint_wrappers=1:nokey=1 "$wav" 2>/dev/null || echo "0")
    dur_ms=$(seconds_to_ms "$duration")

    stem=$(basename "$wav" .wav)
    parent=$(basename "$(dirname "$wav")")
    category=$(classify_clip "$dur_ms" "$parent")
    dest="${CLEANED_DIR}/${category}/${parent}_${stem}.wav"

    # Normalize with ffmpeg: 22.05 kHz, mono, 16-bit.
    if ffmpeg -nostdin -hide_banner -loglevel error -y -i "$wav" \
      -ar 22050 -ac 1 -sample_fmt s16 "$dest"; then
      printf '.'
    else
      rm -f -- "$dest"
      warn "标准化失败, 已跳过: $wav"
    fi
  done 3< <(find "${RAW_DIR}" -name "*.wav" -print0)

  echo ""
  echo "[阶段 4/4] 完成: 音频已分类到 ${CLEANED_DIR}/"
  echo ""
  echo "文件分布:"
  echo " 日常对话: $(count_entries "${CLEANED_DIR}/daily") 个"
  echo " 战斗语音: $(count_entries "${CLEANED_DIR}/battle") 个"
  echo " 情感表达: $(count_entries "${CLEANED_DIR}/emotional") 个"
  echo " 剧情对话: $(count_entries "${CLEANED_DIR}/story") 个"
}

#######################################
# Entry point.
# Arguments: $1 - optional "--convert" to also run stages 3 and 4
#######################################
main() {
  echo "=== 昔涟语音提取管线 ==="
  echo ""

  extract_packages
  print_conversion_guide

  if [[ "${1:-}" == "--convert" ]]; then
    convert_wem_to_wav
    normalize_and_classify
  fi

  echo ""
  echo "=== 管线完成 ==="
}

main "$@"