# Cyrene/scripts/voice/convert_wem.py

#!/usr/bin/env python3
"""
将 .wem (Wwise Encoded Media) 文件批量转换为 .wav 格式。
使用 ffmpeg 进行转换（需预先安装 ffmpeg）。

.wem 文件本质上是 RIFF/WAVE 容器，内部编码可能是:
- PCM 16-bit (ffmpeg 直接支持)
- Wwise ADPCM (ffmpeg 需要额外解码器)
- Vorbis (部分 ffmpeg 版本支持)

用法:
    python convert_wem.py <input_dir> <output_dir>
    python convert_wem.py ./wem_output/ ./wav_output/
"""

import argparse
import os
import subprocess
import sys
from pathlib import Path


def convert_wem_to_wav(wem_path: str, wav_path: str) -> bool:
    """Convert a single .wem file to .wav using ffmpeg.

    ffmpeg is asked to resample to 22.05 kHz, mono, signed 16-bit. If that
    attempt does not yield a usable file, ``_convert_wem_vorbis`` is tried as
    a fallback.

    The output is staged in a sibling ``<name>.partial<ext>`` file and only
    moved onto ``wav_path`` once a conversion succeeded. A timed-out or failed
    ffmpeg run therefore never leaves a truncated file at ``wav_path`` that a
    later run could mistake for a finished conversion.

    Args:
        wem_path: Path of the source .wem file.
        wav_path: Destination path; replaced on success, untouched on failure.

    Returns:
        True if a file larger than 100 bytes was produced, False otherwise.
        Errors (missing ffmpeg, timeout, I/O problems) are reported on stdout
        instead of being raised.
    """
    # Keep the caller's extension on the staging name: ffmpeg chooses the
    # output container from the file extension.
    root, ext = os.path.splitext(wav_path)
    staging_path = f"{root}.partial{ext}"
    try:
        result = subprocess.run(
            ['ffmpeg', '-y', '-i', wem_path,
             '-ar', '22050',      # 22.05 kHz (training format configured in persona.yaml)
             '-ac', '1',           # mono
             '-sample_fmt', 's16', # 16-bit
             staging_path],
            capture_output=True,
            timeout=30,
        )
        converted = (
            result.returncode == 0
            and os.path.exists(staging_path)
            and os.path.getsize(staging_path) > 100
        )
        if not converted:
            # Some WEM files are Vorbis-encoded and need a different approach.
            converted = _convert_wem_vorbis(wem_path, staging_path)
        if converted:
            # Atomic within one directory: wav_path is either old or complete.
            os.replace(staging_path, wav_path)
        return converted
    except Exception as e:
        # Best-effort worker for batch runs: report and carry on.
        print(f"  ffmpeg 错误 [{os.path.basename(wem_path)}]: {e}")
        return False
    finally:
        # Drop whatever a failed or interrupted attempt left behind; after a
        # successful os.replace there is nothing to remove.
        try:
            os.remove(staging_path)
        except OSError:
            pass


def _convert_wem_vorbis(wem_path: str, wav_path: str) -> bool:
    """Fallback conversion for WEM files that contain a ``vorb`` chunk.

    The file is scanned for the byte marker ``b'vorb'``; without it the
    function gives up immediately. Otherwise ffmpeg is run with the input
    forced to raw signed 16-bit little-endian PCM (48 kHz, mono) and the
    output resampled to 22.05 kHz mono.

    NOTE(review): the command feeds the *whole* file to ffmpeg as headerless
    PCM and the position of the ``vorb`` marker is not used. Whether the
    result is usable audio for Vorbis-compressed payloads should be
    confirmed.

    Args:
        wem_path: Path of the source .wem file.
        wav_path: Path ffmpeg writes the converted audio to.

    Returns:
        True if ffmpeg exited with code 0 and the output is larger than
        100 bytes; False otherwise, including on any exception.
    """
    # Input options come first (-f/-ar/-ac before -i), output options after.
    raw_pcm_cmd = [
        'ffmpeg', '-y', '-f', 's16le',
        '-ar', '48000', '-ac', '1',
        '-i', wem_path,
        '-ar', '22050', '-ac', '1',
        wav_path,
    ]
    try:
        with open(wem_path, 'rb') as src:
            payload = src.read()

        # Only files carrying a "vorb" marker are eligible for this fallback.
        if b'vorb' not in payload:
            return False

        completed = subprocess.run(raw_pcm_cmd, capture_output=True, timeout=30)
        if completed.returncode != 0:
            return False
        return os.path.getsize(wav_path) > 100
    except Exception:
        return False


def convert_directory(input_dir: str, output_dir: str) -> tuple[int, int]:
    """Convert every ``*.wem`` file directly inside ``input_dir`` to ``.wav``.

    ``output_dir`` is created if needed. Files are processed in sorted order
    and each output is named after the source file's stem. An output that
    already exists and is larger than 100 bytes is counted as a success
    without being converted again, so an interrupted run can be resumed.

    A progress line is printed after every 50 files, whether they were
    converted or skipped.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.wem`` files.
        output_dir: Directory receiving the ``.wav`` files.

    Returns:
        ``(success, failed)`` counts; ``(0, 0)`` when no .wem file is found.
    """
    os.makedirs(output_dir, exist_ok=True)
    wem_files = sorted(Path(input_dir).glob('*.wem'))

    if not wem_files:
        print(f"在 {input_dir} 中未找到 .wem 文件")
        return 0, 0

    total = len(wem_files)
    print(f"找到 {total} 个 .wem 文件，开始转换...")
    success = 0
    failed = 0

    for done, wem_path in enumerate(wem_files, start=1):
        wav_path = os.path.join(output_dir, wem_path.stem + '.wav')

        # Outputs from an earlier run are reused rather than re-converted.
        already_converted = (
            os.path.exists(wav_path) and os.path.getsize(wav_path) > 100
        )
        if already_converted or convert_wem_to_wav(str(wem_path), wav_path):
            success += 1
        else:
            failed += 1

        # No early `continue` above, so skipped files also advance progress.
        if done % 50 == 0:
            print(f"  进度: {done}/{total} (成功: {success}, 失败: {failed})")

    print(f"\n转换完成: {success} 成功, {failed} 失败")
    return success, failed


def main():
    """Command-line entry point.

    Two modes are supported:

    * ``convert_wem.py <input_dir> <output_dir>`` converts a whole directory.
    * ``convert_wem.py --single WEM WAV`` converts one file.

    The directory arguments are optional at the parser level so that
    ``--single`` can be used on its own; in directory mode both are checked
    explicitly and a usage error (exit code 2) is raised if one is missing.
    """
    parser = argparse.ArgumentParser(description="批量转换 .wem → .wav (需要 ffmpeg)")
    parser.add_argument('input_dir', nargs='?', help='包含 .wem 文件的输入目录')
    parser.add_argument('output_dir', nargs='?', help='输出目录')
    parser.add_argument('--single', nargs=2, metavar=('WEM', 'WAV'),
                        help='转换单个文件')
    args = parser.parse_args()

    if args.single:
        wem_file, wav_file = args.single
        ok = convert_wem_to_wav(wem_file, wav_file)
        print(f"{'OK' if ok else 'FAILED'}: {wem_file} -> {wav_file}")
        return

    if args.input_dir is None or args.output_dir is None:
        # parser.error prints usage and exits with status 2.
        parser.error('需要提供 input_dir 和 output_dir (或使用 --single WEM WAV)')
    convert_directory(args.input_dir, args.output_dir)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()