# Cyrene/scripts/voice/extract_pck.py

#!/usr/bin/env python3
"""
从 Honkai: Star Rail 的 .pck (AKPK/Wwise SoundBank) 文件中提取 .wem 音频。

用法:
    python extract_pck.py <input.pck> <output_dir>
    python extract_pck.py VoBanks31.pck ./output/
    python extract_pck.py --all <pck_dir> <output_dir>
"""

import argparse
import os
import struct
import sys
from pathlib import Path


def find_wem_files(data: bytes) -> list[tuple[int, int, str]]:
    """
    Scan a buffer for embedded RIFF/WAVE chunks (.wem audio files).

    The buffer is searched for the ``RIFF`` magic; no container index is
    parsed. A match is accepted only when its form type is ``WAVE``, its
    declared chunk size is greater than 100 bytes, and the whole chunk fits
    inside ``data``.

    Args:
        data: Raw bytes to scan (typically the full contents of a .pck file).

    Returns:
        A list of ``(offset, size, riff_type)`` tuples in ascending offset
        order. ``size`` includes the 8-byte RIFF header; ``riff_type`` is
        always ``'WAVE'``.
    """
    magic = b'RIFF'
    header_len = 12  # 4-byte magic + uint32 chunk size + 4-byte form type
    results = []
    data_len = len(data)

    # bytes.find runs the search in C instead of slicing at every offset.
    pos = data.find(magic)
    while pos != -1 and pos + header_len <= data_len:
        chunk_size = struct.unpack_from('<I', data, pos + 4)[0]
        total_size = chunk_size + 8  # RIFF header + data
        is_wem = (
            data[pos + 8:pos + 12] == b'WAVE'
            # Size floor kept from the original; presumably filters out
            # chance matches with implausibly small sizes.
            and chunk_size > 100
            and pos + total_size <= data_len
        )
        if is_wem:
            results.append((pos, total_size, 'WAVE'))
            # Skip the accepted chunk so its payload is not rescanned.
            resume_at = pos + total_size
        else:
            # Do not trust the size field of a rejected match: a chance
            # 'RIFF' byte pattern with a garbage size would otherwise jump
            # over (or past the end of) real entries that follow it.
            resume_at = pos + 1
        pos = data.find(magic, resume_at)
    return results


def extract_pck(pck_path: str, output_dir: str, prefix: str = "") -> list[str]:
    """
    Extract every .wem chunk found in a single .pck file into ``output_dir``.

    The whole file is read into memory, scanned with ``find_wem_files``, and
    each hit is written as ``<stem>_<index:04d>_<offset:08x>.wem``, where
    ``<stem>`` is ``prefix`` when given and the .pck file's stem otherwise.
    Progress is reported on stdout.

    Args:
        pck_path: Path of the .pck file to read.
        output_dir: Directory to write into; created if it does not exist.
        prefix: Optional replacement for the .pck stem in output file names.

    Returns:
        Paths of the written .wem files, in the order they were found.
    """
    pck_name = Path(pck_path).stem
    os.makedirs(output_dir, exist_ok=True)

    with open(pck_path, 'rb') as f:
        data = f.read()

    print(f"[{pck_name}] 文件大小: {len(data):,} bytes, 扫描 WEM 块...")
    wem_entries = find_wem_files(data)
    print(f"[{pck_name}] 找到 {len(wem_entries)} 个音频文件")

    name_stem = prefix or pck_name
    # Slicing a memoryview avoids copying each chunk before it is written.
    view = memoryview(data)

    extracted = []
    for i, (offset, size, _riff_type) in enumerate(wem_entries):
        # The byte offset is embedded in the name to keep entries
        # distinguishable within one .pck file.
        filename = f"{name_stem}_{i:04d}_{offset:08x}.wem"
        out_path = os.path.join(output_dir, filename)
        with open(out_path, 'wb') as f:
            f.write(view[offset:offset + size])
        extracted.append(out_path)

    total_mb = sum(size for _, size, _ in wem_entries) / 1024 / 1024
    print(f"[{pck_name}] 提取完成: {len(extracted)} 文件, {total_mb:.1f} MB")
    return extracted


def main() -> None:
    """
    Command-line entry point.

    Parses ``input``, ``output``, ``--all`` and ``--prefix``. With ``--all``,
    ``input`` is a directory and every ``*.pck`` file directly inside it is
    extracted into ``output``; otherwise ``input`` is a single .pck file.
    An input path of the wrong kind (or a missing one) is reported through
    ``parser.error``, which prints a usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser(description="从 HSR .pck 文件提取 .wem 音频")
    parser.add_argument('input', help='输入 .pck 文件路径，或 --all 模式下的目录路径')
    parser.add_argument('output', help='输出目录')
    parser.add_argument('--all', action='store_true', help='批量模式：提取目录中所有 .pck 文件')
    parser.add_argument('--prefix', default='', help='输出文件名前缀')
    args = parser.parse_args()

    input_path = Path(args.input)
    if args.all:
        # Fail loudly instead of silently reporting "0 files" for a bad path.
        if not input_path.is_dir():
            parser.error(f"输入目录不存在: {args.input}")
        pck_files = sorted(input_path.glob('*.pck'))
        print(f"批量模式: 找到 {len(pck_files)} 个 .pck 文件")
        total_extracted = 0
        # NOTE(review): with --prefix, every .pck shares the same file-name
        # stem, so two packages with an entry at the same index and offset
        # would overwrite each other in the output directory. Confirm whether
        # the prefix should also include the .pck name in batch mode.
        for pck_file in pck_files:
            extracted = extract_pck(str(pck_file), args.output, args.prefix)
            total_extracted += len(extracted)
        print(f"\n总计: {total_extracted} 个音频文件")
    else:
        # Give a clean CLI error rather than a FileNotFoundError traceback.
        if not input_path.is_file():
            parser.error(f"输入文件不存在: {args.input}")
        extract_pck(args.input, args.output, args.prefix)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()