Files
Cyrene/scripts/voice/wem2ogg.py
T
AskaEth 6ef9e082a6 feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST)
- Gateway: VoiceStreamManager代理WS流式STT到voice-service
- Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码
- 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端)
- 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并
- 文档: 完善gateway-api.md和voice-service.md语音API文档
- 工具: scripts/voice/ 语音转换脚本集

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 11:50:40 +08:00

297 lines
8.6 KiB
Python

#!/usr/bin/env python3
"""
将 Wwise Vorbis .wem 文件转换为标准 Ogg Vorbis (.ogg) 文件。
纯 Python 实现,不依赖 ww2ogg 或 revorb 外部工具。
基于 Wwise RIFF/Vorbis 格式:
- Codec ID: 0xFFFF
- Vorbis 数据存储在 "vorb" chunk 中
- 数据包可直接封装为 Ogg 容器
用法:
python wem2ogg.py <input.wem> <output.ogg>
python wem2ogg.py --batch <input_dir> <output_dir>
"""
import argparse
import os
import struct
import sys
import zlib
from pathlib import Path
# Ogg page header-type flag values passed as ``header_type`` to make_ogg_page.
# 0x02 marks the first page of a logical bitstream (beginning of stream).
OGG_HEADER = 0x02
# No flag bits set: an ordinary page.
OGG_FIRST_DATA = 0x00
# Also 0x00. NOTE(review): the Ogg "continued packet" flag bit is 0x01; this
# constant is used in this file for ordinary pages that start a fresh packet,
# so despite the name it does not mark a continuation.
OGG_CONTINUED = 0x00
# 0x04 marks the last page of a logical bitstream (end of stream).
OGG_LAST = 0x04
# Lookup table for the Ogg page checksum; stays None until first requested.
_crc_table = None


def _get_crc_table():
    """Return the 256-entry CRC lookup table, building it on first use.

    Each entry is the 32-bit remainder obtained by feeding one byte value,
    most significant bit first, through the polynomial 0x04c11db7.
    """
    global _crc_table
    if _crc_table is not None:
        return _crc_table

    table = []
    for byte in range(256):
        reg = byte << 24
        for _ in range(8):
            # Shift first; bit 32 now holds what was the top bit before the
            # shift, which decides whether the polynomial is folded in.
            reg <<= 1
            if reg & 0x100000000:
                reg ^= 0x04c11db7
        table.append(reg & 0xffffffff)

    _crc_table = table
    return table


def ogg_crc32(data: bytes) -> int:
    """Compute the checksum used in Ogg page headers over ``data``.

    The register starts at zero, bytes are consumed in order through the
    lookup table, and no final inversion is applied.

    Args:
        data: Bytes to checksum (any iterable of byte values 0-255).

    Returns:
        The checksum as an unsigned 32-bit integer.
    """
    lookup = _get_crc_table()
    acc = 0
    for byte in data:
        acc = ((acc << 8) & 0xffffffff) ^ lookup[(acc >> 24) ^ byte]
    return acc
def make_ogg_page(segment_data: bytes, granule: int,
                  header_type: int, stream_serial: int = 0,
                  page_index: int = 0) -> bytes:
    """Build one Ogg page that carries ``segment_data`` as a single packet.

    Args:
        segment_data: Page payload. It is laced as one packet: split into
            255-byte segments followed by one shorter closing segment. An
            empty payload yields a page with an empty segment table.
        granule: Granule position, stored as a signed 64-bit integer.
        header_type: Page flag byte (see the ``OGG_*`` constants).
        stream_serial: Serial number of the logical bitstream.
        page_index: Page sequence number.

    Returns:
        The complete page: 27-byte header, segment table and payload, with
        the checksum field filled in.

    Raises:
        ValueError: If the payload needs more than 255 lacing values and so
            cannot fit in one page.
        struct.error: If a header field does not fit its fixed-width slot.
    """
    payload = bytes(segment_data)

    # Lacing values: a run of 255s, closed by a value below 255.
    full_segments, tail = divmod(len(payload), 255)
    lacing = [255] * full_segments
    if tail:
        lacing.append(tail)
    elif full_segments:
        # Length is an exact multiple of 255: a zero lacing value is needed
        # to end the packet, otherwise a decoder reads it as continuing onto
        # the next page.
        lacing.append(0)

    if len(lacing) > 255:
        raise ValueError(
            f"页数据过大: 需要 {len(lacing)} 个段, Ogg 页最多 255 个")

    # Fixed header: magic, version 0, flags, granule, serial, sequence,
    # checksum placeholder (0), segment count.
    header = struct.pack('<4sBBqIIIB', b'OggS', 0, header_type, granule,
                         stream_serial, page_index, 0, len(lacing))
    page = bytearray(header)
    page += bytes(lacing)
    page += payload

    # The checksum covers the whole page with its own field zeroed, and is
    # then written into that field (offset 22).
    struct.pack_into('<I', page, 22, ogg_crc32(bytes(page)))
    return bytes(page)
def extract_vorbis_packets(wem_path: str) -> list[bytes]:
    """Extract the length-prefixed packets from a WEM (RIFF) file.

    The file is scanned for the first ``vorb`` or ``data`` chunk. Inside that
    chunk the first 4 bytes are skipped, and the rest is read as a sequence
    of records:

    * a little-endian uint16 header whose low 15 bits are the packet size;
    * if the header's top bit is set, a further 2-byte field that is skipped;
    * ``size`` bytes of packet data.

    Scanning stops at a zero size, at a record that runs past the end of the
    chunk, or when 2 or fewer bytes remain.

    NOTE(review): this layout is a simplification; whether real Wwise
    streams follow it should be confirmed against sample files.

    Args:
        wem_path: Path of the .wem file to read.

    Returns:
        The packet payloads in file order (possibly empty).

    Raises:
        ValueError: If the file does not start with ``RIFF``, has no
            ``vorb``/``data`` chunk, or that chunk is shorter than 4 bytes.
        OSError: If the file cannot be read.
    """
    with open(wem_path, 'rb') as f:
        data = f.read()

    if data[:4] != b'RIFF':
        raise ValueError("不是有效的 RIFF 文件")

    # Walk the sub-chunks that follow the 12-byte RIFF header.
    end = len(data)
    pos = 12
    body_start = None
    body_end = None
    while pos + 8 <= end:
        chunk_id = data[pos:pos + 4]
        chunk_size = struct.unpack_from('<I', data, pos + 4)[0]
        if chunk_id in (b'vorb', b'data'):
            body_start = pos + 8
            # Clamp so a declared size larger than the file cannot overrun.
            body_end = min(body_start + chunk_size, end)
            break
        # Chunks are padded to an even length.
        pos += 8 + chunk_size + (chunk_size & 1)

    if body_start is None:
        raise ValueError("未找到 vorb/data chunk")
    if body_end - body_start < 4:
        raise ValueError("vorb/data chunk 过短")

    # Index into ``data`` instead of re-slicing the remainder per packet,
    # which would copy the tail of the chunk on every iteration.
    packets = []
    cursor = body_start + 4
    while body_end - cursor > 2:
        header = struct.unpack_from('<H', data, cursor)[0]
        pkt_size = header & 0x7FFF
        if pkt_size == 0:
            break
        pkt_start = cursor + 2
        if header & 0x8000:
            pkt_start += 2  # skip the extra 2-byte field
        pkt_end = pkt_start + pkt_size
        if pkt_end > body_end:
            # Truncated record (including a cut-off extra field): stop.
            break
        packets.append(data[pkt_start:pkt_end])
        cursor = pkt_end
    return packets
def wem_to_ogg(wem_path: str, ogg_path: str) -> bool:
    """Convert a single .wem file into an .ogg file.

    The first three extracted packets are treated as the Vorbis
    identification, comment and setup headers; everything after them is
    treated as audio. Every packet is written on its own Ogg page, because
    make_ogg_page laces its whole payload as one packet and concatenating
    packets before the call would erase the boundaries between them.

    Args:
        wem_path: Source .wem file.
        ogg_path: Destination .ogg file (overwritten if present).

    Returns:
        True if the output was written and is larger than 100 bytes; False
        if fewer than three packets were found, the output is too small, or
        any error occurred (the error is printed, not raised).
    """
    try:
        packets = extract_vorbis_packets(wem_path)
        if len(packets) < 3:
            print(f" 警告: {os.path.basename(wem_path)} 只有 {len(packets)} 个数据包")
            return False

        ident_pkt, comment_pkt, setup_pkt, *audio_packets = packets

        # Header pages: identification alone on the first (stream-start)
        # page, then comment and setup as separate packets.
        pages = [
            make_ogg_page(ident_pkt, granule=0,
                          header_type=OGG_HEADER, page_index=0),
            make_ogg_page(comment_pkt, granule=0,
                          header_type=OGG_FIRST_DATA, page_index=1),
            make_ogg_page(setup_pkt, granule=0,
                          header_type=OGG_FIRST_DATA, page_index=2),
        ]

        # Audio pages. The granule position is a rough estimate of 576
        # samples per packet; it is advanced for a packet only when that
        # packet is actually written, so each page carries the running total
        # up to and including its own packet.
        granule = 0
        last_index = len(audio_packets) - 1
        for index, pkt in enumerate(audio_packets):
            granule += 576
            flags = OGG_LAST if index == last_index else OGG_CONTINUED
            pages.append(make_ogg_page(pkt, granule=granule,
                                       header_type=flags,
                                       page_index=3 + index))

        with open(ogg_path, 'wb') as f:
            f.write(b''.join(pages))
        return os.path.getsize(ogg_path) > 100
    except Exception as e:
        # Per-file boundary: report and signal failure so that a batch run
        # can carry on with the remaining files.
        print(f" 错误 [{os.path.basename(wem_path)}]: {e}")
        return False
def batch_convert(input_dir: str, output_dir: str) -> tuple[int, int]:
    """Convert every ``*.wem`` file directly inside ``input_dir``.

    The output directory is created if needed. Files are processed in sorted
    order and each result is written to ``output_dir`` under the same stem
    with an ``.ogg`` suffix. A progress line is printed after every 50 files
    and a summary at the end.

    Args:
        input_dir: Directory searched (non-recursively) for .wem files.
        output_dir: Directory that receives the .ogg files.

    Returns:
        ``(success_count, failure_count)``; ``(0, 0)`` when no .wem file
        was found.
    """
    os.makedirs(output_dir, exist_ok=True)

    sources = sorted(Path(input_dir).glob('*.wem'))
    total = len(sources)
    if total == 0:
        print(f"{input_dir} 中未找到 .wem 文件")
        return 0, 0

    print(f"找到 {total} 个 .wem 文件")

    converted = 0
    errors = 0
    for done, src in enumerate(sources, start=1):
        target = os.path.join(output_dir, src.stem + '.ogg')
        if wem_to_ogg(str(src), target):
            converted += 1
        else:
            errors += 1
        if done % 50 == 0:
            print(f" 进度: {done}/{total} (成功: {converted})")

    print(f"\n转换完成: {converted} 成功, {errors} 失败")
    return converted, errors
def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits with status 1 when a single-file conversion fails or when any file
    in a batch run fails; argparse exits with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="WEM → OGG 转换器 (纯 Python)")
    parser.add_argument('input', help='输入 .wem 文件或目录')
    parser.add_argument('output', help='输出 .ogg 文件或目录')
    parser.add_argument('--batch', action='store_true',
                        help='批量模式: 转换目录中所有 .wem 文件')
    args = parser.parse_args(argv)

    if args.batch:
        # Surface batch failures through the exit status, matching the
        # single-file mode below.
        _, failed = batch_convert(args.input, args.output)
        if failed:
            sys.exit(1)
        return

    if wem_to_ogg(args.input, args.output):
        # Separator added so the two paths are not printed run together.
        print(f"OK: {args.input} → {args.output}")
    else:
        print(f"FAILED: {args.input}")
        sys.exit(1)


if __name__ == '__main__':
    main()