6ef9e082a6
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST) - Gateway: VoiceStreamManager代理WS流式STT到voice-service - Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码 - 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端) - 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并 - 文档: 完善gateway-api.md和voice-service.md语音API文档 - 工具: scripts/voice/ 语音转换脚本集 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
297 lines
8.6 KiB
Python
297 lines
8.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
将 Wwise Vorbis .wem 文件转换为标准 Ogg Vorbis (.ogg) 文件。
|
|
纯 Python 实现,不依赖 ww2ogg 或 revorb 外部工具。
|
|
|
|
基于 Wwise RIFF/Vorbis 格式:
|
|
- Codec ID: 0xFFFF
|
|
- Vorbis 数据存储在 "vorb" chunk 中
|
|
- 数据包可直接封装为 Ogg 容器
|
|
|
|
用法:
|
|
python wem2ogg.py <input.wem> <output.ogg>
|
|
python wem2ogg.py --batch <input_dir> <output_dir>
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import struct
|
|
import sys
|
|
import zlib
|
|
from pathlib import Path
|
|
|
|
|
|
# Header-type flag values handed to make_ogg_page() as ``header_type``.
# In the Ogg page header, bit 0x02 marks the first page of a logical stream
# and bit 0x04 marks the last one.
OGG_HEADER = 1 << 1        # 0x02, used for the identification-header page
OGG_LAST = 1 << 2          # 0x04, used for the final page
# Both of the following are "no flags set".
# NOTE(review): OGG_CONTINUED is 0 here, not the Ogg "continued packet" bit
# (0x01); every page this script writes starts with a fresh packet.
OGG_FIRST_DATA = 0
OGG_CONTINUED = 0
|
|
|
|
# CRC32 表 (预计算)
|
|
_crc_table = None
|
|
|
|
|
|
def _get_crc_table():
|
|
global _crc_table
|
|
if _crc_table is None:
|
|
_crc_table = []
|
|
for i in range(256):
|
|
r = i << 24
|
|
for _ in range(8):
|
|
if r & 0x80000000:
|
|
r = (r << 1) ^ 0x04c11db7
|
|
else:
|
|
r <<= 1
|
|
_crc_table.append(r & 0xffffffff)
|
|
return _crc_table
|
|
|
|
|
|
def ogg_crc32(data: bytes) -> int:
    """Return the 32-bit checksum of ``data`` used for Ogg page headers.

    The register starts at 0, bytes are consumed most-significant-bit first
    through the table from ``_get_crc_table()``, and no final XOR is applied.

    Args:
        data: The bytes to checksum.

    Returns:
        The checksum as an unsigned 32-bit integer.
    """
    lookup = _get_crc_table()
    checksum = 0
    for octet in data:
        # checksum is always below 2**32 here, so the shift already
        # isolates its top byte.
        slot = (checksum >> 24) ^ octet
        checksum = ((checksum << 8) & 0xffffffff) ^ lookup[slot]
    return checksum
|
|
|
|
|
|
def make_ogg_page(segment_data: bytes, granule: int,
                  header_type: int, stream_serial: int = 0,
                  page_index: int = 0) -> bytes:
    """Build one Ogg page that carries ``segment_data`` as a single packet.

    The page is the 27-byte header, the lacing (segment) table and the
    payload. The checksum field is first written as zero, the CRC is taken
    over the whole page, and the result is patched in at offset 22.

    Args:
        segment_data: Payload bytes, laced as one packet: a run of 255-byte
            segments followed by one final segment shorter than 255 bytes.
            An empty payload yields a page with no segments at all.
        granule: Granule position, stored as a signed 64-bit little-endian
            integer.
        header_type: Header-type flag byte (for example ``OGG_HEADER`` or
            ``OGG_LAST``).
        stream_serial: Bitstream serial number.
        page_index: Page sequence number.

    Returns:
        The encoded page.

    Raises:
        ValueError: If the payload needs more than 255 lacing values, i.e.
            it is longer than 65024 bytes.
    """
    payload_len = len(segment_data)
    full_segments, tail = divmod(payload_len, 255)

    lacing = b'\xff' * full_segments
    if payload_len:
        # A packet ends at the first lacing value below 255. When the length
        # is an exact multiple of 255 that value is an explicit 0; without
        # it a decoder treats the packet as continuing on the next page.
        lacing += bytes([tail])

    if len(lacing) > 255:
        raise ValueError(
            f"数据包过大 ({payload_len} 字节), 单个 Ogg 页最多容纳 255 个段")

    page = bytearray(struct.pack(
        '<4sBBqIIIB',
        b'OggS',        # capture pattern
        0,              # stream structure version
        header_type,
        granule,
        stream_serial,
        page_index,
        0,              # checksum placeholder, patched below
        len(lacing),
    ))
    page += lacing
    page += segment_data

    struct.pack_into('<I', page, 22, ogg_crc32(bytes(page)))
    return bytes(page)
|
|
|
|
|
|
def extract_vorbis_packets(wem_path: str) -> list[bytes]:
    """Extract the size-prefixed packets stored in a WEM (RIFF) file.

    The file is read whole and its chunk list is walked from offset 12. The
    first chunk tagged ``vorb`` or ``data`` is used as the packet container.
    Inside it the first 4 bytes are skipped; what follows is read as a
    sequence of records, each starting with a 16-bit little-endian word
    whose low 15 bits are the packet size and whose top bit signals two
    extra bytes between that word and the payload. Reading stops at a zero
    size, at a record that runs past the end of the chunk, or when at most
    2 bytes are left.

    NOTE(review): this layout is a simplification of the Wwise Vorbis
    format; confirm it against real files before relying on the output.

    Args:
        wem_path: Path of the ``.wem`` file to read.

    Returns:
        The packet payloads in file order (possibly an empty list).

    Raises:
        ValueError: If the file does not start with ``RIFF``, contains no
            ``vorb``/``data`` chunk, or that chunk is shorter than 4 bytes.
        OSError: If the file cannot be read.
    """
    with open(wem_path, 'rb') as f:
        data = f.read()

    if data[:4] != b'RIFF':
        raise ValueError("不是有效的 RIFF 文件")

    # Walk the chunk list that follows the 12-byte RIFF header.
    pos = 12
    vorb_data = None
    while pos + 8 <= len(data):
        chunk_id = data[pos:pos + 4]
        chunk_size = struct.unpack_from('<I', data, pos + 4)[0]
        if chunk_id in (b'vorb', b'data'):
            vorb_data = data[pos + 8:pos + 8 + chunk_size]
            break
        # Chunks are padded to a 2-byte boundary.
        pos += 8 + chunk_size + (chunk_size & 1)

    if vorb_data is None:
        raise ValueError("未找到 vorb/data chunk")
    if len(vorb_data) < 4:
        raise ValueError("vorb/data chunk 过短, 缺少 4 字节头部")

    packets = []
    end = len(vorb_data)
    cursor = 4  # skip the 4-byte prefix
    # Advance an index instead of re-slicing the remaining bytes on every
    # packet, which would copy the tail each time.
    while end - cursor > 2:
        header = struct.unpack_from('<H', vorb_data, cursor)[0]
        pkt_size = header & 0x7FFF
        if pkt_size == 0:
            break

        # The top bit adds two bytes after the size word; their value is
        # not used, so they are skipped rather than decoded.
        start = cursor + (4 if header & 0x8000 else 2)
        stop = start + pkt_size
        if stop > end:
            # Truncated record: keep what was read so far.
            break

        packets.append(vorb_data[start:stop])
        cursor = stop

    return packets
|
|
|
|
|
|
def wem_to_ogg(wem_path: str, ogg_path: str) -> bool:
    """Convert one ``.wem`` file into an ``.ogg`` file.

    The first three extracted packets are taken as the Vorbis
    identification, comment and setup headers; everything after them is
    audio. Every packet is written on its own Ogg page, because
    ``make_ogg_page`` laces its payload as one packet and concatenating
    several packets into one payload would erase their boundaries. This also
    keeps the identification header alone on the first page and starts the
    audio on a fresh page.

    Args:
        wem_path: Path of the source ``.wem`` file.
        ogg_path: Path of the ``.ogg`` file to write.

    Returns:
        ``True`` if the output file was written and is larger than 100
        bytes. ``False`` if fewer than three packets were found or any
        error occurred; the reason is printed rather than raised so that a
        batch run can carry on.
    """
    try:
        packets = extract_vorbis_packets(wem_path)

        if len(packets) < 3:
            print(f" 警告: {os.path.basename(wem_path)} 只有 {len(packets)} 个数据包")
            return False

        ident_pkt, comment_pkt, setup_pkt, *audio_packets = packets

        # Header pages carry granule position 0. If there is no audio at
        # all, the setup page is also the last page of the stream.
        pages = [
            make_ogg_page(ident_pkt, granule=0,
                          header_type=OGG_HEADER, page_index=0),
            make_ogg_page(comment_pkt, granule=0,
                          header_type=OGG_FIRST_DATA, page_index=1),
            make_ogg_page(setup_pkt, granule=0,
                          header_type=OGG_FIRST_DATA if audio_packets else OGG_LAST,
                          page_index=2),
        ]

        granule = 0
        final = len(audio_packets) - 1
        for offset, pkt in enumerate(audio_packets):
            # Rough estimate of 576 samples per packet; the real count
            # depends on the Vorbis block sizes, which are not decoded here.
            # The page's granule covers exactly the packet it contains.
            granule += 576
            pages.append(make_ogg_page(
                pkt, granule=granule,
                header_type=OGG_LAST if offset == final else OGG_CONTINUED,
                page_index=3 + offset))

        with open(ogg_path, 'wb') as f:
            f.write(b''.join(pages))

        return os.path.getsize(ogg_path) > 100

    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller count
        # the failure instead of aborting a whole batch.
        print(f" 错误 [{os.path.basename(wem_path)}]: {e}")
        return False
|
|
|
|
|
|
def batch_convert(input_dir: str, output_dir: str) -> tuple[int, int]:
    """Convert every ``*.wem`` file found directly inside ``input_dir``.

    ``output_dir`` is created if needed. Files are processed in sorted
    order and each result is written to ``output_dir`` under the same stem
    with an ``.ogg`` suffix. A progress line is printed after every 50
    files and a summary at the end.

    Args:
        input_dir: Directory searched (non-recursively) for ``.wem`` files.
        output_dir: Directory that receives the ``.ogg`` files.

    Returns:
        ``(success, failed)`` counts; ``(0, 0)`` when no ``.wem`` file
        was found.
    """
    os.makedirs(output_dir, exist_ok=True)

    sources = sorted(Path(input_dir).glob('*.wem'))
    total = len(sources)
    if total == 0:
        print(f"在 {input_dir} 中未找到 .wem 文件")
        return 0, 0

    print(f"找到 {total} 个 .wem 文件")

    success = 0
    for done, source in enumerate(sources, start=1):
        target = os.path.join(output_dir, source.stem + '.ogg')
        if wem_to_ogg(str(source), target):
            success += 1

        if done % 50 == 0:
            print(f" 进度: {done}/{total} (成功: {success})")

    # Every file is counted exactly once, so failures are the remainder.
    failed = total - success
    print(f"\n转换完成: {success} 成功, {failed} 失败")
    return success, failed
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Takes two positional arguments, ``input`` and ``output``. With
    ``--batch`` they are treated as directories and handed to
    ``batch_convert``; otherwise a single file is converted with
    ``wem_to_ogg`` and the process exits with status 1 if that fails.
    """
    parser = argparse.ArgumentParser(description="WEM → OGG 转换器 (纯 Python)")
    for arg_name, arg_help in (
        ('input', '输入 .wem 文件或目录'),
        ('output', '输出 .ogg 文件或目录'),
    ):
        parser.add_argument(arg_name, help=arg_help)
    parser.add_argument('--batch', action='store_true',
                        help='批量模式: 转换目录中所有 .wem 文件')
    args = parser.parse_args()

    if args.batch:
        batch_convert(args.input, args.output)
        return

    if not wem_to_ogg(args.input, args.output):
        print(f"FAILED: {args.input}")
        sys.exit(1)

    print(f"OK: {args.input} → {args.output}")


if __name__ == "__main__":
    main()
|