6ef9e082a6
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST) - Gateway: VoiceStreamManager代理WS流式STT到voice-service - Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码 - 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端) - 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并 - 文档: 完善gateway-api.md和voice-service.md语音API文档 - 工具: scripts/voice/ 语音转换脚本集 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
225 lines
5.5 KiB
Go
225 lines
5.5 KiB
Go
package main
|
||
|
||
import (
|
||
"flag"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"os"
|
||
"os/signal"
|
||
"time"
|
||
|
||
"github.com/gorilla/websocket"
|
||
)
|
||
|
||
func main() {
|
||
mode := flag.String("mode", "offline", "测试模式: offline (非实时) 或 realtime (实时)")
|
||
file := flag.String("file", "", "音频文件路径 (WAV/MP3/OGG/FLAC)")
|
||
server := flag.String("server", "http://localhost:8093", "Voice-Service 地址")
|
||
lang := flag.String("lang", "zh", "语言代码")
|
||
flag.Parse()
|
||
|
||
if *file == "" {
|
||
fmt.Println("用法: test_asr -mode=offline -file=audio.wav [-server=http://localhost:8093]")
|
||
os.Exit(1)
|
||
}
|
||
|
||
switch *mode {
|
||
case "offline":
|
||
testOffline(*server, *file, *lang)
|
||
case "realtime":
|
||
testRealtime(*server, *file, *lang)
|
||
default:
|
||
fmt.Printf("未知模式: %s (支持: offline, realtime)\n", *mode)
|
||
os.Exit(1)
|
||
}
|
||
}
|
||
|
||
// testOffline 测试非实时语音识别 (HTTP multipart 上传)。
|
||
func testOffline(server, filePath, lang string) {
|
||
fmt.Printf("=== 非实时 ASR 测试 ===\n")
|
||
fmt.Printf("服务器: %s\n", server)
|
||
fmt.Printf("文件: %s\n", filePath)
|
||
fmt.Printf("语言: %s\n\n", lang)
|
||
|
||
// 读取音频文件
|
||
audioData, err := os.ReadFile(filePath)
|
||
if err != nil {
|
||
fmt.Printf("读取文件失败: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
fmt.Printf("音频大小: %d bytes\n", len(audioData))
|
||
|
||
// 创建 multipart 请求
|
||
req, err := http.NewRequest("POST", server+"/api/v1/transcribe", nil)
|
||
if err != nil {
|
||
fmt.Printf("创建请求失败: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
|
||
// 使用 multipart form
|
||
body, contentType, err := createMultipartBody(audioData, filePath, lang)
|
||
if err != nil {
|
||
fmt.Printf("创建 multipart body 失败: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
req.Body = body
|
||
req.Header.Set("Content-Type", contentType)
|
||
|
||
start := time.Now()
|
||
resp, err := http.DefaultClient.Do(req)
|
||
if err != nil {
|
||
fmt.Printf("请求失败: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
elapsed := time.Since(start)
|
||
respBody, _ := io.ReadAll(resp.Body)
|
||
|
||
fmt.Printf("状态码: %d\n", resp.StatusCode)
|
||
fmt.Printf("耗时: %v\n", elapsed)
|
||
fmt.Printf("响应:\n%s\n", string(respBody))
|
||
|
||
if resp.StatusCode == 200 {
|
||
fmt.Println("\n✅ 非实时语音识别成功!")
|
||
} else {
|
||
fmt.Println("\n❌ 非实时语音识别失败")
|
||
}
|
||
}
|
||
|
||
// testRealtime 测试实时语音识别 (WebSocket 流式)。
|
||
func testRealtime(server, filePath, lang string) {
|
||
fmt.Printf("=== 实时 ASR 测试 ===\n")
|
||
fmt.Printf("服务器: %s\n", server)
|
||
fmt.Printf("文件: %s\n", filePath)
|
||
fmt.Printf("语言: %s\n\n", lang)
|
||
|
||
// 读取音频文件
|
||
audioData, err := os.ReadFile(filePath)
|
||
if err != nil {
|
||
fmt.Printf("读取文件失败: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
fmt.Printf("音频大小: %d bytes\n", len(audioData))
|
||
|
||
// 推断格式
|
||
format := inferFormat(filePath)
|
||
|
||
// 连接 WebSocket
|
||
wsURL := fmt.Sprintf("ws://%s/api/v1/stt/stream?format=%s&language=%s",
|
||
server[7:], format, lang) // 去掉 http:// 前缀
|
||
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
|
||
if err != nil {
|
||
fmt.Printf("WebSocket 连接失败: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
defer conn.Close()
|
||
|
||
fmt.Printf("WebSocket 已连接: %s\n", wsURL)
|
||
|
||
// 设置 interrupt 处理
|
||
interrupt := make(chan os.Signal, 1)
|
||
signal.Notify(interrupt, os.Interrupt)
|
||
|
||
// goroutine: 读取识别结果
|
||
done := make(chan struct{})
|
||
go func() {
|
||
defer close(done)
|
||
for {
|
||
_, msg, err := conn.ReadMessage()
|
||
if err != nil {
|
||
fmt.Printf("读取结果错误: %v\n", err)
|
||
return
|
||
}
|
||
fmt.Printf("◀ 结果: %s\n", string(msg))
|
||
}
|
||
}()
|
||
|
||
// 模拟实时流式发送音频(每 100ms 发送 3200 bytes)
|
||
chunkSize := 3200
|
||
totalSent := 0
|
||
start := time.Now()
|
||
var elapsed time.Duration
|
||
|
||
cancelled := false
|
||
for i := 0; i < len(audioData); i += chunkSize {
|
||
end := i + chunkSize
|
||
if end > len(audioData) {
|
||
end = len(audioData)
|
||
}
|
||
|
||
select {
|
||
case <-interrupt:
|
||
fmt.Println("\n用户中断")
|
||
cancelled = true
|
||
default:
|
||
}
|
||
if cancelled {
|
||
break
|
||
}
|
||
|
||
if err := conn.WriteMessage(websocket.BinaryMessage, audioData[i:end]); err != nil {
|
||
fmt.Printf("发送音频失败: %v\n", err)
|
||
break
|
||
}
|
||
totalSent += end - i
|
||
fmt.Printf("▶ 发送 %d/%d bytes (%.1f%%)\n", totalSent, len(audioData),
|
||
float64(totalSent)/float64(len(audioData))*100)
|
||
time.Sleep(100 * time.Millisecond)
|
||
}
|
||
|
||
elapsed = time.Since(start)
|
||
|
||
// 发送停止消息
|
||
conn.WriteMessage(websocket.TextMessage, []byte(`{"action":"stop"}`))
|
||
|
||
// 等待最后的结果
|
||
time.Sleep(2 * time.Second)
|
||
|
||
fmt.Printf("\n总耗时: %v, 总发送: %d bytes\n", elapsed, totalSent)
|
||
fmt.Println("✅ 实时语音识别测试完成")
|
||
}
|
||
|
||
func inferFormat(filename string) string {
|
||
ext := ""
|
||
for i := len(filename) - 1; i >= 0; i-- {
|
||
if filename[i] == '.' {
|
||
ext = filename[i+1:]
|
||
break
|
||
}
|
||
}
|
||
switch ext {
|
||
case "wav", "wave":
|
||
return "wav"
|
||
case "mp3", "mpeg":
|
||
return "mp3"
|
||
case "ogg", "opus":
|
||
return "ogg"
|
||
case "flac":
|
||
return "flac"
|
||
case "m4a", "mp4", "aac":
|
||
return "m4a"
|
||
default:
|
||
return "pcm"
|
||
}
|
||
}
|
||
|
||
func createMultipartBody(audioData []byte, filename, lang string) (io.ReadCloser, string, error) {
|
||
boundary := "cyrene-asr-test-boundary"
|
||
header := fmt.Sprintf("--%s\r\nContent-Disposition: form-data; name=\"audio\"; filename=\"%s\"\r\nContent-Type: application/octet-stream\r\n\r\n",
|
||
boundary, filename)
|
||
footer := fmt.Sprintf("\r\n--%s\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n%s\r\n--%s--\r\n",
|
||
boundary, lang, boundary)
|
||
|
||
pr, pw := io.Pipe()
|
||
go func() {
|
||
pw.Write([]byte(header))
|
||
pw.Write(audioData)
|
||
pw.Write([]byte(footer))
|
||
pw.Close()
|
||
}()
|
||
|
||
return pr, "multipart/form-data; boundary=" + boundary, nil
|
||
}
|