package main import ( "flag" "fmt" "io" "net/http" "os" "os/signal" "time" "github.com/gorilla/websocket" ) func main() { mode := flag.String("mode", "offline", "测试模式: offline (非实时) 或 realtime (实时)") file := flag.String("file", "", "音频文件路径 (WAV/MP3/OGG/FLAC)") server := flag.String("server", "http://localhost:8093", "Voice-Service 地址") lang := flag.String("lang", "zh", "语言代码") flag.Parse() if *file == "" { fmt.Println("用法: test_asr -mode=offline -file=audio.wav [-server=http://localhost:8093]") os.Exit(1) } switch *mode { case "offline": testOffline(*server, *file, *lang) case "realtime": testRealtime(*server, *file, *lang) default: fmt.Printf("未知模式: %s (支持: offline, realtime)\n", *mode) os.Exit(1) } } // testOffline 测试非实时语音识别 (HTTP multipart 上传)。 func testOffline(server, filePath, lang string) { fmt.Printf("=== 非实时 ASR 测试 ===\n") fmt.Printf("服务器: %s\n", server) fmt.Printf("文件: %s\n", filePath) fmt.Printf("语言: %s\n\n", lang) // 读取音频文件 audioData, err := os.ReadFile(filePath) if err != nil { fmt.Printf("读取文件失败: %v\n", err) os.Exit(1) } fmt.Printf("音频大小: %d bytes\n", len(audioData)) // 创建 multipart 请求 req, err := http.NewRequest("POST", server+"/api/v1/transcribe", nil) if err != nil { fmt.Printf("创建请求失败: %v\n", err) os.Exit(1) } // 使用 multipart form body, contentType, err := createMultipartBody(audioData, filePath, lang) if err != nil { fmt.Printf("创建 multipart body 失败: %v\n", err) os.Exit(1) } req.Body = body req.Header.Set("Content-Type", contentType) start := time.Now() resp, err := http.DefaultClient.Do(req) if err != nil { fmt.Printf("请求失败: %v\n", err) os.Exit(1) } defer resp.Body.Close() elapsed := time.Since(start) respBody, _ := io.ReadAll(resp.Body) fmt.Printf("状态码: %d\n", resp.StatusCode) fmt.Printf("耗时: %v\n", elapsed) fmt.Printf("响应:\n%s\n", string(respBody)) if resp.StatusCode == 200 { fmt.Println("\n✅ 非实时语音识别成功!") } else { fmt.Println("\n❌ 非实时语音识别失败") } } // testRealtime 测试实时语音识别 (WebSocket 流式)。 func testRealtime(server, filePath, lang string) { fmt.Printf("=== 实时 ASR 测试 ===\n") fmt.Printf("服务器: %s\n", server) fmt.Printf("文件: %s\n", filePath) fmt.Printf("语言: %s\n\n", lang) // 读取音频文件 audioData, err := os.ReadFile(filePath) if err != nil { fmt.Printf("读取文件失败: %v\n", err) os.Exit(1) } fmt.Printf("音频大小: %d bytes\n", len(audioData)) // 推断格式 format := inferFormat(filePath) // 连接 WebSocket wsURL := fmt.Sprintf("ws://%s/api/v1/stt/stream?format=%s&language=%s", server[7:], format, lang) // 去掉 http:// 前缀 conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil) if err != nil { fmt.Printf("WebSocket 连接失败: %v\n", err) os.Exit(1) } defer conn.Close() fmt.Printf("WebSocket 已连接: %s\n", wsURL) // 设置 interrupt 处理 interrupt := make(chan os.Signal, 1) signal.Notify(interrupt, os.Interrupt) // goroutine: 读取识别结果 done := make(chan struct{}) go func() { defer close(done) for { _, msg, err := conn.ReadMessage() if err != nil { fmt.Printf("读取结果错误: %v\n", err) return } fmt.Printf("◀ 结果: %s\n", string(msg)) } }() // 模拟实时流式发送音频(每 100ms 发送 3200 bytes) chunkSize := 3200 totalSent := 0 start := time.Now() var elapsed time.Duration cancelled := false for i := 0; i < len(audioData); i += chunkSize { end := i + chunkSize if end > len(audioData) { end = len(audioData) } select { case <-interrupt: fmt.Println("\n用户中断") cancelled = true default: } if cancelled { break } if err := conn.WriteMessage(websocket.BinaryMessage, audioData[i:end]); err != nil { fmt.Printf("发送音频失败: %v\n", err) break } totalSent += end - i fmt.Printf("▶ 发送 %d/%d bytes (%.1f%%)\n", totalSent, len(audioData), float64(totalSent)/float64(len(audioData))*100) time.Sleep(100 * time.Millisecond) } elapsed = time.Since(start) // 发送停止消息 conn.WriteMessage(websocket.TextMessage, []byte(`{"action":"stop"}`)) // 等待最后的结果 time.Sleep(2 * time.Second) fmt.Printf("\n总耗时: %v, 总发送: %d bytes\n", elapsed, totalSent) fmt.Println("✅ 实时语音识别测试完成") } func inferFormat(filename string) string { ext := "" for i := len(filename) - 1; i >= 0; i-- { if filename[i] == '.' { ext = filename[i+1:] break } } switch ext { case "wav", "wave": return "wav" case "mp3", "mpeg": return "mp3" case "ogg", "opus": return "ogg" case "flac": return "flac" case "m4a", "mp4", "aac": return "m4a" default: return "pcm" } } func createMultipartBody(audioData []byte, filename, lang string) (io.ReadCloser, string, error) { boundary := "cyrene-asr-test-boundary" header := fmt.Sprintf("--%s\r\nContent-Disposition: form-data; name=\"audio\"; filename=\"%s\"\r\nContent-Type: application/octet-stream\r\n\r\n", boundary, filename) footer := fmt.Sprintf("\r\n--%s\r\nContent-Disposition: form-data; name=\"language\"\r\n\r\n%s\r\n--%s--\r\n", boundary, lang, boundary) pr, pw := io.Pipe() go func() { pw.Write([]byte(header)) pw.Write(audioData) pw.Write([]byte(footer)) pw.Close() }() return pr, "multipart/form-data; boundary=" + boundary, nil }