package handler import ( "encoding/json" "net/http" "sync" "time" "github.com/gorilla/websocket" "github.com/yourname/cyrene-ai/pkg/logger" "github.com/yourname/cyrene-ai/voice-service/internal/service" ) var upgrader = websocket.Upgrader{ ReadBufferSize: 4096, WriteBufferSize: 4096, CheckOrigin: func(r *http.Request) bool { return true }, } // StreamingSTTHandler 处理实时语音识别 WebSocket 连接。 // 客户端通过 WebSocket 流式发送音频二进制帧,服务端通过一条持久的 // DashScope WebSocket 连接转发音频并持续返回识别结果。 type StreamingSTTHandler struct { svc *service.STTService } // NewStreamingSTTHandler 创建流式 STT 处理器。 func NewStreamingSTTHandler(svc *service.STTService) *StreamingSTTHandler { return &StreamingSTTHandler{svc: svc} } // HandleStreamingSTT 处理 WebSocket 升级和实时 STT 会话。 // GET /api/v1/stt/stream func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.Request) { if !h.svc.IsAvailable() { writeJSON(w, http.StatusServiceUnavailable, map[string]interface{}{ "error": "STT 引擎不可用", }) return } language := r.URL.Query().Get("language") if language == "" { language = "zh" } format := r.URL.Query().Get("format") if format == "" { format = "pcm" } conn, err := upgrader.Upgrade(w, r, nil) if err != nil { logger.Printf("[stream-stt] WebSocket 升级失败: %v", err) return } defer conn.Close() logger.Printf("[stream-stt] 客户端已连接, format=%s, language=%s", format, language) // 创建持久的 DashScope 流式会话 session, err := h.svc.StartStreaming(format, language) if err != nil { logger.Printf("[stream-stt] 创建 DashScope 会话失败: %v", err) conn.WriteJSON(map[string]interface{}{ "type": "error", "error": "启动语音识别失败: " + err.Error(), }) return } defer session.Close() var mu sync.Mutex conn.SetReadDeadline(time.Now().Add(300 * time.Second)) // 5 分钟超时 // goroutine: 读取 DashScope 结果并推送到客户端 resultDone := make(chan struct{}) go func() { defer close(resultDone) for result := range session.Results() { mu.Lock() if result.Error != "" { logger.Printf("[stream-stt] DashScope 错误: %s", result.Error) conn.WriteJSON(map[string]interface{}{ "type": "error", "error": result.Error, }) } else if result.Text != "" { conn.WriteJSON(map[string]interface{}{ "type": "result", "text": result.Text, "isFinal": result.IsFinal, }) } mu.Unlock() } }() // 主循环: 读取客户端音频帧 for { msgType, data, err := conn.ReadMessage() if err != nil { if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseNormalClosure) { logger.Printf("[stream-stt] 读取错误: %v", err) } break } // 文本控制消息 if msgType == websocket.TextMessage { var ctrl map[string]interface{} if json.Unmarshal(data, &ctrl) == nil { action, _ := ctrl["action"].(string) if action == "stop" { logger.Printf("[stream-stt] 客户端请求停止") mu.Lock() conn.WriteJSON(map[string]interface{}{ "type": "done", "action": "stop", }) mu.Unlock() break } // 支持动态切换语言 if lang, ok := ctrl["language"].(string); ok && lang != "" { language = lang logger.Printf("[stream-stt] 切换语言: %s", lang) } } continue } // 二进制音频帧: 发送到 DashScope if msgType == websocket.BinaryMessage && len(data) > 0 { if err := session.SendAudio(data); err != nil { logger.Printf("[stream-stt] 发送音频帧失败: %v", err) mu.Lock() conn.WriteJSON(map[string]interface{}{ "type": "error", "error": "发送音频失败: " + err.Error(), }) mu.Unlock() break } } } // 等待结果推送完成 <-resultDone logger.Printf("[stream-stt] 会话结束") } // RegisterStreamingRoutes 注册流式 STT 路由。 func (h *StreamingSTTHandler) RegisterStreamingRoutes(mux *http.ServeMux) { mux.HandleFunc("/api/v1/stt/stream", h.HandleStreamingSTT) }