feat: Phase 5 STT — DashScope Gummy 实时语音识别 + 本地 Whisper 回退

- DashScope WebSocket STT 客户端 (gummy-chat-v1)
- 双引擎架构: DashScope 优先, Whisper 本地回退
- 实时流式 STT WebSocket 端点
- DevTools 模型搜索框焦点修复

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:15:43 +08:00
parent 0717928496
commit b1e89c606e
9 changed files with 545 additions and 84 deletions
@@ -0,0 +1,118 @@
package handler
import (
"encoding/json"
"net/http"
"sync"
"time"
"github.com/gorilla/websocket"
"github.com/yourname/cyrene-ai/pkg/logger"
"github.com/yourname/cyrene-ai/voice-service/internal/service"
)
// upgrader holds the settings used to upgrade HTTP requests to WebSocket
// connections: 4 KiB read and write buffers and an origin check that accepts
// every request.
//
// NOTE(review): because CheckOrigin always returns true, a page served from any
// origin can open this socket from a browser. Confirm that this is intended
// (e.g. access is restricted elsewhere) before exposing the endpoint publicly.
var upgrader = websocket.Upgrader{
	CheckOrigin:     func(*http.Request) bool { return true },
	ReadBufferSize:  4 << 10,
	WriteBufferSize: 4 << 10,
}
// StreamingSTTHandler serves real-time speech recognition over a WebSocket.
//
// The client sends audio as binary WebSocket frames; the handler hands each
// frame to the STT service and reports the recognized text back as a JSON
// message on the same connection.
//
// NOTE(review): which engine performs the recognition (DashScope or the local
// fallback) is decided inside service.STTService, not here — confirm there.
type StreamingSTTHandler struct {
	// svc is the STT service used for availability checks and transcription.
	svc *service.STTService
}
// NewStreamingSTTHandler returns a streaming STT handler backed by svc.
func NewStreamingSTTHandler(svc *service.STTService) *StreamingSTTHandler {
	handler := new(StreamingSTTHandler)
	handler.svc = svc
	return handler
}
// HandleStreamingSTT upgrades the request to a WebSocket and runs a real-time
// STT session on it.
//
// Route: GET /api/v1/stt/stream
//
// Query parameters:
//   - language: recognition language passed to the STT service (default "zh").
//   - format: audio format of the binary frames (default "pcm").
//
// Protocol:
//   - Each binary frame is passed to the STT service. Non-empty text is answered
//     with {"type":"result","text":...,"final":true}; a transcription failure
//     is answered with {"type":"error","error":...} and the session continues.
//   - A text frame holding the JSON object {"action":"stop"} is answered with
//     {"type":"done","action":"stop"} and ends the session. Any other text
//     frame is ignored.
//
// If the STT service is unavailable the request is rejected with HTTP 503
// before the upgrade. The session ends when the client disconnects, sends
// "stop", or a write to the client fails.
func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.Request) {
	// Upper bound for a single outgoing message. Write deadlines are absolute
	// points in time, so the deadline has to be renewed before every write;
	// setting it once at connect time makes every write fail 60s into the
	// session.
	const writeTimeout = 60 * time.Second

	if !h.svc.IsAvailable() {
		writeJSON(w, http.StatusServiceUnavailable, map[string]interface{}{
			"error": "STT 引擎不可用",
		})
		return
	}

	// Both parameters are fixed for the whole session, so resolve them once
	// instead of re-parsing the query string for every audio frame.
	query := r.URL.Query()
	language := query.Get("language")
	if language == "" {
		language = "zh"
	}
	format := query.Get("format")
	if format == "" {
		format = "pcm"
	}

	conn, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		// Upgrade has already written the HTTP error response.
		logger.Printf("[stream-stt] WebSocket 升级失败: %v", err)
		return
	}
	defer conn.Close()
	logger.Printf("[stream-stt] 客户端已连接")

	// send writes one JSON message with a fresh deadline. The mutex serializes
	// writers: a websocket connection supports only one concurrent writer.
	var mu sync.Mutex
	send := func(msg map[string]interface{}) error {
		mu.Lock()
		defer mu.Unlock()
		if err := conn.SetWriteDeadline(time.Now().Add(writeTimeout)); err != nil {
			return err
		}
		return conn.WriteJSON(msg)
	}

	for {
		msgType, data, err := conn.ReadMessage()
		if err != nil {
			// Normal and going-away closes are expected ends of a session;
			// only anything else is worth logging.
			if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseNormalClosure) {
				logger.Printf("[stream-stt] 读取错误: %v", err)
			}
			return
		}

		switch msgType {
		case websocket.TextMessage:
			// Text frames carry control messages; only "stop" is recognized.
			var ctrl map[string]interface{}
			if json.Unmarshal(data, &ctrl) != nil || ctrl["action"] != "stop" {
				continue
			}
			if err := send(map[string]interface{}{
				"type":   "done",
				"action": "stop",
			}); err != nil {
				logger.Printf("[stream-stt] 写入失败: %v", err)
			}
			return

		case websocket.BinaryMessage:
			// Binary frames carry audio; transcribe and report the outcome.
			text, err := h.svc.Transcribe(data, format, language)

			var reply map[string]interface{}
			switch {
			case err != nil:
				reply = map[string]interface{}{
					"type":  "error",
					"error": err.Error(),
				}
			case text != "":
				reply = map[string]interface{}{
					"type":  "result",
					"text":  text,
					"final": true,
				}
			default:
				// Nothing recognized in this frame: stay silent.
				continue
			}

			// A failed write leaves the connection unusable, so stop instead
			// of transcribing audio nobody can receive the results for.
			if err := send(reply); err != nil {
				logger.Printf("[stream-stt] 写入失败: %v", err)
				return
			}
		}
	}
}
// RegisterStreamingRoutes mounts the streaming STT WebSocket endpoint on mux
// at /api/v1/stt/stream.
func (h *StreamingSTTHandler) RegisterStreamingRoutes(mux *http.ServeMux) {
	const streamPath = "/api/v1/stt/stream"
	mux.Handle(streamPath, http.HandlerFunc(h.HandleStreamingSTT))
}