feat: Phase 5 STT — DashScope Gummy 实时语音识别 + 本地 Whisper 回退
- DashScope WebSocket STT 客户端 (gummy-chat-v1)
- 双引擎架构: DashScope 优先, Whisper 本地回退
- 实时流式 STT WebSocket 端点
- DevTools 模型搜索框焦点修复

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,118 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/websocket"
|
||||
|
||||
"github.com/yourname/cyrene-ai/pkg/logger"
|
||||
"github.com/yourname/cyrene-ai/voice-service/internal/service"
|
||||
)
|
||||
|
||||
var upgrader = websocket.Upgrader{
|
||||
ReadBufferSize: 4096,
|
||||
WriteBufferSize: 4096,
|
||||
CheckOrigin: func(r *http.Request) bool {
|
||||
return true
|
||||
},
|
||||
}
|
||||
|
||||
// StreamingSTTHandler serves the real-time speech-recognition WebSocket
// endpoint. Clients stream binary audio frames over the WebSocket and receive
// recognition results as JSON messages on the same connection.
//
// NOTE(review): which recognition engine processes the audio is decided inside
// service.STTService and is not visible from this type.
type StreamingSTTHandler struct {
	// svc is the speech-to-text service the handler delegates to.
	svc *service.STTService
}
|
||||
|
||||
// NewStreamingSTTHandler 创建流式 STT 处理器。
|
||||
func NewStreamingSTTHandler(svc *service.STTService) *StreamingSTTHandler {
|
||||
return &StreamingSTTHandler{svc: svc}
|
||||
}
|
||||
|
||||
// HandleStreamingSTT 处理 WebSocket 升级和实时 STT 会话。
|
||||
// GET /api/v1/stt/stream
|
||||
func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.Request) {
|
||||
if !h.svc.IsAvailable() {
|
||||
writeJSON(w, http.StatusServiceUnavailable, map[string]interface{}{
|
||||
"error": "STT 引擎不可用",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
language := r.URL.Query().Get("language")
|
||||
if language == "" {
|
||||
language = "zh"
|
||||
}
|
||||
|
||||
conn, err := upgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
logger.Printf("[stream-stt] WebSocket 升级失败: %v", err)
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
logger.Printf("[stream-stt] 客户端已连接")
|
||||
|
||||
var mu sync.Mutex
|
||||
conn.SetWriteDeadline(time.Now().Add(60 * time.Second))
|
||||
|
||||
// 读取音频帧并发送到 DashScope
|
||||
for {
|
||||
msgType, data, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseNormalClosure) {
|
||||
logger.Printf("[stream-stt] 读取错误: %v", err)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// 支持文本控制消息
|
||||
if msgType == websocket.TextMessage {
|
||||
var ctrl map[string]interface{}
|
||||
if json.Unmarshal(data, &ctrl) == nil {
|
||||
if ctrl["action"] == "stop" {
|
||||
mu.Lock()
|
||||
conn.WriteJSON(map[string]interface{}{
|
||||
"type": "done",
|
||||
"action": "stop",
|
||||
})
|
||||
mu.Unlock()
|
||||
break
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// 二进制音频帧:进行识别
|
||||
if msgType == websocket.BinaryMessage {
|
||||
format := r.URL.Query().Get("format")
|
||||
if format == "" {
|
||||
format = "pcm"
|
||||
}
|
||||
|
||||
text, err := h.svc.Transcribe(data, format, language)
|
||||
mu.Lock()
|
||||
if err != nil {
|
||||
conn.WriteJSON(map[string]interface{}{
|
||||
"type": "error",
|
||||
"error": err.Error(),
|
||||
})
|
||||
} else if text != "" {
|
||||
conn.WriteJSON(map[string]interface{}{
|
||||
"type": "result",
|
||||
"text": text,
|
||||
"final": true,
|
||||
})
|
||||
}
|
||||
mu.Unlock()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterStreamingRoutes 注册流式 STT 路由。
|
||||
func (h *StreamingSTTHandler) RegisterStreamingRoutes(mux *http.ServeMux) {
|
||||
mux.HandleFunc("/api/v1/stt/stream", h.HandleStreamingSTT)
|
||||
}
|
||||
Reference in New Issue
Block a user