refactor: 认证系统重构 + DevTools CLI 重写 + 文档全面更新

- auth: Login 简化为管理员始终通过 .env 验证,GetProfile 修正 admin DB 查询
- devtools: .sh/.bat 同步重写为完整 CLI (start/stop/status/logs/build/db:*)
- docs: 新增 devtools.md,重写 Deploy.md (三种方式+Windows说明),更新 README/gateway-api
- voice-service: DashScope 实时流式 STT 支持
- gateway: Phase 6 多模型配置 + 多端客户端管理 + WebSocket 增强

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-24 14:55:47 +08:00
parent 83e94d9e97
commit 7eb5e984c2
18 changed files with 2405 additions and 677 deletions
@@ -21,8 +21,8 @@ var upgrader = websocket.Upgrader{
}
// StreamingSTTHandler 处理实时语音识别 WebSocket 连接。
// 客户端通过 WebSocket 流式发送音频二进制帧,服务端逐帧转发到 DashScope
// 将识别结果通过 WebSocket JSON 消息返回
// 客户端通过 WebSocket 流式发送音频二进制帧,服务端通过一条持久的
// DashScope WebSocket 连接转发音频并持续返回识别结果
type StreamingSTTHandler struct {
// svc is the speech-to-text service the handler delegates recognition to
// (the handler calls methods such as StartStreaming on it).
svc *service.STTService
}
@@ -46,6 +46,10 @@ func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.
if language == "" {
language = "zh"
}
format := r.URL.Query().Get("format")
if format == "" {
format = "pcm"
}
conn, err := upgrader.Upgrade(w, r, nil)
if err != nil {
@@ -54,12 +58,47 @@ func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.
}
defer conn.Close()
logger.Printf("[stream-stt] 客户端已连接")
logger.Printf("[stream-stt] 客户端已连接, format=%s, language=%s", format, language)
// 创建持久的 DashScope 流式会话
session, err := h.svc.StartStreaming(format, language)
if err != nil {
logger.Printf("[stream-stt] 创建 DashScope 会话失败: %v", err)
conn.WriteJSON(map[string]interface{}{
"type": "error",
"error": "启动语音识别失败: " + err.Error(),
})
return
}
defer session.Close()
var mu sync.Mutex
conn.SetWriteDeadline(time.Now().Add(60 * time.Second))
conn.SetReadDeadline(time.Now().Add(300 * time.Second)) // 5 分钟超时
// 读取音频帧并发送到 DashScope
// goroutine: 读取 DashScope 结果并推送到客户端
resultDone := make(chan struct{})
go func() {
defer close(resultDone)
for result := range session.Results() {
mu.Lock()
if result.Error != "" {
logger.Printf("[stream-stt] DashScope 错误: %s", result.Error)
conn.WriteJSON(map[string]interface{}{
"type": "error",
"error": result.Error,
})
} else if result.Text != "" {
conn.WriteJSON(map[string]interface{}{
"type": "result",
"text": result.Text,
"isFinal": result.IsFinal,
})
}
mu.Unlock()
}
}()
// 主循环: 读取客户端音频帧
for {
msgType, data, err := conn.ReadMessage()
if err != nil {
@@ -69,11 +108,13 @@ func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.
break
}
// 支持文本控制消息
// 文本控制消息
if msgType == websocket.TextMessage {
var ctrl map[string]interface{}
if json.Unmarshal(data, &ctrl) == nil {
if ctrl["action"] == "stop" {
action, _ := ctrl["action"].(string)
if action == "stop" {
logger.Printf("[stream-stt] 客户端请求停止")
mu.Lock()
conn.WriteJSON(map[string]interface{}{
"type": "done",
@@ -82,34 +123,33 @@ func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.
mu.Unlock()
break
}
// 支持动态切换语言
if lang, ok := ctrl["language"].(string); ok && lang != "" {
language = lang
logger.Printf("[stream-stt] 切换语言: %s", lang)
}
}
continue
}
// 二进制音频帧:进行识别
if msgType == websocket.BinaryMessage {
format := r.URL.Query().Get("format")
if format == "" {
format = "pcm"
}
text, err := h.svc.Transcribe(data, format, language)
mu.Lock()
if err != nil {
// 二进制音频帧: 发送到 DashScope
if msgType == websocket.BinaryMessage && len(data) > 0 {
if err := session.SendAudio(data); err != nil {
logger.Printf("[stream-stt] 发送音频帧失败: %v", err)
mu.Lock()
conn.WriteJSON(map[string]interface{}{
"type": "error",
"error": err.Error(),
})
} else if text != "" {
conn.WriteJSON(map[string]interface{}{
"type": "result",
"text": text,
"final": true,
"error": "发送音频失败: " + err.Error(),
})
mu.Unlock()
break
}
mu.Unlock()
}
}
// 等待结果推送完成
<-resultDone
logger.Printf("[stream-stt] 会话结束")
}
// RegisterStreamingRoutes 注册流式 STT 路由。