7eb5e984c2
- auth: Login 简化为管理员始终通过 .env 验证,GetProfile 修正 admin DB 查询 - devtools: .sh/.bat 同步重写为完整 CLI (start/stop/status/logs/build/db:*) - docs: 新增 devtools.md,重写 Deploy.md (三种方式+Windows说明),更新 README/gateway-api - voice-service: DashScope 实时流式 STT 支持 - gateway: Phase 6 多模型配置 + 多端客户端管理 + WebSocket 增强 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
212 lines
5.6 KiB
Go
212 lines
5.6 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/yourname/cyrene-ai/voice-service/internal/config"
|
||
)
|
||
|
||
// SupportedLanguages lists the language codes accepted for speech-to-text.
var SupportedLanguages = []string{
	"zh",
	"en",
	"ja",
	"ko",
	"auto",
}
|
||
|
||
// STTService 语音转文字服务。
|
||
// 优先使用 DashScope API,不可用时回退到本地 Whisper。
|
||
type STTService struct {
|
||
whisperBinary string
|
||
whisperModel string
|
||
language string
|
||
dashscope *DashScopeSTT // 实时 ASR (qwen3-asr-flash-realtime)
|
||
}
|
||
|
||
// NewSTTService 创建 STT 服务。
|
||
func NewSTTService(cfg *config.Config) *STTService {
|
||
// 实时模型用于所有 WebSocket ASR 请求(支持 one-shot 和 streaming)
|
||
// 离线模型 (qwen3-asr-flash-2026-02-10) 是 HTTP REST API,暂未实现
|
||
model := cfg.DashScopeSTTRealtime
|
||
if model == "" {
|
||
model = cfg.DashScopeModel
|
||
}
|
||
return &STTService{
|
||
whisperBinary: cfg.WhisperBinary,
|
||
whisperModel: cfg.WhisperModel,
|
||
language: cfg.WhisperLanguage,
|
||
dashscope: NewDashScopeSTT(cfg.DashScopeAPIKey, model),
|
||
}
|
||
}
|
||
|
||
// IsAvailable 检查是否有任一 STT 引擎可用。
|
||
func (s *STTService) IsAvailable() bool {
|
||
if s.dashscope.IsAvailable() {
|
||
return true
|
||
}
|
||
_, err := os.Stat(s.whisperBinary)
|
||
return err == nil
|
||
}
|
||
|
||
// Transcribe 将音频数据转录为文字。
|
||
// 优先使用 DashScope,不可用时回退到本地 Whisper。
|
||
func (s *STTService) Transcribe(audioData []byte, format string, language string) (string, error) {
|
||
if language == "" {
|
||
language = s.language
|
||
}
|
||
if !isSupportedLanguage(language) {
|
||
return "", fmt.Errorf("不支持的语言: %s,支持的语言: %s", language, strings.Join(SupportedLanguages, ", "))
|
||
}
|
||
|
||
// 优先 DashScope
|
||
if s.dashscope.IsAvailable() {
|
||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||
defer cancel()
|
||
text, err := s.dashscope.Transcribe(ctx, audioData, format, language)
|
||
if err == nil {
|
||
return text, nil
|
||
}
|
||
// DashScope 失败,返回具体错误而不是回退到 Whisper
|
||
return "", fmt.Errorf("语音识别失败: %w", err)
|
||
}
|
||
|
||
// 回退到本地 Whisper
|
||
return s.transcribeWhisper(audioData, format, language)
|
||
}
|
||
|
||
// StartStreaming 创建持久的流式语音识别会话。
|
||
func (s *STTService) StartStreaming(format, language string) (*StreamingSession, error) {
|
||
if !s.dashscope.IsAvailable() {
|
||
return nil, fmt.Errorf("流式识别需要 DashScope,请配置 DASHSCOPE_API_KEY")
|
||
}
|
||
if language == "" {
|
||
language = s.language
|
||
}
|
||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||
defer cancel()
|
||
return s.dashscope.StartStreaming(ctx, format, language)
|
||
}
|
||
|
||
// transcribeWhisper 使用本地 Whisper 引擎转录。
|
||
func (s *STTService) transcribeWhisper(audioData []byte, format string, language string) (string, error) {
|
||
if _, err := os.Stat(s.whisperBinary); err != nil {
|
||
return "", fmt.Errorf("STT 引擎不可用: DashScope API Key 未配置且 Whisper 未安装")
|
||
}
|
||
|
||
ext := normalizeExt(format)
|
||
tmpFile, err := os.CreateTemp("/tmp", "cyrene-stt-*"+ext)
|
||
if err != nil {
|
||
return "", fmt.Errorf("创建临时文件失败: %w", err)
|
||
}
|
||
tmpPath := tmpFile.Name()
|
||
defer os.Remove(tmpPath)
|
||
|
||
if _, err := tmpFile.Write(audioData); err != nil {
|
||
tmpFile.Close()
|
||
return "", fmt.Errorf("写入临时文件失败: %w", err)
|
||
}
|
||
tmpFile.Close()
|
||
|
||
inputPath := tmpPath
|
||
if format != "wav" && format != "" {
|
||
convertedPath := tmpPath + ".wav"
|
||
if err := convertToWav(tmpPath, convertedPath); err == nil {
|
||
defer os.Remove(convertedPath)
|
||
inputPath = convertedPath
|
||
}
|
||
}
|
||
|
||
outputPrefix := strings.TrimSuffix(inputPath, filepath.Ext(inputPath))
|
||
outputTxt := outputPrefix + ".txt"
|
||
|
||
cmd := exec.Command(s.whisperBinary,
|
||
"-m", s.whisperModel,
|
||
"-l", language,
|
||
"-f", inputPath,
|
||
"-otxt",
|
||
"-of", outputPrefix,
|
||
)
|
||
cmd.Stderr = os.Stderr
|
||
|
||
if err := cmd.Run(); err != nil {
|
||
os.Remove(outputTxt)
|
||
return "", fmt.Errorf("whisper 转录失败: %w", err)
|
||
}
|
||
|
||
defer os.Remove(outputTxt)
|
||
txtData, err := os.ReadFile(outputTxt)
|
||
if err != nil {
|
||
return "", fmt.Errorf("读取转录结果失败: %w", err)
|
||
}
|
||
return strings.TrimSpace(string(txtData)), nil
|
||
}
|
||
|
||
// GetStatus 返回服务状态。
|
||
func (s *STTService) GetStatus() map[string]interface{} {
|
||
binaryAvailable := false
|
||
if _, err := os.Stat(s.whisperBinary); err == nil {
|
||
binaryAvailable = true
|
||
}
|
||
modelExists := false
|
||
if _, err := os.Stat(s.whisperModel); err == nil {
|
||
modelExists = true
|
||
}
|
||
|
||
return map[string]interface{}{
|
||
"available": s.IsAvailable(),
|
||
"primary": "dashscope",
|
||
"dashscope": s.dashscope.GetStatus(),
|
||
"whisper": map[string]interface{}{
|
||
"available": binaryAvailable && modelExists,
|
||
"binary_available": binaryAvailable,
|
||
"model_loaded": modelExists,
|
||
"model_name": filepath.Base(s.whisperModel),
|
||
},
|
||
"default_language": s.language,
|
||
"supported_languages": SupportedLanguages,
|
||
}
|
||
}
|
||
|
||
// normalizeExt maps an audio format name to a file extension, including the
// leading dot. Matching is case-insensitive; unrecognized or empty formats
// yield ".wav".
func normalizeExt(format string) string {
	ext := ".wav" // covers "wav" and every unrecognized format
	switch name := strings.ToLower(format); name {
	case "mp3", "mpeg":
		ext = ".mp3"
	case "ogg", "opus":
		ext = ".ogg"
	case "flac":
		ext = ".flac"
	case "m4a", "mp4", "aac":
		ext = ".m4a"
	}
	return ext
}
|
||
|
||
// isSupportedLanguage 检查语言是否支持。
|
||
func isSupportedLanguage(lang string) bool {
|
||
for _, l := range SupportedLanguages {
|
||
if l == lang {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// convertToWav uses ffmpeg to convert the audio file at inputPath into a
// 16 kHz, mono, 16-bit PCM WAV file at outputPath, overwriting any existing
// file there.
//
// It returns the error from running ffmpeg, which is non-nil when ffmpeg
// cannot be started or exits with a non-zero status. ffmpeg's output is
// discarded because the command's Stdout and Stderr are left unset.
func convertToWav(inputPath, outputPath string) error {
	cmd := exec.Command("ffmpeg",
		// Global options go first; placed after the output file, ffmpeg
		// reports -y as a trailing option that may be ignored.
		"-y",
		"-i", inputPath,
		"-ar", "16000",
		"-ac", "1",
		"-c:a", "pcm_s16le",
		outputPath,
	)
	return cmd.Run()
}
|