feat: Phase 5 STT — DashScope Gummy 实时语音识别 + 本地 Whisper 回退

- DashScope WebSocket STT 客户端 (gummy-chat-v1)
- 双引擎架构: DashScope 优先, Whisper 本地回退
- 实时流式 STT WebSocket 端点
- DevTools 模型搜索框焦点修复

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:15:43 +08:00
parent 0717928496
commit b1e89c606e
9 changed files with 545 additions and 84 deletions
+18 -12
View File
@@ -14,44 +14,50 @@ import (
func main() {
logger.SetDefault(logger.New("voice-service"))
logger.Println("🎤 Voice-Service (STT + TTS) 启动中...")
logger.Println("Voice-Service (STT + TTS) 启动中...")
// 加载配置
cfg := config.Load()
logger.Printf("配置: 端口=%s, WhisperBinary=%s, WhisperModel=%s, Language=%s",
cfg.Port, cfg.WhisperBinary, cfg.WhisperModel, cfg.WhisperLanguage)
logger.Printf("配置: 端口=%s, DashScope=%v, WhisperBinary=%s, WhisperModel=%s, Language=%s",
cfg.Port, cfg.DashScopeAPIKey != "", cfg.WhisperBinary, cfg.WhisperModel, cfg.WhisperLanguage)
// 初始化 STT 服务
// 初始化 STT 服务 (DashScope 优先, Whisper 回退)
sttSvc := service.NewSTTService(cfg)
// 检查 whisper 引擎是否可用
if !sttSvc.IsAvailable() {
logger.Printf("⚠️ Whisper 引擎未安装 (%s)STT 功能不可用", cfg.WhisperBinary)
logger.Printf(" 请运行: bash scripts/setup-whisper.sh")
if sttSvc.IsAvailable() {
dashAvailable := cfg.DashScopeAPIKey != ""
if dashAvailable {
logger.Println("STT: DashScope Gummy (主) + Whisper (回退)")
} else {
logger.Println("STT: Whisper 本地引擎")
}
} else {
logger.Println(" Whisper 引擎已就绪")
logger.Printf("STT 引擎不可用。请配置 DASHSCOPE_API_KEY 或安装 Whisper")
logger.Printf(" Whisper 安装: bash scripts/setup-whisper.sh")
}
// 初始化 TTS 服务
ttsSvc := service.NewTTSService()
if !ttsSvc.IsAvailable() {
logger.Println("⚠️ TTS 引擎不可用 (请安装: pip install edge-tts)")
logger.Println("TTS 引擎不可用 (请安装: pip install edge-tts)")
} else {
ttsStatus := ttsSvc.GetEngineStatus()
logger.Printf("TTS 引擎已就绪 (引擎: %s)", ttsStatus["engine"])
logger.Printf("TTS 引擎已就绪 (引擎: %s)", ttsStatus["engine"])
}
// 初始化 HTTP 处理器
sttHandler := handler.NewSTTHandler(sttSvc, cfg)
sttHandler.SetTTSService(ttsSvc)
ttsHandler := handler.NewTTSHandler(ttsSvc)
streamingHandler := handler.NewStreamingSTTHandler(sttSvc)
// 注册路由
mux := http.NewServeMux()
sttHandler.RegisterRoutes(mux)
ttsHandler.RegisterRoutes(mux)
streamingHandler.RegisterStreamingRoutes(mux)
// 启动 HTTP 服务
srv := &http.Server{
@@ -60,7 +66,7 @@ func main() {
}
go func() {
logger.Printf("🚀 Voice-Service 已启动在端口 %s", cfg.Port)
logger.Printf("Voice-Service 已启动在端口 %s", cfg.Port)
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
logger.Fatalf("服务启动失败: %v", err)
}
+4 -1
View File
@@ -4,4 +4,7 @@ go 1.26.2
replace github.com/yourname/cyrene-ai/pkg/logger => ../pkg/logger
require github.com/yourname/cyrene-ai/pkg/logger v0.0.0
require (
github.com/gorilla/websocket v1.5.3
github.com/yourname/cyrene-ai/pkg/logger v0.0.0
)
+2
View File
@@ -0,0 +1,2 @@
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
@@ -9,6 +9,10 @@ type Config struct {
WhisperModel string
WhisperLanguage string
MaxAudioSize int64 // 字节
// DashScope STT 配置
DashScopeAPIKey string
DashScopeModel string
}
// Load 从环境变量加载配置
@@ -19,6 +23,8 @@ func Load() *Config {
WhisperModel: getEnv("WHISPER_MODEL", "./whisper.cpp/models/ggml-small.bin"),
WhisperLanguage: getEnv("WHISPER_LANGUAGE", "zh"),
MaxAudioSize: 10 * 1024 * 1024, // 10MB
DashScopeAPIKey: getEnv("DASHSCOPE_API_KEY", ""),
DashScopeModel: getEnv("DASHSCOPE_STT_MODEL", "gummy-chat-v1"),
}
}
@@ -0,0 +1,118 @@
package handler
import (
"encoding/json"
"net/http"
"sync"
"time"
"github.com/gorilla/websocket"
"github.com/yourname/cyrene-ai/pkg/logger"
"github.com/yourname/cyrene-ai/voice-service/internal/service"
)
// upgrader turns incoming HTTP requests into WebSocket connections, using
// 4096-byte read and write buffers.
var upgrader = websocket.Upgrader{
ReadBufferSize: 4096,
WriteBufferSize: 4096,
// Every Origin header is accepted, so any web page may open this socket.
// NOTE(review): confirm the endpoint is never reachable from untrusted
// browsers, or restrict the allowed origins here.
CheckOrigin: func(r *http.Request) bool {
return true
},
}
// StreamingSTTHandler serves the speech-to-text WebSocket endpoint.
// Clients send audio as binary WebSocket frames; every frame is passed to the
// STT service's Transcribe call on its own, and the recognized text is sent
// back to the client as a JSON message on the same connection.
type StreamingSTTHandler struct {
// svc is the STT service used to transcribe each received audio frame.
svc *service.STTService
}
// NewStreamingSTTHandler returns a streaming STT handler that transcribes
// through svc.
func NewStreamingSTTHandler(svc *service.STTService) *StreamingSTTHandler {
	h := new(StreamingSTTHandler)
	h.svc = svc
	return h
}
// HandleStreamingSTT upgrades the request to a WebSocket and runs a
// speech-to-text session on it.
//
// Route: GET /api/v1/stt/stream
//
// Query parameters (read once, before the upgrade):
//   - language: recognition language; defaults to "zh".
//   - format: audio format of the binary frames; defaults to "pcm".
//
// Protocol:
//   - A binary frame is handed to the STT service as one audio clip. A
//     non-empty transcript is answered with
//     {"type":"result","text":...,"final":true}; a transcription failure is
//     answered with {"type":"error","error":...}. An empty transcript
//     produces no reply.
//   - A text frame containing JSON with "action":"stop" is answered with
//     {"type":"done","action":"stop"} and ends the session. Any other text
//     frame is ignored.
//
// When no STT engine is available the request is rejected with HTTP 503
// before the upgrade is attempted. The session also ends on the first read
// or write failure.
func (h *StreamingSTTHandler) HandleStreamingSTT(w http.ResponseWriter, r *http.Request) {
	if !h.svc.IsAvailable() {
		writeJSON(w, http.StatusServiceUnavailable, map[string]interface{}{
			"error": "STT 引擎不可用",
		})
		return
	}

	// The query string cannot change during the session, so resolve both
	// parameters once instead of on every frame.
	query := r.URL.Query()
	language := query.Get("language")
	if language == "" {
		language = "zh"
	}
	format := query.Get("format")
	if format == "" {
		format = "pcm"
	}

	conn, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		logger.Printf("[stream-stt] WebSocket 升级失败: %v", err)
		return
	}
	defer conn.Close()

	logger.Printf("[stream-stt] 客户端已连接")

	const writeTimeout = 60 * time.Second

	// send serializes writes and refreshes the write deadline for each
	// message. A deadline is an absolute point in time: setting it only once
	// at connect time would make every write fail after the first 60 seconds.
	var mu sync.Mutex
	send := func(msg map[string]interface{}) error {
		mu.Lock()
		defer mu.Unlock()
		if err := conn.SetWriteDeadline(time.Now().Add(writeTimeout)); err != nil {
			return err
		}
		return conn.WriteJSON(msg)
	}

	for {
		msgType, data, err := conn.ReadMessage()
		if err != nil {
			// Normal and going-away closes are expected; log anything else.
			if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseNormalClosure) {
				logger.Printf("[stream-stt] 读取错误: %v", err)
			}
			return
		}

		switch msgType {
		case websocket.TextMessage:
			// Text frames carry control messages; only "stop" is understood.
			var ctrl map[string]interface{}
			if json.Unmarshal(data, &ctrl) != nil || ctrl["action"] != "stop" {
				continue
			}
			if err := send(map[string]interface{}{
				"type":   "done",
				"action": "stop",
			}); err != nil {
				logger.Printf("[stream-stt] 写入失败: %v", err)
			}
			return

		case websocket.BinaryMessage:
			text, err := h.svc.Transcribe(data, format, language)

			var reply map[string]interface{}
			switch {
			case err != nil:
				reply = map[string]interface{}{
					"type":  "error",
					"error": err.Error(),
				}
			case text != "":
				reply = map[string]interface{}{
					"type":  "result",
					"text":  text,
					"final": true,
				}
			default:
				// Nothing was recognized in this frame; stay silent.
				continue
			}

			// A failed write means the client is gone; stop transcribing.
			if err := send(reply); err != nil {
				logger.Printf("[stream-stt] 写入失败: %v", err)
				return
			}
		}
	}
}
// RegisterStreamingRoutes attaches the streaming STT endpoint to mux at
// /api/v1/stt/stream.
func (h *StreamingSTTHandler) RegisterStreamingRoutes(mux *http.ServeMux) {
	const streamPath = "/api/v1/stt/stream"
	mux.Handle(streamPath, http.HandlerFunc(h.HandleStreamingSTT))
}
@@ -0,0 +1,273 @@
package service
import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/gorilla/websocket"
)
// DashScopeSTT is a speech-to-text client for the Alibaba Cloud Bailian
// (DashScope) Gummy model.
// WebSocket API: wss://dashscope.aliyuncs.com/api-ws/v1/inference
type DashScopeSTT struct {
// apiKey is sent as the Bearer token when dialing; an empty key marks the
// client as unavailable.
apiKey string
// model is the model name placed in the run-task payload.
model string
// timeout bounds how long one transcription may wait on reads from the
// service (applied as the connection's read deadline).
timeout time.Duration
}
// NewDashScopeSTT builds a DashScope STT client with a 30-second timeout.
// An empty model selects the default "gummy-chat-v1".
func NewDashScopeSTT(apiKey, model string) *DashScopeSTT {
	client := &DashScopeSTT{
		apiKey:  apiKey,
		model:   "gummy-chat-v1",
		timeout: 30 * time.Second,
	}
	if model != "" {
		client.model = model
	}
	return client
}
// IsAvailable reports whether an API key has been configured.
func (d *DashScopeSTT) IsAvailable() bool {
	return len(d.apiKey) > 0
}
// sttMessage is the JSON envelope this client sends over the STT WebSocket:
// a header naming the action plus a payload describing the task.
type sttMessage struct {
Header sttHeader `json:"header"`
Payload sttPayload `json:"payload"`
}
// sttHeader identifies a client message. Transcribe sets Streaming to
// "duplex" and Action to "run-task" or "finish-task", reusing the same TaskID
// for both messages of a session.
type sttHeader struct {
Streaming string `json:"streaming"`
TaskID string `json:"task_id"`
Action string `json:"action"`
}
// sttPayload describes the recognition task requested by a client message.
// NOTE(review): Input and Output are tagged omitempty, so nil or empty maps
// are left out of the JSON entirely — confirm the service accepts run-task
// and finish-task messages without an "input" object.
type sttPayload struct {
Model string `json:"model"`
TaskGroup string `json:"task_group"`
Task string `json:"task"`
Function string `json:"function"`
Input map[string]interface{} `json:"input,omitempty"`
Parameters sttParameters `json:"parameters"`
Output map[string]interface{} `json:"output,omitempty"`
}
// sttParameters carries the audio and recognition settings of a run-task
// message. SourceLanguage and MaxEndSilence are omitted from the JSON when
// they hold their zero value.
type sttParameters struct {
SampleRate int `json:"sample_rate"`
Format string `json:"format"`
TranscriptionEnabled bool `json:"transcription_enabled"`
TranslationEnabled bool `json:"translation_enabled"`
SourceLanguage string `json:"source_language,omitempty"`
MaxEndSilence int `json:"max_end_silence,omitempty"`
}
// sttServerMsg is the JSON envelope of a message received from the service.
type sttServerMsg struct {
Header sttServerHeader `json:"header"`
Payload sttServerPayload `json:"payload"`
}
// sttServerHeader identifies a server event. Transcribe reacts to the events
// "task-started", "result-generated", "task-finished" and "task-failed".
type sttServerHeader struct {
TaskID string `json:"task_id"`
Event string `json:"event"`
}
// sttServerPayload is the body of a server event. Transcribe reads the
// recognized text from Output["transcription"]["text"] and the failure
// details from Error.
// NOTE(review): confirm against the DashScope API reference that task-failed
// events carry their code and message under payload.error; if they arrive
// elsewhere in the message, Error stays empty after decoding.
type sttServerPayload struct {
Output map[string]interface{} `json:"output,omitempty"`
Usage map[string]interface{} `json:"usage,omitempty"`
Error sttError `json:"error,omitempty"`
}
// sttError holds the code and message reported for a failed task.
type sttError struct {
Code string `json:"code"`
Message string `json:"message"`
}
// Transcribe sends audioData to DashScope over a one-shot WebSocket session
// and returns the recognized text.
//
// Parameters:
//   - ctx: bounds the dial and the waits for the service's events.
//   - audioData: the complete audio clip; it is sent in binary frames of at
//     most 10240 bytes.
//   - format: caller-supplied audio format, mapped by normalizeSTTFormat.
//   - language: source language; "" and "auto" are replaced with "zh".
//
// The session is: send run-task, wait for task-started, send the audio, send
// finish-task, wait for task-finished. The text of the most recent
// result-generated event is returned.
//
// An error is returned when the API key is missing, the dial or any write
// fails, the service reports task-failed, reading from the connection fails
// before task-finished, ctx is done, or the task finishes without any text.
func (d *DashScopeSTT) Transcribe(ctx context.Context, audioData []byte, format string, language string) (string, error) {
	if !d.IsAvailable() {
		return "", fmt.Errorf("DashScope API Key 未配置")
	}

	const (
		chunkSize    = 10240
		writeTimeout = 10 * time.Second
	)

	dialer := websocket.Dialer{
		HandshakeTimeout: 10 * time.Second,
	}
	header := make(map[string][]string)
	header["Authorization"] = []string{"Bearer " + d.apiKey}

	conn, _, err := dialer.DialContext(ctx, "wss://dashscope.aliyuncs.com/api-ws/v1/inference", header)
	if err != nil {
		return "", fmt.Errorf("连接 DashScope STT 失败: %w", err)
	}
	// Closing the connection on every return path also makes the reader
	// goroutine's pending read fail, so it does not outlive this call.
	defer conn.Close()

	conn.SetReadDeadline(time.Now().Add(d.timeout))

	taskID := fmt.Sprintf("cyrene-stt-%d", time.Now().UnixNano())

	normFormat := normalizeSTTFormat(format)
	if language == "" || language == "auto" {
		language = "zh"
	}

	// Start the task.
	startMsg := sttMessage{
		Header: sttHeader{
			Streaming: "duplex",
			TaskID:    taskID,
			Action:    "run-task",
		},
		Payload: sttPayload{
			Model:     d.model,
			TaskGroup: "audio",
			Task:      "asr",
			Function:  "recognition",
			Parameters: sttParameters{
				SampleRate:           16000,
				Format:               normFormat,
				TranscriptionEnabled: true,
				TranslationEnabled:   false,
				SourceLanguage:       language,
			},
		},
	}
	conn.SetWriteDeadline(time.Now().Add(writeTimeout))
	if err := conn.WriteJSON(startMsg); err != nil {
		return "", fmt.Errorf("发送 run-task 失败: %w", err)
	}

	var textResult string
	var mu sync.Mutex
	started := make(chan struct{})
	errc := make(chan error, 1)
	done := make(chan struct{})

	// Reader goroutine: consumes server events until the task ends or the
	// connection fails. Any error is posted to errc BEFORE done is closed, so
	// once done is observed the error (if any) is already waiting in errc.
	go func() {
		defer close(done)
		startedClosed := false
		for {
			var msg sttServerMsg
			if err := conn.ReadJSON(&msg); err != nil {
				select {
				case errc <- fmt.Errorf("读取响应失败: %w", err):
				default:
				}
				return
			}

			switch msg.Header.Event {
			case "task-started":
				if !startedClosed {
					close(started)
					startedClosed = true
				}
			case "result-generated":
				if out, ok := msg.Payload.Output["transcription"]; ok {
					if transMap, ok := out.(map[string]interface{}); ok {
						if text, ok := transMap["text"].(string); ok {
							mu.Lock()
							textResult = text
							mu.Unlock()
						}
					}
				}
			case "task-finished":
				return
			case "task-failed":
				errMsg := msg.Payload.Error.Message
				if errMsg == "" {
					errMsg = "未知错误"
				}
				select {
				case errc <- fmt.Errorf("DashScope 识别失败: %s (code=%s)", errMsg, msg.Payload.Error.Code):
				default:
				}
				return
			}
		}
	}()

	// readerErr returns the error posted by the reader goroutine, if any.
	readerErr := func() error {
		select {
		case err := <-errc:
			return err
		default:
			return nil
		}
	}

	// Wait for task-started. Watching done (instead of errc alone) also covers
	// a reader that ended without posting an error, which would otherwise
	// block here until ctx expires.
	select {
	case <-started:
	case <-done:
		if err := readerErr(); err != nil {
			return "", err
		}
		return "", fmt.Errorf("DashScope 任务在启动前已结束")
	case <-ctx.Done():
		return "", ctx.Err()
	}

	// Stream the audio in fixed-size chunks.
	for start := 0; start < len(audioData); start += chunkSize {
		end := min(start+chunkSize, len(audioData))
		conn.SetWriteDeadline(time.Now().Add(writeTimeout))
		if err := conn.WriteMessage(websocket.BinaryMessage, audioData[start:end]); err != nil {
			return "", fmt.Errorf("发送音频数据失败: %w", err)
		}
	}

	// Tell the service that no more audio follows.
	finishMsg := sttMessage{
		Header: sttHeader{
			Streaming: "duplex",
			TaskID:    taskID,
			Action:    "finish-task",
		},
	}
	conn.SetWriteDeadline(time.Now().Add(writeTimeout))
	if err := conn.WriteJSON(finishMsg); err != nil {
		return "", fmt.Errorf("发送 finish-task 失败: %w", err)
	}

	// Wait for the reader to finish. The error is checked first: selecting on
	// done and errc side by side would pick one at random when both are ready
	// and could report a failed task as "no result" or as a success with
	// partial text.
	select {
	case <-done:
		if err := readerErr(); err != nil {
			return "", err
		}
		mu.Lock()
		text := textResult
		mu.Unlock()
		if text == "" {
			return "", fmt.Errorf("未收到识别结果")
		}
		return text, nil
	case <-ctx.Done():
		return "", ctx.Err()
	}
}
// normalizeSTTFormat maps a caller-supplied audio format name to the format
// name placed in the DashScope run-task parameters. Matching ignores letter
// case and surrounding whitespace; empty or unrecognized names fall back to
// "pcm".
//
// NOTE(review): confirm that every name returned here (in particular "ogg"
// and "flac") is accepted by the configured DashScope model.
func normalizeSTTFormat(format string) string {
	switch strings.ToLower(strings.TrimSpace(format)) {
	case "wav":
		return "wav"
	case "mp3", "mpeg":
		return "mp3"
	case "ogg", "opus":
		return "ogg"
	case "flac":
		return "flac"
	case "m4a", "aac", "mp4":
		return "aac"
	default:
		return "pcm"
	}
}
// GetStatus reports the DashScope STT state: whether an API key is
// configured, the model in use, and the fixed provider name.
func (d *DashScopeSTT) GetStatus() map[string]interface{} {
	status := make(map[string]interface{}, 3)
	status["available"] = d.IsAvailable()
	status["model"] = d.model
	status["provider"] = "dashscope"
	return status
}
@@ -1,11 +1,13 @@
package service
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/yourname/cyrene-ai/voice-service/internal/config"
)
@@ -13,48 +15,64 @@ import (
// SupportedLanguages STT 支持的语言列表
var SupportedLanguages = []string{"zh", "en", "ja", "ko", "auto"}
// STTService 语音转文字服务
// STTService 语音转文字服务
// 优先使用 DashScope Gummy API,不可用时回退到本地 Whisper。
type STTService struct {
whisperBinary string
whisperModel string
language string
dashscope *DashScopeSTT
}
// NewSTTService 创建 STT 服务
// NewSTTService 创建 STT 服务
func NewSTTService(cfg *config.Config) *STTService {
return &STTService{
whisperBinary: cfg.WhisperBinary,
whisperModel: cfg.WhisperModel,
language: cfg.WhisperLanguage,
dashscope: NewDashScopeSTT(cfg.DashScopeAPIKey, cfg.DashScopeModel),
}
}
// IsAvailable 检查 whisper binary 是否存在
// IsAvailable 检查是否有任一 STT 引擎可用。
func (s *STTService) IsAvailable() bool {
if s.dashscope.IsAvailable() {
return true
}
_, err := os.Stat(s.whisperBinary)
return err == nil
}
// Transcribe 将音频数据转录为文字
// audioData: 音频文件的二进制数据
// format: 音频格式 (wav, mp3, ogg, flac, m4a)
// language: 转录语言 (zh, en, ja, ko, auto),为空则使用默认语言
// Transcribe 将音频数据转录为文字
// 优先使用 DashScope,不可用时回退到本地 Whisper。
func (s *STTService) Transcribe(audioData []byte, format string, language string) (string, error) {
if !s.IsAvailable() {
return "", fmt.Errorf("STT 引擎未安装,请运行 scripts/setup-whisper.sh")
}
// 如果未指定语言,使用默认语言
if language == "" {
language = s.language
}
// 验证语言是否支持
if !isSupportedLanguage(language) {
return "", fmt.Errorf("不支持的语言: %s,支持的语言: %s", language, strings.Join(SupportedLanguages, ", "))
}
// 将音频数据写入临时文件
// 优先 DashScope
if s.dashscope.IsAvailable() {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
text, err := s.dashscope.Transcribe(ctx, audioData, format, language)
if err == nil && text != "" {
return text, nil
}
}
// 回退到本地 Whisper
return s.transcribeWhisper(audioData, format, language)
}
// transcribeWhisper 使用本地 Whisper 引擎转录。
func (s *STTService) transcribeWhisper(audioData []byte, format string, language string) (string, error) {
if _, err := os.Stat(s.whisperBinary); err != nil {
return "", fmt.Errorf("STT 引擎不可用: DashScope API Key 未配置且 Whisper 未安装")
}
ext := normalizeExt(format)
tmpFile, err := os.CreateTemp("/tmp", "cyrene-stt-*"+ext)
if err != nil {
@@ -69,7 +87,6 @@ func (s *STTService) Transcribe(audioData []byte, format string, language string
}
tmpFile.Close()
// 如果不是 WAV 格式,尝试用 ffmpeg 转换
inputPath := tmpPath
if format != "wav" && format != "" {
convertedPath := tmpPath + ".wav"
@@ -77,11 +94,8 @@ func (s *STTService) Transcribe(audioData []byte, format string, language string
defer os.Remove(convertedPath)
inputPath = convertedPath
}
// 转换失败则仍使用原始文件(whisper.cpp 也支持其他格式)
}
// 调用 whisper.cpp
// whisper-cli 的 -of 标志会在去掉扩展名后追加 .txt
outputPrefix := strings.TrimSuffix(inputPath, filepath.Ext(inputPath))
outputTxt := outputPrefix + ".txt"
@@ -99,42 +113,41 @@ func (s *STTService) Transcribe(audioData []byte, format string, language string
return "", fmt.Errorf("whisper 转录失败: %w", err)
}
// 读取输出文本
defer os.Remove(outputTxt)
txtData, err := os.ReadFile(outputTxt)
if err != nil {
return "", fmt.Errorf("读取转录结果失败: %w", err)
}
text := strings.TrimSpace(string(txtData))
return text, nil
return strings.TrimSpace(string(txtData)), nil
}
// GetStatus 返回服务状态
// GetStatus 返回服务状态
func (s *STTService) GetStatus() map[string]interface{} {
binaryAvailable := s.IsAvailable()
binaryAvailable := false
if _, err := os.Stat(s.whisperBinary); err == nil {
binaryAvailable = true
}
modelExists := false
if _, err := os.Stat(s.whisperModel); err == nil {
modelExists = true
}
modelName := filepath.Base(s.whisperModel)
return map[string]interface{}{
"available": binaryAvailable && modelExists,
"binary_available": binaryAvailable,
"model_loaded": modelExists,
"binary_path": s.whisperBinary,
"model_path": s.whisperModel,
"model_name": modelName,
"default_language": s.language,
"available": s.IsAvailable(),
"primary": "dashscope",
"dashscope": s.dashscope.GetStatus(),
"whisper": map[string]interface{}{
"available": binaryAvailable && modelExists,
"binary_available": binaryAvailable,
"model_loaded": modelExists,
"model_name": filepath.Base(s.whisperModel),
},
"default_language": s.language,
"supported_languages": SupportedLanguages,
}
}
// normalizeExt 规范化文件扩展名
// normalizeExt 规范化文件扩展名
func normalizeExt(format string) string {
switch strings.ToLower(format) {
case "wav":
@@ -152,7 +165,7 @@ func normalizeExt(format string) string {
}
}
// isSupportedLanguage 检查语言是否支持
// isSupportedLanguage 检查语言是否支持
func isSupportedLanguage(lang string) bool {
for _, l := range SupportedLanguages {
if l == lang {
@@ -162,7 +175,7 @@ func isSupportedLanguage(lang string) bool {
return false
}
// convertToWav 使用 ffmpeg 将音频转换为 WAV 格式
// convertToWav 使用 ffmpeg 将音频转换为 WAV 格式
func convertToWav(inputPath, outputPath string) error {
cmd := exec.Command("ffmpeg",
"-i", inputPath,
+70 -32
View File
@@ -3892,32 +3892,63 @@ function updateModelTemplateOptions() {
}
}
function renderFetchedModelList(models, filter) {
var area = document.getElementById('model-template-area');
if (!area) return;
var filterLower = (filter || '').toLowerCase();
var filtered = filterLower ? models.filter(function(m) { return m.toLowerCase().indexOf(filterLower) >= 0; }) : models;
var countInfo = filterLower ? '\uff08' + filtered.length + '/' + models.length + '\uff09' : '\uff08共 ' + models.length + ' \u4e2a\uff09';
var html = '<div style="display:flex;gap:8px;margin-bottom:8px">' +
'<input id="model-search-input" class="input" type="text" placeholder="\U0001f50d \u641c\u7d22\u6a21\u578b\u540d\u79f0...' + countInfo + '" value="' + escHtml(filter) + '"' +
' oninput="renderFetchedModelList(STATE.fetchedModels, this.value)" style="flex:1;background:var(--bg);font-size:12px">' +
'<button type="button" class="btn btn-xs" onclick="var s=document.getElementById(&quot;model-search-input&quot;);if(s)s.value=&quot;&quot;;renderFetchedModelList(STATE.fetchedModels,&quot;&quot;);" title="\u6e05\u9664\u641c\u7d22">\u2715</button></div>' +
'<div id="fetched-model-list" style="max-height:220px;overflow-y:auto;border:1px solid var(--border);border-radius:var(--radius-sm);background:var(--bg)">';
if (filtered.length === 0) {
html += '<div class="empty-state" style="padding:12px"><div class="icon">\U0001f50d</div>\u65e0\u5339\u914d\u6a21\u578b</div>';
} else {
html += filtered.map(function(m) {
return '<div class="fetched-model-item"' +
' data-model="' + escHtml(m) + '"' +
' onclick="var mn=this.getAttribute(&quot;data-model&quot;);selectFetchedModel(mn);"' +
' style="padding:6px 12px;cursor:pointer;font-size:12px;border-bottom:1px solid var(--border);transition:background .12s"' +
' onmouseenter="this.style.background=&quot;var(--bg3)&quot;"' +
' onmouseleave="this.style.background=&quot;&quot;">' + escHtml(m) + '</div>';
}).join('');
}
html += '</div>';
area.innerHTML = html;
}
/**
 * Renders the fetched-model picker into #model-template-area: a search bar
 * followed by the list of models whose name contains `filter`
 * (case-insensitive).
 *
 * The search bar and the result list are separate elements so that typing
 * only rebuilds the list (via filterFetchedModels) and the input keeps focus.
 *
 * @param {string[]} models  All fetched model names.
 * @param {string} [filter]  Current search text; empty or missing shows all.
 */
function renderFetchedModelList(models, filter) {
  var area = document.getElementById('model-template-area');
  if (!area) return;
  var filterText = filter || '';
  var filterLower = filterText.toLowerCase();
  var filtered = filterLower ? models.filter(function(m) { return m.toLowerCase().indexOf(filterLower) >= 0; }) : models;
  var countInfo = filterLower ? '\uff08' + filtered.length + '/' + models.length + '\uff09' : '\uff08共 ' + models.length + ' \u4e2a\uff09';
  // "\U0001f50d" is not a JavaScript escape sequence; the magnifier emoji has
  // to be written as a UTF-16 surrogate pair.
  var html = '<div id="model-search-bar" style="display:flex;gap:8px;margin-bottom:8px">' +
    '<input id="model-search-input" class="input" type="text" placeholder="\ud83d\udd0d \u641c索模型名称...' + countInfo + '" value="' + escHtml(filterText) + '"' +
    ' oninput="filterFetchedModels()" style="flex:1;background:var(--bg);font-size:12px">' +
    '<button type="button" class="btn btn-xs" onclick="clearModelSearch()" title="\u6e05\u9664\u641c索">\u2715</button></div>' +
    '<div id="model-search-results" style="max-height:220px;overflow-y:auto;border:1px solid var(--border);border-radius:var(--radius-sm);background:var(--bg)">';
  if (filtered.length === 0) {
    html += '<div class="empty-state" style="padding:12px"><div class="icon">\ud83d\udd0d</div>\u65e0\u5339\u914d\u6a21\u578b</div>';
  } else {
    html += filtered.map(function(m) {
      // Inline handlers use single quotes: a double quote here would end the
      // surrounding double-quoted HTML attribute and break the handler.
      return '<div class="fetched-model-item"' +
        ' data-model="' + escHtml(m) + '"' +
        ' onclick="var mn=this.getAttribute(\'data-model\');selectFetchedModel(mn);"' +
        ' style="padding:6px 12px;cursor:pointer;font-size:12px;border-bottom:1px solid var(--border);transition:background .12s"' +
        ' onmouseenter="this.style.background=\'var(--bg3)\'"' +
        ' onmouseleave="this.style.background=\'\'">' + escHtml(m) + '</div>';
    }).join('');
  }
  html += '</div>';
  area.innerHTML = html;
}
/**
 * Re-filters the fetched-model list using the current text of
 * #model-search-input. Only #model-search-results and the input's placeholder
 * are updated; the search box itself is not rebuilt, so it keeps focus while
 * the user types.
 */
function filterFetchedModels() {
  var input = document.getElementById('model-search-input');
  var results = document.getElementById('model-search-results');
  if (!input || !results) return;
  var filter = input.value;
  // Treat a list that has not been fetched yet as empty instead of throwing.
  var models = STATE.fetchedModels || [];
  var filterLower = filter.toLowerCase();
  var filtered = filterLower ? models.filter(function(m) { return m.toLowerCase().indexOf(filterLower) >= 0; }) : models;
  var countInfo = filterLower ? '\uff08' + filtered.length + '/' + models.length + '\uff09' : '\uff08共 ' + models.length + ' \u4e2a\uff09';
  // "\U0001f50d" is not a JavaScript escape sequence; the magnifier emoji has
  // to be written as a UTF-16 surrogate pair.
  input.placeholder = '\ud83d\udd0d \u641c索模型名称...' + countInfo;
  if (filtered.length === 0) {
    results.innerHTML = '<div class="empty-state" style="padding:12px"><div class="icon">\ud83d\udd0d</div>\u65e0\u5339\u914d\u6a21型</div>';
  } else {
    results.innerHTML = filtered.map(function(m) {
      // Inline handlers use single quotes: a double quote here would end the
      // surrounding double-quoted HTML attribute and break the handler.
      return '<div class="fetched-model-item" data-model="' + escHtml(m) + '"' +
        ' onclick="var mn=this.getAttribute(\'data-model\');selectFetchedModel(mn);"' +
        ' style="padding:6px 12px;cursor:pointer;font-size:12px;border-bottom:1px solid var(--border);transition:background .12s"' +
        ' onmouseenter="this.style.background=\'var(--bg3)\'"' +
        ' onmouseleave="this.style.background=\'\'">' + escHtml(m) + '</div>';
    }).join('');
  }
}
/**
 * Empties the model search box, returns focus to it, and refreshes the
 * result list.
 */
function clearModelSearch() {
  var searchBox = document.getElementById('model-search-input');
  if (searchBox) {
    searchBox.value = '';
    searchBox.focus();
  }
  filterFetchedModels();
}
function selectFetchedModel(modelName) {
@@ -4143,13 +4174,20 @@ function showRoutingForm(purpose) {
'<button class="btn btn-sm" onclick="renderRoutingTab()">← 返回</button></div>' +
'<div class="card-body"><form onsubmit="event.preventDefault();saveRoutingForm(\'' + escHtml(purpose || '') + '\');">' +
'<div class="form-row"><label>用途 ID ' + (isEdit ? '' : '<span style="color:var(--red)">*</span>') + '</label>' +
'<select id="routing-purpose" class="input" ' + (isEdit ? 'disabled' : 'required') + '>' +
' '<select id="routing-purpose" class="input" ' + (isEdit ? 'disabled' : 'required') + '>' +
'<option value="">-- 选择用途 --</option>' +
'<option value="chat"' + (defaults.purpose === 'chat' ? ' selected' : '') + '>chat (日常对话)</option>' +
'<option value="deep_thinking"' + (defaults.purpose === 'deep_thinking' ? ' selected' : '') + '>deep_thinking (深度思考)</option>' +
'<option value="intent_analysis"' + (defaults.purpose === 'intent_analysis' ? ' selected' : '') + '>intent_analysis (意图分析)</option>' +
'<option value="tool_calling"' + (defaults.purpose === 'tool_calling' ? ' selected' : '') + '>tool_calling (工具调用)</option>' +
'<option value="memory_extraction"' + (defaults.purpose === 'memory_extraction' ? ' selected' : '') + '>memory_extraction (记忆提取)</option>' +
' '<option value="chat"'+ (defaults.purpose === 'chat' ? ' selected' : '') +'>chat (日常对话)</option>' +
' '<option value="deep_thinking"'+ (defaults.purpose === 'deep_thinking' ? ' selected' : '') +'>deep_thinking (深度思考/复杂推理)</option>' +
' '<option value="code"'+ (defaults.purpose === 'code' ? ' selected' : '') +'>code (代码生成)</option>' +
' '<option value="vision"'+ (defaults.purpose === 'vision' ? ' selected' : '') +'>vision (视觉理解)</option>' +
' '<option value="ocr"'+ (defaults.purpose === 'ocr' ? ' selected' : '') +'>ocr (文字识别/OCR)</option>' +
' '<option value="math"'+ (defaults.purpose === 'math' ? ' selected' : '') +'>math (数学推理)</option>' +
' '<option value="translation"'+ (defaults.purpose === 'translation' ? ' selected' : '') +'>translation (翻译)</option>' +
' '<option value="intent_analysis"'+ (defaults.purpose === 'intent_analysis' ? ' selected' : '') +'>intent_analysis (意图分析)</option>' +
' '<option value="tool_calling"'+ (defaults.purpose === 'tool_calling' ? ' selected' : '') +'>tool_calling (工具调用/Function Calling)</option>' +
' '<option value="memory_extraction"'+ (defaults.purpose === 'memory_extraction' ? ' selected' : '') +'>memory_extraction (记忆提取)</option>' +
' '<option value="roleplay"'+ (defaults.purpose === 'roleplay' ? ' selected' : '') +'>roleplay (角色扮演)</option>' +
' '<option value="long_context"'+ (defaults.purpose === 'long_context' ? ' selected' : '') +'>long_context (长文档处理)</option>' +
'</select></div>' +
'<div class="form-row"><label>回退模型链 <span style="color:var(--text2);font-weight:400">(勾选即加入,顺序=表格显示顺序)</span></label>' +
(models.length > 0 ? '<div class="btn-group" style="margin-bottom:8px">' +
+2
View File
@@ -148,6 +148,8 @@ export const SERVICES = {
WHISPER_BINARY: './whisper.cpp/main',
WHISPER_MODEL: './whisper.cpp/models/ggml-small.bin',
WHISPER_LANGUAGE: 'zh',
DASHSCOPE_API_KEY: process.env.DASHSCOPE_API_KEY || '',
DASHSCOPE_STT_MODEL: 'gummy-chat-v1',
},
healthUrl: 'http://localhost:8093/api/v1/health',
port: 8093,