feat: ASR语音转写管线 + 群聊身份混淆修复

- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息
- 模型名称全部从models.json路由获取,无硬编码
- 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题
  - 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆
  - 群聊上下文指令改为肯定性表述,移除具体名称提及
- trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒
- 修复Go time.Duration纳秒值在前端的显示问题(Duration/1e6转毫秒)
- 新增video_tool插件模板
- 优化OpenAI adapter reasoning_content处理

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 16:46:47 +08:00
parent d112fdd540
commit a9c79d7887
16 changed files with 780 additions and 67 deletions
+72 -2
View File
@@ -185,6 +185,8 @@ func main() {
toolRegistry := plgManager.NewToolRegistry()
var visionProvider llm.LLMProvider
var ocrProvider llm.LLMProvider
var videoProvider llm.LLMProvider
var asrProvider llm.ASRProvider
if getEnvBool("ENABLE_TOOLS", true) {
// 11 个共享通用插件 — 注册其工具到统一注册中心
registerPluginTools(toolRegistry, &pluginCalc.CalculatorPlugin{})
@@ -252,12 +254,52 @@ func main() {
}
}
}
}
if ocrProvider == nil {
log.Println("OCR模型未配置,图片文字提取将复用视觉模型")
}
// 初始化视频理解模型
videoProvider = nil
if configLoader != nil && configLoader.HasConfig() {
cfg := configLoader.GetConfig()
if route, ok := cfg.Routing["video"]; ok && len(route.FallbackChain) > 0 {
for _, mid := range route.FallbackChain {
if _, ok := cfg.Models[mid]; ok {
videoProvider, _ = modelSelector.Select(context.Background(), llm.PurposeVideo)
log.Printf("视频理解模型已启用: %s", videoProvider.ModelName())
break
}
}
}
}
if videoProvider == nil {
log.Println("视频理解模型未配置")
}
// 初始化 ASR 语音识别模型
asrProvider = nil
if configLoader != nil && configLoader.HasConfig() {
cfg := configLoader.GetConfig()
if route, ok := cfg.Routing["speech_recognition"]; ok && len(route.FallbackChain) > 0 {
for _, mid := range route.FallbackChain {
if m, ok := cfg.Models[mid]; ok {
if p, ok := cfg.Providers[m.Provider]; ok {
asrProvider = llm.NewDashScopeASRProvider(p.BaseURL, p.APIKey, m.Name)
log.Printf("ASR语音识别模型已启用: %s", m.Name)
break
}
}
}
}
}
if asrProvider == nil {
log.Println("ASR语音识别模型未配置")
}
toolRegistry.Register(wrapTool(tools.NewVisionTool(visionProvider), "vision_analyze", "Image Vision Analysis & OCR", "multimodal"))
toolRegistry.Register(wrapTool(tools.NewVideoTool(videoProvider), "video_analyze", "Video Understanding & Analysis", "multimodal"))
if knowledgeRetriever != nil {
toolRegistry.Register(wrapTool(tools.NewKnowledgeSearchTool(knowledgeRetriever), "knowledge_search", "Search Knowledge Base", "knowledge"))
@@ -286,6 +328,7 @@ func main() {
convStore,
adminUserID,
adminSessionID,
cfg.AdminNickname,
memClient,
)
@@ -375,12 +418,24 @@ func main() {
orch.SetOCRProvider(ocrProvider)
log.Printf("对话编排器: OCR模型已注入 (%s)", ocrProvider.ModelName())
}
log.Println("对话编排器 v2.0 已就绪")
if videoProvider != nil {
orch.SetVideoProvider(videoProvider)
log.Printf("对话编排器: 视频模型已注入 (%s)\n", videoProvider.ModelName())
} else {
log.Println("对话编排器: 视频模型未配置,视频理解功能不可用")
}
if asrProvider != nil && asrProvider.IsAvailable() {
orch.SetASRProvider(asrProvider)
log.Printf("对话编排器: ASR语音识别模型已注入 (%s)\n", asrProvider.ModelName())
} else {
log.Println("对话编排器: ASR语音识别模型未配置")
}
log.Println("对话编排器 v2.0 已就绪")
_ = orch
// 注册对话API端点
mux.HandleFunc("/api/v1/chat", func(w http.ResponseWriter, r *http.Request) {
handleChat(w, r, orch, ctxBuilder, personaLoader, memRetriever, memExtractor, iotClient, thinker, toolRegistry)
handleChat(w, r, orch, ctxBuilder, personaLoader, memRetriever, memExtractor, iotClient, thinker, toolRegistry, adminSessionID)
})
// 注册记忆API端点
@@ -746,6 +801,7 @@ func handleChat(
iotClient *tools.IoTClient,
thinker *background.Thinker,
_ *plgManager.ToolRegistry,
adminSessionID string,
) {
if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
@@ -758,8 +814,11 @@ func handleChat(
SessionID string `json:"session_id"`
Message string `json:"message"`
Images []string `json:"images,omitempty"` // 图片 base64 data URL
VideoURLs []string `json:"video_urls,omitempty"` // 视频 URL (多模态)
VoiceURLs []string `json:"voice_urls,omitempty"` // 语音 URL (ASR 转录)
Mode string `json:"mode"`
Nickname string `json:"nickname,omitempty"`
IsAdmin bool `json:"is_admin"`
Source struct {
Platform string `json:"platform"`
ChannelID string `json:"channel_id"`
@@ -795,11 +854,20 @@ func handleChat(
ctx := r.Context()
// Inject admin flag for tool access control.
ctx = context.WithValue(ctx, plgManager.CtxKeyIsAdmin, req.IsAdmin)
// 0. 记录用户活动(重置闲置计时器)
if thinker != nil {
thinker.RecordUserMessage(req.SessionID)
}
// Admin private messages: redirect to the main admin session so conversation
// history is shared across platforms (QQ, web UI, etc.).
if req.UserID == "admin" && req.Source.ChannelType == "direct" && adminSessionID != "" {
req.SessionID = adminSessionID
}
// 确定用户昵称
userNickname := req.Nickname
if userNickname == "" {
@@ -828,6 +896,8 @@ func handleChat(
SessionID: req.SessionID,
Message: req.Message,
Images: req.Images,
VideoURLs: req.VideoURLs,
VoiceURLs: req.VoiceURLs,
Mode: req.Mode,
Nickname: userNickname,
ChannelType: req.Source.ChannelType,