feat: ASR语音转写管线 + 群聊身份混淆修复
- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息
- 模型名称全部从models.json路由获取,无硬编码
- 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题
- 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆
- 群聊上下文指令改为肯定性表述,移除具体名称提及
- trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒
- 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒)
- 新增video_tool插件模板
- 优化OpenAI adapter reasoning_content处理

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -185,6 +185,8 @@ func main() {
|
||||
toolRegistry := plgManager.NewToolRegistry()
|
||||
var visionProvider llm.LLMProvider
|
||||
var ocrProvider llm.LLMProvider
|
||||
var videoProvider llm.LLMProvider
|
||||
var asrProvider llm.ASRProvider
|
||||
if getEnvBool("ENABLE_TOOLS", true) {
|
||||
// 11 个共享通用插件 — 注册其工具到统一注册中心
|
||||
registerPluginTools(toolRegistry, &pluginCalc.CalculatorPlugin{})
|
||||
@@ -252,12 +254,52 @@ func main() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if ocrProvider == nil {
|
||||
log.Println("OCR模型未配置,图片文字提取将复用视觉模型")
|
||||
}
|
||||
|
||||
// 初始化视频理解模型
|
||||
videoProvider = nil
|
||||
if configLoader != nil && configLoader.HasConfig() {
|
||||
cfg := configLoader.GetConfig()
|
||||
if route, ok := cfg.Routing["video"]; ok && len(route.FallbackChain) > 0 {
|
||||
for _, mid := range route.FallbackChain {
|
||||
if _, ok := cfg.Models[mid]; ok {
|
||||
videoProvider, _ = modelSelector.Select(context.Background(), llm.PurposeVideo)
|
||||
log.Printf("视频理解模型已启用: %s", videoProvider.ModelName())
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if videoProvider == nil {
|
||||
log.Println("视频理解模型未配置")
|
||||
}
|
||||
|
||||
// 初始化 ASR 语音识别模型
|
||||
asrProvider = nil
|
||||
if configLoader != nil && configLoader.HasConfig() {
|
||||
cfg := configLoader.GetConfig()
|
||||
if route, ok := cfg.Routing["speech_recognition"]; ok && len(route.FallbackChain) > 0 {
|
||||
for _, mid := range route.FallbackChain {
|
||||
if m, ok := cfg.Models[mid]; ok {
|
||||
if p, ok := cfg.Providers[m.Provider]; ok {
|
||||
asrProvider = llm.NewDashScopeASRProvider(p.BaseURL, p.APIKey, m.Name)
|
||||
log.Printf("ASR语音识别模型已启用: %s", m.Name)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if asrProvider == nil {
|
||||
log.Println("ASR语音识别模型未配置")
|
||||
}
|
||||
|
||||
toolRegistry.Register(wrapTool(tools.NewVisionTool(visionProvider), "vision_analyze", "Image Vision Analysis & OCR", "multimodal"))
|
||||
toolRegistry.Register(wrapTool(tools.NewVideoTool(videoProvider), "video_analyze", "Video Understanding & Analysis", "multimodal"))
|
||||
|
||||
if knowledgeRetriever != nil {
|
||||
toolRegistry.Register(wrapTool(tools.NewKnowledgeSearchTool(knowledgeRetriever), "knowledge_search", "Search Knowledge Base", "knowledge"))
|
||||
@@ -286,6 +328,7 @@ func main() {
|
||||
convStore,
|
||||
adminUserID,
|
||||
adminSessionID,
|
||||
cfg.AdminNickname,
|
||||
memClient,
|
||||
)
|
||||
|
||||
@@ -375,12 +418,24 @@ func main() {
|
||||
orch.SetOCRProvider(ocrProvider)
|
||||
log.Printf("对话编排器: OCR模型已注入 (%s)", ocrProvider.ModelName())
|
||||
}
|
||||
log.Println("对话编排器 v2.0 已就绪")
|
||||
if videoProvider != nil {
|
||||
orch.SetVideoProvider(videoProvider)
|
||||
log.Printf("对话编排器: 视频模型已注入 (%s)\n", videoProvider.ModelName())
|
||||
} else {
|
||||
log.Println("对话编排器: 视频模型未配置,视频理解功能不可用")
|
||||
}
|
||||
if asrProvider != nil && asrProvider.IsAvailable() {
|
||||
orch.SetASRProvider(asrProvider)
|
||||
log.Printf("对话编排器: ASR语音识别模型已注入 (%s)\n", asrProvider.ModelName())
|
||||
} else {
|
||||
log.Println("对话编排器: ASR语音识别模型未配置")
|
||||
}
|
||||
log.Println("对话编排器 v2.0 已就绪")
|
||||
_ = orch
|
||||
|
||||
// 注册对话API端点
|
||||
mux.HandleFunc("/api/v1/chat", func(w http.ResponseWriter, r *http.Request) {
|
||||
handleChat(w, r, orch, ctxBuilder, personaLoader, memRetriever, memExtractor, iotClient, thinker, toolRegistry)
|
||||
handleChat(w, r, orch, ctxBuilder, personaLoader, memRetriever, memExtractor, iotClient, thinker, toolRegistry, adminSessionID)
|
||||
})
|
||||
|
||||
// 注册记忆API端点
|
||||
@@ -746,6 +801,7 @@ func handleChat(
|
||||
iotClient *tools.IoTClient,
|
||||
thinker *background.Thinker,
|
||||
_ *plgManager.ToolRegistry,
|
||||
adminSessionID string,
|
||||
) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
@@ -758,8 +814,11 @@ func handleChat(
|
||||
SessionID string `json:"session_id"`
|
||||
Message string `json:"message"`
|
||||
Images []string `json:"images,omitempty"` // 图片 base64 data URL
|
||||
VideoURLs []string `json:"video_urls,omitempty"` // 视频 URL (多模态)
|
||||
VoiceURLs []string `json:"voice_urls,omitempty"` // 语音 URL (ASR 转录)
|
||||
Mode string `json:"mode"`
|
||||
Nickname string `json:"nickname,omitempty"`
|
||||
IsAdmin bool `json:"is_admin"`
|
||||
Source struct {
|
||||
Platform string `json:"platform"`
|
||||
ChannelID string `json:"channel_id"`
|
||||
@@ -795,11 +854,20 @@ func handleChat(
|
||||
|
||||
ctx := r.Context()
|
||||
|
||||
// Inject admin flag for tool access control.
|
||||
ctx = context.WithValue(ctx, plgManager.CtxKeyIsAdmin, req.IsAdmin)
|
||||
|
||||
// 0. 记录用户活动(重置闲置计时器)
|
||||
if thinker != nil {
|
||||
thinker.RecordUserMessage(req.SessionID)
|
||||
}
|
||||
|
||||
// Admin private messages: redirect to the main admin session so conversation
|
||||
// history is shared across platforms (QQ, web UI, etc.).
|
||||
if req.UserID == "admin" && req.Source.ChannelType == "direct" && adminSessionID != "" {
|
||||
req.SessionID = adminSessionID
|
||||
}
|
||||
|
||||
// 确定用户昵称
|
||||
userNickname := req.Nickname
|
||||
if userNickname == "" {
|
||||
@@ -828,6 +896,8 @@ func handleChat(
|
||||
SessionID: req.SessionID,
|
||||
Message: req.Message,
|
||||
Images: req.Images,
|
||||
VideoURLs: req.VideoURLs,
|
||||
VoiceURLs: req.VoiceURLs,
|
||||
Mode: req.Mode,
|
||||
Nickname: userNickname,
|
||||
ChannelType: req.Source.ChannelType,
|
||||
|
||||
Reference in New Issue
Block a user