feat: ASR语音转写管线 + 群聊身份混淆修复
- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息
- 模型名称全部从models.json路由获取,无硬编码
- 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题
- 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆
- 群聊上下文指令改为肯定性表述,移除具体名称提及
- trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒
- 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒)
- 新增video_tool插件模板
- 优化OpenAI adapter reasoning_content处理

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/llm"
|
||||
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/model"
|
||||
)
|
||||
|
||||
// VideoTool enables video understanding via multimodal LLM.
|
||||
type VideoTool struct {
|
||||
videoProvider llm.LLMProvider
|
||||
}
|
||||
|
||||
// NewVideoTool creates a video tool. videoProvider is optional (nil = no-op mode).
|
||||
func NewVideoTool(videoProvider llm.LLMProvider) *VideoTool {
|
||||
return &VideoTool{videoProvider: videoProvider}
|
||||
}
|
||||
|
||||
func (t *VideoTool) Definition() ToolDefinition {
|
||||
return ToolDefinition{
|
||||
Name: "video_analyze",
|
||||
Description: "分析视频内容。传入视频文件路径或URL,返回视频内容的文字描述和分析结果。支持场景理解、动作识别、文字提取等。",
|
||||
Parameters: map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"video_path": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "视频文件路径或URL",
|
||||
},
|
||||
"task": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "分析任务: describe(内容描述), summarize(摘要), analyze(综合分析)",
|
||||
"enum": []string{"describe", "summarize", "analyze"},
|
||||
},
|
||||
},
|
||||
"required": []string{"video_path", "task"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// videoTaskPrompts maps each supported analysis task name to the prompt text
// sent to the video model alongside the video. The keys match the "task"
// enum advertised by VideoTool.Definition.
var videoTaskPrompts = map[string]string{
	"describe":  "请详细描述这个视频的内容,包括场景、人物、动作、对话要点等。",
	"summarize": "请用简洁的语言总结这个视频的主要内容。",
	"analyze":   "请综合分析这个视频,包括内容描述、关键片段、文字信息(如有)、以及你的理解。",
}
|
||||
|
||||
func (t *VideoTool) Execute(ctx context.Context, args map[string]interface{}) (*ToolResult, error) {
|
||||
videoPath, _ := args["video_path"].(string)
|
||||
if videoPath == "" {
|
||||
return &ToolResult{ToolName: "video_analyze", Success: false, Error: "video_path 参数不能为空"}, nil
|
||||
}
|
||||
|
||||
task, _ := args["task"].(string)
|
||||
if task == "" {
|
||||
task = "analyze"
|
||||
}
|
||||
|
||||
prompt := videoTaskPrompts[task]
|
||||
if prompt == "" {
|
||||
prompt = videoTaskPrompts["analyze"]
|
||||
}
|
||||
|
||||
if t.videoProvider == nil {
|
||||
return &ToolResult{ToolName: "video_analyze", Success: false, Error: "视频理解模型未配置"}, nil
|
||||
}
|
||||
|
||||
messages := []model.LLMMessage{
|
||||
{Role: model.RoleUser, Content: prompt, VideoURLs: []string{videoPath}},
|
||||
}
|
||||
resp, err := t.videoProvider.Chat(ctx, messages)
|
||||
if err != nil {
|
||||
return &ToolResult{ToolName: "video_analyze", Success: false, Error: fmt.Sprintf("视频模型调用失败: %v", err)}, nil
|
||||
}
|
||||
|
||||
output, _ := json.Marshal(map[string]interface{}{
|
||||
"video_path": videoPath,
|
||||
"task": task,
|
||||
"model": t.videoProvider.ModelName(),
|
||||
"text": resp.Content,
|
||||
"prompt_tokens": resp.Usage.PromptTokens,
|
||||
"completion_tokens": resp.Usage.CompletionTokens,
|
||||
"total_tokens": resp.Usage.TotalTokens,
|
||||
})
|
||||
return &ToolResult{ToolName: "video_analyze", Success: true, Data: string(output)}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user