a9c79d7887
- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息 - 模型名称全部从models.json路由获取,无硬编码 - 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题 - 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆 - 群聊上下文指令改为肯定性表述,移除具体名称提及 - trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒 - 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒) - 新增video_tool插件模板 - 优化OpenAI adapter reasoning_content处理 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
89 lines
2.9 KiB
Go
89 lines
2.9 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
|
|
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/llm"
|
|
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/model"
|
|
)
|
|
|
|
// VideoTool enables video understanding via multimodal LLM.
|
|
type VideoTool struct {
|
|
videoProvider llm.LLMProvider
|
|
}
|
|
|
|
// NewVideoTool creates a video tool. videoProvider is optional (nil = no-op mode).
|
|
func NewVideoTool(videoProvider llm.LLMProvider) *VideoTool {
|
|
return &VideoTool{videoProvider: videoProvider}
|
|
}
|
|
|
|
func (t *VideoTool) Definition() ToolDefinition {
|
|
return ToolDefinition{
|
|
Name: "video_analyze",
|
|
Description: "分析视频内容。传入视频文件路径或URL,返回视频内容的文字描述和分析结果。支持场景理解、动作识别、文字提取等。",
|
|
Parameters: map[string]interface{}{
|
|
"type": "object",
|
|
"properties": map[string]interface{}{
|
|
"video_path": map[string]interface{}{
|
|
"type": "string",
|
|
"description": "视频文件路径或URL",
|
|
},
|
|
"task": map[string]interface{}{
|
|
"type": "string",
|
|
"description": "分析任务: describe(内容描述), summarize(摘要), analyze(综合分析)",
|
|
"enum": []string{"describe", "summarize", "analyze"},
|
|
},
|
|
},
|
|
"required": []string{"video_path", "task"},
|
|
},
|
|
}
|
|
}
|
|
|
|
// videoTaskPrompts maps each supported task name to the prompt text that
// Execute sends to the video model alongside the video reference.
var videoTaskPrompts = map[string]string{
	// Full description of the video.
	"describe": "请详细描述这个视频的内容,包括场景、人物、动作、对话要点等。",

	// Short summary of the video.
	"summarize": "请用简洁的语言总结这个视频的主要内容。",

	// Combined analysis; Execute also uses this entry as the fallback prompt.
	"analyze": "请综合分析这个视频,包括内容描述、关键片段、文字信息(如有)、以及你的理解。",
}
|
|
|
|
func (t *VideoTool) Execute(ctx context.Context, args map[string]interface{}) (*ToolResult, error) {
|
|
videoPath, _ := args["video_path"].(string)
|
|
if videoPath == "" {
|
|
return &ToolResult{ToolName: "video_analyze", Success: false, Error: "video_path 参数不能为空"}, nil
|
|
}
|
|
|
|
task, _ := args["task"].(string)
|
|
if task == "" {
|
|
task = "analyze"
|
|
}
|
|
|
|
prompt := videoTaskPrompts[task]
|
|
if prompt == "" {
|
|
prompt = videoTaskPrompts["analyze"]
|
|
}
|
|
|
|
if t.videoProvider == nil {
|
|
return &ToolResult{ToolName: "video_analyze", Success: false, Error: "视频理解模型未配置"}, nil
|
|
}
|
|
|
|
messages := []model.LLMMessage{
|
|
{Role: model.RoleUser, Content: prompt, VideoURLs: []string{videoPath}},
|
|
}
|
|
resp, err := t.videoProvider.Chat(ctx, messages)
|
|
if err != nil {
|
|
return &ToolResult{ToolName: "video_analyze", Success: false, Error: fmt.Sprintf("视频模型调用失败: %v", err)}, nil
|
|
}
|
|
|
|
output, _ := json.Marshal(map[string]interface{}{
|
|
"video_path": videoPath,
|
|
"task": task,
|
|
"model": t.videoProvider.ModelName(),
|
|
"text": resp.Content,
|
|
"prompt_tokens": resp.Usage.PromptTokens,
|
|
"completion_tokens": resp.Usage.CompletionTokens,
|
|
"total_tokens": resp.Usage.TotalTokens,
|
|
})
|
|
return &ToolResult{ToolName: "video_analyze", Success: true, Data: string(output)}, nil
|
|
}
|