package tools

import (
	"context"
	"encoding/json"
	"fmt"

	"git.yeij.top/AskaEth/Cyrene/ai-core/internal/llm"
	"git.yeij.top/AskaEth/Cyrene/ai-core/internal/model"
)

const (
	// videoAnalyzeToolName is the tool name used in the definition and in
	// every result produced by VideoTool.
	videoAnalyzeToolName = "video_analyze"

	// defaultVideoTask is the task applied when the caller omits the task
	// argument or passes one that has no entry in videoTaskPrompts.
	defaultVideoTask = "analyze"
)

// VideoTool enables video understanding via a multimodal LLM.
type VideoTool struct {
	videoProvider llm.LLMProvider
}

// NewVideoTool creates a video tool backed by videoProvider.
//
// videoProvider may be nil; in that case Execute does not call any model and
// returns a failed ToolResult stating that the video model is not configured.
func NewVideoTool(videoProvider llm.LLMProvider) *VideoTool {
	return &VideoTool{videoProvider: videoProvider}
}

// Definition returns the tool schema: the tool name, a description, and a
// JSON-schema-style parameter object with two required string parameters,
// "video_path" and "task" (one of describe, summarize, analyze).
func (t *VideoTool) Definition() ToolDefinition {
	return ToolDefinition{
		Name:        videoAnalyzeToolName,
		Description: "分析视频内容。传入视频文件路径或URL,返回视频内容的文字描述和分析结果。支持场景理解、动作识别、文字提取等。",
		Parameters: map[string]interface{}{
			"type": "object",
			"properties": map[string]interface{}{
				"video_path": map[string]interface{}{
					"type":        "string",
					"description": "视频文件路径或URL",
				},
				"task": map[string]interface{}{
					"type":        "string",
					"description": "分析任务: describe(内容描述), summarize(摘要), analyze(综合分析)",
					"enum":        []string{"describe", "summarize", "analyze"},
				},
			},
			"required": []string{"video_path", "task"},
		},
	}
}

// videoTaskPrompts maps each supported task name to the prompt sent to the
// video model alongside the video.
var videoTaskPrompts = map[string]string{
	"describe":  "请详细描述这个视频的内容,包括场景、人物、动作、对话要点等。",
	"summarize": "请用简洁的语言总结这个视频的主要内容。",
	"analyze":   "请综合分析这个视频,包括内容描述、关键片段、文字信息(如有)、以及你的理解。",
}

// videoAnalyzeFailure builds the failed ToolResult returned by Execute for
// the given user-facing message.
func videoAnalyzeFailure(msg string) *ToolResult {
	return &ToolResult{ToolName: videoAnalyzeToolName, Success: false, Error: msg}
}

// Execute runs one video analysis request.
//
// args is expected to carry a non-empty string "video_path" and an optional
// string "task". A missing, non-string, or unrecognized task is replaced by
// the default task, and the task reported in the output is the one whose
// prompt was actually sent.
//
// All failures (missing path, unconfigured provider, provider error, output
// encoding error) are reported through a ToolResult with Success set to false;
// the returned error is always nil. On success, Data holds a JSON object with
// the video path, task, model name, response text, and token usage.
func (t *VideoTool) Execute(ctx context.Context, args map[string]interface{}) (*ToolResult, error) {
	// A failed type assertion leaves the zero value, so a non-string
	// argument is handled the same way as a missing one.
	videoPath, _ := args["video_path"].(string)
	if videoPath == "" {
		return videoAnalyzeFailure("video_path 参数不能为空"), nil
	}

	task, _ := args["task"].(string)
	prompt := videoTaskPrompts[task]
	if prompt == "" {
		// Normalize the task itself, not only the prompt, so the reported
		// task never disagrees with the analysis that was performed.
		task = defaultVideoTask
		prompt = videoTaskPrompts[defaultVideoTask]
	}

	if t.videoProvider == nil {
		return videoAnalyzeFailure("视频理解模型未配置"), nil
	}

	messages := []model.LLMMessage{
		{Role: model.RoleUser, Content: prompt, VideoURLs: []string{videoPath}},
	}
	resp, err := t.videoProvider.Chat(ctx, messages)
	if err != nil {
		return videoAnalyzeFailure(fmt.Sprintf("视频模型调用失败: %v", err)), nil
	}

	output, err := json.Marshal(map[string]interface{}{
		"video_path":        videoPath,
		"task":              task,
		"model":             t.videoProvider.ModelName(),
		"text":              resp.Content,
		"prompt_tokens":     resp.Usage.PromptTokens,
		"completion_tokens": resp.Usage.CompletionTokens,
		"total_tokens":      resp.Usage.TotalTokens,
	})
	if err != nil {
		// Do not report success with an empty payload.
		return videoAnalyzeFailure(fmt.Sprintf("结果序列化失败: %v", err)), nil
	}

	return &ToolResult{ToolName: videoAnalyzeToolName, Success: true, Data: string(output)}, nil
}