Files
Cyrene/backend/ai-core/internal/tools/video_tool.go
T
AskaEth a9c79d7887 feat: ASR语音转写管线 + 群聊身份混淆修复
- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息
- 模型名称全部从models.json路由获取,无硬编码
- 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题
  - 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆
  - 群聊上下文指令改为肯定性表述,移除具体名称提及
- trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒
- 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒)
- 新增video_tool插件模板
- 优化OpenAI adapter reasoning_content处理

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 16:46:47 +08:00

89 lines
2.9 KiB
Go

package tools
import (
"context"
"encoding/json"
"fmt"
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/llm"
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/model"
)
// VideoTool is the tool implementation behind "video_analyze": it forwards a
// video reference together with a task prompt to a multimodal LLM provider.
type VideoTool struct {
	// videoProvider is the LLM backend that receives the video request.
	// It may be nil, in which case Execute returns a failed result saying
	// the video model is not configured.
	videoProvider llm.LLMProvider
}
// NewVideoTool returns a VideoTool backed by videoProvider.
//
// videoProvider may be nil: the tool is still constructed, and Execute then
// returns a failed result instead of calling a model.
func NewVideoTool(videoProvider llm.LLMProvider) *VideoTool {
	tool := new(VideoTool)
	tool.videoProvider = videoProvider
	return tool
}
// Definition returns the metadata for the "video_analyze" tool: its name, a
// description, and a JSON-Schema-style parameter object with two required
// string properties, "video_path" and "task".
func (t *VideoTool) Definition() ToolDefinition {
	// Schema for the video location argument.
	videoPathParam := map[string]interface{}{
		"type":        "string",
		"description": "视频文件路径或URL",
	}

	// Schema for the analysis mode; the enum lists the accepted task names.
	taskParam := map[string]interface{}{
		"type":        "string",
		"description": "分析任务: describe(内容描述), summarize(摘要), analyze(综合分析)",
		"enum":        []string{"describe", "summarize", "analyze"},
	}

	schema := map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"video_path": videoPathParam,
			"task":       taskParam,
		},
		"required": []string{"video_path", "task"},
	}

	return ToolDefinition{
		Name:        "video_analyze",
		Description: "分析视频内容。传入视频文件路径或URL,返回视频内容的文字描述和分析结果。支持场景理解、动作识别、文字提取等。",
		Parameters:  schema,
	}
}
// videoTaskPrompts maps each supported task name to the instruction text that
// is sent to the model as the user message for that task.
var videoTaskPrompts = map[string]string{
	// Detailed description of the video's content.
	"describe": "请详细描述这个视频的内容,包括场景、人物、动作、对话要点等。",
	// Short summary of the main content.
	"summarize": "请用简洁的语言总结这个视频的主要内容。",
	// Combined analysis; Execute also uses this entry as its fallback prompt.
	"analyze": "请综合分析这个视频,包括内容描述、关键片段、文字信息(如有)、以及你的理解。",
}
// Execute runs the "video_analyze" tool.
//
// Arguments read from args:
//   - "video_path": string, required. A missing, empty, or non-string value
//     produces a failed result.
//   - "task": string selecting an entry of videoTaskPrompts. A missing, empty,
//     non-string, or unrecognised value falls back to "analyze".
//
// The selected prompt and the video path are sent to the configured provider
// as a single user message. On success, Data holds a JSON object with the
// video path, the task that was actually run, the model name, the model's
// text, and token usage.
//
// All failures (bad arguments, unconfigured provider, provider error,
// serialization error) are reported through a ToolResult with Success=false;
// the returned error is always nil.
func (t *VideoTool) Execute(ctx context.Context, args map[string]interface{}) (*ToolResult, error) {
	const (
		toolName    = "video_analyze"
		defaultTask = "analyze"
	)

	// fail builds the failed-result shape shared by every error path.
	fail := func(msg string) (*ToolResult, error) {
		return &ToolResult{ToolName: toolName, Success: false, Error: msg}, nil
	}

	// A failed type assertion yields "", which is rejected just below.
	videoPath, _ := args["video_path"].(string)
	if videoPath == "" {
		return fail("video_path 参数不能为空")
	}

	// Resolve the prompt. When the task is empty or unknown, normalise the
	// task name as well so the reported "task" always matches the prompt
	// that was really sent to the model.
	task, _ := args["task"].(string)
	prompt := videoTaskPrompts[task]
	if prompt == "" {
		task = defaultTask
		prompt = videoTaskPrompts[defaultTask]
	}

	if t.videoProvider == nil {
		return fail("视频理解模型未配置")
	}

	messages := []model.LLMMessage{
		{Role: model.RoleUser, Content: prompt, VideoURLs: []string{videoPath}},
	}
	resp, err := t.videoProvider.Chat(ctx, messages)
	if err != nil {
		return fail(fmt.Sprintf("视频模型调用失败: %v", err))
	}

	output, err := json.Marshal(map[string]interface{}{
		"video_path":        videoPath,
		"task":              task,
		"model":             t.videoProvider.ModelName(),
		"text":              resp.Content,
		"prompt_tokens":     resp.Usage.PromptTokens,
		"completion_tokens": resp.Usage.CompletionTokens,
		"total_tokens":      resp.Usage.TotalTokens,
	})
	if err != nil {
		// Do not report success with an empty payload if encoding fails.
		return fail(fmt.Sprintf("结果序列化失败: %v", err))
	}

	return &ToolResult{ToolName: toolName, Success: true, Data: string(output)}, nil
}