feat: Phase 6.3 视觉理解 — 多模态图片输入 + OCR/Vision 工具 + 图片编码管线

- LLMMessage 新增 Images 字段支持多模态 content array
- OpenAIProvider 支持 image_url content parts
- VisionTool: 图片读取 + base64 编码 + OCR/场景描述/综合分析
- 对话管道全线支持 images 参数传递 (Gateway->Orchestrator->Synthesizer->LLM)
- 自动根据图片有无构建 text-only 或 multimodal content

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 22:28:42 +08:00
parent 38b36fc5ad
commit 9a8fb8d0ce
7 changed files with 205 additions and 24 deletions
@@ -158,6 +158,9 @@ func main() {
 			toolRegistry.Register(tools.NewHostFileTool(hostManager))
 			toolRegistry.Register(tools.NewHostSystemTool(hostManager))
 		}
+
+		// Phase 6.3: 视觉理解工具
+		toolRegistry.Register(tools.NewVisionTool())
 		log.Printf("工具注册中心已就绪: %d 个工具 (%v)", len(toolRegistry.ListTools()), toolRegistry.ListTools())
 	}

@@ -431,11 +434,12 @@ func handleChat(

 	// 解析请求
 	var req struct {
-		UserID    string `json:"user_id"`
-		SessionID string `json:"session_id"`
-		Message   string `json:"message"`
-		Mode      string `json:"mode"`
-		Nickname  string `json:"nickname,omitempty"`
+		UserID    string   `json:"user_id"`
+		SessionID string   `json:"session_id"`
+		Message   string   `json:"message"`
+		Images    []string `json:"images,omitempty"` // images as base64 data URLs
+		Mode      string   `json:"mode"`
+		Nickname  string   `json:"nickname,omitempty"`
 	}
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 		http.Error(w, "无效的请求体", http.StatusBadRequest)
@@ -480,6 +484,7 @@ func handleChat(
 		UserID:    req.UserID,
 		SessionID: req.SessionID,
 		Message:   req.Message,
+		Images:    req.Images,
 		Mode:      req.Mode,
 		Nickname:  userNickname,
 	})