fix: 修复 AI 回复无法送达发送者 + 重复消息 + action角色泄露 + OS环境支持

广播逻辑重构:
- AI 回复 (stream_start/response/stream_segments/multi_message/stream_end) 改用 broadcastToUser 发送给所有客户端
- 用户消息回显保持 broadcastToUserExcept 排除发送者

消息去重与角色修复:
- CacheMessage(user) 移至回复生成后，避免本轮 LLM 调用出现重复用户消息
- action 角色消息在 DB 存储时映射为 assistant，DeepSeek 等模型不支持自定义角色
- stream_end defer 机制确保错误路径也会终止客户端思考指示器

OS 完整环境支持:
- host 包重构为 HostBackend 接口 + Direct/WSL/Docker 三种后端
- 新增 os_exec/os_file/os_system 工具供 AI 在完整 Linux 环境中自由操作

其他:
- 视觉模型注入 + 图片预处理后清空 Images 避免传给 Chat 模型
- 图片 URL 相对路径→绝对 URL 转换
- DevTools 链路追踪页面 + 重启修复
- 记忆搜索模糊匹配增强
- 后台思考定时调度支持
- 管理后台页面 (模型配置/用户管理等)
- docs/api 更新广播机制说明

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 12:46:17 +08:00
parent aac64ed8b7
commit 91c9ee4b2d
49 changed files with 5032 additions and 299 deletions
@@ -8,6 +8,17 @@ type EnrichmentData struct {
 	ThoughtOutline string
 	IoTSummary     string
 	KnowledgeInfo  string
+
+	// Pending tool results from async execution (keyed by tool call ID)
+	PendingToolResults []PendingToolResult
+}
+
+// PendingToolResult is the record kept for a tool call whose execution finished asynchronously; it is appended to EnrichmentData.PendingToolResults.
+type PendingToolResult struct {
+	ToolCallID string `json:"tool_call_id"` // ID of the tool call this result belongs to
+	ToolName   string `json:"tool_name"` // name of the tool that was executed
+	Result     string `json:"result"` // result payload as a string (executeAsyncAndStore stores the JSON-encoded tool result here)
+	Success    bool   `json:"success"` // whether the tool reported success
 }

 // SessionEnrichmentStore is a thread-safe per-session cache for async
@@ -25,8 +36,15 @@ func NewEnrichmentStore() *SessionEnrichmentStore {
 	}
 }

-// Get returns stored enrichment for a session and clears it (one-shot consumption).
+// Get returns the enrichment stored for sessionID without removing it (nil when the session has no entry); the lookup runs under the read lock, and the returned pointer is the stored value itself, not a copy.
 func (s *SessionEnrichmentStore) Get(sessionID string) *EnrichmentData {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.data[sessionID]
+}
+
+// Pop returns stored enrichment for a session and clears it (one-shot consumption).
+func (s *SessionEnrichmentStore) Pop(sessionID string) *EnrichmentData {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	d, ok := s.data[sessionID]
@@ -45,3 +63,32 @@ func (s *SessionEnrichmentStore) Store(sessionID string, d *EnrichmentData) {
 	s.data[sessionID] = d
 	s.mu.Unlock()
 }
+
+// AppendToolResult appends a completed tool result to the enrichment data of
+// sessionID, creating the entry when the session has none yet. The whole
+// operation runs under the store's write lock.
+func (s *SessionEnrichmentStore) AppendToolResult(sessionID string, r PendingToolResult) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Check the value for nil instead of using the comma-ok form: a key can be
+	// present with a nil value (Store accepts a nil *EnrichmentData), and
+	// appending through that nil pointer would panic.
+	d := s.data[sessionID]
+	if d == nil {
+		d = &EnrichmentData{}
+		s.data[sessionID] = d
+	}
+	d.PendingToolResults = append(d.PendingToolResults, r)
+}
+
+// ---- Global pending tool store (used by Synthesizer for async tool results) ----
+
+var (
+	// globalPendingToolStore is the package-level store instance; it stays nil
+	// until InitGlobalPendingToolStore has run.
+	globalPendingToolStore *SessionEnrichmentStore
+	// pendingToolStoreOnce makes sure the store is created at most once.
+	pendingToolStoreOnce sync.Once
+)
+
+// InitGlobalPendingToolStore creates the package-level store on its first
+// call; later calls do nothing.
+func InitGlobalPendingToolStore() {
+	create := func() {
+		globalPendingToolStore = NewEnrichmentStore()
+	}
+	pendingToolStoreOnce.Do(create)
+}
+
+// GetGlobalPendingToolStore returns the package-level store, which is nil when
+// InitGlobalPendingToolStore has not been called yet.
+func GetGlobalPendingToolStore() *SessionEnrichmentStore {
+	store := globalPendingToolStore
+	return store
+}
@@ -38,6 +38,7 @@ type Orchestrator struct {
 	msgScheduler    *scheduler.MessageScheduler
 	emotionTracker  *persona.EmotionTracker
 	toolRegistry    *plgManager.ToolRegistry
+	visionProvider  llm.LLMProvider // 视觉模型 (图片预处理/OCR)
 }

 // SetResponseCache sets the response cache (optional, for Phase 0.2).
@@ -71,6 +72,11 @@ func (o *Orchestrator) SetToolRegistry(tr *plgManager.ToolRegistry) {
 	o.synthesizer.toolRegistry = tr
 }

+// SetVisionProvider stores vp as the provider used for image preprocessing (ProcessInput only runs preprocessImages when this field is non-nil); the assignment takes no lock.
+func (o *Orchestrator) SetVisionProvider(vp llm.LLMProvider) {
+	o.visionProvider = vp
+}
+
 // getBus returns the bus or a nop fallback.
 func (o *Orchestrator) getBus() bus.Bus {
 	if o.eventBus == nil {
@@ -149,7 +155,27 @@ func (o *Orchestrator) ProcessInput(
 			UserID:    params.UserID,
 		})

-		// 1. 意图分析
+		// 0.5 图片预处理: 使用视觉模型分析图片，将描述注入消息
+	if len(params.Images) > 0 && o.visionProvider != nil {
+		startTime := time.Now()
+		augmented := o.preprocessImages(ctx, params.Message, params.Images)
+		if augmented != params.Message {
+			params.Message = augmented
+			logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
+				time.Since(startTime), len([]rune(params.Message))-len([]rune(augmented))+len([]rune(params.Message)), len([]rune(augmented)))
+		}
+		// 预处理后清空原始图片，避免后续传给不支持多模态的 Chat 模型
+		params.Images = nil
+	} else if len(params.Images) > 0 {
+		// 未配置 Vision 模型时，告知用户该模型不支持图片，并清空图片避免报错
+		if params.Message == "" {
+			params.Message = "(用户发送了一张图片，但当前未配置视觉模型，无法识别图片内容)"
+		}
+		logger.Printf("[orchestrator] 视觉模型未配置，丢弃 %d 张图片", len(params.Images))
+		params.Images = nil
+	}
+
+	// 1. 意图分析
 		startTime := time.Now()
 		intent, err := o.intentAnalyzer.Analyze(ctx, params.Message)
 		if err != nil || intent == nil {
@@ -247,17 +273,39 @@ func (o *Orchestrator) ProcessInput(
 			resultCh = o.subManager.Dispatch(subCtx, intent, params.Message, createParams)
 		}

+		// 3.5 确保全局工具结果存储已初始化
+		InitGlobalPendingToolStore()
+
 		// 4. 加载上一轮异步完成的子会话富化结果
 		var prevEnrichment *EnrichmentData
 		if o.enrichmentStore != nil {
-			prevEnrichment = o.enrichmentStore.Get(params.SessionID)
-			if prevEnrichment != nil {
-				logger.Printf("[orchestrator] 加载上一轮富化结果: memory=%t thought=%t iot=%t knowledge=%t",
-					prevEnrichment.MemorySummary != "",
-					prevEnrichment.ThoughtOutline != "",
-					prevEnrichment.IoTSummary != "",
-					prevEnrichment.KnowledgeInfo != "")
+			prevEnrichment = o.enrichmentStore.Pop(params.SessionID)
+			// Also merge any pending tool results from the global store
+			if globalStore := GetGlobalPendingToolStore(); globalStore != nil {
+				if toolData := globalStore.Pop(params.SessionID); toolData != nil && len(toolData.PendingToolResults) > 0 {
+					if prevEnrichment == nil {
+						prevEnrichment = &EnrichmentData{}
+					}
+					prevEnrichment.PendingToolResults = append(prevEnrichment.PendingToolResults, toolData.PendingToolResults...)
+					logger.Printf("[orchestrator] 合并后台工具结果 %d 条", len(toolData.PendingToolResults))
+				}
 			}
+		} else {
+			// Still check global store even if enrichmentStore is not set
+			if globalStore := GetGlobalPendingToolStore(); globalStore != nil {
+				if toolData := globalStore.Pop(params.SessionID); toolData != nil && len(toolData.PendingToolResults) > 0 {
+					prevEnrichment = toolData
+					logger.Printf("[orchestrator] 加载后台工具结果 %d 条", len(toolData.PendingToolResults))
+				}
+			}
+		}
+			if prevEnrichment != nil {
+			logger.Printf("[orchestrator] 加载上一轮富化结果: memory=%t thought=%t iot=%t knowledge=%t tools=%d",
+				prevEnrichment.MemorySummary != "",
+				prevEnrichment.ThoughtOutline != "",
+				prevEnrichment.IoTSummary != "",
+				prevEnrichment.KnowledgeInfo != "",
+				len(prevEnrichment.PendingToolResults))
 		}

 		// 5. 先构建基础综合参数（不含子会话结果），开始合成
@@ -284,6 +332,7 @@ func (o *Orchestrator) ProcessInput(
 			synthParams.ThoughtOutline = prevEnrichment.ThoughtOutline
 			synthParams.IoTSummary = prevEnrichment.IoTSummary
 			synthParams.KnowledgeInfo = prevEnrichment.KnowledgeInfo
+			synthParams.PendingToolResults = prevEnrichment.PendingToolResults
 		}

 		// 异步收集子会话结果，存入 enrichmentStore 供下一轮使用
@@ -324,7 +373,7 @@ func (o *Orchestrator) ProcessInput(
 		}()

 		// 5. 调用 Synthesizer 流式生成最终回复
-		chunkCh, err := o.synthesizer.Synthesize(ctx, synthParams)
+		chunkCh, err := o.synthesizer.Synthesize(ctx, synthParams, eventCh)
 		if err != nil {
 			logger.Printf("[orchestrator] 综合器启动失败: %v", err)
 			eventCh <- model.StreamEvent{
@@ -601,6 +650,46 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
 	}
 }

+// preprocessImages asks the vision provider to analyze each image and returns
+// the user message augmented with the resulting descriptions.
+//
+//   - message == "": the descriptions themselves, joined by blank lines,
+//     become the message.
+//   - message != "": each description is appended to the message as an
+//     annotation labelled with the image's 1-based position in images.
+//
+// Images whose analysis fails or comes back empty are skipped. When no image
+// yields a description, message is returned unchanged.
+func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
+	prompt := "请详细描述这张图片的内容，包括场景、物体、人物、文字（如有）、颜色、氛围等所有视觉信息。"
+	if message != "" {
+		prompt = fmt.Sprintf("用户的问题是：「%s」\n\n请根据用户的问题，分析这张图片中相关的视觉信息，帮助回答用户的问题。如果图片中有文字，请完整提取。", message)
+	}
+
+	// Keep the original image number next to each description so that a
+	// skipped image does not shift the labels of the images after it.
+	type imageDescription struct {
+		imageNo int
+		text    string
+	}
+	descriptions := make([]imageDescription, 0, len(images))
+	for i, img := range images {
+		resp, err := o.visionProvider.Chat(ctx, []model.LLMMessage{
+			{Role: model.RoleUser, Content: prompt, Images: []string{img}},
+		})
+		if err != nil {
+			// Logged with the same 1-based number used in the annotations.
+			logger.Printf("[orchestrator] 图片 %d 预处理失败: %v", i+1, err)
+			continue
+		}
+		if resp.Content != "" {
+			descriptions = append(descriptions, imageDescription{imageNo: i + 1, text: resp.Content})
+		}
+	}
+
+	if len(descriptions) == 0 {
+		return message
+	}
+
+	var sb strings.Builder
+	if message == "" {
+		for i, d := range descriptions {
+			if i > 0 {
+				sb.WriteString("\n\n")
+			}
+			sb.WriteString(d.text)
+		}
+		return sb.String()
+	}
+
+	sb.WriteString(message)
+	for _, d := range descriptions {
+		fmt.Fprintf(&sb, "\n\n[图片%d的视觉分析]: %s", d.imageNo, d.text)
+	}
+	return sb.String()
+}
+
 // Ensure time, memory are used
 var _ = time.Now
 var _ = memory.NewRetriever
@@ -14,7 +14,7 @@ var codeBlockPattern = regexp.MustCompile("`{3}([^\n]*)\n([\\s\\S]*?)`{3}")
 var markdownPatterns = []*regexp.Regexp{
 	regexp.MustCompile(`^#{1,6}\s`),                // headings
 	regexp.MustCompile(`\*\*[^*]+\*\*`),             // bold
-	regexp.MustCompile(`(?<!\*)\*[^*]+\*(?!\*)`),    // italic (single *)
+	regexp.MustCompile(`(?:^|[^*])\*([^*]+)\*(?:[^*]|$)`), // italic (*text*)
 	regexp.MustCompile(`\[([^\]]+)\]\(([^\)]+)\)`),   // links [text](url)
 	regexp.MustCompile(`^[\-\*]\s`),                 // unordered list
 	regexp.MustCompile(`^\d+\.\s`),                  // ordered list
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"time"

 	"github.com/yourname/cyrene-ai/ai-core/internal/llm"
 	"github.com/yourname/cyrene-ai/ai-core/internal/model"
@@ -30,23 +31,25 @@ func NewSynthesizer(llmAdapter *llm.Adapter, toolRegistry *plgManager.ToolRegist

 // SynthesizeParams carries the inputs passed to Synthesize.
 type SynthesizeParams struct {
-	UserID         string
-	SessionID      string
-	UserMessage    string
-	Images         []string           // 图片 base64 data URL (多模态)
-	Nickname       string
-	PersonaPrompt  string             // 完整人格提示词
-	DialogHistory  []model.LLMMessage // 对话历史
-	MemorySummary  string             // 记忆检索摘要
-	ThoughtOutline string             // 通用对话思考
-	IoTSummary     string             // IoT 操作摘要
-	DeviceContext  string             // 设备状态上下文
-	KnowledgeInfo  string             // 知识库检索摘要
-	Mode           string             // text / voice_assistant
+	UserID             string
+	SessionID          string
+	UserMessage        string
+	Images             []string                    // image base64 data URLs (multimodal)
+	Nickname           string
+	PersonaPrompt      string                      // full persona prompt
+	DialogHistory      []model.LLMMessage           // dialog history
+	MemorySummary      string                      // memory retrieval summary
+	ThoughtOutline     string                      // general-dialog thought outline
+	IoTSummary         string                      // IoT operation summary
+	DeviceContext      string                      // device state context
+	KnowledgeInfo      string                      // knowledge-base retrieval summary
+	PendingToolResults []PendingToolResult         // tool results completed asynchronously during the previous turn
+	Mode               string                      // text / voice_assistant
 }

-// Synthesize 综合所有子会话结果，流式生成最终回复
-func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (<-chan llm.StreamChunk, error) {
+// Synthesize 综合所有子会话结果，流式生成最终回复。
+// eventCh receives tool progress events; pass nil to suppress.
+func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams, eventCh chan<- model.StreamEvent) (<-chan llm.StreamChunk, error) {
 	messages := s.buildSynthesizeMessages(params)

 	logger.Printf("[synthesizer] 开始综合 (上下文 %d 条消息)", len(messages))
@@ -62,7 +65,9 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
 		return nil, err
 	}

-	maxRounds := 5
+	const toolDeadline = 8 * time.Second
+	const maxRounds = 5
+
 	for round := 0; len(resp.ToolCalls) > 0 && round < maxRounds; round++ {
 		logger.Printf("[synthesizer] LLM 请求 %d 个工具调用 (round=%d)", len(resp.ToolCalls), round)

@@ -80,7 +85,12 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
 				args = make(map[string]interface{})
 			}

-			result, execErr := s.toolRegistry.Execute(ctx, tc.Name, args)
+			s.emitToolProgress(eventCh, tc.Name, "started", 0, "正在执行 "+tc.Name)
+
+			toolCtx, cancel := context.WithTimeout(ctx, toolDeadline)
+			result, execErr := s.toolRegistry.Execute(toolCtx, tc.Name, args)
+			cancel()
+
 			if execErr != nil {
 				logger.Printf("[synthesizer] 工具 %s 执行失败: %v", tc.Name, execErr)
 			}
@@ -88,6 +98,19 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
 				result = &plgSDK.ToolResult{ToolName: tc.Name, Success: false, Error: execErr.Error()}
 			}

+			// Async fallback: if tool timed out, store for next turn
+			if toolCtx.Err() == context.DeadlineExceeded {
+				s.emitToolProgress(eventCh, tc.Name, "running", 0.5, tc.Name+" 执行时间较长，转入后台继续...")
+				go s.executeAsyncAndStore(tc, args, params.SessionID, eventCh)
+				result = &plgSDK.ToolResult{
+					ToolName: tc.Name,
+					Success:  true,
+					Output:   fmt.Sprintf("[后台执行中] %s 正在后台运行，结果将在下一轮对话中返回。你可以继续聊天。", tc.Name),
+				}
+			} else {
+				s.emitToolProgress(eventCh, tc.Name, "completed", 1.0, "")
+			}
+
 			resultJSON, _ := json.Marshal(result)
 			messages = append(messages, model.LLMMessage{
 				Role:       model.RoleTool,
@@ -120,6 +143,51 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
 	return ch, nil
 }

+// emitToolProgress publishes a StreamToolProgress event for the tool `name`
+// on eventCh. A nil eventCh turns the call into a no-op. The send never
+// blocks: when the channel cannot take the event right away, it is dropped.
+func (s *Synthesizer) emitToolProgress(eventCh chan<- model.StreamEvent, name, status string, progress float64, message string) {
+	if eventCh != nil {
+		evt := model.StreamEvent{
+			Type: model.StreamToolProgress,
+			ToolProgress: &model.ToolProgressInfo{
+				ToolName: name,
+				Status:   status,
+				Progress: progress,
+				Message:  message,
+			},
+		}
+		select {
+		case eventCh <- evt:
+		default:
+			// No receiver is ready; progress events are best-effort.
+		}
+	}
+}
+
+// executeAsyncAndStore runs the tool call tc in the background and records its
+// outcome in the global pending tool store under sessionID, so the next turn
+// can pick it up.
+//
+// The execution uses its own 60-second context derived from
+// context.Background(). Progress is reported on eventCh through
+// emitToolProgress ("failed" or "completed").
+//
+// A failed execution is stored as well (Success=false, with the error in the
+// JSON payload): the caller has already told the model that a result will be
+// delivered on the next turn, so dropping failures would leave that promise
+// unanswered. Nothing is stored when the global store is nil.
+func (s *Synthesizer) executeAsyncAndStore(tc model.ToolCall, args map[string]interface{}, sessionID string, eventCh chan<- model.StreamEvent) {
+	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+	defer cancel()
+
+	result, err := s.toolRegistry.Execute(ctx, tc.Name, args)
+	if err != nil {
+		logger.Printf("[synthesizer] 后台工具 %s 执行失败: %v", tc.Name, err)
+		s.emitToolProgress(eventCh, tc.Name, "failed", 1.0, tc.Name+" 后台执行失败: "+err.Error())
+		if result == nil {
+			// Same failure shape the synchronous path in Synthesize builds.
+			result = &plgSDK.ToolResult{ToolName: tc.Name, Success: false, Error: err.Error()}
+		}
+	} else {
+		s.emitToolProgress(eventCh, tc.Name, "completed", 1.0, tc.Name+" 后台执行完成")
+	}
+
+	pending := PendingToolResult{
+		ToolCallID: tc.ID,
+		ToolName:   tc.Name,
+		Success:    err == nil && result != nil && result.Success,
+	}
+	resultJSON, marshalErr := json.Marshal(result)
+	if marshalErr != nil {
+		// Surface the encoding problem as a failed result instead of storing
+		// an empty payload.
+		logger.Printf("[synthesizer] 后台工具 %s 结果序列化失败: %v", tc.Name, marshalErr)
+		pending.Success = false
+		pending.Result = marshalErr.Error()
+	} else {
+		pending.Result = string(resultJSON)
+	}
+
+	if store := GetGlobalPendingToolStore(); store != nil {
+		store.AppendToolResult(sessionID, pending)
+	}
+}
+
 // buildSynthesizeMessages 构建综合用的 LLM 消息列表
 func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.LLMMessage {
 	var messages []model.LLMMessage
@@ -174,6 +242,23 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
 		})
 	}

+	// 注入上一轮异步工具执行结果
+	if len(params.PendingToolResults) > 0 {
+		var sb strings.Builder
+		sb.WriteString("【上一轮后台工具执行结果】\n")
+		for _, ptr := range params.PendingToolResults {
+			status := "成功"
+			if !ptr.Success {
+				status = "失败"
+			}
+			sb.WriteString(fmt.Sprintf("- %s (%s): %s\n", ptr.ToolName, status, ptr.Result))
+		}
+		messages = append(messages, model.LLMMessage{
+			Role:    model.RoleSystem,
+			Content: sb.String(),
+		})
+	}
+
 	// 注入对话历史
 	if len(params.DialogHistory) > 0 {
 		messages = append(messages, params.DialogHistory...)