fix: 修复 AI 回复无法送达发送者 + 重复消息 + action角色泄露 + OS环境支持

广播逻辑重构:
- AI 回复 (stream_start/response/stream_segments/multi_message/stream_end) 改用 broadcastToUser 发送给所有客户端
- 用户消息回显保持 broadcastToUserExcept 排除发送者

消息去重与角色修复:
- CacheMessage(user) 移至回复生成后,避免本轮 LLM 调用出现重复用户消息
- action 角色消息在 DB 存储时映射为 assistant,DeepSeek 等模型不支持自定义角色
- stream_end defer 机制确保错误路径也会终止客户端思考指示器

OS 完整环境支持:
- host 包重构为 HostBackend 接口 + Direct/WSL/Docker 三种后端
- 新增 os_exec/os_file/os_system 工具供 AI 在完整 Linux 环境中自由操作

其他:
- 视觉模型注入 + 图片预处理后清空 Images 避免传给 Chat 模型
- 图片 URL 相对路径→绝对 URL 转换
- DevTools 链路追踪页面 + 重启修复
- 记忆搜索模糊匹配增强
- 后台思考定时调度支持
- 管理后台页面 (模型配置/用户管理等)
- docs/api 更新广播机制说明

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 12:46:17 +08:00
parent aac64ed8b7
commit 91c9ee4b2d
49 changed files with 5032 additions and 299 deletions
@@ -8,6 +8,17 @@ type EnrichmentData struct {
ThoughtOutline string
IoTSummary string
KnowledgeInfo string
// PendingToolResults lists tool results that finished asynchronously; it is an ordered slice, and each entry carries its own tool call ID.
PendingToolResults []PendingToolResult
}
// PendingToolResult holds the result of a tool that completed asynchronously.
// Entries are produced by the synthesizer's background execution path and are
// rendered into a system message on a later turn.
type PendingToolResult struct {
// ToolCallID is the ID of the LLM tool call this result belongs to.
ToolCallID string `json:"tool_call_id"`
// ToolName is the name of the tool that was executed.
ToolName string `json:"tool_name"`
// Result is the tool output as text; the background executor stores the
// JSON encoding of the tool result here.
Result string `json:"result"`
// Success reports whether the tool run succeeded.
Success bool `json:"success"`
}
// SessionEnrichmentStore is a thread-safe per-session cache for async
@@ -25,8 +36,15 @@ func NewEnrichmentStore() *SessionEnrichmentStore {
}
}
// Get returns stored enrichment for a session and clears it (one-shot consumption).
// Get looks up the enrichment stored for sessionID under the read lock and
// leaves the entry in place, so repeated calls return the same data.
// It returns nil when nothing is stored for the session. The returned pointer
// is the one kept in the store, not a copy.
func (s *SessionEnrichmentStore) Get(sessionID string) *EnrichmentData {
	s.mu.RLock()
	entry := s.data[sessionID]
	s.mu.RUnlock()
	return entry
}
// Pop returns stored enrichment for a session and clears it (one-shot consumption).
func (s *SessionEnrichmentStore) Pop(sessionID string) *EnrichmentData {
s.mu.Lock()
defer s.mu.Unlock()
d, ok := s.data[sessionID]
@@ -45,3 +63,32 @@ func (s *SessionEnrichmentStore) Store(sessionID string, d *EnrichmentData) {
s.data[sessionID] = d
s.mu.Unlock()
}
// AppendToolResult adds a completed tool result to the session's enrichment
// data, creating the session entry on demand.
//
// The entry is (re)created whenever the stored pointer is nil, not only when
// the key is absent: a nil value stored under the key would otherwise be
// dereferenced by the append below. A store whose map has not been allocated
// yet is initialised lazily for the same reason.
func (s *SessionEnrichmentStore) AppendToolResult(sessionID string, r PendingToolResult) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.data == nil {
		s.data = make(map[string]*EnrichmentData)
	}
	d := s.data[sessionID]
	if d == nil {
		d = &EnrichmentData{}
		s.data[sessionID] = d
	}
	d.PendingToolResults = append(d.PendingToolResults, r)
}
// ---- Global pending tool store (used by Synthesizer for async tool results) ----

// Process-wide singleton state for asynchronously completed tool results.
var (
	// globalPendingToolStore stays nil until InitGlobalPendingToolStore runs.
	globalPendingToolStore *SessionEnrichmentStore
	// pendingToolStoreOnce guarantees the store is created at most once.
	pendingToolStoreOnce sync.Once
)

// InitGlobalPendingToolStore creates the singleton store on its first call;
// every later call is a no-op.
func InitGlobalPendingToolStore() {
	pendingToolStoreOnce.Do(initPendingToolStore)
}

// initPendingToolStore allocates the singleton; it is only run through
// pendingToolStoreOnce.
func initPendingToolStore() {
	globalPendingToolStore = NewEnrichmentStore()
}

// GetGlobalPendingToolStore returns the singleton store, or nil when
// InitGlobalPendingToolStore has not been called yet.
// NOTE(review): this read is not synchronised with the initialisation;
// confirm every caller runs after (or is started after) the init call.
func GetGlobalPendingToolStore() *SessionEnrichmentStore {
	store := globalPendingToolStore
	return store
}
@@ -38,6 +38,7 @@ type Orchestrator struct {
msgScheduler *scheduler.MessageScheduler
emotionTracker *persona.EmotionTracker
toolRegistry *plgManager.ToolRegistry
visionProvider llm.LLMProvider // 视觉模型 (图片预处理/OCR)
}
// SetResponseCache sets the response cache (optional, for Phase 0.2).
@@ -71,6 +72,11 @@ func (o *Orchestrator) SetToolRegistry(tr *plgManager.ToolRegistry) {
o.synthesizer.toolRegistry = tr
}
// SetVisionProvider sets the vision model provider for image preprocessing.
// ProcessInput only runs the image preprocessing step when a provider has
// been set here; while it is unset, incoming images are dropped instead.
func (o *Orchestrator) SetVisionProvider(vp llm.LLMProvider) {
o.visionProvider = vp
}
// getBus returns the bus or a nop fallback.
func (o *Orchestrator) getBus() bus.Bus {
if o.eventBus == nil {
@@ -149,7 +155,27 @@ func (o *Orchestrator) ProcessInput(
UserID: params.UserID,
})
// 1. 意图分析
// 0.5 Image preprocessing: let the vision model describe the images and
// inject the descriptions into the user message.
if len(params.Images) > 0 && o.visionProvider != nil {
	startTime := time.Now()
	// Measure the original message before it is overwritten below; computing
	// it afterwards would report the augmented length for both numbers.
	originalRunes := len([]rune(params.Message))
	augmented := o.preprocessImages(ctx, params.Message, params.Images)
	if augmented != params.Message {
		params.Message = augmented
		logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
			time.Since(startTime), originalRunes, len([]rune(augmented)))
	}
	// Drop the raw images once preprocessed so they are not forwarded to a
	// chat model without multimodal support.
	// NOTE(review): if every vision call failed and the message was empty,
	// the message stays empty here — confirm downstream handles that.
	params.Images = nil
} else if len(params.Images) > 0 {
	// No vision model configured: substitute a placeholder for an image-only
	// message and drop the images to avoid errors downstream.
	if params.Message == "" {
		params.Message = "(用户发送了一张图片,但当前未配置视觉模型,无法识别图片内容)"
	}
	logger.Printf("[orchestrator] 视觉模型未配置,丢弃 %d 张图片", len(params.Images))
	params.Images = nil
}
// 1. 意图分析
startTime := time.Now()
intent, err := o.intentAnalyzer.Analyze(ctx, params.Message)
if err != nil || intent == nil {
@@ -247,17 +273,39 @@ func (o *Orchestrator) ProcessInput(
resultCh = o.subManager.Dispatch(subCtx, intent, params.Message, createParams)
}
// 3.5 确保全局工具结果存储已初始化
InitGlobalPendingToolStore()
// 4. 加载上一轮异步完成的子会话富化结果
var prevEnrichment *EnrichmentData
if o.enrichmentStore != nil {
prevEnrichment = o.enrichmentStore.Get(params.SessionID)
if prevEnrichment != nil {
logger.Printf("[orchestrator] 加载上一轮富化结果: memory=%t thought=%t iot=%t knowledge=%t",
prevEnrichment.MemorySummary != "",
prevEnrichment.ThoughtOutline != "",
prevEnrichment.IoTSummary != "",
prevEnrichment.KnowledgeInfo != "")
prevEnrichment = o.enrichmentStore.Pop(params.SessionID)
// Also merge any pending tool results from the global store
if globalStore := GetGlobalPendingToolStore(); globalStore != nil {
if toolData := globalStore.Pop(params.SessionID); toolData != nil && len(toolData.PendingToolResults) > 0 {
if prevEnrichment == nil {
prevEnrichment = &EnrichmentData{}
}
prevEnrichment.PendingToolResults = append(prevEnrichment.PendingToolResults, toolData.PendingToolResults...)
logger.Printf("[orchestrator] 合并后台工具结果 %d 条", len(toolData.PendingToolResults))
}
}
} else {
// Still check global store even if enrichmentStore is not set
if globalStore := GetGlobalPendingToolStore(); globalStore != nil {
if toolData := globalStore.Pop(params.SessionID); toolData != nil && len(toolData.PendingToolResults) > 0 {
prevEnrichment = toolData
logger.Printf("[orchestrator] 加载后台工具结果 %d 条", len(toolData.PendingToolResults))
}
}
}
if prevEnrichment != nil {
logger.Printf("[orchestrator] 加载上一轮富化结果: memory=%t thought=%t iot=%t knowledge=%t tools=%d",
prevEnrichment.MemorySummary != "",
prevEnrichment.ThoughtOutline != "",
prevEnrichment.IoTSummary != "",
prevEnrichment.KnowledgeInfo != "",
len(prevEnrichment.PendingToolResults))
}
// 5. 先构建基础综合参数(不含子会话结果),开始合成
@@ -284,6 +332,7 @@ func (o *Orchestrator) ProcessInput(
synthParams.ThoughtOutline = prevEnrichment.ThoughtOutline
synthParams.IoTSummary = prevEnrichment.IoTSummary
synthParams.KnowledgeInfo = prevEnrichment.KnowledgeInfo
synthParams.PendingToolResults = prevEnrichment.PendingToolResults
}
// 异步收集子会话结果,存入 enrichmentStore 供下一轮使用
@@ -324,7 +373,7 @@ func (o *Orchestrator) ProcessInput(
}()
// 5. 调用 Synthesizer 流式生成最终回复
chunkCh, err := o.synthesizer.Synthesize(ctx, synthParams)
chunkCh, err := o.synthesizer.Synthesize(ctx, synthParams, eventCh)
if err != nil {
logger.Printf("[orchestrator] 综合器启动失败: %v", err)
eventCh <- model.StreamEvent{
@@ -601,6 +650,46 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
}
}
// preprocessImages asks the vision model to analyse each image and returns
// the user message augmented with the resulting descriptions.
//
//   - Image-only input (empty message): the descriptions, joined by blank
//     lines, become the message.
//   - Text plus images: each description is appended to the message as a
//     "[图片N的视觉分析]" annotation, where N is the 1-based position of the
//     image in images.
//
// Images whose analysis fails or comes back empty are skipped. If no image
// yields a description, message is returned unchanged. Processing stops early
// once ctx is done, so a cancelled request does not keep calling the model.
func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
	var prompt string
	if message == "" {
		prompt = "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。"
	} else {
		prompt = fmt.Sprintf("用户的问题是:「%s」\n\n请根据用户的问题,分析这张图片中相关的视觉信息,帮助回答用户的问题。如果图片中有文字,请完整提取。", message)
	}

	// Keep the source image position with every description so that a failed
	// image does not shift the labels of the images that follow it.
	type imageDescription struct {
		position int // 1-based index of the image in images
		text     string
	}
	descriptions := make([]imageDescription, 0, len(images))

	for i, img := range images {
		if err := ctx.Err(); err != nil {
			logger.Printf("[orchestrator] 图片预处理中止: %v", err)
			break
		}
		resp, err := o.visionProvider.Chat(ctx, []model.LLMMessage{
			{Role: model.RoleUser, Content: prompt, Images: []string{img}},
		})
		if err != nil {
			logger.Printf("[orchestrator] 图片 %d 预处理失败: %v", i, err)
			continue
		}
		if resp.Content != "" {
			descriptions = append(descriptions, imageDescription{position: i + 1, text: resp.Content})
		}
	}

	if len(descriptions) == 0 {
		return message
	}

	if message == "" {
		texts := make([]string, 0, len(descriptions))
		for _, d := range descriptions {
			texts = append(texts, d.text)
		}
		return strings.Join(texts, "\n\n")
	}

	var sb strings.Builder
	sb.WriteString(message)
	for _, d := range descriptions {
		fmt.Fprintf(&sb, "\n\n[图片%d的视觉分析]: %s", d.position, d.text)
	}
	return sb.String()
}
// Ensure time, memory are used
var _ = time.Now
var _ = memory.NewRetriever
@@ -14,7 +14,7 @@ var codeBlockPattern = regexp.MustCompile("`{3}([^\n]*)\n([\\s\\S]*?)`{3}")
var markdownPatterns = []*regexp.Regexp{
regexp.MustCompile(`^#{1,6}\s`), // headings
regexp.MustCompile(`\*\*[^*]+\*\*`), // bold
regexp.MustCompile(`(?<!\*)\*[^*]+\*(?!\*)`), // italic (single *)
regexp.MustCompile(`(?:^|[^*])\*([^*]+)\*(?:[^*]|$)`), // italic (*text*)
regexp.MustCompile(`\[([^\]]+)\]\(([^\)]+)\)`), // links [text](url)
regexp.MustCompile(`^[\-\*]\s`), // unordered list
regexp.MustCompile(`^\d+\.\s`), // ordered list
@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"strings"
"time"
"github.com/yourname/cyrene-ai/ai-core/internal/llm"
"github.com/yourname/cyrene-ai/ai-core/internal/model"
@@ -30,23 +31,25 @@ func NewSynthesizer(llmAdapter *llm.Adapter, toolRegistry *plgManager.ToolRegist
// SynthesizeParams 综合参数
type SynthesizeParams struct {
UserID string
SessionID string
UserMessage string
Images []string // 图片 base64 data URL (多模态)
Nickname string
PersonaPrompt string // 完整人格提示词
DialogHistory []model.LLMMessage // 对话历史
MemorySummary string // 记忆检索摘要
ThoughtOutline string // 通用对话思考
IoTSummary string // IoT 操作摘要
DeviceContext string // 设备状态上下文
KnowledgeInfo string // 知识库检索摘要
Mode string // text / voice_assistant
UserID string
SessionID string
UserMessage string
Images []string // 图片 base64 data URL (多模态)
Nickname string
PersonaPrompt string // 完整人格提示词
DialogHistory []model.LLMMessage // 对话历史
MemorySummary string // 记忆检索摘要
ThoughtOutline string // 通用对话思考
IoTSummary string // IoT 操作摘要
DeviceContext string // 设备状态上下文
KnowledgeInfo string // 知识库检索摘要
PendingToolResults []PendingToolResult // 上一轮异步完成的工具结果
Mode string // text / voice_assistant
}
// Synthesize 综合所有子会话结果,流式生成最终回复
func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (<-chan llm.StreamChunk, error) {
// Synthesize 综合所有子会话结果,流式生成最终回复
// eventCh receives tool progress events; pass nil to suppress.
func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams, eventCh chan<- model.StreamEvent) (<-chan llm.StreamChunk, error) {
messages := s.buildSynthesizeMessages(params)
logger.Printf("[synthesizer] 开始综合 (上下文 %d 条消息)", len(messages))
@@ -62,7 +65,9 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
return nil, err
}
maxRounds := 5
const toolDeadline = 8 * time.Second
const maxRounds = 5
for round := 0; len(resp.ToolCalls) > 0 && round < maxRounds; round++ {
logger.Printf("[synthesizer] LLM 请求 %d 个工具调用 (round=%d)", len(resp.ToolCalls), round)
@@ -80,7 +85,12 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
args = make(map[string]interface{})
}
result, execErr := s.toolRegistry.Execute(ctx, tc.Name, args)
s.emitToolProgress(eventCh, tc.Name, "started", 0, "正在执行 "+tc.Name)
toolCtx, cancel := context.WithTimeout(ctx, toolDeadline)
result, execErr := s.toolRegistry.Execute(toolCtx, tc.Name, args)
cancel()
if execErr != nil {
logger.Printf("[synthesizer] 工具 %s 执行失败: %v", tc.Name, execErr)
}
@@ -88,6 +98,19 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
result = &plgSDK.ToolResult{ToolName: tc.Name, Success: false, Error: execErr.Error()}
}
// Async fallback: if tool timed out, store for next turn
if toolCtx.Err() == context.DeadlineExceeded {
s.emitToolProgress(eventCh, tc.Name, "running", 0.5, tc.Name+" 执行时间较长,转入后台继续...")
go s.executeAsyncAndStore(tc, args, params.SessionID, eventCh)
result = &plgSDK.ToolResult{
ToolName: tc.Name,
Success: true,
Output: fmt.Sprintf("[后台执行中] %s 正在后台运行,结果将在下一轮对话中返回。你可以继续聊天。", tc.Name),
}
} else {
s.emitToolProgress(eventCh, tc.Name, "completed", 1.0, "")
}
resultJSON, _ := json.Marshal(result)
messages = append(messages, model.LLMMessage{
Role: model.RoleTool,
@@ -120,6 +143,51 @@ func (s *Synthesizer) Synthesize(ctx context.Context, params SynthesizeParams) (
return ch, nil
}
// emitToolProgress publishes a StreamToolProgress event describing the state
// of tool `name` on eventCh. Reporting is skipped when eventCh is nil, and the
// send never blocks: if the channel cannot take the event right away, the
// event is dropped.
func (s *Synthesizer) emitToolProgress(eventCh chan<- model.StreamEvent, name, status string, progress float64, message string) {
	if eventCh != nil {
		info := &model.ToolProgressInfo{
			ToolName: name,
			Status:   status,
			Progress: progress,
			Message:  message,
		}
		evt := model.StreamEvent{Type: model.StreamToolProgress, ToolProgress: info}

		select {
		case eventCh <- evt:
		default:
			// Receiver is not ready; progress events are best-effort.
		}
	}
}
// executeAsyncAndStore runs a tool in the background (detached from the
// request context, capped at 60s) and stores the outcome in the global
// pending tool store so the next turn of sessionID can see it.
//
// It is started on its own goroutine after the in-turn attempt timed out, so
// the tool is executed again from the start here.
//
// Differences from a plain fire-and-forget run:
//   - Failures are stored as well (Success=false, Result=error text). The LLM
//     was told the result would arrive next turn, so a failure must not
//     vanish silently.
//   - The result is stored before any progress event is emitted, so a problem
//     while reporting progress cannot lose the result.
//   - A panic is recovered and logged, because an unrecovered panic on this
//     goroutine would take the whole process down.
//     NOTE(review): sending on eventCh panics if the owner has closed it by
//     the time the tool finishes — confirm the channel's lifetime.
func (s *Synthesizer) executeAsyncAndStore(tc model.ToolCall, args map[string]interface{}, sessionID string, eventCh chan<- model.StreamEvent) {
	defer func() {
		if r := recover(); r != nil {
			logger.Printf("[synthesizer] 后台工具 %s 异常终止: %v", tc.Name, r)
		}
	}()

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	result, err := s.toolRegistry.Execute(ctx, tc.Name, args)

	pending := PendingToolResult{ToolCallID: tc.ID, ToolName: tc.Name}
	if err != nil {
		logger.Printf("[synthesizer] 后台工具 %s 执行失败: %v", tc.Name, err)
		pending.Result = err.Error()
	} else {
		pending.Success = result != nil && result.Success
		resultJSON, marshalErr := json.Marshal(result)
		if marshalErr != nil {
			logger.Printf("[synthesizer] 后台工具 %s 结果序列化失败: %v", tc.Name, marshalErr)
			pending.Result = marshalErr.Error()
		} else {
			pending.Result = string(resultJSON)
		}
	}

	if store := GetGlobalPendingToolStore(); store != nil {
		store.AppendToolResult(sessionID, pending)
	}

	if err != nil {
		s.emitToolProgress(eventCh, tc.Name, "failed", 1.0, tc.Name+" 后台执行失败: "+err.Error())
		return
	}
	s.emitToolProgress(eventCh, tc.Name, "completed", 1.0, tc.Name+" 后台执行完成")
}
// buildSynthesizeMessages 构建综合用的 LLM 消息列表
func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.LLMMessage {
var messages []model.LLMMessage
@@ -174,6 +242,23 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
})
}
// Inject the results of tools that finished in the background since the
// previous turn as one system message, one line per tool.
if pending := params.PendingToolResults; len(pending) > 0 {
	var report strings.Builder
	report.WriteString("【上一轮后台工具执行结果】\n")
	for i := range pending {
		outcome := "失败"
		if pending[i].Success {
			outcome = "成功"
		}
		fmt.Fprintf(&report, "- %s (%s): %s\n", pending[i].ToolName, outcome, pending[i].Result)
	}
	messages = append(messages, model.LLMMessage{
		Role:    model.RoleSystem,
		Content: report.String(),
	})
}
// 注入对话历史
if len(params.DialogHistory) > 0 {
messages = append(messages, params.DialogHistory...)