fix: 后台思考身份混淆 + 静默模式视觉理解 + QQ卡片解析 + 仪表盘状态修复

- 后台思考对话历史增加标签说明，严格区分群聊中不同发送者
- 静默观察模式传入图片URL并预处理，供后台思考参考
- 视觉+OCR双模型结果合并格式优化，避免LLM误认为多张图片
- QQ卡片消息(CQ:json)正确解析标题/类型，不再丢失为[JSON]
- 进程管理器stop()在进程为null时重置pid/startTime，消除矛盾状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 21:07:25 +08:00
parent a9c79d7887
commit b085e58031
7 changed files with 179 additions and 33 deletions
@@ -1177,8 +1177,11 @@ func (t *Thinker) buildThinkingUserPrompt(

 	// 对话历史
 	var lastUserMsg string
+	lastUserIsAdmin := false
 	if len(convHistory) > 0 {
 		sb.WriteString("\n【最近的对话】\n")
+		sb.WriteString(fmt.Sprintf("（标签说明：每条消息前的 [名字] 标识了说话者。只有 [%s] 才是%s。其他名字是群聊中的其他成员，不是%s。请严格根据标签区分不同的人，不要张冠李戴。）\n",
+			t.adminNickname, t.adminNickname, t.adminNickname))
 		msgCount := 0
 		for _, msg := range convHistory {
 			if msg.Role == model.RoleUser || msg.Role == model.RoleAssistant {
@@ -1187,6 +1190,8 @@ func (t *Thinker) buildThinkingUserPrompt(
 					roleLabel = "昔涟"
 				} else if strings.Contains(msg.Content, t.adminNickname+"/") {
 					roleLabel = t.adminNickname
+				} else if name := extractGroupSender(msg.Content); name != "" {
+					roleLabel = name
 				}
 				content := msg.Content
 				runes := []rune(content)
@@ -1197,18 +1202,19 @@ func (t *Thinker) buildThinkingUserPrompt(
 				msgCount++
 				if msg.Role == model.RoleUser {
 					lastUserMsg = msg.Content
+					lastUserIsAdmin = roleLabel == t.adminNickname
 				}
 			}
 		}
 		if msgCount == 0 {
-			sb.WriteString("（暂无对话历史）\n")
+		sb.WriteString("（暂无对话历史）\n")
 		}
 	} else {
 		sb.WriteString("\n【最近的对话】\n（暂无对话历史）\n")
 	}

-	// 关键：强调根据对话历史判断用户当前状态
-	if lastUserMsg != "" {
+	// 关键：强调根据对话历史判断当前状态
+	if lastUserMsg != "" && lastUserIsAdmin {
 		sb.WriteString(fmt.Sprintf("\n🔍 **重要**：开拓者最后说的是「%s」。请认真判断：他现在是不是在休息/睡觉/忙？如果是，不要输出【主动消息】指令行。\n", lastUserMsg))
 	}

@@ -1821,6 +1827,27 @@ func (t *Thinker) expandMemoryKeywords(ctx context.Context, message string) []st
 	return keywords
 }

+// extractGroupSender returns the sender name from a group message header.
+// Group messages have the format: [群聊 GROUPID] SENDERNAME (UID)：\ncontent
+// Only the header (first) line is searched, and the last " (" on it marks the
+// UID, so a nickname containing " (" is kept whole. Returns "" on no match.
+func extractGroupSender(content string) string {
+	if !strings.HasPrefix(content, "[群聊 ") {
+		return ""
+	}
+	header := strings.SplitN(content, "\n", 2)[0] // the body must not affect the match
+	bracketEnd := strings.Index(header, "] ")
+	if bracketEnd < 0 {
+		return ""
+	}
+	rest := header[bracketEnd+2:]
+	parenIdx := strings.LastIndex(rest, " (")
+	if parenIdx < 0 {
+		return ""
+	}
+	return rest[:parenIdx]
+}
+
 // lastUserMessage extracts the last user message from conversation history.
 func lastUserMessage(history []model.LLMMessage) string {
 	for i := len(history) - 1; i >= 0; i-- {
@@ -180,7 +180,7 @@ func (o *Orchestrator) ProcessInput(
 		// 0.5 图片预处理: 使用视觉模型分析图片，将描述注入消息
 	if len(params.Images) > 0 && o.visionProvider != nil {
 		startTime := time.Now()
-		augmented := o.preprocessImages(ctx, params.Message, params.Images)
+		augmented := o.PreprocessImages(ctx, params.Message, params.Images)
 		if augmented != params.Message {
 			params.Message = augmented
 			logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
@@ -736,25 +736,20 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
 	}
 }

-// cacheAssistantMessage caches the assistant response, tagging it with the recipient
-// in group chats so dialog history shows who the AI was addressing.
+// cacheAssistantMessage caches the assistant response.
 func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
 	if o.contextBuilder == nil {
 		return
 	}
-	cached := fullContent
-	if params.ChannelType == "group" && params.Nickname != "" {
-		cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
-	}
-	o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
+	o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
 }

-// preprocessImages uses vision and OCR models to analyze images and augments the user message.
+// PreprocessImages uses vision and OCR models to analyze images and augments the user message.
 // When both vision and OCR providers are available (and are different models), they are called
 // in parallel and both results are passed to the chat model for autonomous judgment.
 // For standalone images (no text): generates a comprehensive description as the message.
 // For text+images: appends image descriptions as contextual annotations.
-func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
+func (o *Orchestrator) PreprocessImages(ctx context.Context, message string, images []string) string {
 	visionPromptBase := "请详细描述这张图片的内容，包括场景、物体、人物、文字（如有）、颜色、氛围等所有视觉信息。"
 	ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容，保持原有格式和排版。如果图片中没有文字，请回复"无文字"。`

@@ -809,7 +804,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
 		var combined string
 		switch {
 		case visionDesc != "" && ocrDesc != "":
-			combined = fmt.Sprintf("[视觉分析]: %s\n[文字提取(OCR)]: %s", visionDesc, ocrDesc)
+			combined = fmt.Sprintf("视觉描述：%s\n（图中文字：%s）", visionDesc, ocrDesc)
 		case visionDesc != "":
 			combined = visionDesc
 		case ocrDesc != "":
@@ -831,7 +826,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima

 	augmented := message
 	for i, desc := range descriptions {
-		augmented += fmt.Sprintf("\n\n[图片%d的视觉分析]: %s", i+1, desc)
+		augmented += fmt.Sprintf("\n\n[图片%d分析结果]: %s", i+1, desc)
 	}
 	return augmented
 }
@@ -216,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
 	if params.ChannelType == "group" {
 		messages = append(messages, model.LLMMessage{
 			Role:    model.RoleSystem,
-			Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊，而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方，不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
+			Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊，而是在群聊中和不同成员交流。请根据当前这条消息前缀中的发送者名字来称呼对方——即使你之前在历史对话中称呼过别人，也不要把之前用的称呼套在当前发送者身上。不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
 		})
 	}