fix: 修复间隔对话后首条回复为上一次对话内容的问题

根因：用户消息在回复完成后才缓存到 ConversationStore，而 assistant 回复已在 orchestrator 中先行缓存，导致存储顺序颠倒为 assistant → user。下次请求时，LLM 会看到连续两条 assistant 消息后紧跟连续两条 user 消息，于是对两条 user 消息都生成回复。

修复：将用户消息的缓存移到 orchestrator 调用之前，确保顺序为 user → assistant；同时在 synthesizer 中，当 DialogHistory 末尾的 user 消息与当前消息相同时将其去重。

本次提交还包含此前的 action 消息类型检测修复（isActionLike 启发式判断 + injector 的 XML 标签格式改进）。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 21:00:15 +08:00
parent 80dad9a018
commit 85f7f90318
4 changed files with 71 additions and 7 deletions
@@ -759,6 +759,9 @@ func handleChat(
 		return
 	}

+	// 1.5 缓存用户消息到会话历史（在 Orchestrator 之前，确保顺序正确：user → assistant）
+	ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, req.Message)
+
 	// 2. 调用 Orchestrator 处理（替代原有的线性处理流程）
 	// Orchestrator 内部处理：意图分析 → 子会话分派 → 结果汇总 → 综合生成回复
 	eventCh, err := orch.ProcessInput(ctx, orchestrator.ProcessParams{
@@ -848,8 +851,6 @@ func handleChat(
 		}
 	}

-	// 缓存用户消息到会话历史（在回复生成后，避免本轮 LLM 调用出现重复用户消息）
-	ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, req.Message)
 	// 4. 对话完成后触发昔涟的自主思考（事件驱动，非定时）
 	if thinker != nil {
 		thinker.TriggerPostChatThink()
@@ -41,9 +41,59 @@ func autoDetectType(text string) model.ReviewMessageType {
 	if hasMarkdownSyntax(text) {
 		return model.ReviewMessageMarkdown
 	}
+	if isActionLike(text) {
+		return model.ReviewMessageAction
+	}
 	return model.ReviewMessageChat
 }

+// isActionLike reports whether text reads like a short action/expression
+// description (e.g. "忍不住轻声笑出来", "俏皮地眨眨眼") rather than dialogue. It is a
+// keyword heuristic, used as a fallback when the model omits <action> tags or brackets.
+func isActionLike(text string) bool {
+	runes := []rune(strings.TrimSpace(text)) // measure in runes, not bytes, so CJK text is counted per character
+	if len(runes) == 0 || len(runes) > 50 { // empty input, or longer than 50 runes, is never an action
+		return false
+	}
+	// Any question or exclamation mark (full-width or ASCII) marks the text as dialogue.
+	if strings.ContainsAny(text, "？?！!") {
+		return false
+	}
+	// Text that opens with one of these pronouns, demonstratives, or question words is dialogue.
+	dialoguePrefixes := []string{"你", "您", "我", "他", "她", "这", "那", "怎么", "什么", "为什"} // "为什" is the start of "为什么"
+	for _, p := range dialoguePrefixes {
+		if strings.HasPrefix(string(runes), p) {
+			return false
+		}
+	}
+	// A sentence-final particle disqualifies the text only when it also contains "你".
+	softDialogue := []string{"吗", "吧", "哦", "呢", "啦", "呀", "喔"}
+	dialogueScore := 0 // at most 1: the suffixes are distinct single runes, so only one can match
+	for _, s := range softDialogue {
+		if strings.HasSuffix(string(runes), s) {
+			dialogueScore++
+		}
+	}
+	if dialogueScore >= 1 && strings.Contains(text, "你") {
+		return false
+	}
+	// Past the dialogue checks, the text is an action if it contains any of these substrings.
+	actionPatterns := []string{
+		"笑出来", "眨眨眼", "歪头", "点头", "摇头", "挥手", "伸手",
+		"松口气", "叹口气", "叹气", "拍拍", "摸摸", "抱抱",
+		"轻轻", "俏皮", "微微", "默默", "悄悄", "偷偷",
+		"忍不住", "不由得", "不禁",
+		"站起来", "坐下", "走", "跑", "跳", "躺", // NOTE(review): single-rune entries match very broadly — confirm intended
+		"眼睛", "目光", "嘴角", "眉头", "脸上",
+	}
+	for _, p := range actionPatterns {
+		if strings.Contains(text, p) {
+			return true
+		}
+	}
+	return false // no action keyword found
+}
+
 // parseReviewMessages splits the assistant's full response into typed messages.
 //
 // Phases:
@@ -259,9 +259,16 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
 		})
 	}

-	// 注入对话历史
-	if len(params.DialogHistory) > 0 {
-		messages = append(messages, params.DialogHistory...)
+	// 注入对话历史（去掉末尾的当前用户消息，因为后面会单独追加）
+	history := params.DialogHistory
+	if len(history) > 0 {
+		last := history[len(history)-1]
+		if last.Role == model.RoleUser && last.Content == params.UserMessage {
+			history = history[:len(history)-1]
+		}
+	}
+	if len(history) > 0 {
+		messages = append(messages, history...)
 	}

 	// 当前用户消息 (支持多模态图片)
@@ -269,8 +269,14 @@ func (pc *PersonaConfig) buildConversationStyle() string {
 	}
 	sb.WriteString("- 像 LINE 聊天一样，随意、亲切、有温度\n")
 	sb.WriteString("- 偶尔可以用语气词开头：\"嗯...\"、\"啊\"、\"诶\"\n")
-	sb.WriteString("- 表达动作、表情、肢体语言或执行操作时，使用 <action>...</action> 标签包裹，后面跟自然对话。例如：\"<action>帮你把客厅灯关掉啦</action> 嗯，已经关好了~\"\n")
-	sb.WriteString("- 动作标签只能包含纯动作描述，不要把对话内容放进 <action> 标签里\n")
+		sb.WriteString("- <格式规则> 回复中涉及动作/表情/肢体语言/执行操作时，必须用 <action>...</action> 标签包裹，对话内容放在标签外面\n")
+		sb.WriteString("- 示例：\n")
+		sb.WriteString("  \"<action>忍不住轻声笑出来</action> 抓到一只偷偷眨眼睛的小可爱~\"\n")
+		sb.WriteString("  \"<action>俏皮地眨眨眼</action> 人家可是随时待机的哦~\"\n")
+		sb.WriteString("  \"<action>轻轻歪头</action> 嗯？你在想什么呢？\"\n")
+		sb.WriteString("  \"<action>帮你把客厅灯关掉啦</action> 嗯，已经关好了~\"\n")
+		sb.WriteString("- 动作标签只能包含纯动作描述，不要把对话内容放进 <action> 标签里\n")
+		sb.WriteString("- 每条回复都要检查：有动作就必须用标签，纯对话不需要标签\n")

 	if len(cs.SentenceEnders) > 0 {
 		sb.WriteString(fmt.Sprintf("- 句尾可以带这些语气符：%s\n", strings.Join(cs.SentenceEnders, " ")))