fix: 后台思考身份混淆 + 静默模式视觉理解 + QQ卡片解析 + 仪表盘状态修复

- 后台思考对话历史增加标签说明,严格区分群聊中不同发送者
- 静默观察模式传入图片URL并预处理,供后台思考参考
- 视觉+OCR双模型结果合并格式优化,避免LLM误认为多张图片
- QQ卡片消息(CQ:json)正确解析标题/类型,不再丢失为[JSON]
- 进程管理器stop()在进程为null时重置pid/startTime,消除矛盾状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 21:07:25 +08:00
parent a9c79d7887
commit b085e58031
7 changed files with 179 additions and 33 deletions
+30 -3
View File
@@ -1177,8 +1177,11 @@ func (t *Thinker) buildThinkingUserPrompt(
// 对话历史
var lastUserMsg string
lastUserIsAdmin := false
if len(convHistory) > 0 {
sb.WriteString("\n【最近的对话】\n")
sb.WriteString(fmt.Sprintf("(标签说明:每条消息前的 [名字] 标识了说话者。只有 [%s] 才是%s。其他名字是群聊中的其他成员,不是%s。请严格根据标签区分不同的人,不要张冠李戴。)\n",
t.adminNickname, t.adminNickname, t.adminNickname))
msgCount := 0
for _, msg := range convHistory {
if msg.Role == model.RoleUser || msg.Role == model.RoleAssistant {
@@ -1187,6 +1190,8 @@ func (t *Thinker) buildThinkingUserPrompt(
roleLabel = "昔涟"
} else if strings.Contains(msg.Content, t.adminNickname+"/") {
roleLabel = t.adminNickname
} else if name := extractGroupSender(msg.Content); name != "" {
roleLabel = name
}
content := msg.Content
runes := []rune(content)
@@ -1197,18 +1202,19 @@ func (t *Thinker) buildThinkingUserPrompt(
msgCount++
if msg.Role == model.RoleUser {
lastUserMsg = msg.Content
lastUserIsAdmin = roleLabel == t.adminNickname
}
}
}
if msgCount == 0 {
sb.WriteString("(暂无对话历史)\n")
sb.WriteString("(暂无对话历史)\n")
}
} else {
sb.WriteString("\n【最近的对话】\n(暂无对话历史)\n")
}
// 关键:强调根据对话历史判断用户当前状态
if lastUserMsg != "" {
// 关键:强调根据对话历史判断当前状态
if lastUserMsg != "" && lastUserIsAdmin {
sb.WriteString(fmt.Sprintf("\n🔍 **重要**:开拓者最后说的是「%s」。请认真判断:他现在是不是在休息/睡觉/忙?如果是,不要输出【主动消息】指令行。\n", lastUserMsg))
}
@@ -1821,6 +1827,27 @@ func (t *Thinker) expandMemoryKeywords(ctx context.Context, message string) []st
return keywords
}
// extractGroupSender returns the sender's display name from the header line
// of a group-chat message.
//
// Group messages are expected to look like:
//
//	[群聊 GROUPID] SENDERNAME (UID)\ncontent
//
// Only the first line (the header) is inspected, so brackets or parentheses
// in the free-form message body are never mistaken for header delimiters.
// The UID is taken to be the last " (" group on the header line, which keeps
// nicknames that themselves contain " (" intact.
//
// It returns "" when content does not start with the group prefix, or when
// the header line lacks the "] " or " (" delimiter.
func extractGroupSender(content string) string {
	if !strings.HasPrefix(content, "[群聊 ") {
		return ""
	}
	// Restrict parsing to the header line; everything after the first
	// newline is user-written text with no structure guarantees.
	header := content
	if nl := strings.IndexByte(header, '\n'); nl >= 0 {
		header = header[:nl]
	}
	// "] " closes the group label.
	bracketEnd := strings.Index(header, "] ")
	if bracketEnd < 0 {
		return ""
	}
	rest := header[bracketEnd+2:]
	// The " (UID)" suffix follows the name. Search from the right so that a
	// nickname such as "Bob (afk)" is not cut short at its own parenthesis.
	parenIdx := strings.LastIndex(rest, " (")
	if parenIdx < 0 {
		return ""
	}
	return rest[:parenIdx]
}
// lastUserMessage extracts the last user message from conversation history.
func lastUserMessage(history []model.LLMMessage) string {
for i := len(history) - 1; i >= 0; i-- {
@@ -180,7 +180,7 @@ func (o *Orchestrator) ProcessInput(
// 0.5 图片预处理: 使用视觉模型分析图片,将描述注入消息
if len(params.Images) > 0 && o.visionProvider != nil {
startTime := time.Now()
augmented := o.preprocessImages(ctx, params.Message, params.Images)
augmented := o.PreprocessImages(ctx, params.Message, params.Images)
if augmented != params.Message {
params.Message = augmented
logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
@@ -736,25 +736,20 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
}
}
// cacheAssistantMessage caches the assistant response, tagging it with the recipient
// in group chats so dialog history shows who the AI was addressing.
// cacheAssistantMessage caches the assistant response.
func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
if o.contextBuilder == nil {
return
}
cached := fullContent
if params.ChannelType == "group" && params.Nickname != "" {
cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
}
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
}
// preprocessImages uses vision and OCR models to analyze images and augments the user message.
// PreprocessImages uses vision and OCR models to analyze images and augments the user message.
// When both vision and OCR providers are available (and are different models), they are called
// in parallel and both results are passed to the chat model for autonomous judgment.
// For standalone images (no text): generates a comprehensive description as the message.
// For text+images: appends image descriptions as contextual annotations.
func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
func (o *Orchestrator) PreprocessImages(ctx context.Context, message string, images []string) string {
visionPromptBase := "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。"
ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容,保持原有格式和排版。如果图片中没有文字,请回复"无文字"。`
@@ -809,7 +804,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
var combined string
switch {
case visionDesc != "" && ocrDesc != "":
combined = fmt.Sprintf("[视觉分析]: %s\n[文字提取(OCR)]: %s", visionDesc, ocrDesc)
combined = fmt.Sprintf("视觉描述:%s\n(图中文字:%s", visionDesc, ocrDesc)
case visionDesc != "":
combined = visionDesc
case ocrDesc != "":
@@ -831,7 +826,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
augmented := message
for i, desc := range descriptions {
augmented += fmt.Sprintf("\n\n[图片%d的视觉分析]: %s", i+1, desc)
augmented += fmt.Sprintf("\n\n[图片%d分析结果]: %s", i+1, desc)
}
return augmented
}
@@ -216,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
if params.ChannelType == "group" {
messages = append(messages, model.LLMMessage{
Role: model.RoleSystem,
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据当前这条消息前缀中的发送者名字称呼对方——即使你之前在历史对话中称呼过别人,也不要把之前用的称呼套在当前发送者身上。不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
})
}