fix: 后台思考身份混淆 + 静默模式视觉理解 + QQ卡片解析 + 仪表盘状态修复

- 后台思考对话历史增加标签说明,严格区分群聊中不同发送者
- 静默观察模式传入图片URL并预处理,供后台思考参考
- 视觉+OCR双模型结果合并格式优化,避免LLM误认为多张图片
- QQ卡片消息(CQ:json)正确解析标题/类型,不再丢失为[JSON]
- 进程管理器stop()在进程为null时重置pid/startTime,消除矛盾状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 21:07:25 +08:00
parent a9c79d7887
commit b085e58031
7 changed files with 179 additions and 33 deletions
+12 -2
View File
@@ -841,9 +841,19 @@ func handleChat(
if thinker != nil { if thinker != nil {
thinker.RecordUserMessage(req.SessionID) thinker.RecordUserMessage(req.SessionID)
} }
ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, req.Message) // 图片预处理:静默观察时也分析图片内容,供后台思考使用
// Start from the raw request text; when images are attached, let the
// orchestrator augment it so the cached message carries the image analysis.
message := req.Message
if len(req.Images) > 0 {
	startTime := time.Now()
	augmented := orch.PreprocessImages(r.Context(), message, req.Images)
	// Only adopt the result and log timing when preprocessing changed the message.
	if augmented != message {
		message = augmented
		// Use a single %v: "%%v" prints a literal "%v" and leaves the
		// duration as an unformatted EXTRA argument.
		log.Printf("[silent] 图片预处理耗时: %v", time.Since(startTime))
	}
}
ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, message)
// 从观察到的群聊消息中提取记忆。 // 从观察到的群聊消息中提取记忆。
orch.ExtractMemoriesOnly(r.Context(), req.UserID, req.SessionID, req.Message) orch.ExtractMemoriesOnly(r.Context(), req.UserID, req.SessionID, message)
if thinker != nil { if thinker != nil {
thinker.TriggerPostChatThink() thinker.TriggerPostChatThink()
} }
+29 -2
View File
@@ -1177,8 +1177,11 @@ func (t *Thinker) buildThinkingUserPrompt(
// 对话历史 // 对话历史
var lastUserMsg string var lastUserMsg string
lastUserIsAdmin := false
if len(convHistory) > 0 { if len(convHistory) > 0 {
sb.WriteString("\n【最近的对话】\n") sb.WriteString("\n【最近的对话】\n")
sb.WriteString(fmt.Sprintf("(标签说明:每条消息前的 [名字] 标识了说话者。只有 [%s] 才是%s。其他名字是群聊中的其他成员,不是%s。请严格根据标签区分不同的人,不要张冠李戴。)\n",
t.adminNickname, t.adminNickname, t.adminNickname))
msgCount := 0 msgCount := 0
for _, msg := range convHistory { for _, msg := range convHistory {
if msg.Role == model.RoleUser || msg.Role == model.RoleAssistant { if msg.Role == model.RoleUser || msg.Role == model.RoleAssistant {
@@ -1187,6 +1190,8 @@ func (t *Thinker) buildThinkingUserPrompt(
roleLabel = "昔涟" roleLabel = "昔涟"
} else if strings.Contains(msg.Content, t.adminNickname+"/") { } else if strings.Contains(msg.Content, t.adminNickname+"/") {
roleLabel = t.adminNickname roleLabel = t.adminNickname
} else if name := extractGroupSender(msg.Content); name != "" {
roleLabel = name
} }
content := msg.Content content := msg.Content
runes := []rune(content) runes := []rune(content)
@@ -1197,6 +1202,7 @@ func (t *Thinker) buildThinkingUserPrompt(
msgCount++ msgCount++
if msg.Role == model.RoleUser { if msg.Role == model.RoleUser {
lastUserMsg = msg.Content lastUserMsg = msg.Content
lastUserIsAdmin = roleLabel == t.adminNickname
} }
} }
} }
@@ -1207,8 +1213,8 @@ func (t *Thinker) buildThinkingUserPrompt(
sb.WriteString("\n【最近的对话】\n(暂无对话历史)\n") sb.WriteString("\n【最近的对话】\n(暂无对话历史)\n")
} }
// 关键:强调根据对话历史判断用户当前状态 // 关键:强调根据对话历史判断当前状态
if lastUserMsg != "" { if lastUserMsg != "" && lastUserIsAdmin {
sb.WriteString(fmt.Sprintf("\n🔍 **重要**:开拓者最后说的是「%s」。请认真判断:他现在是不是在休息/睡觉/忙?如果是,不要输出【主动消息】指令行。\n", lastUserMsg)) sb.WriteString(fmt.Sprintf("\n🔍 **重要**:开拓者最后说的是「%s」。请认真判断:他现在是不是在休息/睡觉/忙?如果是,不要输出【主动消息】指令行。\n", lastUserMsg))
} }
@@ -1821,6 +1827,27 @@ func (t *Thinker) expandMemoryKeywords(ctx context.Context, message string) []st
return keywords return keywords
} }
// extractGroupSender extracts the sender name from a group message prefix.
//
// Group messages have the format:
//
//	[群聊 GROUPID] SENDERNAME (UID)\ncontent
//
// Only the header (the text before the first newline) is inspected, so
// parentheses or brackets in the message body can never be mistaken for part
// of the prefix. The sender name is everything between the "] " that closes
// the group label and the last " (" on the header line, which allows
// nicknames that themselves contain " (".
//
// It returns the empty string if the message does not match the group format.
func extractGroupSender(content string) string {
	if !strings.HasPrefix(content, "[群聊 ") {
		return ""
	}
	// Restrict parsing to the header line.
	header := content
	if nl := strings.IndexByte(header, '\n'); nl >= 0 {
		header = header[:nl]
	}
	// Find "] " which ends the group label.
	bracketEnd := strings.Index(header, "] ")
	if bracketEnd < 0 {
		return ""
	}
	rest := header[bracketEnd+2:]
	// The UID is the last parenthesized group on the header line.
	parenIdx := strings.LastIndex(rest, " (")
	if parenIdx < 0 {
		return ""
	}
	return rest[:parenIdx]
}
// lastUserMessage extracts the last user message from conversation history. // lastUserMessage extracts the last user message from conversation history.
func lastUserMessage(history []model.LLMMessage) string { func lastUserMessage(history []model.LLMMessage) string {
for i := len(history) - 1; i >= 0; i-- { for i := len(history) - 1; i >= 0; i-- {
@@ -180,7 +180,7 @@ func (o *Orchestrator) ProcessInput(
// 0.5 图片预处理: 使用视觉模型分析图片,将描述注入消息 // 0.5 图片预处理: 使用视觉模型分析图片,将描述注入消息
if len(params.Images) > 0 && o.visionProvider != nil { if len(params.Images) > 0 && o.visionProvider != nil {
startTime := time.Now() startTime := time.Now()
augmented := o.preprocessImages(ctx, params.Message, params.Images) augmented := o.PreprocessImages(ctx, params.Message, params.Images)
if augmented != params.Message { if augmented != params.Message {
params.Message = augmented params.Message = augmented
logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字", logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
@@ -736,25 +736,20 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
} }
} }
// cacheAssistantMessage caches the assistant response, tagging it with the recipient // cacheAssistantMessage caches the assistant response.
// in group chats so dialog history shows who the AI was addressing.
func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) { func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
if o.contextBuilder == nil { if o.contextBuilder == nil {
return return
} }
cached := fullContent o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
if params.ChannelType == "group" && params.Nickname != "" {
cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
}
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
} }
// preprocessImages uses vision and OCR models to analyze images and augments the user message. // PreprocessImages uses vision and OCR models to analyze images and augments the user message.
// When both vision and OCR providers are available (and are different models), they are called // When both vision and OCR providers are available (and are different models), they are called
// in parallel and both results are passed to the chat model for autonomous judgment. // in parallel and both results are passed to the chat model for autonomous judgment.
// For standalone images (no text): generates a comprehensive description as the message. // For standalone images (no text): generates a comprehensive description as the message.
// For text+images: appends image descriptions as contextual annotations. // For text+images: appends image descriptions as contextual annotations.
func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string { func (o *Orchestrator) PreprocessImages(ctx context.Context, message string, images []string) string {
visionPromptBase := "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。" visionPromptBase := "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。"
ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容,保持原有格式和排版。如果图片中没有文字,请回复"无文字"。` ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容,保持原有格式和排版。如果图片中没有文字,请回复"无文字"。`
@@ -809,7 +804,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
var combined string var combined string
switch { switch {
case visionDesc != "" && ocrDesc != "": case visionDesc != "" && ocrDesc != "":
combined = fmt.Sprintf("[视觉分析]: %s\n[文字提取(OCR)]: %s", visionDesc, ocrDesc) combined = fmt.Sprintf("视觉描述:%s\n(图中文字:%s", visionDesc, ocrDesc)
case visionDesc != "": case visionDesc != "":
combined = visionDesc combined = visionDesc
case ocrDesc != "": case ocrDesc != "":
@@ -831,7 +826,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
augmented := message augmented := message
for i, desc := range descriptions { for i, desc := range descriptions {
augmented += fmt.Sprintf("\n\n[图片%d的视觉分析]: %s", i+1, desc) augmented += fmt.Sprintf("\n\n[图片%d分析结果]: %s", i+1, desc)
} }
return augmented return augmented
} }
@@ -216,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
if params.ChannelType == "group" { if params.ChannelType == "group" {
messages = append(messages, model.LLMMessage{ messages = append(messages, model.LLMMessage{
Role: model.RoleSystem, Role: model.RoleSystem,
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方不同的人有不同的名字。只在对你说话或延续已有对话时才回复。", Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据当前这条消息前缀中的发送者名字称呼对方——即使你之前在历史对话中称呼过别人,也不要把之前用的称呼套在当前发送者身上。不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
}) })
} }
+39 -13
View File
@@ -180,7 +180,7 @@ func main() {
case isAdmin && !isBotMentioned && shouldAdminBeSilent(msg, router): case isAdmin && !isBotMentioned && shouldAdminBeSilent(msg, router):
msg.RouteType = "silent" msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID) namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin) response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
case isAdmin: case isAdmin:
msg.RouteType = "normal" msg.RouteType = "normal"
@@ -195,12 +195,12 @@ func main() {
// the admin already gets QQ's native @notification. Observe silently. // the admin already gets QQ's native @notification. Observe silently.
msg.RouteType = "silent" msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID) namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin) response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
case isSilent: case isSilent:
msg.RouteType = "silent" msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID) namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin) silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
if silentErr != nil { if silentErr != nil {
msgLogger.Log(logging.LogEntry{ msgLogger.Log(logging.LogEntry{
Timestamp: time.Now(), Timestamp: time.Now(),
@@ -769,23 +769,49 @@ func parseSSEAndAccumulate(body string) string {
return strings.Join(deltas, "") return strings.Join(deltas, "")
} }
// splitContent splits text by ♪ (sentence-break marker), then by \n\n within each segment. // splitContent splits text into separate chat messages.
// Non-empty segments are each wrapped as a chat message; empty input returns a single empty message. // It first splits by \n\n (message separator), then within each message
// optionally splits further by ♪ (sentence-break marker).
// Very short segments (fewer than minRunes = 8 runes) are merged with their neighbors to avoid
// one-word messages followed by a wall of text.
func splitContent(text string) []bridge.ResponseMessage { func splitContent(text string) []bridge.ResponseMessage {
// First split by ♪ sentence-break marker. // Step 1: split by \n\n (message-level separator) always.
var rawParts []string rawParts := strings.Split(text, "\n\n")
if strings.Contains(text, "♪") {
rawParts = strings.Split(text, "♪")
} else {
rawParts = strings.Split(text, "\n\n")
}
var parts []string var parts []string
for _, p := range rawParts { for _, p := range rawParts {
p = strings.TrimSpace(p) p = strings.TrimSpace(p)
if p != "" { if p == "" {
continue
}
// Step 2: within each \n\n segment, split by ♪ if present.
if strings.Contains(p, "♪") {
for _, sub := range strings.Split(p, "♪") {
sub = strings.TrimSpace(sub)
if sub != "" {
parts = append(parts, sub)
}
}
} else {
parts = append(parts, p) parts = append(parts, p)
} }
} }
// Step 3: merge very short segments with neighbors.
const minRunes = 8
var merged []string
for _, part := range parts {
if len(merged) > 0 && len([]rune(merged[len(merged)-1])) < minRunes {
// Previous segment is too short: merge current into it.
merged[len(merged)-1] = merged[len(merged)-1] + "\n" + part
} else if len(merged) > 0 && len([]rune(part)) < minRunes {
// Current segment is too short: merge it into previous.
merged[len(merged)-1] = merged[len(merged)-1] + "\n" + part
} else {
merged = append(merged, part)
}
}
parts = merged
var msgs []bridge.ResponseMessage var msgs []bridge.ResponseMessage
for _, part := range parts { for _, part := range parts {
msgs = append(msgs, bridge.ResponseMessage{ msgs = append(msgs, bridge.ResponseMessage{
@@ -5,6 +5,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"net/http" "net/http"
"net/url"
"regexp" "regexp"
"strings" "strings"
"sync" "sync"
@@ -506,6 +507,55 @@ func (a *Adapter) ReadMessages(ctx context.Context, msgCh chan<- *OBv11Message)
} }
} }
// parseJSONCardTitle derives a short human-readable label from a QQ JSON card.
//
// data is the raw JSON text taken from the "data" field of a json-type message
// segment. The first non-empty value among the following is used, in order:
// prompt, meta.detail_1.title, meta.news.title, meta.music.title, the
// top-level title, and the top-level desc. The chosen value is returned with a
// "[卡片] " prefix. If the JSON cannot be decoded, or none of those fields is
// populated, the generic placeholder "[卡片消息]" is returned.
func parseJSONCardTitle(data string) string {
	const fallback = "[卡片消息]"

	// section is the title/desc pair shared by every meta sub-object we read.
	type section struct {
		Title string `json:"title"`
		Desc  string `json:"desc"`
	}
	var payload struct {
		App    string `json:"app"`
		Prompt string `json:"prompt"`
		Title  string `json:"title"`
		Desc   string `json:"desc"`
		Meta   struct {
			Detail1 section `json:"detail_1"`
			News    section `json:"news"`
			Music   section `json:"music"`
		} `json:"meta"`
	}
	if json.Unmarshal([]byte(data), &payload) != nil {
		return fallback
	}

	// Candidates in priority order: prompt (e.g. "[分享]标题"), then the meta
	// titles, then the top-level title and description.
	candidates := []string{
		payload.Prompt,
		payload.Meta.Detail1.Title,
		payload.Meta.News.Title,
		payload.Meta.Music.Title,
		payload.Title,
		payload.Desc,
	}
	for _, label := range candidates {
		if label != "" {
			return "[卡片] " + label
		}
	}
	return fallback
}
// cqSimplifyMap maps CQ code types to simplified Chinese labels. // cqSimplifyMap maps CQ code types to simplified Chinese labels.
var cqSimplifyMap = map[string]string{ var cqSimplifyMap = map[string]string{
"image": "[图片]", "image": "[图片]",
@@ -528,6 +578,15 @@ func simplifyCQCodes(s string) string {
break break
} }
} }
if typ == "json" {
// Parse data= field from [CQ:json,data=URL_ENCODED_JSON]
if dataVal := extractCQParam(match, "data"); dataVal != "" {
if decoded, err := url.QueryUnescape(dataVal); err == nil {
return parseJSONCardTitle(decoded)
}
}
return "[卡片消息]"
}
if label, ok := cqSimplifyMap[typ]; ok { if label, ok := cqSimplifyMap[typ]; ok {
return label return label
} }
@@ -535,6 +594,24 @@ func simplifyCQCodes(s string) string {
}) })
} }
// extractCQParam extracts a named parameter value from a CQ code string.
//
//	extractCQParam("[CQ:json,data=hello%20world]", "data") → "hello%20world"
//
// The code is cut at the first "]" and split on ","; a parameter matches only
// when a segment begins with paramName followed by "=". Matching on segment
// boundaries keeps a request for "file" from being satisfied by a longer
// parameter name such as "profile". The value is returned verbatim (no
// unescaping). It returns the empty string when the parameter is absent.
func extractCQParam(cqCode, paramName string) string {
	// Ignore anything after the closing bracket of the CQ code.
	body := cqCode
	if end := strings.IndexByte(body, ']'); end >= 0 {
		body = body[:end]
	}
	body = strings.TrimPrefix(body, "[")

	prefix := paramName + "="
	for _, seg := range strings.Split(body, ",") {
		if strings.HasPrefix(seg, prefix) {
			return seg[len(prefix):]
		}
	}
	return ""
}
// extractText retrieves plain text from an OBv11 message. // extractText retrieves plain text from an OBv11 message.
// CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx). // CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx).
func extractText(msg *OBv11Message) string { func extractText(msg *OBv11Message) string {
@@ -576,6 +653,16 @@ func extractText(msg *OBv11Message) string {
case "reply": case "reply":
// Reply is handled separately in ToUnified with reply text. // Reply is handled separately in ToUnified with reply text.
text += "[回复]" text += "[回复]"
case "json":
if data, ok := s["data"].(map[string]interface{}); ok {
if inner, ok := data["data"].(string); ok && inner != "" {
text += parseJSONCardTitle(inner)
} else {
text += "[卡片消息]"
}
} else {
text += "[卡片消息]"
}
} }
} }
} }
+3 -2
View File
@@ -360,9 +360,10 @@ class ProcessManager extends EventEmitter {
const procInfo = this.processes.get(serviceId); const procInfo = this.processes.get(serviceId);
if (!procInfo.process) { if (!procInfo.process) {
// 可能已经崩溃了,重置状态
procInfo.status = 'stopped'; procInfo.status = 'stopped';
return { success: true, message: `${svc.name} 未在运行` }; procInfo.pid = null;
procInfo.startTime = null;
return { success: true, message: `${svc.name} 已停止` };
} }
return new Promise((resolve) => { return new Promise((resolve) => {