fix: 后台思考身份混淆 + 静默模式视觉理解 + QQ卡片解析 + 仪表盘状态修复

- 后台思考对话历史增加标签说明,严格区分群聊中不同发送者
- 静默观察模式传入图片URL并预处理,供后台思考参考
- 视觉+OCR双模型结果合并格式优化,避免LLM误认为多张图片
- QQ卡片消息(CQ:json)正确解析标题/类型,不再丢失为[JSON]
- 进程管理器stop()在进程为null时重置pid/startTime,消除矛盾状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 21:07:25 +08:00
parent a9c79d7887
commit b085e58031
7 changed files with 179 additions and 33 deletions
+12 -2
View File
@@ -841,9 +841,19 @@ func handleChat(
if thinker != nil {
thinker.RecordUserMessage(req.SessionID)
}
ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, req.Message)
// 图片预处理:静默观察时也分析图片内容,供后台思考使用
message := req.Message
if len(req.Images) > 0 {
startTime := time.Now()
augmented := orch.PreprocessImages(r.Context(), message, req.Images)
if augmented != message {
message = augmented
log.Printf("[silent] 图片预处理耗时: %%v", time.Since(startTime))
}
}
ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, message)
// 从观察到的群聊消息中提取记忆。
orch.ExtractMemoriesOnly(r.Context(), req.UserID, req.SessionID, req.Message)
orch.ExtractMemoriesOnly(r.Context(), req.UserID, req.SessionID, message)
if thinker != nil {
thinker.TriggerPostChatThink()
}
+30 -3
View File
@@ -1177,8 +1177,11 @@ func (t *Thinker) buildThinkingUserPrompt(
// 对话历史
var lastUserMsg string
lastUserIsAdmin := false
if len(convHistory) > 0 {
sb.WriteString("\n【最近的对话】\n")
sb.WriteString(fmt.Sprintf("(标签说明:每条消息前的 [名字] 标识了说话者。只有 [%s] 才是%s。其他名字是群聊中的其他成员,不是%s。请严格根据标签区分不同的人,不要张冠李戴。)\n",
t.adminNickname, t.adminNickname, t.adminNickname))
msgCount := 0
for _, msg := range convHistory {
if msg.Role == model.RoleUser || msg.Role == model.RoleAssistant {
@@ -1187,6 +1190,8 @@ func (t *Thinker) buildThinkingUserPrompt(
roleLabel = "昔涟"
} else if strings.Contains(msg.Content, t.adminNickname+"/") {
roleLabel = t.adminNickname
} else if name := extractGroupSender(msg.Content); name != "" {
roleLabel = name
}
content := msg.Content
runes := []rune(content)
@@ -1197,18 +1202,19 @@ func (t *Thinker) buildThinkingUserPrompt(
msgCount++
if msg.Role == model.RoleUser {
lastUserMsg = msg.Content
lastUserIsAdmin = roleLabel == t.adminNickname
}
}
}
if msgCount == 0 {
sb.WriteString("(暂无对话历史)\n")
sb.WriteString("(暂无对话历史)\n")
}
} else {
sb.WriteString("\n【最近的对话】\n(暂无对话历史)\n")
}
// 关键:强调根据对话历史判断用户当前状态
if lastUserMsg != "" {
// 关键:强调根据对话历史判断当前状态
if lastUserMsg != "" && lastUserIsAdmin {
sb.WriteString(fmt.Sprintf("\n🔍 **重要**:开拓者最后说的是「%s」。请认真判断:他现在是不是在休息/睡觉/忙?如果是,不要输出【主动消息】指令行。\n", lastUserMsg))
}
@@ -1821,6 +1827,27 @@ func (t *Thinker) expandMemoryKeywords(ctx context.Context, message string) []st
return keywords
}
// extractGroupSender extracts the sender name from a group message prefix.
//
// Group messages have the format:
//
//	[群聊 GROUPID] SENDERNAME (UID)\ncontent
//
// Only the header (first) line is inspected, so brackets or parentheses that
// appear in the message body are never mistaken for the group label terminator
// or the UID marker. It returns the empty string if the header does not match
// the group format.
func extractGroupSender(content string) string {
	if !strings.HasPrefix(content, "[群聊 ") {
		return ""
	}
	// The sender name lives on the first line; everything after the first
	// newline is free-form message text and must not influence parsing.
	header := content
	if nl := strings.IndexByte(content, '\n'); nl >= 0 {
		header = content[:nl]
	}
	// "] " ends the group label.
	bracketEnd := strings.Index(header, "] ")
	if bracketEnd < 0 {
		return ""
	}
	rest := header[bracketEnd+2:]
	// " (" precedes the UID.
	parenIdx := strings.Index(rest, " (")
	if parenIdx < 0 {
		return ""
	}
	return rest[:parenIdx]
}
// lastUserMessage extracts the last user message from conversation history.
func lastUserMessage(history []model.LLMMessage) string {
for i := len(history) - 1; i >= 0; i-- {
@@ -180,7 +180,7 @@ func (o *Orchestrator) ProcessInput(
// 0.5 图片预处理: 使用视觉模型分析图片,将描述注入消息
if len(params.Images) > 0 && o.visionProvider != nil {
startTime := time.Now()
augmented := o.preprocessImages(ctx, params.Message, params.Images)
augmented := o.PreprocessImages(ctx, params.Message, params.Images)
if augmented != params.Message {
params.Message = augmented
logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
@@ -736,25 +736,20 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
}
}
// cacheAssistantMessage caches the assistant response, tagging it with the recipient
// in group chats so dialog history shows who the AI was addressing.
// cacheAssistantMessage caches the assistant response.
func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
if o.contextBuilder == nil {
return
}
cached := fullContent
if params.ChannelType == "group" && params.Nickname != "" {
cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
}
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
}
// preprocessImages uses vision and OCR models to analyze images and augments the user message.
// PreprocessImages uses vision and OCR models to analyze images and augments the user message.
// When both vision and OCR providers are available (and are different models), they are called
// in parallel and both results are passed to the chat model for autonomous judgment.
// For standalone images (no text): generates a comprehensive description as the message.
// For text+images: appends image descriptions as contextual annotations.
func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
func (o *Orchestrator) PreprocessImages(ctx context.Context, message string, images []string) string {
visionPromptBase := "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。"
ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容,保持原有格式和排版。如果图片中没有文字,请回复"无文字"。`
@@ -809,7 +804,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
var combined string
switch {
case visionDesc != "" && ocrDesc != "":
combined = fmt.Sprintf("[视觉分析]: %s\n[文字提取(OCR)]: %s", visionDesc, ocrDesc)
combined = fmt.Sprintf("视觉描述:%s\n(图中文字:%s", visionDesc, ocrDesc)
case visionDesc != "":
combined = visionDesc
case ocrDesc != "":
@@ -831,7 +826,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
augmented := message
for i, desc := range descriptions {
augmented += fmt.Sprintf("\n\n[图片%d的视觉分析]: %s", i+1, desc)
augmented += fmt.Sprintf("\n\n[图片%d分析结果]: %s", i+1, desc)
}
return augmented
}
@@ -216,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
if params.ChannelType == "group" {
messages = append(messages, model.LLMMessage{
Role: model.RoleSystem,
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据当前这条消息前缀中的发送者名字称呼对方——即使你之前在历史对话中称呼过别人,也不要把之前用的称呼套在当前发送者身上。不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
})
}
+39 -13
View File
@@ -180,7 +180,7 @@ func main() {
case isAdmin && !isBotMentioned && shouldAdminBeSilent(msg, router):
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
case isAdmin:
msg.RouteType = "normal"
@@ -195,12 +195,12 @@ func main() {
// the admin already gets QQ's native @notification. Observe silently.
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
case isSilent:
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
if silentErr != nil {
msgLogger.Log(logging.LogEntry{
Timestamp: time.Now(),
@@ -769,23 +769,49 @@ func parseSSEAndAccumulate(body string) string {
return strings.Join(deltas, "")
}
// splitContent splits text by ♪ (sentence-break marker), then by \n\n within each segment.
// Non-empty segments are each wrapped as a chat message; empty input returns a single empty message.
// splitContent splits text into separate chat messages.
// It first splits by \n\n (message separator), then within each message
// optionally splits further by ♪ (sentence-break marker).
// Segments shorter than minRunes (8 runes) are merged with their neighbors to avoid
// one-word messages followed by a wall of text.
func splitContent(text string) []bridge.ResponseMessage {
// First split by ♪ sentence-break marker.
var rawParts []string
if strings.Contains(text, "♪") {
rawParts = strings.Split(text, "♪")
} else {
rawParts = strings.Split(text, "\n\n")
}
// Step 1: split by \n\n (message-level separator) always.
rawParts := strings.Split(text, "\n\n")
var parts []string
for _, p := range rawParts {
p = strings.TrimSpace(p)
if p != "" {
if p == "" {
continue
}
// Step 2: within each \n\n segment, split by ♪ if present.
if strings.Contains(p, "♪") {
for _, sub := range strings.Split(p, "♪") {
sub = strings.TrimSpace(sub)
if sub != "" {
parts = append(parts, sub)
}
}
} else {
parts = append(parts, p)
}
}
// Step 3: merge very short segments with neighbors.
const minRunes = 8
var merged []string
for _, part := range parts {
if len(merged) > 0 && len([]rune(merged[len(merged)-1])) < minRunes {
// Previous segment is too short: merge current into it.
merged[len(merged)-1] = merged[len(merged)-1] + "\n" + part
} else if len(merged) > 0 && len([]rune(part)) < minRunes {
// Current segment is too short: merge it into previous.
merged[len(merged)-1] = merged[len(merged)-1] + "\n" + part
} else {
merged = append(merged, part)
}
}
parts = merged
var msgs []bridge.ResponseMessage
for _, part := range parts {
msgs = append(msgs, bridge.ResponseMessage{
@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
@@ -506,6 +507,55 @@ func (a *Adapter) ReadMessages(ctx context.Context, msgCh chan<- *OBv11Message)
}
}
// parseJSONCardTitle returns a short human-readable label for a QQ JSON card.
//
// data is the raw JSON text taken from the "data" field of a json-type message
// segment. The first non-empty value among the following is used, in order:
// the top-level prompt, the titles under meta.detail_1, meta.news and
// meta.music, the top-level title, and the top-level desc. The chosen value is
// prefixed with "[卡片] ". If data cannot be decoded, or none of those fields is
// set, the generic placeholder "[卡片消息]" is returned.
func parseJSONCardTitle(data string) string {
	const fallback = "[卡片消息]"

	// section mirrors the title/desc pair found under each known meta key.
	type section struct {
		Title string `json:"title"`
		Desc  string `json:"desc"`
	}
	var payload struct {
		App    string `json:"app"`
		Prompt string `json:"prompt"`
		Title  string `json:"title"`
		Desc   string `json:"desc"`
		Meta   struct {
			Detail1 section `json:"detail_1"`
			News    section `json:"news"`
			Music   section `json:"music"`
		} `json:"meta"`
	}
	if json.Unmarshal([]byte(data), &payload) != nil {
		return fallback
	}

	// Candidates are listed in priority order; the first non-empty one wins.
	candidates := [...]string{
		payload.Prompt,
		payload.Meta.Detail1.Title,
		payload.Meta.News.Title,
		payload.Meta.Music.Title,
		payload.Title,
		payload.Desc,
	}
	for _, text := range candidates {
		if text != "" {
			return "[卡片] " + text
		}
	}
	return fallback
}
// cqSimplifyMap maps CQ code types to simplified Chinese labels.
var cqSimplifyMap = map[string]string{
"image": "[图片]",
@@ -528,6 +578,15 @@ func simplifyCQCodes(s string) string {
break
}
}
if typ == "json" {
// Parse data= field from [CQ:json,data=URL_ENCODED_JSON]
if dataVal := extractCQParam(match, "data"); dataVal != "" {
if decoded, err := url.QueryUnescape(dataVal); err == nil {
return parseJSONCardTitle(decoded)
}
}
return "[卡片消息]"
}
if label, ok := cqSimplifyMap[typ]; ok {
return label
}
@@ -535,6 +594,24 @@ func simplifyCQCodes(s string) string {
})
}
// extractCQParam extracts a named parameter value from a CQ code string.
//
//	extractCQParam("[CQ:json,data=hello%20world]", "data") → "hello%20world"
//
// A parameter is only recognized at a parameter boundary: at the very start of
// cqCode or immediately after a ','. This prevents "id" from matching inside
// "uid=", and prevents a match inside another parameter's value. The value
// runs up to the next ',' or ']' (or to the end of the string if neither is
// present) and is returned verbatim, without any unescaping. It returns the
// empty string when the parameter is not present.
func extractCQParam(cqCode, paramName string) string {
	prefix := paramName + "="
	for offset := 0; offset < len(cqCode); {
		rel := strings.Index(cqCode[offset:], prefix)
		if rel < 0 {
			return ""
		}
		start := offset + rel
		if start == 0 || cqCode[start-1] == ',' {
			val := cqCode[start+len(prefix):]
			// Value ends at "," or "]".
			if end := strings.IndexAny(val, ",]"); end >= 0 {
				return val[:end]
			}
			return val
		}
		// Not at a parameter boundary (e.g. the tail of a longer name);
		// keep scanning after this occurrence.
		offset = start + 1
	}
	return ""
}
// extractText retrieves plain text from an OBv11 message.
// CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx).
func extractText(msg *OBv11Message) string {
@@ -576,6 +653,16 @@ func extractText(msg *OBv11Message) string {
case "reply":
// Reply is handled separately in ToUnified with reply text.
text += "[回复]"
case "json":
if data, ok := s["data"].(map[string]interface{}); ok {
if inner, ok := data["data"].(string); ok && inner != "" {
text += parseJSONCardTitle(inner)
} else {
text += "[卡片消息]"
}
} else {
text += "[卡片消息]"
}
}
}
}
+3 -2
View File
@@ -360,9 +360,10 @@ class ProcessManager extends EventEmitter {
const procInfo = this.processes.get(serviceId);
if (!procInfo.process) {
// 可能已经崩溃了,重置状态
procInfo.status = 'stopped';
return { success: true, message: `${svc.name} 未在运行` };
procInfo.pid = null;
procInfo.startTime = null;
return { success: true, message: `${svc.name} 已停止` };
}
return new Promise((resolve) => {