fix: 后台思考身份混淆 + 静默模式视觉理解 + QQ卡片解析 + 仪表盘状态修复
- 后台思考对话历史增加标签说明,严格区分群聊中不同发送者
- 静默观察模式传入图片URL并预处理,供后台思考参考
- 视觉+OCR双模型结果合并格式优化,避免LLM误认为多张图片
- QQ卡片消息(CQ:json)正确解析标题/类型,不再丢失为[JSON]
- 进程管理器stop()在进程为null时重置pid/startTime,消除矛盾状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -841,9 +841,19 @@ func handleChat(
|
||||
if thinker != nil {
|
||||
thinker.RecordUserMessage(req.SessionID)
|
||||
}
|
||||
ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, req.Message)
|
||||
// 图片预处理:静默观察时也分析图片内容,供后台思考使用
|
||||
message := req.Message
|
||||
if len(req.Images) > 0 {
	// Time the image analysis so slow preprocessing is visible in the logs.
	startTime := time.Now()
	augmented := orch.PreprocessImages(r.Context(), message, req.Images)
	// Only adopt the result (and log) when preprocessing actually changed the message.
	if augmented != message {
		message = augmented
		// Use %v, not %%v: the doubled percent prints a literal "%v" and
		// leaves the duration as an unformatted EXTRA argument.
		log.Printf("[silent] 图片预处理耗时: %v", time.Since(startTime))
	}
}
|
||||
ctxBuilder.CacheMessage(req.SessionID, model.RoleUser, message)
|
||||
// 从观察到的群聊消息中提取记忆。
|
||||
orch.ExtractMemoriesOnly(r.Context(), req.UserID, req.SessionID, req.Message)
|
||||
orch.ExtractMemoriesOnly(r.Context(), req.UserID, req.SessionID, message)
|
||||
if thinker != nil {
|
||||
thinker.TriggerPostChatThink()
|
||||
}
|
||||
|
||||
@@ -1177,8 +1177,11 @@ func (t *Thinker) buildThinkingUserPrompt(
|
||||
|
||||
// 对话历史
|
||||
var lastUserMsg string
|
||||
lastUserIsAdmin := false
|
||||
if len(convHistory) > 0 {
|
||||
sb.WriteString("\n【最近的对话】\n")
|
||||
sb.WriteString(fmt.Sprintf("(标签说明:每条消息前的 [名字] 标识了说话者。只有 [%s] 才是%s。其他名字是群聊中的其他成员,不是%s。请严格根据标签区分不同的人,不要张冠李戴。)\n",
|
||||
t.adminNickname, t.adminNickname, t.adminNickname))
|
||||
msgCount := 0
|
||||
for _, msg := range convHistory {
|
||||
if msg.Role == model.RoleUser || msg.Role == model.RoleAssistant {
|
||||
@@ -1187,6 +1190,8 @@ func (t *Thinker) buildThinkingUserPrompt(
|
||||
roleLabel = "昔涟"
|
||||
} else if strings.Contains(msg.Content, t.adminNickname+"/") {
|
||||
roleLabel = t.adminNickname
|
||||
} else if name := extractGroupSender(msg.Content); name != "" {
|
||||
roleLabel = name
|
||||
}
|
||||
content := msg.Content
|
||||
runes := []rune(content)
|
||||
@@ -1197,18 +1202,19 @@ func (t *Thinker) buildThinkingUserPrompt(
|
||||
msgCount++
|
||||
if msg.Role == model.RoleUser {
|
||||
lastUserMsg = msg.Content
|
||||
lastUserIsAdmin = roleLabel == t.adminNickname
|
||||
}
|
||||
}
|
||||
}
|
||||
if msgCount == 0 {
|
||||
sb.WriteString("(暂无对话历史)\n")
|
||||
sb.WriteString("(暂无对话历史)\n")
|
||||
}
|
||||
} else {
|
||||
sb.WriteString("\n【最近的对话】\n(暂无对话历史)\n")
|
||||
}
|
||||
|
||||
// 关键:强调根据对话历史判断用户当前状态
|
||||
if lastUserMsg != "" {
|
||||
// 关键:强调根据对话历史判断当前状态
|
||||
if lastUserMsg != "" && lastUserIsAdmin {
|
||||
sb.WriteString(fmt.Sprintf("\n🔍 **重要**:开拓者最后说的是「%s」。请认真判断:他现在是不是在休息/睡觉/忙?如果是,不要输出【主动消息】指令行。\n", lastUserMsg))
|
||||
}
|
||||
|
||||
@@ -1821,6 +1827,27 @@ func (t *Thinker) expandMemoryKeywords(ctx context.Context, message string) []st
|
||||
return keywords
|
||||
}
|
||||
|
||||
// extractGroupSender returns the sender name from a group-chat message prefix.
//
// Group messages have the form "[群聊 GROUPID] SENDERNAME (UID):\ncontent".
// The name is the text between the "] " that closes the group label and the
// " (" that opens the UID. Because nicknames may themselves contain " (", the
// UID parenthesis is taken to be the last " (" before the first "):" on the
// header line. When the header line has no "):" terminator, the first " ("
// after the label is used instead.
//
// It returns "" when content does not start with "[群聊 " or when no
// delimiter can be found.
func extractGroupSender(content string) string {
	if !strings.HasPrefix(content, "[群聊 ") {
		return ""
	}
	// "] " ends the group label.
	labelEnd := strings.Index(content, "] ")
	if labelEnd < 0 {
		return ""
	}
	rest := content[labelEnd+2:]

	// Only the first line carries the sender; the body may contain any text.
	header := rest
	if nl := strings.IndexByte(rest, '\n'); nl >= 0 {
		header = rest[:nl]
	}
	// Anchor on the "(UID):" suffix so parentheses inside the nickname survive.
	if uidEnd := strings.Index(header, "):"); uidEnd >= 0 {
		if uidStart := strings.LastIndex(header[:uidEnd], " ("); uidStart >= 0 {
			return header[:uidStart]
		}
	}

	// Fallback: first " (" after the label.
	if paren := strings.Index(rest, " ("); paren >= 0 {
		return rest[:paren]
	}
	return ""
}
|
||||
|
||||
// lastUserMessage extracts the last user message from conversation history.
|
||||
func lastUserMessage(history []model.LLMMessage) string {
|
||||
for i := len(history) - 1; i >= 0; i-- {
|
||||
|
||||
@@ -180,7 +180,7 @@ func (o *Orchestrator) ProcessInput(
|
||||
// 0.5 图片预处理: 使用视觉模型分析图片,将描述注入消息
|
||||
if len(params.Images) > 0 && o.visionProvider != nil {
|
||||
startTime := time.Now()
|
||||
augmented := o.preprocessImages(ctx, params.Message, params.Images)
|
||||
augmented := o.PreprocessImages(ctx, params.Message, params.Images)
|
||||
if augmented != params.Message {
|
||||
params.Message = augmented
|
||||
logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
|
||||
@@ -736,25 +736,20 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
|
||||
}
|
||||
}
|
||||
|
||||
// cacheAssistantMessage caches the assistant response, tagging it with the recipient
|
||||
// in group chats so dialog history shows who the AI was addressing.
|
||||
// cacheAssistantMessage caches the assistant response.
|
||||
func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
|
||||
if o.contextBuilder == nil {
|
||||
return
|
||||
}
|
||||
cached := fullContent
|
||||
if params.ChannelType == "group" && params.Nickname != "" {
|
||||
cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
|
||||
}
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
|
||||
}
|
||||
|
||||
// preprocessImages uses vision and OCR models to analyze images and augments the user message.
|
||||
// PreprocessImages uses vision and OCR models to analyze images and augments the user message.
|
||||
// When both vision and OCR providers are available (and are different models), they are called
|
||||
// in parallel and both results are passed to the chat model for autonomous judgment.
|
||||
// For standalone images (no text): generates a comprehensive description as the message.
|
||||
// For text+images: appends image descriptions as contextual annotations.
|
||||
func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
|
||||
func (o *Orchestrator) PreprocessImages(ctx context.Context, message string, images []string) string {
|
||||
visionPromptBase := "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。"
|
||||
ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容,保持原有格式和排版。如果图片中没有文字,请回复"无文字"。`
|
||||
|
||||
@@ -809,7 +804,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
|
||||
var combined string
|
||||
switch {
|
||||
case visionDesc != "" && ocrDesc != "":
|
||||
combined = fmt.Sprintf("[视觉分析]: %s\n[文字提取(OCR)]: %s", visionDesc, ocrDesc)
|
||||
combined = fmt.Sprintf("视觉描述:%s\n(图中文字:%s)", visionDesc, ocrDesc)
|
||||
case visionDesc != "":
|
||||
combined = visionDesc
|
||||
case ocrDesc != "":
|
||||
@@ -831,7 +826,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
|
||||
|
||||
augmented := message
|
||||
for i, desc := range descriptions {
|
||||
augmented += fmt.Sprintf("\n\n[图片%d的视觉分析]: %s", i+1, desc)
|
||||
augmented += fmt.Sprintf("\n\n[图片%d分析结果]: %s", i+1, desc)
|
||||
}
|
||||
return augmented
|
||||
}
|
||||
|
||||
@@ -216,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
|
||||
if params.ChannelType == "group" {
|
||||
messages = append(messages, model.LLMMessage{
|
||||
Role: model.RoleSystem,
|
||||
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方,不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
|
||||
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据当前这条消息前缀中的发送者名字来称呼对方——即使你之前在历史对话中称呼过别人,也不要把之前用的称呼套在当前发送者身上。不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -180,7 +180,7 @@ func main() {
|
||||
case isAdmin && !isBotMentioned && shouldAdminBeSilent(msg, router):
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isAdmin:
|
||||
msg.RouteType = "normal"
|
||||
@@ -195,12 +195,12 @@ func main() {
|
||||
// the admin already gets QQ's native @notification. Observe silently.
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isSilent:
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, imageURLs, videoURLs, voiceURLs, isAdmin)
|
||||
if silentErr != nil {
|
||||
msgLogger.Log(logging.LogEntry{
|
||||
Timestamp: time.Now(),
|
||||
@@ -769,23 +769,49 @@ func parseSSEAndAccumulate(body string) string {
|
||||
return strings.Join(deltas, "")
|
||||
}
|
||||
|
||||
// splitContent splits text by ♪ (sentence-break marker), then by \n\n within each segment.
|
||||
// Non-empty segments are each wrapped as a chat message; empty input returns a single empty message.
|
||||
// splitContent splits text into separate chat messages.
|
||||
// It first splits by \n\n (message separator), then within each message
|
||||
// optionally splits further by ♪ (sentence-break marker).
|
||||
// Very short segments (fewer than 8 runes) are merged with their neighbors to avoid
|
||||
// one-word messages followed by a wall of text.
|
||||
func splitContent(text string) []bridge.ResponseMessage {
|
||||
// First split by ♪ sentence-break marker.
|
||||
var rawParts []string
|
||||
if strings.Contains(text, "♪") {
|
||||
rawParts = strings.Split(text, "♪")
|
||||
} else {
|
||||
rawParts = strings.Split(text, "\n\n")
|
||||
}
|
||||
// Step 1: split by \n\n (message-level separator) always.
|
||||
rawParts := strings.Split(text, "\n\n")
|
||||
var parts []string
|
||||
for _, p := range rawParts {
|
||||
p = strings.TrimSpace(p)
|
||||
if p != "" {
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
// Step 2: within each \n\n segment, split by ♪ if present.
|
||||
if strings.Contains(p, "♪") {
|
||||
for _, sub := range strings.Split(p, "♪") {
|
||||
sub = strings.TrimSpace(sub)
|
||||
if sub != "" {
|
||||
parts = append(parts, sub)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parts = append(parts, p)
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: merge very short segments with neighbors.
|
||||
const minRunes = 8
|
||||
var merged []string
|
||||
for _, part := range parts {
|
||||
if len(merged) > 0 && len([]rune(merged[len(merged)-1])) < minRunes {
|
||||
// Previous segment is too short: merge current into it.
|
||||
merged[len(merged)-1] = merged[len(merged)-1] + "\n" + part
|
||||
} else if len(merged) > 0 && len([]rune(part)) < minRunes {
|
||||
// Current segment is too short: merge it into previous.
|
||||
merged[len(merged)-1] = merged[len(merged)-1] + "\n" + part
|
||||
} else {
|
||||
merged = append(merged, part)
|
||||
}
|
||||
}
|
||||
parts = merged
|
||||
|
||||
var msgs []bridge.ResponseMessage
|
||||
for _, part := range parts {
|
||||
msgs = append(msgs, bridge.ResponseMessage{
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -506,6 +507,55 @@ func (a *Adapter) ReadMessages(ctx context.Context, msgCh chan<- *OBv11Message)
|
||||
}
|
||||
}
|
||||
|
||||
// parseJSONCardTitle derives a short human-readable label from a QQ JSON card.
//
// data is the raw JSON text taken from the "data" field of a json-type message
// segment. The first non-empty value among prompt, meta.detail_1.title,
// meta.news.title, meta.music.title, title and desc is returned with a
// "[卡片] " prefix. If the JSON cannot be decoded, or none of those fields is
// set, the generic placeholder "[卡片消息]" is returned.
func parseJSONCardTitle(data string) string {
	const placeholder = "[卡片消息]"

	// titled is the shape shared by the meta sub-objects that are inspected.
	type titled struct {
		Title string `json:"title"`
		Desc  string `json:"desc"`
	}
	var payload struct {
		App    string `json:"app"`
		Prompt string `json:"prompt"`
		Title  string `json:"title"`
		Desc   string `json:"desc"`
		Meta   struct {
			Detail1 titled `json:"detail_1"`
			News    titled `json:"news"`
			Music   titled `json:"music"`
		} `json:"meta"`
	}
	if json.Unmarshal([]byte(data), &payload) != nil {
		return placeholder
	}

	// Candidates in priority order: prompt (e.g. "[分享]标题"), then the meta
	// titles, then the top-level title and description.
	candidates := [...]string{
		payload.Prompt,
		payload.Meta.Detail1.Title,
		payload.Meta.News.Title,
		payload.Meta.Music.Title,
		payload.Title,
		payload.Desc,
	}
	for _, text := range candidates {
		if text != "" {
			return "[卡片] " + text
		}
	}
	return placeholder
}
|
||||
|
||||
// cqSimplifyMap maps CQ code types to simplified Chinese labels.
|
||||
var cqSimplifyMap = map[string]string{
|
||||
"image": "[图片]",
|
||||
@@ -528,6 +578,15 @@ func simplifyCQCodes(s string) string {
|
||||
break
|
||||
}
|
||||
}
|
||||
if typ == "json" {
|
||||
// Parse data= field from [CQ:json,data=URL_ENCODED_JSON]
|
||||
if dataVal := extractCQParam(match, "data"); dataVal != "" {
|
||||
if decoded, err := url.QueryUnescape(dataVal); err == nil {
|
||||
return parseJSONCardTitle(decoded)
|
||||
}
|
||||
}
|
||||
return "[卡片消息]"
|
||||
}
|
||||
if label, ok := cqSimplifyMap[typ]; ok {
|
||||
return label
|
||||
}
|
||||
@@ -535,6 +594,24 @@ func simplifyCQCodes(s string) string {
|
||||
})
|
||||
}
|
||||
|
||||
// extractCQParam returns the value of the named parameter in a CQ code string.
// e.g. extractCQParam("[CQ:json,data=hello%20world]", "data") → "hello%20world"
//
// The name must begin at a parameter boundary (the start of the string or
// immediately after a ","), so asking for "data" does not match "metadata=".
// The value runs up to the next "," or "]", or to the end of the string when
// neither is present. It returns "" when the parameter is not found.
func extractCQParam(cqCode, paramName string) string {
	key := paramName + "="
	for from := 0; from < len(cqCode); {
		rel := strings.Index(cqCode[from:], key)
		if rel < 0 {
			return ""
		}
		at := from + rel
		// Accept only whole parameter names, not suffixes of longer ones.
		if at == 0 || cqCode[at-1] == ',' {
			val := cqCode[at+len(key):]
			if end := strings.IndexAny(val, ",]"); end >= 0 {
				return val[:end]
			}
			return val
		}
		from = at + len(key)
	}
	return ""
}
|
||||
|
||||
// extractText retrieves plain text from an OBv11 message.
|
||||
// CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx).
|
||||
func extractText(msg *OBv11Message) string {
|
||||
@@ -576,6 +653,16 @@ func extractText(msg *OBv11Message) string {
|
||||
case "reply":
|
||||
// Reply is handled separately in ToUnified with reply text.
|
||||
text += "[回复]"
|
||||
case "json":
|
||||
if data, ok := s["data"].(map[string]interface{}); ok {
|
||||
if inner, ok := data["data"].(string); ok && inner != "" {
|
||||
text += parseJSONCardTitle(inner)
|
||||
} else {
|
||||
text += "[卡片消息]"
|
||||
}
|
||||
} else {
|
||||
text += "[卡片消息]"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -360,9 +360,10 @@ class ProcessManager extends EventEmitter {
|
||||
|
||||
const procInfo = this.processes.get(serviceId);
|
||||
if (!procInfo.process) {
|
||||
// 可能已经崩溃了,重置状态
|
||||
procInfo.status = 'stopped';
|
||||
return { success: true, message: `${svc.name} 未在运行` };
|
||||
procInfo.pid = null;
|
||||
procInfo.startTime = null;
|
||||
return { success: true, message: `${svc.name} 已停止` };
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
|
||||
Reference in New Issue
Block a user