fix: 后台思考身份混淆 + 静默模式视觉理解 + QQ卡片解析 + 仪表盘状态修复
- 后台思考对话历史增加标签说明,严格区分群聊中不同发送者
- 静默观察模式传入图片URL并预处理,供后台思考参考
- 视觉+OCR双模型结果合并格式优化,避免LLM误认为多张图片
- QQ卡片消息(CQ:json)正确解析标题/类型,不再丢失为[JSON]
- 进程管理器stop()在进程为null时重置pid/startTime,消除矛盾状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -180,7 +180,7 @@ func (o *Orchestrator) ProcessInput(
|
||||
// 0.5 图片预处理: 使用视觉模型分析图片,将描述注入消息
|
||||
if len(params.Images) > 0 && o.visionProvider != nil {
|
||||
startTime := time.Now()
|
||||
augmented := o.preprocessImages(ctx, params.Message, params.Images)
|
||||
augmented := o.PreprocessImages(ctx, params.Message, params.Images)
|
||||
if augmented != params.Message {
|
||||
params.Message = augmented
|
||||
logger.Printf("[orchestrator] 图片预处理耗时: %v, 原消息=%d字, 增强后=%d字",
|
||||
@@ -736,25 +736,20 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
|
||||
}
|
||||
}
|
||||
|
||||
// cacheAssistantMessage caches the assistant response, tagging it with the recipient
|
||||
// in group chats so dialog history shows who the AI was addressing.
|
||||
// cacheAssistantMessage caches the assistant response.
|
||||
func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
|
||||
if o.contextBuilder == nil {
|
||||
return
|
||||
}
|
||||
cached := fullContent
|
||||
if params.ChannelType == "group" && params.Nickname != "" {
|
||||
cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
|
||||
}
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
|
||||
}
|
||||
|
||||
// preprocessImages uses vision and OCR models to analyze images and augments the user message.
|
||||
// PreprocessImages uses vision and OCR models to analyze images and augments the user message.
|
||||
// When both vision and OCR providers are available (and are different models), they are called
|
||||
// in parallel and both results are passed to the chat model for autonomous judgment.
|
||||
// For standalone images (no text): generates a comprehensive description as the message.
|
||||
// For text+images: appends image descriptions as contextual annotations.
|
||||
func (o *Orchestrator) preprocessImages(ctx context.Context, message string, images []string) string {
|
||||
func (o *Orchestrator) PreprocessImages(ctx context.Context, message string, images []string) string {
|
||||
visionPromptBase := "请详细描述这张图片的内容,包括场景、物体、人物、文字(如有)、颜色、氛围等所有视觉信息。"
|
||||
ocrPromptBase := `请逐字逐句完整提取图片中的所有文字内容,保持原有格式和排版。如果图片中没有文字,请回复"无文字"。`
|
||||
|
||||
@@ -809,7 +804,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
|
||||
var combined string
|
||||
switch {
|
||||
case visionDesc != "" && ocrDesc != "":
|
||||
combined = fmt.Sprintf("[视觉分析]: %s\n[文字提取(OCR)]: %s", visionDesc, ocrDesc)
|
||||
combined = fmt.Sprintf("视觉描述:%s\n(图中文字:%s)", visionDesc, ocrDesc)
|
||||
case visionDesc != "":
|
||||
combined = visionDesc
|
||||
case ocrDesc != "":
|
||||
@@ -831,7 +826,7 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
|
||||
|
||||
augmented := message
|
||||
for i, desc := range descriptions {
|
||||
augmented += fmt.Sprintf("\n\n[图片%d的视觉分析]: %s", i+1, desc)
|
||||
augmented += fmt.Sprintf("\n\n[图片%d分析结果]: %s", i+1, desc)
|
||||
}
|
||||
return augmented
|
||||
}
|
||||
|
||||
@@ -216,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
|
||||
if params.ChannelType == "group" {
|
||||
messages = append(messages, model.LLMMessage{
|
||||
Role: model.RoleSystem,
|
||||
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方,不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
|
||||
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据当前这条消息前缀中的发送者名字来称呼对方——即使你之前在历史对话中称呼过别人,也不要把之前用的称呼套在当前发送者身上。不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user