fix: 修复 AI 回复无法送达发送者 + 重复消息 + action角色泄露 + OS环境支持

广播逻辑重构:
- AI 回复 (stream_start/response/stream_segments/multi_message/stream_end) 改用 broadcastToUser 发送给所有客户端
- 用户消息回显保持 broadcastToUserExcept 排除发送者

消息去重与角色修复:
- CacheMessage(user) 移至回复生成后,避免本轮 LLM 调用出现重复用户消息
- action 角色消息在 DB 存储时映射为 assistant,DeepSeek 等模型不支持自定义角色
- stream_end defer 机制确保错误路径也会终止客户端思考指示器

OS 完整环境支持:
- host 包重构为 HostBackend 接口 + Direct/WSL/Docker 三种后端
- 新增 os_exec/os_file/os_system 工具供 AI 在完整 Linux 环境中自由操作

其他:
- 视觉模型注入 + 图片预处理后清空 Images 避免传给 Chat 模型
- 图片 URL 相对路径→绝对 URL 转换
- DevTools 链路追踪页面 + 重启修复
- 记忆搜索模糊匹配增强
- 后台思考定时调度支持
- 管理后台页面 (模型配置/用户管理等)
- docs/api 更新广播机制说明

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 12:46:17 +08:00
parent aac64ed8b7
commit 91c9ee4b2d
49 changed files with 5032 additions and 299 deletions
+68 -4
View File
@@ -4,15 +4,16 @@ import (
"bufio"
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"github.com/yourname/cyrene-ai/pkg/logger"
"net/http"
"strings"
"time"
"github.com/yourname/cyrene-ai/ai-core/internal/model"
"github.com/yourname/cyrene-ai/pkg/logger"
)
// OpenAIConfig OpenAI适配器配置
@@ -267,12 +268,13 @@ func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage
LogCall(r)
}()
// 转换消息格式
// 转换消息格式(先解析图片 URL 为 data URL)
oaiMessages := make([]openAIMessage, len(messages))
for i, msg := range messages {
resolvedImages := p.resolveImages(msg.Images)
oaiMsg := openAIMessage{
Role: string(msg.Role),
Content: buildContent(msg.Content, msg.Images),
Content: buildContent(msg.Content, resolvedImages),
Name: msg.Name,
ToolCallID: msg.ToolCallID,
ReasoningContent: msg.ReasoningContent,
@@ -377,9 +379,10 @@ func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage
func (p *OpenAIProvider) doChatStream(ctx context.Context, messages []model.LLMMessage, modelName string, tools []OpenAITool) (*http.Response, error) {
oaiMessages := make([]openAIMessage, len(messages))
for i, msg := range messages {
resolvedImages := p.resolveImages(msg.Images)
oaiMsg := openAIMessage{
Role: string(msg.Role),
Content: buildContent(msg.Content, msg.Images),
Content: buildContent(msg.Content, resolvedImages),
Name: msg.Name,
ToolCallID: msg.ToolCallID,
ReasoningContent: msg.ReasoningContent,
@@ -455,6 +458,67 @@ func contentString(v interface{}) string {
return ""
}
// resolveImages returns a list of the same length and order as images in which
// every entry that is not already a "data:" URL has been replaced by the base64
// data URL produced by downloadAsDataURL.
//
// An empty or nil input is returned unchanged. When a download fails, the
// failure is logged and the original entry is kept in place as a fallback.
func (p *OpenAIProvider) resolveImages(images []string) []string {
	if len(images) == 0 {
		return images
	}

	out := make([]string, len(images))
	for idx, src := range images {
		// Start from the original value; it is only overwritten after a
		// successful download.
		out[idx] = src
		if strings.HasPrefix(src, "data:") {
			continue
		}

		encoded, dlErr := p.downloadAsDataURL(src)
		if dlErr != nil {
			logger.Printf("[openai] 图片下载失败, 保留原始 URL: %s, err=%v", src, dlErr)
			continue
		}
		out[idx] = encoded
	}
	return out
}
// downloadAsDataURL fetches the resource at url with the provider's HTTP client
// and returns it as a base64 data URL of the form "data:<mime>;base64,<payload>".
//
// The request runs under its own 30-second timeout derived from
// context.Background, so it is not tied to any caller context.
//
// The media type is taken from the Content-Type response header with any
// parameters (for example "; charset=binary") removed. If the header is absent
// or does not name an image type, the type is sniffed from the body instead.
//
// An error is returned when the request cannot be built or sent, when the
// response status is not 200, when the body cannot be read or is larger than
// 20MB, or when the payload cannot be identified as an image.
func (p *OpenAIProvider) downloadAsDataURL(url string) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", fmt.Errorf("创建请求失败: %w", err)
	}

	resp, err := p.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("下载失败: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	// Cap the download at 20MB. One byte beyond the cap is read so that an
	// oversized body can be told apart from one that is exactly at the limit.
	const maxSize = 20 * 1024 * 1024
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize+1))
	if err != nil {
		return "", fmt.Errorf("读取失败: %w", err)
	}
	if len(body) > maxSize {
		return "", fmt.Errorf("图片过大: %d bytes", len(body))
	}

	// Reduce the header to a bare media type: parameters would otherwise end
	// up inside the data URL and produce a malformed media type.
	mimeType := resp.Header.Get("Content-Type")
	if i := strings.Index(mimeType, ";"); i >= 0 {
		mimeType = mimeType[:i]
	}
	mimeType = strings.ToLower(strings.TrimSpace(mimeType))

	// A missing or generic header (e.g. application/octet-stream) says nothing
	// about the payload, so fall back to sniffing the bytes.
	if !strings.HasPrefix(mimeType, "image/") {
		mimeType = http.DetectContentType(body)
	}
	// Refuse payloads that are not images (such as an HTML error page served
	// with status 200) rather than wrapping them in an image data URL.
	if !strings.HasPrefix(mimeType, "image/") {
		return "", fmt.Errorf("非图片内容: %s", mimeType)
	}

	return "data:" + mimeType + ";base64," + base64.StdEncoding.EncodeToString(body), nil
}
// buildContent converts text + optional images to API content format.
// Returns a plain string if no images, or a multimodal array otherwise.
func buildContent(text string, images []string) interface{} {
+1
View File
@@ -19,6 +19,7 @@ const (
PurposeToolCalling ModelPurpose = "tool_calling"
PurposeMemoryExtraction ModelPurpose = "memory_extraction"
PurposeVision ModelPurpose = "vision"
PurposeOCR ModelPurpose = "ocr"
)
// ErrModelNotRequired is returned when an optional model is unavailable.