feat: ASR语音转写管线 + 群聊身份混淆修复
- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息 - 模型名称全部从models.json路由获取,无硬编码 - 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题 - 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆 - 群聊上下文指令改为肯定性表述,移除具体名称提及 - trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒 - 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒) - 新增video_tool插件模板 - 优化OpenAI adapter reasoning_content处理 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -185,6 +185,8 @@ func main() {
|
||||
toolRegistry := plgManager.NewToolRegistry()
|
||||
var visionProvider llm.LLMProvider
|
||||
var ocrProvider llm.LLMProvider
|
||||
var videoProvider llm.LLMProvider
|
||||
var asrProvider llm.ASRProvider
|
||||
if getEnvBool("ENABLE_TOOLS", true) {
|
||||
// 11 个共享通用插件 — 注册其工具到统一注册中心
|
||||
registerPluginTools(toolRegistry, &pluginCalc.CalculatorPlugin{})
|
||||
@@ -252,12 +254,52 @@ func main() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if ocrProvider == nil {
|
||||
log.Println("OCR模型未配置,图片文字提取将复用视觉模型")
|
||||
}
|
||||
|
||||
// 初始化视频理解模型
|
||||
videoProvider = nil
|
||||
if configLoader != nil && configLoader.HasConfig() {
|
||||
cfg := configLoader.GetConfig()
|
||||
if route, ok := cfg.Routing["video"]; ok && len(route.FallbackChain) > 0 {
|
||||
for _, mid := range route.FallbackChain {
|
||||
if _, ok := cfg.Models[mid]; ok {
|
||||
videoProvider, _ = modelSelector.Select(context.Background(), llm.PurposeVideo)
|
||||
log.Printf("视频理解模型已启用: %s", videoProvider.ModelName())
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if videoProvider == nil {
|
||||
log.Println("视频理解模型未配置")
|
||||
}
|
||||
|
||||
// 初始化 ASR 语音识别模型
|
||||
asrProvider = nil
|
||||
if configLoader != nil && configLoader.HasConfig() {
|
||||
cfg := configLoader.GetConfig()
|
||||
if route, ok := cfg.Routing["speech_recognition"]; ok && len(route.FallbackChain) > 0 {
|
||||
for _, mid := range route.FallbackChain {
|
||||
if m, ok := cfg.Models[mid]; ok {
|
||||
if p, ok := cfg.Providers[m.Provider]; ok {
|
||||
asrProvider = llm.NewDashScopeASRProvider(p.BaseURL, p.APIKey, m.Name)
|
||||
log.Printf("ASR语音识别模型已启用: %s", m.Name)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if asrProvider == nil {
|
||||
log.Println("ASR语音识别模型未配置")
|
||||
}
|
||||
|
||||
toolRegistry.Register(wrapTool(tools.NewVisionTool(visionProvider), "vision_analyze", "Image Vision Analysis & OCR", "multimodal"))
|
||||
toolRegistry.Register(wrapTool(tools.NewVideoTool(videoProvider), "video_analyze", "Video Understanding & Analysis", "multimodal"))
|
||||
|
||||
if knowledgeRetriever != nil {
|
||||
toolRegistry.Register(wrapTool(tools.NewKnowledgeSearchTool(knowledgeRetriever), "knowledge_search", "Search Knowledge Base", "knowledge"))
|
||||
@@ -286,6 +328,7 @@ func main() {
|
||||
convStore,
|
||||
adminUserID,
|
||||
adminSessionID,
|
||||
cfg.AdminNickname,
|
||||
memClient,
|
||||
)
|
||||
|
||||
@@ -375,12 +418,24 @@ func main() {
|
||||
orch.SetOCRProvider(ocrProvider)
|
||||
log.Printf("对话编排器: OCR模型已注入 (%s)", ocrProvider.ModelName())
|
||||
}
|
||||
log.Println("对话编排器 v2.0 已就绪")
|
||||
if videoProvider != nil {
|
||||
orch.SetVideoProvider(videoProvider)
|
||||
log.Printf("对话编排器: 视频模型已注入 (%s)\n", videoProvider.ModelName())
|
||||
} else {
|
||||
log.Println("对话编排器: 视频模型未配置,视频理解功能不可用")
|
||||
}
|
||||
if asrProvider != nil && asrProvider.IsAvailable() {
|
||||
orch.SetASRProvider(asrProvider)
|
||||
log.Printf("对话编排器: ASR语音识别模型已注入 (%s)\n", asrProvider.ModelName())
|
||||
} else {
|
||||
log.Println("对话编排器: ASR语音识别模型未配置")
|
||||
}
|
||||
log.Println("对话编排器 v2.0 已就绪")
|
||||
_ = orch
|
||||
|
||||
// 注册对话API端点
|
||||
mux.HandleFunc("/api/v1/chat", func(w http.ResponseWriter, r *http.Request) {
|
||||
handleChat(w, r, orch, ctxBuilder, personaLoader, memRetriever, memExtractor, iotClient, thinker, toolRegistry)
|
||||
handleChat(w, r, orch, ctxBuilder, personaLoader, memRetriever, memExtractor, iotClient, thinker, toolRegistry, adminSessionID)
|
||||
})
|
||||
|
||||
// 注册记忆API端点
|
||||
@@ -746,6 +801,7 @@ func handleChat(
|
||||
iotClient *tools.IoTClient,
|
||||
thinker *background.Thinker,
|
||||
_ *plgManager.ToolRegistry,
|
||||
adminSessionID string,
|
||||
) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
@@ -758,8 +814,11 @@ func handleChat(
|
||||
SessionID string `json:"session_id"`
|
||||
Message string `json:"message"`
|
||||
Images []string `json:"images,omitempty"` // 图片 base64 data URL
|
||||
VideoURLs []string `json:"video_urls,omitempty"` // 视频 URL (多模态)
|
||||
VoiceURLs []string `json:"voice_urls,omitempty"` // 语音 URL (ASR 转录)
|
||||
Mode string `json:"mode"`
|
||||
Nickname string `json:"nickname,omitempty"`
|
||||
IsAdmin bool `json:"is_admin"`
|
||||
Source struct {
|
||||
Platform string `json:"platform"`
|
||||
ChannelID string `json:"channel_id"`
|
||||
@@ -795,11 +854,20 @@ func handleChat(
|
||||
|
||||
ctx := r.Context()
|
||||
|
||||
// Inject admin flag for tool access control.
|
||||
ctx = context.WithValue(ctx, plgManager.CtxKeyIsAdmin, req.IsAdmin)
|
||||
|
||||
// 0. 记录用户活动(重置闲置计时器)
|
||||
if thinker != nil {
|
||||
thinker.RecordUserMessage(req.SessionID)
|
||||
}
|
||||
|
||||
// Admin private messages: redirect to the main admin session so conversation
|
||||
// history is shared across platforms (QQ, web UI, etc.).
|
||||
if req.UserID == "admin" && req.Source.ChannelType == "direct" && adminSessionID != "" {
|
||||
req.SessionID = adminSessionID
|
||||
}
|
||||
|
||||
// 确定用户昵称
|
||||
userNickname := req.Nickname
|
||||
if userNickname == "" {
|
||||
@@ -828,6 +896,8 @@ func handleChat(
|
||||
SessionID: req.SessionID,
|
||||
Message: req.Message,
|
||||
Images: req.Images,
|
||||
VideoURLs: req.VideoURLs,
|
||||
VoiceURLs: req.VoiceURLs,
|
||||
Mode: req.Mode,
|
||||
Nickname: userNickname,
|
||||
ChannelType: req.Source.ChannelType,
|
||||
|
||||
@@ -91,6 +91,7 @@ type Thinker struct {
|
||||
convStore *ctxbuild.ConversationStore
|
||||
adminUserID string
|
||||
adminSessionID string
|
||||
adminNickname string
|
||||
activeSessionID string // 当前活跃的前端会话 ID(随用户消息更新)
|
||||
|
||||
// 记忆服务 HTTP 客户端
|
||||
@@ -290,6 +291,7 @@ func NewThinker(
|
||||
convStore *ctxbuild.ConversationStore,
|
||||
adminUserID string,
|
||||
adminSessionID string,
|
||||
adminNickname string,
|
||||
memClient *memory.Client,
|
||||
) *Thinker {
|
||||
// 加载时区配置
|
||||
@@ -323,6 +325,7 @@ func NewThinker(
|
||||
convStore: convStore,
|
||||
adminUserID: adminUserID,
|
||||
adminSessionID: adminSessionID,
|
||||
adminNickname: adminNickname,
|
||||
memClient: memClient,
|
||||
pendingThoughts: make([]*PendingThought, 0),
|
||||
lastUserMessage: time.Now(),
|
||||
@@ -1119,7 +1122,9 @@ func (t *Thinker) buildThinkingSystemPrompt(personaConfig *persona.PersonaConfig
|
||||
2. 3-4句话即可。`
|
||||
}
|
||||
|
||||
return basePrompt + thinkingInstructions
|
||||
// Security: only admin can authorize sensitive operations.
|
||||
securityRule := fmt.Sprintf("\n\n## 安全规则\n- 涉及敏感操作(调整IoT设备、执行主机操作等)的请求,只有%s(管理员)下达的指令才能执行。其他陌生人让你做的敏感操作不要执行。\n", t.adminNickname)
|
||||
return basePrompt + thinkingInstructions + securityRule
|
||||
}
|
||||
|
||||
// buildThinkingUserPrompt 构建思考用的用户提示词
|
||||
@@ -1180,8 +1185,8 @@ func (t *Thinker) buildThinkingUserPrompt(
|
||||
roleLabel := "用户"
|
||||
if msg.Role == model.RoleAssistant {
|
||||
roleLabel = "昔涟"
|
||||
} else if strings.Contains(msg.Content, "【管理员】") {
|
||||
roleLabel = "管理员"
|
||||
} else if strings.Contains(msg.Content, t.adminNickname+"/") {
|
||||
roleLabel = t.adminNickname
|
||||
}
|
||||
content := msg.Content
|
||||
runes := []rune(content)
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ASRProvider handles speech-to-text transcription.
|
||||
type ASRProvider interface {
|
||||
Transcribe(ctx context.Context, audioURL string) (string, error)
|
||||
IsAvailable() bool
|
||||
ModelName() string
|
||||
}
|
||||
|
||||
// DashScopeASRProvider uses DashScope Paraformer API for offline speech recognition.
|
||||
type DashScopeASRProvider struct {
|
||||
apiKey string
|
||||
baseURL string
|
||||
model string
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// NewDashScopeASRProvider creates a DashScope ASR provider.
|
||||
func NewDashScopeASRProvider(baseURL, apiKey, model string) *DashScopeASRProvider {
|
||||
if model == "" {
|
||||
model = "qwen3-asr-flash-2026-02-10"
|
||||
}
|
||||
return &DashScopeASRProvider{
|
||||
apiKey: apiKey,
|
||||
baseURL: baseURL,
|
||||
model: model,
|
||||
client: &http.Client{Timeout: 60 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// IsAvailable returns true if the API key is configured.
|
||||
func (p *DashScopeASRProvider) IsAvailable() bool {
|
||||
return p.apiKey != ""
|
||||
}
|
||||
|
||||
// ModelName returns the ASR model name.
|
||||
func (p *DashScopeASRProvider) ModelName() string {
|
||||
return p.model
|
||||
}
|
||||
|
||||
type asrRequest struct {
|
||||
Model string `json:"model"`
|
||||
Input asrInput `json:"input"`
|
||||
Parameters asrParams `json:"parameters"`
|
||||
}
|
||||
|
||||
type asrInput struct {
|
||||
Audio string `json:"audio"`
|
||||
}
|
||||
|
||||
type asrParams struct {
|
||||
Format string `json:"format,omitempty"`
|
||||
SampleRate int `json:"sample_rate,omitempty"`
|
||||
Language string `json:"language,omitempty"`
|
||||
}
|
||||
|
||||
type asrResponse struct {
|
||||
Output struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"output"`
|
||||
Usage struct {
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
} `json:"usage"`
|
||||
RequestID string `json:"request_id"`
|
||||
Code string `json:"code,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
// downloadAudio fetches audio data from a URL and returns the bytes with inferred format.
|
||||
func (p *DashScopeASRProvider) downloadAudio(ctx context.Context, audioURL string) ([]byte, string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", audioURL, nil)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("create download request: %w", err)
|
||||
}
|
||||
|
||||
resp, err := p.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("download failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
data, err := io.ReadAll(io.LimitReader(resp.Body, 10<<20)) // 10 MB limit
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("read audio data: %w", err)
|
||||
}
|
||||
|
||||
format := inferAudioFormat(audioURL, resp.Header.Get("Content-Type"))
|
||||
return data, format, nil
|
||||
}
|
||||
|
||||
// inferAudioFormat determines the audio format from URL extension or Content-Type header.
|
||||
func inferAudioFormat(urlStr, contentType string) string {
|
||||
// Try URL extension first
|
||||
u, err := url.Parse(urlStr)
|
||||
if err == nil {
|
||||
path := u.Path
|
||||
if idx := strings.LastIndex(path, "."); idx >= 0 {
|
||||
ext := strings.ToLower(path[idx+1:])
|
||||
switch ext {
|
||||
case "amr", "wav", "mp3", "ogg", "flac", "m4a", "aac", "opus", "webm", "pcm":
|
||||
return ext
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: use Content-Type
|
||||
if strings.Contains(contentType, "audio/amr") || strings.Contains(contentType, "amr") {
|
||||
return "amr"
|
||||
}
|
||||
if strings.Contains(contentType, "audio/wav") || strings.Contains(contentType, "wav") {
|
||||
return "wav"
|
||||
}
|
||||
if strings.Contains(contentType, "audio/mpeg") || strings.Contains(contentType, "mp3") {
|
||||
return "mp3"
|
||||
}
|
||||
if strings.Contains(contentType, "audio/ogg") || strings.Contains(contentType, "opus") {
|
||||
return "ogg"
|
||||
}
|
||||
return "amr" // default for QQ voice messages
|
||||
}
|
||||
// asrEndpoint derives the DashScope ASR REST endpoint from the provider base URL.
|
||||
func asrEndpoint(baseURL string) string {
|
||||
if u, err := url.Parse(baseURL); err == nil {
|
||||
return fmt.Sprintf("%s://%s/api/v1/services/audio/asr/asr", u.Scheme, u.Host)
|
||||
}
|
||||
return strings.TrimRight(baseURL, "/") + "/api/v1/services/audio/asr/asr"
|
||||
}
|
||||
func (p *DashScopeASRProvider) Transcribe(ctx context.Context, audioURL string) (string, error) {
|
||||
if !p.IsAvailable() {
|
||||
return "", fmt.Errorf("DashScope ASR API key not configured")
|
||||
}
|
||||
|
||||
audioData, format, err := p.downloadAudio(ctx, audioURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("download audio: %w", err)
|
||||
}
|
||||
|
||||
audioB64 := base64.StdEncoding.EncodeToString(audioData)
|
||||
|
||||
reqBody := asrRequest{
|
||||
Model: p.model,
|
||||
Input: asrInput{
|
||||
Audio: fmt.Sprintf("data:audio/%s;base64,%s", format, audioB64),
|
||||
},
|
||||
Parameters: asrParams{
|
||||
Format: format,
|
||||
Language: "zh",
|
||||
},
|
||||
}
|
||||
bodyBytes, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("marshal ASR request: %w", err)
|
||||
}
|
||||
|
||||
asrURL := asrEndpoint(p.baseURL)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", asrURL, bytes.NewReader(bodyBytes))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create ASR request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", "Bearer "+p.apiKey)
|
||||
|
||||
resp, err := p.client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("ASR request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
respBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read ASR response: %w", err)
|
||||
}
|
||||
|
||||
var asrResp asrResponse
|
||||
if err := json.Unmarshal(respBytes, &asrResp); err != nil {
|
||||
return "", fmt.Errorf("parse ASR response: %w", err)
|
||||
}
|
||||
|
||||
if asrResp.Code != "" && asrResp.Code != "0" {
|
||||
return "", fmt.Errorf("ASR error: %s (code=%s)", asrResp.Message, asrResp.Code)
|
||||
}
|
||||
|
||||
return asrResp.Output.Text, nil
|
||||
}
|
||||
@@ -274,7 +274,7 @@ func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage
|
||||
resolvedImages := p.resolveImages(msg.Images)
|
||||
oaiMsg := openAIMessage{
|
||||
Role: string(msg.Role),
|
||||
Content: buildContent(msg.Content, resolvedImages),
|
||||
Content: buildContent(msg.Content, resolvedImages, msg.VideoURLs),
|
||||
Name: msg.Name,
|
||||
ToolCallID: msg.ToolCallID,
|
||||
ReasoningContent: msg.ReasoningContent,
|
||||
@@ -382,7 +382,7 @@ func (p *OpenAIProvider) doChatStream(ctx context.Context, messages []model.LLMM
|
||||
resolvedImages := p.resolveImages(msg.Images)
|
||||
oaiMsg := openAIMessage{
|
||||
Role: string(msg.Role),
|
||||
Content: buildContent(msg.Content, resolvedImages),
|
||||
Content: buildContent(msg.Content, resolvedImages, msg.VideoURLs),
|
||||
Name: msg.Name,
|
||||
ToolCallID: msg.ToolCallID,
|
||||
ReasoningContent: msg.ReasoningContent,
|
||||
@@ -521,23 +521,27 @@ func (p *OpenAIProvider) downloadAsDataURL(url string) (string, error) {
|
||||
|
||||
// buildContent converts text + optional images to API content format.
|
||||
// Returns a plain string if no images, or a multimodal array otherwise.
|
||||
func buildContent(text string, images []string) interface{} {
|
||||
if len(images) == 0 {
|
||||
func buildContent(text string, images []string, videoURLs []string) interface{} {
|
||||
if len(images) == 0 && len(videoURLs) == 0 {
|
||||
return text
|
||||
}
|
||||
parts := make([]model.ImageContent, 0, len(images)+1)
|
||||
parts := make([]interface{}, 0, len(images)+len(videoURLs)+1)
|
||||
if text != "" {
|
||||
parts = append(parts, model.ImageContent{
|
||||
Type: "text",
|
||||
Text: text,
|
||||
parts = append(parts, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": text,
|
||||
})
|
||||
}
|
||||
for _, img := range images {
|
||||
parts = append(parts, model.ImageContent{
|
||||
Type: "image_url",
|
||||
ImageURL: &model.ImageURL{
|
||||
URL: img,
|
||||
},
|
||||
parts = append(parts, map[string]interface{}{
|
||||
"type": "image_url",
|
||||
"image_url": map[string]string{"url": img},
|
||||
})
|
||||
}
|
||||
for _, video := range videoURLs {
|
||||
parts = append(parts, map[string]interface{}{
|
||||
"type": "video_url",
|
||||
"video_url": map[string]string{"url": video},
|
||||
})
|
||||
}
|
||||
return parts
|
||||
|
||||
@@ -18,8 +18,10 @@ const (
|
||||
PurposeIntentAnalysis ModelPurpose = "intent_analysis"
|
||||
PurposeToolCalling ModelPurpose = "tool_calling"
|
||||
PurposeMemoryExtraction ModelPurpose = "memory_extraction"
|
||||
PurposeVision ModelPurpose = "vision"
|
||||
PurposeOCR ModelPurpose = "ocr"
|
||||
PurposeVision ModelPurpose = "vision"
|
||||
PurposeVideo ModelPurpose = "video"
|
||||
PurposeOCR ModelPurpose = "ocr"
|
||||
PurposeSpeechRecognition ModelPurpose = "speech_recognition"
|
||||
)
|
||||
|
||||
// ErrModelNotRequired is returned when an optional model is unavailable.
|
||||
|
||||
@@ -17,6 +17,7 @@ type LLMMessage struct {
|
||||
Role Role `json:"role"`
|
||||
Content string `json:"content"`
|
||||
Images []string `json:"images,omitempty"` // 图片 base64 data URL 列表 (多模态)
|
||||
VideoURLs []string `json:"video_urls,omitempty"` // 视频 URL 列表 (多模态)
|
||||
Name string `json:"name,omitempty"` // 可选发送者名称
|
||||
ToolCallID string `json:"tool_call_id,omitempty"` // 工具调用关联ID (tool role 消息关联调用)
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"` // 助手消息中的工具调用列表
|
||||
@@ -36,6 +37,16 @@ type ImageURL struct {
|
||||
Detail string `json:"detail,omitempty"` // low, high, auto
|
||||
}
|
||||
|
||||
// VideoURLContent holds a video URL for multimodal video understanding.
|
||||
type VideoURLContent struct {
|
||||
VideoURL *VideoURL `json:"video_url,omitempty"`
|
||||
}
|
||||
|
||||
// VideoURL holds a video URL.
|
||||
type VideoURL struct {
|
||||
URL string `json:"url"`
|
||||
}
|
||||
|
||||
// ChatMessage 数据库存储的对话消息
|
||||
type ChatMessage struct {
|
||||
ID string `json:"id" db:"id"`
|
||||
|
||||
@@ -41,6 +41,8 @@ type Orchestrator struct {
|
||||
toolRegistry *plgManager.ToolRegistry
|
||||
visionProvider llm.LLMProvider // 视觉模型 (图片预处理)
|
||||
ocrProvider llm.LLMProvider // OCR 模型 (文字提取,与视觉模型并行调用)
|
||||
videoProvider llm.LLMProvider // 视频模型 (短视频理解)
|
||||
asrProvider llm.ASRProvider // ASR 语音识别 (语音消息转录)
|
||||
}
|
||||
|
||||
// SetResponseCache sets the response cache (optional, for Phase 0.2).
|
||||
@@ -84,6 +86,16 @@ func (o *Orchestrator) SetOCRProvider(op llm.LLMProvider) {
|
||||
o.ocrProvider = op
|
||||
}
|
||||
|
||||
// SetVideoProvider sets the video model provider for short video understanding.
|
||||
func (o *Orchestrator) SetVideoProvider(vp llm.LLMProvider) {
|
||||
o.videoProvider = vp
|
||||
}
|
||||
|
||||
// SetASRProvider sets the ASR provider for voice message transcription.
|
||||
func (o *Orchestrator) SetASRProvider(ap llm.ASRProvider) {
|
||||
o.asrProvider = ap
|
||||
}
|
||||
|
||||
// getBus returns the bus or a nop fallback.
|
||||
func (o *Orchestrator) getBus() bus.Bus {
|
||||
if o.eventBus == nil {
|
||||
@@ -121,6 +133,8 @@ type ProcessParams struct {
|
||||
SessionID string
|
||||
Message string
|
||||
Images []string // 图片 base64 data URL (多模态)
|
||||
VideoURLs []string // 视频 URL (多模态), ≤20s short videos
|
||||
VoiceURLs []string // 语音 URL (ASR 转录)
|
||||
Mode string // text / voice_msg / voice_assistant
|
||||
Nickname string
|
||||
ChannelType string // direct / group
|
||||
@@ -174,6 +188,34 @@ func (o *Orchestrator) ProcessInput(
|
||||
}
|
||||
// 预处理后清空原始图片,避免后续传给不支持多模态的 Chat 模型
|
||||
params.Images = nil
|
||||
|
||||
// 0.6 视频预处理: 使用视频模型分析短视频 (≤20s),将描述注入消息
|
||||
if len(params.VideoURLs) > 0 && o.videoProvider != nil {
|
||||
startTime := time.Now()
|
||||
augmented := o.preprocessVideos(ctx, params.Message, params.VideoURLs)
|
||||
if augmented != params.Message {
|
||||
params.Message = augmented
|
||||
logger.Printf("[orchestrator] 视频预处理耗时: %v", time.Since(startTime))
|
||||
}
|
||||
params.VideoURLs = nil
|
||||
} else if len(params.VideoURLs) > 0 {
|
||||
logger.Printf("[orchestrator] 视频模型未配置,丢弃 %d 个视频", len(params.VideoURLs))
|
||||
params.VideoURLs = nil
|
||||
}
|
||||
|
||||
// 0.7 语音预处理: 使用 ASR 模型转录语音消息,将文本注入消息
|
||||
if len(params.VoiceURLs) > 0 && o.asrProvider != nil && o.asrProvider.IsAvailable() {
|
||||
startTime := time.Now()
|
||||
augmented := o.preprocessVoice(ctx, params.Message, params.VoiceURLs)
|
||||
if augmented != params.Message {
|
||||
params.Message = augmented
|
||||
logger.Printf("[orchestrator] 语音预处理耗时: %v", time.Since(startTime))
|
||||
}
|
||||
params.VoiceURLs = nil
|
||||
} else if len(params.VoiceURLs) > 0 {
|
||||
logger.Printf("[orchestrator] ASR模型未配置,丢弃 %d 个语音", len(params.VoiceURLs))
|
||||
params.VoiceURLs = nil
|
||||
}
|
||||
} else if len(params.Images) > 0 {
|
||||
// 未配置 Vision 模型时,告知用户该模型不支持图片,并清空图片避免报错
|
||||
if params.Message == "" {
|
||||
@@ -234,7 +276,7 @@ func (o *Orchestrator) ProcessInput(
|
||||
eventCh <- model.StreamEvent{Type: model.StreamSegments, Segments: segments}
|
||||
}
|
||||
eventCh <- model.StreamEvent{Type: model.StreamDone}
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
|
||||
o.cacheAssistantMessage(params, fullContent)
|
||||
logger.Printf("[orchestrator] 缓存响应完成: len=%d", len([]rune(fullContent)))
|
||||
return
|
||||
}
|
||||
@@ -478,7 +520,7 @@ func (o *Orchestrator) ProcessInput(
|
||||
|
||||
// 10. 后处理:缓存回复
|
||||
if fullContent != "" {
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, fullContent)
|
||||
o.cacheAssistantMessage(params, fullContent)
|
||||
if o.responseCache != nil {
|
||||
o.responseCache.Set(params.Message, fullContent)
|
||||
}
|
||||
@@ -694,6 +736,19 @@ func (o *Orchestrator) CacheMessage(sessionID string, role model.Role, content s
|
||||
}
|
||||
}
|
||||
|
||||
// cacheAssistantMessage caches the assistant response, tagging it with the recipient
|
||||
// in group chats so dialog history shows who the AI was addressing.
|
||||
func (o *Orchestrator) cacheAssistantMessage(params ProcessParams, fullContent string) {
|
||||
if o.contextBuilder == nil {
|
||||
return
|
||||
}
|
||||
cached := fullContent
|
||||
if params.ChannelType == "group" && params.Nickname != "" {
|
||||
cached = fmt.Sprintf("[回复 %s]\n%s", params.Nickname, fullContent)
|
||||
}
|
||||
o.contextBuilder.CacheMessage(params.SessionID, model.RoleAssistant, cached)
|
||||
}
|
||||
|
||||
// preprocessImages uses vision and OCR models to analyze images and augments the user message.
|
||||
// When both vision and OCR providers are available (and are different models), they are called
|
||||
// in parallel and both results are passed to the chat model for autonomous judgment.
|
||||
@@ -781,6 +836,74 @@ func (o *Orchestrator) preprocessImages(ctx context.Context, message string, ima
|
||||
return augmented
|
||||
}
|
||||
|
||||
// preprocessVideos uses the video model to analyze short videos and augments the message.
|
||||
func (o *Orchestrator) preprocessVideos(ctx context.Context, message string, videoURLs []string) string {
|
||||
if o.videoProvider == nil {
|
||||
return message
|
||||
}
|
||||
|
||||
var descriptions []string
|
||||
for i, url := range videoURLs {
|
||||
resp, err := o.videoProvider.Chat(ctx, []model.LLMMessage{
|
||||
{Role: model.RoleUser, Content: "请用简短的中文描述这个视频的内容,包括场景、人物、动作等。控制在100字以内。", VideoURLs: []string{url}},
|
||||
})
|
||||
if err != nil {
|
||||
logger.Printf("[orchestrator] 视频 %d 分析失败: %v", i, err)
|
||||
continue
|
||||
}
|
||||
if resp.Content != "" {
|
||||
descriptions = append(descriptions, resp.Content)
|
||||
}
|
||||
}
|
||||
|
||||
if len(descriptions) == 0 {
|
||||
return message
|
||||
}
|
||||
|
||||
if message == "" {
|
||||
return strings.Join(descriptions, "\n\n")
|
||||
}
|
||||
|
||||
augmented := message
|
||||
for i, desc := range descriptions {
|
||||
augmented += fmt.Sprintf("\n\n[视频%d的分析]: %s", i+1, desc)
|
||||
}
|
||||
return augmented
|
||||
}
|
||||
|
||||
// preprocessVoice transcribes voice messages using the ASR provider and augments the message.
|
||||
func (o *Orchestrator) preprocessVoice(ctx context.Context, message string, voiceURLs []string) string {
|
||||
if o.asrProvider == nil || !o.asrProvider.IsAvailable() {
|
||||
return message
|
||||
}
|
||||
|
||||
var transcriptions []string
|
||||
for i, url := range voiceURLs {
|
||||
text, err := o.asrProvider.Transcribe(ctx, url)
|
||||
if err != nil {
|
||||
logger.Printf("[orchestrator] 语音 %d 转录失败: %v", i, err)
|
||||
continue
|
||||
}
|
||||
if text != "" {
|
||||
transcriptions = append(transcriptions, text)
|
||||
}
|
||||
}
|
||||
|
||||
if len(transcriptions) == 0 {
|
||||
return message
|
||||
}
|
||||
|
||||
if message == "" {
|
||||
return strings.Join(transcriptions, "\n\n")
|
||||
}
|
||||
|
||||
augmented := message
|
||||
for i, t := range transcriptions {
|
||||
augmented += fmt.Sprintf("\n\n[语音%d的转写]: %s", i+1, t)
|
||||
}
|
||||
return augmented
|
||||
}
|
||||
|
||||
// Ensure time, memory are used
|
||||
var _ = time.Now
|
||||
var _ = memory.NewRetriever
|
||||
|
||||
@@ -35,6 +35,7 @@ type SynthesizeParams struct {
|
||||
SessionID string
|
||||
UserMessage string
|
||||
Images []string // 图片 base64 data URL (多模态)
|
||||
VideoURLs []string // 视频 URL (多模态)
|
||||
Nickname string
|
||||
PersonaPrompt string // 完整人格提示词
|
||||
DialogHistory []model.LLMMessage // 对话历史
|
||||
@@ -215,7 +216,7 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
|
||||
if params.ChannelType == "group" {
|
||||
messages = append(messages, model.LLMMessage{
|
||||
Role: model.RoleSystem,
|
||||
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请用发送者的真实名字称呼,不要叫所有人开拓者或叶酱。只在对你说话或延续已有对话时才回复。",
|
||||
Content: "【群聊上下文】这条消息来自QQ群聊。消息前缀 [群聊 群号] 昵称 (QQ号) 标注了真实发送者。你不是在和开拓者一对一私聊,而是在群聊中和不同成员交流。请根据消息前缀中的发送者名字称呼对方,不同的人有不同的名字。只在对你说话或延续已有对话时才回复。",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -280,11 +281,12 @@ func (s *Synthesizer) buildSynthesizeMessages(params SynthesizeParams) []model.L
|
||||
messages = append(messages, history...)
|
||||
}
|
||||
|
||||
// 当前用户消息 (支持多模态图片)
|
||||
// 当前用户消息 (支持多模态图片和视频)
|
||||
messages = append(messages, model.LLMMessage{
|
||||
Role: model.RoleUser,
|
||||
Content: params.UserMessage,
|
||||
Images: params.Images,
|
||||
Role: model.RoleUser,
|
||||
Content: params.UserMessage,
|
||||
Images: params.Images,
|
||||
VideoURLs: params.VideoURLs,
|
||||
})
|
||||
|
||||
return messages
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/llm"
|
||||
"git.yeij.top/AskaEth/Cyrene/ai-core/internal/model"
|
||||
)
|
||||
|
||||
// VideoTool enables video understanding via multimodal LLM.
|
||||
type VideoTool struct {
|
||||
videoProvider llm.LLMProvider
|
||||
}
|
||||
|
||||
// NewVideoTool creates a video tool. videoProvider is optional (nil = no-op mode).
|
||||
func NewVideoTool(videoProvider llm.LLMProvider) *VideoTool {
|
||||
return &VideoTool{videoProvider: videoProvider}
|
||||
}
|
||||
|
||||
func (t *VideoTool) Definition() ToolDefinition {
|
||||
return ToolDefinition{
|
||||
Name: "video_analyze",
|
||||
Description: "分析视频内容。传入视频文件路径或URL,返回视频内容的文字描述和分析结果。支持场景理解、动作识别、文字提取等。",
|
||||
Parameters: map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"video_path": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "视频文件路径或URL",
|
||||
},
|
||||
"task": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "分析任务: describe(内容描述), summarize(摘要), analyze(综合分析)",
|
||||
"enum": []string{"describe", "summarize", "analyze"},
|
||||
},
|
||||
},
|
||||
"required": []string{"video_path", "task"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
var videoTaskPrompts = map[string]string{
|
||||
"describe": "请详细描述这个视频的内容,包括场景、人物、动作、对话要点等。",
|
||||
"summarize": "请用简洁的语言总结这个视频的主要内容。",
|
||||
"analyze": "请综合分析这个视频,包括内容描述、关键片段、文字信息(如有)、以及你的理解。",
|
||||
}
|
||||
|
||||
func (t *VideoTool) Execute(ctx context.Context, args map[string]interface{}) (*ToolResult, error) {
|
||||
videoPath, _ := args["video_path"].(string)
|
||||
if videoPath == "" {
|
||||
return &ToolResult{ToolName: "video_analyze", Success: false, Error: "video_path 参数不能为空"}, nil
|
||||
}
|
||||
|
||||
task, _ := args["task"].(string)
|
||||
if task == "" {
|
||||
task = "analyze"
|
||||
}
|
||||
|
||||
prompt := videoTaskPrompts[task]
|
||||
if prompt == "" {
|
||||
prompt = videoTaskPrompts["analyze"]
|
||||
}
|
||||
|
||||
if t.videoProvider == nil {
|
||||
return &ToolResult{ToolName: "video_analyze", Success: false, Error: "视频理解模型未配置"}, nil
|
||||
}
|
||||
|
||||
messages := []model.LLMMessage{
|
||||
{Role: model.RoleUser, Content: prompt, VideoURLs: []string{videoPath}},
|
||||
}
|
||||
resp, err := t.videoProvider.Chat(ctx, messages)
|
||||
if err != nil {
|
||||
return &ToolResult{ToolName: "video_analyze", Success: false, Error: fmt.Sprintf("视频模型调用失败: %v", err)}, nil
|
||||
}
|
||||
|
||||
output, _ := json.Marshal(map[string]interface{}{
|
||||
"video_path": videoPath,
|
||||
"task": task,
|
||||
"model": t.videoProvider.ModelName(),
|
||||
"text": resp.Content,
|
||||
"prompt_tokens": resp.Usage.PromptTokens,
|
||||
"completion_tokens": resp.Usage.CompletionTokens,
|
||||
"total_tokens": resp.Usage.TotalTokens,
|
||||
})
|
||||
return &ToolResult{ToolName: "video_analyze", Success: true, Data: string(output)}, nil
|
||||
}
|
||||
@@ -10,6 +10,24 @@ import (
|
||||
"git.yeij.top/AskaEth/Cyrene/pkg/plugins/sdk"
|
||||
)
|
||||
|
||||
// CtxKeyIsAdmin is the context key for the admin flag.
|
||||
type ctxKey string
|
||||
|
||||
const CtxKeyIsAdmin ctxKey = "isAdmin"
|
||||
|
||||
// adminOnlyTools lists tools that require admin permission to execute.
|
||||
var adminOnlyTools = map[string]bool{
|
||||
"host_exec": true,
|
||||
"os_exec": true,
|
||||
"host_file": true,
|
||||
}
|
||||
|
||||
// IsAdminFromCtx returns true if the context carries an admin flag.
|
||||
func IsAdminFromCtx(ctx context.Context) bool {
|
||||
v, _ := ctx.Value(CtxKeyIsAdmin).(bool)
|
||||
return v
|
||||
}
|
||||
|
||||
// CallLogRecord 工具调用记录
|
||||
type CallLogRecord struct {
|
||||
CallID string `json:"call_id"`
|
||||
@@ -197,6 +215,16 @@ func (r *ToolRegistry) Execute(ctx context.Context, toolID string, args map[stri
|
||||
return &sdk.ToolResult{Success: false, Error: err.Error()}, nil
|
||||
}
|
||||
|
||||
// Admin-only tools: deny non-admin callers.
|
||||
if adminOnlyTools[toolID] && !IsAdminFromCtx(ctx) {
|
||||
errMsg := fmt.Sprintf("工具 %s 仅限管理员使用", toolID)
|
||||
r.callLog.push(CallLogRecord{
|
||||
ToolName: toolID, Error: errMsg, Success: false,
|
||||
DurationMs: int(time.Since(startTime).Milliseconds()),
|
||||
})
|
||||
return &sdk.ToolResult{Success: false, Error: errMsg}, nil
|
||||
}
|
||||
|
||||
result, err := tool.Execute(ctx, args)
|
||||
durationMs := int(time.Since(startTime).Milliseconds())
|
||||
|
||||
|
||||
@@ -56,9 +56,10 @@ func main() {
|
||||
mapper := bridge.NewIdentityMapper()
|
||||
checker := permissions.NewChecker()
|
||||
router := bridge.NewPlatformRouter(mapper, checker)
|
||||
lastDisplayNames := make(map[string]string) // platformUID -> last known display name
|
||||
|
||||
// Seed default identities from environment.
|
||||
seedIdentities(mapper, configStore)
|
||||
seedIdentities(mapper, configStore, cfg.AdminNickname)
|
||||
|
||||
// Register platform adapters based on stored configs or defaults.
|
||||
adapters := createAdapters(cfg, configStore)
|
||||
@@ -85,6 +86,27 @@ func main() {
|
||||
|
||||
// Routing decisions.
|
||||
isAdmin := mapper.IsAdmin(msg.Platform, msg.OriginalSenderUID)
|
||||
adminNick := cfg.AdminNickname
|
||||
if isAdmin {
|
||||
if id := mapper.ResolveOrNil(msg.Platform, msg.OriginalSenderUID); id != nil && id.Nickname != "" {
|
||||
adminNick = id.Nickname
|
||||
}
|
||||
// Track per-group display names (群名片 can differ across groups).
|
||||
nameKey := msg.OriginalSenderUID
|
||||
if msg.ChannelType == "group" {
|
||||
nameKey = msg.OriginalSenderUID + ":" + msg.ChannelID
|
||||
}
|
||||
if prevName, ok := lastDisplayNames[nameKey]; ok && prevName != msg.OriginalSenderName && msg.OriginalSenderName != "" {
|
||||
ctx := msg.ChannelID
|
||||
if msg.GroupName != "" {
|
||||
ctx = truncateString(msg.GroupName, 8)
|
||||
}
|
||||
msg.Content = fmt.Sprintf("【昵称更新:该用户在%s(%s)上的昵称已从\"%s\"变更为\"%s\"】\n%s", ctx, msg.Platform, prevName, msg.OriginalSenderName, msg.Content)
|
||||
}
|
||||
if msg.OriginalSenderName != "" {
|
||||
lastDisplayNames[nameKey] = msg.OriginalSenderName
|
||||
}
|
||||
}
|
||||
isMentioned, _ := detectAdminMention(msg, mapper, cfg)
|
||||
isBotMentioned := msg.BotUID != "" && containsString(msg.Mentions, msg.BotUID)
|
||||
isSilent := cfg.PlatformSilentEnabled && !isAdmin && !isBotMentioned
|
||||
@@ -107,9 +129,11 @@ func main() {
|
||||
senderLabel = msg.SenderName
|
||||
}
|
||||
if isAdmin {
|
||||
senderLabel = "【管理员】" + msg.OriginalSenderName
|
||||
senderLabel = adminNick + "/" + msg.OriginalSenderName
|
||||
}
|
||||
msg.Content = fmt.Sprintf("[群聊 %s] %s (%s):\n%s", groupLabel, senderLabel, msg.OriginalSenderUID, msg.Content)
|
||||
} else if msg.ChannelType == "private" {
|
||||
msg.Content = fmt.Sprintf("【私聊 %s】%s/%s (%s):\n%s", msg.Platform, adminNick, msg.OriginalSenderName, msg.OriginalSenderUID, msg.Content)
|
||||
}
|
||||
|
||||
// Blocklist/whitelist check (admin always bypasses).
|
||||
@@ -137,6 +161,8 @@ func main() {
|
||||
|
||||
// Extract image URLs for vision/OCR processing (admin + bot-mentioned + admin-mentioned only).
|
||||
imageURLs := getImageURLs(msg)
|
||||
videoURLs := getShortVideoURLs(msg)
|
||||
voiceURLs := getVoiceURLs(msg)
|
||||
|
||||
// For group chats, use a channel-based user ID to share context between admin and regular users.
|
||||
chatUserID := msg.SenderID
|
||||
@@ -149,32 +175,32 @@ func main() {
|
||||
case isMessageHistorical(msg, router):
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isAdmin && !isBotMentioned && shouldAdminBeSilent(msg, router):
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isAdmin:
|
||||
msg.RouteType = "normal"
|
||||
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isBotMentioned:
|
||||
msg.RouteType = "normal"
|
||||
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isMentioned:
|
||||
// Non-admin user mentioned an admin. Don't respond in channel —
|
||||
// the admin already gets QQ's native @notification. Observe silently.
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
|
||||
case isSilent:
|
||||
msg.RouteType = "silent"
|
||||
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
|
||||
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
|
||||
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
|
||||
if silentErr != nil {
|
||||
msgLogger.Log(logging.LogEntry{
|
||||
Timestamp: time.Now(),
|
||||
@@ -192,7 +218,7 @@ func main() {
|
||||
|
||||
default:
|
||||
msg.RouteType = "normal"
|
||||
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, nil)
|
||||
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, nil, videoURLs, voiceURLs, isAdmin)
|
||||
}
|
||||
|
||||
if routeErr != nil {
|
||||
@@ -261,7 +287,7 @@ func main() {
|
||||
fmt.Printf("Platform adapter hot-reloaded: %s\n", name)
|
||||
}
|
||||
// Sync admin identities from config fields.
|
||||
syncAdminUIDs(mapper, platform, fields)
|
||||
syncAdminUIDs(mapper, platform, fields, cfg.AdminNickname)
|
||||
// Restart QQ reader when QQ config changes.
|
||||
if platform == "qq" {
|
||||
startQQReaders(router)
|
||||
@@ -559,9 +585,36 @@ func getImageURLs(msg *bridge.UnifiedMessage) []string {
|
||||
return urls
|
||||
}
|
||||
|
||||
// getVoiceURLs extracts voice/record attachment URLs from a UnifiedMessage.
|
||||
func getVoiceURLs(msg *bridge.UnifiedMessage) []string {
|
||||
if len(msg.Attachments) == 0 {
|
||||
return nil
|
||||
}
|
||||
var urls []string
|
||||
for _, a := range msg.Attachments {
|
||||
if a.Type == "voice" && a.URL != "" {
|
||||
urls = append(urls, a.URL)
|
||||
}
|
||||
}
|
||||
return urls
|
||||
}
|
||||
|
||||
// getShortVideoURLs returns video URLs with duration ≤ 20 seconds.
|
||||
func getShortVideoURLs(msg *bridge.UnifiedMessage) []string {
|
||||
if len(msg.Attachments) == 0 {
|
||||
return nil
|
||||
}
|
||||
var urls []string
|
||||
for _, a := range msg.Attachments {
|
||||
if a.Type == "video" && a.URL != "" && a.Duration > 0 && a.Duration <= 20 {
|
||||
urls = append(urls, a.URL)
|
||||
}
|
||||
}
|
||||
return urls
|
||||
}
|
||||
|
||||
// forwardToAICore sends a unified message to AI-Core's chat endpoint and returns the response.
|
||||
// If images is non-empty, they are passed as URL strings for AI-Core to download and process.
|
||||
func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userID, sessionID string, images []string) (*bridge.UnifiedResponse, error) {
|
||||
func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userID, sessionID string, images []string, videoURLs []string, voiceURLs []string, isAdmin bool) (*bridge.UnifiedResponse, error) {
|
||||
bodyMap := map[string]interface{}{
|
||||
"user_id": userID,
|
||||
"session_id": sessionID,
|
||||
@@ -569,6 +622,7 @@ func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userI
|
||||
"mode": mode,
|
||||
"routing": msg.RouteType,
|
||||
"nickname": fmt.Sprintf("%s (%s)", msg.SenderName, msg.OriginalSenderUID),
|
||||
"is_admin": isAdmin,
|
||||
"source": map[string]string{
|
||||
"platform": msg.Platform,
|
||||
"channel_id": msg.ChannelID,
|
||||
@@ -580,6 +634,12 @@ func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userI
|
||||
if len(images) > 0 {
|
||||
bodyMap["images"] = images
|
||||
}
|
||||
if len(videoURLs) > 0 {
|
||||
bodyMap["video_urls"] = videoURLs
|
||||
}
|
||||
if len(voiceURLs) > 0 {
|
||||
bodyMap["voice_urls"] = voiceURLs
|
||||
}
|
||||
reqBody, _ := json.Marshal(bodyMap)
|
||||
|
||||
url := cfg.AICoreURL + "/api/v1/chat"
|
||||
@@ -829,7 +889,7 @@ func parseIntOr(s string, defaultVal int) int {
|
||||
}
|
||||
|
||||
// seedIdentities loads default identity mappings from env vars and stored platform configs.
|
||||
func seedIdentities(m *bridge.IdentityMapper, store *config.Store) {
|
||||
func seedIdentities(m *bridge.IdentityMapper, store *config.Store, adminNickname string) {
|
||||
// From environment variables.
|
||||
for _, entry := range []struct{ envKey, platform string }{
|
||||
{"QQ_ADMIN_UID", "qq"},
|
||||
@@ -845,7 +905,7 @@ func seedIdentities(m *bridge.IdentityMapper, store *config.Store) {
|
||||
Platform: entry.platform,
|
||||
PlatformUID: uid,
|
||||
CyreneUser: "admin",
|
||||
Nickname: "开拓者",
|
||||
Nickname: adminNickname,
|
||||
PermissionLevel: "admin",
|
||||
})
|
||||
}
|
||||
@@ -858,13 +918,13 @@ func seedIdentities(m *bridge.IdentityMapper, store *config.Store) {
|
||||
if stored == nil {
|
||||
continue
|
||||
}
|
||||
syncAdminUIDs(m, name, stored.Fields)
|
||||
syncAdminUIDs(m, name, stored.Fields, adminNickname)
|
||||
}
|
||||
}
|
||||
|
||||
// syncAdminUIDs registers admin identities from a platform config's admin_uids field.
|
||||
// Comma-separated list of platform UIDs.
|
||||
func syncAdminUIDs(m *bridge.IdentityMapper, platform string, fields map[string]string) {
|
||||
func syncAdminUIDs(m *bridge.IdentityMapper, platform string, fields map[string]string, adminNickname string) {
|
||||
raw, ok := fields["admin_uids"]
|
||||
if !ok || raw == "" {
|
||||
return
|
||||
@@ -878,7 +938,7 @@ func syncAdminUIDs(m *bridge.IdentityMapper, platform string, fields map[string]
|
||||
Platform: platform,
|
||||
PlatformUID: uid,
|
||||
CyreneUser: "admin",
|
||||
Nickname: "开拓者",
|
||||
Nickname: adminNickname,
|
||||
PermissionLevel: "admin",
|
||||
})
|
||||
}
|
||||
|
||||
@@ -321,6 +321,7 @@ func (a *Adapter) ToUnified(rawMessage interface{}) (*bridge.UnifiedMessage, err
|
||||
}
|
||||
|
||||
var mentions []string
|
||||
var replyToText string
|
||||
if segments, ok := msg.Message.([]interface{}); ok {
|
||||
for _, s := range segments {
|
||||
if seg, ok := s.(map[string]interface{}); ok {
|
||||
@@ -331,9 +332,24 @@ func (a *Adapter) ToUnified(rawMessage interface{}) (*bridge.UnifiedMessage, err
|
||||
}
|
||||
}
|
||||
}
|
||||
if seg["type"] == "reply" {
|
||||
if data, ok := seg["data"].(map[string]interface{}); ok {
|
||||
if t, ok := data["text"].(string); ok && t != "" {
|
||||
replyToText = t
|
||||
}
|
||||
if id, ok := data["id"]; ok {
|
||||
_ = id // message ID of the replied-to message
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prepend reply context for the AI.
|
||||
if replyToText != "" {
|
||||
content = "【回复】" + truncateForReply(replyToText, 100) + "\n" + content
|
||||
}
|
||||
// Fallback: parse CQ at codes from string format (e.g. [CQ:at,qq=2254389756]).
|
||||
if len(mentions) == 0 {
|
||||
raw := msg.RawMessage
|
||||
@@ -490,24 +506,76 @@ func (a *Adapter) ReadMessages(ctx context.Context, msgCh chan<- *OBv11Message)
|
||||
}
|
||||
}
|
||||
|
||||
// cqSimplifyMap maps CQ code types to simplified Chinese labels.
|
||||
var cqSimplifyMap = map[string]string{
|
||||
"image": "[图片]",
|
||||
"reply": "[回复]",
|
||||
"face": "[表情]",
|
||||
"record": "[语音]",
|
||||
"video": "[视频]",
|
||||
"file": "[文件]",
|
||||
}
|
||||
|
||||
// simplifyCQCodes replaces [CQ:type,...] codes with human-readable labels.
|
||||
func simplifyCQCodes(s string) string {
|
||||
return cqAllRegex.ReplaceAllStringFunc(s, func(match string) string {
|
||||
// match looks like "[CQ:image,file=xxx,url=xxx]"
|
||||
// Extract the type (text between "CQ:" and the first "," or "]").
|
||||
typ := match[4:] // strip "[CQ:"
|
||||
for i, c := range typ {
|
||||
if c == ',' || c == ']' {
|
||||
typ = typ[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if label, ok := cqSimplifyMap[typ]; ok {
|
||||
return label
|
||||
}
|
||||
return "[" + typ + "]"
|
||||
})
|
||||
}
|
||||
|
||||
// extractText retrieves plain text from an OBv11 message.
|
||||
// CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx).
|
||||
func extractText(msg *OBv11Message) string {
|
||||
if msg.RawMessage != "" {
|
||||
return msg.RawMessage
|
||||
s := cqAtRegex.ReplaceAllString(msg.RawMessage, "@$1")
|
||||
return simplifyCQCodes(s)
|
||||
}
|
||||
switch m := msg.Message.(type) {
|
||||
case string:
|
||||
return m
|
||||
s := cqAtRegex.ReplaceAllString(m, "@$1")
|
||||
return simplifyCQCodes(s)
|
||||
case []interface{}:
|
||||
var text string
|
||||
for _, seg := range m {
|
||||
if s, ok := seg.(map[string]interface{}); ok {
|
||||
if s["type"] == "text" {
|
||||
switch s["type"] {
|
||||
case "text":
|
||||
if data, ok := s["data"].(map[string]interface{}); ok {
|
||||
if t, ok := data["text"].(string); ok {
|
||||
text += t
|
||||
}
|
||||
}
|
||||
case "at":
|
||||
if data, ok := s["data"].(map[string]interface{}); ok {
|
||||
if qq, ok := data["qq"].(string); ok {
|
||||
text += "@" + qq
|
||||
}
|
||||
}
|
||||
case "image":
|
||||
text += "[图片]"
|
||||
case "face":
|
||||
text += "[表情]"
|
||||
case "record":
|
||||
text += "[语音]"
|
||||
case "video":
|
||||
text += "[视频]"
|
||||
case "file":
|
||||
text += "[文件]"
|
||||
case "reply":
|
||||
// Reply is handled separately in ToUnified with reply text.
|
||||
text += "[回复]"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -518,21 +586,50 @@ func extractText(msg *OBv11Message) string {
|
||||
|
||||
var cqAtRegex = regexp.MustCompile(`\[CQ:at,qq=(\d+)\]`)
|
||||
var cqImageRegex = regexp.MustCompile(`\[CQ:image,[^\]]*\]`)
|
||||
var cqVideoRegex = regexp.MustCompile(`\[CQ:video,[^\]]*\]`)
|
||||
var cqRecordRegex = regexp.MustCompile(`\[CQ:record,[^\]]*\]`)
|
||||
var cqURLRegex = regexp.MustCompile(`\burl=([^,\]]+)`)
|
||||
var cqDurationRegex = regexp.MustCompile(`\bduration=(\d+)`)
|
||||
var cqAllRegex = regexp.MustCompile(`\[CQ:[^\]]+\]`)
|
||||
var boldRegex = regexp.MustCompile(`\*\*(.+?)\*\*`)
|
||||
var italicRegex = regexp.MustCompile(`\*(.+?)\*`)
|
||||
var strikethroughRegex = regexp.MustCompile(`~~(.+?)~~`)
|
||||
|
||||
// extractAttachments extracts image URLs from OBv11Message.
|
||||
func parseIntOr(s string, defaultVal int) int {
|
||||
if s == "" {
|
||||
return defaultVal
|
||||
}
|
||||
n := 0
|
||||
for _, c := range s {
|
||||
if c >= '0' && c <= '9' {
|
||||
n = n*10 + int(c-'0')
|
||||
} else {
|
||||
return defaultVal
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// truncateForReply truncates reply preview text to keep messages readable.
|
||||
func truncateForReply(s string, maxLen int) string {
|
||||
runes := []rune(s)
|
||||
if len(runes) <= maxLen {
|
||||
return s
|
||||
}
|
||||
return string(runes[:maxLen]) + "…"
|
||||
}
|
||||
|
||||
// extractAttachments extracts image/video URLs from OBv11Message.
|
||||
// Handles both string format (CQ codes in raw_message) and array format (parsed segments).
|
||||
func extractAttachments(msg *OBv11Message) []bridge.Attachment {
|
||||
var attachments []bridge.Attachment
|
||||
|
||||
// Array format: iterate segments looking for type="image".
|
||||
// Array format: iterate segments looking for image and video.
|
||||
if segments, ok := msg.Message.([]interface{}); ok {
|
||||
for _, s := range segments {
|
||||
if seg, ok := s.(map[string]interface{}); ok {
|
||||
if seg["type"] != "image" {
|
||||
segType, _ := seg["type"].(string)
|
||||
if segType != "image" && segType != "video" && segType != "record" {
|
||||
continue
|
||||
}
|
||||
data, _ := seg["data"].(map[string]interface{})
|
||||
@@ -544,11 +641,17 @@ func extractAttachments(msg *OBv11Message) []bridge.Attachment {
|
||||
if url == "" {
|
||||
continue
|
||||
}
|
||||
attachments = append(attachments, bridge.Attachment{
|
||||
Type: "image",
|
||||
att := bridge.Attachment{
|
||||
Type: segType,
|
||||
URL: url,
|
||||
FileName: file,
|
||||
})
|
||||
}
|
||||
if segType == "video" {
|
||||
if d, ok := data["duration"].(float64); ok {
|
||||
att.Duration = int(d)
|
||||
}
|
||||
}
|
||||
attachments = append(attachments, att)
|
||||
}
|
||||
}
|
||||
return attachments
|
||||
@@ -561,14 +664,29 @@ func extractAttachments(msg *OBv11Message) []bridge.Attachment {
|
||||
raw = s
|
||||
}
|
||||
}
|
||||
matches := cqImageRegex.FindAllString(raw, -1)
|
||||
for _, m := range matches {
|
||||
// Images.
|
||||
for _, m := range cqImageRegex.FindAllString(raw, -1) {
|
||||
urlMatch := cqURLRegex.FindStringSubmatch(m)
|
||||
if len(urlMatch) >= 2 {
|
||||
attachments = append(attachments, bridge.Attachment{
|
||||
Type: "image",
|
||||
URL: urlMatch[1],
|
||||
})
|
||||
attachments = append(attachments, bridge.Attachment{Type: "image", URL: urlMatch[1]})
|
||||
}
|
||||
}
|
||||
// Videos.
|
||||
for _, m := range cqVideoRegex.FindAllString(raw, -1) {
|
||||
urlMatch := cqURLRegex.FindStringSubmatch(m)
|
||||
if len(urlMatch) >= 2 {
|
||||
dur := 0
|
||||
if dm := cqDurationRegex.FindStringSubmatch(m); len(dm) >= 2 {
|
||||
dur = parseIntOr(dm[1], 0)
|
||||
}
|
||||
attachments = append(attachments, bridge.Attachment{Type: "video", URL: urlMatch[1], Duration: dur})
|
||||
}
|
||||
}
|
||||
// Records (voice messages).
|
||||
for _, m := range cqRecordRegex.FindAllString(raw, -1) {
|
||||
urlMatch := cqURLRegex.FindStringSubmatch(m)
|
||||
if len(urlMatch) >= 2 {
|
||||
attachments = append(attachments, bridge.Attachment{Type: "voice", URL: urlMatch[1]})
|
||||
}
|
||||
}
|
||||
return attachments
|
||||
|
||||
@@ -31,13 +31,14 @@ type UnifiedMessage struct {
|
||||
BotUID string `json:"-"` // bot's own platform UID, set by router
|
||||
}
|
||||
|
||||
// Attachment represents a file/image/voice attachment.
|
||||
// Attachment represents a file/image/voice/video attachment.
|
||||
type Attachment struct {
|
||||
Type string `json:"type"` // "image", "voice", "file", "video"
|
||||
URL string `json:"url,omitempty"`
|
||||
FileName string `json:"file_name,omitempty"`
|
||||
MimeType string `json:"mime_type,omitempty"`
|
||||
Size int64 `json:"size,omitempty"`
|
||||
Duration int `json:"duration,omitempty"` // video/voice duration in seconds
|
||||
}
|
||||
|
||||
// UnifiedResponse is AI-Core's response converted to unified format.
|
||||
|
||||
@@ -20,6 +20,7 @@ type Config struct {
|
||||
|
||||
// Silent observation mode.
|
||||
PlatformSilentEnabled bool // PLATFORM_SILENT_ENABLED, default true
|
||||
AdminNickname string // ADMIN_NICKNAME, admin's Cyrene identity nickname (default "开拓者")
|
||||
AdminNicknames []string // ADMIN_NICKNAMES, default ["开拓者"]
|
||||
AdminMentionKeywords []string // ADMIN_MENTION_KEYWORDS, default ["昔涟","Cyrene","管理员"]
|
||||
|
||||
@@ -61,6 +62,10 @@ func Load() *Config {
|
||||
}
|
||||
// Silent observation defaults.
|
||||
cfg.PlatformSilentEnabled = getEnvBool("PLATFORM_SILENT_ENABLED", true)
|
||||
cfg.AdminNickname = os.Getenv("ADMIN_NICKNAME")
|
||||
if cfg.AdminNickname == "" {
|
||||
cfg.AdminNickname = "开拓者"
|
||||
}
|
||||
cfg.AdminNicknames = getEnvList("ADMIN_NICKNAMES", []string{"开拓者"})
|
||||
cfg.AdminMentionKeywords = getEnvList("ADMIN_MENTION_KEYWORDS", []string{"昔涟", "Cyrene", "管理员"})
|
||||
cfg.MessageSendIntervalMs = getEnvInt("MSG_SEND_INTERVAL_MS", 2000)
|
||||
|
||||
@@ -6364,7 +6364,7 @@ function pruneTimeline() {
|
||||
}
|
||||
|
||||
function traceHopHtml(t, prevT) {
|
||||
var time = new Date(t.timestamp).toISOString().replace('T', ' ').slice(11, 19);
|
||||
var time = new Date(t.timestamp).toISOString().replace('T', ' ').slice(0, 19);
|
||||
var isError = t.status === 'error';
|
||||
var gapHtml = '';
|
||||
|
||||
@@ -6381,7 +6381,7 @@ function traceHopHtml(t, prevT) {
|
||||
'<span class="hop-time">' + escHtml(time) + '</span>' +
|
||||
'<span class="hop-service ' + escHtml(t.service) + '">' + escHtml(t.service) + '</span>' +
|
||||
'<span class="hop-label">' + escHtml(t.label) + '</span>' +
|
||||
(t.durationMs > 0 ? '<span class="hop-duration">' + (t.durationMs >= 1000 ? (t.durationMs / 1000).toFixed(2) + 's' : t.durationMs + 'ms') + '</span>' : '') +
|
||||
(t.durationMs > 0 ? '<span class="hop-duration">' + (t.durationMs / 1000).toFixed(3) + 's</span>' : '') +
|
||||
'<span class="hop-status">' + (isError ? '❌' : '✅') + '</span>' +
|
||||
'</div>' +
|
||||
'<div class="trace-hop-detail">' +
|
||||
@@ -6389,7 +6389,7 @@ function traceHopHtml(t, prevT) {
|
||||
'<div><strong>服务:</strong> ' + escHtml(t.service) + '</div>' +
|
||||
'<div><strong>节点:</strong> ' + escHtml(t.hop) + '</div>' +
|
||||
'<div><strong>标签:</strong> ' + escHtml(t.label) + '</div>' +
|
||||
(t.durationMs > 0 ? '<div><strong>耗时:</strong> ' + (t.durationMs >= 1000 ? (t.durationMs / 1000).toFixed(2) + 's' : t.durationMs + 'ms') + '</div>' : '') +
|
||||
(t.durationMs > 0 ? '<div><strong>耗时:</strong> ' + (t.durationMs / 1000).toFixed(3) + 's</div>' : '') +
|
||||
'<div><strong>状态:</strong> ' + (isError ? '❌ 失败' : '✅ 成功') + '</div>' +
|
||||
(t.detail ? '<div style="margin-top:6px"><strong>详情:</strong><br>' + escHtml(String(t.detail)) + '</div>' : '') +
|
||||
'</div>';
|
||||
@@ -6423,7 +6423,7 @@ function llmCallToTrace(call) {
|
||||
hop: 'llm_call',
|
||||
label: 'LLM 调用: ' + (call.model || 'unknown'),
|
||||
status: call.success ? 'success' : 'error',
|
||||
durationMs: call.duration_ms || call.Duration || 0,
|
||||
durationMs: call.duration_ms || (call.Duration ? Math.round(call.Duration / 1e6) : 0),
|
||||
detail: call.error || (call.prompt_tokens || 0) + '+' + (call.completion_tokens || 0) + ' tokens',
|
||||
data: call
|
||||
};
|
||||
|
||||
+3
-3
@@ -1297,7 +1297,7 @@ app.get('/api/trace/recent', async (req, res) => {
|
||||
hop: 'llm_call',
|
||||
label: `LLM 调用: ${call.model || 'unknown'}`,
|
||||
status: call.success ? 'success' : 'error',
|
||||
durationMs: call.duration_ms || call.Duration || 0,
|
||||
durationMs: call.duration_ms || (call.Duration ? Math.round(call.Duration / 1e6) : 0),
|
||||
detail: call.error || `${call.prompt_tokens || 0}+${call.completion_tokens || 0} tokens`,
|
||||
data: call,
|
||||
});
|
||||
@@ -1316,7 +1316,7 @@ app.get('/api/trace/recent', async (req, res) => {
|
||||
hop: 'tool_call',
|
||||
label: `工具调用: ${tc.tool_name || tc.name || 'unknown'}`,
|
||||
status: tc.error ? 'error' : 'success',
|
||||
durationMs: tc.duration_ms || tc.Duration || 0,
|
||||
durationMs: tc.duration_ms || (tc.Duration ? Math.round(tc.Duration / 1e6) : 0),
|
||||
detail: tc.error || tc.result?.substring?.(0, 100) || '',
|
||||
data: tc,
|
||||
});
|
||||
@@ -1443,7 +1443,7 @@ app.get('/api/trace/session/:sessionId', async (req, res) => {
|
||||
hop: 'llm_call',
|
||||
label: `LLM: ${call.model || 'unknown'}`,
|
||||
status: call.success ? 'success' : 'error',
|
||||
durationMs: call.duration_ms || call.Duration || 0,
|
||||
durationMs: call.duration_ms || (call.Duration ? Math.round(call.Duration / 1e6) : 0),
|
||||
detail: call.error || `${call.prompt_tokens || 0}→${call.completion_tokens || 0} tokens`,
|
||||
data: call,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user