feat: ASR语音转写管线 + 群聊身份混淆修复

- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息
- 模型名称全部从models.json路由获取,无硬编码
- 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题
  - 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆
  - 群聊上下文指令改为肯定性表述,移除具体名称提及
- trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒
- 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒)
- 新增video_tool插件模板
- 优化OpenAI adapter reasoning_content处理

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 16:46:47 +08:00
parent d112fdd540
commit a9c79d7887
16 changed files with 780 additions and 67 deletions
+77 -17
View File
@@ -56,9 +56,10 @@ func main() {
mapper := bridge.NewIdentityMapper()
checker := permissions.NewChecker()
router := bridge.NewPlatformRouter(mapper, checker)
lastDisplayNames := make(map[string]string) // platformUID -> last known display name
// Seed default identities from environment.
seedIdentities(mapper, configStore)
seedIdentities(mapper, configStore, cfg.AdminNickname)
// Register platform adapters based on stored configs or defaults.
adapters := createAdapters(cfg, configStore)
@@ -85,6 +86,27 @@ func main() {
// Routing decisions.
isAdmin := mapper.IsAdmin(msg.Platform, msg.OriginalSenderUID)
adminNick := cfg.AdminNickname
if isAdmin {
if id := mapper.ResolveOrNil(msg.Platform, msg.OriginalSenderUID); id != nil && id.Nickname != "" {
adminNick = id.Nickname
}
// Track per-group display names (群名片 can differ across groups).
nameKey := msg.OriginalSenderUID
if msg.ChannelType == "group" {
nameKey = msg.OriginalSenderUID + ":" + msg.ChannelID
}
if prevName, ok := lastDisplayNames[nameKey]; ok && prevName != msg.OriginalSenderName && msg.OriginalSenderName != "" {
ctx := msg.ChannelID
if msg.GroupName != "" {
ctx = truncateString(msg.GroupName, 8)
}
msg.Content = fmt.Sprintf("【昵称更新:该用户在%s(%s)上的昵称已从\"%s\"变更为\"%s\"】\n%s", ctx, msg.Platform, prevName, msg.OriginalSenderName, msg.Content)
}
if msg.OriginalSenderName != "" {
lastDisplayNames[nameKey] = msg.OriginalSenderName
}
}
isMentioned, _ := detectAdminMention(msg, mapper, cfg)
isBotMentioned := msg.BotUID != "" && containsString(msg.Mentions, msg.BotUID)
isSilent := cfg.PlatformSilentEnabled && !isAdmin && !isBotMentioned
@@ -107,9 +129,11 @@ func main() {
senderLabel = msg.SenderName
}
if isAdmin {
senderLabel = "【管理员】" + msg.OriginalSenderName
senderLabel = adminNick + "/" + msg.OriginalSenderName
}
msg.Content = fmt.Sprintf("[群聊 %s] %s (%s)\n%s", groupLabel, senderLabel, msg.OriginalSenderUID, msg.Content)
} else if msg.ChannelType == "private" {
msg.Content = fmt.Sprintf("【私聊 %s】%s/%s (%s)\n%s", msg.Platform, adminNick, msg.OriginalSenderName, msg.OriginalSenderUID, msg.Content)
}
// Blocklist/whitelist check (admin always bypasses).
@@ -137,6 +161,8 @@ func main() {
// Extract image URLs for vision/OCR processing (admin + bot-mentioned + admin-mentioned only).
imageURLs := getImageURLs(msg)
videoURLs := getShortVideoURLs(msg)
voiceURLs := getVoiceURLs(msg)
// For group chats, use a channel-based user ID to share context between admin and regular users.
chatUserID := msg.SenderID
@@ -149,32 +175,32 @@ func main() {
case isMessageHistorical(msg, router):
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
case isAdmin && !isBotMentioned && shouldAdminBeSilent(msg, router):
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
case isAdmin:
msg.RouteType = "normal"
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs)
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs, videoURLs, voiceURLs, isAdmin)
case isBotMentioned:
msg.RouteType = "normal"
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs)
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, imageURLs, videoURLs, voiceURLs, isAdmin)
case isMentioned:
// Non-admin user mentioned an admin. Don't respond in channel —
// the admin already gets QQ's native @notification. Observe silently.
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
response, routeErr = forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
case isSilent:
msg.RouteType = "silent"
namespace := buildMemoryNamespace(msg.Platform, msg.ChannelType, msg.ChannelID)
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil)
silentResponse, silentErr := forwardToAICore(cfg, msg, "platform_silent", namespace, namespace, nil, videoURLs, voiceURLs, isAdmin)
if silentErr != nil {
msgLogger.Log(logging.LogEntry{
Timestamp: time.Now(),
@@ -192,7 +218,7 @@ func main() {
default:
msg.RouteType = "normal"
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, nil)
response, routeErr = forwardToAICore(cfg, msg, "text", chatUserID, groupSessionID, nil, videoURLs, voiceURLs, isAdmin)
}
if routeErr != nil {
@@ -261,7 +287,7 @@ func main() {
fmt.Printf("Platform adapter hot-reloaded: %s\n", name)
}
// Sync admin identities from config fields.
syncAdminUIDs(mapper, platform, fields)
syncAdminUIDs(mapper, platform, fields, cfg.AdminNickname)
// Restart QQ reader when QQ config changes.
if platform == "qq" {
startQQReaders(router)
@@ -559,9 +585,36 @@ func getImageURLs(msg *bridge.UnifiedMessage) []string {
return urls
}
// getVoiceURLs extracts voice attachment URLs from a UnifiedMessage.
//
// Both the "voice" and "record" type labels are accepted. The QQ adapter's
// CQ-string path tags voice messages as "voice", but its segment-array path
// stores the raw segment type, which is "record" for voice messages; matching
// only "voice" would silently drop those messages from the ASR pipeline.
//
// Attachments with an empty URL are skipped. The result is nil when the
// message carries no usable voice attachment.
func getVoiceURLs(msg *bridge.UnifiedMessage) []string {
	var urls []string
	for _, a := range msg.Attachments {
		if a.URL == "" {
			continue
		}
		switch a.Type {
		case "voice", "record":
			urls = append(urls, a.URL)
		}
	}
	return urls
}
// getShortVideoURLs collects the URLs of video attachments whose reported
// duration is known (greater than zero) and at most 20 seconds.
// Videos without a URL or without a duration are ignored; the result is nil
// when nothing qualifies.
func getShortVideoURLs(msg *bridge.UnifiedMessage) []string {
	var short []string
	for _, att := range msg.Attachments {
		if att.Type != "video" || att.URL == "" {
			continue
		}
		if att.Duration <= 0 || att.Duration > 20 {
			continue
		}
		short = append(short, att.URL)
	}
	return short
}
// forwardToAICore sends a unified message to AI-Core's chat endpoint and returns the response.
// If images is non-empty, they are passed as URL strings for AI-Core to download and process.
func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userID, sessionID string, images []string) (*bridge.UnifiedResponse, error) {
func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userID, sessionID string, images []string, videoURLs []string, voiceURLs []string, isAdmin bool) (*bridge.UnifiedResponse, error) {
bodyMap := map[string]interface{}{
"user_id": userID,
"session_id": sessionID,
@@ -569,6 +622,7 @@ func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userI
"mode": mode,
"routing": msg.RouteType,
"nickname": fmt.Sprintf("%s (%s)", msg.SenderName, msg.OriginalSenderUID),
"is_admin": isAdmin,
"source": map[string]string{
"platform": msg.Platform,
"channel_id": msg.ChannelID,
@@ -580,6 +634,12 @@ func forwardToAICore(cfg *config.Config, msg *bridge.UnifiedMessage, mode, userI
if len(images) > 0 {
bodyMap["images"] = images
}
if len(videoURLs) > 0 {
bodyMap["video_urls"] = videoURLs
}
if len(voiceURLs) > 0 {
bodyMap["voice_urls"] = voiceURLs
}
reqBody, _ := json.Marshal(bodyMap)
url := cfg.AICoreURL + "/api/v1/chat"
@@ -829,7 +889,7 @@ func parseIntOr(s string, defaultVal int) int {
}
// seedIdentities loads default identity mappings from env vars and stored platform configs.
func seedIdentities(m *bridge.IdentityMapper, store *config.Store) {
func seedIdentities(m *bridge.IdentityMapper, store *config.Store, adminNickname string) {
// From environment variables.
for _, entry := range []struct{ envKey, platform string }{
{"QQ_ADMIN_UID", "qq"},
@@ -845,7 +905,7 @@ func seedIdentities(m *bridge.IdentityMapper, store *config.Store) {
Platform: entry.platform,
PlatformUID: uid,
CyreneUser: "admin",
Nickname: "开拓者",
Nickname: adminNickname,
PermissionLevel: "admin",
})
}
@@ -858,13 +918,13 @@ func seedIdentities(m *bridge.IdentityMapper, store *config.Store) {
if stored == nil {
continue
}
syncAdminUIDs(m, name, stored.Fields)
syncAdminUIDs(m, name, stored.Fields, adminNickname)
}
}
// syncAdminUIDs registers admin identities from a platform config's admin_uids field.
// Comma-separated list of platform UIDs.
func syncAdminUIDs(m *bridge.IdentityMapper, platform string, fields map[string]string) {
func syncAdminUIDs(m *bridge.IdentityMapper, platform string, fields map[string]string, adminNickname string) {
raw, ok := fields["admin_uids"]
if !ok || raw == "" {
return
@@ -878,7 +938,7 @@ func syncAdminUIDs(m *bridge.IdentityMapper, platform string, fields map[string]
Platform: platform,
PlatformUID: uid,
CyreneUser: "admin",
Nickname: "开拓者",
Nickname: adminNickname,
PermissionLevel: "admin",
})
}
@@ -321,6 +321,7 @@ func (a *Adapter) ToUnified(rawMessage interface{}) (*bridge.UnifiedMessage, err
}
var mentions []string
var replyToText string
if segments, ok := msg.Message.([]interface{}); ok {
for _, s := range segments {
if seg, ok := s.(map[string]interface{}); ok {
@@ -331,9 +332,24 @@ func (a *Adapter) ToUnified(rawMessage interface{}) (*bridge.UnifiedMessage, err
}
}
}
if seg["type"] == "reply" {
if data, ok := seg["data"].(map[string]interface{}); ok {
if t, ok := data["text"].(string); ok && t != "" {
replyToText = t
}
if id, ok := data["id"]; ok {
_ = id // message ID of the replied-to message
}
}
}
}
}
}
// Prepend reply context for the AI.
if replyToText != "" {
content = "【回复】" + truncateForReply(replyToText, 100) + "\n" + content
}
// Fallback: parse CQ at codes from string format (e.g. [CQ:at,qq=2254389756]).
if len(mentions) == 0 {
raw := msg.RawMessage
@@ -490,24 +506,76 @@ func (a *Adapter) ReadMessages(ctx context.Context, msgCh chan<- *OBv11Message)
}
}
// cqSimplifyMap maps a CQ code type (the word after "[CQ:") to the short
// Chinese placeholder that replaces the whole code in plain-text output.
// simplifyCQCodes falls back to "[<type>]" for types that are not listed here.
var cqSimplifyMap = map[string]string{
"image": "[图片]", // picture
"reply": "[回复]", // reply/quote
"face": "[表情]", // emoticon
"record": "[语音]", // voice message
"video": "[视频]", // video
"file": "[文件]", // file
}
// simplifyCQCodes replaces [CQ:type,...] codes with human-readable labels.
func simplifyCQCodes(s string) string {
return cqAllRegex.ReplaceAllStringFunc(s, func(match string) string {
// match looks like "[CQ:image,file=xxx,url=xxx]"
// Extract the type (text between "CQ:" and the first "," or "]").
typ := match[4:] // strip "[CQ:"
for i, c := range typ {
if c == ',' || c == ']' {
typ = typ[:i]
break
}
}
if label, ok := cqSimplifyMap[typ]; ok {
return label
}
return "[" + typ + "]"
})
}
// extractText retrieves plain text from an OBv11 message.
// CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx).
func extractText(msg *OBv11Message) string {
if msg.RawMessage != "" {
return msg.RawMessage
s := cqAtRegex.ReplaceAllString(msg.RawMessage, "@$1")
return simplifyCQCodes(s)
}
switch m := msg.Message.(type) {
case string:
return m
s := cqAtRegex.ReplaceAllString(m, "@$1")
return simplifyCQCodes(s)
case []interface{}:
var text string
for _, seg := range m {
if s, ok := seg.(map[string]interface{}); ok {
if s["type"] == "text" {
switch s["type"] {
case "text":
if data, ok := s["data"].(map[string]interface{}); ok {
if t, ok := data["text"].(string); ok {
text += t
}
}
case "at":
if data, ok := s["data"].(map[string]interface{}); ok {
if qq, ok := data["qq"].(string); ok {
text += "@" + qq
}
}
case "image":
text += "[图片]"
case "face":
text += "[表情]"
case "record":
text += "[语音]"
case "video":
text += "[视频]"
case "file":
text += "[文件]"
case "reply":
// Reply is handled separately in ToUnified with reply text.
text += "[回复]"
}
}
}
@@ -518,21 +586,50 @@ func extractText(msg *OBv11Message) string {
// cqAtRegex matches an @-mention CQ code and captures the numeric QQ id.
var cqAtRegex = regexp.MustCompile(`\[CQ:at,qq=(\d+)\]`)
// cqImageRegex matches a whole image CQ code, including its parameters.
var cqImageRegex = regexp.MustCompile(`\[CQ:image,[^\]]*\]`)
// cqVideoRegex matches a whole video CQ code, including its parameters.
var cqVideoRegex = regexp.MustCompile(`\[CQ:video,[^\]]*\]`)
// cqRecordRegex matches a whole record (voice message) CQ code, including its parameters.
var cqRecordRegex = regexp.MustCompile(`\[CQ:record,[^\]]*\]`)
// cqURLRegex captures the value of a "url=" parameter, up to the next ',' or ']'.
var cqURLRegex = regexp.MustCompile(`\burl=([^,\]]+)`)
// cqDurationRegex captures the digits of a "duration=" parameter.
var cqDurationRegex = regexp.MustCompile(`\bduration=(\d+)`)
// cqAllRegex matches any CQ code regardless of its type.
var cqAllRegex = regexp.MustCompile(`\[CQ:[^\]]+\]`)
// boldRegex captures the text between a pair of double asterisks (non-greedy).
var boldRegex = regexp.MustCompile(`\*\*(.+?)\*\*`)
// italicRegex captures the text between a pair of single asterisks (non-greedy).
var italicRegex = regexp.MustCompile(`\*(.+?)\*`)
// strikethroughRegex captures the text between a pair of double tildes (non-greedy).
var strikethroughRegex = regexp.MustCompile(`~~(.+?)~~`)
// extractAttachments extracts image URLs from OBv11Message.
// parseIntOr parses s as a non-negative decimal integer made up solely of
// ASCII digits and returns it.
//
// defaultVal is returned when s is empty, when s contains any character other
// than '0'-'9' (signs and whitespace included), or when the value does not fit
// in an int. The overflow check matters because the digits come from
// externally supplied message text: without it a long digit run would wrap
// around silently and yield a meaningless, possibly negative, number.
func parseIntOr(s string, defaultVal int) int {
	if s == "" {
		return defaultVal
	}
	// Largest value an int can hold on this platform, computed without
	// pulling in an extra import.
	const maxInt = int(^uint(0) >> 1)

	n := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < '0' || c > '9' {
			return defaultVal
		}
		d := int(c - '0')
		// n*10 + d would exceed maxInt.
		if n > (maxInt-d)/10 {
			return defaultVal
		}
		n = n*10 + d
	}
	return n
}
// truncateForReply shortens a quoted-reply preview to at most maxLen runes.
// Text that already fits is returned unchanged; longer text is cut on a rune
// boundary and suffixed with an ellipsis ("…").
func truncateForReply(s string, maxLen int) string {
	if rs := []rune(s); len(rs) > maxLen {
		return string(rs[:maxLen]) + "…"
	}
	return s
}
// extractAttachments extracts image, video, and voice (record) attachment URLs from an OBv11Message.
// Handles both string format (CQ codes in raw_message) and array format (parsed segments).
func extractAttachments(msg *OBv11Message) []bridge.Attachment {
var attachments []bridge.Attachment
// Array format: iterate segments looking for type="image".
// Array format: iterate segments looking for image, video, and record (voice) segments.
if segments, ok := msg.Message.([]interface{}); ok {
for _, s := range segments {
if seg, ok := s.(map[string]interface{}); ok {
if seg["type"] != "image" {
segType, _ := seg["type"].(string)
if segType != "image" && segType != "video" && segType != "record" {
continue
}
data, _ := seg["data"].(map[string]interface{})
@@ -544,11 +641,17 @@ func extractAttachments(msg *OBv11Message) []bridge.Attachment {
if url == "" {
continue
}
attachments = append(attachments, bridge.Attachment{
Type: "image",
att := bridge.Attachment{
Type: segType,
URL: url,
FileName: file,
})
}
if segType == "video" {
if d, ok := data["duration"].(float64); ok {
att.Duration = int(d)
}
}
attachments = append(attachments, att)
}
}
return attachments
@@ -561,14 +664,29 @@ func extractAttachments(msg *OBv11Message) []bridge.Attachment {
raw = s
}
}
matches := cqImageRegex.FindAllString(raw, -1)
for _, m := range matches {
// Images.
for _, m := range cqImageRegex.FindAllString(raw, -1) {
urlMatch := cqURLRegex.FindStringSubmatch(m)
if len(urlMatch) >= 2 {
attachments = append(attachments, bridge.Attachment{
Type: "image",
URL: urlMatch[1],
})
attachments = append(attachments, bridge.Attachment{Type: "image", URL: urlMatch[1]})
}
}
// Videos.
for _, m := range cqVideoRegex.FindAllString(raw, -1) {
urlMatch := cqURLRegex.FindStringSubmatch(m)
if len(urlMatch) >= 2 {
dur := 0
if dm := cqDurationRegex.FindStringSubmatch(m); len(dm) >= 2 {
dur = parseIntOr(dm[1], 0)
}
attachments = append(attachments, bridge.Attachment{Type: "video", URL: urlMatch[1], Duration: dur})
}
}
// Records (voice messages).
for _, m := range cqRecordRegex.FindAllString(raw, -1) {
urlMatch := cqURLRegex.FindStringSubmatch(m)
if len(urlMatch) >= 2 {
attachments = append(attachments, bridge.Attachment{Type: "voice", URL: urlMatch[1]})
}
}
return attachments
@@ -31,13 +31,14 @@ type UnifiedMessage struct {
BotUID string `json:"-"` // bot's own platform UID, set by router
}
// Attachment represents a file/image/voice attachment.
// Attachment represents a file/image/voice/video attachment carried by a
// unified message. Every field except Type is omitted from JSON when empty.
type Attachment struct {
// NOTE(review): the QQ adapter's segment-array path stores the raw segment
// type here, so "record" can also appear for voice messages — confirm
// whether it should be normalized to "voice".
Type string `json:"type"` // "image", "voice", "file", "video"
// URL is the attachment's URL as reported by the platform adapter.
URL string `json:"url,omitempty"`
// FileName is the file name/identifier reported by the platform, if any.
FileName string `json:"file_name,omitempty"`
// MimeType is presumably the media type of the content — TODO confirm which adapters fill it.
MimeType string `json:"mime_type,omitempty"`
// Size is presumably the content size in bytes — TODO confirm which adapters fill it.
Size int64 `json:"size,omitempty"`
Duration int `json:"duration,omitempty"` // video/voice duration in seconds
}
// UnifiedResponse is AI-Core's response converted to unified format.
@@ -20,6 +20,7 @@ type Config struct {
// Silent observation mode.
PlatformSilentEnabled bool // PLATFORM_SILENT_ENABLED, default true
AdminNickname string // ADMIN_NICKNAME, admin's Cyrene identity nickname (default "开拓者")
AdminNicknames []string // ADMIN_NICKNAMES, default ["开拓者"]
AdminMentionKeywords []string // ADMIN_MENTION_KEYWORDS, default ["昔涟","Cyrene","管理员"]
@@ -61,6 +62,10 @@ func Load() *Config {
}
// Silent observation defaults.
cfg.PlatformSilentEnabled = getEnvBool("PLATFORM_SILENT_ENABLED", true)
cfg.AdminNickname = os.Getenv("ADMIN_NICKNAME")
if cfg.AdminNickname == "" {
cfg.AdminNickname = "开拓者"
}
cfg.AdminNicknames = getEnvList("ADMIN_NICKNAMES", []string{"开拓者"})
cfg.AdminMentionKeywords = getEnvList("ADMIN_MENTION_KEYWORDS", []string{"昔涟", "Cyrene", "管理员"})
cfg.MessageSendIntervalMs = getEnvInt("MSG_SEND_INTERVAL_MS", 2000)