feat: ASR语音转写管线 + 群聊身份混淆修复
- 新增ASR语音识别管线: QQ语音→下载音频→qwen3-asr-flash转录→注入用户消息 - 模型名称全部从models.json路由获取,无硬编码 - 修复群聊中AI将非管理员用户误称为管理员昵称(叶酱)的问题 - 助手回复缓存时标注[回复 昵称 (UID)],防止对话历史中身份混淆 - 群聊上下文指令改为肯定性表述,移除具体名称提及 - trace面板时间戳改为YYYY-MM-DD HH:MM:SS格式,耗时统一显示为秒 - 修复Go time.Duration纳秒值在前端显示问题(Duration/1e6转毫秒) - 新增video_tool插件模板 - 优化OpenAI adapter reasoning_content处理 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,7 @@ func (a *Adapter) ToUnified(rawMessage interface{}) (*bridge.UnifiedMessage, err
|
||||
}
|
||||
|
||||
var mentions []string
|
||||
var replyToText string
|
||||
if segments, ok := msg.Message.([]interface{}); ok {
|
||||
for _, s := range segments {
|
||||
if seg, ok := s.(map[string]interface{}); ok {
|
||||
@@ -331,9 +332,24 @@ func (a *Adapter) ToUnified(rawMessage interface{}) (*bridge.UnifiedMessage, err
|
||||
}
|
||||
}
|
||||
}
|
||||
if seg["type"] == "reply" {
|
||||
if data, ok := seg["data"].(map[string]interface{}); ok {
|
||||
if t, ok := data["text"].(string); ok && t != "" {
|
||||
replyToText = t
|
||||
}
|
||||
if id, ok := data["id"]; ok {
|
||||
_ = id // message ID of the replied-to message
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prepend reply context for the AI.
|
||||
if replyToText != "" {
|
||||
content = "【回复】" + truncateForReply(replyToText, 100) + "\n" + content
|
||||
}
|
||||
// Fallback: parse CQ at codes from string format (e.g. [CQ:at,qq=2254389756]).
|
||||
if len(mentions) == 0 {
|
||||
raw := msg.RawMessage
|
||||
@@ -490,24 +506,76 @@ func (a *Adapter) ReadMessages(ctx context.Context, msgCh chan<- *OBv11Message)
|
||||
}
|
||||
}
|
||||
|
||||
// cqSimplifyMap holds the short Chinese placeholder that stands in for a CQ
// code of the given type. Keys are CQ type names; values are the bracketed
// labels substituted for the whole code.
var cqSimplifyMap = map[string]string{
	"face":   "[表情]",
	"file":   "[文件]",
	"image":  "[图片]",
	"record": "[语音]",
	"reply":  "[回复]",
	"video":  "[视频]",
}
|
||||
|
||||
// simplifyCQCodes replaces [CQ:type,...] codes with human-readable labels.
|
||||
func simplifyCQCodes(s string) string {
|
||||
return cqAllRegex.ReplaceAllStringFunc(s, func(match string) string {
|
||||
// match looks like "[CQ:image,file=xxx,url=xxx]"
|
||||
// Extract the type (text between "CQ:" and the first "," or "]").
|
||||
typ := match[4:] // strip "[CQ:"
|
||||
for i, c := range typ {
|
||||
if c == ',' || c == ']' {
|
||||
typ = typ[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if label, ok := cqSimplifyMap[typ]; ok {
|
||||
return label
|
||||
}
|
||||
return "[" + typ + "]"
|
||||
})
|
||||
}
|
||||
|
||||
// extractText retrieves plain text from an OBv11 message.
|
||||
// CQ codes are converted to human-readable form where applicable (e.g. [CQ:at,qq=xxx] → @xxx).
|
||||
func extractText(msg *OBv11Message) string {
|
||||
if msg.RawMessage != "" {
|
||||
return msg.RawMessage
|
||||
s := cqAtRegex.ReplaceAllString(msg.RawMessage, "@$1")
|
||||
return simplifyCQCodes(s)
|
||||
}
|
||||
switch m := msg.Message.(type) {
|
||||
case string:
|
||||
return m
|
||||
s := cqAtRegex.ReplaceAllString(m, "@$1")
|
||||
return simplifyCQCodes(s)
|
||||
case []interface{}:
|
||||
var text string
|
||||
for _, seg := range m {
|
||||
if s, ok := seg.(map[string]interface{}); ok {
|
||||
if s["type"] == "text" {
|
||||
switch s["type"] {
|
||||
case "text":
|
||||
if data, ok := s["data"].(map[string]interface{}); ok {
|
||||
if t, ok := data["text"].(string); ok {
|
||||
text += t
|
||||
}
|
||||
}
|
||||
case "at":
|
||||
if data, ok := s["data"].(map[string]interface{}); ok {
|
||||
if qq, ok := data["qq"].(string); ok {
|
||||
text += "@" + qq
|
||||
}
|
||||
}
|
||||
case "image":
|
||||
text += "[图片]"
|
||||
case "face":
|
||||
text += "[表情]"
|
||||
case "record":
|
||||
text += "[语音]"
|
||||
case "video":
|
||||
text += "[视频]"
|
||||
case "file":
|
||||
text += "[文件]"
|
||||
case "reply":
|
||||
// Reply is handled separately in ToUnified with reply text.
|
||||
text += "[回复]"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -518,21 +586,50 @@ func extractText(msg *OBv11Message) string {
|
||||
|
||||
// Precompiled patterns for CQ codes and Markdown-style emphasis markers.
var (
	// cqAtRegex matches "[CQ:at,qq=<digits>]" and captures the digits.
	cqAtRegex = regexp.MustCompile(`\[CQ:at,qq=(\d+)\]`)
	// cqImageRegex matches an image CQ code that carries at least a comma
	// after its type.
	cqImageRegex = regexp.MustCompile(`\[CQ:image,[^\]]*\]`)
	// cqVideoRegex matches a video CQ code of the same shape.
	cqVideoRegex = regexp.MustCompile(`\[CQ:video,[^\]]*\]`)
	// cqRecordRegex matches a record CQ code of the same shape.
	cqRecordRegex = regexp.MustCompile(`\[CQ:record,[^\]]*\]`)
	// cqURLRegex captures the value of a "url=" parameter, up to the next
	// "," or "]".
	cqURLRegex = regexp.MustCompile(`\burl=([^,\]]+)`)
	// cqDurationRegex captures the digits of a "duration=" parameter.
	cqDurationRegex = regexp.MustCompile(`\bduration=(\d+)`)
	// cqAllRegex matches any "[CQ:...]" code regardless of type.
	cqAllRegex = regexp.MustCompile(`\[CQ:[^\]]+\]`)
	// boldRegex captures the text between a pair of "**" markers (non-greedy).
	boldRegex = regexp.MustCompile(`\*\*(.+?)\*\*`)
	// italicRegex captures the text between a pair of "*" markers (non-greedy).
	italicRegex = regexp.MustCompile(`\*(.+?)\*`)
	// strikethroughRegex captures the text between a pair of "~~" markers
	// (non-greedy).
	strikethroughRegex = regexp.MustCompile(`~~(.+?)~~`)
)
|
||||
|
||||
// extractAttachments extracts image URLs from OBv11Message.
|
||||
// parseIntOr parses s as a non-negative base-10 integer consisting solely of
// ASCII digits and returns it.
//
// It returns defaultVal when s is empty, when s contains any character other
// than '0'-'9' (signs and whitespace included), or when the number does not
// fit in an int.
func parseIntOr(s string, defaultVal int) int {
	if s == "" {
		return defaultVal
	}
	const maxInt = int(^uint(0) >> 1)
	n := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < '0' || c > '9' {
			return defaultVal
		}
		d := int(c - '0')
		// Check before multiplying: n*10+d would exceed maxInt and wrap
		// around silently, so treat the input as unparseable instead.
		if n > (maxInt-d)/10 {
			return defaultVal
		}
		n = n*10 + d
	}
	return n
}
|
||||
|
||||
// truncateForReply shortens reply preview text to at most maxLen runes.
//
// When s has maxLen runes or fewer it is returned unchanged. Otherwise the
// first maxLen runes are returned followed by an ellipsis ("…"). Length is
// counted in runes, not bytes, so multi-byte characters are never split.
// A negative maxLen is treated as zero.
func truncateForReply(s string, maxLen int) string {
	// A negative limit would make the slice expression below panic.
	if maxLen < 0 {
		maxLen = 0
	}
	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	return string(runes[:maxLen]) + "…"
}
|
||||
|
||||
// extractAttachments extracts image/video URLs from OBv11Message.
|
||||
// Handles both string format (CQ codes in raw_message) and array format (parsed segments).
|
||||
func extractAttachments(msg *OBv11Message) []bridge.Attachment {
|
||||
var attachments []bridge.Attachment
|
||||
|
||||
// Array format: iterate segments looking for type="image".
|
||||
// Array format: iterate segments looking for image and video.
|
||||
if segments, ok := msg.Message.([]interface{}); ok {
|
||||
for _, s := range segments {
|
||||
if seg, ok := s.(map[string]interface{}); ok {
|
||||
if seg["type"] != "image" {
|
||||
segType, _ := seg["type"].(string)
|
||||
if segType != "image" && segType != "video" && segType != "record" {
|
||||
continue
|
||||
}
|
||||
data, _ := seg["data"].(map[string]interface{})
|
||||
@@ -544,11 +641,17 @@ func extractAttachments(msg *OBv11Message) []bridge.Attachment {
|
||||
if url == "" {
|
||||
continue
|
||||
}
|
||||
attachments = append(attachments, bridge.Attachment{
|
||||
Type: "image",
|
||||
att := bridge.Attachment{
|
||||
Type: segType,
|
||||
URL: url,
|
||||
FileName: file,
|
||||
})
|
||||
}
|
||||
if segType == "video" {
|
||||
if d, ok := data["duration"].(float64); ok {
|
||||
att.Duration = int(d)
|
||||
}
|
||||
}
|
||||
attachments = append(attachments, att)
|
||||
}
|
||||
}
|
||||
return attachments
|
||||
@@ -561,14 +664,29 @@ func extractAttachments(msg *OBv11Message) []bridge.Attachment {
|
||||
raw = s
|
||||
}
|
||||
}
|
||||
matches := cqImageRegex.FindAllString(raw, -1)
|
||||
for _, m := range matches {
|
||||
// Images.
|
||||
for _, m := range cqImageRegex.FindAllString(raw, -1) {
|
||||
urlMatch := cqURLRegex.FindStringSubmatch(m)
|
||||
if len(urlMatch) >= 2 {
|
||||
attachments = append(attachments, bridge.Attachment{
|
||||
Type: "image",
|
||||
URL: urlMatch[1],
|
||||
})
|
||||
attachments = append(attachments, bridge.Attachment{Type: "image", URL: urlMatch[1]})
|
||||
}
|
||||
}
|
||||
// Videos.
|
||||
for _, m := range cqVideoRegex.FindAllString(raw, -1) {
|
||||
urlMatch := cqURLRegex.FindStringSubmatch(m)
|
||||
if len(urlMatch) >= 2 {
|
||||
dur := 0
|
||||
if dm := cqDurationRegex.FindStringSubmatch(m); len(dm) >= 2 {
|
||||
dur = parseIntOr(dm[1], 0)
|
||||
}
|
||||
attachments = append(attachments, bridge.Attachment{Type: "video", URL: urlMatch[1], Duration: dur})
|
||||
}
|
||||
}
|
||||
// Records (voice messages).
|
||||
for _, m := range cqRecordRegex.FindAllString(raw, -1) {
|
||||
urlMatch := cqURLRegex.FindStringSubmatch(m)
|
||||
if len(urlMatch) >= 2 {
|
||||
attachments = append(attachments, bridge.Attachment{Type: "voice", URL: urlMatch[1]})
|
||||
}
|
||||
}
|
||||
return attachments
|
||||
|
||||
@@ -31,13 +31,14 @@ type UnifiedMessage struct {
|
||||
BotUID string `json:"-"` // bot's own platform UID, set by router
|
||||
}
|
||||
|
||||
// Attachment represents a file/image/voice attachment.
|
||||
// Attachment describes a single media item attached to a message: an image,
// a voice clip, a video, or a generic file.
type Attachment struct {
	// Type is the attachment kind: "image", "voice", "file", or "video".
	Type string `json:"type"`
	// URL is omitted from JSON when empty.
	URL string `json:"url,omitempty"`
	// FileName is omitted from JSON when empty.
	FileName string `json:"file_name,omitempty"`
	// MimeType is omitted from JSON when empty.
	MimeType string `json:"mime_type,omitempty"`
	// Size is omitted from JSON when zero.
	Size int64 `json:"size,omitempty"`
	// Duration is the video/voice duration in seconds; omitted from JSON
	// when zero.
	Duration int `json:"duration,omitempty"`
}
|
||||
|
||||
// UnifiedResponse is AI-Core's response converted to unified format.
|
||||
|
||||
@@ -20,6 +20,7 @@ type Config struct {
|
||||
|
||||
// Silent observation mode.
|
||||
PlatformSilentEnabled bool // PLATFORM_SILENT_ENABLED, default true
|
||||
AdminNickname string // ADMIN_NICKNAME, admin's Cyrene identity nickname (default "开拓者")
|
||||
AdminNicknames []string // ADMIN_NICKNAMES, default ["开拓者"]
|
||||
AdminMentionKeywords []string // ADMIN_MENTION_KEYWORDS, default ["昔涟","Cyrene","管理员"]
|
||||
|
||||
@@ -61,6 +62,10 @@ func Load() *Config {
|
||||
}
|
||||
// Silent observation defaults.
|
||||
cfg.PlatformSilentEnabled = getEnvBool("PLATFORM_SILENT_ENABLED", true)
|
||||
cfg.AdminNickname = os.Getenv("ADMIN_NICKNAME")
|
||||
if cfg.AdminNickname == "" {
|
||||
cfg.AdminNickname = "开拓者"
|
||||
}
|
||||
cfg.AdminNicknames = getEnvList("ADMIN_NICKNAMES", []string{"开拓者"})
|
||||
cfg.AdminMentionKeywords = getEnvList("ADMIN_MENTION_KEYWORDS", []string{"昔涟", "Cyrene", "管理员"})
|
||||
cfg.MessageSendIntervalMs = getEnvInt("MSG_SEND_INTERVAL_MS", 2000)
|
||||
|
||||
Reference in New Issue
Block a user