feat: 富文本消息类型支持 — Markdown/代码块安全渲染 + 审查解析器
添加 review_parser.go 从 LLM 输出中提取 Markdown 和代码块,创建独立 ReviewMessage 类型 (markdown/code/search_result)。前端新增安全 Markdown 渲染器 (HTML 转义优先),代码块以深色背景+语言标签展示。Markdown/代码 类型禁止断句拆分,避免格式损坏。 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -470,80 +470,6 @@ func (o *Orchestrator) scheduleWithDelays(messages []model.ReviewMessage) []mode
|
||||
return messages
|
||||
}
|
||||
|
||||
// parseReviewMessages 解析完整回复文本,拆分为带类型的消息
|
||||
// 用于审查子会话的轻量版本(内联到 orchestrator 以减少一次子会话调度开销)
|
||||
func parseReviewMessages(text string) []model.ReviewMessage {
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var messages []model.ReviewMessage
|
||||
|
||||
// 简单状态机:逐行或按括号匹配提取(使用 rune 切片正确处理 Unicode)
|
||||
remaining := text
|
||||
for len(remaining) > 0 {
|
||||
// 查找括号动作 (xxx)或 (xxx)
|
||||
actionStart := -1 // byte 位置
|
||||
actionEnd := -1 // byte 位置(括号之后)
|
||||
actionContent := ""
|
||||
|
||||
runes := []rune(remaining)
|
||||
for ri, r := range runes {
|
||||
if r == '(' || r == '(' {
|
||||
actionStart = len(string(runes[:ri]))
|
||||
closeRune := ')'
|
||||
if r == '(' {
|
||||
closeRune = ')'
|
||||
}
|
||||
// 查找匹配的闭合括号
|
||||
for rj := ri + 1; rj < len(runes); rj++ {
|
||||
if runes[rj] == closeRune {
|
||||
actionEnd = len(string(runes[:rj+1]))
|
||||
actionContent = string(runes[ri+1 : rj])
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if actionStart >= 0 {
|
||||
// 括号前的普通文本
|
||||
if actionStart > 0 {
|
||||
prefix := strings.TrimSpace(remaining[:actionStart])
|
||||
if prefix != "" {
|
||||
messages = append(messages, splitChatByLines(model.ReviewMessageChat, prefix)...)
|
||||
}
|
||||
}
|
||||
// 括号内作为 action
|
||||
content := strings.TrimSpace(actionContent)
|
||||
if content != "" {
|
||||
messages = append(messages, model.ReviewMessage{
|
||||
Type: model.ReviewMessageAction,
|
||||
Content: content,
|
||||
})
|
||||
}
|
||||
remaining = remaining[actionEnd:]
|
||||
} else {
|
||||
// 没有括号,剩余全部作为 chat
|
||||
remaining = strings.TrimSpace(remaining)
|
||||
if remaining != "" {
|
||||
messages = append(messages, splitChatByLines(model.ReviewMessageChat, remaining)...)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(messages) == 0 && text != "" {
|
||||
messages = append(messages, model.ReviewMessage{
|
||||
Type: model.ReviewMessageChat,
|
||||
Content: strings.TrimSpace(text),
|
||||
})
|
||||
}
|
||||
|
||||
return messages
|
||||
}
|
||||
|
||||
// splitReviewLongMessage 将长消息按句子边界拆分为多条短消息
|
||||
func splitReviewLongMessage(msgType model.ReviewMessageType, text string) []model.ReviewMessage {
|
||||
const maxLen = 80 // 最大字符数(按 rune 计数)
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/yourname/cyrene-ai/ai-core/internal/model"
|
||||
)
|
||||
|
||||
// codeBlockPattern matches fenced code blocks: ```lang\n...\n```
|
||||
// Submatch 1 captures the rest of the opening fence line (the language tag,
// possibly empty); submatch 2 captures the block body. The body is matched
// lazily, so each block ends at the nearest following triple backtick.
var codeBlockPattern = regexp.MustCompile("`{3}([^\n]*)\n([\\s\\S]*?)`{3}")
|
||||
|
||||
// markdownPatterns detects common Markdown syntax for auto-classification.
//
// Every pattern must be valid RE2 syntax: Go's regexp package does not support
// lookahead or lookbehind, and regexp.MustCompile panics during package
// initialization on an unsupported construct. The italic pattern therefore
// consumes the neighbouring character (or a text boundary) instead of using
// lookarounds.
//
// Line-oriented constructs (headings, lists, blockquotes, tables) carry the
// (?m) flag so that ^ matches at the start of every line, not only at the
// start of the whole text.
var markdownPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?m)^#{1,6}\s`),                 // headings
	regexp.MustCompile(`\*\*[^*]+\*\*`),                 // bold
	regexp.MustCompile(`(?:^|[^*])\*[^*]+\*(?:[^*]|$)`), // italic (single *, not part of **)
	regexp.MustCompile(`\[([^\]]+)\]\(([^\)]+)\)`),      // links [text](url)
	regexp.MustCompile(`(?m)^[\-\*]\s`),                 // unordered list
	regexp.MustCompile(`(?m)^\d+\.\s`),                  // ordered list
	regexp.MustCompile(`(?m)^>\s`),                      // blockquote
	regexp.MustCompile(`(?m)^\|.*\|.*\|`),               // table
	regexp.MustCompile("`[^`]+`"),                       // inline code
}
|
||||
|
||||
// hasMarkdownSyntax reports whether text contains Markdown formatting.
|
||||
func hasMarkdownSyntax(text string) bool {
|
||||
for _, p := range markdownPatterns {
|
||||
if p.MatchString(text) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// autoDetectType returns the best message type for a text segment.
|
||||
func autoDetectType(text string) model.ReviewMessageType {
|
||||
if hasMarkdownSyntax(text) {
|
||||
return model.ReviewMessageMarkdown
|
||||
}
|
||||
return model.ReviewMessageChat
|
||||
}
|
||||
|
||||
// parseReviewMessages splits the assistant's full response into typed messages.
|
||||
//
|
||||
// Phases:
|
||||
// 1. Extract fenced code blocks (```) → code type with language metadata.
|
||||
// 2. For text between code blocks, run the bracket-action parser:
|
||||
// (…) / (…) → action type.
|
||||
// 3. Remaining text is auto-detected as markdown or chat.
|
||||
// 4. Markdown and code messages are never sentence-split (keeps formatting intact).
|
||||
func parseReviewMessages(text string) []model.ReviewMessage {
|
||||
if text == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var messages []model.ReviewMessage
|
||||
|
||||
// Phase 1: extract code blocks
|
||||
codeMatches := codeBlockPattern.FindAllStringSubmatchIndex(text, -1)
|
||||
type codeBlock struct {
|
||||
start, end int
|
||||
language string
|
||||
content string
|
||||
}
|
||||
var blocks []codeBlock
|
||||
for _, m := range codeMatches {
|
||||
blocks = append(blocks, codeBlock{
|
||||
start: m[0],
|
||||
end: m[1],
|
||||
language: strings.TrimSpace(text[m[2]:m[3]]),
|
||||
content: strings.TrimSpace(text[m[4]:m[5]]),
|
||||
})
|
||||
}
|
||||
|
||||
// Phase 2: bracket-action parser on non-code text
|
||||
processText := func(t string) {
|
||||
remaining := t
|
||||
for len(remaining) > 0 {
|
||||
actionStart := -1
|
||||
actionEnd := -1
|
||||
actionContent := ""
|
||||
|
||||
runes := []rune(remaining)
|
||||
for ri, r := range runes {
|
||||
if r == '(' || r == '(' { // fullwidth (
|
||||
actionStart = len(string(runes[:ri]))
|
||||
closeRune := ')'
|
||||
if r == '(' {
|
||||
closeRune = ')' // fullwidth )
|
||||
}
|
||||
for rj := ri + 1; rj < len(runes); rj++ {
|
||||
if runes[rj] == closeRune {
|
||||
actionEnd = len(string(runes[:rj+1]))
|
||||
actionContent = string(runes[ri+1 : rj])
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if actionStart >= 0 {
|
||||
if actionStart > 0 {
|
||||
prefix := strings.TrimSpace(remaining[:actionStart])
|
||||
if prefix != "" {
|
||||
messages = append(messages, classifyText(autoDetectType(prefix), prefix)...)
|
||||
}
|
||||
}
|
||||
content := strings.TrimSpace(actionContent)
|
||||
if content != "" {
|
||||
messages = append(messages, model.ReviewMessage{
|
||||
Type: model.ReviewMessageAction,
|
||||
Content: content,
|
||||
})
|
||||
}
|
||||
remaining = remaining[actionEnd:]
|
||||
} else {
|
||||
remaining = strings.TrimSpace(remaining)
|
||||
if remaining != "" {
|
||||
messages = append(messages, classifyText(autoDetectType(remaining), remaining)...)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: interleave code blocks and parsed text
|
||||
pos := 0
|
||||
for _, cb := range blocks {
|
||||
if cb.start > pos {
|
||||
processText(text[pos:cb.start])
|
||||
}
|
||||
messages = append(messages, model.ReviewMessage{
|
||||
Type: model.ReviewMessageCode,
|
||||
Content: cb.content,
|
||||
Metadata: map[string]any{"language": cb.language},
|
||||
})
|
||||
pos = cb.end
|
||||
}
|
||||
if pos < len(text) {
|
||||
processText(text[pos:])
|
||||
}
|
||||
|
||||
if len(messages) == 0 && text != "" {
|
||||
messages = append(messages, model.ReviewMessage{
|
||||
Type: model.ReviewMessageChat,
|
||||
Content: strings.TrimSpace(text),
|
||||
})
|
||||
}
|
||||
|
||||
return messages
|
||||
}
|
||||
|
||||
// classifyText splits text by paragraph boundaries.
|
||||
// markdown and code types are never sentence-split — they stay as complete blocks.
|
||||
func classifyText(msgType model.ReviewMessageType, text string) []model.ReviewMessage {
|
||||
switch msgType {
|
||||
case model.ReviewMessageMarkdown, model.ReviewMessageCode:
|
||||
return []model.ReviewMessage{{Type: msgType, Content: text}}
|
||||
default:
|
||||
return splitChatByLines(msgType, text)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user