feat: 富文本消息类型支持 — Markdown/代码块安全渲染 + 审查解析器

添加 review_parser.go 从 LLM 输出中提取 Markdown 和代码块,创建独立
ReviewMessage 类型 (markdown/code/search_result)。前端新增安全 Markdown
渲染器 (HTML 转义优先),代码块以深色背景+语言标签展示。Markdown/代码
类型禁止断句拆分,避免格式损坏。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-26 21:57:09 +08:00
parent 9f3b0f386d
commit 24f827fe02
10 changed files with 473 additions and 87 deletions
@@ -470,80 +470,6 @@ func (o *Orchestrator) scheduleWithDelays(messages []model.ReviewMessage) []mode
return messages
}
// parseReviewMessages 解析完整回复文本,拆分为带类型的消息
// 用于审查子会话的轻量版本(内联到 orchestrator 以减少一次子会话调度开销)
func parseReviewMessages(text string) []model.ReviewMessage {
if text == "" {
return nil
}
var messages []model.ReviewMessage
// 简单状态机:逐行或按括号匹配提取(使用 rune 切片正确处理 Unicode
remaining := text
for len(remaining) > 0 {
// 查找括号动作 xxx)或 (xxx)
actionStart := -1 // byte 位置
actionEnd := -1 // byte 位置(括号之后)
actionContent := ""
runes := []rune(remaining)
for ri, r := range runes {
if r == '(' || r == '' {
actionStart = len(string(runes[:ri]))
closeRune := ')'
if r == '' {
closeRune = ''
}
// 查找匹配的闭合括号
for rj := ri + 1; rj < len(runes); rj++ {
if runes[rj] == closeRune {
actionEnd = len(string(runes[:rj+1]))
actionContent = string(runes[ri+1 : rj])
break
}
}
break
}
}
if actionStart >= 0 {
// 括号前的普通文本
if actionStart > 0 {
prefix := strings.TrimSpace(remaining[:actionStart])
if prefix != "" {
messages = append(messages, splitChatByLines(model.ReviewMessageChat, prefix)...)
}
}
// 括号内作为 action
content := strings.TrimSpace(actionContent)
if content != "" {
messages = append(messages, model.ReviewMessage{
Type: model.ReviewMessageAction,
Content: content,
})
}
remaining = remaining[actionEnd:]
} else {
// 没有括号,剩余全部作为 chat
remaining = strings.TrimSpace(remaining)
if remaining != "" {
messages = append(messages, splitChatByLines(model.ReviewMessageChat, remaining)...)
}
break
}
}
if len(messages) == 0 && text != "" {
messages = append(messages, model.ReviewMessage{
Type: model.ReviewMessageChat,
Content: strings.TrimSpace(text),
})
}
return messages
}
// splitReviewLongMessage 将长消息按句子边界拆分为多条短消息
func splitReviewLongMessage(msgType model.ReviewMessageType, text string) []model.ReviewMessage {
const maxLen = 80 // 最大字符数(按 rune 计数)
@@ -0,0 +1,164 @@
package orchestrator
import (
"regexp"
"strings"
"github.com/yourname/cyrene-ai/ai-core/internal/model"
)
// codeBlockPattern recognizes a fenced code block: three backticks, the rest
// of that line (capture 1, the language tag), a newline, then the shortest run
// of arbitrary characters (capture 2, the body) up to the next three backticks.
var codeBlockPattern = regexp.MustCompile("```([^\\n]*)\\n((?s:.*?))```")
// markdownPatterns lists the expressions used to decide whether a text segment
// contains Markdown formatting; a segment counts as Markdown when any one of
// them matches.
//
// Go's regexp package implements RE2 syntax, which has no lookbehind or
// lookahead, so the single-asterisk italic rule is expressed with ordinary
// groups that consume the neighbouring character instead. Line-anchored rules
// carry the (?m) flag so they also match on lines other than the first one of
// a multi-line segment.
var markdownPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?m)^#{1,6}\s`),                 // headings
	regexp.MustCompile(`\*\*[^*]+\*\*`),                 // bold
	regexp.MustCompile(`(?:^|[^*])\*[^*]+\*(?:[^*]|$)`), // italic (single *, not part of **)
	regexp.MustCompile(`\[([^\]]+)\]\(([^\)]+)\)`),      // links [text](url)
	regexp.MustCompile(`(?m)^[\-\*]\s`),                 // unordered list
	regexp.MustCompile(`(?m)^\d+\.\s`),                  // ordered list
	regexp.MustCompile(`(?m)^>\s`),                      // blockquote
	regexp.MustCompile(`(?m)^\|.*\|.*\|`),               // table
	regexp.MustCompile("`[^`]+`"),                       // inline code
}
// hasMarkdownSyntax reports whether at least one entry of markdownPatterns
// matches text. It stops at the first matching pattern.
func hasMarkdownSyntax(text string) bool {
	for i := 0; i < len(markdownPatterns); i++ {
		if markdownPatterns[i].MatchString(text) {
			return true
		}
	}
	return false
}
// autoDetectType picks the message type for a text segment:
// ReviewMessageMarkdown when hasMarkdownSyntax finds Markdown formatting,
// ReviewMessageChat otherwise.
func autoDetectType(text string) model.ReviewMessageType {
	detected := model.ReviewMessageChat
	if hasMarkdownSyntax(text) {
		detected = model.ReviewMessageMarkdown
	}
	return detected
}
// parseReviewMessages splits the assistant's full response into typed messages.
//
// Phases:
// 1. Extract fenced code blocks (```) → code type with language metadata.
// 2. For text between code blocks, run the bracket-action parser:
// (…) / (…) → action type.
// 3. Remaining text is auto-detected as markdown or chat.
// 4. Markdown and code messages are never sentence-split (keeps formatting intact).
func parseReviewMessages(text string) []model.ReviewMessage {
if text == "" {
return nil
}
var messages []model.ReviewMessage
// Phase 1: extract code blocks
codeMatches := codeBlockPattern.FindAllStringSubmatchIndex(text, -1)
type codeBlock struct {
start, end int
language string
content string
}
var blocks []codeBlock
for _, m := range codeMatches {
blocks = append(blocks, codeBlock{
start: m[0],
end: m[1],
language: strings.TrimSpace(text[m[2]:m[3]]),
content: strings.TrimSpace(text[m[4]:m[5]]),
})
}
// Phase 2: bracket-action parser on non-code text
processText := func(t string) {
remaining := t
for len(remaining) > 0 {
actionStart := -1
actionEnd := -1
actionContent := ""
runes := []rune(remaining)
for ri, r := range runes {
if r == '(' || r == '' { // fullwidth (
actionStart = len(string(runes[:ri]))
closeRune := ')'
if r == '' {
closeRune = '' // fullwidth )
}
for rj := ri + 1; rj < len(runes); rj++ {
if runes[rj] == closeRune {
actionEnd = len(string(runes[:rj+1]))
actionContent = string(runes[ri+1 : rj])
break
}
}
break
}
}
if actionStart >= 0 {
if actionStart > 0 {
prefix := strings.TrimSpace(remaining[:actionStart])
if prefix != "" {
messages = append(messages, classifyText(autoDetectType(prefix), prefix)...)
}
}
content := strings.TrimSpace(actionContent)
if content != "" {
messages = append(messages, model.ReviewMessage{
Type: model.ReviewMessageAction,
Content: content,
})
}
remaining = remaining[actionEnd:]
} else {
remaining = strings.TrimSpace(remaining)
if remaining != "" {
messages = append(messages, classifyText(autoDetectType(remaining), remaining)...)
}
break
}
}
}
// Phase 3: interleave code blocks and parsed text
pos := 0
for _, cb := range blocks {
if cb.start > pos {
processText(text[pos:cb.start])
}
messages = append(messages, model.ReviewMessage{
Type: model.ReviewMessageCode,
Content: cb.content,
Metadata: map[string]any{"language": cb.language},
})
pos = cb.end
}
if pos < len(text) {
processText(text[pos:])
}
if len(messages) == 0 && text != "" {
messages = append(messages, model.ReviewMessage{
Type: model.ReviewMessageChat,
Content: strings.TrimSpace(text),
})
}
return messages
}
// classifyText turns a text segment of the given type into messages.
// Markdown and code segments are returned as one unsplit message so their
// formatting stays intact; every other type is delegated to splitChatByLines.
func classifyText(msgType model.ReviewMessageType, text string) []model.ReviewMessage {
	keepWhole := msgType == model.ReviewMessageMarkdown || msgType == model.ReviewMessageCode
	if !keepWhole {
		return splitChatByLines(msgType, text)
	}
	return []model.ReviewMessage{{Type: msgType, Content: text}}
}