feat: 富文本消息类型支持 — Markdown/代码块安全渲染 + 审查解析器

添加 review_parser.go，从 LLM 输出中提取 Markdown 和代码块，并创建独立的 ReviewMessage 类型 (markdown/code/search_result)。前端新增安全 Markdown 渲染器（HTML 转义优先），代码块以深色背景 + 语言标签展示。Markdown/代码类型禁止断句拆分，避免格式损坏。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 21:57:09 +08:00
parent 9f3b0f386d
commit 24f827fe02
10 changed files with 473 additions and 87 deletions
@@ -470,80 +470,6 @@ func (o *Orchestrator) scheduleWithDelays(messages []model.ReviewMessage) []mode
 	return messages
 }

-// parseReviewMessages parses the complete reply text and splits it into typed messages.
-// Lightweight variant for review sub-sessions (inlined into the orchestrator to save one sub-session dispatch).
-func parseReviewMessages(text string) []model.ReviewMessage {
-	if text == "" {
-		return nil
-	}
-
-	var messages []model.ReviewMessage
-
-	// Simple state machine: extracts bracket pairs one at a time (a rune slice is used so Unicode is handled correctly)
-	remaining := text
-	for len(remaining) > 0 {
-		// Look for a bracketed action: （xxx） or (xxx)
-		actionStart := -1 // byte offset
-		actionEnd := -1   // byte offset (just past the closing bracket)
-		actionContent := ""
-
-		runes := []rune(remaining)
-		for ri, r := range runes {
-			if r == '(' || r == '（' {
-				actionStart = len(string(runes[:ri]))
-				closeRune := ')'
-				if r == '（' {
-					closeRune = '）'
-				}
-				// Find the matching closing bracket; actionEnd stays -1 when there is none
-				for rj := ri + 1; rj < len(runes); rj++ {
-					if runes[rj] == closeRune {
-						actionEnd = len(string(runes[:rj+1]))
-						actionContent = string(runes[ri+1 : rj])
-						break
-					}
-				}
-				break
-			}
-		}
-
-		if actionStart >= 0 {
-			// Plain text before the bracket
-			if actionStart > 0 {
-				prefix := strings.TrimSpace(remaining[:actionStart])
-				if prefix != "" {
-					messages = append(messages, splitChatByLines(model.ReviewMessageChat, prefix)...)
-				}
-			}
-			// The bracket contents become an action
-			content := strings.TrimSpace(actionContent)
-			if content != "" {
-				messages = append(messages, model.ReviewMessage{
-					Type:    model.ReviewMessageAction,
-					Content: content,
-				})
-			}
-			remaining = remaining[actionEnd:]
-		} else {
-			// No bracket found: everything left is chat
-			remaining = strings.TrimSpace(remaining)
-			if remaining != "" {
-				messages = append(messages, splitChatByLines(model.ReviewMessageChat, remaining)...)
-			}
-			break
-		}
-	}
-
-	if len(messages) == 0 && text != "" {
-		messages = append(messages, model.ReviewMessage{
-			Type:    model.ReviewMessageChat,
-			Content: strings.TrimSpace(text),
-		})
-	}
-
-	return messages
-}
-
 // splitReviewLongMessage 将长消息按句子边界拆分为多条短消息
 func splitReviewLongMessage(msgType model.ReviewMessageType, text string) []model.ReviewMessage {
 	const maxLen = 80 // 最大字符数（按 rune 计数）
@@ -0,0 +1,164 @@
+package orchestrator
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/yourname/cyrene-ai/ai-core/internal/model"
+)
+
+// codeBlockPattern matches a fenced code block: three backticks, the rest of
+// that line (capture 1, used as the language tag), a newline, and then the
+// shortest run of any characters, newlines included (capture 2, the body), up
+// to the next three backticks.
+var codeBlockPattern = regexp.MustCompile("`{3}([^\n]*)\n([\\s\\S]*?)`{3}")
+
+// markdownPatterns holds the expressions used to recognise Markdown syntax; a
+// text segment counts as Markdown as soon as any one of them matches.
+//
+// Go's regexp package implements RE2 syntax, which has no lookahead or
+// lookbehind (MustCompile panics on them), so the single-asterisk italic rule
+// spells out its neighbouring characters with ordinary groups instead.
+// Line-anchored rules carry the (?m) flag so that ^ matches at the start of
+// every line of a multi-line segment, not only at the start of the segment.
+var markdownPatterns = []*regexp.Regexp{
+	// Headings: one to six '#' followed by whitespace.
+	regexp.MustCompile(`(?m)^#{1,6}\s`),
+	// Bold: **text**.
+	regexp.MustCompile(`\*\*[^*]+\*\*`),
+	// Italic: *text*, where neither asterisk touches another asterisk.
+	regexp.MustCompile(`(?:^|[^*])\*[^*]+\*(?:[^*]|$)`),
+	// Links: [text](url).
+	regexp.MustCompile(`\[([^\]]+)\]\(([^\)]+)\)`),
+	// Unordered list item: '-' or '*' followed by whitespace.
+	regexp.MustCompile(`(?m)^[\-\*]\s`),
+	// Ordered list item: digits, a dot, whitespace.
+	regexp.MustCompile(`(?m)^\d+\.\s`),
+	// Blockquote.
+	regexp.MustCompile(`(?m)^>\s`),
+	// Table row: a line starting with '|' that contains at least three pipes.
+	regexp.MustCompile(`(?m)^\|.*\|.*\|`),
+	// Inline code: `text`.
+	regexp.MustCompile("`[^`]+`"),
+}
+
+// hasMarkdownSyntax reports whether at least one entry of markdownPatterns
+// matches text. Patterns are tried in slice order and the scan stops at the
+// first hit.
+func hasMarkdownSyntax(text string) bool {
+	found := false
+	for i := 0; i < len(markdownPatterns) && !found; i++ {
+		found = markdownPatterns[i].MatchString(text)
+	}
+	return found
+}
+
+// autoDetectType picks the message type for a text segment: the Markdown type
+// when hasMarkdownSyntax reports a match, the chat type otherwise.
+func autoDetectType(text string) model.ReviewMessageType {
+	switch {
+	case hasMarkdownSyntax(text):
+		return model.ReviewMessageMarkdown
+	default:
+		return model.ReviewMessageChat
+	}
+}
+
+// parseReviewMessages splits the assistant's full response into typed messages.
+//
+// The text is processed in source order:
+//  1. Fenced code blocks matched by codeBlockPattern become code messages whose
+//     Metadata carries the fence's language tag under the "language" key.
+//  2. Text outside code blocks is scanned for bracketed actions, （…） or (…);
+//     the trimmed bracket contents become action messages.
+//  3. Every other non-blank run of text is typed by autoDetectType and handed
+//     to classifyText, which decides whether it may be split further.
+//
+// An opening bracket with no matching closer is left in the surrounding text
+// instead of being treated as an action, and a '(' directly after ']' is taken
+// to be the URL part of a Markdown link and is left in place as well.
+//
+// It returns nil for empty input. If nothing at all was extracted from
+// non-empty input, a single chat message holding the trimmed text is returned.
+func parseReviewMessages(text string) []model.ReviewMessage {
+	if text == "" {
+		return nil
+	}
+
+	var messages []model.ReviewMessage
+
+	// appendText emits a non-code, non-action run of text, skipping blank runs.
+	appendText := func(segment string) {
+		segment = strings.TrimSpace(segment)
+		if segment == "" {
+			return
+		}
+		messages = append(messages, classifyText(autoDetectType(segment), segment)...)
+	}
+
+	// processText peels bracketed actions off the front of t one at a time,
+	// emitting the text that precedes each of them.
+	processText := func(t string) {
+		remaining := t
+		for remaining != "" {
+			start, end, inner, ok := findBracketAction(remaining)
+			if !ok {
+				appendText(remaining)
+				return
+			}
+			appendText(remaining[:start])
+			if action := strings.TrimSpace(inner); action != "" {
+				messages = append(messages, model.ReviewMessage{
+					Type:    model.ReviewMessageAction,
+					Content: action,
+				})
+			}
+			remaining = remaining[end:]
+		}
+	}
+
+	// Walk the code blocks in order, parsing the plain text between them.
+	// Each match m holds byte offsets: m[0]:m[1] is the whole fence,
+	// m[2]:m[3] the language tag and m[4]:m[5] the body.
+	pos := 0
+	for _, m := range codeBlockPattern.FindAllStringSubmatchIndex(text, -1) {
+		if m[0] > pos {
+			processText(text[pos:m[0]])
+		}
+		messages = append(messages, model.ReviewMessage{
+			Type:     model.ReviewMessageCode,
+			Content:  strings.TrimSpace(text[m[4]:m[5]]),
+			Metadata: map[string]any{"language": strings.TrimSpace(text[m[2]:m[3]])},
+		})
+		pos = m[1]
+	}
+	if pos < len(text) {
+		processText(text[pos:])
+	}
+
+	// Nothing was extracted (for example the input was only "()"): fall back
+	// to one chat message so the caller still receives the reply.
+	if len(messages) == 0 {
+		messages = append(messages, model.ReviewMessage{
+			Type:    model.ReviewMessageChat,
+			Content: strings.TrimSpace(text),
+		})
+	}
+
+	return messages
+}
+
+// findBracketAction locates the first bracketed action in s. An action opens
+// with '(' or '（' and closes with the bracket of the same width. start and
+// end are byte offsets such that s[start:end] spans the brackets and their
+// contents; content is the text between the brackets. ok is false when s
+// holds no complete bracket pair.
+func findBracketAction(s string) (start, end int, content string, ok bool) {
+	// Ranging over a string yields byte offsets, so no rune slice is needed.
+	for i, r := range s {
+		var opener, closer string
+		switch r {
+		case '(':
+			// "](" is the URL part of a Markdown link, not an action.
+			if i > 0 && s[i-1] == ']' {
+				continue
+			}
+			opener, closer = "(", ")"
+		case '（':
+			opener, closer = "（", "）"
+		default:
+			continue
+		}
+
+		inner := i + len(opener)
+		rel := strings.Index(s[inner:], closer)
+		if rel < 0 {
+			// Unclosed bracket: keep it as ordinary text and look further on.
+			continue
+		}
+		return i, inner + rel + len(closer), s[inner : inner+rel], true
+	}
+	return 0, 0, "", false
+}
+
+// classifyText turns one text segment into messages of the given type.
+// Markdown and code segments are returned as a single message so their
+// formatting stays intact; every other type is delegated to splitChatByLines.
+func classifyText(msgType model.ReviewMessageType, text string) []model.ReviewMessage {
+	keepWhole := msgType == model.ReviewMessageMarkdown || msgType == model.ReviewMessageCode
+	if !keepWhole {
+		return splitChatByLines(msgType, text)
+	}
+	return []model.ReviewMessage{{Type: msgType, Content: text}}
+}