package tools import ( "context" "fmt" "regexp" "strings" ) // MarkdownTool provides Markdown processing utilities for the LLM. // Supports HTML conversion, plain text extraction, link/code extraction, and TOC generation. type MarkdownTool struct{} // NewMarkdownTool creates a Markdown processing tool. func NewMarkdownTool() *MarkdownTool { return &MarkdownTool{} } // Definition returns the tool definition for LLM function calling. func (t *MarkdownTool) Definition() ToolDefinition { return ToolDefinition{ Name: "markdown", Description: "Markdown处理工具。将Markdown转为HTML、提取纯文本、提取链接/代码块、生成目录。用于处理Markdown格式的文档内容。", Parameters: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "action": map[string]interface{}{ "type": "string", "enum": []string{"to_html", "to_text", "extract_links", "extract_code", "table_of_contents"}, "description": "操作类型。to_html: 转换为HTML;to_text: 提取纯文本;extract_links: 提取所有链接;extract_code: 提取所有代码块;table_of_contents: 生成目录", }, "markdown": map[string]interface{}{ "type": "string", "description": "Markdown格式文本,需要处理的Markdown内容", }, }, "required": []string{"action", "markdown"}, }, } } // Execute performs Markdown processing operations. func (t *MarkdownTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) { action, ok := arguments["action"].(string) if !ok || action == "" { return &ToolResult{ ToolName: "markdown", Success: false, Error: "缺少 action 参数", }, nil } md, ok := arguments["markdown"].(string) if !ok || strings.TrimSpace(md) == "" { return &ToolResult{ ToolName: "markdown", Success: false, Error: "缺少 markdown 参数或内容为空", }, nil } switch action { case "to_html": return t.handleToHTML(md) case "to_text": return t.handleToText(md) case "extract_links": return t.handleExtractLinks(md) case "extract_code": return t.handleExtractCode(md) case "table_of_contents": return t.handleTableOfContents(md) default: return &ToolResult{ ToolName: "markdown", Success: false, Error: fmt.Sprintf("未知操作: %s,支持: to_html, to_text, extract_links, extract_code, table_of_contents", action), }, nil } } // handleToHTML converts Markdown to HTML using simple regex-based approach. func (t *MarkdownTool) handleToHTML(md string) (*ToolResult, error) { html := md // Process in order: code blocks first (to avoid interference), then inline elements, then blocks // 1. Code blocks (```...```) - preserve with placeholder codeBlocks := make([]string, 0) reFence := regexp.MustCompile("(?s)```[^`]*```") html = reFence.ReplaceAllStringFunc(html, func(match string) string { codeBlocks = append(codeBlocks, match) return fmt.Sprintf("\x00CODEBLOCK%d\x00", len(codeBlocks)-1) }) // 2. Inline code (`...`) inlineCodes := make([]string, 0) reInlineCode := regexp.MustCompile("`[^`]+`") html = reInlineCode.ReplaceAllStringFunc(html, func(match string) string { inlineCodes = append(inlineCodes, match) return fmt.Sprintf("\x00INLINECODE%d\x00", len(inlineCodes)-1) }) // 3. Images ![alt](url) reImage := regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`) html = reImage.ReplaceAllString(html, `$1`) // 4. Links [text](url) reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`) html = reLink.ReplaceAllString(html, `$1`) // 5. Bold **text** or __text__ reBold := regexp.MustCompile(`\*\*([^*]+)\*\*`) html = reBold.ReplaceAllString(html, `$1`) reBold2 := regexp.MustCompile(`__([^_]+)__`) html = reBold2.ReplaceAllString(html, `$1`) // 6. Italic *text* or _text_ reItalic := regexp.MustCompile(`\*([^*]+)\*`) html = reItalic.ReplaceAllString(html, `$1`) reItalic2 := regexp.MustCompile(`_([^_]+)_`) html = reItalic2.ReplaceAllString(html, `$1`) // 7. Strikethrough ~~text~~ reStrike := regexp.MustCompile(`~~([^~]+)~~`) html = reStrike.ReplaceAllString(html, `$1`) // 8. Headings (# to ######) reH6 := regexp.MustCompile(`(?m)^######\s+(.+)$`) html = reH6.ReplaceAllString(html, `
$1
`) reH5 := regexp.MustCompile(`(?m)^#####\s+(.+)$`) html = reH5.ReplaceAllString(html, `
$1
`) reH4 := regexp.MustCompile(`(?m)^####\s+(.+)$`) html = reH4.ReplaceAllString(html, `

$1

`) reH3 := regexp.MustCompile(`(?m)^###\s+(.+)$`) html = reH3.ReplaceAllString(html, `

$1

`) reH2 := regexp.MustCompile(`(?m)^##\s+(.+)$`) html = reH2.ReplaceAllString(html, `

$1

`) reH1 := regexp.MustCompile(`(?m)^#\s+(.+)$`) html = reH1.ReplaceAllString(html, `

$1

`) // 9. Horizontal rules reHR := regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`) html = reHR.ReplaceAllString(html, `
`) // 10. Unordered lists (- item) html = t.processLists(html, `(?m)^[\-*]\s+`, "ul") // 11. Ordered lists (1. item) html = t.processLists(html, `(?m)^\d+\.\s+`, "ol") // 12. Blockquotes reBlockquote := regexp.MustCompile(`(?m)^>\s?(.+)$`) html = reBlockquote.ReplaceAllString(html, `
$1
`) // 13. Paragraphs: wrap remaining text lines html = t.wrapParagraphs(html) // 14. Restore code blocks for i, cb := range codeBlocks { // Strip the opening/closing ``` content := strings.TrimPrefix(cb, "```") content = strings.TrimSuffix(content, "```") // Extract language if present on first line lang := "" content = strings.TrimSpace(content) if idx := strings.Index(content, "\n"); idx > 0 { lang = strings.TrimSpace(content[:idx]) content = strings.TrimSpace(content[idx+1:]) } if lang != "" { html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i), fmt.Sprintf(`
%s
`, lang, escapeHTML(content))) } else { html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i), fmt.Sprintf("
%s
", escapeHTML(content))) } } // 15. Restore inline code for i, ic := range inlineCodes { content := strings.Trim(ic, "`") html = strings.ReplaceAll(html, fmt.Sprintf("\x00INLINECODE%d\x00", i), fmt.Sprintf("%s", escapeHTML(content))) } return &ToolResult{ ToolName: "markdown", Success: true, Data: html, }, nil } // handleToText strips Markdown formatting and extracts plain text. func (t *MarkdownTool) handleToText(md string) (*ToolResult, error) { text := md // Remove code blocks reFence := regexp.MustCompile("(?s)```[^`]*```") text = reFence.ReplaceAllString(text, "[代码块]") // Remove inline code reInlineCode := regexp.MustCompile("`[^`]+`") text = reInlineCode.ReplaceAllString(text, "[代码]") // Remove images ![alt](url) - keep alt text reImage := regexp.MustCompile(`!\[([^\]]*)\]\([^)]+\)`) text = reImage.ReplaceAllString(text, "$1") // Remove links [text](url) - keep text reLink := regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`) text = reLink.ReplaceAllString(text, "$1") // Remove bold/italic markers text = regexp.MustCompile(`\*\*([^*]+)\*\*`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`__([^_]+)__`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`\*([^*]+)\*`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`_([^_]+)_`).ReplaceAllString(text, "$1") // Remove strikethrough text = regexp.MustCompile(`~~([^~]+)~~`).ReplaceAllString(text, "$1") // Remove heading markers but keep the text text = regexp.MustCompile(`(?m)^#{1,6}\s+`).ReplaceAllString(text, "") // Remove horizontal rules text = regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`).ReplaceAllString(text, "") // Remove list markers text = regexp.MustCompile(`(?m)^[\-*]\s+`).ReplaceAllString(text, "") text = regexp.MustCompile(`(?m)^\d+\.\s+`).ReplaceAllString(text, "") // Remove blockquote markers text = regexp.MustCompile(`(?m)^>\s?`).ReplaceAllString(text, "") // Collapse multiple blank lines text = regexp.MustCompile(`\n{3,}`).ReplaceAllString(text, "\n\n") return &ToolResult{ ToolName: "markdown", Success: true, Data: fmt.Sprintf("纯文本提取结果 (%d 字符):\n\n%s", len([]rune(text)), strings.TrimSpace(text)), }, nil } // handleExtractLinks extracts all [text](url) links from Markdown. func (t *MarkdownTool) handleExtractLinks(md string) (*ToolResult, error) { reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`) matches := reLink.FindAllStringSubmatch(md, -1) if len(matches) == 0 { return &ToolResult{ ToolName: "markdown", Success: true, Data: "未找到任何链接", }, nil } var result strings.Builder result.WriteString(fmt.Sprintf("提取链接 (共 %d 个):\n\n", len(matches))) for i, m := range matches { result.WriteString(fmt.Sprintf("%d. [%s](%s)\n - 文本: %s\n - URL: %s\n\n", i+1, m[1], m[2], m[1], m[2])) } return &ToolResult{ ToolName: "markdown", Success: true, Data: strings.TrimSpace(result.String()), }, nil } // handleExtractCode extracts all code blocks from Markdown. func (t *MarkdownTool) handleExtractCode(md string) (*ToolResult, error) { reFence := regexp.MustCompile("(?s)```([^`]*)```") matches := reFence.FindAllStringSubmatch(md, -1) if len(matches) == 0 { return &ToolResult{ ToolName: "markdown", Success: true, Data: "未找到任何代码块", }, nil } var result strings.Builder result.WriteString(fmt.Sprintf("提取代码块 (共 %d 个):\n\n", len(matches))) for i, m := range matches { content := strings.TrimSpace(m[1]) lang := "" if idx := strings.Index(content, "\n"); idx > 0 { lang = strings.TrimSpace(content[:idx]) content = strings.TrimSpace(content[idx+1:]) } result.WriteString(fmt.Sprintf("--- 代码块 %d", i+1)) if lang != "" { result.WriteString(fmt.Sprintf(" (语言: %s)", lang)) } result.WriteString(fmt.Sprintf(" ---\n%s\n\n", truncateText(content, 500))) } return &ToolResult{ ToolName: "markdown", Success: true, Data: strings.TrimSpace(result.String()), }, nil } // handleTableOfContents generates a table of contents from headings. func (t *MarkdownTool) handleTableOfContents(md string) (*ToolResult, error) { reHeading := regexp.MustCompile(`(?m)^(#{1,6})\s+(.+)$`) matches := reHeading.FindAllStringSubmatch(md, -1) if len(matches) == 0 { return &ToolResult{ ToolName: "markdown", Success: true, Data: "未找到任何标题,无法生成目录", }, nil } var result strings.Builder result.WriteString(fmt.Sprintf("文档目录 (共 %d 个标题):\n\n", len(matches))) for _, m := range matches { level := len(m[1]) title := strings.TrimSpace(m[2]) indent := strings.Repeat(" ", level-1) result.WriteString(fmt.Sprintf("%s%s %s\n", indent, strings.Repeat("#", level), title)) } return &ToolResult{ ToolName: "markdown", Success: true, Data: result.String(), }, nil } // --- Markdown helper functions below --- // processLists wraps consecutive list items in