package tools
import (
"context"
"fmt"
"regexp"
"strings"
)
// MarkdownTool is the LLM-facing Markdown utility. Its actions cover HTML
// conversion, plain-text extraction, link and code-block extraction, and
// table-of-contents generation. The type has no fields.
type MarkdownTool struct{}

// NewMarkdownTool returns a pointer to a new MarkdownTool.
func NewMarkdownTool() *MarkdownTool {
	return new(MarkdownTool)
}
// Definition describes the "markdown" tool for LLM function calling: its
// name, its description, and a JSON-schema style parameter object with two
// required string parameters, "action" and "markdown".
func (t *MarkdownTool) Definition() ToolDefinition {
	// Schema of the "action" parameter: one of the five supported operations.
	actionSchema := map[string]interface{}{
		"type":        "string",
		"enum":        []string{"to_html", "to_text", "extract_links", "extract_code", "table_of_contents"},
		"description": "操作类型。to_html: 转换为HTML;to_text: 提取纯文本;extract_links: 提取所有链接;extract_code: 提取所有代码块;table_of_contents: 生成目录",
	}

	// Schema of the "markdown" parameter: the text to process.
	markdownSchema := map[string]interface{}{
		"type":        "string",
		"description": "Markdown格式文本,需要处理的Markdown内容",
	}

	parameters := map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"action":   actionSchema,
			"markdown": markdownSchema,
		},
		"required": []string{"action", "markdown"},
	}

	return ToolDefinition{
		Name:        "markdown",
		Description: "Markdown处理工具。将Markdown转为HTML、提取纯文本、提取链接/代码块、生成目录。用于处理Markdown格式的文档内容。",
		Parameters:  parameters,
	}
}
// Execute validates the call arguments and runs the requested Markdown action.
//
// arguments must hold a non-empty string "action" and a string "markdown"
// that is not blank. Validation failures and unknown actions are reported
// through ToolResult.Error with Success set to false; the error return value
// is nil in those cases. ctx is not consulted by this method.
func (t *MarkdownTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
	// fail builds the unsuccessful result shared by every rejection path.
	fail := func(msg string) (*ToolResult, error) {
		return &ToolResult{
			ToolName: "markdown",
			Success:  false,
			Error:    msg,
		}, nil
	}

	// A missing or non-string value leaves the zero value "", so one
	// emptiness check covers both situations.
	action, _ := arguments["action"].(string)
	if action == "" {
		return fail("缺少 action 参数")
	}

	md, _ := arguments["markdown"].(string)
	if strings.TrimSpace(md) == "" {
		return fail("缺少 markdown 参数或内容为空")
	}

	var handle func(string) (*ToolResult, error)
	switch action {
	case "to_html":
		handle = t.handleToHTML
	case "to_text":
		handle = t.handleToText
	case "extract_links":
		handle = t.handleExtractLinks
	case "extract_code":
		handle = t.handleExtractCode
	case "table_of_contents":
		handle = t.handleTableOfContents
	default:
		return fail(fmt.Sprintf("未知操作: %s,支持: to_html, to_text, extract_links, extract_code, table_of_contents", action))
	}
	return handle(md)
}
// handleToHTML converts Markdown to HTML with a sequence of regex substitutions.
//
// Fenced code blocks and inline code spans are first swapped for NUL-delimited
// placeholders (\x00CODEBLOCK<n>\x00 and \x00INLINECODE<n>\x00) so that the
// later inline and block rules cannot rewrite their contents. The restore
// steps at the end put the stored code back, passed through escapeHTML.
//
// The converted text is returned in ToolResult.Data.
//
// NOTE(review): several replacement templates below are empty or contain only
// "$1" (the image rule, for example, replaces every image with an empty
// string), and the statements after step 8 are fused into a single comment
// line. This looks like HTML tags were stripped from this file by an earlier
// processing step — confirm against the original source before relying on
// this function.
func (t *MarkdownTool) handleToHTML(md string) (*ToolResult, error) {
html := md
// Process in order: code blocks first (to avoid interference), then inline elements, then blocks
// 1. Code blocks (```...```) - preserve with placeholder
codeBlocks := make([]string, 0)
reFence := regexp.MustCompile("(?s)```[^`]*```")
html = reFence.ReplaceAllStringFunc(html, func(match string) string {
codeBlocks = append(codeBlocks, match)
return fmt.Sprintf("\x00CODEBLOCK%d\x00", len(codeBlocks)-1)
})
// 2. Inline code (`...`)
inlineCodes := make([]string, 0)
reInlineCode := regexp.MustCompile("`[^`]+`")
html = reInlineCode.ReplaceAllStringFunc(html, func(match string) string {
inlineCodes = append(inlineCodes, match)
return fmt.Sprintf("\x00INLINECODE%d\x00", len(inlineCodes)-1)
})
// 3. Images ![alt](url)
// NOTE(review): the replacement template is empty, so images are dropped — confirm.
reImage := regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`)
html = reImage.ReplaceAllString(html, ``)
// 4. Links [text](url)
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
html = reLink.ReplaceAllString(html, `$1`)
// 5. Bold **text** or __text__
reBold := regexp.MustCompile(`\*\*([^*]+)\*\*`)
html = reBold.ReplaceAllString(html, `$1`)
reBold2 := regexp.MustCompile(`__([^_]+)__`)
html = reBold2.ReplaceAllString(html, `$1`)
// 6. Italic *text* or _text_
reItalic := regexp.MustCompile(`\*([^*]+)\*`)
html = reItalic.ReplaceAllString(html, `$1`)
reItalic2 := regexp.MustCompile(`_([^_]+)_`)
html = reItalic2.ReplaceAllString(html, `$1`)
// 7. Strikethrough ~~text~~
reStrike := regexp.MustCompile(`~~([^~]+)~~`)
html = reStrike.ReplaceAllString(html, `
$1`)
// 8. Headings (# to ######)
reH6 := regexp.MustCompile(`(?m)^######\s+(.+)$`)
html = reH6.ReplaceAllString(html, `
$1`) // 13. Paragraphs: wrap remaining text lines html = t.wrapParagraphs(html) // 14. Restore code blocks for i, cb := range codeBlocks { // Strip the opening/closing ``` content := strings.TrimPrefix(cb, "```") content = strings.TrimSuffix(content, "```") // Extract language if present on first line lang := "" content = strings.TrimSpace(content) if idx := strings.Index(content, "\n"); idx > 0 { lang = strings.TrimSpace(content[:idx]) content = strings.TrimSpace(content[idx+1:]) } if lang != "" { html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i), fmt.Sprintf(`
%s`, lang, escapeHTML(content)))
} else {
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
fmt.Sprintf("%s", escapeHTML(content)))
}
}
// 15. Restore inline code
for i, ic := range inlineCodes {
content := strings.Trim(ic, "`")
html = strings.ReplaceAll(html, fmt.Sprintf("\x00INLINECODE%d\x00", i),
fmt.Sprintf("%s", escapeHTML(content)))
}
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: html,
}, nil
}
// Patterns used by handleToText. They are compiled once at package
// initialisation instead of on every call.
var (
	// Fenced code block; the lazy body match lets the code contain backticks.
	toTextFenceRe      = regexp.MustCompile("(?s)```.*?```")
	toTextInlineCodeRe = regexp.MustCompile("`[^`]+`")
	toTextImageRe      = regexp.MustCompile(`!\[([^\]]*)\]\([^)]+\)`)
	toTextLinkRe       = regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`)

	toTextQuoteRe   = regexp.MustCompile(`(?m)^>\s?`)
	toTextHeadingRe = regexp.MustCompile(`(?m)^#{1,6}\s+`)
	toTextRuleRe    = regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`)
	// List markers may be indented (nested lists); the indent is captured so
	// it can be kept while the marker itself is removed.
	toTextBulletRe  = regexp.MustCompile(`(?m)^([ \t]*)[-*+]\s+`)
	toTextOrderedRe = regexp.MustCompile(`(?m)^([ \t]*)\d+\.\s+`)

	toTextBoldStarRe   = regexp.MustCompile(`\*\*([^*]+)\*\*`)
	toTextItalicStarRe = regexp.MustCompile(`\*([^*]+)\*`)
	// Underscore emphasis must sit on a word boundary, so identifiers such as
	// snake_case_name are left untouched ("_" counts as a word character).
	toTextBoldUnderRe   = regexp.MustCompile(`\b__([^_]+)__\b`)
	toTextItalicUnderRe = regexp.MustCompile(`\b_([^_]+)_\b`)
	toTextStrikeRe      = regexp.MustCompile(`~~([^~]+)~~`)

	toTextBlankRunRe = regexp.MustCompile(`\n{3,}`)
)

// handleToText strips Markdown formatting from md and returns the remaining
// plain text.
//
// Fenced code blocks and inline code are replaced by the placeholders
// "[代码块]" and "[代码]"; images are reduced to their alt text and links to
// their label. Block markers (blockquote, heading, horizontal rule, list) are
// removed before inline emphasis so that a "*" list bullet is never paired
// with another "*" as an emphasis delimiter.
//
// The result, prefixed with a header that states its length in characters
// (runes), is returned in ToolResult.Data. The error return is always nil.
func (t *MarkdownTool) handleToText(md string) (*ToolResult, error) {
	text := md

	// Code goes first so nothing inside it is interpreted as Markdown.
	text = toTextFenceRe.ReplaceAllString(text, "[代码块]")
	text = toTextInlineCodeRe.ReplaceAllString(text, "[代码]")

	// Images keep their alt text, links keep their label.
	text = toTextImageRe.ReplaceAllString(text, "$1")
	text = toTextLinkRe.ReplaceAllString(text, "$1")

	// Block-level markers. Quotes are unwrapped first so that headings and
	// lists nested inside a quote are handled by the rules that follow.
	text = toTextQuoteRe.ReplaceAllString(text, "")
	text = toTextHeadingRe.ReplaceAllString(text, "")
	text = toTextRuleRe.ReplaceAllString(text, "")
	text = toTextBulletRe.ReplaceAllString(text, "$1")
	text = toTextOrderedRe.ReplaceAllString(text, "$1")

	// Inline emphasis and strikethrough.
	text = toTextBoldStarRe.ReplaceAllString(text, "$1")
	text = toTextBoldUnderRe.ReplaceAllString(text, "$1")
	text = toTextItalicStarRe.ReplaceAllString(text, "$1")
	text = toTextItalicUnderRe.ReplaceAllString(text, "$1")
	text = toTextStrikeRe.ReplaceAllString(text, "$1")

	// Collapse runs of blank lines, then trim. The character count is taken
	// from the trimmed text so that it matches what is actually returned.
	text = strings.TrimSpace(toTextBlankRunRe.ReplaceAllString(text, "\n\n"))

	return &ToolResult{
		ToolName: "markdown",
		Success:  true,
		Data:     fmt.Sprintf("纯文本提取结果 (%d 字符):\n\n%s", len([]rune(text)), text),
	}, nil
}
// extractLinkRe matches an inline link or image. Group 1 is "!" for an image
// and empty for a link, group 2 is the label, group 3 is the raw destination
// (URL plus optional title).
var extractLinkRe = regexp.MustCompile(`(!?)\[([^\]]+)\]\(([^)]+)\)`)

// handleExtractLinks lists every inline [text](url) link found in md.
//
// Images (![alt](src)) are not links and are skipped. When a destination
// carries a title, as in [text](url "title"), only the URL part is reported
// on the URL line; the link is still echoed exactly as written.
//
// The listing is returned in ToolResult.Data; when md has no links, Data holds
// a "no links found" message instead. The error return is always nil.
func (t *MarkdownTool) handleExtractLinks(md string) (*ToolResult, error) {
	type link struct {
		text   string // label between the square brackets
		target string // destination exactly as written
		url    string // destination without an optional title
	}

	var links []link
	for _, m := range extractLinkRe.FindAllStringSubmatch(md, -1) {
		if m[1] == "!" {
			continue // image, not a link
		}
		url := m[3]
		// An inline destination cannot contain whitespace, so anything after
		// the first field is the optional title.
		if fields := strings.Fields(url); len(fields) > 0 {
			url = fields[0]
		}
		links = append(links, link{text: m[2], target: m[3], url: url})
	}

	if len(links) == 0 {
		return &ToolResult{
			ToolName: "markdown",
			Success:  true,
			Data:     "未找到任何链接",
		}, nil
	}

	var out strings.Builder
	fmt.Fprintf(&out, "提取链接 (共 %d 个):\n\n", len(links))
	for i, l := range links {
		fmt.Fprintf(&out, "%d. [%s](%s)\n - 文本: %s\n - URL: %s\n\n",
			i+1, l.text, l.target, l.text, l.url)
	}

	return &ToolResult{
		ToolName: "markdown",
		Success:  true,
		Data:     strings.TrimSpace(out.String()),
	}, nil
}
// extractCodeFenceRe matches a fenced code block and captures everything
// between the fences. The lazy match allows the code itself to contain
// backticks (shell substitution, template literals, raw strings, ...).
var extractCodeFenceRe = regexp.MustCompile("(?s)```(.*?)```")

// handleExtractCode lists every fenced code block found in md.
//
// The text between the opening fence and the first newline is reported as the
// block's language; a fence with nothing on that line has no language, so the
// first line of code is never mistaken for one. Each block's code is passed
// through truncateText with a limit of 500.
//
// The listing is returned in ToolResult.Data; when md has no fenced blocks,
// Data holds a "no code blocks found" message instead. The error return is
// always nil.
func (t *MarkdownTool) handleExtractCode(md string) (*ToolResult, error) {
	matches := extractCodeFenceRe.FindAllStringSubmatch(md, -1)
	if len(matches) == 0 {
		return &ToolResult{
			ToolName: "markdown",
			Success:  true,
			Data:     "未找到任何代码块",
		}, nil
	}

	var out strings.Builder
	fmt.Fprintf(&out, "提取代码块 (共 %d 个):\n\n", len(matches))
	for i, m := range matches {
		lang, code := "", m[1]
		// Split on the first newline of the untrimmed capture: what precedes
		// it is the info string, what follows is the code.
		if nl := strings.Index(code, "\n"); nl >= 0 {
			lang = strings.TrimSpace(code[:nl])
			code = code[nl+1:]
		}
		code = strings.TrimSpace(code)

		fmt.Fprintf(&out, "--- 代码块 %d", i+1)
		if lang != "" {
			fmt.Fprintf(&out, " (语言: %s)", lang)
		}
		fmt.Fprintf(&out, " ---\n%s\n\n", truncateText(code, 500))
	}

	return &ToolResult{
		ToolName: "markdown",
		Success:  true,
		Data:     strings.TrimSpace(out.String()),
	}, nil
}
var (
	// tocFenceRe matches a fenced code block so it can be removed before
	// headings are collected.
	tocFenceRe = regexp.MustCompile("(?s)```.*?```")
	// tocHeadingRe matches an ATX heading on a single line. The separator is
	// limited to spaces and tabs so a bare "#" line cannot take the following
	// line as its title.
	tocHeadingRe = regexp.MustCompile(`(?m)^(#{1,6})[ \t]+(.+)$`)
)

// handleTableOfContents builds a table of contents from the ATX headings
// ("#" to "######") in md.
//
// Fenced code blocks are removed first, so "#" comment lines inside code are
// not listed as headings. Each heading is written on its own line, indented
// according to its level and prefixed with its "#" markers.
//
// The listing is returned in ToolResult.Data; when md has no headings, Data
// holds a "no headings found" message instead. The error return is always nil.
func (t *MarkdownTool) handleTableOfContents(md string) (*ToolResult, error) {
	outline := tocFenceRe.ReplaceAllString(md, "")
	matches := tocHeadingRe.FindAllStringSubmatch(outline, -1)
	if len(matches) == 0 {
		return &ToolResult{
			ToolName: "markdown",
			Success:  true,
			Data:     "未找到任何标题,无法生成目录",
		}, nil
	}

	var out strings.Builder
	fmt.Fprintf(&out, "文档目录 (共 %d 个标题):\n\n", len(matches))
	for _, m := range matches {
		level := len(m[1])
		title := strings.TrimSpace(m[2])
		indent := strings.Repeat(" ", level-1)
		fmt.Fprintf(&out, "%s%s %s\n", indent, strings.Repeat("#", level), title)
	}

	return &ToolResult{
		ToolName: "markdown",
		Success:  true,
		Data:     out.String(),
	}, nil
}
// --- Markdown helper functions below ---
// processLists wraps consecutive list items in tags. func (t *MarkdownTool) wrapParagraphs(html string) string { lines := strings.Split(html, "\n") result := make([]string, 0, len(lines)) skipTags := map[string]bool{ "
": true, "": true, "
": true, "": true, "