package tools import ( "context" "fmt" "regexp" "strings" "github.com/yourname/cyrene-ai/tool-engine/internal/model" ) // MarkdownTool provides Markdown processing utilities for the LLM. type MarkdownTool struct{} // NewMarkdownTool creates a Markdown processing tool. func NewMarkdownTool() *MarkdownTool { return &MarkdownTool{} } // Definition returns the tool definition for LLM function calling. func (t *MarkdownTool) Definition() model.ToolDefinition { return model.ToolDefinition{ Name: "markdown", Description: "Markdown处理工具。将Markdown转为HTML、提取纯文本、提取链接/代码块、生成目录。用于处理Markdown格式的文档内容。", Parameters: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "action": map[string]interface{}{ "type": "string", "enum": []string{"to_html", "to_text", "extract_links", "extract_code", "table_of_contents"}, "description": "操作类型。to_html: 转换为HTML;to_text: 提取纯文本;extract_links: 提取所有链接;extract_code: 提取所有代码块;table_of_contents: 生成目录", }, "markdown": map[string]interface{}{ "type": "string", "description": "Markdown格式文本,需要处理的Markdown内容", }, }, "required": []string{"action", "markdown"}, }, } } // Execute performs Markdown processing operations. func (t *MarkdownTool) Execute(ctx context.Context, arguments map[string]interface{}) (*model.ToolResult, error) { action, ok := arguments["action"].(string) if !ok || action == "" { return &model.ToolResult{ID: "", Error: "缺少 action 参数"}, nil } md, ok := arguments["markdown"].(string) if !ok || strings.TrimSpace(md) == "" { return &model.ToolResult{ID: "", Error: "缺少 markdown 参数或内容为空"}, nil } switch action { case "to_html": return t.handleToHTML(md) case "to_text": return t.handleToText(md) case "extract_links": return t.handleExtractLinks(md) case "extract_code": return t.handleExtractCode(md) case "table_of_contents": return t.handleTableOfContents(md) default: return &model.ToolResult{ ID: "", Error: fmt.Sprintf("未知操作: %s,支持: to_html, to_text, extract_links, extract_code, table_of_contents", action), }, nil } } func (t *MarkdownTool) handleToHTML(md string) (*model.ToolResult, error) { html := md codeBlocks := make([]string, 0) reFence := regexp.MustCompile("(?s)```[^`]*```") html = reFence.ReplaceAllStringFunc(html, func(match string) string { codeBlocks = append(codeBlocks, match) return fmt.Sprintf("\x00CODEBLOCK%d\x00", len(codeBlocks)-1) }) inlineCodes := make([]string, 0) reInlineCode := regexp.MustCompile("`[^`]+`") html = reInlineCode.ReplaceAllStringFunc(html, func(match string) string { inlineCodes = append(inlineCodes, match) return fmt.Sprintf("\x00INLINECODE%d\x00", len(inlineCodes)-1) }) reImage := regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`) html = reImage.ReplaceAllString(html, `$1`) reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`) html = reLink.ReplaceAllString(html, `$1`) reBold := regexp.MustCompile(`\*\*([^*]+)\*\*`) html = reBold.ReplaceAllString(html, `$1`) reBold2 := regexp.MustCompile(`__([^_]+)__`) html = reBold2.ReplaceAllString(html, `$1`) reItalic := regexp.MustCompile(`\*([^*]+)\*`) html = reItalic.ReplaceAllString(html, `$1`) reItalic2 := regexp.MustCompile(`_([^_]+)_`) html = reItalic2.ReplaceAllString(html, `$1`) reStrike := regexp.MustCompile(`~~([^~]+)~~`) html = reStrike.ReplaceAllString(html, `$1`) reH6 := regexp.MustCompile(`(?m)^######\s+(.+)$`) html = reH6.ReplaceAllString(html, `
$1
`) reH5 := regexp.MustCompile(`(?m)^#####\s+(.+)$`) html = reH5.ReplaceAllString(html, `
$1
`) reH4 := regexp.MustCompile(`(?m)^####\s+(.+)$`) html = reH4.ReplaceAllString(html, `

$1

`) reH3 := regexp.MustCompile(`(?m)^###\s+(.+)$`) html = reH3.ReplaceAllString(html, `

$1

`) reH2 := regexp.MustCompile(`(?m)^##\s+(.+)$`) html = reH2.ReplaceAllString(html, `

$1

`) reH1 := regexp.MustCompile(`(?m)^#\s+(.+)$`) html = reH1.ReplaceAllString(html, `

$1

`) reHR := regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`) html = reHR.ReplaceAllString(html, `
`) html = t.processLists(html, `(?m)^[\-*]\s+`, "ul") html = t.processLists(html, `(?m)^\d+\.\s+`, "ol") reBlockquote := regexp.MustCompile(`(?m)^>\s?(.+)$`) html = reBlockquote.ReplaceAllString(html, `
$1
`) html = t.wrapParagraphs(html) for i, cb := range codeBlocks { content := strings.TrimPrefix(cb, "```") content = strings.TrimSuffix(content, "```") lang := "" content = strings.TrimSpace(content) if idx := strings.Index(content, "\n"); idx > 0 { lang = strings.TrimSpace(content[:idx]) content = strings.TrimSpace(content[idx+1:]) } if lang != "" { html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i), fmt.Sprintf(`
%s
`, lang, escapeHTML(content))) } else { html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i), fmt.Sprintf("
%s
", escapeHTML(content))) } } for i, ic := range inlineCodes { content := strings.Trim(ic, "`") html = strings.ReplaceAll(html, fmt.Sprintf("\x00INLINECODE%d\x00", i), fmt.Sprintf("%s", escapeHTML(content))) } return &model.ToolResult{ID: "", Output: html}, nil } func (t *MarkdownTool) handleToText(md string) (*model.ToolResult, error) { text := md reFence := regexp.MustCompile("(?s)```[^`]*```") text = reFence.ReplaceAllString(text, "[代码块]") reInlineCode := regexp.MustCompile("`[^`]+`") text = reInlineCode.ReplaceAllString(text, "[代码]") reImage := regexp.MustCompile(`!\[([^\]]*)\]\([^)]+\)`) text = reImage.ReplaceAllString(text, "$1") reLink := regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`) text = reLink.ReplaceAllString(text, "$1") text = regexp.MustCompile(`\*\*([^*]+)\*\*`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`__([^_]+)__`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`\*([^*]+)\*`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`_([^_]+)_`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`~~([^~]+)~~`).ReplaceAllString(text, "$1") text = regexp.MustCompile(`(?m)^#{1,6}\s+`).ReplaceAllString(text, "") text = regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`).ReplaceAllString(text, "") text = regexp.MustCompile(`(?m)^[\-*]\s+`).ReplaceAllString(text, "") text = regexp.MustCompile(`(?m)^\d+\.\s+`).ReplaceAllString(text, "") text = regexp.MustCompile(`(?m)^>\s?`).ReplaceAllString(text, "") text = regexp.MustCompile(`\n{3,}`).ReplaceAllString(text, "\n\n") return &model.ToolResult{ ID: "", Output: fmt.Sprintf("纯文本提取结果 (%d 字符):\n\n%s", len([]rune(text)), strings.TrimSpace(text)), }, nil } func (t *MarkdownTool) handleExtractLinks(md string) (*model.ToolResult, error) { reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`) matches := reLink.FindAllStringSubmatch(md, -1) if len(matches) == 0 { return &model.ToolResult{ID: "", Output: "未找到任何链接"}, nil } var result strings.Builder result.WriteString(fmt.Sprintf("提取链接 (共 %d 个):\n\n", len(matches))) for i, m := range matches { result.WriteString(fmt.Sprintf("%d. [%s](%s)\n - 文本: %s\n - URL: %s\n\n", i+1, m[1], m[2], m[1], m[2])) } return &model.ToolResult{ID: "", Output: strings.TrimSpace(result.String())}, nil } func (t *MarkdownTool) handleExtractCode(md string) (*model.ToolResult, error) { reFence := regexp.MustCompile("(?s)```([^`]*)```") matches := reFence.FindAllStringSubmatch(md, -1) if len(matches) == 0 { return &model.ToolResult{ID: "", Output: "未找到任何代码块"}, nil } var result strings.Builder result.WriteString(fmt.Sprintf("提取代码块 (共 %d 个):\n\n", len(matches))) for i, m := range matches { content := strings.TrimSpace(m[1]) lang := "" if idx := strings.Index(content, "\n"); idx > 0 { lang = strings.TrimSpace(content[:idx]) content = strings.TrimSpace(content[idx+1:]) } result.WriteString(fmt.Sprintf("--- 代码块 %d", i+1)) if lang != "" { result.WriteString(fmt.Sprintf(" (语言: %s)", lang)) } result.WriteString(fmt.Sprintf(" ---\n%s\n\n", truncateText(content, 500))) } return &model.ToolResult{ID: "", Output: strings.TrimSpace(result.String())}, nil } func (t *MarkdownTool) handleTableOfContents(md string) (*model.ToolResult, error) { reHeading := regexp.MustCompile(`(?m)^(#{1,6})\s+(.+)$`) matches := reHeading.FindAllStringSubmatch(md, -1) if len(matches) == 0 { return &model.ToolResult{ID: "", Output: "未找到任何标题,无法生成目录"}, nil } var result strings.Builder result.WriteString(fmt.Sprintf("文档目录 (共 %d 个标题):\n\n", len(matches))) for _, m := range matches { level := len(m[1]) title := strings.TrimSpace(m[2]) indent := strings.Repeat(" ", level-1) result.WriteString(fmt.Sprintf("%s%s %s\n", indent, strings.Repeat("#", level), title)) } return &model.ToolResult{ID: "", Output: result.String()}, nil } func (t *MarkdownTool) processLists(html, itemPattern, listTag string) string { reItem := regexp.MustCompile(itemPattern + `(.+)$`) lines := strings.Split(html, "\n") result := make([]string, 0, len(lines)) inList := false for _, line := range lines { if reItem.MatchString(line) { content := reItem.ReplaceAllString(line, "$1") if !inList { result = append(result, fmt.Sprintf("<%s>", listTag)) inList = true } result = append(result, fmt.Sprintf("
  • %s
  • ", content)) } else { if inList { result = append(result, fmt.Sprintf("", listTag)) inList = false } result = append(result, line) } } if inList { result = append(result, fmt.Sprintf("", listTag)) } return strings.Join(result, "\n") } func (t *MarkdownTool) wrapParagraphs(html string) string { lines := strings.Split(html, "\n") result := make([]string, 0, len(lines)) skipTags := map[string]bool{ "

    ": true, "

    ": true, "

    ": true, "

    ": true, "

    ": true, "
    ": true, "
    ": true, "": true, "
      ": true, "
    ": true, "
  • ": true, "
  • ": true, "
    ": true, "
    ": true, "
    ": true, "
    ": true, "%s

    ", trimmed)) } else { result = append(result, line) } } return strings.Join(result, "\n") } func escapeHTML(s string) string { replacer := strings.NewReplacer( "&", "&"+"amp;", "<", "&"+"lt;", ">", "&"+"gt;", "\"", "&"+"quot;", ) return replacer.Replace(s) } func truncateText(s string, maxLen int) string { runes := []rune(s) if len(runes) <= maxLen { return s } return string(runes[:maxLen]) + "..." }