Files
Cyrene/backend/ai-core/internal/tools/markdown_tool.go
T
AskaEth b6ec36886c feat: 第四轮功能增强 - LLM 思维记忆优化、DevTools 记忆UI、9个新工具、5分钟自我思考
- 优化 LLM 思维方式和记忆方法(类别/重要性/关键词/相似度合并/衰减)
- DevTools 记忆查询 UI 重新设计(类别筛选/排序/星标/搜索)
- 新增 9 个 LLM 工具:calculator, datetime, file_ops, http_request, json_ops, text, random, crypto, markdown
- 管理员主对话 5 分钟自我思考增强(工具调用/记忆提取/记忆维护)
2026-05-18 12:13:49 +08:00

428 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package tools
import (
"context"
"fmt"
"regexp"
"strings"
)
// MarkdownTool provides Markdown processing utilities for the LLM.
// Supports HTML conversion, plain text extraction, link/code extraction, and TOC generation.
type MarkdownTool struct{}
// NewMarkdownTool creates a Markdown processing tool.
func NewMarkdownTool() *MarkdownTool {
return &MarkdownTool{}
}
// Definition returns the tool definition for LLM function calling.
func (t *MarkdownTool) Definition() ToolDefinition {
return ToolDefinition{
Name: "markdown",
Description: "Markdown处理工具。将Markdown转为HTML、提取纯文本、提取链接/代码块、生成目录。用于处理Markdown格式的文档内容。",
Parameters: map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"action": map[string]interface{}{
"type": "string",
"enum": []string{"to_html", "to_text", "extract_links", "extract_code", "table_of_contents"},
"description": "操作类型。to_html: 转换为HTMLto_text: 提取纯文本;extract_links: 提取所有链接;extract_code: 提取所有代码块;table_of_contents: 生成目录",
},
"markdown": map[string]interface{}{
"type": "string",
"description": "Markdown格式文本,需要处理的Markdown内容",
},
},
"required": []string{"action", "markdown"},
},
}
}
// Execute performs Markdown processing operations.
func (t *MarkdownTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
action, ok := arguments["action"].(string)
if !ok || action == "" {
return &ToolResult{
ToolName: "markdown",
Success: false,
Error: "缺少 action 参数",
}, nil
}
md, ok := arguments["markdown"].(string)
if !ok || strings.TrimSpace(md) == "" {
return &ToolResult{
ToolName: "markdown",
Success: false,
Error: "缺少 markdown 参数或内容为空",
}, nil
}
switch action {
case "to_html":
return t.handleToHTML(md)
case "to_text":
return t.handleToText(md)
case "extract_links":
return t.handleExtractLinks(md)
case "extract_code":
return t.handleExtractCode(md)
case "table_of_contents":
return t.handleTableOfContents(md)
default:
return &ToolResult{
ToolName: "markdown",
Success: false,
Error: fmt.Sprintf("未知操作: %s,支持: to_html, to_text, extract_links, extract_code, table_of_contents", action),
}, nil
}
}
// handleToHTML converts Markdown to HTML using simple regex-based approach.
func (t *MarkdownTool) handleToHTML(md string) (*ToolResult, error) {
html := md
// Process in order: code blocks first (to avoid interference), then inline elements, then blocks
// 1. Code blocks (```...```) - preserve with placeholder
codeBlocks := make([]string, 0)
reFence := regexp.MustCompile("(?s)```[^`]*```")
html = reFence.ReplaceAllStringFunc(html, func(match string) string {
codeBlocks = append(codeBlocks, match)
return fmt.Sprintf("\x00CODEBLOCK%d\x00", len(codeBlocks)-1)
})
// 2. Inline code (`...`)
inlineCodes := make([]string, 0)
reInlineCode := regexp.MustCompile("`[^`]+`")
html = reInlineCode.ReplaceAllStringFunc(html, func(match string) string {
inlineCodes = append(inlineCodes, match)
return fmt.Sprintf("\x00INLINECODE%d\x00", len(inlineCodes)-1)
})
// 3. Images ![alt](url)
reImage := regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`)
html = reImage.ReplaceAllString(html, `<img src="$2" alt="$1">`)
// 4. Links [text](url)
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
html = reLink.ReplaceAllString(html, `<a href="$2">$1</a>`)
// 5. Bold **text** or __text__
reBold := regexp.MustCompile(`\*\*([^*]+)\*\*`)
html = reBold.ReplaceAllString(html, `<strong>$1</strong>`)
reBold2 := regexp.MustCompile(`__([^_]+)__`)
html = reBold2.ReplaceAllString(html, `<strong>$1</strong>`)
// 6. Italic *text* or _text_
reItalic := regexp.MustCompile(`\*([^*]+)\*`)
html = reItalic.ReplaceAllString(html, `<em>$1</em>`)
reItalic2 := regexp.MustCompile(`_([^_]+)_`)
html = reItalic2.ReplaceAllString(html, `<em>$1</em>`)
// 7. Strikethrough ~~text~~
reStrike := regexp.MustCompile(`~~([^~]+)~~`)
html = reStrike.ReplaceAllString(html, `<del>$1</del>`)
// 8. Headings (# to ######)
reH6 := regexp.MustCompile(`(?m)^######\s+(.+)$`)
html = reH6.ReplaceAllString(html, `<h6>$1</h6>`)
reH5 := regexp.MustCompile(`(?m)^#####\s+(.+)$`)
html = reH5.ReplaceAllString(html, `<h5>$1</h5>`)
reH4 := regexp.MustCompile(`(?m)^####\s+(.+)$`)
html = reH4.ReplaceAllString(html, `<h4>$1</h4>`)
reH3 := regexp.MustCompile(`(?m)^###\s+(.+)$`)
html = reH3.ReplaceAllString(html, `<h3>$1</h3>`)
reH2 := regexp.MustCompile(`(?m)^##\s+(.+)$`)
html = reH2.ReplaceAllString(html, `<h2>$1</h2>`)
reH1 := regexp.MustCompile(`(?m)^#\s+(.+)$`)
html = reH1.ReplaceAllString(html, `<h1>$1</h1>`)
// 9. Horizontal rules
reHR := regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`)
html = reHR.ReplaceAllString(html, `<hr>`)
// 10. Unordered lists (- item)
html = t.processLists(html, `(?m)^[\-*]\s+`, "ul")
// 11. Ordered lists (1. item)
html = t.processLists(html, `(?m)^\d+\.\s+`, "ol")
// 12. Blockquotes
reBlockquote := regexp.MustCompile(`(?m)^>\s?(.+)$`)
html = reBlockquote.ReplaceAllString(html, `<blockquote>$1</blockquote>`)
// 13. Paragraphs: wrap remaining text lines
html = t.wrapParagraphs(html)
// 14. Restore code blocks
for i, cb := range codeBlocks {
// Strip the opening/closing ```
content := strings.TrimPrefix(cb, "```")
content = strings.TrimSuffix(content, "```")
// Extract language if present on first line
lang := ""
content = strings.TrimSpace(content)
if idx := strings.Index(content, "\n"); idx > 0 {
lang = strings.TrimSpace(content[:idx])
content = strings.TrimSpace(content[idx+1:])
}
if lang != "" {
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
fmt.Sprintf(`<pre><code class="language-%s">%s</code></pre>`, lang, escapeHTML(content)))
} else {
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
fmt.Sprintf("<pre><code>%s</code></pre>", escapeHTML(content)))
}
}
// 15. Restore inline code
for i, ic := range inlineCodes {
content := strings.Trim(ic, "`")
html = strings.ReplaceAll(html, fmt.Sprintf("\x00INLINECODE%d\x00", i),
fmt.Sprintf("<code>%s</code>", escapeHTML(content)))
}
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: html,
}, nil
}
// handleToText strips Markdown formatting and extracts plain text.
func (t *MarkdownTool) handleToText(md string) (*ToolResult, error) {
text := md
// Remove code blocks
reFence := regexp.MustCompile("(?s)```[^`]*```")
text = reFence.ReplaceAllString(text, "[代码块]")
// Remove inline code
reInlineCode := regexp.MustCompile("`[^`]+`")
text = reInlineCode.ReplaceAllString(text, "[代码]")
// Remove images ![alt](url) - keep alt text
reImage := regexp.MustCompile(`!\[([^\]]*)\]\([^)]+\)`)
text = reImage.ReplaceAllString(text, "$1")
// Remove links [text](url) - keep text
reLink := regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`)
text = reLink.ReplaceAllString(text, "$1")
// Remove bold/italic markers
text = regexp.MustCompile(`\*\*([^*]+)\*\*`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`__([^_]+)__`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`\*([^*]+)\*`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`_([^_]+)_`).ReplaceAllString(text, "$1")
// Remove strikethrough
text = regexp.MustCompile(`~~([^~]+)~~`).ReplaceAllString(text, "$1")
// Remove heading markers but keep the text
text = regexp.MustCompile(`(?m)^#{1,6}\s+`).ReplaceAllString(text, "")
// Remove horizontal rules
text = regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`).ReplaceAllString(text, "")
// Remove list markers
text = regexp.MustCompile(`(?m)^[\-*]\s+`).ReplaceAllString(text, "")
text = regexp.MustCompile(`(?m)^\d+\.\s+`).ReplaceAllString(text, "")
// Remove blockquote markers
text = regexp.MustCompile(`(?m)^>\s?`).ReplaceAllString(text, "")
// Collapse multiple blank lines
text = regexp.MustCompile(`\n{3,}`).ReplaceAllString(text, "\n\n")
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: fmt.Sprintf("纯文本提取结果 (%d 字符):\n\n%s",
len([]rune(text)), strings.TrimSpace(text)),
}, nil
}
// handleExtractLinks extracts all [text](url) links from Markdown.
func (t *MarkdownTool) handleExtractLinks(md string) (*ToolResult, error) {
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
matches := reLink.FindAllStringSubmatch(md, -1)
if len(matches) == 0 {
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: "未找到任何链接",
}, nil
}
var result strings.Builder
result.WriteString(fmt.Sprintf("提取链接 (共 %d 个):\n\n", len(matches)))
for i, m := range matches {
result.WriteString(fmt.Sprintf("%d. [%s](%s)\n - 文本: %s\n - URL: %s\n\n",
i+1, m[1], m[2], m[1], m[2]))
}
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: strings.TrimSpace(result.String()),
}, nil
}
// handleExtractCode extracts all code blocks from Markdown.
func (t *MarkdownTool) handleExtractCode(md string) (*ToolResult, error) {
reFence := regexp.MustCompile("(?s)```([^`]*)```")
matches := reFence.FindAllStringSubmatch(md, -1)
if len(matches) == 0 {
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: "未找到任何代码块",
}, nil
}
var result strings.Builder
result.WriteString(fmt.Sprintf("提取代码块 (共 %d 个):\n\n", len(matches)))
for i, m := range matches {
content := strings.TrimSpace(m[1])
lang := ""
if idx := strings.Index(content, "\n"); idx > 0 {
lang = strings.TrimSpace(content[:idx])
content = strings.TrimSpace(content[idx+1:])
}
result.WriteString(fmt.Sprintf("--- 代码块 %d", i+1))
if lang != "" {
result.WriteString(fmt.Sprintf(" (语言: %s)", lang))
}
result.WriteString(fmt.Sprintf(" ---\n%s\n\n", truncateText(content, 500)))
}
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: strings.TrimSpace(result.String()),
}, nil
}
// handleTableOfContents generates a table of contents from headings.
func (t *MarkdownTool) handleTableOfContents(md string) (*ToolResult, error) {
reHeading := regexp.MustCompile(`(?m)^(#{1,6})\s+(.+)$`)
matches := reHeading.FindAllStringSubmatch(md, -1)
if len(matches) == 0 {
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: "未找到任何标题,无法生成目录",
}, nil
}
var result strings.Builder
result.WriteString(fmt.Sprintf("文档目录 (共 %d 个标题):\n\n", len(matches)))
for _, m := range matches {
level := len(m[1])
title := strings.TrimSpace(m[2])
indent := strings.Repeat(" ", level-1)
result.WriteString(fmt.Sprintf("%s%s %s\n", indent, strings.Repeat("#", level), title))
}
return &ToolResult{
ToolName: "markdown",
Success: true,
Data: result.String(),
}, nil
}
// --- Markdown helper functions below ---
// processLists wraps consecutive list items in <ul> or <ol> tags.
func (t *MarkdownTool) processLists(html, itemPattern, listTag string) string {
reItem := regexp.MustCompile(itemPattern + `(.+)$`)
lines := strings.Split(html, "\n")
result := make([]string, 0, len(lines))
inList := false
for _, line := range lines {
if reItem.MatchString(line) {
content := reItem.ReplaceAllString(line, "$1")
if !inList {
result = append(result, fmt.Sprintf("<%s>", listTag))
inList = true
}
result = append(result, fmt.Sprintf("<li>%s</li>", content))
} else {
if inList {
result = append(result, fmt.Sprintf("</%s>", listTag))
inList = false
}
result = append(result, line)
}
}
if inList {
result = append(result, fmt.Sprintf("</%s>", listTag))
}
return strings.Join(result, "\n")
}
// wrapParagraphs wraps non-tag lines in <p> tags.
func (t *MarkdownTool) wrapParagraphs(html string) string {
lines := strings.Split(html, "\n")
result := make([]string, 0, len(lines))
skipTags := map[string]bool{
"<h1>": true, "<h2>": true, "<h3>": true, "<h4>": true, "<h5>": true, "<h6>": true,
"<hr>": true, "<ul>": true, "</ul>": true, "<ol>": true, "</ol>": true,
"<li>": true, "</li>": true, "<blockquote>": true, "</blockquote>": true,
"<pre>": true, "</pre>": true, "<img": true,
}
for _, line := range lines {
trimmed := strings.TrimSpace(line)
if trimmed == "" {
result = append(result, line)
continue
}
// Check if line starts with an HTML tag
isTag := false
for tag := range skipTags {
if strings.HasPrefix(trimmed, tag) {
isTag = true
break
}
}
if !isTag {
result = append(result, fmt.Sprintf("<p>%s</p>", trimmed))
} else {
result = append(result, line)
}
}
return strings.Join(result, "\n")
}
// escapeHTML escapes special HTML characters.
func escapeHTML(s string) string {
replacer := strings.NewReplacer(
"&", "&"+"amp;",
"<", "&"+"lt;",
">", "&"+"gt;",
"\"", "&"+"quot;",
)
return replacer.Replace(s)
}
// truncateText truncates text to maxLen runes, adding "..." if truncated.
func truncateText(s string, maxLen int) string {
runes := []rune(s)
if len(runes) <= maxLen {
return s
}
return string(runes[:maxLen]) + "..."
}