b6ec36886c
- 优化 LLM 思维方式和记忆方法(类别/重要性/关键词/相似度合并/衰减) - DevTools 记忆查询 UI 重新设计(类别筛选/排序/星标/搜索) - 新增 9 个 LLM 工具:calculator, datetime, file_ops, http_request, json_ops, text, random, crypto, markdown - 管理员主对话 5 分钟自我思考增强(工具调用/记忆提取/记忆维护)
428 lines
13 KiB
Go
428 lines
13 KiB
Go
package tools
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"regexp"
|
||
"strings"
|
||
)
|
||
|
||
// MarkdownTool provides Markdown processing utilities for the LLM.
|
||
// Supports HTML conversion, plain text extraction, link/code extraction, and TOC generation.
|
||
// MarkdownTool has no fields; its methods work only from the arguments passed to them.
type MarkdownTool struct{}
|
||
|
||
// NewMarkdownTool creates a Markdown processing tool.
|
||
func NewMarkdownTool() *MarkdownTool {
|
||
return &MarkdownTool{}
|
||
}
|
||
|
||
// Definition returns the tool definition for LLM function calling.
|
||
func (t *MarkdownTool) Definition() ToolDefinition {
|
||
return ToolDefinition{
|
||
Name: "markdown",
|
||
Description: "Markdown处理工具。将Markdown转为HTML、提取纯文本、提取链接/代码块、生成目录。用于处理Markdown格式的文档内容。",
|
||
Parameters: map[string]interface{}{
|
||
"type": "object",
|
||
"properties": map[string]interface{}{
|
||
"action": map[string]interface{}{
|
||
"type": "string",
|
||
"enum": []string{"to_html", "to_text", "extract_links", "extract_code", "table_of_contents"},
|
||
"description": "操作类型。to_html: 转换为HTML;to_text: 提取纯文本;extract_links: 提取所有链接;extract_code: 提取所有代码块;table_of_contents: 生成目录",
|
||
},
|
||
"markdown": map[string]interface{}{
|
||
"type": "string",
|
||
"description": "Markdown格式文本,需要处理的Markdown内容",
|
||
},
|
||
},
|
||
"required": []string{"action", "markdown"},
|
||
},
|
||
}
|
||
}
|
||
|
||
// Execute performs Markdown processing operations.
|
||
func (t *MarkdownTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
|
||
action, ok := arguments["action"].(string)
|
||
if !ok || action == "" {
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: false,
|
||
Error: "缺少 action 参数",
|
||
}, nil
|
||
}
|
||
|
||
md, ok := arguments["markdown"].(string)
|
||
if !ok || strings.TrimSpace(md) == "" {
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: false,
|
||
Error: "缺少 markdown 参数或内容为空",
|
||
}, nil
|
||
}
|
||
|
||
switch action {
|
||
case "to_html":
|
||
return t.handleToHTML(md)
|
||
case "to_text":
|
||
return t.handleToText(md)
|
||
case "extract_links":
|
||
return t.handleExtractLinks(md)
|
||
case "extract_code":
|
||
return t.handleExtractCode(md)
|
||
case "table_of_contents":
|
||
return t.handleTableOfContents(md)
|
||
default:
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: false,
|
||
Error: fmt.Sprintf("未知操作: %s,支持: to_html, to_text, extract_links, extract_code, table_of_contents", action),
|
||
}, nil
|
||
}
|
||
}
|
||
|
||
// handleToHTML converts Markdown to HTML using simple regex-based approach.
|
||
func (t *MarkdownTool) handleToHTML(md string) (*ToolResult, error) {
|
||
html := md
|
||
|
||
// Process in order: code blocks first (to avoid interference), then inline elements, then blocks
|
||
|
||
// 1. Code blocks (```...```) - preserve with placeholder
|
||
codeBlocks := make([]string, 0)
|
||
reFence := regexp.MustCompile("(?s)```[^`]*```")
|
||
html = reFence.ReplaceAllStringFunc(html, func(match string) string {
|
||
codeBlocks = append(codeBlocks, match)
|
||
return fmt.Sprintf("\x00CODEBLOCK%d\x00", len(codeBlocks)-1)
|
||
})
|
||
|
||
// 2. Inline code (`...`)
|
||
inlineCodes := make([]string, 0)
|
||
reInlineCode := regexp.MustCompile("`[^`]+`")
|
||
html = reInlineCode.ReplaceAllStringFunc(html, func(match string) string {
|
||
inlineCodes = append(inlineCodes, match)
|
||
return fmt.Sprintf("\x00INLINECODE%d\x00", len(inlineCodes)-1)
|
||
})
|
||
|
||
// 3. Images 
|
||
reImage := regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`)
|
||
html = reImage.ReplaceAllString(html, `<img src="$2" alt="$1">`)
|
||
|
||
// 4. Links [text](url)
|
||
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
|
||
html = reLink.ReplaceAllString(html, `<a href="$2">$1</a>`)
|
||
|
||
// 5. Bold **text** or __text__
|
||
reBold := regexp.MustCompile(`\*\*([^*]+)\*\*`)
|
||
html = reBold.ReplaceAllString(html, `<strong>$1</strong>`)
|
||
reBold2 := regexp.MustCompile(`__([^_]+)__`)
|
||
html = reBold2.ReplaceAllString(html, `<strong>$1</strong>`)
|
||
|
||
// 6. Italic *text* or _text_
|
||
reItalic := regexp.MustCompile(`\*([^*]+)\*`)
|
||
html = reItalic.ReplaceAllString(html, `<em>$1</em>`)
|
||
reItalic2 := regexp.MustCompile(`_([^_]+)_`)
|
||
html = reItalic2.ReplaceAllString(html, `<em>$1</em>`)
|
||
|
||
// 7. Strikethrough ~~text~~
|
||
reStrike := regexp.MustCompile(`~~([^~]+)~~`)
|
||
html = reStrike.ReplaceAllString(html, `<del>$1</del>`)
|
||
|
||
// 8. Headings (# to ######)
|
||
reH6 := regexp.MustCompile(`(?m)^######\s+(.+)$`)
|
||
html = reH6.ReplaceAllString(html, `<h6>$1</h6>`)
|
||
reH5 := regexp.MustCompile(`(?m)^#####\s+(.+)$`)
|
||
html = reH5.ReplaceAllString(html, `<h5>$1</h5>`)
|
||
reH4 := regexp.MustCompile(`(?m)^####\s+(.+)$`)
|
||
html = reH4.ReplaceAllString(html, `<h4>$1</h4>`)
|
||
reH3 := regexp.MustCompile(`(?m)^###\s+(.+)$`)
|
||
html = reH3.ReplaceAllString(html, `<h3>$1</h3>`)
|
||
reH2 := regexp.MustCompile(`(?m)^##\s+(.+)$`)
|
||
html = reH2.ReplaceAllString(html, `<h2>$1</h2>`)
|
||
reH1 := regexp.MustCompile(`(?m)^#\s+(.+)$`)
|
||
html = reH1.ReplaceAllString(html, `<h1>$1</h1>`)
|
||
|
||
// 9. Horizontal rules
|
||
reHR := regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`)
|
||
html = reHR.ReplaceAllString(html, `<hr>`)
|
||
|
||
// 10. Unordered lists (- item)
|
||
html = t.processLists(html, `(?m)^[\-*]\s+`, "ul")
|
||
// 11. Ordered lists (1. item)
|
||
html = t.processLists(html, `(?m)^\d+\.\s+`, "ol")
|
||
|
||
// 12. Blockquotes
|
||
reBlockquote := regexp.MustCompile(`(?m)^>\s?(.+)$`)
|
||
html = reBlockquote.ReplaceAllString(html, `<blockquote>$1</blockquote>`)
|
||
|
||
// 13. Paragraphs: wrap remaining text lines
|
||
html = t.wrapParagraphs(html)
|
||
|
||
// 14. Restore code blocks
|
||
for i, cb := range codeBlocks {
|
||
// Strip the opening/closing ```
|
||
content := strings.TrimPrefix(cb, "```")
|
||
content = strings.TrimSuffix(content, "```")
|
||
// Extract language if present on first line
|
||
lang := ""
|
||
content = strings.TrimSpace(content)
|
||
if idx := strings.Index(content, "\n"); idx > 0 {
|
||
lang = strings.TrimSpace(content[:idx])
|
||
content = strings.TrimSpace(content[idx+1:])
|
||
}
|
||
if lang != "" {
|
||
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
|
||
fmt.Sprintf(`<pre><code class="language-%s">%s</code></pre>`, lang, escapeHTML(content)))
|
||
} else {
|
||
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
|
||
fmt.Sprintf("<pre><code>%s</code></pre>", escapeHTML(content)))
|
||
}
|
||
}
|
||
|
||
// 15. Restore inline code
|
||
for i, ic := range inlineCodes {
|
||
content := strings.Trim(ic, "`")
|
||
html = strings.ReplaceAll(html, fmt.Sprintf("\x00INLINECODE%d\x00", i),
|
||
fmt.Sprintf("<code>%s</code>", escapeHTML(content)))
|
||
}
|
||
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: html,
|
||
}, nil
|
||
}
|
||
|
||
// handleToText strips Markdown formatting and extracts plain text.
|
||
func (t *MarkdownTool) handleToText(md string) (*ToolResult, error) {
|
||
text := md
|
||
|
||
// Remove code blocks
|
||
reFence := regexp.MustCompile("(?s)```[^`]*```")
|
||
text = reFence.ReplaceAllString(text, "[代码块]")
|
||
|
||
// Remove inline code
|
||
reInlineCode := regexp.MustCompile("`[^`]+`")
|
||
text = reInlineCode.ReplaceAllString(text, "[代码]")
|
||
|
||
// Remove images  - keep alt text
|
||
reImage := regexp.MustCompile(`!\[([^\]]*)\]\([^)]+\)`)
|
||
text = reImage.ReplaceAllString(text, "$1")
|
||
|
||
// Remove links [text](url) - keep text
|
||
reLink := regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`)
|
||
text = reLink.ReplaceAllString(text, "$1")
|
||
|
||
// Remove bold/italic markers
|
||
text = regexp.MustCompile(`\*\*([^*]+)\*\*`).ReplaceAllString(text, "$1")
|
||
text = regexp.MustCompile(`__([^_]+)__`).ReplaceAllString(text, "$1")
|
||
text = regexp.MustCompile(`\*([^*]+)\*`).ReplaceAllString(text, "$1")
|
||
text = regexp.MustCompile(`_([^_]+)_`).ReplaceAllString(text, "$1")
|
||
|
||
// Remove strikethrough
|
||
text = regexp.MustCompile(`~~([^~]+)~~`).ReplaceAllString(text, "$1")
|
||
|
||
// Remove heading markers but keep the text
|
||
text = regexp.MustCompile(`(?m)^#{1,6}\s+`).ReplaceAllString(text, "")
|
||
|
||
// Remove horizontal rules
|
||
text = regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`).ReplaceAllString(text, "")
|
||
|
||
// Remove list markers
|
||
text = regexp.MustCompile(`(?m)^[\-*]\s+`).ReplaceAllString(text, "")
|
||
text = regexp.MustCompile(`(?m)^\d+\.\s+`).ReplaceAllString(text, "")
|
||
|
||
// Remove blockquote markers
|
||
text = regexp.MustCompile(`(?m)^>\s?`).ReplaceAllString(text, "")
|
||
|
||
// Collapse multiple blank lines
|
||
text = regexp.MustCompile(`\n{3,}`).ReplaceAllString(text, "\n\n")
|
||
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: fmt.Sprintf("纯文本提取结果 (%d 字符):\n\n%s",
|
||
len([]rune(text)), strings.TrimSpace(text)),
|
||
}, nil
|
||
}
|
||
|
||
// handleExtractLinks extracts all [text](url) links from Markdown.
|
||
func (t *MarkdownTool) handleExtractLinks(md string) (*ToolResult, error) {
|
||
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
|
||
matches := reLink.FindAllStringSubmatch(md, -1)
|
||
|
||
if len(matches) == 0 {
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: "未找到任何链接",
|
||
}, nil
|
||
}
|
||
|
||
var result strings.Builder
|
||
result.WriteString(fmt.Sprintf("提取链接 (共 %d 个):\n\n", len(matches)))
|
||
for i, m := range matches {
|
||
result.WriteString(fmt.Sprintf("%d. [%s](%s)\n - 文本: %s\n - URL: %s\n\n",
|
||
i+1, m[1], m[2], m[1], m[2]))
|
||
}
|
||
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: strings.TrimSpace(result.String()),
|
||
}, nil
|
||
}
|
||
|
||
// handleExtractCode extracts all code blocks from Markdown.
|
||
func (t *MarkdownTool) handleExtractCode(md string) (*ToolResult, error) {
|
||
reFence := regexp.MustCompile("(?s)```([^`]*)```")
|
||
matches := reFence.FindAllStringSubmatch(md, -1)
|
||
|
||
if len(matches) == 0 {
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: "未找到任何代码块",
|
||
}, nil
|
||
}
|
||
|
||
var result strings.Builder
|
||
result.WriteString(fmt.Sprintf("提取代码块 (共 %d 个):\n\n", len(matches)))
|
||
for i, m := range matches {
|
||
content := strings.TrimSpace(m[1])
|
||
lang := ""
|
||
if idx := strings.Index(content, "\n"); idx > 0 {
|
||
lang = strings.TrimSpace(content[:idx])
|
||
content = strings.TrimSpace(content[idx+1:])
|
||
}
|
||
|
||
result.WriteString(fmt.Sprintf("--- 代码块 %d", i+1))
|
||
if lang != "" {
|
||
result.WriteString(fmt.Sprintf(" (语言: %s)", lang))
|
||
}
|
||
result.WriteString(fmt.Sprintf(" ---\n%s\n\n", truncateText(content, 500)))
|
||
}
|
||
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: strings.TrimSpace(result.String()),
|
||
}, nil
|
||
}
|
||
|
||
// handleTableOfContents generates a table of contents from headings.
|
||
func (t *MarkdownTool) handleTableOfContents(md string) (*ToolResult, error) {
|
||
reHeading := regexp.MustCompile(`(?m)^(#{1,6})\s+(.+)$`)
|
||
matches := reHeading.FindAllStringSubmatch(md, -1)
|
||
|
||
if len(matches) == 0 {
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: "未找到任何标题,无法生成目录",
|
||
}, nil
|
||
}
|
||
|
||
var result strings.Builder
|
||
result.WriteString(fmt.Sprintf("文档目录 (共 %d 个标题):\n\n", len(matches)))
|
||
for _, m := range matches {
|
||
level := len(m[1])
|
||
title := strings.TrimSpace(m[2])
|
||
indent := strings.Repeat(" ", level-1)
|
||
result.WriteString(fmt.Sprintf("%s%s %s\n", indent, strings.Repeat("#", level), title))
|
||
}
|
||
|
||
return &ToolResult{
|
||
ToolName: "markdown",
|
||
Success: true,
|
||
Data: result.String(),
|
||
}, nil
|
||
}
|
||
|
||
// --- Markdown helper functions below ---
|
||
|
||
// processLists wraps consecutive list items in <ul> or <ol> tags.
|
||
func (t *MarkdownTool) processLists(html, itemPattern, listTag string) string {
|
||
reItem := regexp.MustCompile(itemPattern + `(.+)$`)
|
||
lines := strings.Split(html, "\n")
|
||
result := make([]string, 0, len(lines))
|
||
|
||
inList := false
|
||
for _, line := range lines {
|
||
if reItem.MatchString(line) {
|
||
content := reItem.ReplaceAllString(line, "$1")
|
||
if !inList {
|
||
result = append(result, fmt.Sprintf("<%s>", listTag))
|
||
inList = true
|
||
}
|
||
result = append(result, fmt.Sprintf("<li>%s</li>", content))
|
||
} else {
|
||
if inList {
|
||
result = append(result, fmt.Sprintf("</%s>", listTag))
|
||
inList = false
|
||
}
|
||
result = append(result, line)
|
||
}
|
||
}
|
||
if inList {
|
||
result = append(result, fmt.Sprintf("</%s>", listTag))
|
||
}
|
||
|
||
return strings.Join(result, "\n")
|
||
}
|
||
|
||
// wrapParagraphs wraps non-tag lines in <p> tags.
|
||
func (t *MarkdownTool) wrapParagraphs(html string) string {
|
||
lines := strings.Split(html, "\n")
|
||
result := make([]string, 0, len(lines))
|
||
|
||
skipTags := map[string]bool{
|
||
"<h1>": true, "<h2>": true, "<h3>": true, "<h4>": true, "<h5>": true, "<h6>": true,
|
||
"<hr>": true, "<ul>": true, "</ul>": true, "<ol>": true, "</ol>": true,
|
||
"<li>": true, "</li>": true, "<blockquote>": true, "</blockquote>": true,
|
||
"<pre>": true, "</pre>": true, "<img": true,
|
||
}
|
||
|
||
for _, line := range lines {
|
||
trimmed := strings.TrimSpace(line)
|
||
if trimmed == "" {
|
||
result = append(result, line)
|
||
continue
|
||
}
|
||
|
||
// Check if line starts with an HTML tag
|
||
isTag := false
|
||
for tag := range skipTags {
|
||
if strings.HasPrefix(trimmed, tag) {
|
||
isTag = true
|
||
break
|
||
}
|
||
}
|
||
|
||
if !isTag {
|
||
result = append(result, fmt.Sprintf("<p>%s</p>", trimmed))
|
||
} else {
|
||
result = append(result, line)
|
||
}
|
||
}
|
||
|
||
return strings.Join(result, "\n")
|
||
}
|
||
|
||
// escapeHTML replaces the characters &, <, > and " with their HTML entities
// and leaves every other byte of s unchanged.
func escapeHTML(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	// All four special characters are single-byte ASCII, so a byte-wise scan
	// is safe for UTF-8 input and passes multi-byte sequences through intact.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '&':
			out.WriteString("&" + "amp;")
		case '<':
			out.WriteString("&" + "lt;")
		case '>':
			out.WriteString("&" + "gt;")
		case '"':
			out.WriteString("&" + "quot;")
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
|
||
|
||
// truncateText returns s unchanged when it holds at most maxLen runes;
// otherwise it returns the first maxLen runes followed by "...".
//
// A negative maxLen is treated as 0 instead of causing a slice-bounds panic.
// The cut always falls on a rune boundary, and the string is scanned in place
// rather than copied into a []rune.
func truncateText(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}

	seen := 0
	for offset := range s {
		// offset is the byte index at which rune number seen+1 starts, so
		// reaching the limit here means at least one more rune follows.
		if seen == maxLen {
			return s[:offset] + "..."
		}
		seen++
	}
	return s
}
|