Files
Cyrene/backend/tool-engine/internal/tools/markdown.go
T
AskaEth 78e3f450c2 feat: Round 5 - Memory Service, Tool Engine, Call Records, Thinking Logs
- Fix: Session history flash (race condition + WS guard)
- Fix: Chat background overlay + sidebar transparency
- Fix: IoT device control (Chinese action names, status field)
- Feat: Independent memory-service (port 8091, 13 endpoints)
- Feat: Independent tool-engine service (port 8092, 13 tools)
- Feat: Tool call logs with paginated DevTools panel
- Feat: Thinking log records with DevTools panel
- Feat: Future development roadmap document
- Chore: Updated .gitignore, go.work, DevTools config
- Chore: 5-service health check, project review docs
2026-05-18 20:05:14 +08:00

349 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package tools
import (
"context"
"fmt"
"regexp"
"strings"
"github.com/yourname/cyrene-ai/tool-engine/internal/model"
)
// MarkdownTool provides Markdown processing utilities for the LLM.
type MarkdownTool struct{}
// NewMarkdownTool creates a Markdown processing tool.
func NewMarkdownTool() *MarkdownTool {
return &MarkdownTool{}
}
// Definition returns the tool definition for LLM function calling.
func (t *MarkdownTool) Definition() model.ToolDefinition {
return model.ToolDefinition{
Name: "markdown",
Description: "Markdown处理工具。将Markdown转为HTML、提取纯文本、提取链接/代码块、生成目录。用于处理Markdown格式的文档内容。",
Parameters: map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"action": map[string]interface{}{
"type": "string",
"enum": []string{"to_html", "to_text", "extract_links", "extract_code", "table_of_contents"},
"description": "操作类型。to_html: 转换为HTMLto_text: 提取纯文本;extract_links: 提取所有链接;extract_code: 提取所有代码块;table_of_contents: 生成目录",
},
"markdown": map[string]interface{}{
"type": "string",
"description": "Markdown格式文本,需要处理的Markdown内容",
},
},
"required": []string{"action", "markdown"},
},
}
}
// Execute performs Markdown processing operations.
func (t *MarkdownTool) Execute(ctx context.Context, arguments map[string]interface{}) (*model.ToolResult, error) {
action, ok := arguments["action"].(string)
if !ok || action == "" {
return &model.ToolResult{ID: "", Error: "缺少 action 参数"}, nil
}
md, ok := arguments["markdown"].(string)
if !ok || strings.TrimSpace(md) == "" {
return &model.ToolResult{ID: "", Error: "缺少 markdown 参数或内容为空"}, nil
}
switch action {
case "to_html":
return t.handleToHTML(md)
case "to_text":
return t.handleToText(md)
case "extract_links":
return t.handleExtractLinks(md)
case "extract_code":
return t.handleExtractCode(md)
case "table_of_contents":
return t.handleTableOfContents(md)
default:
return &model.ToolResult{
ID: "",
Error: fmt.Sprintf("未知操作: %s,支持: to_html, to_text, extract_links, extract_code, table_of_contents", action),
}, nil
}
}
func (t *MarkdownTool) handleToHTML(md string) (*model.ToolResult, error) {
html := md
codeBlocks := make([]string, 0)
reFence := regexp.MustCompile("(?s)```[^`]*```")
html = reFence.ReplaceAllStringFunc(html, func(match string) string {
codeBlocks = append(codeBlocks, match)
return fmt.Sprintf("\x00CODEBLOCK%d\x00", len(codeBlocks)-1)
})
inlineCodes := make([]string, 0)
reInlineCode := regexp.MustCompile("`[^`]+`")
html = reInlineCode.ReplaceAllStringFunc(html, func(match string) string {
inlineCodes = append(inlineCodes, match)
return fmt.Sprintf("\x00INLINECODE%d\x00", len(inlineCodes)-1)
})
reImage := regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`)
html = reImage.ReplaceAllString(html, `<img src="$2" alt="$1">`)
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
html = reLink.ReplaceAllString(html, `<a href="$2">$1</a>`)
reBold := regexp.MustCompile(`\*\*([^*]+)\*\*`)
html = reBold.ReplaceAllString(html, `<strong>$1</strong>`)
reBold2 := regexp.MustCompile(`__([^_]+)__`)
html = reBold2.ReplaceAllString(html, `<strong>$1</strong>`)
reItalic := regexp.MustCompile(`\*([^*]+)\*`)
html = reItalic.ReplaceAllString(html, `<em>$1</em>`)
reItalic2 := regexp.MustCompile(`_([^_]+)_`)
html = reItalic2.ReplaceAllString(html, `<em>$1</em>`)
reStrike := regexp.MustCompile(`~~([^~]+)~~`)
html = reStrike.ReplaceAllString(html, `<del>$1</del>`)
reH6 := regexp.MustCompile(`(?m)^######\s+(.+)$`)
html = reH6.ReplaceAllString(html, `<h6>$1</h6>`)
reH5 := regexp.MustCompile(`(?m)^#####\s+(.+)$`)
html = reH5.ReplaceAllString(html, `<h5>$1</h5>`)
reH4 := regexp.MustCompile(`(?m)^####\s+(.+)$`)
html = reH4.ReplaceAllString(html, `<h4>$1</h4>`)
reH3 := regexp.MustCompile(`(?m)^###\s+(.+)$`)
html = reH3.ReplaceAllString(html, `<h3>$1</h3>`)
reH2 := regexp.MustCompile(`(?m)^##\s+(.+)$`)
html = reH2.ReplaceAllString(html, `<h2>$1</h2>`)
reH1 := regexp.MustCompile(`(?m)^#\s+(.+)$`)
html = reH1.ReplaceAllString(html, `<h1>$1</h1>`)
reHR := regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`)
html = reHR.ReplaceAllString(html, `<hr>`)
html = t.processLists(html, `(?m)^[\-*]\s+`, "ul")
html = t.processLists(html, `(?m)^\d+\.\s+`, "ol")
reBlockquote := regexp.MustCompile(`(?m)^>\s?(.+)$`)
html = reBlockquote.ReplaceAllString(html, `<blockquote>$1</blockquote>`)
html = t.wrapParagraphs(html)
for i, cb := range codeBlocks {
content := strings.TrimPrefix(cb, "```")
content = strings.TrimSuffix(content, "```")
lang := ""
content = strings.TrimSpace(content)
if idx := strings.Index(content, "\n"); idx > 0 {
lang = strings.TrimSpace(content[:idx])
content = strings.TrimSpace(content[idx+1:])
}
if lang != "" {
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
fmt.Sprintf(`<pre><code class="language-%s">%s</code></pre>`, lang, escapeHTML(content)))
} else {
html = strings.ReplaceAll(html, fmt.Sprintf("\x00CODEBLOCK%d\x00", i),
fmt.Sprintf("<pre><code>%s</code></pre>", escapeHTML(content)))
}
}
for i, ic := range inlineCodes {
content := strings.Trim(ic, "`")
html = strings.ReplaceAll(html, fmt.Sprintf("\x00INLINECODE%d\x00", i),
fmt.Sprintf("<code>%s</code>", escapeHTML(content)))
}
return &model.ToolResult{ID: "", Output: html}, nil
}
func (t *MarkdownTool) handleToText(md string) (*model.ToolResult, error) {
text := md
reFence := regexp.MustCompile("(?s)```[^`]*```")
text = reFence.ReplaceAllString(text, "[代码块]")
reInlineCode := regexp.MustCompile("`[^`]+`")
text = reInlineCode.ReplaceAllString(text, "[代码]")
reImage := regexp.MustCompile(`!\[([^\]]*)\]\([^)]+\)`)
text = reImage.ReplaceAllString(text, "$1")
reLink := regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`)
text = reLink.ReplaceAllString(text, "$1")
text = regexp.MustCompile(`\*\*([^*]+)\*\*`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`__([^_]+)__`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`\*([^*]+)\*`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`_([^_]+)_`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`~~([^~]+)~~`).ReplaceAllString(text, "$1")
text = regexp.MustCompile(`(?m)^#{1,6}\s+`).ReplaceAllString(text, "")
text = regexp.MustCompile(`(?m)^(---|\*\*\*|___)\s*$`).ReplaceAllString(text, "")
text = regexp.MustCompile(`(?m)^[\-*]\s+`).ReplaceAllString(text, "")
text = regexp.MustCompile(`(?m)^\d+\.\s+`).ReplaceAllString(text, "")
text = regexp.MustCompile(`(?m)^>\s?`).ReplaceAllString(text, "")
text = regexp.MustCompile(`\n{3,}`).ReplaceAllString(text, "\n\n")
return &model.ToolResult{
ID: "",
Output: fmt.Sprintf("纯文本提取结果 (%d 字符):\n\n%s",
len([]rune(text)), strings.TrimSpace(text)),
}, nil
}
func (t *MarkdownTool) handleExtractLinks(md string) (*model.ToolResult, error) {
reLink := regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`)
matches := reLink.FindAllStringSubmatch(md, -1)
if len(matches) == 0 {
return &model.ToolResult{ID: "", Output: "未找到任何链接"}, nil
}
var result strings.Builder
result.WriteString(fmt.Sprintf("提取链接 (共 %d 个):\n\n", len(matches)))
for i, m := range matches {
result.WriteString(fmt.Sprintf("%d. [%s](%s)\n - 文本: %s\n - URL: %s\n\n",
i+1, m[1], m[2], m[1], m[2]))
}
return &model.ToolResult{ID: "", Output: strings.TrimSpace(result.String())}, nil
}
func (t *MarkdownTool) handleExtractCode(md string) (*model.ToolResult, error) {
reFence := regexp.MustCompile("(?s)```([^`]*)```")
matches := reFence.FindAllStringSubmatch(md, -1)
if len(matches) == 0 {
return &model.ToolResult{ID: "", Output: "未找到任何代码块"}, nil
}
var result strings.Builder
result.WriteString(fmt.Sprintf("提取代码块 (共 %d 个):\n\n", len(matches)))
for i, m := range matches {
content := strings.TrimSpace(m[1])
lang := ""
if idx := strings.Index(content, "\n"); idx > 0 {
lang = strings.TrimSpace(content[:idx])
content = strings.TrimSpace(content[idx+1:])
}
result.WriteString(fmt.Sprintf("--- 代码块 %d", i+1))
if lang != "" {
result.WriteString(fmt.Sprintf(" (语言: %s)", lang))
}
result.WriteString(fmt.Sprintf(" ---\n%s\n\n", truncateText(content, 500)))
}
return &model.ToolResult{ID: "", Output: strings.TrimSpace(result.String())}, nil
}
func (t *MarkdownTool) handleTableOfContents(md string) (*model.ToolResult, error) {
reHeading := regexp.MustCompile(`(?m)^(#{1,6})\s+(.+)$`)
matches := reHeading.FindAllStringSubmatch(md, -1)
if len(matches) == 0 {
return &model.ToolResult{ID: "", Output: "未找到任何标题,无法生成目录"}, nil
}
var result strings.Builder
result.WriteString(fmt.Sprintf("文档目录 (共 %d 个标题):\n\n", len(matches)))
for _, m := range matches {
level := len(m[1])
title := strings.TrimSpace(m[2])
indent := strings.Repeat(" ", level-1)
result.WriteString(fmt.Sprintf("%s%s %s\n", indent, strings.Repeat("#", level), title))
}
return &model.ToolResult{ID: "", Output: result.String()}, nil
}
func (t *MarkdownTool) processLists(html, itemPattern, listTag string) string {
reItem := regexp.MustCompile(itemPattern + `(.+)$`)
lines := strings.Split(html, "\n")
result := make([]string, 0, len(lines))
inList := false
for _, line := range lines {
if reItem.MatchString(line) {
content := reItem.ReplaceAllString(line, "$1")
if !inList {
result = append(result, fmt.Sprintf("<%s>", listTag))
inList = true
}
result = append(result, fmt.Sprintf("<li>%s</li>", content))
} else {
if inList {
result = append(result, fmt.Sprintf("</%s>", listTag))
inList = false
}
result = append(result, line)
}
}
if inList {
result = append(result, fmt.Sprintf("</%s>", listTag))
}
return strings.Join(result, "\n")
}
func (t *MarkdownTool) wrapParagraphs(html string) string {
lines := strings.Split(html, "\n")
result := make([]string, 0, len(lines))
skipTags := map[string]bool{
"<h1>": true, "<h2>": true, "<h3>": true, "<h4>": true, "<h5>": true, "<h6>": true,
"<hr>": true, "<ul>": true, "</ul>": true, "<ol>": true, "</ol>": true,
"<li>": true, "</li>": true, "<blockquote>": true, "</blockquote>": true,
"<pre>": true, "</pre>": true, "<img": true,
}
for _, line := range lines {
trimmed := strings.TrimSpace(line)
if trimmed == "" {
result = append(result, line)
continue
}
isTag := false
for tag := range skipTags {
if strings.HasPrefix(trimmed, tag) {
isTag = true
break
}
}
if !isTag {
result = append(result, fmt.Sprintf("<p>%s</p>", trimmed))
} else {
result = append(result, line)
}
}
return strings.Join(result, "\n")
}
func escapeHTML(s string) string {
replacer := strings.NewReplacer(
"&", "&"+"amp;",
"<", "&"+"lt;",
">", "&"+"gt;",
"\"", "&"+"quot;",
)
return replacer.Replace(s)
}
func truncateText(s string, maxLen int) string {
runes := []rune(s)
if len(runes) <= maxLen {
return s
}
return string(runes[:maxLen]) + "..."
}