Files
Cyrene/backend/ai-core/internal/tools/text_tool.go
T
AskaEth b6ec36886c feat: 第四轮功能增强 - LLM 思维记忆优化、DevTools 记忆UI、9个新工具、5分钟自我思考
- 优化 LLM 思维方式和记忆方法(类别/重要性/关键词/相似度合并/衰减)
- DevTools 记忆查询 UI 重新设计(类别筛选/排序/星标/搜索)
- 新增 9 个 LLM 工具:calculator, datetime, file_ops, http_request, json_ops, text, random, crypto, markdown
- 管理员主对话 5 分钟自我思考增强(工具调用/记忆提取/记忆维护)
2026-05-18 12:13:49 +08:00

346 lines
9.7 KiB
Go

package tools
import (
	"context"
	"fmt"
	"regexp"
	"sort"
	"strings"
	"unicode"
)
// TextTool is the "text" tool exposed to the LLM for basic text processing.
// It dispatches on an "action" argument: count (character/word/line/paragraph
// statistics), summarize (first paragraph plus heuristically chosen key
// sentences), translate (packages a translation request for the LLM; no
// translation is performed by the tool itself) and extract (regex-based
// extraction with email/phone/url presets).
// The struct has no fields, so a TextTool carries no state.
type TextTool struct{}
// NewTextTool returns a ready-to-use TextTool. The tool has no state, so no
// configuration is needed.
func NewTextTool() *TextTool {
	tool := new(TextTool)
	return tool
}
// Definition returns the function-calling schema that advertises this tool to
// the LLM: the tool name, a description, and a JSON-Schema-like parameter
// object. "action" and "text" are the only required parameters; "target_lang"
// and "pattern" are optional.
func (t *TextTool) Definition() ToolDefinition {
	// schema is a local shorthand for one JSON object in the parameter spec.
	type schema = map[string]interface{}

	actionParam := schema{
		"type":        "string",
		"enum":        []string{"count", "summarize", "translate", "extract"},
		"description": "操作类型。count: 统计字符/单词/行/段落数;summarize: 提取首段+关键句生成简单摘要;translate: 翻译文本(需指定target_lang);extract: 正则提取邮箱/电话/URL等",
	}
	textParam := schema{
		"type":        "string",
		"description": "输入文本,需要处理的文本内容",
	}
	targetLangParam := schema{
		"type":        "string",
		"enum":        []string{"en", "zh", "ja", "ko", "fr", "de"},
		"description": "翻译目标语言代码。en: 英语, zh: 中文, ja: 日语, ko: 韩语, fr: 法语, de: 德语",
	}
	patternParam := schema{
		"type":        "string",
		"description": "正则表达式模式,用于 extract 操作。常用预设: email(邮箱), phone(电话), url(网址)",
	}

	parameters := schema{
		"type": "object",
		"properties": schema{
			"action":      actionParam,
			"text":        textParam,
			"target_lang": targetLangParam,
			"pattern":     patternParam,
		},
		"required": []string{"action", "text"},
	}

	return ToolDefinition{
		Name:        "text",
		Description: "文本处理工具。统计文本、生成摘要、翻译文本、正则提取信息。用于处理用户提供的文本内容。",
		Parameters:  parameters,
	}
}
// Execute validates the call arguments and dispatches to the handler for the
// requested action.
//
// arguments must contain a non-empty string "action" and a string "text" that
// is not blank after trimming whitespace. Validation failures and unknown
// actions are reported through a ToolResult with Success=false; the returned
// error is nil on every path in this function. ctx is accepted as part of the
// signature but is not consulted here.
func (t *TextTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
	// reject builds the failure result shared by every validation error.
	reject := func(msg string) (*ToolResult, error) {
		return &ToolResult{ToolName: "text", Success: false, Error: msg}, nil
	}

	action, isString := arguments["action"].(string)
	if !isString || action == "" {
		return reject("缺少 action 参数")
	}

	text, isString := arguments["text"].(string)
	if !isString || strings.TrimSpace(text) == "" {
		return reject("缺少 text 参数或文本为空")
	}

	// translate and extract receive the whole argument map because they read
	// extra optional parameters (target_lang / pattern) besides the text.
	switch action {
	case "count":
		return t.handleCount(text)
	case "summarize":
		return t.handleSummarize(text)
	case "translate":
		return t.handleTranslate(arguments)
	case "extract":
		return t.handleExtract(arguments)
	}

	return reject(fmt.Sprintf("未知操作: %s,支持: count, summarize, translate, extract", action))
}
// countParagraphBreak matches the blank-line separator between two paragraphs.
// It is compiled once at package initialization rather than on every call.
var countParagraphBreak = regexp.MustCompile(`\n\s*\n`)

// handleCount reports size statistics for text as a formatted multi-line
// string: characters with and without whitespace, bytes, whitespace-separated
// words, lines, non-blank paragraphs, and Han (Chinese) characters.
//
// The "without whitespace" figure excludes every Unicode white-space rune
// (tabs, line breaks and the full-width space U+3000 included), not only the
// ASCII space. A trailing newline terminates the last line and does not count
// as an additional empty line. The returned error is always nil.
func (t *TextTool) handleCount(text string) (*ToolResult, error) {
	// One pass over the runes yields all three rune-based counters.
	var charCount, nonSpaceCount, chineseCount int
	for _, r := range text {
		charCount++
		if !unicode.IsSpace(r) {
			nonSpaceCount++
		}
		if unicode.Is(unicode.Han, r) {
			chineseCount++
		}
	}

	byteCount := len(text)
	wordCount := len(strings.Fields(text))

	// Every "\n" ends a line; text after the last "\n" is one more
	// (unterminated) line. Splitting on "\n" would report a phantom empty line
	// for text that ends with a newline.
	lineCount := strings.Count(text, "\n")
	if text != "" && !strings.HasSuffix(text, "\n") {
		lineCount++
	}

	// Paragraphs are separated by blank lines; pieces that are empty after
	// trimming (leading/trailing blank lines) are not paragraphs.
	paraCount := 0
	for _, p := range countParagraphBreak.Split(text, -1) {
		if strings.TrimSpace(p) != "" {
			paraCount++
		}
	}

	return &ToolResult{
		ToolName: "text",
		Success:  true,
		Data: fmt.Sprintf("文本统计结果:\n- 字符数 (含空格): %d\n- 字符数 (不含空格): %d\n- 字节数: %d\n- 单词数: %d\n- 行数: %d\n- 段落数: %d\n- 中文字符数: %d",
			charCount, nonSpaceCount,
			byteCount, wordCount, lineCount, paraCount, chineseCount),
	}, nil
}
// summaryParagraphBreak matches the blank-line separator between two
// paragraphs. It is compiled once at package initialization.
var summaryParagraphBreak = regexp.MustCompile(`\n\s*\n`)

// handleSummarize builds a simple extractive summary of text made of three
// parts: the first non-blank paragraph (truncated to 300 runes), up to five
// key sentences chosen by extractKeySentences, and an overview line with
// paragraph, sentence, word and line totals.
//
// Only non-blank paragraphs are counted, so leading or trailing blank lines
// do not inflate the paragraph total, and a trailing newline does not count
// as an extra line. The returned error is always nil.
func (t *TextTool) handleSummarize(text string) (*ToolResult, error) {
	const (
		maxFirstParaRunes = 300 // first paragraph is cut to this many runes
		keySentenceLimit  = 5   // maximum number of key sentences listed
	)

	// Collect the non-blank paragraphs once; they drive both the excerpt and
	// the paragraph total.
	var paragraphs []string
	for _, p := range summaryParagraphBreak.Split(text, -1) {
		if trimmed := strings.TrimSpace(p); trimmed != "" {
			paragraphs = append(paragraphs, trimmed)
		}
	}

	var result strings.Builder
	result.WriteString("文本摘要:\n\n")

	if len(paragraphs) > 0 {
		firstPara := paragraphs[0]
		// Truncate on rune boundaries so multi-byte characters stay intact.
		if runes := []rune(firstPara); len(runes) > maxFirstParaRunes {
			firstPara = string(runes[:maxFirstParaRunes]) + "..."
		}
		result.WriteString("【首段】\n")
		result.WriteString(firstPara)
		result.WriteString("\n\n")
	}

	sentences := t.splitSentences(text)
	keySentences := t.extractKeySentences(sentences, keySentenceLimit)
	if len(keySentences) > 0 {
		result.WriteString("【关键句】\n")
		for i, s := range keySentences {
			fmt.Fprintf(&result, "%d. %s\n", i+1, s)
		}
	}

	// Every "\n" ends a line; text after the last "\n" is one more line.
	lineCount := strings.Count(text, "\n")
	if text != "" && !strings.HasSuffix(text, "\n") {
		lineCount++
	}

	fmt.Fprintf(&result, "\n【概况】共 %d 段、%d 句、%d 词、%d 行",
		len(paragraphs), len(sentences), len(strings.Fields(text)), lineCount)

	return &ToolResult{
		ToolName: "text",
		Success:  true,
		Data:     result.String(),
	}, nil
}
// textSentencePattern matches one sentence: a run of non-terminator characters
// followed by at most one terminator. Terminators are the Chinese full stop
// (。), the full-width exclamation and question marks (U+FF01, U+FF1F), the
// ASCII ".", "!" and "?", and a line break. The full-width marks are written
// as \x{...} escapes so they cannot be confused with their ASCII look-alikes.
// The pattern is compiled once at package initialization.
var textSentencePattern = regexp.MustCompile(`[^。\x{FF01}\x{FF1F}.!?\n]+[。\x{FF01}\x{FF1F}.!?\n]?`)

// splitSentences splits text into sentences at Chinese and ASCII sentence
// terminators and at line breaks. Each returned piece keeps its trailing
// terminator (if any) and any surrounding whitespace; runs consisting only of
// terminators produce no piece. It returns nil when text contains no
// sentence characters.
func (t *TextTool) splitSentences(text string) []string {
	return textSentencePattern.FindAllString(text, -1)
}
// extractKeySentences returns up to maxCount of the highest-scoring sentences,
// best first.
//
// Each sentence is trimmed; sentences shorter than 10 runes are ignored. The
// score is the sentence length in runes plus 50 for every hint keyword it
// contains (matched case-insensitively as a substring). Sentences with equal
// scores keep their original relative order, so the result is deterministic.
// A non-positive maxCount yields an empty, non-nil slice.
func (t *TextTool) extractKeySentences(sentences []string, maxCount int) []string {
	if maxCount <= 0 {
		return []string{}
	}

	type candidate struct {
		text  string
		score int
	}
	keywords := []string{"重要", "关键", "核心", "主要", "首先", "最后", "因此", "所以", "总结",
		"important", "key", "critical", "significant", "therefore", "conclusion", "summary"}

	candidates := make([]candidate, 0, len(sentences))
	for _, s := range sentences {
		trimmed := strings.TrimSpace(s)
		length := len([]rune(trimmed))
		if length < 10 {
			continue
		}
		score := length // longer sentences are more likely informative
		lower := strings.ToLower(trimmed)
		for _, kw := range keywords {
			if strings.Contains(lower, kw) {
				score += 50
			}
		}
		candidates = append(candidates, candidate{text: trimmed, score: score})
	}

	// Stable sort: highest score first, ties stay in document order.
	sort.SliceStable(candidates, func(i, j int) bool {
		return candidates[i].score > candidates[j].score
	})

	if len(candidates) > maxCount {
		candidates = candidates[:maxCount]
	}
	result := make([]string, 0, len(candidates))
	for _, c := range candidates {
		result = append(result, c.text)
	}
	return result
}
// handleTranslate does not translate anything itself. It wraps the source text
// and the requested target language into a formatted request that asks the LLM
// to perform the translation.
//
// "target_lang" defaults to "zh" when absent or empty. Codes without a known
// display name are shown verbatim. The returned error is always nil.
func (t *TextTool) handleTranslate(arguments map[string]interface{}) (*ToolResult, error) {
	// A missing or non-string value simply leaves the zero value "".
	source, _ := arguments["text"].(string)

	code := "zh"
	if requested, _ := arguments["target_lang"].(string); requested != "" {
		code = requested
	}

	// Display name for the language; unknown codes fall back to the code itself.
	var display string
	switch code {
	case "en":
		display = "英语"
	case "zh":
		display = "中文"
	case "ja":
		display = "日语"
	case "ko":
		display = "韩语"
	case "fr":
		display = "法语"
	case "de":
		display = "德语"
	default:
		display = code
	}

	request := fmt.Sprintf("【翻译请求】\n目标语言: %s (%s)\n原文 (%d 字符):\n---\n%s\n---\n\n提示: 实际翻译由LLM完成,请基于以上原文和目标语言进行翻译。",
		display, code, len([]rune(source)), source)

	out := &ToolResult{ToolName: "text", Success: true}
	out.Data = request
	return out, nil
}
// extractPresets lists the built-in extraction patterns in the fixed order in
// which they are reported when no pattern is requested. The expressions are
// compiled once at package initialization.
var extractPresets = []struct {
	name string
	re   *regexp.Regexp
}{
	{"email", regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`)},
	{"phone", regexp.MustCompile(`(?:\+?86[\-\s]?)?1[3-9]\d{9}`)},
	{"url", regexp.MustCompile(`https?://[^\s<>"{}|\\^` + "`" + `\[\]]+`)},
}

// handleExtract extracts substrings of the "text" argument that match a
// regular expression.
//
// The optional "pattern" argument is either a preset name (email, phone, url;
// case-insensitive) or a custom regular expression. When it is empty, every
// preset is applied and the hits are reported per preset in the fixed order
// email, phone, url. An invalid custom expression yields a ToolResult with
// Success=false; finding no match is still a successful result. Reported
// counts are the total number of matches, while the listed values are
// de-duplicated. The returned error is always nil.
func (t *TextTool) handleExtract(arguments map[string]interface{}) (*ToolResult, error) {
	// A missing or non-string value simply leaves the zero value "".
	text, _ := arguments["text"].(string)
	pattern, _ := arguments["pattern"].(string)

	if pattern == "" {
		return &ToolResult{
			ToolName: "text",
			Success:  true,
			Data:     extractAllPresets(text),
		}, nil
	}

	// Resolve a preset name first; anything else is a custom expression.
	var re *regexp.Regexp
	wanted := strings.ToLower(pattern)
	for _, p := range extractPresets {
		if p.name == wanted {
			re = p.re
			break
		}
	}
	if re == nil {
		compiled, err := regexp.Compile(pattern)
		if err != nil {
			return &ToolResult{
				ToolName: "text",
				Success:  false,
				Error:    fmt.Sprintf("正则表达式无效: %v", err),
			}, nil
		}
		re = compiled
	}
	// Report the expression actually applied (the expanded preset, if any).
	pattern = re.String()

	matches := re.FindAllString(text, -1)
	if len(matches) == 0 {
		return &ToolResult{
			ToolName: "text",
			Success:  true,
			Data:     fmt.Sprintf("未找到匹配模式 '%s' 的内容", pattern),
		}, nil
	}

	var result strings.Builder
	fmt.Fprintf(&result, "正则提取结果 (模式: %s, 共 %d 个匹配):\n", pattern, len(matches))
	writeUniqueMatches(&result, matches)
	return &ToolResult{
		ToolName: "text",
		Success:  true,
		Data:     result.String(),
	}, nil
}

// extractAllPresets applies every preset to text and returns the formatted
// report, or a fixed "nothing found" message when no preset matches.
func extractAllPresets(text string) string {
	var sections strings.Builder
	for _, p := range extractPresets {
		matches := p.re.FindAllString(text, -1)
		if len(matches) == 0 {
			continue
		}
		fmt.Fprintf(&sections, "【%s】(共 %d 个):\n", p.name, len(matches))
		writeUniqueMatches(&sections, matches)
		sections.WriteString("\n")
	}
	if sections.Len() == 0 {
		return "未提取到匹配的内容(邮箱、电话、URL)"
	}
	return "文本提取结果:\n\n" + sections.String()
}

// writeUniqueMatches appends one bullet line per distinct match to b, keeping
// the order of first occurrence.
func writeUniqueMatches(b *strings.Builder, matches []string) {
	seen := make(map[string]struct{}, len(matches))
	for _, m := range matches {
		if _, dup := seen[m]; dup {
			continue
		}
		seen[m] = struct{}{}
		fmt.Fprintf(b, " - %s\n", m)
	}
}