b6ec36886c
- 优化 LLM 思维方式和记忆方法(类别/重要性/关键词/相似度合并/衰减) - DevTools 记忆查询 UI 重新设计(类别筛选/排序/星标/搜索) - 新增 9 个 LLM 工具:calculator, datetime, file_ops, http_request, json_ops, text, random, crypto, markdown - 管理员主对话 5 分钟自我思考增强(工具调用/记忆提取/记忆维护)
346 lines
9.7 KiB
Go
346 lines
9.7 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// TextTool is a stateless text-processing tool exposed to the LLM.
// Its actions are count, summarize, translate, and extract. The translate
// action only packages the request; the translation itself is left to the LLM.
type TextTool struct{}

// NewTextTool returns a ready-to-use TextTool.
func NewTextTool() *TextTool {
	return new(TextTool)
}
|
|
|
|
// Definition returns the tool definition for LLM function calling.
|
|
func (t *TextTool) Definition() ToolDefinition {
|
|
return ToolDefinition{
|
|
Name: "text",
|
|
Description: "文本处理工具。统计文本、生成摘要、翻译文本、正则提取信息。用于处理用户提供的文本内容。",
|
|
Parameters: map[string]interface{}{
|
|
"type": "object",
|
|
"properties": map[string]interface{}{
|
|
"action": map[string]interface{}{
|
|
"type": "string",
|
|
"enum": []string{"count", "summarize", "translate", "extract"},
|
|
"description": "操作类型。count: 统计字符/单词/行/段落数;summarize: 提取首段+关键句生成简单摘要;translate: 翻译文本(需指定target_lang);extract: 正则提取邮箱/电话/URL等",
|
|
},
|
|
"text": map[string]interface{}{
|
|
"type": "string",
|
|
"description": "输入文本,需要处理的文本内容",
|
|
},
|
|
"target_lang": map[string]interface{}{
|
|
"type": "string",
|
|
"enum": []string{"en", "zh", "ja", "ko", "fr", "de"},
|
|
"description": "翻译目标语言代码。en: 英语, zh: 中文, ja: 日语, ko: 韩语, fr: 法语, de: 德语",
|
|
},
|
|
"pattern": map[string]interface{}{
|
|
"type": "string",
|
|
"description": "正则表达式模式,用于 extract 操作。常用预设: email(邮箱), phone(电话), url(网址)",
|
|
},
|
|
},
|
|
"required": []string{"action", "text"},
|
|
},
|
|
}
|
|
}
|
|
|
|
// Execute performs text processing operations.
|
|
func (t *TextTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
|
|
action, ok := arguments["action"].(string)
|
|
if !ok || action == "" {
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: false,
|
|
Error: "缺少 action 参数",
|
|
}, nil
|
|
}
|
|
|
|
text, ok := arguments["text"].(string)
|
|
if !ok || strings.TrimSpace(text) == "" {
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: false,
|
|
Error: "缺少 text 参数或文本为空",
|
|
}, nil
|
|
}
|
|
|
|
switch action {
|
|
case "count":
|
|
return t.handleCount(text)
|
|
case "summarize":
|
|
return t.handleSummarize(text)
|
|
case "translate":
|
|
return t.handleTranslate(arguments)
|
|
case "extract":
|
|
return t.handleExtract(arguments)
|
|
default:
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: false,
|
|
Error: fmt.Sprintf("未知操作: %s,支持: count, summarize, translate, extract", action),
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
// handleCount counts characters, words, lines, and paragraphs in the text.
|
|
func (t *TextTool) handleCount(text string) (*ToolResult, error) {
|
|
charCount := len([]rune(text))
|
|
byteCount := len(text)
|
|
|
|
words := strings.Fields(text)
|
|
wordCount := len(words)
|
|
|
|
lines := strings.Split(text, "\n")
|
|
lineCount := len(lines)
|
|
|
|
// Count paragraphs (separated by double newlines)
|
|
paragraphs := regexp.MustCompile(`\n\s*\n`).Split(text, -1)
|
|
paraCount := 0
|
|
for _, p := range paragraphs {
|
|
if strings.TrimSpace(p) != "" {
|
|
paraCount++
|
|
}
|
|
}
|
|
|
|
// Count Chinese characters
|
|
chineseCount := 0
|
|
for _, r := range text {
|
|
if unicode.Is(unicode.Han, r) {
|
|
chineseCount++
|
|
}
|
|
}
|
|
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: fmt.Sprintf("文本统计结果:\n- 字符数 (含空格): %d\n- 字符数 (不含空格): %d\n- 字节数: %d\n- 单词数: %d\n- 行数: %d\n- 段落数: %d\n- 中文字符数: %d",
|
|
charCount, len([]rune(strings.ReplaceAll(text, " ", ""))),
|
|
byteCount, wordCount, lineCount, paraCount, chineseCount),
|
|
}, nil
|
|
}
|
|
|
|
// handleSummarize generates a simple summary by extracting the first paragraph and key sentences.
|
|
func (t *TextTool) handleSummarize(text string) (*ToolResult, error) {
|
|
var result strings.Builder
|
|
result.WriteString("文本摘要:\n\n")
|
|
|
|
// Extract first paragraph
|
|
paragraphs := regexp.MustCompile(`\n\s*\n`).Split(text, -1)
|
|
var firstPara string
|
|
for _, p := range paragraphs {
|
|
if trimmed := strings.TrimSpace(p); trimmed != "" {
|
|
firstPara = trimmed
|
|
break
|
|
}
|
|
}
|
|
|
|
if firstPara != "" {
|
|
result.WriteString("【首段】\n")
|
|
// Truncate if very long
|
|
runes := []rune(firstPara)
|
|
if len(runes) > 300 {
|
|
firstPara = string(runes[:300]) + "..."
|
|
}
|
|
result.WriteString(firstPara)
|
|
result.WriteString("\n\n")
|
|
}
|
|
|
|
// Extract key sentences (longer sentences with important keywords)
|
|
sentences := t.splitSentences(text)
|
|
keySentences := t.extractKeySentences(sentences, 5)
|
|
|
|
if len(keySentences) > 0 {
|
|
result.WriteString("【关键句】\n")
|
|
for i, s := range keySentences {
|
|
result.WriteString(fmt.Sprintf("%d. %s\n", i+1, s))
|
|
}
|
|
}
|
|
|
|
// Overall stats
|
|
lines := strings.Split(text, "\n")
|
|
words := strings.Fields(text)
|
|
result.WriteString(fmt.Sprintf("\n【概况】共 %d 段、%d 句、%d 词、%d 行",
|
|
len(paragraphs), len(sentences), len(words), len(lines)))
|
|
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: result.String(),
|
|
}, nil
|
|
}
|
|
|
|
// splitSentences splits text into sentences based on punctuation.
|
|
func (t *TextTool) splitSentences(text string) []string {
|
|
re := regexp.MustCompile(`[^。!?.!?\n]+[。!?.!?\n]?`)
|
|
return re.FindAllString(text, -1)
|
|
}
|
|
|
|
// extractKeySentences selects the most informative sentences (longer ones with keyword hints).
|
|
func (t *TextTool) extractKeySentences(sentences []string, maxCount int) []string {
|
|
type scored struct {
|
|
text string
|
|
score int
|
|
}
|
|
|
|
var scoredList []scored
|
|
keywords := []string{"重要", "关键", "核心", "主要", "首先", "最后", "因此", "所以", "总结",
|
|
"important", "key", "critical", "significant", "therefore", "conclusion", "summary"}
|
|
|
|
for _, s := range sentences {
|
|
trimmed := strings.TrimSpace(s)
|
|
if len([]rune(trimmed)) < 10 {
|
|
continue
|
|
}
|
|
|
|
score := len([]rune(trimmed)) // longer sentences are more likely informative
|
|
lower := strings.ToLower(trimmed)
|
|
for _, kw := range keywords {
|
|
if strings.Contains(lower, kw) {
|
|
score += 50
|
|
}
|
|
}
|
|
scoredList = append(scoredList, scored{text: trimmed, score: score})
|
|
}
|
|
|
|
// Sort by score descending (simple bubble sort for small lists)
|
|
for i := 0; i < len(scoredList); i++ {
|
|
for j := i + 1; j < len(scoredList); j++ {
|
|
if scoredList[j].score > scoredList[i].score {
|
|
scoredList[i], scoredList[j] = scoredList[j], scoredList[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
result := make([]string, 0, maxCount)
|
|
for i := 0; i < len(scoredList) && i < maxCount; i++ {
|
|
result = append(result, scoredList[i].text)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// handleTranslate provides a translation placeholder (actual translation requires LLM).
|
|
func (t *TextTool) handleTranslate(arguments map[string]interface{}) (*ToolResult, error) {
|
|
text, _ := arguments["text"].(string)
|
|
targetLang, _ := arguments["target_lang"].(string)
|
|
if targetLang == "" {
|
|
targetLang = "zh"
|
|
}
|
|
|
|
langNames := map[string]string{
|
|
"en": "英语",
|
|
"zh": "中文",
|
|
"ja": "日语",
|
|
"ko": "韩语",
|
|
"fr": "法语",
|
|
"de": "德语",
|
|
}
|
|
|
|
langName, ok := langNames[targetLang]
|
|
if !ok {
|
|
langName = targetLang
|
|
}
|
|
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: fmt.Sprintf("【翻译请求】\n目标语言: %s (%s)\n原文 (%d 字符):\n---\n%s\n---\n\n提示: 实际翻译由LLM完成,请基于以上原文和目标语言进行翻译。",
|
|
langName, targetLang, len([]rune(text)), text),
|
|
}, nil
|
|
}
|
|
|
|
// handleExtract extracts patterns like emails, phones, URLs from text using regex.
|
|
func (t *TextTool) handleExtract(arguments map[string]interface{}) (*ToolResult, error) {
|
|
text, _ := arguments["text"].(string)
|
|
pattern, _ := arguments["pattern"].(string)
|
|
|
|
// Predefined patterns
|
|
presets := map[string]string{
|
|
"email": `[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`,
|
|
"phone": `(?:\+?86[\-\s]?)?1[3-9]\d{9}`,
|
|
"url": `https?://[^\s<>"{}|\\^` + "`" + `\[\]]+`,
|
|
}
|
|
|
|
if preset, ok := presets[strings.ToLower(pattern)]; ok {
|
|
pattern = preset
|
|
}
|
|
|
|
if pattern == "" {
|
|
// Extract all common patterns when no specific pattern given
|
|
var result strings.Builder
|
|
result.WriteString("文本提取结果:\n\n")
|
|
|
|
for name, p := range presets {
|
|
re, err := regexp.Compile(p)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
matches := re.FindAllString(text, -1)
|
|
if len(matches) > 0 {
|
|
result.WriteString(fmt.Sprintf("【%s】(共 %d 个):\n", name, len(matches)))
|
|
seen := make(map[string]bool)
|
|
for _, m := range matches {
|
|
if !seen[m] {
|
|
result.WriteString(fmt.Sprintf(" - %s\n", m))
|
|
seen[m] = true
|
|
}
|
|
}
|
|
result.WriteString("\n")
|
|
}
|
|
}
|
|
|
|
if result.Len() == len("文本提取结果:\n\n") {
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: "未提取到匹配的内容(邮箱、电话、URL)",
|
|
}, nil
|
|
}
|
|
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: result.String(),
|
|
}, nil
|
|
}
|
|
|
|
// Use custom regex pattern
|
|
re, err := regexp.Compile(pattern)
|
|
if err != nil {
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: false,
|
|
Error: fmt.Sprintf("正则表达式无效: %v", err),
|
|
}, nil
|
|
}
|
|
|
|
matches := re.FindAllString(text, -1)
|
|
if len(matches) == 0 {
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: fmt.Sprintf("未找到匹配模式 '%s' 的内容", pattern),
|
|
}, nil
|
|
}
|
|
|
|
var result strings.Builder
|
|
result.WriteString(fmt.Sprintf("正则提取结果 (模式: %s, 共 %d 个匹配):\n", pattern, len(matches)))
|
|
seen := make(map[string]bool)
|
|
for _, m := range matches {
|
|
if !seen[m] {
|
|
result.WriteString(fmt.Sprintf(" - %s\n", m))
|
|
seen[m] = true
|
|
}
|
|
}
|
|
|
|
return &ToolResult{
|
|
ToolName: "text",
|
|
Success: true,
|
|
Data: result.String(),
|
|
}, nil
|
|
}
|