package tools import ( "context" "fmt" "regexp" "strings" "unicode" "github.com/yourname/cyrene-ai/tool-engine/internal/model" ) // TextTool provides text processing operations for the LLM. type TextTool struct{} // NewTextTool creates a text processing tool. func NewTextTool() *TextTool { return &TextTool{} } // Definition returns the tool definition for LLM function calling. func (t *TextTool) Definition() model.ToolDefinition { return model.ToolDefinition{ Name: "text", Description: "文本处理工具。统计文本、生成摘要、翻译文本、正则提取信息。用于处理用户提供的文本内容。", Parameters: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ "action": map[string]interface{}{ "type": "string", "enum": []string{"count", "summarize", "translate", "extract"}, "description": "操作类型。count: 统计字符/单词/行/段落数;summarize: 提取首段+关键句生成简单摘要;translate: 翻译文本(需指定target_lang);extract: 正则提取邮箱/电话/URL等", }, "text": map[string]interface{}{ "type": "string", "description": "输入文本,需要处理的文本内容", }, "target_lang": map[string]interface{}{ "type": "string", "enum": []string{"en", "zh", "ja", "ko", "fr", "de"}, "description": "翻译目标语言代码。en: 英语, zh: 中文, ja: 日语, ko: 韩语, fr: 法语, de: 德语", }, "pattern": map[string]interface{}{ "type": "string", "description": "正则表达式模式,用于 extract 操作。常用预设: email(邮箱), phone(电话), url(网址)", }, }, "required": []string{"action", "text"}, }, } } // Execute performs text processing operations. func (t *TextTool) Execute(ctx context.Context, arguments map[string]interface{}) (*model.ToolResult, error) { action, ok := arguments["action"].(string) if !ok || action == "" { return &model.ToolResult{ID: "", Error: "缺少 action 参数"}, nil } text, ok := arguments["text"].(string) if !ok || strings.TrimSpace(text) == "" { return &model.ToolResult{ID: "", Error: "缺少 text 参数或文本为空"}, nil } switch action { case "count": return t.handleCount(text) case "summarize": return t.handleSummarize(text) case "translate": return t.handleTranslate(arguments) case "extract": return t.handleExtract(arguments) default: return &model.ToolResult{ ID: "", Error: fmt.Sprintf("未知操作: %s,支持: count, summarize, translate, extract", action), }, nil } } func (t *TextTool) handleCount(text string) (*model.ToolResult, error) { charCount := len([]rune(text)) byteCount := len(text) words := strings.Fields(text) wordCount := len(words) lines := strings.Split(text, "\n") lineCount := len(lines) paragraphs := regexp.MustCompile(`\n\s*\n`).Split(text, -1) paraCount := 0 for _, p := range paragraphs { if strings.TrimSpace(p) != "" { paraCount++ } } chineseCount := 0 for _, r := range text { if unicode.Is(unicode.Han, r) { chineseCount++ } } return &model.ToolResult{ ID: "", Output: fmt.Sprintf("文本统计结果:\n- 字符数 (含空格): %d\n- 字符数 (不含空格): %d\n- 字节数: %d\n- 单词数: %d\n- 行数: %d\n- 段落数: %d\n- 中文字符数: %d", charCount, len([]rune(strings.ReplaceAll(text, " ", ""))), byteCount, wordCount, lineCount, paraCount, chineseCount), }, nil } func (t *TextTool) handleSummarize(text string) (*model.ToolResult, error) { var result strings.Builder result.WriteString("文本摘要:\n\n") paragraphs := regexp.MustCompile(`\n\s*\n`).Split(text, -1) var firstPara string for _, p := range paragraphs { if trimmed := strings.TrimSpace(p); trimmed != "" { firstPara = trimmed break } } if firstPara != "" { result.WriteString("【首段】\n") runes := []rune(firstPara) if len(runes) > 300 { firstPara = string(runes[:300]) + "..." } result.WriteString(firstPara) result.WriteString("\n\n") } sentences := t.splitSentences(text) keySentences := t.extractKeySentences(sentences, 5) if len(keySentences) > 0 { result.WriteString("【关键句】\n") for i, s := range keySentences { result.WriteString(fmt.Sprintf("%d. %s\n", i+1, s)) } } lines := strings.Split(text, "\n") words := strings.Fields(text) result.WriteString(fmt.Sprintf("\n【概况】共 %d 段、%d 句、%d 词、%d 行", len(paragraphs), len(sentences), len(words), len(lines))) return &model.ToolResult{ID: "", Output: result.String()}, nil } func (t *TextTool) splitSentences(text string) []string { re := regexp.MustCompile(`[^。!?.!?\n]+[。!?.!?\n]?`) return re.FindAllString(text, -1) } func (t *TextTool) extractKeySentences(sentences []string, maxCount int) []string { type scored struct { text string score int } var scoredList []scored keywords := []string{"重要", "关键", "核心", "主要", "首先", "最后", "因此", "所以", "总结", "important", "key", "critical", "significant", "therefore", "conclusion", "summary"} for _, s := range sentences { trimmed := strings.TrimSpace(s) if len([]rune(trimmed)) < 10 { continue } score := len([]rune(trimmed)) lower := strings.ToLower(trimmed) for _, kw := range keywords { if strings.Contains(lower, kw) { score += 50 } } scoredList = append(scoredList, scored{text: trimmed, score: score}) } for i := 0; i < len(scoredList); i++ { for j := i + 1; j < len(scoredList); j++ { if scoredList[j].score > scoredList[i].score { scoredList[i], scoredList[j] = scoredList[j], scoredList[i] } } } result := make([]string, 0, maxCount) for i := 0; i < len(scoredList) && i < maxCount; i++ { result = append(result, scoredList[i].text) } return result } func (t *TextTool) handleTranslate(arguments map[string]interface{}) (*model.ToolResult, error) { text, _ := arguments["text"].(string) targetLang, _ := arguments["target_lang"].(string) if targetLang == "" { targetLang = "zh" } langNames := map[string]string{ "en": "英语", "zh": "中文", "ja": "日语", "ko": "韩语", "fr": "法语", "de": "德语", } langName, ok := langNames[targetLang] if !ok { langName = targetLang } return &model.ToolResult{ ID: "", Output: fmt.Sprintf("【翻译请求】\n目标语言: %s (%s)\n原文 (%d 字符):\n---\n%s\n---\n\n提示: 实际翻译由LLM完成,请基于以上原文和目标语言进行翻译。", langName, targetLang, len([]rune(text)), text), }, nil } func (t *TextTool) handleExtract(arguments map[string]interface{}) (*model.ToolResult, error) { text, _ := arguments["text"].(string) pattern, _ := arguments["pattern"].(string) presets := map[string]string{ "email": `[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`, "phone": `(?:\+?86[\-\s]?)?1[3-9]\d{9}`, "url": `https?://[^\s<>"{}|\\^` + "`" + `\[\]]+`, } if preset, ok := presets[strings.ToLower(pattern)]; ok { pattern = preset } if pattern == "" { var result strings.Builder result.WriteString("文本提取结果:\n\n") for name, p := range presets { re, err := regexp.Compile(p) if err != nil { continue } matches := re.FindAllString(text, -1) if len(matches) > 0 { result.WriteString(fmt.Sprintf("【%s】(共 %d 个):\n", name, len(matches))) seen := make(map[string]bool) for _, m := range matches { if !seen[m] { result.WriteString(fmt.Sprintf(" - %s\n", m)) seen[m] = true } } result.WriteString("\n") } } if result.Len() == len("文本提取结果:\n\n") { return &model.ToolResult{ID: "", Output: "未提取到匹配的内容(邮箱、电话、URL)"}, nil } return &model.ToolResult{ID: "", Output: result.String()}, nil } re, err := regexp.Compile(pattern) if err != nil { return &model.ToolResult{ID: "", Error: fmt.Sprintf("正则表达式无效: %v", err)}, nil } matches := re.FindAllString(text, -1) if len(matches) == 0 { return &model.ToolResult{ID: "", Output: fmt.Sprintf("未找到匹配模式 '%s' 的内容", pattern)}, nil } var result strings.Builder result.WriteString(fmt.Sprintf("正则提取结果 (模式: %s, 共 %d 个匹配):\n", pattern, len(matches))) seen := make(map[string]bool) for _, m := range matches { if !seen[m] { result.WriteString(fmt.Sprintf(" - %s\n", m)) seen[m] = true } } return &model.ToolResult{ID: "", Output: result.String()}, nil }