Files
Cyrene/backend/ai-core/internal/tools/web_search.go
T
2026-05-26 20:40:34 +08:00

293 lines
7.7 KiB
Go

package tools
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
)
// WebSearchTool is a web search tool. It queries a SearXNG instance when one
// is configured and falls back to the DuckDuckGo API otherwise.
type WebSearchTool struct {
// client performs the outgoing HTTP requests.
client *http.Client
// timeout is set by the constructors to the same value as the client
// timeout; it is not read anywhere in the code visible in this file.
timeout time.Duration
// searxngURL is the base URL of the SearXNG instance. An empty value makes
// Execute use the DuckDuckGo path.
searxngURL string
}
// NewWebSearchTool builds a WebSearchTool without a SearXNG endpoint, so
// searches go through the DuckDuckGo path. The HTTP client uses a 10 second
// timeout.
func NewWebSearchTool() *WebSearchTool {
	const requestTimeout = 10 * time.Second

	tool := new(WebSearchTool)
	tool.client = &http.Client{Timeout: requestTimeout}
	tool.timeout = requestTimeout
	return tool
}
// NewWebSearchToolWithURL builds a WebSearchTool that searches through the
// SearXNG instance at searxngURL. Trailing slashes are trimmed from the URL so
// request paths can be appended directly. The HTTP client uses a 10 second
// timeout.
func NewWebSearchToolWithURL(searxngURL string) *WebSearchTool {
	const requestTimeout = 10 * time.Second

	baseURL := strings.TrimRight(searxngURL, "/")
	httpClient := &http.Client{Timeout: requestTimeout}
	return &WebSearchTool{
		client:     httpClient,
		timeout:    requestTimeout,
		searxngURL: baseURL,
	}
}
// Definition describes the tool: its name, a human-readable description, and
// a JSON-schema style parameter object with a single required string
// parameter, "query".
func (t *WebSearchTool) Definition() ToolDefinition {
	// Schema of the only parameter the tool accepts.
	queryParam := map[string]interface{}{
		"type":        "string",
		"description": "搜索关键词",
	}
	schema := map[string]interface{}{
		"type":       "object",
		"properties": map[string]interface{}{"query": queryParam},
		"required":   []string{"query"},
	}

	return ToolDefinition{
		Name:        "web_search",
		Description: "搜索互联网信息。用于查找新闻、资料、知识等。返回搜索结果摘要(最多5条)。",
		Parameters:  schema,
	}
}
// duckDuckGoResponse holds the fields decoded from a DuckDuckGo API JSON
// response.
type duckDuckGoResponse struct {
// AbstractText is the summary text; it is rendered as the abstract.
AbstractText string `json:"AbstractText"`
// AbstractURL is rendered as the source link of the abstract.
AbstractURL string `json:"AbstractURL"`
// AbstractSource, Heading and AnswerType are decoded but not read by the
// code visible in this file.
AbstractSource string `json:"AbstractSource"`
Heading string `json:"Heading"`
// Answer is rendered as the instant answer when non-empty.
Answer string `json:"Answer"`
AnswerType string `json:"AnswerType"`
// RelatedTopics is listed only when Results is empty.
RelatedTopics []duckDuckGoRelated `json:"RelatedTopics"`
// Results takes precedence over RelatedTopics when non-empty.
Results []duckDuckGoResult `json:"Results"`
}
// duckDuckGoRelated is one entry of the "RelatedTopics" array in a DuckDuckGo
// response.
type duckDuckGoRelated struct {
// Text is the entry text; HTML tags are stripped from it before display.
Text string `json:"Text"`
// FirstURL is the link printed below the entry text.
FirstURL string `json:"FirstURL"`
}
// duckDuckGoResult is one entry of the "Results" array in a DuckDuckGo
// response.
type duckDuckGoResult struct {
// Text is the entry text; HTML tags are stripped from it before display.
Text string `json:"Text"`
// FirstURL is the link printed below the entry text.
FirstURL string `json:"FirstURL"`
}
// Execute runs a web search for the "query" entry of arguments.
//
// When "query" is missing, not a string, or empty, a failed ToolResult is
// returned with a nil error. Otherwise the search is delegated to SearXNG if
// a SearXNG URL is configured, and to DuckDuckGo if not.
func (t *WebSearchTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
	// A failed type assertion yields the empty string, so one emptiness check
	// covers the missing, wrong-type and empty cases.
	query, _ := arguments["query"].(string)
	if query == "" {
		invalid := &ToolResult{
			ToolName: "web_search",
			Success:  false,
			Error:    "缺少 query 参数",
		}
		return invalid, nil
	}

	if t.searxngURL == "" {
		return t.searchViaDuckDuckGo(ctx, query)
	}
	return t.searchViaSearXNG(ctx, query)
}
// searchViaSearXNG queries the configured SearXNG instance through its JSON
// search endpoint and renders the response as plain text.
//
// The output starts with a header line containing the query and the result
// count reported by the server, followed by every answer and by at most five
// results that have both a title and a URL. Result snippets are cleaned with
// cleanSnippet. When neither an answer nor a usable result was returned, the
// "no results" message is appended after the header.
//
// Request, HTTP status and decoding failures are reported through a
// ToolResult with Success=false; the returned error is always nil.
func (t *WebSearchTool) searchViaSearXNG(ctx context.Context, query string) (*ToolResult, error) {
	failure := func(msg string) (*ToolResult, error) {
		return &ToolResult{ToolName: "web_search", Success: false, Error: msg}, nil
	}

	apiURL := fmt.Sprintf("%s/search?format=json&engines=bing,sogou,360search,baidu&q=%s",
		t.searxngURL, url.QueryEscape(query))
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
	if err != nil {
		return failure(fmt.Sprintf("创建请求失败: %v", err))
	}

	resp, err := t.client.Do(req)
	if err != nil {
		return failure(fmt.Sprintf("SearXNG 请求失败: %v", err))
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return failure(fmt.Sprintf("SearXNG HTTP %d", resp.StatusCode))
	}

	var sr searxngAPIResponse
	if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
		return failure(fmt.Sprintf("SearXNG 解析失败: %v", err))
	}

	var out strings.Builder
	fmt.Fprintf(&out, "搜索关键词: %s (共%d条结果)\n\n", query, sr.NumberOrResults)
	for _, answer := range sr.Answers {
		fmt.Fprintf(&out, "📌 %s\n\n", answer)
	}

	shown := 0
	for _, r := range sr.Results {
		if shown >= 5 {
			break
		}
		// Entries without a title or a link are not useful to the caller.
		if r.Title == "" || r.URL == "" {
			continue
		}
		shown++
		fmt.Fprintf(&out, "%d. %s\n %s\n %s\n\n", shown, r.Title, r.URL, cleanSnippet(r.Content))
	}

	// The header is always written, so the builder length cannot be used to
	// detect an empty response; check what was actually emitted instead.
	if shown == 0 && len(sr.Answers) == 0 {
		out.WriteString("未找到相关结果。")
	}
	return &ToolResult{ToolName: "web_search", Success: true, Data: out.String()}, nil
}
// searxngAPIResponse holds the fields decoded from a SearXNG JSON search
// response.
type searxngAPIResponse struct {
// NumberOrResults is decoded from "number_of_results" and printed in the
// output header.
// NOTE(review): the Go field name looks like a typo for NumberOfResults;
// the JSON tag is what drives decoding.
NumberOrResults int `json:"number_of_results"`
// Results is the list of result entries; at most five are rendered.
Results []searxngResult `json:"results"`
// Answers are printed before the result list.
// NOTE(review): assumes the server encodes each answer as a plain JSON
// string — confirm against the SearXNG version in use.
Answers []string `json:"answers"`
}
// searxngResult is one entry of the "results" array in a SearXNG response.
type searxngResult struct {
// Title and URL must both be non-empty for the entry to be rendered.
Title string `json:"title"`
URL string `json:"url"`
// Content is the snippet text; it is passed through cleanSnippet before
// display.
Content string `json:"content"`
// Score is decoded but not read by the code visible in this file.
Score float64 `json:"score"`
}
// cleanSnippet removes HTML tags from s via stripHTML and limits the result
// to 200 runes. Longer text is cut at 200 runes and suffixed with "...".
func cleanSnippet(s string) string {
	const maxRunes = 200

	plain := stripHTML(s)
	// Count runes, not bytes, so multi-byte characters are never split.
	if chars := []rune(plain); len(chars) > maxRunes {
		return string(chars[:maxRunes]) + "..."
	}
	return plain
}
// searchViaDuckDuckGo queries the DuckDuckGo JSON API and renders the
// response as plain text.
//
// The output starts with a header line containing the query, followed (when
// present) by the instant answer, the abstract (cut to 500 runes) with its
// source URL, and at most five entries. Entries come from Results, or from
// RelatedTopics when Results is empty; entries with empty text are skipped
// and entry text is cleaned with cleanSnippet. When the response contains
// none of these, the "no results" message is appended after the header.
//
// Request, HTTP status, read and decoding failures are reported through a
// ToolResult with Success=false; the returned error is always nil.
func (t *WebSearchTool) searchViaDuckDuckGo(ctx context.Context, query string) (*ToolResult, error) {
	failure := func(msg string) (*ToolResult, error) {
		return &ToolResult{ToolName: "web_search", Success: false, Error: msg}, nil
	}

	apiURL := fmt.Sprintf("https://api.duckduckgo.com/?q=%s&format=json&no_html=1&skip_disambig=1",
		url.QueryEscape(query))
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
	if err != nil {
		return failure(fmt.Sprintf("创建请求失败: %v", err))
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; CyreneBot/1.0)")

	resp, err := t.client.Do(req)
	if err != nil {
		return failure(fmt.Sprintf("请求失败: %v", err))
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return failure(fmt.Sprintf("HTTP %d", resp.StatusCode))
	}

	// Read at most 500 KiB so an oversized response cannot exhaust memory.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 500*1024))
	if err != nil {
		return failure(fmt.Sprintf("读取响应失败: %v", err))
	}
	var ddg duckDuckGoResponse
	if err := json.Unmarshal(body, &ddg); err != nil {
		return failure(fmt.Sprintf("解析响应失败: %v", err))
	}

	var out strings.Builder
	fmt.Fprintf(&out, "搜索关键词: %s\n\n", query)

	// found records whether anything besides the header was written. The
	// header is always present, so the builder length cannot tell.
	found := false

	if ddg.Answer != "" {
		fmt.Fprintf(&out, "📌 即时答案: %s\n\n", ddg.Answer)
		found = true
	}

	if ddg.AbstractText != "" {
		abstract := ddg.AbstractText
		if runes := []rune(abstract); len(runes) > 500 {
			abstract = string(runes[:500]) + "..."
		}
		fmt.Fprintf(&out, "摘要: %s\n", abstract)
		if ddg.AbstractURL != "" {
			fmt.Fprintf(&out, "来源: %s\n", ddg.AbstractURL)
		}
		out.WriteString("\n")
		found = true
	}

	// writeEntry renders one result or related topic and counts it; entries
	// without text are skipped and do not count towards the limit.
	listed := 0
	writeEntry := func(rawText, link string) {
		if rawText == "" {
			return
		}
		fmt.Fprintf(&out, "\n🔗 %s\n", cleanSnippet(rawText))
		if link != "" {
			fmt.Fprintf(&out, " %s\n", link)
		}
		listed++
	}

	if len(ddg.Results) > 0 {
		for _, r := range ddg.Results {
			if listed >= 5 {
				break
			}
			writeEntry(r.Text, r.FirstURL)
		}
	} else {
		for _, topic := range ddg.RelatedTopics {
			if listed >= 5 {
				break
			}
			writeEntry(topic.Text, topic.FirstURL)
		}
	}

	if !found && listed == 0 {
		out.WriteString("未找到相关结果。")
	}
	return &ToolResult{ToolName: "web_search", Success: true, Data: out.String()}, nil
}
// stripHTML removes HTML tags from s and returns the remaining text with
// leading and trailing whitespace trimmed.
//
// Everything from a '<' up to the next '>' is dropped, and each removed tag
// is replaced by a single space so that words separated only by markup do not
// run together. A '>' that does not close a tag is ordinary text and is kept.
// An unterminated '<' discards the rest of the input. HTML entities are not
// decoded.
func stripHTML(s string) string {
	var b strings.Builder
	b.Grow(len(s))

	inTag := false
	for _, r := range s {
		switch {
		case r == '<':
			inTag = true
		case r == '>' && inTag:
			// End of a tag: leave a space where the markup was.
			inTag = false
			b.WriteByte(' ')
		case !inTag:
			b.WriteRune(r)
		}
	}
	return strings.TrimSpace(b.String())
}