Files
Cyrene/backend/ai-core/internal/tools/web_fetch.go
T

160 lines
3.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package tools
import (
"context"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// WebFetchTool is the web-access tool: it lets Cyrene retrieve the content of
// web pages.
type WebFetchTool struct {
// client is the HTTP client Execute uses to send its requests.
client *http.Client
// timeout is set by NewWebFetchTool to the same 15-second value as
// client.Timeout.
// NOTE(review): no code visible here reads this field — confirm whether it
// is still needed.
timeout time.Duration
}
// NewWebFetchTool returns a WebFetchTool backed by its own HTTP client.
// The client aborts any request after 15 seconds, and the same duration is
// recorded in the tool's timeout field.
func NewWebFetchTool() *WebFetchTool {
	const fetchTimeout = 15 * time.Second

	httpClient := &http.Client{Timeout: fetchTimeout}
	tool := WebFetchTool{
		client:  httpClient,
		timeout: fetchTimeout,
	}
	return &tool
}
// Definition returns the tool definition for web_fetch: its name, its
// description, and a JSON-Schema-style parameter object whose single required
// property is the string "url".
func (t *WebFetchTool) Definition() ToolDefinition {
	// Schema of the only argument the tool accepts.
	urlParam := map[string]interface{}{
		"type":        "string",
		"description": "要获取的网页URL,必须是完整的 http:// 或 https:// 链接",
	}

	// Top-level parameter object wrapping that argument.
	schema := map[string]interface{}{
		"type":       "object",
		"properties": map[string]interface{}{"url": urlParam},
		"required":   []string{"url"},
	}

	return ToolDefinition{
		Name:        "web_fetch",
		Description: "获取指定URL的网页内容。用于查阅新闻、文档、资料等。返回纯文本摘要(前2000字符)。仅支持 HTTP/HTTPS URL。",
		Parameters:  schema,
	}
}
// Execute fetches the page named by arguments["url"] and returns a plain-text
// digest of it.
//
// The URL must be a non-empty string with an http:// or https:// scheme.
// Surrounding whitespace is ignored and the scheme is matched
// case-insensitively (RFC 3986 section 3.1), so "HTTPS://example.com" is
// accepted. At most 100 KiB of the response body is read; the text produced by
// extractText is cut to 2000 characters, followed by a marker that states the
// full character count.
//
// Every failure (bad argument, request error, non-200 status, read error) is
// reported as a ToolResult with Success=false and a message in Error; the
// returned error is always nil.
//
// NOTE(review): the target host is not restricted here, so a caller-supplied
// URL can point at loopback or private-network addresses (SSRF). Confirm
// whether that is acceptable in the environment where this tool runs.
func (t *WebFetchTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
	const (
		toolName     = "web_fetch"
		maxBodyBytes = 100 * 1024 // cap on how much of the response is read
		maxRunes     = 2000       // cap on how much text is returned
	)

	// fail builds the uniform "unsuccessful" result used by every error path.
	fail := func(msg string) (*ToolResult, error) {
		return &ToolResult{
			ToolName: toolName,
			Success:  false,
			Error:    msg,
		}, nil
	}

	// A missing or non-string argument yields "", which is rejected below.
	rawURL, _ := arguments["url"].(string)
	rawURL = strings.TrimSpace(rawURL)
	if rawURL == "" {
		return fail("缺少 url 参数")
	}

	// Safety check: only HTTP/HTTPS is allowed. URI schemes are
	// case-insensitive, so compare the prefix with case folding.
	hasScheme := func(prefix string) bool {
		return len(rawURL) >= len(prefix) && strings.EqualFold(rawURL[:len(prefix)], prefix)
	}
	if !hasScheme("http://") && !hasScheme("https://") {
		return fail("仅支持 http:// 或 https:// 链接")
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return fail(fmt.Sprintf("创建请求失败: %v", err))
	}
	// Send a browser-like User-Agent so the request is less likely to be refused.
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; CyreneBot/1.0; +https://github.com/AskaEth/Cyrene)")
	req.Header.Set("Accept", "text/html,text/plain,*/*")

	resp, err := t.client.Do(req)
	if err != nil {
		return fail(fmt.Sprintf("请求失败: %v", err))
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fail(fmt.Sprintf("HTTP %d", resp.StatusCode))
	}

	// Bound the read so a huge response cannot exhaust memory.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes))
	if err != nil {
		return fail(fmt.Sprintf("读取响应失败: %v", err))
	}

	// Reduce the body to plain text, then truncate by characters (runes), not
	// bytes, so multi-byte text is never split mid-character.
	text := extractText(string(body))
	if runes := []rune(text); len(runes) > maxRunes {
		text = fmt.Sprintf("%s\n\n... [内容已截断,共%d字符]", string(runes[:maxRunes]), len(runes))
	}

	result := fmt.Sprintf("URL: %s\n状态: %d\n内容类型: %s\n\n%s",
		rawURL, resp.StatusCode, resp.Header.Get("Content-Type"), text)
	return &ToolResult{
		ToolName: toolName,
		Success:  true,
		Data:     result,
	}, nil
}
// extractText reduces HTML (or plain text) to readable text.
//
// Markup handling is deliberately lightweight rather than a full HTML parse:
//   - "<" starts a tag only when followed by a letter, "/", "!" or "?";
//     otherwise it is kept as literal text, so "1 < 2" survives intact.
//   - ">" outside a tag is ordinary text and is kept.
//   - Comments ("<!-- ... -->") are removed as a whole, even if they contain ">".
//   - The contents of <script> and <style> elements are discarded, since they
//     hold code rather than readable text.
//   - An unterminated tag, comment, or script/style element drops the rest of
//     the input.
//
// Afterwards every line is trimmed and empty lines are removed. The result is
// always valid UTF-8; invalid byte sequences are replaced with U+FFFD.
// HTML entities such as "&amp;" are not decoded.
func extractText(raw string) string {
	isLetter := func(c byte) bool {
		return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
	}
	// isNameByte reports whether c may continue a tag name.
	isNameByte := func(c byte) bool {
		return isLetter(c) || ('0' <= c && c <= '9') || c == '-'
	}

	var sb strings.Builder
	sb.Grow(len(raw))

	rest := raw
	for len(rest) > 0 {
		lt := strings.IndexByte(rest, '<')
		if lt < 0 {
			sb.WriteString(rest)
			break
		}
		sb.WriteString(rest[:lt])
		rest = rest[lt+1:] // rest now begins just after the '<'

		// A '<' that cannot open markup is literal text.
		if len(rest) == 0 || !(isLetter(rest[0]) || rest[0] == '/' || rest[0] == '!' || rest[0] == '?') {
			sb.WriteByte('<')
			continue
		}

		// Comment: skip through the closing "-->". Searching from offset 1
		// also handles the degenerate forms "<!-->" and "<!--->".
		if strings.HasPrefix(rest, "!--") {
			end := strings.Index(rest[1:], "-->")
			if end < 0 {
				break
			}
			rest = rest[1+end+len("-->"):]
			continue
		}

		gt := strings.IndexByte(rest, '>')
		if gt < 0 {
			break // unterminated tag: nothing after it is text
		}
		tag := rest[:gt]
		rest = rest[gt+1:]

		// Only opening <script>/<style> tags need further work; a closing
		// tag starts with '/', which gives an empty name here.
		n := 0
		for n < len(tag) && isNameByte(tag[n]) {
			n++
		}
		name := tag[:n]
		if !strings.EqualFold(name, "script") && !strings.EqualFold(name, "style") {
			continue
		}

		// Jump to the matching closing tag (ASCII case-insensitive); the next
		// iteration then consumes that closing tag like any other.
		closer := "</" + name
		at := -1
		for i := 0; i+len(closer) <= len(rest); i++ {
			if rest[i] == '<' && strings.EqualFold(rest[i:i+len(closer)], closer) {
				at = i
				break
			}
		}
		if at < 0 {
			break
		}
		rest = rest[at:]
	}

	// Normalize whitespace: trim every line and drop the empty ones.
	cleaned := strings.ToValidUTF8(sb.String(), "\uFFFD")
	lines := strings.Split(strings.TrimSpace(cleaned), "\n")
	kept := make([]string, 0, len(lines))
	for _, line := range lines {
		if line = strings.TrimSpace(line); line != "" {
			kept = append(kept, line)
		}
	}
	return strings.Join(kept, "\n")
}