package tools

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

const (
	// webFetchToolName is the identifier used in the tool definition and in
	// every ToolResult produced by WebFetchTool.
	webFetchToolName = "web_fetch"

	// webFetchTimeout bounds a whole HTTP exchange (connect, redirects, body).
	webFetchTimeout = 15 * time.Second

	// webFetchMaxBodyBytes caps how much of a response body is read (100 KiB).
	webFetchMaxBodyBytes = 100 * 1024

	// webFetchMaxRunes caps how many characters of extracted text are returned.
	webFetchMaxRunes = 2000
)

// WebFetchTool is a tool that retrieves the content of a web page over
// HTTP/HTTPS and returns it as a plain-text excerpt.
type WebFetchTool struct {
	// client performs the requests; NewWebFetchTool configures its timeout.
	client *http.Client
	// timeout mirrors the client's timeout. It is not read anywhere in this
	// file; the limit is enforced by client.Timeout.
	timeout time.Duration
}

// NewWebFetchTool returns a WebFetchTool whose HTTP client times out after
// 15 seconds.
func NewWebFetchTool() *WebFetchTool {
	return &WebFetchTool{
		client:  &http.Client{Timeout: webFetchTimeout},
		timeout: webFetchTimeout,
	}
}

// Definition returns the tool's name, its description, and a JSON-Schema-style
// parameter specification with a single required string parameter "url".
func (t *WebFetchTool) Definition() ToolDefinition {
	return ToolDefinition{
		Name:        webFetchToolName,
		Description: "获取指定URL的网页内容。用于查阅新闻、文档、资料等。返回纯文本摘要(前2000字符)。仅支持 HTTP/HTTPS URL。",
		Parameters: map[string]interface{}{
			"type": "object",
			"properties": map[string]interface{}{
				"url": map[string]interface{}{
					"type":        "string",
					"description": "要获取的网页URL,必须是完整的 http:// 或 https:// 链接",
				},
			},
			"required": []string{"url"},
		},
	}
}

// Execute fetches the page named by arguments["url"] and returns a text
// excerpt of it.
//
// The URL must be a non-empty string using the http or https scheme (matched
// case-insensitively, surrounding whitespace ignored). At most 100 KiB of the
// body is read, markup is stripped with extractText, and the text is cut to
// 2000 characters.
//
// Every failure (bad argument, request error, non-200 status, read error) is
// reported through a ToolResult with Success == false; the returned error is
// always nil.
//
// NOTE(review): only the scheme is validated. Nothing here prevents requests
// to loopback or private-network hosts; if the URL can come from untrusted
// input, consider adding a host allow/deny check.
func (t *WebFetchTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
	rawURL, ok := arguments["url"].(string)
	rawURL = strings.TrimSpace(rawURL)
	if !ok || rawURL == "" {
		return webFetchFailure("缺少 url 参数"), nil
	}

	// Safety check: only HTTP/HTTPS. URI schemes are case-insensitive, so
	// compare against a lower-cased copy.
	lowered := strings.ToLower(rawURL)
	if !strings.HasPrefix(lowered, "http://") && !strings.HasPrefix(lowered, "https://") {
		return webFetchFailure("仅支持 http:// 或 https:// 链接"), nil
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return webFetchFailure(fmt.Sprintf("创建请求失败: %v", err)), nil
	}

	// Send a browser-compatible User-Agent; the intent is to reduce the
	// chance of servers rejecting the request.
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; CyreneBot/1.0; +https://github.com/AskaEth/Cyrene)")
	req.Header.Set("Accept", "text/html,text/plain,*/*")

	resp, err := t.client.Do(req)
	if err != nil {
		return webFetchFailure(fmt.Sprintf("请求失败: %v", err)), nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return webFetchFailure(fmt.Sprintf("HTTP %d", resp.StatusCode)), nil
	}

	// Read at most webFetchMaxBodyBytes; anything beyond that is ignored, so
	// the character count reported below covers only the bytes actually read.
	body, err := io.ReadAll(io.LimitReader(resp.Body, webFetchMaxBodyBytes))
	if err != nil {
		return webFetchFailure(fmt.Sprintf("读取响应失败: %v", err)), nil
	}

	// Strip markup, then truncate by runes (not bytes) so multi-byte
	// characters are never split.
	text := extractText(string(body))
	if runes := []rune(text); len(runes) > webFetchMaxRunes {
		text = fmt.Sprintf("%s\n\n... [内容已截断,共%d字符]", string(runes[:webFetchMaxRunes]), len(runes))
	}

	result := fmt.Sprintf("URL: %s\n状态: %d\n内容类型: %s\n\n%s",
		rawURL, resp.StatusCode, resp.Header.Get("Content-Type"), text)

	return &ToolResult{
		ToolName: webFetchToolName,
		Success:  true,
		Data:     result,
	}, nil
}

// webFetchFailure builds the unsuccessful ToolResult carrying msg as its error.
func webFetchFailure(msg string) *ToolResult {
	return &ToolResult{
		ToolName: webFetchToolName,
		Success:  false,
		Error:    msg,
	}
}

// extractText removes HTML-style tags from raw and normalizes whitespace:
// every line is trimmed and empty lines are dropped.
//
// A '<' starts a tag only when it is immediately followed by an ASCII letter,
// '/', '!' or '?'. Any other '<', and any '>' outside a tag, is kept as
// literal text, so plain-text input such as "a < b" or "x -> y" survives
// intact. This is a lightweight stripper, not an HTML parser: the contents of
// <script>/<style> elements are kept as text and character entities are not
// decoded.
func extractText(raw string) string {
	// isTagOpener reports whether b, as the byte right after '<', begins a tag.
	isTagOpener := func(b byte) bool {
		switch {
		case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z':
			return true
		case b == '/', b == '!', b == '?':
			return true
		}
		return false
	}

	var stripped strings.Builder
	stripped.Grow(len(raw))

	inTag := false
	for i, r := range raw {
		switch {
		case inTag:
			// Discard everything inside the tag, including the closing '>'.
			if r == '>' {
				inTag = false
			}
		case r == '<' && i+1 < len(raw) && isTagOpener(raw[i+1]):
			inTag = true
		default:
			stripped.WriteRune(r)
		}
	}

	// Trim each line and drop the ones that end up empty.
	var kept []string
	for _, line := range strings.Split(stripped.String(), "\n") {
		if line = strings.TrimSpace(line); line != "" {
			kept = append(kept, line)
		}
	}
	return strings.Join(kept, "\n")
}