// Listing metadata: 160 lines, 3.8 KiB, Go.
package tools
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// WebFetchTool is the web-access tool: it lets the assistant (Cyrene)
// retrieve the content of web pages.
type WebFetchTool struct {
	// client is the HTTP client used to send every fetch request.
	client *http.Client

	// timeout records the time limit chosen at construction; NewWebFetchTool
	// sets it to the same value as client.Timeout.
	timeout time.Duration
}
|
||
|
||
// NewWebFetchTool 创建网络访问工具
|
||
func NewWebFetchTool() *WebFetchTool {
|
||
return &WebFetchTool{
|
||
client: &http.Client{
|
||
Timeout: 15 * time.Second,
|
||
},
|
||
timeout: 15 * time.Second,
|
||
}
|
||
}
|
||
|
||
// Definition 返回工具定义
|
||
func (t *WebFetchTool) Definition() ToolDefinition {
|
||
return ToolDefinition{
|
||
Name: "web_fetch",
|
||
Description: "获取指定URL的网页内容。用于查阅新闻、文档、资料等。返回纯文本摘要(前2000字符)。仅支持 HTTP/HTTPS URL。",
|
||
Parameters: map[string]interface{}{
|
||
"type": "object",
|
||
"properties": map[string]interface{}{
|
||
"url": map[string]interface{}{
|
||
"type": "string",
|
||
"description": "要获取的网页URL,必须是完整的 http:// 或 https:// 链接",
|
||
},
|
||
},
|
||
"required": []string{"url"},
|
||
},
|
||
}
|
||
}
|
||
|
||
// Execute 执行网页获取
|
||
func (t *WebFetchTool) Execute(ctx context.Context, arguments map[string]interface{}) (*ToolResult, error) {
|
||
url, ok := arguments["url"].(string)
|
||
if !ok || url == "" {
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: false,
|
||
Error: "缺少 url 参数",
|
||
}, nil
|
||
}
|
||
|
||
// 安全检查:只允许 HTTP/HTTPS
|
||
if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") {
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: false,
|
||
Error: "仅支持 http:// 或 https:// 链接",
|
||
}, nil
|
||
}
|
||
|
||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||
if err != nil {
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: false,
|
||
Error: fmt.Sprintf("创建请求失败: %v", err),
|
||
}, nil
|
||
}
|
||
|
||
// 模拟常见浏览器 User-Agent,避免被拒
|
||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; CyreneBot/1.0; +https://github.com/AskaEth/Cyrene)")
|
||
req.Header.Set("Accept", "text/html,text/plain,*/*")
|
||
|
||
resp, err := t.client.Do(req)
|
||
if err != nil {
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: false,
|
||
Error: fmt.Sprintf("请求失败: %v", err),
|
||
}, nil
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: false,
|
||
Error: fmt.Sprintf("HTTP %d", resp.StatusCode),
|
||
}, nil
|
||
}
|
||
|
||
// 限制读取大小(最多 100KB)
|
||
limitedReader := io.LimitReader(resp.Body, 100*1024)
|
||
body, err := io.ReadAll(limitedReader)
|
||
if err != nil {
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: false,
|
||
Error: fmt.Sprintf("读取响应失败: %v", err),
|
||
}, nil
|
||
}
|
||
|
||
// 提取纯文本摘要(去除 HTML 标签)
|
||
text := extractText(string(body))
|
||
|
||
// 截断到 2000 字符
|
||
if len([]rune(text)) > 2000 {
|
||
runes := []rune(text)
|
||
text = string(runes[:2000]) + "\n\n... [内容已截断,共" + fmt.Sprintf("%d", len(runes)) + "字符]"
|
||
}
|
||
|
||
result := fmt.Sprintf("URL: %s\n状态: %d\n内容类型: %s\n\n%s",
|
||
url, resp.StatusCode, resp.Header.Get("Content-Type"), text)
|
||
|
||
return &ToolResult{
|
||
ToolName: "web_fetch",
|
||
Success: true,
|
||
Data: result,
|
||
}, nil
|
||
}
|
||
|
||
// extractText strips HTML markup from raw and returns the remaining text with
// every line trimmed and all blank lines removed.
//
// Markup handling is a lightweight scan, not a full HTML parser:
//   - "<" starts a tag only when followed by an ASCII letter, "/", "!" or "?";
//     any other "<" (as in "a < b") is kept as literal text.
//   - ">" outside a tag is kept as literal text.
//   - Comments ("<!-- ... -->") are dropped whole, even if they contain ">".
//   - The contents of <script> and <style> elements are dropped along with
//     their tags, since code and CSS are not page text.
//   - A tag or comment that is never closed discards the rest of the input.
//
// Character references such as "&amp;" are left as they are.
func extractText(raw string) string {
	var sb strings.Builder
	sb.Grow(len(raw))

	rest := raw
	for {
		lt := strings.IndexByte(rest, '<')
		if lt < 0 {
			// No more markup: everything left is text.
			sb.WriteString(rest)
			break
		}
		sb.WriteString(rest[:lt])
		rest = rest[lt+1:]

		// Decide whether this '<' really opens markup.
		opensTag := false
		if rest != "" {
			c := rest[0]
			opensTag = c == '/' || c == '!' || c == '?' ||
				('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
		}
		if !opensTag {
			sb.WriteByte('<')
			continue
		}

		// Comments end at "-->", not at the first '>'.
		if strings.HasPrefix(rest, "!--") {
			end := strings.Index(rest, "-->")
			if end < 0 {
				break
			}
			rest = rest[end+len("-->"):]
			continue
		}

		gt := strings.IndexByte(rest, '>')
		if gt < 0 {
			// Unterminated tag: nothing after it can be trusted as text.
			break
		}
		tag := rest[:gt]
		rest = rest[gt+1:]

		// The element name is the tag content up to the first space or '/'.
		// Closing tags start with '/', so their name comes out empty here.
		name := tag
		if i := strings.IndexAny(name, " \t\r\n/"); i >= 0 {
			name = name[:i]
		}
		name = strings.ToLower(name)
		if name != "script" && name != "style" {
			continue
		}

		// Skip the element's raw content through its closing tag.
		for {
			i := strings.Index(rest, "</")
			if i < 0 {
				rest = ""
				break
			}
			rest = rest[i+len("</"):]
			if len(rest) >= len(name) && strings.EqualFold(rest[:len(name)], name) {
				if j := strings.IndexByte(rest, '>'); j >= 0 {
					rest = rest[j+1:]
				} else {
					rest = ""
				}
				break
			}
		}
	}

	// Trim each line and drop the empty ones, which also collapses runs of
	// blank lines.
	var kept []string
	for _, line := range strings.Split(sb.String(), "\n") {
		if line = strings.TrimSpace(line); line != "" {
			kept = append(kept, line)
		}
	}
	return strings.Join(kept, "\n")
}
|