Files
Cyrene/backend/ai-core/internal/rag/embedder.go
T
AskaEth 63a8f95de1 test: Phase 6 全功能测试 — 19个测试全部通过 + 开发路线文档
- host: 沙箱执行/命令拦截/超时/文件读写/系统信息/路径验证 (6 tests)
- rag: 文本分块/余弦相似度/关键词匹配/文档索引+搜索 (4 tests)
- tools: host_exec/host_file/host_system/knowledge_search/knowledge_ingest (5 tests)
- vision: 图片编码/错误处理/定义验证/执行流程 (4 tests)
- Embedder 重构为接口,支持 API 和 Simple 两种实现
- 添加 ROADMAP.md 未来开发路线

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 07:51:07 +08:00

126 lines
3.0 KiB
Go

package rag
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// Embedder is the interface for text embedding.
//
// Implementations turn text into a vector of float64 values.
type Embedder interface {
// Embed returns the embedding vector for a single text.
Embed(ctx context.Context, text string) ([]float64, error)
// EmbedBatch accepts several texts but, per its signature, returns a single
// vector rather than one vector per input. The APIEmbedder implementation
// in this file returns only the first entry of the API response.
EmbedBatch(ctx context.Context, texts []string) ([]float64, error)
// IsAvailable reports whether the embedder can be used. For APIEmbedder
// this means both an API key and a base URL are set.
IsAvailable() bool
}
// APIEmbedder creates text embeddings using an LLM API.
//
// Requests are sent as JSON via HTTP POST to baseURL + "/embeddings".
type APIEmbedder struct {
// baseURL is the API prefix; "/embeddings" is appended to it verbatim.
baseURL string
// apiKey is sent as a Bearer token in the Authorization header.
apiKey string
// model is sent as the "model" field of every request.
model string
// httpClient performs the HTTP requests.
httpClient *http.Client
}
// NewEmbedder builds an APIEmbedder for the given endpoint, credentials and
// model name.
//
// The arguments are stored as-is (no validation or normalisation). The
// embedder gets its own HTTP client with a 30-second overall timeout.
func NewEmbedder(baseURL, apiKey, model string) *APIEmbedder {
	client := &http.Client{Timeout: 30 * time.Second}

	emb := new(APIEmbedder)
	emb.baseURL = baseURL
	emb.apiKey = apiKey
	emb.model = model
	emb.httpClient = client
	return emb
}
// embeddingRequest is the JSON body POSTed to the embeddings endpoint.
type embeddingRequest struct {
// Input holds the texts to embed.
Input []string `json:"input"`
// Model is the name of the embedding model to use.
Model string `json:"model"`
}
// embeddingResponse is the JSON body decoded from the embeddings endpoint.
type embeddingResponse struct {
// Data holds the returned embeddings; EmbedBatch reads only the first entry.
Data []embeddingData `json:"data"`
// Model is decoded from the "model" field; not read by the code in this file.
Model string `json:"model"`
// Usage is decoded from the "usage" field; not read by the code in this file.
Usage embeddingUsage `json:"usage,omitempty"`
// Error is non-nil when the response carries an "error" object, in which
// case EmbedBatch reports it as a failure.
Error *embeddingError `json:"error,omitempty"`
}
// embeddingData is one element of the "data" array in an embedding response.
type embeddingData struct {
// Embedding is the vector itself.
Embedding []float64 `json:"embedding"`
// Index is decoded from the "index" field; presumably the position of the
// matching input text (confirm against the API docs). Not read in this file.
Index int `json:"index"`
}
// embeddingUsage mirrors the "usage" object of an embedding response.
// Neither field is read by the code in this file.
type embeddingUsage struct {
// PromptTokens is decoded from "prompt_tokens".
PromptTokens int `json:"prompt_tokens"`
// TotalTokens is decoded from "total_tokens".
TotalTokens int `json:"total_tokens"`
}
// embeddingError mirrors the "error" object of an embedding response.
// EmbedBatch includes both fields in the error it returns.
type embeddingError struct {
// Message is the error description decoded from "message".
Message string `json:"message"`
// Code is decoded from "code" and must be a JSON string (or null) to parse.
Code string `json:"code"`
}
// Embed returns the embedding vector for a single text.
//
// It wraps text in a one-element slice and delegates to EmbedBatch, so it
// fails in exactly the same ways EmbedBatch does.
func (e *APIEmbedder) Embed(ctx context.Context, text string) ([]float64, error) {
	single := []string{text}
	return e.EmbedBatch(ctx, single)
}
// EmbedBatch sends texts to the embeddings endpoint in a single request and
// returns the embedding of the first entry in the response.
//
// NOTE: every element of texts is sent, but only one vector is returned (the
// signature is []float64, not [][]float64); embeddings for any additional
// inputs are discarded.
//
// The request is a JSON POST to baseURL + "/embeddings" authenticated with a
// Bearer token, and it honours cancellation of ctx.
//
// An error is returned when:
//   - the embedder is not configured (see IsAvailable),
//   - texts is empty,
//   - the request cannot be built or sent, or the body cannot be read,
//   - the API reports a structured error,
//   - the server answers with a non-2xx status,
//   - the body is not valid JSON, or
//   - the response contains no embedding data.
func (e *APIEmbedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
	if !e.IsAvailable() {
		// IsAvailable requires both values, so name both in the message.
		return nil, fmt.Errorf("embedding service not available: API key or base URL not configured")
	}
	if len(texts) == 0 {
		// Nothing to embed; fail fast instead of sending a request the API
		// can only reject.
		return nil, fmt.Errorf("no input texts to embed")
	}

	jsonBody, err := json.Marshal(embeddingRequest{Input: texts, Model: e.model})
	if err != nil {
		return nil, fmt.Errorf("marshal embedding request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/embeddings", bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("create embedding request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+e.apiKey)

	resp, err := e.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("embedding request failed: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read embedding response: %w", err)
	}

	// Decode before looking at the status so that a structured API error,
	// which is the most informative failure, takes precedence.
	var embResp embeddingResponse
	parseErr := json.Unmarshal(body, &embResp)
	if parseErr == nil && embResp.Error != nil {
		return nil, fmt.Errorf("embedding API error: %s (code=%s)", embResp.Error.Message, embResp.Error.Code)
	}

	// Without this check a gateway error page (HTML, plain text) would be
	// reported as a confusing JSON parse failure.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		const maxSnippet = 512 // keep error messages bounded
		snippet := body
		if len(snippet) > maxSnippet {
			snippet = snippet[:maxSnippet]
		}
		return nil, fmt.Errorf("embedding API returned HTTP %d: %q", resp.StatusCode, snippet)
	}

	if parseErr != nil {
		return nil, fmt.Errorf("parse embedding response: %w", parseErr)
	}
	if len(embResp.Data) == 0 {
		return nil, fmt.Errorf("no embedding returned")
	}
	return embResp.Data[0].Embedding, nil
}
// IsAvailable reports whether the embedder has the configuration it needs to
// issue requests: both an API key and a base URL must be non-empty. It does
// not contact the remote service.
func (e *APIEmbedder) IsAvailable() bool {
	hasKey := len(e.apiKey) > 0
	hasURL := len(e.baseURL) > 0
	return hasKey && hasURL
}