feat: Phase 6.6 知识库 RAG 增强 — 文档索引 + 语义检索 + KnowledgeProvider

- rag.Embedder: LLM API 文本向量化 (OpenAI-compatible)
- rag.KnowledgeStore: 文档分块 + 重叠窗口 + 余弦相似度搜索
- rag.Retriever: 高级知识检索 + 格式化摘要
- KnowledgeProvider: 子会话提供者,整合入编排管线
- knowledge_search / knowledge_ingest 工具
- EnrichmentData 管线全线支持 KnowledgeInfo

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:33:26 +08:00
parent 9a8fb8d0ce
commit cd83eec39e
10 changed files with 752 additions and 3 deletions
+118
View File
@@ -0,0 +1,118 @@
package rag
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// Embedder is a client for an OpenAI-compatible embeddings endpoint. It keeps
// the connection settings together with the HTTP client used for requests.
type Embedder struct {
	// baseURL is the API root; "/embeddings" is appended to it per request.
	baseURL string
	// apiKey is sent as a Bearer token in the Authorization header.
	apiKey string
	// model is the embedding model name placed in the request body.
	model string
	// httpClient performs the outgoing HTTP calls.
	httpClient *http.Client
}
// NewEmbedder returns an Embedder that targets baseURL, authenticates with
// apiKey and requests embeddings from model. The embedder gets its own HTTP
// client configured with a 30-second timeout.
func NewEmbedder(baseURL, apiKey, model string) *Embedder {
	client := &http.Client{Timeout: 30 * time.Second}

	emb := new(Embedder)
	emb.baseURL = baseURL
	emb.apiKey = apiKey
	emb.model = model
	emb.httpClient = client
	return emb
}
// embeddingRequest is the JSON body posted to the embeddings endpoint.
type embeddingRequest struct {
	// Input holds the texts to embed.
	Input []string `json:"input"`
	// Model names the embedding model to use.
	Model string `json:"model"`
}
// embeddingResponse is the JSON document returned by the embeddings endpoint.
// A failed call carries Error; a successful one carries Data.
type embeddingResponse struct {
	// Data lists the returned embeddings.
	Data []embeddingData `json:"data"`
	// Model echoes the "model" field of the response.
	Model string `json:"model"`
	// Usage holds the token accounting. omitempty has no effect on a
	// struct-valued field in encoding/json, so it only matters as a tag name.
	Usage embeddingUsage `json:"usage,omitempty"`
	// Error is non-nil when the response contains an "error" object.
	Error *embeddingError `json:"error,omitempty"`
}
// embeddingData is one entry of the "data" array in an embeddings response.
type embeddingData struct {
	// Embedding is the vector decoded from the "embedding" field.
	Embedding []float64 `json:"embedding"`
	// Index is the value of the entry's "index" field.
	Index int `json:"index"`
}
// embeddingUsage mirrors the "usage" object of an embeddings response.
type embeddingUsage struct {
	// PromptTokens is decoded from "prompt_tokens".
	PromptTokens int `json:"prompt_tokens"`
	// TotalTokens is decoded from "total_tokens".
	TotalTokens int `json:"total_tokens"`
}
// embeddingError mirrors the "error" object of a failed embeddings response.
type embeddingError struct {
	// Message is the error text decoded from "message".
	Message string `json:"message"`
	// Code is the error code decoded from "code"; it must be a JSON string.
	Code string `json:"code"`
}
// Embed returns the embedding vector for a single text. It wraps text in a
// one-element slice and delegates to EmbedBatch, returning its result as is.
func (e *Embedder) Embed(ctx context.Context, text string) ([]float64, error) {
	single := []string{text}
	return e.EmbedBatch(ctx, single)
}
// EmbedBatch posts texts to the "/embeddings" endpoint under baseURL and
// returns a single embedding vector.
//
// NOTE: despite the name, only one vector is returned: the response entry
// whose index is 0, i.e. the embedding of texts[0]. All texts are still sent
// to the API, but the other vectors are discarded. The signature is kept
// unchanged for existing callers.
//
// An error is returned when the embedder is not configured (see IsAvailable),
// texts is empty, the request cannot be built or sent, the server replies
// with an error payload or a non-2xx status, the body is not valid JSON, or
// the response contains no embedding.
func (e *Embedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
	if !e.IsAvailable() {
		return nil, fmt.Errorf("embedding service not available: no API key configured")
	}
	// Fail fast instead of spending a round trip on a request with no input.
	if len(texts) == 0 {
		return nil, fmt.Errorf("no texts to embed")
	}

	jsonBody, err := json.Marshal(embeddingRequest{Input: texts, Model: e.model})
	if err != nil {
		return nil, fmt.Errorf("marshal embedding request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/embeddings", bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("create embedding request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+e.apiKey)

	resp, err := e.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("embedding request failed: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read embedding response: %w", err)
	}
	statusOK := resp.StatusCode >= 200 && resp.StatusCode < 300

	var embResp embeddingResponse
	if err := json.Unmarshal(body, &embResp); err != nil {
		if !statusOK {
			// Gateways and proxies often answer with HTML or plain text.
			// Report the status and a bounded excerpt of the body rather
			// than a confusing JSON syntax error.
			const maxExcerpt = 256
			excerpt := body
			if len(excerpt) > maxExcerpt {
				excerpt = excerpt[:maxExcerpt]
			}
			return nil, fmt.Errorf("embedding API returned %s: %q", resp.Status, excerpt)
		}
		return nil, fmt.Errorf("parse embedding response: %w", err)
	}
	// Prefer the structured API error: it is more specific than the status.
	if embResp.Error != nil {
		return nil, fmt.Errorf("embedding API error: %s (code=%s)", embResp.Error.Message, embResp.Error.Code)
	}
	if !statusOK {
		return nil, fmt.Errorf("embedding API returned %s", resp.Status)
	}
	if len(embResp.Data) == 0 {
		return nil, fmt.Errorf("no embedding returned")
	}

	// Select by the reported index rather than by array position, so an
	// out-of-order response still yields the vector for texts[0].
	for i := range embResp.Data {
		if embResp.Data[i].Index == 0 {
			return embResp.Data[i].Embedding, nil
		}
	}
	// No entry reports index 0; fall back to the first one.
	return embResp.Data[0].Embedding, nil
}
// IsAvailable reports whether the embedder has both an API key and a base URL
// configured. It performs no network access.
func (e *Embedder) IsAvailable() bool {
	hasKey := e.apiKey != ""
	hasEndpoint := e.baseURL != ""
	return hasKey && hasEndpoint
}