feat: Phase 6.6 知识库 RAG 增强 — 文档索引 + 语义检索 + KnowledgeProvider

- rag.Embedder: LLM API 文本向量化 (OpenAI-compatible)
- rag.KnowledgeStore: 文档分块 + 重叠窗口 + 余弦相似度搜索
- rag.Retriever: 高级知识检索 + 格式化摘要
- KnowledgeProvider: 子会话提供者,整合入编排管线
- knowledge_search / knowledge_ingest 工具
- EnrichmentData 管线全线支持 KnowledgeInfo

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:33:26 +08:00
parent 9a8fb8d0ce
commit cd83eec39e
10 changed files with 752 additions and 3 deletions
@@ -0,0 +1,96 @@
package subsession
import (
"context"
"fmt"
"time"
"github.com/yourname/cyrene-ai/ai-core/internal/model"
"github.com/yourname/cyrene-ai/ai-core/internal/rag"
"github.com/yourname/cyrene-ai/pkg/logger"
)
// KnowledgeProvider is the subsession provider that answers from the knowledge
// base by delegating searches to a rag.Retriever.
type KnowledgeProvider struct {
// retriever performs the knowledge-base lookup. Execute treats a nil
// retriever as "knowledge base not ready" and returns a placeholder result
// instead of an error.
retriever *rag.Retriever
}
// NewKnowledgeProvider builds a KnowledgeProvider that searches through the
// given retriever.
//
// The retriever is stored as-is without validation; a nil retriever is
// accepted, in which case Execute reports that the knowledge base is not ready.
func NewKnowledgeProvider(retriever *rag.Retriever) *KnowledgeProvider {
	provider := new(KnowledgeProvider)
	provider.retriever = retriever
	return provider
}
// Type reports the subsession kind served by this provider, which is always
// model.SubSessionKnowledge.
func (p *KnowledgeProvider) Type() model.SubSessionType {
return model.SubSessionKnowledge
}
// CanHandle decides whether the knowledge subsession should run for a request.
//
// The provider is opt-out rather than opt-in: it activates when no intent is
// available (nil) and for every primary intent — "knowledge", "technical",
// "how_to", "factual", "research", and any other value — except "chat", which
// is the only intent that skips the knowledge-base search. The context and the
// raw message argument are not consulted.
func (p *KnowledgeProvider) CanHandle(_ context.Context, intent *model.IntentResult, _ string) bool {
	// Without a classified intent there is nothing to rule the search out.
	if intent == nil {
		return true
	}
	// General chat is the single case that does not trigger a lookup.
	return intent.Primary != "chat"
}
// Priority returns the fixed scheduling priority of the knowledge provider.
// NOTE(review): how the value is ordered against other providers (higher or
// lower first) is decided by the caller and is not visible here — confirm.
func (p *KnowledgeProvider) Priority() int {
	const knowledgePriority = 3
	return knowledgePriority
}
// Timeout returns the time budget advertised for one knowledge subsession:
// 15 seconds. The method only reports the value; enforcing it is left to the
// caller.
func (p *KnowledgeProvider) Timeout() time.Duration {
	const searchBudget = 15 * time.Second
	return searchBudget
}
// CreateContext builds the message list handed to Execute: a fixed system
// message labelling the knowledge-retrieval subsession, followed by the user's
// message taken from params.UserMessage.
//
// The ctx argument is not used and the returned error is always nil.
func (p *KnowledgeProvider) CreateContext(ctx context.Context, params CreateContextParams) ([]model.LLMMessage, error) {
	systemMsg := model.LLMMessage{
		Role:    model.RoleSystem,
		Content: "知识库检索子会话",
	}
	// Execute later reads the query back out of this user-role message.
	userMsg := model.LLMMessage{
		Role:    model.RoleUser,
		Content: params.UserMessage,
	}
	return []model.LLMMessage{systemMsg, userMsg}, nil
}
// Execute searches the knowledge base for the most recent user message in
// subCtx and reports the outcome as a SubSessionResult.
//
// An error is returned only when subCtx has no user-role message, or when the
// last one has empty content. Every other outcome is reported through a
// non-nil result with a nil error:
//   - no retriever configured: placeholder Summary, Confidence left at zero;
//   - retrieval failed: the failure is logged and copied into result.Error,
//     placeholder Summary, Confidence left at zero;
//   - nothing found: placeholder Summary, Confidence left at zero;
//   - hits found: Summary taken from the retrieval, Confidence set to 0.6.
func (p *KnowledgeProvider) Execute(ctx context.Context, subCtx []model.LLMMessage) (*model.SubSessionResult, error) {
	// Each user-role message overwrites the previous one, so after the loop
	// query holds the content of the last user message in the slice.
	var query string
	for _, msg := range subCtx {
		if msg.Role == model.RoleUser {
			query = msg.Content
		}
	}
	if query == "" {
		return nil, fmt.Errorf("无法提取用户消息")
	}

	// Confidence keeps its zero value unless the search actually finds something.
	outcome := &model.SubSessionResult{Type: model.SubSessionKnowledge}

	if p.retriever == nil {
		outcome.Summary = "(知识库未就绪)"
		return outcome, nil
	}

	// NOTE(review): the trailing 3 is presumably the maximum number of results
	// to fetch — confirm against rag.Retriever.Retrieve.
	found, err := p.retriever.Retrieve(ctx, query, 3)
	switch {
	case err != nil:
		// Best effort: a failed lookup degrades to a placeholder result
		// instead of failing the whole subsession.
		logger.Printf("[knowledge-subsession] 知识检索失败: %v", err)
		outcome.Error = fmt.Sprintf("检索失败: %v", err)
		outcome.Summary = "(知识库检索失败)"
	case len(found.Results) == 0:
		outcome.Summary = "(未找到相关知识)"
	default:
		outcome.Summary = found.Summary
		outcome.Confidence = 0.6
		logger.Printf("[knowledge-subsession] 完成: 找到 %d 条知识", len(found.Results))
	}
	return outcome, nil
}