feat: Phase 6.6 知识库 RAG 增强 — 文档索引 + 语义检索 + KnowledgeProvider
- rag.Embedder: LLM API 文本向量化 (OpenAI-compatible)
- rag.KnowledgeStore: 文档分块 + 重叠窗口 + 余弦相似度搜索
- rag.Retriever: 高级知识检索 + 格式化摘要
- KnowledgeProvider: 子会话提供者,整合入编排管线
- knowledge_search / knowledge_ingest 工具
- EnrichmentData 管线全线支持 KnowledgeInfo

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
package subsession
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/yourname/cyrene-ai/ai-core/internal/model"
|
||||
"github.com/yourname/cyrene-ai/ai-core/internal/rag"
|
||||
"github.com/yourname/cyrene-ai/pkg/logger"
|
||||
)
|
||||
|
||||
// KnowledgeProvider searches the knowledge base for relevant information.
|
||||
type KnowledgeProvider struct {
|
||||
retriever *rag.Retriever
|
||||
}
|
||||
|
||||
// NewKnowledgeProvider creates a knowledge subsession provider.
|
||||
func NewKnowledgeProvider(retriever *rag.Retriever) *KnowledgeProvider {
|
||||
return &KnowledgeProvider{retriever: retriever}
|
||||
}
|
||||
|
||||
func (p *KnowledgeProvider) Type() model.SubSessionType {
|
||||
return model.SubSessionKnowledge
|
||||
}
|
||||
|
||||
func (p *KnowledgeProvider) CanHandle(_ context.Context, intent *model.IntentResult, _ string) bool {
|
||||
if intent == nil {
|
||||
return true
|
||||
}
|
||||
// Activate for technical questions, how-to queries, and factual questions
|
||||
switch intent.Primary {
|
||||
case "knowledge", "technical", "how_to", "factual", "research":
|
||||
return true
|
||||
case "chat":
|
||||
// For general chat, only search if there might be relevant info
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (p *KnowledgeProvider) Priority() int {
|
||||
return 3
|
||||
}
|
||||
|
||||
func (p *KnowledgeProvider) Timeout() time.Duration {
|
||||
return 15 * time.Second
|
||||
}
|
||||
|
||||
func (p *KnowledgeProvider) CreateContext(ctx context.Context, params CreateContextParams) ([]model.LLMMessage, error) {
|
||||
return []model.LLMMessage{
|
||||
{Role: model.RoleSystem, Content: "知识库检索子会话"},
|
||||
{Role: model.RoleUser, Content: params.UserMessage},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *KnowledgeProvider) Execute(ctx context.Context, subCtx []model.LLMMessage) (*model.SubSessionResult, error) {
|
||||
userMessage := ""
|
||||
for i := len(subCtx) - 1; i >= 0; i-- {
|
||||
if subCtx[i].Role == model.RoleUser {
|
||||
userMessage = subCtx[i].Content
|
||||
break
|
||||
}
|
||||
}
|
||||
if userMessage == "" {
|
||||
return nil, fmt.Errorf("无法提取用户消息")
|
||||
}
|
||||
|
||||
result := &model.SubSessionResult{
|
||||
Type: model.SubSessionKnowledge,
|
||||
Confidence: 0,
|
||||
}
|
||||
|
||||
if p.retriever == nil {
|
||||
result.Summary = "(知识库未就绪)"
|
||||
return result, nil
|
||||
}
|
||||
|
||||
retrieval, err := p.retriever.Retrieve(ctx, userMessage, 3)
|
||||
if err != nil {
|
||||
logger.Printf("[knowledge-subsession] 知识检索失败: %v", err)
|
||||
result.Error = fmt.Sprintf("检索失败: %v", err)
|
||||
result.Summary = "(知识库检索失败)"
|
||||
return result, nil
|
||||
}
|
||||
|
||||
if len(retrieval.Results) == 0 {
|
||||
result.Summary = "(未找到相关知识)"
|
||||
return result, nil
|
||||
}
|
||||
|
||||
result.Summary = retrieval.Summary
|
||||
result.Confidence = 0.6
|
||||
logger.Printf("[knowledge-subsession] 完成: 找到 %d 条知识", len(retrieval.Results))
|
||||
return result, nil
|
||||
}
|
||||
Reference in New Issue
Block a user