feat: Phase 6.6 知识库 RAG 增强 — 文档索引 + 语义检索 + KnowledgeProvider

- rag.Embedder: LLM API 文本向量化 (OpenAI-compatible)
- rag.KnowledgeStore: 文档分块 + 重叠窗口 + 余弦相似度搜索
- rag.Retriever: 高级知识检索 + 格式化摘要
- KnowledgeProvider: 子会话提供者，整合入编排管线
- knowledge_search / knowledge_ingest 工具
- EnrichmentData 管线全线支持 KnowledgeInfo

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:33:26 +08:00
parent 9a8fb8d0ce
commit cd83eec39e
10 changed files with 752 additions and 3 deletions
+17
View File
@@ -23,6 +23,7 @@ import (
"github.com/yourname/cyrene-ai/ai-core/internal/model"
"github.com/yourname/cyrene-ai/ai-core/internal/orchestrator"
"github.com/yourname/cyrene-ai/ai-core/internal/persona"
"github.com/yourname/cyrene-ai/ai-core/internal/rag"
"github.com/yourname/cyrene-ai/ai-core/internal/subsession"
"github.com/yourname/cyrene-ai/ai-core/internal/tools"
)
@@ -130,6 +131,13 @@ func main() {
hostManager.SetAllowedDirs([]string{dataDir, os.TempDir(), "."})
log.Printf("主机操控管理器已就绪: 沙箱执行 + 文件隔离 (数据目录=%s)", dataDir)
// 初始化 RAG 知识库 (Phase 6.6: 知识库 RAG 增强)
knowledgeDir := getEnv("KNOWLEDGE_DIR", "./data/knowledge")
ragEmbedder := rag.NewEmbedder(cfg.LLMBaseURL, cfg.LLMAPIKey, "text-embedding-3-small")
knowledgeStore := rag.NewKnowledgeStore(ragEmbedder, knowledgeDir)
knowledgeRetriever := rag.NewRetriever(knowledgeStore)
log.Printf("RAG 知识库已就绪: 目录=%s, 嵌入模型=text-embedding-3-small", knowledgeDir)
// 初始化工具注册中心
toolRegistry := tools.NewRegistry()
if getEnvBool("ENABLE_TOOLS", true) {
@@ -161,6 +169,12 @@ func main() {
// Phase 6.3: 视觉理解工具
toolRegistry.Register(tools.NewVisionTool())
// Phase 6.6: 知识库 RAG 工具
if knowledgeRetriever != nil {
toolRegistry.Register(tools.NewKnowledgeSearchTool(knowledgeRetriever))
toolRegistry.Register(tools.NewKnowledgeIngestTool(knowledgeStore))
}
log.Printf("工具注册中心已就绪: %d 个工具 (%v)", len(toolRegistry.ListTools()), toolRegistry.ListTools())
}
@@ -236,6 +250,9 @@ func main() {
subManager.Register(subsession.NewIoTProvider(iotClient, personaDir))
}
subManager.Register(subsession.NewReviewProvider())
if knowledgeRetriever != nil {
subManager.Register(subsession.NewKnowledgeProvider(knowledgeRetriever))
}
log.Printf("子会话管理器已就绪: %d 个提供者 (%v)", len(subManager.ListProviders()), subManager.ListProviders())
// 构建新的 Orchestrator (v2.0) — 传入 purpose 专用适配器