test: Phase 6 全功能测试 — 19个测试全部通过 + 开发路线文档
- host: 沙箱执行/命令拦截/超时/文件读写/系统信息/路径验证 (6 tests)
- rag: 文本分块/余弦相似度/关键词匹配/文档索引+搜索 (4 tests)
- tools: host_exec/host_file/host_system/knowledge_search/knowledge_ingest (5 tests)
- vision: 图片编码/错误处理/定义验证/执行流程 (4 tests)
- Embedder 重构为接口,支持 API 和 Simple 两种实现
- 添加 ROADMAP.md 未来开发路线

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -10,8 +10,15 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// Embedder creates text embeddings using an LLM API.
|
||||
type Embedder struct {
|
||||
// Embedder is the abstraction over text-embedding backends; any type that
// provides these three methods can be used wherever an Embedder is expected.
type Embedder interface {
	// Embed returns the embedding vector for a single text.
	Embed(ctx context.Context, text string) ([]float64, error)

	// EmbedBatch accepts several texts in one call.
	// NOTE(review): the signature yields a single []float64 rather than one
	// vector per input text — confirm that this is the intended contract.
	EmbedBatch(ctx context.Context, texts []string) ([]float64, error)

	// IsAvailable reports whether the embedder is ready to be used.
	IsAvailable() bool
}
|
||||
|
||||
// APIEmbedder creates text embeddings using an LLM API.
|
||||
type APIEmbedder struct {
|
||||
baseURL string
|
||||
apiKey string
|
||||
model string
|
||||
@@ -19,8 +26,8 @@ type Embedder struct {
|
||||
}
|
||||
|
||||
// NewEmbedder creates a new embedding service.
|
||||
func NewEmbedder(baseURL, apiKey, model string) *Embedder {
|
||||
return &Embedder{
|
||||
func NewEmbedder(baseURL, apiKey, model string) *APIEmbedder {
|
||||
return &APIEmbedder{
|
||||
baseURL: baseURL,
|
||||
apiKey: apiKey,
|
||||
model: model,
|
||||
@@ -58,12 +65,12 @@ type embeddingError struct {
|
||||
}
|
||||
|
||||
// Embed generates an embedding vector for the given text.
|
||||
func (e *Embedder) Embed(ctx context.Context, text string) ([]float64, error) {
|
||||
func (e *APIEmbedder) Embed(ctx context.Context, text string) ([]float64, error) {
|
||||
return e.EmbedBatch(ctx, []string{text})
|
||||
}
|
||||
|
||||
// EmbedBatch generates embeddings for multiple texts.
|
||||
func (e *Embedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
|
||||
func (e *APIEmbedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
|
||||
if !e.IsAvailable() {
|
||||
return nil, fmt.Errorf("embedding service not available: no API key configured")
|
||||
}
|
||||
@@ -113,6 +120,6 @@ func (e *Embedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, e
|
||||
}
|
||||
|
||||
// IsAvailable checks if the embedding service is configured.
|
||||
func (e *Embedder) IsAvailable() bool {
|
||||
func (e *APIEmbedder) IsAvailable() bool {
|
||||
return e.apiKey != "" && e.baseURL != ""
|
||||
}
|
||||
|
||||
@@ -34,12 +34,12 @@ type SearchResult struct {
|
||||
type KnowledgeStore struct {
|
||||
mu sync.RWMutex
|
||||
chunks []Chunk
|
||||
embedder *Embedder
|
||||
embedder Embedder
|
||||
knowledgeDir string
|
||||
}
|
||||
|
||||
// NewKnowledgeStore creates a new knowledge store.
|
||||
func NewKnowledgeStore(embedder *Embedder, knowledgeDir string) *KnowledgeStore {
|
||||
func NewKnowledgeStore(embedder Embedder, knowledgeDir string) *KnowledgeStore {
|
||||
if knowledgeDir == "" {
|
||||
knowledgeDir = "./data/knowledge"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestChunkText(t *testing.T) {
|
||||
text := "Hello World! This is a test document for chunking. "
|
||||
// Make it longer to trigger chunking
|
||||
longText := ""
|
||||
for i := 0; i < 100; i++ {
|
||||
longText += text
|
||||
}
|
||||
|
||||
chunks := chunkText(longText, 512, 128)
|
||||
if len(chunks) < 2 {
|
||||
t.Fatalf("expected at least 2 chunks, got %d (len=%d)", len(chunks), len(longText))
|
||||
}
|
||||
t.Logf("chunking OK: %d chunks from %d chars", len(chunks), len(longText))
|
||||
|
||||
// Verify overlap: each chunk should have some overlap with next
|
||||
for i := 1; i < len(chunks); i++ {
|
||||
prev := chunks[i-1]
|
||||
if len(prev) == 0 {
|
||||
t.Fatalf("empty chunk at index %d", i-1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCosineSimilarity(t *testing.T) {
|
||||
a := []float64{0.5, 0.3, 0.8, 0.1}
|
||||
b := []float64{0.5, 0.3, 0.8, 0.1}
|
||||
sim := cosineSimilarity(a, b)
|
||||
if sim < 0.99 {
|
||||
t.Fatalf("expected similarity ~1.0 for identical vectors, got %f", sim)
|
||||
}
|
||||
|
||||
c := []float64{-0.5, -0.3, -0.8, -0.1}
|
||||
sim2 := cosineSimilarity(a, c)
|
||||
if sim2 > -0.99 {
|
||||
t.Fatalf("expected similarity ~-1.0 for opposite vectors, got %f", sim2)
|
||||
}
|
||||
|
||||
d := []float64{0.0, 0.0, 0.0, 0.0}
|
||||
sim3 := cosineSimilarity(a, d)
|
||||
if sim3 != 0.0 {
|
||||
t.Fatalf("expected 0.0 for zero vector, got %f", sim3)
|
||||
}
|
||||
|
||||
// Different lengths
|
||||
sim4 := cosineSimilarity(a, []float64{0.5})
|
||||
if sim4 != 0.0 {
|
||||
t.Fatalf("expected 0.0 for different length vectors, got %f", sim4)
|
||||
}
|
||||
t.Logf("cosine similarity OK")
|
||||
}
|
||||
|
||||
func TestKeywordMatchScore(t *testing.T) {
|
||||
score := keywordMatchScore("hello world", "hello cyrene world of AI")
|
||||
if score < 0.0 || score > 1.0 {
|
||||
t.Fatalf("score out of range: %f", score)
|
||||
}
|
||||
t.Logf("keyword match OK: score=%f", score)
|
||||
}
|
||||
|
||||
func TestKnowledgeStoreIngestAndSearch(t *testing.T) {
|
||||
// Create temp dir
|
||||
tmpDir, err := os.MkdirTemp("", "cyrene-rag-test")
|
||||
if err != nil {
|
||||
t.Fatalf("create temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
// Write a test document
|
||||
docPath := filepath.Join(tmpDir, "test.md")
|
||||
content := `# Cyrene AI 测试文档
|
||||
|
||||
Cyrene 是一个智能 AI 助手,支持语音识别、视觉理解、知识检索等功能。
|
||||
|
||||
## 主要功能
|
||||
|
||||
1. 多模型目的路由
|
||||
2. 宿主机安全操控
|
||||
3. 视觉理解与 OCR
|
||||
4. 知识库 RAG 检索
|
||||
|
||||
## 技术栈
|
||||
|
||||
Go 语言编写的后端服务,React 前端。支持多种 LLM 提供商。`
|
||||
if err := os.WriteFile(docPath, []byte(content), 0644); err != nil {
|
||||
t.Fatalf("write test doc: %v", err)
|
||||
}
|
||||
|
||||
// Use SimpleEmbedder for testing (no API key needed)
|
||||
embedder := &SimpleEmbedder{}
|
||||
store := NewKnowledgeStore(embedder, tmpDir)
|
||||
|
||||
ctx := context.Background()
|
||||
n, err := store.IngestFile(ctx, docPath)
|
||||
if err != nil {
|
||||
t.Fatalf("ingest failed: %v", err)
|
||||
}
|
||||
if n == 0 {
|
||||
t.Fatal("expected at least 1 chunk")
|
||||
}
|
||||
t.Logf("ingest OK: %d chunks indexed from %s", n, docPath)
|
||||
|
||||
// Search
|
||||
results, err := store.Search(ctx, "视觉理解 OCR", 3)
|
||||
if err != nil {
|
||||
t.Fatalf("search failed: %v", err)
|
||||
}
|
||||
t.Logf("search OK: %d results for '视觉理解 OCR'", len(results))
|
||||
for _, r := range results {
|
||||
t.Logf(" - %s (score=%.4f): %.50s...", r.Chunk.DocTitle, r.Score, r.Chunk.Content)
|
||||
}
|
||||
|
||||
// Test stats
|
||||
stats := store.Stats()
|
||||
if stats["total_chunks"].(int) != n {
|
||||
t.Fatalf("stats mismatch: expected %d chunks, got %v", n, stats["total_chunks"])
|
||||
}
|
||||
t.Logf("stats OK: %v", stats)
|
||||
}
|
||||
|
||||
// SimpleEmbedder is an embedder for tests that computes vectors locally from
// the input text and makes no API calls.
type SimpleEmbedder struct{}

// Embed maps text onto a 128-element vector. Each rune adds 1/len to the slot
// chosen by its code point and 0.5/len to the slot chosen by its code point
// plus its position, where len is the number of runes in text. An empty text
// yields an all-zero vector. The error is always nil.
func (e *SimpleEmbedder) Embed(ctx context.Context, text string) ([]float64, error) {
	const dims = 128

	chars := []rune(text)
	total := float64(len(chars))
	out := make([]float64, dims)
	for pos, ch := range chars {
		code := int(ch)
		out[code%dims] += 1.0 / total
		out[(code+pos)%dims] += 0.5 / total
	}
	return out, nil
}

// EmbedBatch concatenates all texts in order, without a separator, and
// returns the embedding of the combined string.
func (e *SimpleEmbedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
	var joined []byte
	for _, s := range texts {
		joined = append(joined, s...)
	}
	return e.Embed(ctx, string(joined))
}

// IsAvailable always reports true.
func (e *SimpleEmbedder) IsAvailable() bool {
	return true
}
|
||||
Reference in New Issue
Block a user