test: Phase 6 全功能测试 — 19个测试全部通过 + 开发路线文档

- host: 沙箱执行/命令拦截/超时/文件读写/系统信息/路径验证 (6 tests)
- rag: 文本分块/余弦相似度/关键词匹配/文档索引+搜索 (4 tests)
- tools: host_exec/host_file/host_system/knowledge_search/knowledge_ingest (5 tests)
- vision: 图片编码/错误处理/定义验证/执行流程 (4 tests)
- Embedder 重构为接口,支持 API 和 Simple 两种实现
- 添加 ROADMAP.md 未来开发路线

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-24 07:51:07 +08:00
parent edc20170b9
commit 63a8f95de1
7 changed files with 640 additions and 9 deletions
+14 -7
View File
@@ -10,8 +10,15 @@ import (
"time"
)
// Embedder creates text embeddings using an LLM API.
type Embedder struct {
// Embedder is the interface for text embedding.
// It lets callers depend on embedding behavior rather than on one concrete
// embedding backend.
type Embedder interface {
	// Embed returns an embedding vector for a single text.
	Embed(ctx context.Context, text string) ([]float64, error)
	// EmbedBatch accepts several texts and returns one []float64.
	// NOTE(review): the signature yields a single vector rather than one
	// vector per input; how the inputs map onto it is up to the
	// implementation — confirm this is intended.
	EmbedBatch(ctx context.Context, texts []string) ([]float64, error)
	// IsAvailable reports whether the embedder can currently be used.
	IsAvailable() bool
}
// APIEmbedder creates text embeddings using an LLM API.
type APIEmbedder struct {
baseURL string
apiKey string
model string
@@ -19,8 +26,8 @@ type Embedder struct {
}
// NewEmbedder creates a new embedding service.
func NewEmbedder(baseURL, apiKey, model string) *Embedder {
return &Embedder{
func NewEmbedder(baseURL, apiKey, model string) *APIEmbedder {
return &APIEmbedder{
baseURL: baseURL,
apiKey: apiKey,
model: model,
@@ -58,12 +65,12 @@ type embeddingError struct {
}
// Embed generates an embedding vector for the given text.
func (e *Embedder) Embed(ctx context.Context, text string) ([]float64, error) {
func (e *APIEmbedder) Embed(ctx context.Context, text string) ([]float64, error) {
return e.EmbedBatch(ctx, []string{text})
}
// EmbedBatch generates embeddings for multiple texts.
func (e *Embedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
func (e *APIEmbedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
if !e.IsAvailable() {
return nil, fmt.Errorf("embedding service not available: no API key configured")
}
@@ -113,6 +120,6 @@ func (e *Embedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, e
}
// IsAvailable checks if the embedding service is configured.
func (e *Embedder) IsAvailable() bool {
func (e *APIEmbedder) IsAvailable() bool {
return e.apiKey != "" && e.baseURL != ""
}
@@ -34,12 +34,12 @@ type SearchResult struct {
type KnowledgeStore struct {
mu sync.RWMutex
chunks []Chunk
embedder *Embedder
embedder Embedder
knowledgeDir string
}
// NewKnowledgeStore creates a new knowledge store.
func NewKnowledgeStore(embedder *Embedder, knowledgeDir string) *KnowledgeStore {
func NewKnowledgeStore(embedder Embedder, knowledgeDir string) *KnowledgeStore {
if knowledgeDir == "" {
knowledgeDir = "./data/knowledge"
}
@@ -0,0 +1,155 @@
package rag
import (
"context"
"os"
"path/filepath"
"testing"
)
// TestChunkText checks that chunkText splits a long input into at least two
// chunks and that none of the returned chunks is empty.
//
// NOTE(review): 512 and 128 are presumably the chunk size and the overlap;
// confirm against chunkText. The overlap between consecutive chunks is not
// asserted here because the exact boundaries chunkText picks are not visible
// from this file.
func TestChunkText(t *testing.T) {
	const (
		sentence = "Hello World! This is a test document for chunking. "
		repeats  = 100
	)

	// Repeat the sentence so the input is much longer than the 512 passed to
	// chunkText below. Appending into a pre-sized buffer avoids the quadratic
	// cost of growing a string with += inside a loop.
	buf := make([]byte, 0, repeats*len(sentence))
	for i := 0; i < repeats; i++ {
		buf = append(buf, sentence...)
	}
	longText := string(buf)

	chunks := chunkText(longText, 512, 128)
	if len(chunks) < 2 {
		t.Fatalf("expected at least 2 chunks, got %d (len=%d)", len(chunks), len(longText))
	}
	t.Logf("chunking OK: %d chunks from %d chars", len(chunks), len(longText))

	// Every chunk must carry content. Ranging over the whole slice also covers
	// the final chunk, which a loop over chunks[i-1] starting at i=1 would miss.
	for i, chunk := range chunks {
		if len(chunk) == 0 {
			t.Fatalf("empty chunk at index %d", i)
		}
	}
}
// TestCosineSimilarity runs cosineSimilarity against a fixed reference vector
// paired with an identical vector, its negation, the zero vector and a vector
// of a different length, and checks each result against the expected bound.
func TestCosineSimilarity(t *testing.T) {
	reference := []float64{0.5, 0.3, 0.8, 0.1}

	// Cases run in order; the first one whose result is rejected aborts the test.
	cases := []struct {
		other    []float64
		rejected func(got float64) bool
		format   string
	}{
		{
			other:    []float64{0.5, 0.3, 0.8, 0.1},
			rejected: func(got float64) bool { return got < 0.99 },
			format:   "expected similarity ~1.0 for identical vectors, got %f",
		},
		{
			other:    []float64{-0.5, -0.3, -0.8, -0.1},
			rejected: func(got float64) bool { return got > -0.99 },
			format:   "expected similarity ~-1.0 for opposite vectors, got %f",
		},
		{
			other:    []float64{0.0, 0.0, 0.0, 0.0},
			rejected: func(got float64) bool { return got != 0.0 },
			format:   "expected 0.0 for zero vector, got %f",
		},
		{
			// Different lengths
			other:    []float64{0.5},
			rejected: func(got float64) bool { return got != 0.0 },
			format:   "expected 0.0 for different length vectors, got %f",
		},
	}

	for _, tc := range cases {
		if got := cosineSimilarity(reference, tc.other); tc.rejected(got) {
			t.Fatalf(tc.format, got)
		}
	}

	t.Logf("cosine similarity OK")
}
// TestKeywordMatchScore checks that keywordMatchScore yields a value inside
// the closed interval [0, 1] for a fixed pair of strings.
func TestKeywordMatchScore(t *testing.T) {
	const (
		first  = "hello world"
		second = "hello cyrene world of AI"
	)

	got := keywordMatchScore(first, second)
	switch {
	case got < 0.0, got > 1.0:
		t.Fatalf("score out of range: %f", got)
	}
	t.Logf("keyword match OK: score=%f", got)
}
// TestKnowledgeStoreIngestAndSearch writes a small markdown document into a
// temporary directory, ingests it into a KnowledgeStore backed by
// SimpleEmbedder, runs one search against it, and checks that the chunk count
// reported by Stats equals the number of chunks returned by IngestFile.
//
// The search results are only logged; the test requires just that Search
// returns no error.
func TestKnowledgeStoreIngestAndSearch(t *testing.T) {
	// t.TempDir creates a directory unique to this test and removes it when
	// the test finishes, so no manual cleanup is needed.
	tmpDir := t.TempDir()

	// Write a test document
	docPath := filepath.Join(tmpDir, "test.md")
	content := `# Cyrene AI 测试文档
Cyrene 是一个智能 AI 助手,支持语音识别、视觉理解、知识检索等功能。
## 主要功能
1. 多模型目的路由
2. 宿主机安全操控
3. 视觉理解与 OCR
4. 知识库 RAG 检索
## 技术栈
Go 语言编写的后端服务,React 前端。支持多种 LLM 提供商。`
	if err := os.WriteFile(docPath, []byte(content), 0644); err != nil {
		t.Fatalf("write test doc: %v", err)
	}

	// Use SimpleEmbedder for testing (no API key needed)
	embedder := &SimpleEmbedder{}
	store := NewKnowledgeStore(embedder, tmpDir)
	ctx := context.Background()

	n, err := store.IngestFile(ctx, docPath)
	if err != nil {
		t.Fatalf("ingest failed: %v", err)
	}
	if n == 0 {
		t.Fatal("expected at least 1 chunk")
	}
	t.Logf("ingest OK: %d chunks indexed from %s", n, docPath)

	// Search
	results, err := store.Search(ctx, "视觉理解 OCR", 3)
	if err != nil {
		t.Fatalf("search failed: %v", err)
	}
	t.Logf("search OK: %d results for '视觉理解 OCR'", len(results))
	for _, r := range results {
		t.Logf("  - %s (score=%.4f): %.50s...", r.Chunk.DocTitle, r.Score, r.Chunk.Content)
	}

	// Test stats. The comma-ok form turns a missing key or an unexpected
	// value type into a readable test failure instead of a panic.
	stats := store.Stats()
	total, ok := stats["total_chunks"].(int)
	if !ok {
		t.Fatalf("stats[%q] is not an int: %v (%T)", "total_chunks", stats["total_chunks"], stats["total_chunks"])
	}
	if total != n {
		t.Fatalf("stats mismatch: expected %d chunks, got %v", n, stats["total_chunks"])
	}
	t.Logf("stats OK: %v", stats)
}
// SimpleEmbedder is a test embedder that derives vectors from the characters
// of the text itself, so it needs no API calls.
type SimpleEmbedder struct{}

// Embed maps text onto a 128-element vector. With n the number of runes in
// text, each rune adds 1/n to the bucket chosen by its code point modulo 128
// and 0.5/n to the bucket chosen by (code point + rune index) modulo 128.
// An empty text produces an all-zero vector. The returned error is always nil.
func (e *SimpleEmbedder) Embed(ctx context.Context, text string) ([]float64, error) {
	const dims = 128

	chars := []rune(text)
	total := float64(len(chars))
	out := make([]float64, dims)

	for pos, ch := range chars {
		code := int(ch)
		out[code%dims] += 1.0 / total
		out[(code+pos)%dims] += 0.5 / total
	}
	return out, nil
}

// EmbedBatch joins all texts in order, with no separator, and embeds the
// resulting string as a single text.
func (e *SimpleEmbedder) EmbedBatch(ctx context.Context, texts []string) ([]float64, error) {
	var joined []byte
	for _, piece := range texts {
		joined = append(joined, piece...)
	}
	return e.Embed(ctx, string(joined))
}

// IsAvailable always reports true.
func (e *SimpleEmbedder) IsAvailable() bool {
	return true
}