package store

import (
	"crypto/rand"
	"database/sql"
	"errors"
	"fmt"
	"strings"
	"time"
	"unicode/utf8"

	"git.yeij.top/AskaEth/Cyrene/pkg/logger"
)

// ========== Models ==========

// KnowledgeBase is a named collection of documents owned by one user.
type KnowledgeBase struct {
	ID            string    `json:"id"`
	UserID        string    `json:"user_id"`
	Name          string    `json:"name"`
	Description   string    `json:"description"`
	DocumentCount int       `json:"document_count"`
	ChunkCount    int       `json:"chunk_count"`
	CreatedAt     time.Time `json:"created_at"`
	UpdatedAt     time.Time `json:"updated_at"`
}

// KnowledgeDocument is a single document stored inside a knowledge base.
type KnowledgeDocument struct {
	ID          string    `json:"id"`
	KBID        string    `json:"kb_id"`
	UserID      string    `json:"user_id"`
	Title       string    `json:"title"`
	SourceType  string    `json:"source_type"`  // "file", "text", "url"
	SourceRef   string    `json:"source_ref"`   // file ID or URL
	ContentType string    `json:"content_type"` // "text/plain", "text/markdown", "text/html"
	RawContent  string    `json:"raw_content"`
	ChunkCount  int       `json:"chunk_count"`
	CreatedAt   time.Time `json:"created_at"`
}

// KnowledgeChunk is one slice of a document's text.
type KnowledgeChunk struct {
	ID         string    `json:"id"`
	DocID      string    `json:"doc_id"`
	KBID       string    `json:"kb_id"`
	ChunkIndex int       `json:"chunk_index"`
	Content    string    `json:"content"`
	TokenCount int       `json:"token_count"`
	CreatedAt  time.Time `json:"created_at"`
}

// SearchChunkResult is a chunk returned by a search, together with its
// relevance score and the titles of the owning document and knowledge base.
type SearchChunkResult struct {
	KnowledgeChunk
	Relevance     float64 `json:"relevance"`
	DocumentTitle string  `json:"document_title"`
	KBName        string  `json:"kb_name"`
	Headline      string  `json:"headline"`
}

// ========== KnowledgeStore ==========

// KnowledgeStore persists knowledge bases, documents and chunks in a SQL
// database (the statements use PostgreSQL syntax).
type KnowledgeStore struct {
	db *sql.DB
}

// knowledgeLikeEscaper escapes the LIKE/ILIKE metacharacters (backslash is the
// default escape character) so that user keywords are matched literally.
var knowledgeLikeEscaper = strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`)

// NewKnowledgeStore wraps an existing database handle and creates the
// knowledge-base tables if they do not exist yet.
func NewKnowledgeStore(db *sql.DB) (*KnowledgeStore, error) {
	store := &KnowledgeStore{db: db}
	if err := store.migrate(); err != nil {
		return nil, fmt.Errorf("知识库表迁移失败: %w", err)
	}
	logger.Println("[KnowledgeStore] 知识库持久化存储已初始化")
	return store, nil
}

// migrate creates the tables and indexes used by the store. Every statement is
// idempotent (IF NOT EXISTS). A failure to create the GIN index is only logged.
func (s *KnowledgeStore) migrate() error {
	queries := []string{
		// Knowledge bases.
		`CREATE TABLE IF NOT EXISTS knowledge_bases (
			id VARCHAR(64) PRIMARY KEY,
			user_id VARCHAR(64) NOT NULL,
			name VARCHAR(255) NOT NULL,
			description TEXT DEFAULT '',
			document_count INT DEFAULT 0,
			chunk_count INT DEFAULT 0,
			created_at TIMESTAMPTZ DEFAULT NOW(),
			updated_at TIMESTAMPTZ DEFAULT NOW()
		)`,
		`CREATE INDEX IF NOT EXISTS idx_kb_user_id ON knowledge_bases(user_id)`,
		// Documents.
		`CREATE TABLE IF NOT EXISTS knowledge_documents (
			id VARCHAR(64) PRIMARY KEY,
			kb_id VARCHAR(64) NOT NULL REFERENCES knowledge_bases(id) ON DELETE CASCADE,
			user_id VARCHAR(64) NOT NULL,
			title VARCHAR(512) NOT NULL,
			source_type VARCHAR(32) DEFAULT 'text',
			source_ref VARCHAR(1024) DEFAULT '',
			content_type VARCHAR(64) DEFAULT 'text/plain',
			raw_content TEXT DEFAULT '',
			chunk_count INT DEFAULT 0,
			created_at TIMESTAMPTZ DEFAULT NOW()
		)`,
		`CREATE INDEX IF NOT EXISTS idx_kd_kb_id ON knowledge_documents(kb_id)`,
		`CREATE INDEX IF NOT EXISTS idx_kd_user_id ON knowledge_documents(user_id)`,
		// Chunks.
		`CREATE TABLE IF NOT EXISTS knowledge_chunks (
			id VARCHAR(64) PRIMARY KEY,
			doc_id VARCHAR(64) NOT NULL REFERENCES knowledge_documents(id) ON DELETE CASCADE,
			kb_id VARCHAR(64) NOT NULL,
			chunk_index INT NOT NULL,
			content TEXT NOT NULL,
			token_count INT DEFAULT 0,
			tsv TSVECTOR,
			created_at TIMESTAMPTZ DEFAULT NOW()
		)`,
		`CREATE INDEX IF NOT EXISTS idx_kc_doc_id ON knowledge_chunks(doc_id)`,
		`CREATE INDEX IF NOT EXISTS idx_kc_kb_id ON knowledge_chunks(kb_id)`,
	}
	for _, q := range queries {
		if _, err := s.db.Exec(q); err != nil {
			return fmt.Errorf("迁移SQL执行失败: %w\nSQL: %s", err, q)
		}
	}

	// The GIN index is optional: creation may fail (for example for lack of
	// privileges), in which case the failure is logged and migration succeeds.
	_, err := s.db.Exec(`CREATE INDEX IF NOT EXISTS idx_kc_tsv_gin ON knowledge_chunks USING GIN(tsv)`)
	if err != nil {
		logger.Printf("[KnowledgeStore] ⚠ GIN索引创建失败(将使用ILIKE降级搜索): %v", err)
	}
	return nil
}

// ========== Knowledge base CRUD ==========

// CreateKB inserts kb. Zero CreatedAt/UpdatedAt fields are set to the current
// time on kb before the insert.
func (s *KnowledgeStore) CreateKB(kb *KnowledgeBase) error {
	now := time.Now()
	if kb.CreatedAt.IsZero() {
		kb.CreatedAt = now
	}
	if kb.UpdatedAt.IsZero() {
		kb.UpdatedAt = now
	}
	_, err := s.db.Exec(
		`INSERT INTO knowledge_bases
			(id, user_id, name, description, document_count, chunk_count, created_at, updated_at)
		 VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
		kb.ID, kb.UserID, kb.Name, kb.Description, kb.DocumentCount, kb.ChunkCount, kb.CreatedAt, kb.UpdatedAt,
	)
	if err != nil {
		return fmt.Errorf("创建知识库失败: %w", err)
	}
	return nil
}

// GetKBsByUser returns all knowledge bases of userID, most recently updated
// first. The result is an empty, non-nil slice when the user has none.
func (s *KnowledgeStore) GetKBsByUser(userID string) ([]KnowledgeBase, error) {
	rows, err := s.db.Query(
		`SELECT id, user_id, name, description, document_count, chunk_count, created_at, updated_at
		 FROM knowledge_bases
		 WHERE user_id = $1
		 ORDER BY updated_at DESC`,
		userID,
	)
	if err != nil {
		return nil, fmt.Errorf("查询知识库列表失败: %w", err)
	}
	defer rows.Close()

	kbs := []KnowledgeBase{}
	for rows.Next() {
		var kb KnowledgeBase
		if err := rows.Scan(&kb.ID, &kb.UserID, &kb.Name, &kb.Description,
			&kb.DocumentCount, &kb.ChunkCount, &kb.CreatedAt, &kb.UpdatedAt); err != nil {
			return nil, fmt.Errorf("扫描知识库行失败: %w", err)
		}
		kbs = append(kbs, kb)
	}
	// An iteration error means the list is incomplete; do not hand it out.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("查询知识库列表失败: %w", err)
	}
	return kbs, nil
}

// GetKB returns the knowledge base with the given id, or (nil, nil) when no
// such row exists.
func (s *KnowledgeStore) GetKB(id string) (*KnowledgeBase, error) {
	var kb KnowledgeBase
	err := s.db.QueryRow(
		`SELECT id, user_id, name, description, document_count, chunk_count, created_at, updated_at
		 FROM knowledge_bases
		 WHERE id = $1`,
		id,
	).Scan(&kb.ID, &kb.UserID, &kb.Name, &kb.Description,
		&kb.DocumentCount, &kb.ChunkCount, &kb.CreatedAt, &kb.UpdatedAt)
	if errors.Is(err, sql.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("查询知识库失败: %w", err)
	}
	return &kb, nil
}

// UpdateKB sets the name and description of a knowledge base and bumps its
// updated_at. It does not report whether a row with that id existed.
func (s *KnowledgeStore) UpdateKB(id string, name, description string) error {
	_, err := s.db.Exec(
		`UPDATE knowledge_bases SET name = $1, description = $2, updated_at = NOW() WHERE id = $3`,
		name, description, id,
	)
	if err != nil {
		return fmt.Errorf("更新知识库失败: %w", err)
	}
	return nil
}

// DeleteKB deletes a knowledge base. Its documents and their chunks are removed
// by the ON DELETE CASCADE foreign keys declared in migrate.
func (s *KnowledgeStore) DeleteKB(id string) error {
	_, err := s.db.Exec(`DELETE FROM knowledge_bases WHERE id = $1`, id)
	if err != nil {
		return fmt.Errorf("删除知识库失败: %w", err)
	}
	return nil
}

// updateKBStats recomputes document_count and chunk_count of a knowledge base
// from the document and chunk tables and bumps updated_at.
func (s *KnowledgeStore) updateKBStats(kbID string) error {
	_, err := s.db.Exec(
		`UPDATE knowledge_bases SET
			document_count = (SELECT COUNT(*) FROM knowledge_documents WHERE kb_id = $1),
			chunk_count = (SELECT COUNT(*) FROM knowledge_chunks WHERE kb_id = $1),
			updated_at = NOW()
		 WHERE id = $1`,
		kbID,
	)
	return err
}

// ========== Document CRUD ==========

// AddDocument inserts doc and then refreshes the statistics of its knowledge
// base. A zero CreatedAt is set to the current time on doc. A failure to
// refresh the statistics is logged and does not fail the call.
func (s *KnowledgeStore) AddDocument(doc *KnowledgeDocument) error {
	if doc.CreatedAt.IsZero() {
		doc.CreatedAt = time.Now()
	}
	_, err := s.db.Exec(
		`INSERT INTO knowledge_documents
			(id, kb_id, user_id, title, source_type, source_ref, content_type, raw_content, chunk_count, created_at)
		 VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`,
		doc.ID, doc.KBID, doc.UserID, doc.Title, doc.SourceType, doc.SourceRef,
		doc.ContentType, doc.RawContent, doc.ChunkCount, doc.CreatedAt,
	)
	if err != nil {
		return fmt.Errorf("添加文档失败: %w", err)
	}

	// Best effort: the document is stored even if the counters go stale.
	if err := s.updateKBStats(doc.KBID); err != nil {
		logger.Printf("[KnowledgeStore] 更新知识库统计失败: %v", err)
	}
	return nil
}

// GetDocument returns the document with the given id, or (nil, nil) when no
// such row exists.
func (s *KnowledgeStore) GetDocument(id string) (*KnowledgeDocument, error) {
	var doc KnowledgeDocument
	err := s.db.QueryRow(
		`SELECT id, kb_id, user_id, title, source_type, source_ref, content_type, raw_content, chunk_count, created_at
		 FROM knowledge_documents
		 WHERE id = $1`,
		id,
	).Scan(&doc.ID, &doc.KBID, &doc.UserID, &doc.Title, &doc.SourceType, &doc.SourceRef,
		&doc.ContentType, &doc.RawContent, &doc.ChunkCount, &doc.CreatedAt)
	if errors.Is(err, sql.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("查询文档失败: %w", err)
	}
	return &doc, nil
}

// GetDocumentsByKB returns all documents of a knowledge base, newest first. The
// result is an empty, non-nil slice when there are none.
func (s *KnowledgeStore) GetDocumentsByKB(kbID string) ([]KnowledgeDocument, error) {
	rows, err := s.db.Query(
		`SELECT id, kb_id, user_id, title, source_type, source_ref, content_type, raw_content, chunk_count, created_at
		 FROM knowledge_documents
		 WHERE kb_id = $1
		 ORDER BY created_at DESC`,
		kbID,
	)
	if err != nil {
		return nil, fmt.Errorf("查询文档列表失败: %w", err)
	}
	defer rows.Close()

	docs := []KnowledgeDocument{}
	for rows.Next() {
		var doc KnowledgeDocument
		if err := rows.Scan(&doc.ID, &doc.KBID, &doc.UserID, &doc.Title, &doc.SourceType, &doc.SourceRef,
			&doc.ContentType, &doc.RawContent, &doc.ChunkCount, &doc.CreatedAt); err != nil {
			return nil, fmt.Errorf("扫描文档行失败: %w", err)
		}
		docs = append(docs, doc)
	}
	// An iteration error means the list is incomplete; do not hand it out.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("查询文档列表失败: %w", err)
	}
	return docs, nil
}

// UpdateDocumentChunkCount stores count as the chunk_count of a document.
func (s *KnowledgeStore) UpdateDocumentChunkCount(docID string, count int) error {
	_, err := s.db.Exec(
		`UPDATE knowledge_documents SET chunk_count = $1 WHERE id = $2`,
		count, docID,
	)
	return err
}

// DeleteDocument deletes a document (its chunks go with it through the
// ON DELETE CASCADE foreign key) and refreshes the statistics of the owning
// knowledge base. Deleting a document that does not exist is not an error.
func (s *KnowledgeStore) DeleteDocument(id string) error {
	// Look up kb_id first: it is needed to refresh the statistics afterwards.
	var kbID string
	err := s.db.QueryRow(`SELECT kb_id FROM knowledge_documents WHERE id = $1`, id).Scan(&kbID)
	if errors.Is(err, sql.ErrNoRows) {
		return nil
	}
	if err != nil {
		return fmt.Errorf("查询文档失败: %w", err)
	}

	if _, err := s.db.Exec(`DELETE FROM knowledge_documents WHERE id = $1`, id); err != nil {
		return fmt.Errorf("删除文档失败: %w", err)
	}

	// Best effort: the document is gone even if the counters go stale.
	if err := s.updateKBStats(kbID); err != nil {
		logger.Printf("[KnowledgeStore] 更新知识库统计失败: %v", err)
	}
	return nil
}

// ========== Chunk operations ==========

// AddChunk inserts a chunk. A zero CreatedAt is set to the current time on
// chunk. The first attempt also fills the tsv column, using the 'chinese' text
// search configuration when pg_ts_config lists it and 'simple' otherwise. If
// that statement fails for any reason, the chunk is inserted again without tsv.
//
// NOTE(review): the first error is discarded, and a chunk stored by the
// fallback has a NULL tsv, so it presumably cannot be matched by
// searchWithFullText. It also looks like the CASE expression may fail as a
// whole when the 'chinese' configuration is missing — confirm against the
// target PostgreSQL version.
func (s *KnowledgeStore) AddChunk(chunk *KnowledgeChunk) error {
	if chunk.CreatedAt.IsZero() {
		chunk.CreatedAt = time.Now()
	}

	_, err := s.db.Exec(
		`INSERT INTO knowledge_chunks (id, doc_id, kb_id, chunk_index, content, token_count, tsv, created_at)
		 VALUES ($1, $2, $3, $4, $5, $6,
			CASE WHEN (SELECT count(*) FROM pg_ts_config WHERE cfgname = 'chinese') > 0
				THEN to_tsvector('chinese', $5)
				ELSE to_tsvector('simple', $5)
			END,
			$7)`,
		chunk.ID, chunk.DocID, chunk.KBID, chunk.ChunkIndex, chunk.Content, chunk.TokenCount, chunk.CreatedAt,
	)
	if err == nil {
		return nil
	}

	// Fallback: store the chunk without a tsvector.
	_, err = s.db.Exec(
		`INSERT INTO knowledge_chunks (id, doc_id, kb_id, chunk_index, content, token_count, created_at)
		 VALUES ($1, $2, $3, $4, $5, $6, $7)`,
		chunk.ID, chunk.DocID, chunk.KBID, chunk.ChunkIndex, chunk.Content, chunk.TokenCount, chunk.CreatedAt,
	)
	if err != nil {
		return fmt.Errorf("添加分块失败: %w", err)
	}
	return nil
}

// DeleteChunksByDocID deletes every chunk of a document.
func (s *KnowledgeStore) DeleteChunksByDocID(docID string) error {
	_, err := s.db.Exec(`DELETE FROM knowledge_chunks WHERE doc_id = $1`, docID)
	return err
}

// GetChunksByDocID returns the chunks of a document ordered by chunk_index. The
// result is an empty, non-nil slice when there are none.
func (s *KnowledgeStore) GetChunksByDocID(docID string) ([]KnowledgeChunk, error) {
	rows, err := s.db.Query(
		`SELECT id, doc_id, kb_id, chunk_index, content, token_count, created_at
		 FROM knowledge_chunks
		 WHERE doc_id = $1
		 ORDER BY chunk_index ASC`,
		docID,
	)
	if err != nil {
		return nil, fmt.Errorf("查询分块失败: %w", err)
	}
	defer rows.Close()

	chunks := []KnowledgeChunk{}
	for rows.Next() {
		var c KnowledgeChunk
		if err := rows.Scan(&c.ID, &c.DocID, &c.KBID, &c.ChunkIndex, &c.Content, &c.TokenCount, &c.CreatedAt); err != nil {
			return nil, fmt.Errorf("扫描分块行失败: %w", err)
		}
		chunks = append(chunks, c)
	}
	// An iteration error means the list is incomplete; do not hand it out.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("查询分块失败: %w", err)
	}
	return chunks, nil
}

// ========== Chunking ==========

// ChunkDocument re-chunks a stored document: it deletes the document's existing
// chunks, splits RawContent into chunks of at most 500 runes (50-rune overlap
// for hard-split text), stores them, and refreshes the chunk counters. It
// returns the number of chunks stored.
//
// The steps run as separate statements, not in a transaction: if storing a
// chunk fails, the old chunks are already deleted, the chunks stored so far
// remain, and the counters are not refreshed.
func (s *KnowledgeStore) ChunkDocument(docID string) (int, error) {
	doc, err := s.GetDocument(docID)
	if err != nil {
		return 0, err
	}
	if doc == nil {
		return 0, fmt.Errorf("文档不存在: %s", docID)
	}

	if err := s.DeleteChunksByDocID(docID); err != nil {
		return 0, fmt.Errorf("删除旧分块失败: %w", err)
	}

	pieces := splitTextIntoChunks(doc.RawContent, 500, 50)
	for i, content := range pieces {
		chunk := &KnowledgeChunk{
			ID:         generateUUIDv4(),
			DocID:      docID,
			KBID:       doc.KBID,
			ChunkIndex: i,
			Content:    content,
			TokenCount: estimateTokenCount(content),
		}
		if err := s.AddChunk(chunk); err != nil {
			return 0, fmt.Errorf("存储分块 %d 失败: %w", i, err)
		}
	}

	// Counter refreshes are best effort; failures are only logged.
	if err := s.UpdateDocumentChunkCount(docID, len(pieces)); err != nil {
		logger.Printf("[KnowledgeStore] 更新文档分块计数失败: %v", err)
	}
	if err := s.updateKBStats(doc.KBID); err != nil {
		logger.Printf("[KnowledgeStore] 更新知识库统计失败: %v", err)
	}
	return len(pieces), nil
}

// ========== Search ==========

// SearchChunks searches one knowledge base. It tries the full-text query first
// and falls back to ILIKE matching only when the full-text query returns an
// error (an empty full-text result is returned as is). A non-positive limit
// defaults to 5. The result is never nil on success.
func (s *KnowledgeStore) SearchChunks(kbID, query string, limit int) ([]SearchChunkResult, error) {
	if limit <= 0 {
		limit = 5
	}

	results, err := s.searchWithFullText(kbID, query, limit)
	if err != nil {
		logger.Printf("[KnowledgeStore] 全文搜索失败,降级为ILIKE: %v", err)
		results, err = s.searchWithILike(kbID, query, limit)
		if err != nil {
			return nil, err
		}
	}
	if results == nil {
		results = []SearchChunkResult{}
	}
	return results, nil
}

// SearchAllKBs searches every knowledge base owned by userID using ILIKE
// matching only. A non-positive limit defaults to 5. The result is never nil on
// success.
func (s *KnowledgeStore) SearchAllKBs(userID, query string, limit int) ([]SearchChunkResult, error) {
	if limit <= 0 {
		limit = 5
	}
	results, err := s.searchAllWithILike(userID, query, limit)
	if err != nil {
		return nil, err
	}
	if results == nil {
		results = []SearchChunkResult{}
	}
	return results, nil
}

// searchWithFullText searches one knowledge base with
// plainto_tsquery('chinese', query) against the tsv column, ordered by ts_rank.
func (s *KnowledgeStore) searchWithFullText(kbID, query string, limit int) ([]SearchChunkResult, error) {
	rows, err := s.db.Query(
		`SELECT kc.id, kc.doc_id, kc.kb_id, kc.chunk_index, kc.content, kc.token_count, kc.created_at,
			ts_rank(kc.tsv, plainto_tsquery('chinese', $2)) AS relevance,
			kd.title AS document_title,
			kb.name AS kb_name
		 FROM knowledge_chunks kc
		 JOIN knowledge_documents kd ON kc.doc_id = kd.id
		 JOIN knowledge_bases kb ON kc.kb_id = kb.id
		 WHERE kc.kb_id = $1 AND kc.tsv @@ plainto_tsquery('chinese', $2)
		 ORDER BY relevance DESC
		 LIMIT $3`,
		kbID, query, limit,
	)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	return scanSearchResults(rows)
}

// searchWithILike searches one knowledge base for chunks whose content contains
// every keyword of query (case-insensitive), newest chunks first.
func (s *KnowledgeStore) searchWithILike(kbID, query string, limit int) ([]SearchChunkResult, error) {
	return s.searchByILike("kc.kb_id = $1", kbID, query, limit, "ILIKE搜索失败")
}

// searchAllWithILike searches all knowledge bases of a user for chunks whose
// content contains every keyword of query (case-insensitive), newest chunks
// first.
func (s *KnowledgeStore) searchAllWithILike(userID, query string, limit int) ([]SearchChunkResult, error) {
	return s.searchByILike("kb.user_id = $1", userID, query, limit, "全知识库ILIKE搜索失败")
}

// searchByILike runs the keyword search shared by searchWithILike and
// searchAllWithILike.
//
// scopeCond is a trusted SQL fragment that references $1 (never user input) and
// scopeArg is the value bound to $1. Each keyword from tokenizeQuery becomes
// one "content ILIKE $n" condition; all conditions must match. failMsg prefixes
// the error returned when the query fails. A query without keywords yields an
// empty result without touching the database.
func (s *KnowledgeStore) searchByILike(scopeCond string, scopeArg interface{}, query string, limit int, failMsg string) ([]SearchChunkResult, error) {
	keywords := tokenizeQuery(query)
	if len(keywords) == 0 {
		return []SearchChunkResult{}, nil
	}

	conditions := make([]string, 0, len(keywords))
	args := make([]interface{}, 0, len(keywords)+2)
	args = append(args, scopeArg)
	for _, kw := range keywords {
		// Escape %, _ and \ so the keyword is matched literally instead of
		// being interpreted as a pattern.
		args = append(args, "%"+knowledgeLikeEscaper.Replace(kw)+"%")
		conditions = append(conditions, fmt.Sprintf("kc.content ILIKE $%d", len(args)))
	}
	args = append(args, limit)

	// ORDER BY keeps the rows selected by LIMIT deterministic.
	querySQL := fmt.Sprintf(
		`SELECT kc.id, kc.doc_id, kc.kb_id, kc.chunk_index, kc.content, kc.token_count, kc.created_at,
			0.0 AS relevance,
			kd.title AS document_title,
			kb.name AS kb_name
		 FROM knowledge_chunks kc
		 JOIN knowledge_documents kd ON kc.doc_id = kd.id
		 JOIN knowledge_bases kb ON kc.kb_id = kb.id
		 WHERE %s AND (%s)
		 ORDER BY kc.created_at DESC
		 LIMIT $%d`,
		scopeCond, strings.Join(conditions, " AND "), len(args),
	)

	rows, err := s.db.Query(querySQL, args...)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", failMsg, err)
	}
	defer rows.Close()
	return scanSearchResults(rows)
}

// scanSearchResults reads search rows (seven chunk columns, then relevance,
// document title and knowledge-base name). Headline is set to the full chunk
// content; no highlighting is applied. The result is never nil on success. The
// caller remains responsible for closing rows.
func scanSearchResults(rows *sql.Rows) ([]SearchChunkResult, error) {
	results := []SearchChunkResult{}
	for rows.Next() {
		var r SearchChunkResult
		if err := rows.Scan(&r.ID, &r.DocID, &r.KBID, &r.ChunkIndex, &r.Content, &r.TokenCount, &r.CreatedAt,
			&r.Relevance, &r.DocumentTitle, &r.KBName); err != nil {
			return nil, fmt.Errorf("扫描搜索结果行失败: %w", err)
		}
		r.Headline = r.Content
		results = append(results, r)
	}
	// An iteration error means the list is incomplete; do not hand it out.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("扫描搜索结果行失败: %w", err)
	}
	return results, nil
}

// ========== Text chunking helpers ==========

// splitTextIntoChunks splits text into chunks of at most maxLen runes.
//
// The text is split into paragraphs on blank lines ("\n\n"). Consecutive short
// paragraphs are packed into one chunk, joined by "\n\n". A paragraph longer
// than maxLen is split into sentences (see splitIntoSentences), which are
// packed the same way; whitespace that separated two sentences in the source is
// kept as a single space, or a single newline if it contained a line break, so
// that words on adjacent lines are not fused. A single sentence longer than
// maxLen is cut into windows of maxLen runes, each starting overlap runes
// before the end of the previous one. overlap is used only for this hard cut.
//
// Degenerate arguments are normalized instead of panicking or looping forever:
// maxLen <= 0 means "no limit" (the trimmed text is returned as one chunk), and
// an overlap that is negative or not smaller than maxLen is treated as 0.
// It returns nil when text contains no non-whitespace content.
func splitTextIntoChunks(text string, maxLen int, overlap int) []string {
	if text == "" {
		return nil
	}
	if maxLen <= 0 {
		if whole := strings.TrimSpace(text); whole != "" {
			return []string{whole}
		}
		return nil
	}
	if overlap < 0 || overlap >= maxLen {
		// Each window must start strictly after the previous start.
		overlap = 0
	}

	var (
		chunks []string
		cur    strings.Builder
		curLen int // rune count of cur, tracked to avoid recounting
	)
	// flush emits the chunk under construction, if any.
	flush := func() {
		if curLen == 0 {
			return
		}
		chunks = append(chunks, cur.String())
		cur.Reset()
		curLen = 0
	}
	// add appends piece to the current chunk, preceded by sep, or starts a new
	// chunk when the separator plus the piece would not fit.
	add := func(sep, piece string, pieceLen int) {
		sepLen := utf8.RuneCountInString(sep)
		if curLen > 0 && curLen+sepLen+pieceLen > maxLen {
			flush()
		}
		if curLen > 0 {
			cur.WriteString(sep)
			curLen += sepLen
		}
		cur.WriteString(piece)
		curLen += pieceLen
	}
	// widen folds the whitespace ws into the separator gap seen so far:
	// "\n" once a line break was seen, " " for other whitespace, "" for none.
	widen := func(gap, ws string) string {
		switch {
		case gap == "\n" || strings.Contains(ws, "\n"):
			return "\n"
		case gap != "" || ws != "":
			return " "
		default:
			return ""
		}
	}

	for _, para := range strings.Split(text, "\n\n") {
		para = strings.TrimSpace(para)
		if para == "" {
			continue
		}
		paraLen := utf8.RuneCountInString(para)
		if paraLen <= maxLen {
			add("\n\n", para, paraLen)
			continue
		}

		// Oversized paragraph: close the current chunk and pack sentences.
		flush()
		gap := ""
		for _, raw := range splitIntoSentences(para) {
			sent := strings.TrimSpace(raw)
			if sent == "" {
				gap = widen(gap, raw)
				continue
			}
			at := strings.Index(raw, sent) // raw[:at] is the leading whitespace
			sep := widen(gap, raw[:at])
			gap = widen("", raw[at+len(sent):])

			sentLen := utf8.RuneCountInString(sent)
			if sentLen <= maxLen {
				add(sep, sent, sentLen)
				continue
			}

			// Oversized sentence: cut into overlapping windows.
			flush()
			runes := []rune(sent)
			for start := 0; start < len(runes); {
				end := start + maxLen
				if end >= len(runes) {
					chunks = append(chunks, string(runes[start:]))
					break
				}
				chunks = append(chunks, string(runes[start:end]))
				start = end - overlap
			}
		}
	}
	flush()
	return chunks
}

// splitIntoSentences splits text after each sentence terminator: the
// ideographic full stop, ASCII '!' and '?', their full-width forms (U+FF01,
// U+FF1F), and a newline that is not followed by another newline. The ASCII
// period is not a terminator. Terminators stay attached to the sentence they
// end; only the trailing remainder is whitespace-trimmed.
func splitIntoSentences(text string) []string {
	var (
		sentences []string
		current   strings.Builder
	)
	runes := []rune(text)
	for i, r := range runes {
		current.WriteRune(r)

		ends := false
		switch r {
		// Escapes keep the full-width forms distinct from the ASCII ones even
		// if the source file is ever Unicode-normalized.
		case '。', '!', '?', '\uFF01', '\uFF1F':
			ends = true
		case '\n':
			ends = i+1 < len(runes) && runes[i+1] != '\n'
		}
		if ends {
			sentences = append(sentences, current.String())
			current.Reset()
		}
	}

	if rest := strings.TrimSpace(current.String()); rest != "" {
		sentences = append(sentences, rest)
	}
	return sentences
}

// estimateTokenCount returns a rough token estimate for text: 1.2 tokens per
// rune in U+4E00..U+9FFF plus one token per four other runes, and never less
// than 1 (also for empty text).
func estimateTokenCount(text string) int {
	cjk, other := 0, 0
	for _, r := range text {
		if r >= 0x4e00 && r <= 0x9fff {
			cjk++
		} else {
			other++
		}
	}
	total := int(float64(cjk)*1.2) + other/4
	if total < 1 {
		total = 1
	}
	return total
}

// tokenizeQuery splits query on whitespace and strips leading and trailing
// punctuation (ASCII and full-width/CJK) from each field. Fields that consist
// only of punctuation are dropped. Punctuation inside a field is kept, and text
// without whitespace is returned as a single token. It returns nil for a blank
// query.
func tokenizeQuery(query string) []string {
	// Full-width forms are written as escapes so they cannot be folded into
	// their ASCII look-alikes by Unicode normalization of the source file.
	const cutset = "。、《》!?,.;:()[]{}\"'" +
		"\uFF01\uFF1F\uFF0C\uFF1B\uFF1A\uFF08\uFF09"

	var tokens []string
	for _, field := range strings.Fields(query) {
		if tok := strings.Trim(field, cutset); tok != "" {
			tokens = append(tokens, tok)
		}
	}
	return tokens
}

// GenerateUUID returns a new UUID v4 string. It is the exported form of
// generateUUIDv4 for use by other packages.
func GenerateUUID() string {
	return generateUUIDv4()
}

// generateUUIDv4 returns a UUID v4 formatted string built from 16 bytes of
// crypto/rand. If the random source fails, the bytes are derived from the
// current time instead, which is not random.
func generateUUIDv4() string {
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		// Fallback: fill the bytes from the nanosecond timestamp.
		ts := time.Now().UnixNano()
		for i := 0; i < 16; i++ {
			b[i] = byte((ts >> (i * 4)) & 0xFF)
		}
	}
	// Version nibble = 4.
	b[6] = (b[6] & 0x0f) | 0x40
	// Variant bits = 10xx.
	b[8] = (b[8] & 0x3f) | 0x80
	return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x",
		b[0:4], b[4:6], b[6:8], b[8:10], b[10:16])
}