// Package cache provides a response cache for skipping redundant LLM calls // on semantically similar inputs (greetings and common IoT commands). package cache import ( "strings" "sync" "time" ) // Entry is a cached LLM response. type Entry struct { FullContent string CachedAt time.Time AccessCount int } // ResponseCache caches LLM responses keyed by normalized user input. // It uses separate TTLs for greetings (longer) and other queries (shorter). type ResponseCache struct { mu sync.RWMutex entries map[string]*Entry maxEntries int greetingTTL time.Duration defaultTTL time.Duration } // New creates a new ResponseCache with sensible defaults. func New() *ResponseCache { return &ResponseCache{ entries: make(map[string]*Entry), maxEntries: 200, greetingTTL: 10 * time.Minute, defaultTTL: 30 * time.Second, } } // Get returns a cached response for the given input if it exists and hasn't expired. func (c *ResponseCache) Get(input string) (string, bool) { key := normalize(input) c.mu.RLock() entry, ok := c.entries[key] c.mu.RUnlock() if !ok { return "", false } ttl := c.defaultTTL if isGreeting(input) { ttl = c.greetingTTL } if time.Since(entry.CachedAt) > ttl { c.mu.Lock() delete(c.entries, key) c.mu.Unlock() return "", false } c.mu.Lock() entry.AccessCount++ c.mu.Unlock() return entry.FullContent, true } // Set stores a response in the cache. func (c *ResponseCache) Set(input, response string) { key := normalize(input) c.mu.Lock() defer c.mu.Unlock() // Evict oldest entries if at capacity if len(c.entries) >= c.maxEntries { var oldestKey string var oldestTime time.Time for k, v := range c.entries { if oldestKey == "" || v.CachedAt.Before(oldestTime) { oldestKey = k oldestTime = v.CachedAt } } if oldestKey != "" { delete(c.entries, oldestKey) } } c.entries[key] = &Entry{ FullContent: response, CachedAt: time.Now(), AccessCount: 0, } } // Invalidate clears all cached entries. func (c *ResponseCache) Invalidate() { c.mu.Lock() c.entries = make(map[string]*Entry) c.mu.Unlock() } // Size returns the current number of cached entries. func (c *ResponseCache) Size() int { c.mu.RLock() defer c.mu.RUnlock() return len(c.entries) } // normalize produces a cache key from user input. func normalize(input string) string { s := strings.TrimSpace(strings.ToLower(input)) // Collapse multiple spaces parts := strings.Fields(s) return strings.Join(parts, " ") } // isGreeting returns true if the input looks like a simple greeting/small-talk // that can be cached with a longer TTL. func isGreeting(input string) bool { normalized := normalize(input) greetings := []string{ "你好", "嗨", "嘿", "哈喽", "hello", "hi", "hey", "早上好", "下午好", "晚上好", "晚安", "早安", "午安", "在吗", "在不在", "在么", "谢谢", "多谢", "感谢", "thanks", "thank you", "好的", "ok", "okay", "行", "可以", "再见", "拜拜", "bye", "byebye", "嗯", "哦", "噢", } for _, g := range greetings { if normalized == g { return true } } return false }