// Cyrene/backend/ai-core/internal/cache/response_cache.go

// Package cache provides a response cache for skipping redundant LLM calls
// on semantically similar inputs (greetings and common IoT commands).
package cache

import (
	"strings"
	"sync"
	"time"
)

// Entry is a cached LLM response.
type Entry struct {
	// FullContent is the response text handed back to callers on a cache hit.
	FullContent string
	// CachedAt is the time the entry was stored; expiry is measured from it.
	CachedAt    time.Time
	// AccessCount is the number of cache hits served from this entry.
	AccessCount int
}

// ResponseCache caches LLM responses keyed by normalized user input.
// It uses separate TTLs for greetings (longer) and other queries (shorter).
type ResponseCache struct {
	// mu guards entries.
	mu          sync.RWMutex
	// entries maps a normalized input string to its cached response.
	entries     map[string]*Entry
	// maxEntries is the size at which Set evicts the oldest entry before
	// inserting.
	maxEntries  int
	// greetingTTL is the lifetime applied to inputs that isGreeting accepts.
	greetingTTL time.Duration
	// defaultTTL is the lifetime applied to every other input.
	defaultTTL  time.Duration
}

// New returns an empty ResponseCache configured with the built-in defaults:
// room for 200 entries, a 10-minute lifetime for greetings, and a 30-second
// lifetime for everything else.
func New() *ResponseCache {
	c := &ResponseCache{
		maxEntries:  200,
		greetingTTL: 10 * time.Minute,
		defaultTTL:  30 * time.Second,
	}
	c.entries = make(map[string]*Entry)
	return c
}

// Get returns the cached response for input, if one exists and has not
// expired. The second result reports whether there was a hit.
//
// The lookup key is the normalized form of input. Inputs accepted by
// isGreeting are checked against greetingTTL; all other inputs are checked
// against defaultTTL. An expired entry is removed from the cache and reported
// as a miss. A hit increments the entry's AccessCount.
//
// The lookup, expiry check, removal, and counter update all happen inside one
// critical section. Releasing the lock between the lookup and the removal
// would let a concurrent Set store a fresh entry under the same key, which
// this call would then delete by mistake.
func (c *ResponseCache) Get(input string) (string, bool) {
	key := normalize(input)

	// Pick the TTL before taking the lock; it depends only on the input.
	ttl := c.defaultTTL
	if isGreeting(input) {
		ttl = c.greetingTTL
	}

	// A write lock is required: both outcomes below mutate shared state
	// (either the map or the entry's counter).
	c.mu.Lock()
	defer c.mu.Unlock()

	entry, ok := c.entries[key]
	if !ok {
		return "", false
	}
	if time.Since(entry.CachedAt) > ttl {
		delete(c.entries, key)
		return "", false
	}
	entry.AccessCount++
	return entry.FullContent, true
}

// Set stores response in the cache under the normalized form of input,
// replacing any entry already stored under that key.
//
// When a new key is inserted and the cache already holds maxEntries entries,
// the entry with the earliest CachedAt is evicted first. Overwriting an
// existing key never evicts, because it does not grow the cache.
func (c *ResponseCache) Set(input, response string) {
	key := normalize(input)
	c.mu.Lock()
	defer c.mu.Unlock()

	if _, exists := c.entries[key]; !exists && len(c.entries) >= c.maxEntries {
		// Track "have we picked a candidate" with a flag rather than by
		// comparing the key to "": the empty string is itself a valid key
		// (it is what normalize returns for blank input).
		var (
			oldestKey  string
			oldestTime time.Time
			found      bool
		)
		for k, v := range c.entries {
			if !found || v.CachedAt.Before(oldestTime) {
				oldestKey, oldestTime, found = k, v.CachedAt, true
			}
		}
		if found {
			delete(c.entries, oldestKey)
		}
	}

	c.entries[key] = &Entry{
		FullContent: response,
		CachedAt:    time.Now(),
	}
}

// Invalidate drops every cached entry by swapping in a fresh, empty map.
func (c *ResponseCache) Invalidate() {
	fresh := make(map[string]*Entry)

	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries = fresh
}

// Size reports how many entries the cache currently holds.
func (c *ResponseCache) Size() int {
	c.mu.RLock()
	n := len(c.entries)
	c.mu.RUnlock()
	return n
}

// normalize derives a cache key from raw user input: the text is lowercased,
// leading and trailing whitespace is dropped, and every interior run of
// whitespace is collapsed to a single space.
func normalize(input string) string {
	// Fields already discards leading and trailing whitespace, so no separate
	// trim step is needed.
	words := strings.Fields(strings.ToLower(input))
	return strings.Join(words, " ")
}

// isGreeting reports whether input, once normalized, is exactly one of a fixed
// set of greeting / small-talk phrases. Such inputs are eligible for the
// longer cache lifetime.
func isGreeting(input string) bool {
	switch normalize(input) {
	case "你好", "嗨", "嘿", "哈喽", "hello", "hi", "hey",
		"早上好", "下午好", "晚上好", "晚安", "早安", "午安",
		"在吗", "在不在", "在么",
		"谢谢", "多谢", "感谢", "thanks", "thank you",
		"好的", "ok", "okay", "行", "可以",
		"再见", "拜拜", "bye", "byebye",
		"嗯", "哦", "噢":
		return true
	default:
		return false
	}
}