feat: LLM 调用日志 + ModelSelector 优化 + devtools.bat 编码修复

- 新增 call_log.go: 全局环形缓冲区记录每次 LLM 调用(模型/Token/耗时/错误)
- OpenAIProvider.doChat/ChatStreamWithTools 自动记录调用日志
- ai-core 暴露 GET /api/v1/llm-calls 端点, DevTools 代理 + UI 面板
- ModelSelector.envProvider 改为单例缓存, 避免重复创建 HTTP Client
- 新增 PurposeToolCalling 适配器, 后台思考工具调用走专用路由
- envFallback 超时 120s→180s, 显式设置 MaxRetries
- devtools.bat 全英文, 解决 Windows CMD GBK 编码乱码问题

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-24 15:44:53 +08:00
parent 7eb5e984c2
commit 47f9de2409
8 changed files with 266 additions and 22 deletions
+23 -3
View File
@@ -67,7 +67,8 @@ func main() {
APIKey: cfg.LLMAPIKey,
Model: cfg.LLMModel,
FallbackModel: cfg.LLMFallbackModel,
Timeout: 120 * time.Second,
MaxRetries: 3,
Timeout: 180 * time.Second,
}
// 创建 ModelSelector (优先使用 models.json,回退到 .env)
@@ -82,10 +83,12 @@ func main() {
thinkerAdapter := llm.NewAdapter(provider)
provider, _ = modelSelector.Select(context.Background(), llm.PurposeMemoryExtraction)
memoryAdapter := llm.NewAdapter(provider)
provider, _ = modelSelector.Select(context.Background(), llm.PurposeToolCalling)
toolAdapter := llm.NewAdapter(provider)
if configLoader != nil && configLoader.HasConfig() {
log.Printf("LLM适配器已就绪: models.json 驱动 (chat=%s, intent=%s, think=%s, memory=%s)",
chatAdapter.ModelName(), intentAdapter.ModelName(), thinkerAdapter.ModelName(), memoryAdapter.ModelName())
log.Printf("LLM适配器已就绪: models.json 驱动 (chat=%s, intent=%s, think=%s, memory=%s, tool=%s)",
chatAdapter.ModelName(), intentAdapter.ModelName(), thinkerAdapter.ModelName(), memoryAdapter.ModelName(), toolAdapter.ModelName())
} else {
log.Printf("LLM适配器已就绪: .env 驱动 (模型=%s)", chatAdapter.ModelName())
}
@@ -193,6 +196,7 @@ func main() {
personaLoader,
memRetriever,
thinkerAdapter,
toolAdapter,
iotClient,
memStore,
toolRegistry,
@@ -311,6 +315,22 @@ func main() {
w.Write([]byte(`{"status":"ok","service":"ai-core","model":"` + chatAdapter.ModelName() + `"}`))
})
// LLM 调用日志(调试用)
mux.HandleFunc("/api/v1/llm-calls", func(w http.ResponseWriter, r *http.Request) {
limit := 50
if n, err := fmt.Sscanf(r.URL.Query().Get("limit"), "%d", &limit); n != 1 || err != nil || limit <= 0 {
limit = 50
}
if limit > 500 {
limit = 500
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"calls": llm.GetCalls(limit),
"total": len(llm.GetCalls(0)),
})
})
// 启动HTTP服务
srv := &http.Server{
Addr: ":" + cfg.Port,
@@ -43,6 +43,7 @@ type Thinker struct {
personaLoader *persona.Loader
memRetriever *memory.Retriever
llmAdapter *llm.Adapter
toolAdapter *llm.Adapter // 工具调用专用适配器
iotClient *tools.IoTClient
// 记忆管理
@@ -224,6 +225,7 @@ func NewThinker(
personaLoader *persona.Loader,
memRetriever *memory.Retriever,
llmAdapter *llm.Adapter,
toolAdapter *llm.Adapter,
iotClient *tools.IoTClient,
memoryStore *memory.Store,
toolRegistry *tools.Registry,
@@ -237,6 +239,7 @@ func NewThinker(
personaLoader: personaLoader,
memRetriever: memRetriever,
llmAdapter: llmAdapter,
toolAdapter: toolAdapter,
iotClient: iotClient,
thinkInterval: cfg.ThinkInterval,
silenceTimeout: cfg.SilenceTimeout,
@@ -598,7 +601,7 @@ func (t *Thinker) performThink(triggerReason string) {
var toolCallRecords []map[string]interface{}
for round := 0; round <= maxToolRounds; round++ {
resp, err := t.llmAdapter.ChatWithTools(ctx, messages, openAITools)
resp, err := t.toolAdapter.ChatWithTools(ctx, messages, openAITools)
if err != nil {
log.Printf("[后台思考] LLM调用失败 (round=%d): %v", round, err)
return
+74
View File
@@ -0,0 +1,74 @@
package llm
import (
	"encoding/json"
	"sync"
	"time"
)
// CallRecord records a single LLM API call.
//
// Duration is held as a time.Duration in memory but is serialized as a whole
// number of milliseconds under the "duration_ms" key (see MarshalJSON).
type CallRecord struct {
	// Time is the timestamp of the record. The call logger overwrites it
	// with the current time when the record is stored.
	Time time.Time `json:"time"`
	// Model is the name of the model the request was sent to.
	Model string `json:"model"`
	// Duration is the elapsed time of the call.
	Duration time.Duration `json:"duration_ms"`
	// PromptTokens, CompletionTokens and TotalTokens are the usage figures
	// reported for the call; they stay zero when no usage was available.
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
	// Success is true when the call finished without an error.
	Success bool `json:"success"`
	// Error holds the error text of a failed call and is omitted from the
	// JSON output when empty.
	Error string `json:"error,omitempty"`
}

// MarshalJSON encodes the record with Duration expressed in milliseconds.
//
// Without this method encoding/json would emit Duration as its underlying
// int64 nanosecond count, which contradicts the "duration_ms" key. All other
// fields keep their names, order and omitempty behavior.
func (r CallRecord) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Time             time.Time `json:"time"`
		Model            string    `json:"model"`
		DurationMS       int64     `json:"duration_ms"`
		PromptTokens     int       `json:"prompt_tokens"`
		CompletionTokens int       `json:"completion_tokens"`
		TotalTokens      int       `json:"total_tokens"`
		Success          bool      `json:"success"`
		Error            string    `json:"error,omitempty"`
	}{
		Time:             r.Time,
		Model:            r.Model,
		DurationMS:       r.Duration.Milliseconds(),
		PromptTokens:     r.PromptTokens,
		CompletionTokens: r.CompletionTokens,
		TotalTokens:      r.TotalTokens,
		Success:          r.Success,
		Error:            r.Error,
	})
}
// CallLogger keeps the most recent LLM call records in a fixed-size ring
// buffer. Every field is guarded by mu, so one logger may be shared between
// goroutines.
type CallLogger struct {
	mu sync.RWMutex

	// records is the ring storage; it is allocated on the first logged call.
	records []CallRecord
	// capacity is the number of slots in the ring.
	capacity int
	// head is the slot the next record is written to; size is the number of
	// slots currently holding a record and never exceeds capacity.
	head, size int
}
// globalCallLogCapacity is the number of records the package-wide call log
// retains.
const globalCallLogCapacity = 500

// globalCallLogger is the package-wide call log behind LogCall and GetCalls.
var globalCallLogger = &CallLogger{capacity: globalCallLogCapacity}

// LogCall appends r to the package-wide call log. It may be called from
// multiple goroutines.
func LogCall(r CallRecord) {
	globalCallLogger.log(r)
}

// GetCalls returns up to limit records from the package-wide call log,
// newest first. The interpretation of limit is left to the underlying
// logger's get method.
func GetCalls(limit int) []CallRecord {
	recent := globalCallLogger.get(limit)
	return recent
}
// log stores r in the ring buffer, overwriting the oldest record once the
// buffer is full. The record's Time is set to the current time before it is
// stored, replacing whatever the caller supplied.
//
// A logger whose capacity is zero or negative (for example the zero value)
// falls back to a default capacity instead of panicking.
func (cl *CallLogger) log(r CallRecord) {
	// fallbackCapacity is the ring size used when none was configured.
	const fallbackCapacity = 500

	cl.mu.Lock()
	defer cl.mu.Unlock()

	if cl.capacity <= 0 {
		cl.capacity = fallbackCapacity
	}
	// The backing slice is allocated lazily on the first call.
	if cl.records == nil {
		cl.records = make([]CallRecord, cl.capacity)
	}

	r.Time = time.Now()
	cl.records[cl.head] = r
	cl.head = (cl.head + 1) % cl.capacity
	if cl.size < cl.capacity {
		cl.size++
	}
}
// get returns a copy of the stored records ordered newest first. A limit
// that is zero, negative, or larger than the number of stored records yields
// every stored record; otherwise exactly limit records are returned. The
// result is never nil.
func (cl *CallLogger) get(limit int) []CallRecord {
	cl.mu.RLock()
	defer cl.mu.RUnlock()

	want := cl.size
	if limit > 0 && limit < want {
		want = limit
	}

	out := make([]CallRecord, 0, want)
	// Walk backwards from the slot written most recently, wrapping to the
	// end of the ring when the cursor passes slot 0.
	for pos := cl.head - 1; len(out) < want; pos-- {
		if pos < 0 {
			pos += cl.capacity
		}
		out = append(out, cl.records[pos])
	}
	return out
}
+52 -8
View File
@@ -139,14 +139,38 @@ func (p *OpenAIProvider) ChatStreamWithTools(ctx context.Context, messages []mod
go func() {
defer close(ch)
startTime := time.Now()
modelName := p.config.Model
var streamErr error
var finalUsage *model.Usage
defer func() {
r := CallRecord{
Model: modelName,
Duration: time.Since(startTime),
Success: streamErr == nil,
}
if streamErr != nil {
r.Error = streamErr.Error()
}
if finalUsage != nil {
r.PromptTokens = finalUsage.PromptTokens
r.CompletionTokens = finalUsage.CompletionTokens
r.TotalTokens = finalUsage.TotalTokens
}
LogCall(r)
}()
resp, err := p.doChatStream(ctx, messages, p.config.Model, tools)
if err != nil {
// Fallback
if p.config.FallbackModel != "" {
logger.Printf("[LLM] 流式调用主模型失败,降级: %v", err)
modelName = p.config.FallbackModel
resp, err = p.doChatStream(ctx, messages, p.config.FallbackModel, tools)
}
if err != nil {
streamErr = err
ch <- StreamChunk{Error: err, Done: true}
return
}
@@ -184,20 +208,22 @@ func (p *OpenAIProvider) ChatStreamWithTools(ctx context.Context, messages []mod
ch <- StreamChunk{Content: deltaStr}
}
if streamResp.Choices[0].FinishReason != "" {
usage := &model.Usage{}
if streamResp.Usage != nil {
usage.PromptTokens = streamResp.Usage.PromptTokens
usage.CompletionTokens = streamResp.Usage.CompletionTokens
usage.TotalTokens = streamResp.Usage.TotalTokens
finalUsage = &model.Usage{
PromptTokens: streamResp.Usage.PromptTokens,
CompletionTokens: streamResp.Usage.CompletionTokens,
TotalTokens: streamResp.Usage.TotalTokens,
}
}
ch <- StreamChunk{Done: true, Usage: usage}
ch <- StreamChunk{Done: true, Usage: finalUsage}
return
}
}
}
if err := scanner.Err(); err != nil {
ch <- StreamChunk{Error: fmt.Errorf("读取流式响应失败: %w", err), Done: true}
streamErr = fmt.Errorf("读取流式响应失败: %w", err)
ch <- StreamChunk{Error: streamErr, Done: true}
return
}
@@ -222,7 +248,25 @@ type openAIStreamChoice struct {
}
// doChat 执行同步对话请求
func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage, modelName string, stream bool, tools []OpenAITool) (*model.LLMResponse, error) {
func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage, modelName string, stream bool, tools []OpenAITool) (llmResp *model.LLMResponse, err error) {
startTime := time.Now()
defer func() {
r := CallRecord{
Model: modelName,
Duration: time.Since(startTime),
Success: err == nil,
}
if err != nil {
r.Error = err.Error()
}
if llmResp != nil {
r.PromptTokens = llmResp.Usage.PromptTokens
r.CompletionTokens = llmResp.Usage.CompletionTokens
r.TotalTokens = llmResp.Usage.TotalTokens
}
LogCall(r)
}()
// 转换消息格式
oaiMessages := make([]openAIMessage, len(messages))
for i, msg := range messages {
@@ -304,7 +348,7 @@ func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage
// 检查是否有工具调用
choice := oaiResp.Choices[0]
llmResp := &model.LLMResponse{
llmResp = &model.LLMResponse{
Content: contentString(choice.Message.Content),
FinishReason: choice.FinishReason,
ReasoningContent: choice.Message.ReasoningContent,
+11 -5
View File
@@ -25,10 +25,11 @@ var ErrModelNotRequired = fmt.Errorf("model not required, caller should degrade
// ModelSelector routes requests to the best available LLMProvider based on purpose.
type ModelSelector struct {
loader *config.Loader
envCfg OpenAIConfig
mu sync.RWMutex
cache map[string]LLMProvider
loader *config.Loader
envCfg OpenAIConfig
mu sync.RWMutex
cache map[string]LLMProvider
cachedEnv LLMProvider // cached env fallback, created once
}
// NewModelSelector creates a ModelSelector. If loader is nil or has no config,
@@ -81,7 +82,12 @@ func (s *ModelSelector) DefaultAdapter() *Adapter {
}
func (s *ModelSelector) envProvider() LLMProvider {
return NewOpenAIProvider(s.envCfg)
s.mu.Lock()
defer s.mu.Unlock()
if s.cachedEnv == nil {
s.cachedEnv = NewOpenAIProvider(s.envCfg)
}
return s.cachedEnv
}
func (s *ModelSelector) getOrCreateProvider(modelID string, cfg *config.ModelsConfigData) (LLMProvider, error) {