feat: LLM 调用日志 + ModelSelector 优化 + devtools.bat 编码修复
- 新增 call_log.go: 全局环形缓冲区记录每次 LLM 调用(模型/Token/耗时/错误)
- OpenAIProvider.doChat/ChatStreamWithTools 自动记录调用日志
- ai-core 暴露 GET /api/v1/llm-calls 端点, DevTools 代理 + UI 面板
- ModelSelector.envProvider 改为单例缓存, 避免重复创建 HTTP Client
- 新增 PurposeToolCalling 适配器, 后台思考工具调用走专用路由
- envFallback 超时 120s→180s, 显式设置 MaxRetries
- devtools.bat 全英文, 解决 Windows CMD GBK 编码乱码问题

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
package llm
|
||||
|
||||
import (
	"encoding/json"
	"sync"
	"time"
)
|
||||
|
||||
// CallRecord records a single LLM API call.
//
// Duration is kept as a time.Duration in memory but is written to and read
// from JSON as a whole number of milliseconds, matching its "duration_ms"
// key. Without the custom (un)marshalers below, encoding/json would emit the
// raw nanosecond count under that key.
type CallRecord struct {
	Time             time.Time     `json:"time"`              // when the call was logged
	Model            string        `json:"model"`             // model name the request was sent to
	Duration         time.Duration `json:"duration_ms"`       // wall-clock time of the call
	PromptTokens     int           `json:"prompt_tokens"`     // prompt tokens reported in the usage block
	CompletionTokens int           `json:"completion_tokens"` // completion tokens reported in the usage block
	TotalTokens      int           `json:"total_tokens"`      // total tokens reported in the usage block
	Success          bool          `json:"success"`           // true when the call returned no error
	Error            string        `json:"error,omitempty"`   // error text; omitted from JSON when empty
}

// callRecordJSON is the wire form of CallRecord. It mirrors the field order
// and keys of CallRecord but carries the duration as integer milliseconds.
type callRecordJSON struct {
	Time             time.Time `json:"time"`
	Model            string    `json:"model"`
	DurationMS       int64     `json:"duration_ms"`
	PromptTokens     int       `json:"prompt_tokens"`
	CompletionTokens int       `json:"completion_tokens"`
	TotalTokens      int       `json:"total_tokens"`
	Success          bool      `json:"success"`
	Error            string    `json:"error,omitempty"`
}

// MarshalJSON encodes the record with Duration expressed in milliseconds.
// Sub-millisecond precision is truncated.
func (r CallRecord) MarshalJSON() ([]byte, error) {
	return json.Marshal(callRecordJSON{
		Time:             r.Time,
		Model:            r.Model,
		DurationMS:       r.Duration.Milliseconds(),
		PromptTokens:     r.PromptTokens,
		CompletionTokens: r.CompletionTokens,
		TotalTokens:      r.TotalTokens,
		Success:          r.Success,
		Error:            r.Error,
	})
}

// UnmarshalJSON is the inverse of MarshalJSON: it reads "duration_ms" as
// milliseconds so an encoded record decodes back to the same Duration.
func (r *CallRecord) UnmarshalJSON(data []byte) error {
	var w callRecordJSON
	if err := json.Unmarshal(data, &w); err != nil {
		return err
	}
	*r = CallRecord{
		Time:             w.Time,
		Model:            w.Model,
		Duration:         time.Duration(w.DurationMS) * time.Millisecond,
		PromptTokens:     w.PromptTokens,
		CompletionTokens: w.CompletionTokens,
		TotalTokens:      w.TotalTokens,
		Success:          w.Success,
		Error:            w.Error,
	}
	return nil
}
|
||||
|
||||
// CallLogger is a thread-safe ring buffer for LLM call records.
|
||||
type CallLogger struct {
|
||||
mu sync.RWMutex
|
||||
records []CallRecord
|
||||
capacity int
|
||||
head int
|
||||
size int
|
||||
}
|
||||
|
||||
// globalCallLogger is the package-wide call log used by LogCall and GetCalls.
// It retains up to 500 records.
var globalCallLogger = &CallLogger{
	capacity: 500,
}
|
||||
|
||||
// LogCall records an LLM call. Safe for concurrent use.
|
||||
func LogCall(r CallRecord) {
|
||||
globalCallLogger.log(r)
|
||||
}
|
||||
|
||||
// GetCalls returns recent call records, newest first.
|
||||
func GetCalls(limit int) []CallRecord {
|
||||
return globalCallLogger.get(limit)
|
||||
}
|
||||
|
||||
func (cl *CallLogger) log(r CallRecord) {
|
||||
cl.mu.Lock()
|
||||
defer cl.mu.Unlock()
|
||||
|
||||
if cl.records == nil {
|
||||
cl.records = make([]CallRecord, cl.capacity)
|
||||
}
|
||||
|
||||
r.Time = time.Now()
|
||||
cl.records[cl.head] = r
|
||||
cl.head = (cl.head + 1) % cl.capacity
|
||||
if cl.size < cl.capacity {
|
||||
cl.size++
|
||||
}
|
||||
}
|
||||
|
||||
func (cl *CallLogger) get(limit int) []CallRecord {
|
||||
cl.mu.RLock()
|
||||
defer cl.mu.RUnlock()
|
||||
|
||||
if limit <= 0 || limit > cl.size {
|
||||
limit = cl.size
|
||||
}
|
||||
|
||||
result := make([]CallRecord, limit)
|
||||
for i := 0; i < limit; i++ {
|
||||
idx := (cl.head - 1 - i) % cl.capacity
|
||||
if idx < 0 {
|
||||
idx += cl.capacity
|
||||
}
|
||||
result[i] = cl.records[idx]
|
||||
}
|
||||
return result
|
||||
}
|
||||
@@ -139,14 +139,38 @@ func (p *OpenAIProvider) ChatStreamWithTools(ctx context.Context, messages []mod
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
startTime := time.Now()
|
||||
modelName := p.config.Model
|
||||
var streamErr error
|
||||
var finalUsage *model.Usage
|
||||
|
||||
defer func() {
|
||||
r := CallRecord{
|
||||
Model: modelName,
|
||||
Duration: time.Since(startTime),
|
||||
Success: streamErr == nil,
|
||||
}
|
||||
if streamErr != nil {
|
||||
r.Error = streamErr.Error()
|
||||
}
|
||||
if finalUsage != nil {
|
||||
r.PromptTokens = finalUsage.PromptTokens
|
||||
r.CompletionTokens = finalUsage.CompletionTokens
|
||||
r.TotalTokens = finalUsage.TotalTokens
|
||||
}
|
||||
LogCall(r)
|
||||
}()
|
||||
|
||||
resp, err := p.doChatStream(ctx, messages, p.config.Model, tools)
|
||||
if err != nil {
|
||||
// Fallback
|
||||
if p.config.FallbackModel != "" {
|
||||
logger.Printf("[LLM] 流式调用主模型失败,降级: %v", err)
|
||||
modelName = p.config.FallbackModel
|
||||
resp, err = p.doChatStream(ctx, messages, p.config.FallbackModel, tools)
|
||||
}
|
||||
if err != nil {
|
||||
streamErr = err
|
||||
ch <- StreamChunk{Error: err, Done: true}
|
||||
return
|
||||
}
|
||||
@@ -184,20 +208,22 @@ func (p *OpenAIProvider) ChatStreamWithTools(ctx context.Context, messages []mod
|
||||
ch <- StreamChunk{Content: deltaStr}
|
||||
}
|
||||
if streamResp.Choices[0].FinishReason != "" {
|
||||
usage := &model.Usage{}
|
||||
if streamResp.Usage != nil {
|
||||
usage.PromptTokens = streamResp.Usage.PromptTokens
|
||||
usage.CompletionTokens = streamResp.Usage.CompletionTokens
|
||||
usage.TotalTokens = streamResp.Usage.TotalTokens
|
||||
finalUsage = &model.Usage{
|
||||
PromptTokens: streamResp.Usage.PromptTokens,
|
||||
CompletionTokens: streamResp.Usage.CompletionTokens,
|
||||
TotalTokens: streamResp.Usage.TotalTokens,
|
||||
}
|
||||
}
|
||||
ch <- StreamChunk{Done: true, Usage: usage}
|
||||
ch <- StreamChunk{Done: true, Usage: finalUsage}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
ch <- StreamChunk{Error: fmt.Errorf("读取流式响应失败: %w", err), Done: true}
|
||||
streamErr = fmt.Errorf("读取流式响应失败: %w", err)
|
||||
ch <- StreamChunk{Error: streamErr, Done: true}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -222,7 +248,25 @@ type openAIStreamChoice struct {
|
||||
}
|
||||
|
||||
// doChat 执行同步对话请求
|
||||
func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage, modelName string, stream bool, tools []OpenAITool) (*model.LLMResponse, error) {
|
||||
func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage, modelName string, stream bool, tools []OpenAITool) (llmResp *model.LLMResponse, err error) {
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
r := CallRecord{
|
||||
Model: modelName,
|
||||
Duration: time.Since(startTime),
|
||||
Success: err == nil,
|
||||
}
|
||||
if err != nil {
|
||||
r.Error = err.Error()
|
||||
}
|
||||
if llmResp != nil {
|
||||
r.PromptTokens = llmResp.Usage.PromptTokens
|
||||
r.CompletionTokens = llmResp.Usage.CompletionTokens
|
||||
r.TotalTokens = llmResp.Usage.TotalTokens
|
||||
}
|
||||
LogCall(r)
|
||||
}()
|
||||
|
||||
// 转换消息格式
|
||||
oaiMessages := make([]openAIMessage, len(messages))
|
||||
for i, msg := range messages {
|
||||
@@ -304,7 +348,7 @@ func (p *OpenAIProvider) doChat(ctx context.Context, messages []model.LLMMessage
|
||||
|
||||
// 检查是否有工具调用
|
||||
choice := oaiResp.Choices[0]
|
||||
llmResp := &model.LLMResponse{
|
||||
llmResp = &model.LLMResponse{
|
||||
Content: contentString(choice.Message.Content),
|
||||
FinishReason: choice.FinishReason,
|
||||
ReasoningContent: choice.Message.ReasoningContent,
|
||||
|
||||
@@ -25,10 +25,11 @@ var ErrModelNotRequired = fmt.Errorf("model not required, caller should degrade
|
||||
|
||||
// ModelSelector routes requests to the best available LLMProvider based on purpose.
|
||||
type ModelSelector struct {
|
||||
loader *config.Loader
|
||||
envCfg OpenAIConfig
|
||||
mu sync.RWMutex
|
||||
cache map[string]LLMProvider
|
||||
loader *config.Loader
|
||||
envCfg OpenAIConfig
|
||||
mu sync.RWMutex
|
||||
cache map[string]LLMProvider
|
||||
cachedEnv LLMProvider // cached env fallback, created once
|
||||
}
|
||||
|
||||
// NewModelSelector creates a ModelSelector. If loader is nil or has no config,
|
||||
@@ -81,7 +82,12 @@ func (s *ModelSelector) DefaultAdapter() *Adapter {
|
||||
}
|
||||
|
||||
func (s *ModelSelector) envProvider() LLMProvider {
|
||||
return NewOpenAIProvider(s.envCfg)
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.cachedEnv == nil {
|
||||
s.cachedEnv = NewOpenAIProvider(s.envCfg)
|
||||
}
|
||||
return s.cachedEnv
|
||||
}
|
||||
|
||||
func (s *ModelSelector) getOrCreateProvider(modelID string, cfg *config.ModelsConfigData) (LLMProvider, error) {
|
||||
|
||||
Reference in New Issue
Block a user