Files
Cyrene/backend/gateway/internal/handler/image_handler.go
T
AskaEth bcf4d4e621 feat: 第五轮开发 - 14项未来路线图功能完整实现
W1-W14 全部完成:
- W1: 消息搜索 (ILIKE全文检索 + SearchModal)
- W2: 对话导出 (JSON/Markdown/TXT三格式)
- W3: 记忆时间线 DevTools 可视化
- W4: 通知推送系统 (WebSocket + Browser Notification API)
- W5: 定时提醒 (30s轮询 + 重复提醒 + WebSocket推送)
- W6: 每日简报 (08:00自动生成: 天气+新闻+提醒+AI摘要)
- W7: IoT场景自动化 (规则引擎 10s轮询 + 条件评估 + 场景执行)
- W8: 语音输入 (浏览器 Speech Recognition API)
- W9: STT服务 (voice-service + whisper.cpp)
- W10: TTS服务 (浏览器 Speech Synthesis + edge-tts三档回退)
- W11: 文件管理 (上传/下载/缩略图/纯Go bilinear缩放)
- W12: 知识库RAG (PostgreSQL tsvector + 文档分块 + 检索)
- W13: 多模态 (图片上传+分析: Vision API + 本地Go分析回退)
- W14: PWA (Service Worker + 离线页 + install prompt)

总计: 6个Go微服务 + 10+前端组件 + 10+ PostgreSQL表 + 4个后台调度器
2026-05-19 12:01:09 +08:00

719 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package handler
import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"image"
	"image/color"
	_ "image/gif"
	_ "image/jpeg"
	_ "image/png"
	"io"
	"log"
	"net/http"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/gin-gonic/gin"

	"github.com/yourname/cyrene-ai/gateway/internal/config"
	"github.com/yourname/cyrene-ai/gateway/internal/middleware"
	"github.com/yourname/cyrene-ai/gateway/internal/store"
)
// ImageHandler serves the image-analysis HTTP endpoints.
type ImageHandler struct {
// cfg supplies the LLM API key, model name and base URL read by the
// analysis methods.
cfg *config.Config
// fileStore resolves stored files by ID; the handlers check it for nil
// and report the store as unavailable in that case.
fileStore *store.FileStore
}
// NewImageHandler builds an ImageHandler from the gateway configuration and
// the file store. Both arguments are stored as given; no validation is done.
func NewImageHandler(cfg *config.Config, fileStore *store.FileStore) *ImageHandler {
	handler := new(ImageHandler)
	handler.cfg = cfg
	handler.fileStore = fileStore
	return handler
}
// ImageAnalysis is the JSON result returned by the image-analysis endpoints.
type ImageAnalysis struct {
// Format is the format name reported by the image decoder.
Format string `json:"format"`
// Width and Height are the decoded image bounds in pixels.
Width int `json:"width"`
Height int `json:"height"`
// FileSize is the image size in bytes.
FileSize int64 `json:"file_size"`
// Description is free text: either the Vision API answer or the locally
// generated summary.
Description string `json:"description"`
// TopColors lists the most frequent quantized colors; omitted when empty.
TopColors []ColorInfo `json:"top_colors,omitempty"`
// EXIF holds tags extracted from JPEG data; omitted when none were found.
EXIF map[string]string `json:"exif,omitempty"`
// AnalyzedBy names the analysis path that produced the result.
AnalyzedBy string `json:"analyzed_by"` // "openai_vision" | "local"
}
// ColorInfo describes one dominant color of an image.
type ColorInfo struct {
// Hex is the color formatted as "#RRGGBB".
Hex string `json:"hex"`
// Percent is the share of sampled pixels in this color bucket, on a
// 0-100 scale.
Percent float64 `json:"percent"`
}
// AnalyzeRequestBody is the JSON body accepted by Analyze when the request
// refers to an already stored file.
type AnalyzeRequestBody struct {
// FileID identifies the stored file to analyze; an empty value is rejected.
FileID string `json:"file_id"`
}
// ========== POST /api/v1/images/analyze ==========

// Analyze analyzes an image supplied either as a JSON reference to a stored
// file ({"file_id": "..."}) or as a direct multipart/form-data upload.
//
// A request whose Content-Type starts with "application/json" is treated as a
// stored-file reference. Anything else is treated as an upload whose image is
// looked up in the "file" form field first and the "image" field second.
func (h *ImageHandler) Analyze(c *gin.Context) {
	uid := middleware.GetUserID(c)

	if isJSON := strings.HasPrefix(c.GetHeader("Content-Type"), "application/json"); isJSON {
		req := AnalyzeRequestBody{}
		bindErr := c.ShouldBindJSON(&req)
		if bindErr != nil || req.FileID == "" {
			c.JSON(http.StatusBadRequest, gin.H{"error": "缺少 file_id 字段", "errorType": "invalid_request"})
			return
		}
		h.analyzeByFileID(c, uid, req.FileID)
		return
	}

	// Upload path: accept the image under either supported field name.
	upload, meta, err := c.Request.FormFile("file")
	if err != nil {
		if upload, meta, err = c.Request.FormFile("image"); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "未找到图片文件 (使用 file 或 image 字段)", "errorType": "missing_file"})
			return
		}
	}
	defer upload.Close()

	h.analyzeUploadedFile(c, uid, upload, meta.Filename, meta.Size)
}
// ========== GET /api/v1/images/analyze/:file_id ==========

// AnalyzeByID analyzes a previously stored file identified by the :file_id
// path parameter. An empty parameter is answered with 400.
func (h *ImageHandler) AnalyzeByID(c *gin.Context) {
	uid := middleware.GetUserID(c)

	if id := c.Param("file_id"); id != "" {
		h.analyzeByFileID(c, uid, id)
		return
	}

	c.JSON(http.StatusBadRequest, gin.H{"error": "缺少 file_id", "errorType": "invalid_request"})
}
// analyzeByFileID loads the stored file record for fileID, checks that the
// caller may read it and that it is an image, then writes the analysis result
// (or an error payload) to the response.
//
// Status codes: 503 when no file store is configured, 500 on lookup or
// analysis failure, 404 when the record is missing, 403 when the file belongs
// to another user and is not public, 400 when the MIME type is not an image.
func (h *ImageHandler) analyzeByFileID(c *gin.Context, userID, fileID string) {
	// reject writes the error payload shape shared by every failure branch.
	reject := func(status int, message, errorType string) {
		c.JSON(status, gin.H{"error": message, "errorType": errorType})
	}

	if h.fileStore == nil {
		reject(http.StatusServiceUnavailable, "文件存储不可用", "service_unavailable")
		return
	}

	record, err := h.fileStore.GetFile(fileID)
	if err != nil {
		log.Printf("[ImageHandler] 查询文件失败: %v", err)
		reject(http.StatusInternalServerError, "查询文件失败", "db_error")
		return
	}

	switch {
	case record == nil:
		reject(http.StatusNotFound, "文件不存在", "file_not_found")
		return
	case record.UserID != userID && !record.IsPublic:
		reject(http.StatusForbidden, "无权访问此文件", "access_denied")
		return
	case !isImageType(record.MimeType):
		reject(http.StatusBadRequest, "文件不是图片类型: "+record.MimeType, "unsupported_type")
		return
	}

	analysis, err := h.analyzeImage(record.StoredPath, record.MimeType, record.Size)
	if err != nil {
		log.Printf("[ImageHandler] 图片分析失败: %v", err)
		reject(http.StatusInternalServerError, "图片分析失败: "+err.Error(), "analysis_error")
		return
	}

	c.JSON(http.StatusOK, analysis)
}
// analyzeUploadedFile analyzes an image that was uploaded directly with the
// request and writes the result (or an error payload) to the response.
//
// file is the upload stream and fileSize the size announced for it. The
// 10MB limit is enforced against both the announced size and the bytes
// actually read, so a stream longer than announced cannot be buffered in
// full. userID and filename are accepted for interface compatibility and are
// not used here.
//
// The image is written to a temporary file because the analysis helpers take
// a path; the file is removed before returning.
func (h *ImageHandler) analyzeUploadedFile(c *gin.Context, userID string, file io.Reader, filename string, fileSize int64) {
	const maxImageSize = 10 * 1024 * 1024

	if fileSize > maxImageSize {
		c.JSON(http.StatusBadRequest, gin.H{"error": "图片大小超过限制 (最大 10MB)", "errorType": "file_too_large"})
		return
	}

	// Read one byte past the limit at most: enough to detect an oversized
	// stream without holding all of it in memory.
	data, err := io.ReadAll(io.LimitReader(file, maxImageSize+1))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "读取图片失败", "errorType": "read_error"})
		return
	}
	if len(data) > maxImageSize {
		c.JSON(http.StatusBadRequest, gin.H{"error": "图片大小超过限制 (最大 10MB)", "errorType": "file_too_large"})
		return
	}

	// Only the header is decoded here; it yields the format name registered
	// by the imported image decoders.
	_, format, err := image.DecodeConfig(bytes.NewReader(data))
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "无法解码图片: " + err.Error(), "errorType": "decode_error"})
		return
	}
	mimeType := "image/" + format

	tmpFile, err := os.CreateTemp("", "cyrene-image-*."+format)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "创建临时文件失败", "errorType": "server_error"})
		return
	}
	defer os.Remove(tmpFile.Name())

	// Close before analysis so the data is fully handed to the OS and a
	// failed close is reported instead of being silently dropped.
	_, writeErr := tmpFile.Write(data)
	closeErr := tmpFile.Close()
	if writeErr != nil || closeErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "写入临时文件失败", "errorType": "server_error"})
		return
	}

	result, err := h.analyzeImage(tmpFile.Name(), mimeType, int64(len(data)))
	if err != nil {
		log.Printf("[ImageHandler] 图片分析失败: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "图片分析失败: " + err.Error(), "errorType": "analysis_error"})
		return
	}

	c.JSON(http.StatusOK, result)
}
// analyzeImage is the core analysis entry point. When an LLM API key is
// configured it tries the Vision API first; if that call fails (or no key is
// set) it falls back to the local, standard-library-only analysis.
func (h *ImageHandler) analyzeImage(filePath, mimeType string, fileSize int64) (*ImageAnalysis, error) {
	if h.cfg.LLMAPIKey == "" {
		return analyzeImageLocally(filePath, mimeType, fileSize)
	}

	analysis, visionErr := h.analyzeWithOpenAIVision(filePath, mimeType)
	if visionErr == nil {
		return analysis, nil
	}

	// The remote failure is only logged; the caller still gets a result
	// from the local path.
	log.Printf("[ImageHandler] OpenAI Vision 分析失败,降级到本地分析: %v", visionErr)
	return analyzeImageLocally(filePath, mimeType, fileSize)
}
// analyzeWithOpenAIVision asks an OpenAI-compatible chat-completions endpoint
// to describe the image at filePath.
//
// The image is sent inline as a base64 data URL. Format, dimensions, colors
// and EXIF come from the local analysis; only Description is taken from the
// API. If the local analysis fails, those fields stay empty except FileSize.
//
// An error is returned when the file cannot be read, the request fails or
// times out, the API answers with a non-200 status, the response cannot be
// parsed, or the response carries no description text. Callers use the error
// as the signal to fall back to local analysis.
func (h *ImageHandler) analyzeWithOpenAIVision(filePath, mimeType string) (*ImageAnalysis, error) {
	// Upper bound for the whole exchange so a stalled upstream cannot block
	// the calling request indefinitely.
	const visionRequestTimeout = 60 * time.Second

	data, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("读取图片文件失败: %w", err)
	}
	base64Data := base64.StdEncoding.EncodeToString(data)
	dataURL := fmt.Sprintf("data:%s;base64,%s", mimeType, base64Data)

	// Local metadata is best effort: a local decode failure must not
	// prevent the remote description.
	localInfo, err := analyzeImageLocally(filePath, mimeType, int64(len(data)))
	if err != nil {
		localInfo = &ImageAnalysis{FileSize: int64(len(data))}
	}

	reqBody := map[string]interface{}{
		"model": h.cfg.LLMModel,
		"messages": []map[string]interface{}{
			{
				"role": "user",
				"content": []map[string]interface{}{
					{
						"type": "text",
						"text": "请详细描述这张图片的内容。用中文回答。请描述:1) 图片中的主要物体/人物 2) 场景/环境 3) 颜色和色调 4) 文字内容(如果有)5) 整体氛围和风格。请尽可能详细。",
					},
					{
						"type": "image_url",
						"image_url": map[string]string{
							"url": dataURL,
						},
					},
				},
			},
		},
		"max_tokens": 500,
	}
	jsonBody, err := json.Marshal(reqBody)
	if err != nil {
		return nil, fmt.Errorf("序列化请求失败: %w", err)
	}

	apiURL := strings.TrimRight(h.cfg.LLMAPIURL, "/") + "/chat/completions"
	httpReq, err := http.NewRequest("POST", apiURL, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("创建请求失败: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Authorization", "Bearer "+h.cfg.LLMAPIKey)

	httpClient := &http.Client{Timeout: visionRequestTimeout}
	resp, err := httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("API 请求失败: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("读取响应失败: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("API 返回错误 (%d): %s", resp.StatusCode, string(body))
	}

	var result struct {
		Choices []struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, fmt.Errorf("解析响应失败: %w", err)
	}

	// A response without usable text is a failure, not an empty success:
	// returning an error lets the caller fall back to the local description.
	if len(result.Choices) == 0 || strings.TrimSpace(result.Choices[0].Message.Content) == "" {
		return nil, fmt.Errorf("API 响应中没有描述内容")
	}

	return &ImageAnalysis{
		Format:      localInfo.Format,
		Width:       localInfo.Width,
		Height:      localInfo.Height,
		FileSize:    localInfo.FileSize,
		Description: result.Choices[0].Message.Content,
		TopColors:   localInfo.TopColors,
		EXIF:        localInfo.EXIF,
		AnalyzedBy:  "openai_vision",
	}, nil
}
// analyzeImageLocally analyzes the image at filePath using only the Go
// standard library: it decodes the image, measures it, computes the five most
// frequent colors, extracts EXIF tags and composes a description text.
//
// fileSize is copied into the result as given. mimeType is accepted for
// signature symmetry with the remote analysis and is not consulted here.
// An error is returned when the file cannot be read or decoded.
//
// NOTE(review): the image is decoded in full without a cap on its pixel
// dimensions — confirm that callers bound the input.
func analyzeImageLocally(filePath, mimeType string, fileSize int64) (*ImageAnalysis, error) {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("读取文件失败: %w", err)
	}

	decoded, format, err := image.Decode(bytes.NewReader(raw))
	if err != nil {
		return nil, fmt.Errorf("解码图片失败: %w", err)
	}

	dims := decoded.Bounds().Size()
	palette := computeColorHistogram(decoded, 5)

	analysis := &ImageAnalysis{
		Format:     format,
		Width:      dims.X,
		Height:     dims.Y,
		FileSize:   fileSize,
		TopColors:  palette,
		EXIF:       extractEXIF(raw, format),
		AnalyzedBy: "local",
	}
	analysis.Description = generateLocalDescription(format, dims.X, dims.Y, fileSize, palette)
	return analysis, nil
}
// computeColorHistogram returns up to topN of the most frequent colors in
// img, most frequent first.
//
// Pixels are sampled on a regular grid and each channel is quantized into 8
// buckets of 32 levels, giving at most 512 distinct colors. Each result
// carries the bucket's center color as "#RRGGBB" and the share of sampled
// pixels that fell into it (0-100). Buckets with equal counts are ordered by
// their (R, G, B) bucket index, so the output is deterministic.
//
// It returns nil when the image has no pixels. A non-positive topN yields an
// empty result.
func computeColorHistogram(img image.Image, topN int) []ColorInfo {
	const (
		targetSamples = 10000 // lower bound on samples for large images
		bucketWidth   = 32    // channel levels per quantization bucket
	)

	bounds := img.Bounds()
	totalPixels := bounds.Dx() * bounds.Dy()

	// The stride applies on both axes, so the sample count shrinks with its
	// square. Pick the largest stride whose square still fits into
	// totalPixels/targetSamples; images up to targetSamples pixels are
	// scanned in full.
	step := 1
	for (step+1)*(step+1) <= totalPixels/targetSamples {
		step++
	}

	type bucket [3]uint8
	counts := make(map[bucket]int)
	sampled := 0
	for y := bounds.Min.Y; y < bounds.Max.Y; y += step {
		for x := bounds.Min.X; x < bounds.Max.X; x += step {
			// RGBA returns 16-bit channels; keep the high 8 bits.
			r, g, b, _ := img.At(x, y).RGBA()
			key := bucket{
				uint8(r>>8) / bucketWidth,
				uint8(g>>8) / bucketWidth,
				uint8(b>>8) / bucketWidth,
			}
			counts[key]++
			sampled++
		}
	}
	if sampled == 0 {
		return nil
	}

	type entry struct {
		key   bucket
		count int
	}
	ranked := make([]entry, 0, len(counts))
	for key, count := range counts {
		ranked = append(ranked, entry{key, count})
	}
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].count != ranked[j].count {
			return ranked[i].count > ranked[j].count
		}
		// Tie-break on the bucket index: map iteration order is random.
		a, b := ranked[i].key, ranked[j].key
		for ch := range a {
			if a[ch] != b[ch] {
				return a[ch] < b[ch]
			}
		}
		return false
	})

	limit := topN
	if limit > len(ranked) {
		limit = len(ranked)
	}
	if limit < 0 {
		limit = 0
	}

	result := make([]ColorInfo, 0, limit)
	for _, e := range ranked[:limit] {
		// Report the center of the bucket as its representative color.
		result = append(result, ColorInfo{
			Hex: fmt.Sprintf("#%02X%02X%02X",
				int(e.key[0])*bucketWidth+bucketWidth/2,
				int(e.key[1])*bucketWidth+bucketWidth/2,
				int(e.key[2])*bucketWidth+bucketWidth/2),
			Percent: float64(e.count) / float64(sampled) * 100,
		})
	}
	return result
}
// extractEXIF returns a small set of EXIF tags from JPEG data, or nil when
// format is not "jpeg" or no tag could be decoded.
//
// It scans for the first APP1 marker (0xFFE1) whose payload starts with
// "Exif\0\0" and decodes the TIFF structure that follows. Only that first
// EXIF block is processed.
func extractEXIF(data []byte, format string) map[string]string {
	if format != "jpeg" {
		return nil
	}

	for i := 0; i+10 <= len(data); i++ {
		if data[i] != 0xFF || data[i+1] != 0xE1 {
			continue
		}
		// Bytes i+2..i+3 hold the segment length; the identifier follows.
		if string(data[i+4:i+10]) != "Exif\x00\x00" {
			continue
		}
		tags := parseEXIFTIFF(data[i+10:])
		if len(tags) == 0 {
			return nil
		}
		return tags
	}
	return nil
}

// parseEXIFTIFF decodes the known tags from IFD0 and, when IFD0 points to
// one, from the Exif sub-IFD. tiff must start at the TIFF header (the byte
// order mark); every offset inside the structure is relative to that
// position. All reads are bounds-checked, so malformed input yields fewer
// tags rather than a panic.
func parseEXIFTIFF(tiff []byte) map[string]string {
	if len(tiff) < 8 {
		return nil
	}

	var bigEndian bool
	switch {
	case tiff[0] == 'M' && tiff[1] == 'M':
		bigEndian = true
	case tiff[0] == 'I' && tiff[1] == 'I':
		bigEndian = false
	default:
		return nil
	}

	// Callers of u16/u32 guarantee that the read stays inside tiff.
	u16 := func(off int) uint16 {
		if bigEndian {
			return uint16(tiff[off])<<8 | uint16(tiff[off+1])
		}
		return uint16(tiff[off+1])<<8 | uint16(tiff[off])
	}
	u32 := func(off int) uint32 {
		if bigEndian {
			return uint32(u16(off))<<16 | uint32(u16(off+2))
		}
		return uint32(u16(off+2))<<16 | uint32(u16(off))
	}

	// Tag 0x8769 in IFD0 is a LONG holding the offset of the Exif sub-IFD.
	const exifIFDPointer = 0x8769
	tagNames := map[uint16]string{
		0x010F: "Make",
		0x0110: "Model",
		0x0112: "Orientation",
		0x0132: "DateTime",
		0x829A: "ExposureTime",
		0x829D: "FNumber",
		0x8827: "ISO",
		0x9003: "DateTimeOriginal",
		0x920A: "FocalLength",
	}

	tags := make(map[string]string)

	// readIFD decodes the 12-byte entries of the IFD at offset and returns
	// the Exif sub-IFD offset found in it (0 when absent).
	readIFD := func(offset uint32) uint32 {
		// The header occupies the first 8 bytes, so a smaller offset is
		// invalid.
		if offset < 8 || int64(offset)+2 > int64(len(tiff)) {
			return 0
		}
		base := int(offset)
		numEntries := int(u16(base))

		var subIFD uint32
		for j := 0; j < numEntries; j++ {
			entry := base + 2 + j*12
			if entry+12 > len(tiff) {
				break
			}
			tag := u16(entry)
			dataType := u16(entry + 2)
			count := u32(entry + 4) // the count field is 32 bits wide

			if tag == exifIFDPointer && dataType == 4 && count == 1 {
				subIFD = u32(entry + 8)
				continue
			}
			name, known := tagNames[tag]
			if !known {
				continue
			}

			// Bytes per value for the types of interest; others count as 1.
			unit := int64(1)
			switch dataType {
			case 3: // SHORT
				unit = 2
			case 4: // LONG
				unit = 4
			case 5: // RATIONAL
				unit = 8
			}
			size := int64(count) * unit

			// Values of up to 4 bytes live in the entry itself; longer
			// ones are stored at an offset from the TIFF header.
			value := tiff[entry+8 : entry+12]
			if size > 4 {
				start := int64(u32(entry + 8))
				if start+size > int64(len(tiff)) {
					continue
				}
				value = tiff[start : start+size]
			}

			if text := extractASCIIValue(value, dataType, int(count), bigEndian); text != "" {
				tags[name] = text
			}
		}
		return subIFD
	}

	if sub := readIFD(u32(4)); sub != 0 {
		readIFD(sub)
	}
	return tags
}
// extractASCIIValue renders one raw EXIF value as text.
//
// ASCII values (type 2) are returned up to, but not including, the first NUL
// byte. SHORT values (type 3) are decoded from the first two bytes using the
// given byte order and returned in decimal. Every other type, and a SHORT
// with fewer than two bytes, yields "". count is not consulted.
func extractASCIIValue(data []byte, dataType uint16, count int, bigEndian bool) string {
	const (
		typeASCII = 2
		typeShort = 3
	)

	if dataType == typeASCII {
		// With a limit of 2 the first piece is everything before the first
		// NUL, or the whole string when there is none.
		return strings.SplitN(string(data), "\x00", 2)[0]
	}

	if dataType != typeShort || len(data) < 2 {
		return ""
	}

	high, low := data[1], data[0]
	if bigEndian {
		high, low = data[0], data[1]
	}
	return fmt.Sprint(uint16(high)<<8 | uint16(low))
}
// generateLocalDescription composes the description text used by the local
// analysis: format name, resolution, file size, a rough aspect-ratio class
// and, when topColors is non-empty, the list of dominant colors with their
// names, hex codes and percentages.
func generateLocalDescription(format string, width, height int, fileSize int64, topColors []ColorInfo) string {
	// Well-known formats get a fixed display name; anything else is shown
	// upper-cased.
	var label string
	switch strings.ToLower(format) {
	case "jpeg", "jpg":
		label = "JPEG"
	case "png":
		label = "PNG"
	case "gif":
		label = "GIF"
	case "webp":
		label = "WebP"
	case "bmp":
		label = "BMP"
	default:
		label = strings.ToUpper(format)
	}

	var out strings.Builder
	fmt.Fprintf(&out, "这是一张 %s 格式的图片,分辨率为 %d×%d 像素,文件大小为 %s。",
		label, width, height, formatFileSize(fileSize))

	// Aspect-ratio class. The cases are tested in order, so the extreme
	// bands take precedence over the moderate ones.
	switch aspect := float64(width) / float64(height); {
	case aspect > 1.8:
		out.WriteString("图片呈宽幅横幅比例。")
	case aspect < 0.6:
		out.WriteString("图片呈竖幅比例。")
	case aspect > 1.2:
		out.WriteString("图片接近横向画幅。")
	case aspect < 0.8:
		out.WriteString("图片接近纵向画幅。")
	default:
		out.WriteString("图片接近正方形比例。")
	}

	if len(topColors) == 0 {
		return out.String()
	}

	out.WriteString(" 主要色调为")
	last := len(topColors) - 1
	for idx, tone := range topColors {
		// Separator before every entry but the first; the final entry is
		// joined with the conjunction.
		switch {
		case idx == 0:
		case idx == last:
			out.WriteString(" 和 ")
		default:
			out.WriteString("、")
		}
		fmt.Fprintf(&out, "%s(%s, %.0f%%)", getColorName(tone.Hex), tone.Hex, tone.Percent)
	}
	out.WriteString("。")
	return out.String()
}
// formatFileSize renders a byte count for display: whole bytes below 1024,
// otherwise KB or MB (powers of 1024) with one decimal place.
func formatFileSize(size int64) string {
	const (
		kib = 1024
		mib = 1024 * kib
	)

	switch {
	case size < kib:
		return fmt.Sprintf("%d B", size)
	case size < mib:
		return fmt.Sprintf("%.1f KB", float64(size)/kib)
	default:
		return fmt.Sprintf("%.1f MB", float64(size)/mib)
	}
}
// getColorName maps a "#RRGGBB" color to a coarse Chinese color name.
//
// Inputs shorter than 7 bytes are returned unchanged. Near-equal channels are
// named on a black-to-white gray scale; low-contrast colors get a generic
// dark/light/neutral name; everything else is named after its dominant
// channel, with a "deep" or "light" prefix for very dark or very bright
// colors.
func getColorName(hex string) string {
	if len(hex) < 7 {
		return hex
	}

	// A failed parse is ignored; the channels it did not reach stay zero.
	var r, g, b uint8
	_, _ = fmt.Sscanf(hex, "#%02X%02X%02X", &r, &g, &b)

	// Gray scale: all three channels within 20 levels of each other.
	if absDiff(r, g) < 20 && absDiff(g, b) < 20 && absDiff(r, b) < 20 {
		switch {
		case r < 40:
			return "黑色"
		case r < 100:
			return "深灰色"
		case r < 180:
			return "灰色"
		case r < 230:
			return "浅灰色"
		default:
			return "白色"
		}
	}

	brightest := max(r, max(g, b))
	darkest := min(r, min(g, b))

	// Low contrast between channels: no hue worth naming.
	if brightest-darkest < 30 {
		switch {
		case brightest < 60:
			return "暗色"
		case brightest > 200:
			return "浅色"
		default:
			return "中性色"
		}
	}

	// Name the hue after the dominant channel, refined by the order of the
	// other two.
	var name string
	if r == brightest {
		name = "品红色"
		if g >= b {
			name = "红色"
		}
	} else if g == brightest {
		name = "黄绿色"
		if b >= r {
			name = "绿色"
		}
	} else {
		name = "蓝色"
		if r >= g {
			name = "紫红色"
		}
	}

	// Brightness prefix.
	if brightest < 80 {
		return "深" + name
	}
	if darkest > 200 {
		return "浅" + name
	}
	return name
}
// absDiff returns |a-b| as an int, so the subtraction cannot wrap around the
// way it would in uint8 arithmetic.
func absDiff(a, b uint8) int {
	diff := int(a) - int(b)
	if diff < 0 {
		return -diff
	}
	return diff
}
// max returns the larger of two uint8 values. This package-level definition
// takes precedence over any predeclared function of the same name.
func max(a, b uint8) uint8 {
	if b > a {
		return b
	}
	return a
}
// min returns the smaller of two uint8 values. This package-level definition
// takes precedence over any predeclared function of the same name.
func min(a, b uint8) uint8 {
	if b < a {
		return b
	}
	return a
}
// ========== color.RGBA → string helpers ==========
var _ = color.RGBA{} // blank use so the image/color import counts as used