Files
Cyrene/frontend/web/src/components/chat/ChatInput.tsx
T
AskaEth 6ef9e082a6 feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST)
- Gateway: VoiceStreamManager代理WS流式STT到voice-service
- Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码
- 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端)
- 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并
- 文档: 完善gateway-api.md和voice-service.md语音API文档
- 工具: scripts/voice/ 语音转换脚本集

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 11:50:40 +08:00

564 lines
20 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { useState, useRef, useCallback, useEffect } from 'react';
import type { ChatMode, MessageAttachment } from '@/types/chat';
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
import { useVoiceInput } from '@/hooks/useVoiceInput';
import { uploadFile } from '@/api/files';
import { useChatStore } from '@/store/chatStore';
/** Props accepted by the `ChatInput` composer component. */
type ChatInputProps = {
  /**
   * Invoked when the user submits a message. Receives the trimmed text, the
   * currently selected chat mode and, when images were attached, the list of
   * attachments.
   */
  onSend: (content: string, mode: ChatMode, attachments?: MessageAttachment[]) => void;
  /**
   * Optional sender for voice-stream messages; `ChatInput` forwards it to the
   * `useVoiceInput` hook as its `sendMessage` option.
   */
  onSendVoiceStream?: (msg: import('@/types/chat').WSClientMessage) => void;
  /** When true, the text area, upload, voice and send controls are disabled. */
  disabled?: boolean;
};
/** An image the user has attached but not yet sent. */
type PendingImage = {
  /** Temporary client-side identifier, used as the React key and for removal. */
  id: string;
  /** The image file selected, pasted or dropped by the user. */
  file: File;
  /** URL used as the `src` of the thumbnail preview. */
  previewUrl: string;
};
/** Largest accepted image file, in bytes (10 MiB). */
const MAX_IMAGE_SIZE = 10 * 2 ** 20;

/** MIME types accepted for image attachments. */
const SUPPORTED_IMAGE_TYPES = [
  'image/jpeg',
  'image/png',
  'image/gif',
  'image/webp',
  'image/bmp',
];

/** Maximum number of images that may be attached to one message. */
const MAX_IMAGES = 5;
/**
 * Chat composer: a text area with image attachments (file picker, paste and
 * drag-and-drop), a text / voice-message mode switch, and voice dictation.
 *
 * Voice dictation uses the `useVoiceInput` hook when it reports support and
 * falls back to `useSpeechRecognition` otherwise. Recognised text is appended
 * to the text area rather than sent directly.
 *
 * @param onSend            Called with the trimmed text, the active mode and any
 *                          image attachments when the user sends.
 * @param onSendVoiceStream Forwarded to `useVoiceInput` as `sendMessage`.
 * @param disabled          Disables the text area, upload, voice and send controls.
 */
export function ChatInput({ onSend, onSendVoiceStream, disabled }: ChatInputProps) {
  const [content, setContent] = useState('');
  const [mode, setMode] = useState<ChatMode>('text');
  const [pendingImages, setPendingImages] = useState<PendingImage[]>([]);
  const [uploading, setUploading] = useState(false);
  const [uploadError, setUploadError] = useState('');
  const [isDragOver, setIsDragOver] = useState(false);

  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);
  // dragenter/dragleave also fire for child elements, so the overlay is
  // driven by a nesting counter instead of a single boolean.
  const dragCounterRef = useRef(0);
  // Synchronous mirror of `pendingImages`. Handlers read it so that several
  // files added in one event see each other, and so that side effects
  // (object URL creation/revocation) can live outside state updaters.
  const pendingImagesRef = useRef<PendingImage[]>([]);

  const isTyping = useChatStore((s) => s.isTyping);

  /** Replaces the pending image list in both the ref mirror and React state. */
  const commitPendingImages = useCallback((next: PendingImage[]) => {
    pendingImagesRef.current = next;
    setPendingImages(next);
  }, []);

  /** Appends recognised speech to the text area, separated by a single space. */
  const appendTranscript = useCallback((text: string) => {
    setContent((prev) => {
      const trimmed = prev.trimEnd();
      return (trimmed ? trimmed + ' ' : '') + text;
    });
  }, []);

  const {
    isListening: isSRListening,
    isSupported: isSRSupported,
    isFallbackMode,
    interimText,
    finalText,
    error: srError,
    startListening: startSR,
    stopListening: stopSR,
    resetText,
  } = useSpeechRecognition();

  // VAD-based voice input (primary when supported)
  const {
    isListening: isVADListening,
    isSpeaking: isVADSpeaking,
    isSupported: isVADSupported,
    interimText: vadInterimText,
    error: vadError,
    startListening: startVAD,
    stopListening: stopVAD,
  } = useVoiceInput({
    onTranscription: appendTranscript,
    sendMessage: onSendVoiceStream,
  });

  const isListening = isVADSupported ? isVADListening : isSRListening;
  const voiceError = isVADSupported ? vadError : srError;

  // When the speech-recognition final text updates, append it to the input
  // (non-VAD mode only; VAD mode delivers text through `onTranscription`).
  useEffect(() => {
    if (!isVADSupported && finalText) {
      appendTranscript(finalText);
      resetText();
    }
  }, [isVADSupported, finalText, resetText, appendTranscript]);

  // Release preview object URLs of images that were never sent.
  useEffect(
    () => () => {
      for (const img of pendingImagesRef.current) {
        URL.revokeObjectURL(img.previewUrl);
      }
    },
    []
  );

  /**
   * Uploads the pending images (if any) and hands the message to `onSend`.
   * An image whose upload fails is still sent, using its local preview URL.
   */
  const handleSend = useCallback(async () => {
    const trimmed = content.trim();
    const imagesToSend = pendingImagesRef.current;
    const hasImages = imagesToSend.length > 0;
    if ((!trimmed && !hasImages) || disabled || uploading) return;

    let attachments: MessageAttachment[] | undefined;
    const uploadedIds = new Set<string>();

    if (hasImages) {
      setUploading(true);
      setUploadError('');
      try {
        // Uploads run in parallel; Promise.all keeps the attachment order.
        attachments = await Promise.all(
          imagesToSend.map(async (img): Promise<MessageAttachment> => {
            try {
              const result = await uploadFile(img.file);
              uploadedIds.add(img.id);
              return {
                type: 'image',
                url: result.url,
                thumbnail_url: result.thumbnail_url,
                filename: result.filename,
                size: result.size,
              };
            } catch (err: unknown) {
              console.error('[ChatInput] 图片上传失败:', img.file.name, err);
              // Degrade gracefully: fall back to the local preview (object) URL.
              return {
                type: 'image',
                url: img.previewUrl,
                filename: img.file.name,
                size: img.file.size,
              };
            }
          })
        );
      } finally {
        setUploading(false);
      }
    }

    useChatStore.getState().setTyping(true);
    onSend(trimmed, mode, attachments);
    setContent('');

    // Uploaded images no longer need their preview URL. Fallback attachments
    // still reference theirs, so those URLs are intentionally kept alive.
    for (const img of imagesToSend) {
      if (uploadedIds.has(img.id)) {
        URL.revokeObjectURL(img.previewUrl);
      }
    }
    // Remove only what was sent; images added while the upload was in flight
    // (e.g. by drag-and-drop) stay pending.
    const sentIds = new Set(imagesToSend.map((img) => img.id));
    commitPendingImages(pendingImagesRef.current.filter((img) => !sentIds.has(img.id)));

    // Reset the text area height
    if (textareaRef.current) {
      textareaRef.current.style.height = 'auto';
    }
  }, [content, mode, disabled, onSend, uploading, commitPendingImages]);

  /** Starts or stops voice input using whichever engine is active. */
  const handleVoiceToggle = useCallback(() => {
    if (isListening) {
      if (isVADSupported) {
        stopVAD();
      } else {
        stopSR();
      }
    } else if (isVADSupported) {
      startVAD();
    } else {
      startSR();
    }
  }, [isListening, isVADSupported, startVAD, stopVAD, startSR, stopSR]);

  const handleKeyDown = useCallback(
    (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
      if (e.key === 'Enter' && !e.shiftKey) {
        // Enter that confirms an IME composition (e.g. pinyin candidates)
        // must not send the message. keyCode 229 covers browsers that do not
        // set `isComposing` on this event.
        if (e.nativeEvent.isComposing || e.keyCode === 229) return;
        e.preventDefault();
        void handleSend();
        return;
      }
      // Ctrl+Shift+V toggles voice input (case-insensitive so Caps Lock works)
      if (e.ctrlKey && e.shiftKey && e.key.toLowerCase() === 'v') {
        e.preventDefault();
        handleVoiceToggle();
      }
    },
    [handleSend, handleVoiceToggle]
  );

  /** Validates an image file and adds it to the pending list with a preview URL. */
  const addImageFile = useCallback(
    (file: File) => {
      setUploadError('');
      // Check file size
      if (file.size > MAX_IMAGE_SIZE) {
        setUploadError(`图片 "${file.name}" 超过 10MB 限制`);
        return;
      }
      // Check file type
      if (!SUPPORTED_IMAGE_TYPES.includes(file.type)) {
        setUploadError(`不支持的图片格式: ${file.type}`);
        return;
      }
      // Check the count limit
      const current = pendingImagesRef.current;
      if (current.length >= MAX_IMAGES) {
        setUploadError(`最多同时上传 ${MAX_IMAGES} 张图片`);
        return;
      }
      const previewUrl = URL.createObjectURL(file);
      commitPendingImages([
        ...current,
        { file, previewUrl, id: `img_${Date.now()}_${Math.random().toString(36).slice(2)}` },
      ]);
    },
    [commitPendingImages]
  );

  // Paste images
  const handlePaste = useCallback(
    (e: React.ClipboardEvent<HTMLTextAreaElement>) => {
      const items = e.clipboardData?.items;
      if (!items) return;
      for (const item of Array.from(items)) {
        if (item.type.startsWith('image/')) {
          e.preventDefault();
          const file = item.getAsFile();
          if (file) {
            addImageFile(file);
          }
        }
      }
    },
    [addImageFile]
  );

  // Remove a pending image
  const removeImage = useCallback(
    (id: string) => {
      const current = pendingImagesRef.current;
      const target = current.find((p) => p.id === id);
      if (!target) return;
      URL.revokeObjectURL(target.previewUrl);
      commitPendingImages(current.filter((p) => p.id !== id));
    },
    [commitPendingImages]
  );

  // Drag-and-drop upload
  const handleDragEnter = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
    dragCounterRef.current++;
    if (e.dataTransfer.items && e.dataTransfer.items.length > 0) {
      setIsDragOver(true);
    }
  }, []);

  const handleDragLeave = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
    dragCounterRef.current--;
    if (dragCounterRef.current <= 0) {
      dragCounterRef.current = 0;
      setIsDragOver(false);
    }
  }, []);

  const handleDragOver = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
  }, []);

  const handleDrop = useCallback(
    (e: React.DragEvent) => {
      e.preventDefault();
      e.stopPropagation();
      setIsDragOver(false);
      dragCounterRef.current = 0;
      const files = e.dataTransfer?.files;
      if (!files) return;
      // Non-image files are ignored silently.
      for (const file of Array.from(files)) {
        if (file.type.startsWith('image/')) {
          addImageFile(file);
        }
      }
    },
    [addImageFile]
  );

  // File picker selection
  const handleFileSelect = useCallback(
    (e: React.ChangeEvent<HTMLInputElement>) => {
      const files = e.target.files;
      if (!files) return;
      for (const file of Array.from(files)) {
        addImageFile(file);
      }
      // Reset the input so the same file can be selected again
      e.target.value = '';
    },
    [addImageFile]
  );

  /** Grows the text area with its content, capped at 150px. */
  const handleInput = useCallback(() => {
    const el = textareaRef.current;
    if (el) {
      el.style.height = 'auto';
      el.style.height = Math.min(el.scrollHeight, 150) + 'px';
    }
  }, []);

  return (
    <div
      className={`relative border-t border-pink-100 dark:border-pink-900 bg-white/80 dark:bg-gray-900/80 backdrop-blur-sm px-4 py-3 transition-colors ${isDragOver ? 'bg-pink-50/80 dark:bg-pink-900/20' : ''}`}
      onDragEnter={handleDragEnter}
      onDragLeave={handleDragLeave}
      onDragOver={handleDragOver}
      onDrop={handleDrop}
    >
      {/* Drag-and-drop overlay */}
      {isDragOver && (
        <div className="absolute inset-0 z-10 bg-pink-100/60 dark:bg-pink-900/40 border-2 border-dashed border-pink-400 rounded-lg flex items-center justify-center pointer-events-none">
          <div className="text-center">
            <span className="text-3xl">📷</span>
            <p className="text-sm text-pink-500 font-medium mt-1"></p>
          </div>
        </div>
      )}
      <div className="flex flex-col gap-2 max-w-3xl mx-auto">
        {/* Assistant "is typing" indicator */}
        {isTyping && (
          <div className="flex items-center gap-2 px-1 animate-fadeIn">
            <div className="flex gap-1">
              <span
                className="w-2 h-2 rounded-full bg-pink-400 animate-bounce"
                style={{ animationDelay: '0ms' }}
              />
              <span
                className="w-2 h-2 rounded-full bg-pink-400 animate-bounce"
                style={{ animationDelay: '150ms' }}
              />
              <span
                className="w-2 h-2 rounded-full bg-pink-400 animate-bounce"
                style={{ animationDelay: '300ms' }}
              />
            </div>
            <span className="text-xs text-pink-400 font-medium">...</span>
          </div>
        )}
        {/* VAD voice status hint */}
        {isVADSupported && isVADListening && (
          <div className="text-sm text-pink-500 dark:text-pink-400 italic px-1" aria-live="polite">
            {isVADSpeaking
              ? (vadInterimText || '检测到语音,正在识别...')
              : '正在聆听...'}
          </div>
        )}
        {/* Live interim recognition text (non-VAD mode only) */}
        {!isVADSupported && isListening && interimText && (
          <div
            className="interim-text text-sm text-pink-500 dark:text-pink-400 italic px-1"
            aria-live="polite"
            aria-atomic="true"
          >
            {interimText}
          </div>
        )}
        {/* Voice error */}
        {voiceError && (
          <div
            className="text-xs text-red-500 dark:text-red-400 px-1"
            role="alert"
          >
            {voiceError}
          </div>
        )}
        {/* Upload error */}
        {uploadError && (
          <div
            className="text-xs text-red-500 dark:text-red-400 px-1"
            role="alert"
          >
            {uploadError}
          </div>
        )}
        {/* Image preview strip */}
        {pendingImages.length > 0 && (
          <div className="flex gap-2 flex-wrap px-1">
            {pendingImages.map((img) => (
              <div
                key={img.id}
                className="relative group w-16 h-16 rounded-lg overflow-hidden border border-pink-200 dark:border-pink-800 flex-shrink-0"
              >
                <img
                  src={img.previewUrl}
                  alt={img.file.name}
                  className="w-full h-full object-cover"
                />
                {uploading && (
                  <div className="absolute inset-0 bg-black/40 flex items-center justify-center">
                    <svg className="animate-spin h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
                      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                    </svg>
                  </div>
                )}
                {!uploading && (
                  <button
                    type="button"
                    onClick={() => removeImage(img.id)}
                    className="absolute top-0.5 right-0.5 w-5 h-5 rounded-full bg-black/50 text-white opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
                    aria-label="移除图片"
                  >
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-3.5 h-3.5">
                      <path fillRule="evenodd" d="M5.47 5.47a.75.75 0 011.06 0L12 10.94l5.47-5.47a.75.75 0 111.06 1.06L13.06 12l5.47 5.47a.75.75 0 11-1.06 1.06L12 13.06l-5.47 5.47a.75.75 0 01-1.06-1.06L10.94 12 5.47 6.53a.75.75 0 010-1.06z" clipRule="evenodd" />
                    </svg>
                  </button>
                )}
              </div>
            ))}
          </div>
        )}
        <div className="flex items-end gap-2">
          {/* Mode switch */}
          <div className="flex gap-1">
            <button
              type="button"
              onClick={() => setMode('text')}
              className={`p-2 rounded-lg text-xs transition-colors ${
                mode === 'text'
                  ? 'bg-pink-100 text-pink-600 dark:bg-pink-900 dark:text-pink-300'
                  : 'text-gray-400 hover:text-gray-600'
              }`}
              title="文字模式"
            >
              💬
            </button>
            <button
              type="button"
              onClick={() => setMode('voice_msg')}
              className={`p-2 rounded-lg text-xs transition-colors ${
                mode === 'voice_msg'
                  ? 'bg-pink-100 text-pink-600 dark:bg-pink-900 dark:text-pink-300'
                  : 'text-gray-400 hover:text-gray-600'
              }`}
              title="语音消息"
            >
              🎤
            </button>
          </div>
          {/* Image upload button */}
          <button
            type="button"
            onClick={() => fileInputRef.current?.click()}
            disabled={disabled || uploading}
            className="p-2 rounded-lg text-xs transition-colors text-gray-400 hover:text-pink-500 hover:bg-pink-50 dark:hover:bg-pink-900/30 disabled:opacity-40 disabled:cursor-not-allowed"
            title="上传图片"
            aria-label="上传图片"
          >
            📷
          </button>
          <input
            ref={fileInputRef}
            type="file"
            accept="image/jpeg,image/png,image/gif,image/webp,image/bmp"
            multiple
            onChange={handleFileSelect}
            className="hidden"
            aria-hidden="true"
          />
          {/* Text area */}
          <textarea
            ref={textareaRef}
            value={content}
            onChange={(e) => setContent(e.target.value)}
            onKeyDown={handleKeyDown}
            onPaste={handlePaste}
            onInput={handleInput}
            placeholder="和昔涟说点什么吧... 支持粘贴图片"
            disabled={disabled || uploading}
            rows={1}
            className="flex-1 resize-none rounded-xl border border-pink-200 dark:border-pink-800 bg-white dark:bg-gray-800 px-4 py-2 text-sm text-gray-700 dark:text-gray-200 placeholder-gray-400 focus:outline-none focus:ring-2 focus:ring-pink-400 focus:border-transparent disabled:opacity-50"
          />
          {/* Voice input button (shown when VAD or the speech-recognition hook reports support) */}
          {(isVADSupported || isSRSupported) && (
            <button
              type="button"
              onClick={handleVoiceToggle}
              disabled={disabled || uploading}
              aria-label={isListening ? '停止语音输入' : '开始语音输入'}
              aria-pressed={isListening}
              title={isListening ? '停止聆听 (Ctrl+Shift+V)' : isVADSupported ? '语音输入 (自动检测说话)' : '语音输入 (Ctrl+Shift+V)'}
              className={`p-2 rounded-xl transition-all flex-shrink-0 border-2 ${
                isListening
                  ? 'voice-btn-active bg-red-500 border-red-500 text-white'
                  : isVADSpeaking
                  ? 'bg-yellow-400 border-yellow-400 text-white'
                  : 'bg-gray-100 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-500 hover:text-red-500 hover:border-red-300'
              } disabled:opacity-40 disabled:cursor-not-allowed`}
            >
              <svg
                xmlns="http://www.w3.org/2000/svg"
                viewBox="0 0 24 24"
                fill="currentColor"
                className="w-5 h-5"
              >
                <path d="M12 2a3 3 0 0 0-3 3v6a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z" />
                <path d="M7.5 11a4.5 4.5 0 0 0 9 0h1.5a6 6 0 0 1-5.25 5.95V20h3.75v1.5h-9v-1.5h3.75v-2.05A6 6 0 0 1 6 11h1.5Z" />
              </svg>
            </button>
          )}
          {/* Disabled placeholder button when no voice engine is supported */}
          {!isVADSupported && !isSRSupported && (
            <button
              type="button"
              disabled
              title="您的浏览器不支持语音识别"
              className="p-2 rounded-xl bg-gray-100 dark:bg-gray-800 border border-gray-200 dark:border-gray-700 text-gray-300 dark:text-gray-600 flex-shrink-0 cursor-not-allowed"
              aria-label="语音输入不可用"
            >
              <svg
                xmlns="http://www.w3.org/2000/svg"
                viewBox="0 0 24 24"
                fill="currentColor"
                className="w-5 h-5"
              >
                <path d="M12 2a3 3 0 0 0-3 3v6a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z" />
                <path d="M7.5 11a4.5 4.5 0 0 0 9 0h1.5a6 6 0 0 1-5.25 5.95V20h3.75v1.5h-9v-1.5h3.75v-2.05A6 6 0 0 1 6 11h1.5Z" />
                <line x1="4" y1="4" x2="20" y2="20" stroke="currentColor" strokeWidth="2" />
              </svg>
            </button>
          )}
          {/* Send button */}
          <button
            type="button"
            onClick={() => void handleSend()}
            disabled={disabled || uploading || (!content.trim() && pendingImages.length === 0)}
            aria-label="发送消息"
            className="p-2 rounded-xl bg-pink-400 text-white hover:bg-pink-500 disabled:opacity-40 disabled:cursor-not-allowed transition-colors flex-shrink-0"
          >
            {uploading ? (
              <svg className="animate-spin h-5 w-5" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
                <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
              </svg>
            ) : (
              <svg
                xmlns="http://www.w3.org/2000/svg"
                viewBox="0 0 24 24"
                fill="currentColor"
                className="w-5 h-5"
              >
                <path d="M3.478 2.404a.75.75 0 0 0-.926.941l2.432 7.905H13.5a.75.75 0 0 1 0 1.5H4.984l-2.432 7.905a.75.75 0 0 0 .926.94 60.519 60.519 0 0 0 18.445-8.986.75.75 0 0 0 0-1.218A60.517 60.517 0 0 0 3.478 2.404Z" />
              </svg>
            )}
          </button>
        </div>
        {/* Voice input status hint */}
        {isListening && (
          <p className="text-xs text-red-400 text-center animate-pulse">
            {isVADSupported
              ? (isVADSpeaking ? '🔊 检测到语音,正在识别...' : '🎤 正在聆听...')
              : (isFallbackMode ? '🎤 后端语音识别中...' : '🎤 正在聆听...')
            }
            <span className="text-gray-400 ml-2">(Ctrl+Shift+V )</span>
          </p>
        )}
        {mode !== 'text' && !isListening && (
          <p className="text-xs text-gray-400 text-center">
            {mode === 'voice_msg' ? '点击麦克风按钮开始语音输入 ♪' : ''}
          </p>
        )}
      </div>
    </div>
  );
}