feat: 语音流式输入管线 + VAD前端集成 + 插件-工具合并清理
- 前端: VAD语音检测(@ricky0123/vad-web) + useVoiceInput双模式(流式WS/REST) - Gateway: VoiceStreamManager代理WS流式STT到voice-service - Voice-service: DashScope REST → Realtime WS → Whisper三级引擎 + ffmpeg转码 - 共享模块: pkg/audio(音频转换) + pkg/dashscope(ASR REST客户端) - 清理: 移除旧plugin-manager和pkg/plugins,完成插件→工具合并 - 文档: 完善gateway-api.md和voice-service.md语音API文档 - 工具: scripts/voice/ 语音转换脚本集 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Generated
+156
@@ -8,6 +8,7 @@
|
||||
"name": "cyrene-frontend",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"@ricky0123/vad-web": "^0.0.30",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"zustand": "^4.5.5"
|
||||
@@ -848,6 +849,78 @@
|
||||
"node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/base64": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/base64/-/base64-1.1.2.tgz",
|
||||
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/codegen": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/codegen/-/codegen-2.0.5.tgz",
|
||||
"integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/eventemitter": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/eventemitter/-/eventemitter-1.1.1.tgz",
|
||||
"integrity": "sha512-vW1GmwMZNnL+gMRaovlh9yZX74kc+TTU3FObkkurpMaRtBfLP3ldjS9KQWlwZgraRE0+dheEEoAxdzcJQ8eXZg==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/fetch": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/fetch/-/fetch-1.1.1.tgz",
|
||||
"integrity": "sha512-GpptLrs57adMSuHi3VNj0mAF8dwh36LMaYF6XyJ6JMWlVsc+t42tm1HSEDmOs3A8fC9yyeisgLhsTVQokOZ0zw==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/float": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/float/-/float-1.0.2.tgz",
|
||||
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/inquire/-/inquire-1.1.2.tgz",
|
||||
"integrity": "sha512-pa0vFRuws4wkvaXKK1uXZMAwAX4/t8ANaJo45iw/oQHNQ9q5xUzwgFmVJGXiga2BeN+zpX7Vf9vmsiIa2J+MUw==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/path/-/path-1.1.2.tgz",
|
||||
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/pool": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/pool/-/pool-1.1.0.tgz",
|
||||
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/utf8": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmmirror.com/@protobufjs/utf8/-/utf8-1.1.1.tgz",
|
||||
"integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@ricky0123/vad-web": {
|
||||
"version": "0.0.30",
|
||||
"resolved": "https://registry.npmmirror.com/@ricky0123/vad-web/-/vad-web-0.0.30.tgz",
|
||||
"integrity": "sha512-cJyYrh4YeeUBJcbR9Bic/bFDyB9qBkAepvpuWM3vLxnAi7bC3VHzf51UeNdT+OtY4D7MLAgV8iJMc4z41ZnaWg==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"onnxruntime-web": "^1.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@rolldown/pluginutils": {
|
||||
"version": "1.0.0-beta.27",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
|
||||
@@ -1257,6 +1330,15 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.9.2",
|
||||
"resolved": "https://registry.npmmirror.com/@types/node/-/node-25.9.2.tgz",
|
||||
"integrity": "sha512-G05zqtJhcDLb8uslf5EjCxXg9G1KQxiV8OS0R26IC//Eoyitzqe8z37I7cqvnZlrlSfgocQRfSn/AHBZJJFyGw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": ">=7.24.0 <7.24.7"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/prop-types": {
|
||||
"version": "15.7.15",
|
||||
"resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz",
|
||||
@@ -1704,6 +1786,12 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/flatbuffers": {
|
||||
"version": "25.9.23",
|
||||
"resolved": "https://registry.npmmirror.com/flatbuffers/-/flatbuffers-25.9.23.tgz",
|
||||
"integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/fraction.js": {
|
||||
"version": "5.3.4",
|
||||
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
|
||||
@@ -1766,6 +1854,12 @@
|
||||
"node": ">=10.13.0"
|
||||
}
|
||||
},
|
||||
"node_modules/guid-typescript": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmmirror.com/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
||||
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/hasown": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
|
||||
@@ -1903,6 +1997,12 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/long": {
|
||||
"version": "5.3.2",
|
||||
"resolved": "https://registry.npmmirror.com/long/-/long-5.3.2.tgz",
|
||||
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/loose-envify": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
|
||||
@@ -2024,6 +2124,26 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/onnxruntime-common": {
|
||||
"version": "1.26.0",
|
||||
"resolved": "https://registry.npmmirror.com/onnxruntime-common/-/onnxruntime-common-1.26.0.tgz",
|
||||
"integrity": "sha512-qVyMR4lcWgbkc4getFV+GQijsTnbg/siteoqcDwa3sI/LxbrMSNw4ePyvCq/ymdQaRomCA7YuWmhzsswxvymdw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/onnxruntime-web": {
|
||||
"version": "1.26.0",
|
||||
"resolved": "https://registry.npmmirror.com/onnxruntime-web/-/onnxruntime-web-1.26.0.tgz",
|
||||
"integrity": "sha512-LbRr/8zZt2xilI2smrVQGGKINo0U46i8qJp+UXyMBGfqN7KjnH1BiwCwLwyNIVV4i9CKFv7Sf4PwLKWnT8/bEA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"flatbuffers": "^25.1.24",
|
||||
"guid-typescript": "^1.0.9",
|
||||
"long": "^5.2.3",
|
||||
"onnxruntime-common": "1.26.0",
|
||||
"platform": "^1.3.6",
|
||||
"protobufjs": "^7.2.4"
|
||||
}
|
||||
},
|
||||
"node_modules/path-parse": {
|
||||
"version": "1.0.7",
|
||||
"resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
|
||||
@@ -2071,6 +2191,12 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/platform": {
|
||||
"version": "1.3.6",
|
||||
"resolved": "https://registry.npmmirror.com/platform/-/platform-1.3.6.tgz",
|
||||
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/postcss": {
|
||||
"version": "8.5.14",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.14.tgz",
|
||||
@@ -2234,6 +2360,30 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "7.6.2",
|
||||
"resolved": "https://registry.npmmirror.com/protobufjs/-/protobufjs-7.6.2.tgz",
|
||||
"integrity": "sha512-N9EiLovGEQOJSPF26Ij7qUGvahfEnq0eeYZ02aigIedkmz1qZSwjnP9SBITHJuF/6MYbIW4HDN8zdYjsjqJKXQ==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.5",
|
||||
"@protobufjs/eventemitter": "^1.1.1",
|
||||
"@protobufjs/fetch": "^1.1.1",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.2",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^5.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/queue-microtask": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
|
||||
@@ -2623,6 +2773,12 @@
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.24.6",
|
||||
"resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.24.6.tgz",
|
||||
"integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/update-browserslist-db": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ricky0123/vad-web": "^0.0.30",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"zustand": "^4.5.5"
|
||||
|
||||
@@ -36,7 +36,7 @@ function setHashSessionId(sessionId: string | null) {
|
||||
|
||||
export default function App() {
|
||||
const { isLoggedIn, login, register, loading: authLoading, userId } = useAuth();
|
||||
const { send } = useChat();
|
||||
const { send, sendVoiceStreamMessage } = useChat();
|
||||
const { loadSessionsFromServer, ensureMainSession, setCurrentSessionId, setMessages, loadMessagesFromServer, sessions, currentSessionId } = useSessionStore();
|
||||
|
||||
const [authMode, setAuthMode] = useState<'login' | 'register'>('login');
|
||||
@@ -330,41 +330,42 @@ export default function App() {
|
||||
return (
|
||||
<ErrorBoundary>
|
||||
<AppLayout>
|
||||
<PageRouter onSend={send} />
|
||||
<PageRouter onSend={send} onSendVoiceStream={sendVoiceStreamMessage} />
|
||||
</AppLayout>
|
||||
</ErrorBoundary>
|
||||
);
|
||||
}
|
||||
|
||||
type SendFn = (content: string, mode?: import('@/types/chat').ChatMode, attachments?: import('@/types/chat').MessageAttachment[]) => void;
|
||||
type SendVoiceStreamFn = (msg: import('@/types/chat').WSClientMessage) => void;
|
||||
|
||||
function PageRouter({ onSend }: { onSend: SendFn }) {
|
||||
function PageRouter({ onSend, onSendVoiceStream }: { onSend: SendFn; onSendVoiceStream: SendVoiceStreamFn }) {
|
||||
const currentPage = usePageStore((s) => s.currentPage);
|
||||
const isAdmin = isAdminUser(localStorage.getItem('user_id') || '');
|
||||
|
||||
switch (currentPage) {
|
||||
case 'admin-models':
|
||||
if (!isAdmin) return <ChatPage onSend={onSend} />;
|
||||
if (!isAdmin) return <ChatPage onSend={onSend} onSendVoiceStream={onSendVoiceStream} />;
|
||||
return <ModelsAdminPage />;
|
||||
case 'admin-dashboard':
|
||||
if (!isAdmin) return <ChatPage onSend={onSend} />;
|
||||
if (!isAdmin) return <ChatPage onSend={onSend} onSendVoiceStream={onSendVoiceStream} />;
|
||||
return <AdminDashboard />;
|
||||
case 'profile':
|
||||
return <ProfilePage />;
|
||||
case 'chat':
|
||||
default:
|
||||
return <ChatPage onSend={onSend} />;
|
||||
return <ChatPage onSend={onSend} onSendVoiceStream={onSendVoiceStream} />;
|
||||
}
|
||||
}
|
||||
|
||||
function ChatPage({ onSend }: { onSend: SendFn }) {
|
||||
function ChatPage({ onSend, onSendVoiceStream }: { onSend: SendFn; onSendVoiceStream: SendVoiceStreamFn }) {
|
||||
return (
|
||||
<div className="flex flex-col h-full overflow-hidden">
|
||||
<div className="flex-1 min-h-0 overflow-hidden">
|
||||
<ChatContainer />
|
||||
</div>
|
||||
<div className="flex-shrink-0">
|
||||
<ChatInput onSend={onSend} />
|
||||
<ChatInput onSend={onSend} onSendVoiceStream={onSendVoiceStream} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import { useState, useRef, useCallback, useEffect } from 'react';
|
||||
import type { ChatMode, MessageAttachment } from '@/types/chat';
|
||||
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
|
||||
import { useVoiceInput } from '@/hooks/useVoiceInput';
|
||||
import { uploadFile } from '@/api/files';
|
||||
import { useChatStore } from '@/store/chatStore';
|
||||
|
||||
interface ChatInputProps {
|
||||
onSend: (content: string, mode: ChatMode, attachments?: MessageAttachment[]) => void;
|
||||
onSendVoiceStream?: (msg: import('@/types/chat').WSClientMessage) => void;
|
||||
disabled?: boolean;
|
||||
}
|
||||
|
||||
@@ -19,7 +21,7 @@ const MAX_IMAGE_SIZE = 10 * 1024 * 1024; // 10MB
|
||||
const SUPPORTED_IMAGE_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp', 'image/bmp'];
|
||||
const MAX_IMAGES = 5;
|
||||
|
||||
export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
export function ChatInput({ onSend, onSendVoiceStream, disabled }: ChatInputProps) {
|
||||
const [content, setContent] = useState('');
|
||||
const [mode, setMode] = useState<ChatMode>('text');
|
||||
const [pendingImages, setPendingImages] = useState<PendingImage[]>([]);
|
||||
@@ -30,27 +32,50 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
const isTyping = useChatStore((s) => s.isTyping);
|
||||
|
||||
const {
|
||||
isListening,
|
||||
isSupported,
|
||||
isListening: isSRListening,
|
||||
isSupported: isSRSpported,
|
||||
isFallbackMode,
|
||||
interimText,
|
||||
finalText,
|
||||
error,
|
||||
startListening,
|
||||
stopListening,
|
||||
error: srError,
|
||||
startListening: startSR,
|
||||
stopListening: stopSR,
|
||||
resetText,
|
||||
} = useSpeechRecognition();
|
||||
|
||||
// 当 finalText 更新时,追加到输入框
|
||||
// VAD-based voice input (primary when supported)
|
||||
const {
|
||||
isListening: isVADListening,
|
||||
isSpeaking: isVADSpeaking,
|
||||
isSupported: isVADSupported,
|
||||
interimText: vadInterimText,
|
||||
finalText: vadFinalText,
|
||||
error: vadError,
|
||||
startListening: startVAD,
|
||||
stopListening: stopVAD,
|
||||
} = useVoiceInput({
|
||||
onTranscription: (text: string) => {
|
||||
setContent((prev) => {
|
||||
const trimmed = prev.trimEnd();
|
||||
return (trimmed ? trimmed + ' ' : '') + text;
|
||||
});
|
||||
},
|
||||
sendMessage: onSendVoiceStream,
|
||||
});
|
||||
|
||||
const isListening = isVADSupported ? isVADListening : isSRListening;
|
||||
const voiceError = isVADSupported ? vadError : srError;
|
||||
|
||||
// 当 SR finalText 更新时,追加到输入框 (仅非 VAD 模式)
|
||||
useEffect(() => {
|
||||
if (finalText) {
|
||||
if (!isVADSupported && finalText) {
|
||||
setContent((prev) => {
|
||||
const trimmed = prev.trimEnd();
|
||||
return (trimmed ? trimmed + ' ' : '') + finalText;
|
||||
});
|
||||
resetText();
|
||||
}
|
||||
}, [finalText, resetText]);
|
||||
}, [isVADSupported, finalText, resetText]);
|
||||
|
||||
const handleSend = useCallback(async () => {
|
||||
const trimmed = content.trim();
|
||||
@@ -121,13 +146,13 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
if (e.key === 'V' && e.ctrlKey && e.shiftKey) {
|
||||
e.preventDefault();
|
||||
if (isListening) {
|
||||
stopListening();
|
||||
isVADSupported ? stopVAD() : stopSR();
|
||||
} else {
|
||||
startListening();
|
||||
isVADSupported ? startVAD() : startSR();
|
||||
}
|
||||
}
|
||||
},
|
||||
[handleSend, isListening, startListening, stopListening]
|
||||
[handleSend, isListening, isVADSupported, startVAD, stopVAD, startSR, stopSR]
|
||||
);
|
||||
|
||||
// 粘贴图片
|
||||
@@ -260,11 +285,11 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
|
||||
const handleVoiceToggle = useCallback(() => {
|
||||
if (isListening) {
|
||||
stopListening();
|
||||
isVADSupported ? stopVAD() : stopSR();
|
||||
} else {
|
||||
startListening();
|
||||
isVADSupported ? startVAD() : startSR();
|
||||
}
|
||||
}, [isListening, startListening, stopListening]);
|
||||
}, [isListening, isVADSupported, startVAD, stopVAD, startSR, stopSR]);
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -305,8 +330,17 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 实时识别文本提示 */}
|
||||
{isListening && interimText && (
|
||||
{/* VAD 语音状态提示 */}
|
||||
{isVADSupported && isVADListening && (
|
||||
<div className="text-sm text-pink-500 dark:text-pink-400 italic px-1" aria-live="polite">
|
||||
{isVADSpeaking
|
||||
? (vadInterimText || '检测到语音,正在识别...')
|
||||
: '正在聆听...'}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 实时识别文本提示 (仅非 VAD 模式) */}
|
||||
{!isVADSupported && isListening && interimText && (
|
||||
<div
|
||||
className="interim-text text-sm text-pink-500 dark:text-pink-400 italic px-1"
|
||||
aria-live="polite"
|
||||
@@ -317,12 +351,12 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
)}
|
||||
|
||||
{/* 错误提示 */}
|
||||
{error && (
|
||||
{voiceError && (
|
||||
<div
|
||||
className="text-xs text-red-500 dark:text-red-400 px-1"
|
||||
role="alert"
|
||||
>
|
||||
⚠️ {error}
|
||||
⚠️ {voiceError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -434,18 +468,20 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
className="flex-1 resize-none rounded-xl border border-pink-200 dark:border-pink-800 bg-white dark:bg-gray-800 px-4 py-2 text-sm text-gray-700 dark:text-gray-200 placeholder-gray-400 focus:outline-none focus:ring-2 focus:ring-pink-400 focus:border-transparent disabled:opacity-50"
|
||||
/>
|
||||
|
||||
{/* 语音输入按钮 (仅浏览器支持时显示) */}
|
||||
{isSupported && (
|
||||
{/* 语音输入按钮 (VAD 或浏览器 SpeechRecognition/MediaRecorder 支持时显示) */}
|
||||
{(isVADSupported || isSRSpported) && (
|
||||
<button
|
||||
onClick={handleVoiceToggle}
|
||||
disabled={disabled || uploading}
|
||||
aria-label={isListening ? '停止语音输入' : '开始语音输入'}
|
||||
aria-pressed={isListening}
|
||||
title={isListening ? '停止聆听 (Ctrl+Shift+V)' : '语音输入 (Ctrl+Shift+V)'}
|
||||
title={isListening ? '停止聆听 (Ctrl+Shift+V)' : isVADSupported ? '语音输入 (自动检测说话)' : '语音输入 (Ctrl+Shift+V)'}
|
||||
className={`p-2 rounded-xl transition-all flex-shrink-0 border-2 ${
|
||||
isListening
|
||||
? 'voice-btn-active bg-red-500 border-red-500 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-500 hover:text-red-500 hover:border-red-300'
|
||||
: isVADSpeaking
|
||||
? 'bg-yellow-400 border-yellow-400 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-500 hover:text-red-500 hover:border-red-300'
|
||||
} disabled:opacity-40 disabled:cursor-not-allowed`}
|
||||
>
|
||||
<svg
|
||||
@@ -461,7 +497,7 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
)}
|
||||
|
||||
{/* 不支持时显示禁用按钮 */}
|
||||
{!isSupported && (
|
||||
{!isVADSupported && !isSRSpported && (
|
||||
<button
|
||||
disabled
|
||||
title="您的浏览器不支持语音识别"
|
||||
@@ -508,7 +544,10 @@ export function ChatInput({ onSend, disabled }: ChatInputProps) {
|
||||
{/* 语音输入状态提示 */}
|
||||
{isListening && (
|
||||
<p className="text-xs text-red-400 text-center animate-pulse">
|
||||
{isFallbackMode ? '🎤 后端语音识别中...' : '🎤 正在聆听...'}
|
||||
{isVADSupported
|
||||
? (isVADSpeaking ? '🔊 检测到语音,正在识别...' : '🎤 正在聆听...')
|
||||
: (isFallbackMode ? '🎤 后端语音识别中...' : '🎤 正在聆听...')
|
||||
}
|
||||
<span className="text-gray-400 ml-2">(Ctrl+Shift+V 停止)</span>
|
||||
</p>
|
||||
)}
|
||||
|
||||
@@ -43,11 +43,19 @@ export function useChat() {
|
||||
[addMessage, setTyping, sendMessage]
|
||||
);
|
||||
|
||||
const sendVoiceStreamMessage = useCallback(
|
||||
(msg: import('@/types/chat').WSClientMessage) => {
|
||||
sendMessage(msg);
|
||||
},
|
||||
[sendMessage]
|
||||
);
|
||||
|
||||
return {
|
||||
messages,
|
||||
isTyping,
|
||||
isConnected,
|
||||
send,
|
||||
sendVoiceStreamMessage,
|
||||
clearMessages,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -0,0 +1,221 @@
|
||||
import { useState, useRef, useCallback, useEffect } from 'react';
|
||||
import { MicVAD, utils } from '@ricky0123/vad-web';
|
||||
import { transcribeAudio } from '@/api/voice';
|
||||
import { useChatStore } from '@/store/chatStore';
|
||||
import type { WSClientMessage } from '@/types/chat';
|
||||
|
||||
/** Configuration accepted by {@link useVoiceInput}. */
interface UseVoiceInputOptions {
  /** Called with the recognized text once an utterance has been transcribed. */
  onTranscription: (text: string) => void;

  /** Recognition language passed along with the audio; the hook defaults it to `'zh'`. */
  language?: string;

  /**
   * When provided, streaming mode is enabled: audio is sent in chunks through
   * this WebSocket sender and interim results are returned in real time.
   * When omitted, the hook uploads the whole utterance over REST instead.
   */
  sendMessage?: (msg: WSClientMessage) => void;
}
|
||||
|
||||
/** State and controls exposed by {@link useVoiceInput}. */
interface UseVoiceInputReturn {
  // --- status flags ---

  /** True once the VAD has been started and until `stopListening` is called. */
  isListening: boolean;
  /** True between the VAD's speech-start and speech-end/misfire callbacks. */
  isSpeaking: boolean;
  /** Whether the runtime offers what the hook checks for (AudioWorklet + WebAssembly). */
  isSupported: boolean;

  // --- recognition output ---

  /** Streaming mode: intermediate recognition text (`voice_interim`). */
  interimText: string;
  /** Streaming mode: final recognition text (`voice_final`). */
  finalText: string;
  /** Last error message, or `null` when there is none. */
  error: string | null;

  // --- controls ---

  /** Creates and starts the VAD. */
  startListening: () => Promise<void>;
  /** Pauses the VAD and resets the listening/speaking flags. */
  stopListening: () => void;
}
|
||||
|
||||
/**
 * Voice input hook built on `@ricky0123/vad-web` voice-activity detection.
 *
 * Two modes, selected by whether `sendMessage` is provided:
 * - Streaming: on speech start a `voice_stream_start` message is sent, audio is
 *   forwarded as `voice_stream_chunk` messages while the user speaks, and
 *   `voice_stream_end` closes the utterance. The transcription arrives through
 *   the chat store (`voiceFinalText`) and is forwarded to `onTranscription`.
 * - REST: the finished utterance is encoded as WAV and sent to `transcribeAudio`.
 *
 * @param options.onTranscription Receives the recognized text of each utterance.
 * @param options.language Recognition language, `'zh'` by default.
 * @param options.sendMessage Optional WebSocket sender; enables streaming mode.
 * @returns Listening/speaking flags, streaming texts, last error and start/stop controls.
 */
export function useVoiceInput({
  onTranscription,
  language = 'zh',
  sendMessage,
}: UseVoiceInputOptions): UseVoiceInputReturn {
  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const vadRef = useRef<MicVAD | null>(null);
  const inSpeechRef = useRef(false);
  // Sequence number of the next voice_stream_chunk within the current utterance.
  const seqRef = useRef(0);
  // Frames collected since the last chunk was sent.
  const pendingFramesRef = useRef<Float32Array[]>([]);
  // Guards against overlapping startListening() calls creating two VAD instances.
  const startingRef = useRef(false);
  // Set on unmount so a VAD that finishes loading afterwards is torn down again.
  const disposedRef = useRef(false);

  // The VAD callbacks are created once per startListening() call but must always
  // see the latest props, so the props are mirrored into refs after each render.
  const onTranscriptionRef = useRef(onTranscription);
  const sendMessageRef = useRef(sendMessage);
  const languageRef = useRef(language);
  useEffect(() => {
    onTranscriptionRef.current = onTranscription;
    sendMessageRef.current = sendMessage;
    languageRef.current = language;
  });

  const isSupported =
    typeof window !== 'undefined' &&
    typeof AudioWorkletNode !== 'undefined' &&
    typeof WebAssembly !== 'undefined';

  const isStreaming = !!sendMessage;

  // Subscribe to streaming results from store
  const voiceInterimText = useChatStore((s) => s.voiceInterimText);
  const voiceFinalText = useChatStore((s) => s.voiceFinalText);

  // Closes an utterance that is still open (streaming mode) and drops buffered frames.
  const endOpenStream = useCallback(() => {
    const send = sendMessageRef.current;
    if (inSpeechRef.current && send) {
      try {
        send({ type: 'voice_stream_end', timestamp: Date.now() });
      } catch {
        // Best effort: the connection may already be gone.
      }
    }
    inSpeechRef.current = false;
    pendingFramesRef.current = [];
  }, []);

  const stopListening = useCallback(() => {
    if (vadRef.current) {
      vadRef.current.pause();
    }
    // NOTE(review): pausing mid-utterance is assumed not to fire onSpeechEnd
    // (vad-web's submitUserSpeechOnPause is not enabled here — confirm), so the
    // stream announced by voice_stream_start has to be closed explicitly.
    endOpenStream();
    setIsListening(false);
    setIsSpeaking(false);
  }, [endOpenStream]);

  const startListening = useCallback(async () => {
    if (!isSupported || startingRef.current) return;
    startingRef.current = true;

    setError(null);
    seqRef.current = 0;
    pendingFramesRef.current = [];

    try {
      if (vadRef.current) {
        await vadRef.current.destroy();
        vadRef.current = null;
      }

      const vad = await MicVAD.new({
        onSpeechStart: () => {
          setIsSpeaking(true);
          inSpeechRef.current = true;
          seqRef.current = 0;
          pendingFramesRef.current = [];

          sendMessageRef.current?.({
            type: 'voice_stream_start',
            format: 'wav',
            language: languageRef.current,
            timestamp: Date.now(),
          });
        },

        onFrameProcessed: (_probabilities, frame) => {
          const send = sendMessageRef.current;
          if (!send || !inSpeechRef.current) return;

          // Accumulate frames and send them together every FRAMES_PER_CHUNK frames
          // (~300ms of audio at ~30ms per frame), so no audio is dropped between chunks.
          const FRAMES_PER_CHUNK = 10;
          // Copy defensively in case the library reuses the frame buffer — TODO confirm.
          pendingFramesRef.current.push(frame.slice());
          if (pendingFramesRef.current.length < FRAMES_PER_CHUNK) return;

          const samples = concatFrames(pendingFramesRef.current);
          pendingFramesRef.current = [];
          const sequence = seqRef.current++;

          try {
            send({
              type: 'voice_stream_chunk',
              audio_data: arrayBufferToBase64(utils.encodeWAV(samples)),
              sequence,
              timestamp: Date.now(),
            });
          } catch {
            // Ignore encoding errors for individual chunks
          }
        },

        onSpeechEnd: async (audio: Float32Array) => {
          setIsSpeaking(false);
          inSpeechRef.current = false;
          // Frames still buffered are covered by the full-utterance chunk below.
          pendingFramesRef.current = [];

          const send = sendMessageRef.current;
          if (send) {
            // Send the complete utterance as a final chunk marked with sequence -1.
            // NOTE(review): how the gateway combines this with the earlier chunks is
            // not visible from this file — confirm against the voice stream protocol.
            try {
              send({
                type: 'voice_stream_chunk',
                audio_data: arrayBufferToBase64(utils.encodeWAV(audio)),
                sequence: -1,
                timestamp: Date.now(),
              });
            } catch {
              // Ignore
            }

            // Signal end of voice stream — gateway returns voice_final
            send({
              type: 'voice_stream_end',
              timestamp: Date.now(),
            });
            // onTranscription will be called in useEffect when voiceFinalText updates
            return;
          }

          // REST mode: send full audio for transcription. Encoding sits inside the
          // try so an encoding failure is reported instead of rejecting unhandled.
          try {
            const wavBlob = new Blob([utils.encodeWAV(audio)], { type: 'audio/wav' });
            const result = await transcribeAudio(wavBlob, languageRef.current);
            if (result.error) {
              setError(result.error);
            } else if (result.data?.text) {
              onTranscriptionRef.current(result.data.text);
            }
          } catch (err) {
            setError(err instanceof Error ? err.message : '语音识别失败');
          }
        },

        onVADMisfire: () => {
          setIsSpeaking(false);
          inSpeechRef.current = false;
          pendingFramesRef.current = [];

          sendMessageRef.current?.({
            type: 'voice_stream_end',
            timestamp: Date.now(),
          });
        },

        startOnLoad: true,
      });

      if (disposedRef.current) {
        // The component unmounted while the VAD was loading: release the microphone.
        await vad.destroy();
        return;
      }

      vadRef.current = vad;
      setIsListening(true);
    } catch (err) {
      const message =
        err instanceof DOMException && err.name === 'NotAllowedError'
          ? '麦克风权限被拒绝'
          : err instanceof Error
            ? err.message
            : 'VAD 初始化失败';
      setError(message);
    } finally {
      startingRef.current = false;
    }
  }, [isSupported]);

  // In streaming mode, watch for voice_final from the server
  useEffect(() => {
    if (isStreaming && voiceFinalText) {
      onTranscriptionRef.current(voiceFinalText);
      // Consume the value so the same text is not delivered twice.
      useChatStore.getState().setVoiceFinalText('');
    }
  }, [isStreaming, voiceFinalText]);

  // Tear down on unmount: close any open stream and destroy the VAD.
  useEffect(() => {
    disposedRef.current = false;
    return () => {
      disposedRef.current = true;
      endOpenStream();
      if (vadRef.current) {
        void vadRef.current.destroy();
        vadRef.current = null;
      }
    };
  }, [endOpenStream]);

  return {
    isListening,
    isSpeaking,
    isSupported,
    interimText: isStreaming ? voiceInterimText : '',
    finalText: isStreaming ? voiceFinalText : '',
    error,
    startListening,
    stopListening,
  };
}

/** Joins consecutive audio frames into one contiguous sample buffer. */
function concatFrames(frames: readonly Float32Array[]): Float32Array {
  const totalLength = frames.reduce((sum, frame) => sum + frame.length, 0);
  const joined = new Float32Array(totalLength);
  let offset = 0;
  for (const frame of frames) {
    joined.set(frame, offset);
    offset += frame.length;
  }
  return joined;
}
|
||||
|
||||
/**
 * Encodes the bytes of an ArrayBuffer as a standard Base64 string.
 *
 * The bytes are first mapped to a binary string (one char per byte) because
 * `btoa` only accepts strings. The mapping is done in slices rather than one
 * character at a time: this avoids one string append per byte on buffers that
 * can hold a whole utterance, while keeping each `String.fromCharCode` call
 * well below engine argument-count limits.
 *
 * @param buffer Raw bytes to encode; may be empty.
 * @returns The Base64 text, or an empty string for an empty buffer.
 */
function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const bytes = new Uint8Array(buffer);
  const SLICE_SIZE = 0x8000;
  const parts: string[] = [];
  for (let offset = 0; offset < bytes.length; offset += SLICE_SIZE) {
    // subarray creates a view, so no bytes are copied here.
    parts.push(String.fromCharCode(...bytes.subarray(offset, offset + SLICE_SIZE)));
  }
  return btoa(parts.join(''));
}
|
||||
@@ -254,7 +254,7 @@ function handleServerMessage(msg: WSServerMessage) {
|
||||
client_info: msg.client_info,
|
||||
audioUrl: msg.full_audio_url,
|
||||
segments: msg.segments,
|
||||
metadata: msg.tool_calls ? { tool_calls: msg.tool_calls } : undefined,
|
||||
metadata: msg.metadata || (msg.tool_calls ? { tool_calls: msg.tool_calls } : undefined),
|
||||
});
|
||||
}
|
||||
setTyping(false);
|
||||
@@ -462,6 +462,18 @@ function handleServerMessage(msg: WSServerMessage) {
|
||||
}
|
||||
break;
|
||||
|
||||
case 'voice_interim':
|
||||
if (msg.text !== undefined) {
|
||||
useChatStore.getState().setVoiceInterimText(msg.text);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'voice_final':
|
||||
if (msg.text !== undefined) {
|
||||
useChatStore.getState().setVoiceFinalText(msg.text);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'pong':
|
||||
break;
|
||||
|
||||
|
||||
@@ -21,6 +21,10 @@ interface ChatStore {
|
||||
isLoadingHistory: boolean;
|
||||
historyPage: number;
|
||||
|
||||
// 流式语音识别状态
|
||||
voiceInterimText: string;
|
||||
voiceFinalText: string;
|
||||
|
||||
// 多气泡消息队列:确保气泡依次出现 + 逐字动画
|
||||
messageQueue: Message[];
|
||||
|
||||
@@ -37,6 +41,8 @@ interface ChatStore {
|
||||
clearMessages: () => void;
|
||||
|
||||
setContinuousMode: (enabled: boolean) => void;
|
||||
setVoiceInterimText: (text: string) => void;
|
||||
setVoiceFinalText: (text: string) => void;
|
||||
setBackgroundThinkingStatus: (status: BackgroundThinkingStatus) => void;
|
||||
setIoTDevices: (devices: IoTDevice[]) => void;
|
||||
|
||||
@@ -54,6 +60,8 @@ export const useChatStore = create<ChatStore>((set) => ({
|
||||
backgroundThinkingStatus: 'idle',
|
||||
iotDevices: [],
|
||||
iotDevicesLastUpdated: null,
|
||||
voiceInterimText: '',
|
||||
voiceFinalText: '',
|
||||
hasMoreMessages: false,
|
||||
isLoadingHistory: false,
|
||||
historyPage: 1,
|
||||
@@ -143,6 +151,9 @@ export const useChatStore = create<ChatStore>((set) => ({
|
||||
|
||||
setContinuousMode: (enabled) => set({ continuousMode: enabled }),
|
||||
|
||||
setVoiceInterimText: (text) => set({ voiceInterimText: text }),
|
||||
setVoiceFinalText: (text) => set({ voiceFinalText: text, voiceInterimText: '' }),
|
||||
|
||||
setBackgroundThinkingStatus: (status) => set({ backgroundThinkingStatus: status }),
|
||||
|
||||
setIoTDevices: (devices) =>
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
// 页面路由 Store — 管理当前显示的页面
|
||||
|
||||
import { create } from 'zustand';
|
||||
|
||||
export type PageId = 'chat' | 'admin-models' | 'admin-dashboard' | 'profile';
|
||||
|
||||
/** Shape of the page-routing store: the visible page plus its navigation actions. */
interface PageState {
  /** Identifier of the page currently shown. */
  currentPage: PageId;

  /** Switches to the given page. */
  setPage: (page: PageId) => void;

  /** Shortcut that switches back to the chat page. */
  goToChat: () => void;
}
|
||||
|
||||
/**
 * Zustand store holding the page that is currently displayed.
 * Starts on the chat page; both actions replace `currentPage`.
 */
export const usePageStore = create<PageState>((set) => {
  // Single writer shared by both actions.
  const show = (page: PageId): void => {
    set({ currentPage: page });
  };

  return {
    currentPage: 'chat',
    setPage: (page) => show(page),
    goToChat: () => show('chat'),
  };
});
|
||||
@@ -105,11 +105,14 @@ export interface StreamSegment {
|
||||
|
||||
/** WebSocket 客户端消息 */
|
||||
export interface WSClientMessage {
|
||||
type: 'message' | 'voice_input' | 'ping' | 'history';
|
||||
type: 'message' | 'voice_input' | 'voice_stream_start' | 'voice_stream_chunk' | 'voice_stream_end' | 'ping' | 'history';
|
||||
session_id?: string;
|
||||
mode?: ChatMode;
|
||||
content?: string;
|
||||
audio_data?: string; // base64
|
||||
format?: string; // 音频格式 (voice_stream_start): webm, wav, pcm, opus
|
||||
language?: string; // 识别语言 (voice_stream_start): zh, en, ja, ko, auto
|
||||
sequence?: number; // 音频分片序号 (voice_stream_chunk)
|
||||
attachments?: MessageAttachment[];
|
||||
timestamp: number;
|
||||
client_id?: string;
|
||||
@@ -139,7 +142,7 @@ export interface AppNotification extends NotificationData {
|
||||
|
||||
/** WebSocket 服务端消息 */
|
||||
export interface WSServerMessage {
|
||||
type: 'stream_start' | 'response' | 'segment' | 'audio' | 'error' | 'device_update' | 'pong' | 'history_response' | 'stream_chunk' | 'stream_end' | 'background_thinking' | 'notification' | 'multi_message' | 'stream_segments' | 'review' | 'thinking' | 'tool_progress' | 'system_info';
|
||||
type: 'stream_start' | 'response' | 'segment' | 'audio' | 'error' | 'device_update' | 'pong' | 'history_response' | 'stream_chunk' | 'stream_end' | 'background_thinking' | 'notification' | 'multi_message' | 'stream_segments' | 'review' | 'thinking' | 'tool_progress' | 'system_info' | 'voice_interim' | 'voice_final';
|
||||
message_id?: string;
|
||||
text?: string;
|
||||
content?: string;
|
||||
@@ -161,6 +164,7 @@ export interface WSServerMessage {
|
||||
notification?: NotificationData;
|
||||
tool_progress?: ToolProgressInfo;
|
||||
system_info?: SystemInfoPayload;
|
||||
metadata?: Record<string, unknown>;
|
||||
protocol_version?: number;
|
||||
timestamp: number;
|
||||
client_info?: ClientInfo;
|
||||
|
||||
Reference in New Issue
Block a user