cd60b01cf3
DevTools (新增): - 进程管理器: 启动/停止/重启/编译 + 端口自动释放 - 服务接管 (tryAdopt): 检测已运行服务,健康检查通过则直接接管 - 一键启动 (startAllSequential): 按 ai-core→gateway→frontend 顺序启动 - 日志布局切换: 标签页模式 ↔ 三栏并列模式 - 性能监控: CPU/内存采样 + SVG 折线图 - Web UI + WebSocket 实时推送 前端修复: - tailwind.config.ts: 修复空配置导致 CSS 不加载 (增加 content/colors/fontFamily) - postcss.config.js: 新建缺失的 PostCSS 配置 - App.tsx: 移除注册功能,仅保留管理员登录 (admin / cyrene-dev-admin) 后端新增: - config.go: AdminUsername/AdminPassword/RegistrationEnabled 环境变量 - auth_handler.go: 管理员登录 + 注册邮箱验证码 + 注册开关控制 - 管理员凭据: admin / cyrene-dev-admin (默认) 其他: - .gitignore: 新增 devtools/node_modules/ devtools/logs/ devtools/package-lock.json - devtools.sh: DevTools 一键启动脚本
192 lines
3.8 KiB
Go
192 lines
3.8 KiB
Go
package llm
|
||
|
||
import (
|
||
"strings"
|
||
"sync"
|
||
"unicode"
|
||
)
|
||
|
||
// Segmenter 断句器 —— 将流式文本按句号切分为语音播放片段
|
||
type Segmenter struct {
|
||
mu sync.Mutex
|
||
buffer strings.Builder
|
||
segments []Segment
|
||
index int
|
||
}
|
||
|
||
// Segment is one sentence-sized piece of text to be spoken.
type Segment struct {
	Index int    `json:"index"` // 1-based position in emission order
	Text  string `json:"text"`  // segment text; producers in this file trim surrounding whitespace
}
|
||
|
||
// NewSegmenter 创建断句器
|
||
func NewSegmenter() *Segmenter {
|
||
return &Segmenter{}
|
||
}
|
||
|
||
// Feed 喂入新的文本片段
|
||
// 返回已完成的断句列表
|
||
func (s *Segmenter) Feed(delta string) []Segment {
|
||
s.mu.Lock()
|
||
defer s.mu.Unlock()
|
||
|
||
s.buffer.WriteString(delta)
|
||
content := s.buffer.String()
|
||
|
||
var newSegments []Segment
|
||
|
||
for {
|
||
idx := findSentenceEnd(content)
|
||
if idx == -1 {
|
||
break
|
||
}
|
||
|
||
segmentText := strings.TrimSpace(content[:idx+len(string(content[idx]))])
|
||
// 检查是否是完整中文字符的句末
|
||
// idx 指向标点符号的位置
|
||
runes := []rune(content)
|
||
var byteIdx int
|
||
for i, r := range runes {
|
||
if i == idx {
|
||
// 标点之后的字符
|
||
break
|
||
}
|
||
byteIdx += len(string(r))
|
||
}
|
||
|
||
// 简化处理:直接取到idx+1字节 (对于ASCII标点)
|
||
// 对于中文标点,需要用rune处理
|
||
realIdx := 0
|
||
runeCount := 0
|
||
for i, r := range content {
|
||
if runeCount == idx {
|
||
realIdx = i
|
||
break
|
||
}
|
||
runeCount++
|
||
_ = r
|
||
}
|
||
// 包含标点符号本身
|
||
endIdx := realIdx + len(string([]rune(content)[idx]))
|
||
if endIdx <= realIdx {
|
||
endIdx = realIdx + 3 // fallback for UTF-8 multi-byte
|
||
}
|
||
|
||
segmentText = strings.TrimSpace(content[:endIdx])
|
||
if segmentText == "" {
|
||
content = strings.TrimSpace(content[endIdx:])
|
||
s.buffer.Reset()
|
||
s.buffer.WriteString(content)
|
||
continue
|
||
}
|
||
|
||
s.index++
|
||
seg := Segment{
|
||
Index: s.index,
|
||
Text: segmentText,
|
||
}
|
||
s.segments = append(s.segments, seg)
|
||
newSegments = append(newSegments, seg)
|
||
|
||
// 更新buffer,移除已处理的部分
|
||
content = strings.TrimSpace(content[endIdx:])
|
||
s.buffer.Reset()
|
||
s.buffer.WriteString(content)
|
||
}
|
||
|
||
return newSegments
|
||
}
|
||
|
||
// Flush 强制输出buffer中剩余的内容
|
||
func (s *Segmenter) Flush() *Segment {
|
||
s.mu.Lock()
|
||
defer s.mu.Unlock()
|
||
|
||
remaining := strings.TrimSpace(s.buffer.String())
|
||
if remaining == "" {
|
||
return nil
|
||
}
|
||
|
||
s.index++
|
||
seg := Segment{
|
||
Index: s.index,
|
||
Text: remaining,
|
||
}
|
||
s.segments = append(s.segments, seg)
|
||
s.buffer.Reset()
|
||
|
||
return &seg
|
||
}
|
||
|
||
// AllSegments 返回所有已完成的断句
|
||
func (s *Segmenter) AllSegments() []Segment {
|
||
s.mu.Lock()
|
||
defer s.mu.Unlock()
|
||
|
||
result := make([]Segment, len(s.segments))
|
||
copy(result, s.segments)
|
||
return result
|
||
}
|
||
|
||
// findSentenceEnd 查找句子结束位置(返回标点符号在rune数组中的索引)
|
||
// 中文标点:。!? 英文标点:. ! ?
|
||
func findSentenceEnd(text string) int {
|
||
runes := []rune(text)
|
||
for i, r := range runes {
|
||
if isSentenceEnd(r) {
|
||
return i
|
||
}
|
||
}
|
||
return -1
|
||
}
|
||
|
||
// isSentenceEnd reports whether r terminates a sentence.
//
// Recognized terminators:
//   - Chinese: ideographic full stop '。' (U+3002), fullwidth exclamation
//     mark (U+FF01) and fullwidth question mark (U+FF1F)
//   - ASCII: '.', '!' and '?'
//   - line feed '\n'
//
// The fullwidth marks are written as \u escapes so they cannot be silently
// folded into their ASCII look-alikes; that would list '!' and '?' twice,
// which Go rejects as duplicate switch cases.
func isSentenceEnd(r rune) bool {
	switch r {
	case '。', '\uFF01', '\uFF1F', '.', '!', '?', '\n':
		return true
	}
	return false
}
|
||
|
||
// SplitIntoSegments 将完整文本按句号断句(用于post-processing)
|
||
func SplitIntoSegments(text string) []Segment {
|
||
var segments []Segment
|
||
runes := []rune(text)
|
||
|
||
start := 0
|
||
index := 0
|
||
|
||
for i, r := range runes {
|
||
if isSentenceEnd(r) {
|
||
segText := strings.TrimSpace(string(runes[start : i+1]))
|
||
if segText != "" {
|
||
index++
|
||
segments = append(segments, Segment{
|
||
Index: index,
|
||
Text: segText,
|
||
})
|
||
}
|
||
start = i + 1
|
||
}
|
||
}
|
||
|
||
// 处理末尾无标点的剩余文本
|
||
if start < len(runes) {
|
||
remaining := strings.TrimSpace(string(runes[start:]))
|
||
if remaining != "" {
|
||
index++
|
||
segments = append(segments, Segment{
|
||
Index: index,
|
||
Text: remaining,
|
||
})
|
||
}
|
||
}
|
||
|
||
return segments
|
||
}
|
||
|
||
// Keep the unicode import referenced so the file still compiles even when no
// other code in it uses the package.
var _ = unicode.Is
|