@@ -2,6 +2,14 @@ import type { FastifyInstance } from 'fastify';
22import { classifyIntent , getStreamingAgentByIntent } from '../agents/index.js' ;
33import { createLLMClient , chatWithLLMStreaming } from '../services/llm.js' ;
44
5+ // 流式配置
6+ const STREAMING_CONFIG = {
7+ // 每个token/字符的延迟(毫秒),可以通过环境变量配置
8+ delayPerToken : parseInt ( process . env . STREAMING_DELAY_MS || '0' ) ,
9+ // 是否按字符流式传输,否则按词语
10+ streamByCharacter : process . env . STREAM_BY_CHARACTER === 'true' ,
11+ } ;
12+
513// AI SDK 5.0 compatible types
614interface UIMessagePart {
715 type : 'text' ;
@@ -65,9 +73,9 @@ export default async function chatRoutes(fastify: FastifyInstance) {
6573 let completionTokens = 0 ;
6674
6775 try {
68- // Real streaming from LLM
76+ // Real streaming from LLM with optimized chunking
6977 for await ( const chunk of chatWithLLMStreaming ( llmClient , llmMessages ) ) {
70- // Send each chunk as it arrives from LLM
78+ // Send each chunk as it arrives from LLM - no artificial delays
7179 const textPart = `0:${ JSON . stringify ( chunk ) } \n` ;
7280 reply . raw . write ( textPart ) ;
7381 completionTokens += chunk . length ;
@@ -137,14 +145,23 @@ export default async function chatRoutes(fastify: FastifyInstance) {
137145 case 'assistant_message' : {
138146 if ( chunk . content ) {
139147 fullContent = chunk . content ;
140- // Send text chunks character by character using Text Parts
141- const chars = chunk . content . split ( '' ) ;
142- for ( const char of chars ) {
143- // Text Part: 0:string\n
144- const textPart = `0:${ JSON . stringify ( char ) } \n` ;
145- reply . raw . write ( textPart ) ;
146- // Small delay for streaming effect
147- await new Promise ( resolve => setTimeout ( resolve , 50 ) ) ;
148+
149+ // Smart streaming: choose character-level or word-level chunks based on config
150+ const chunks = STREAMING_CONFIG . streamByCharacter
151+ ? chunk . content . split ( '' )
152+ : chunk . content . split ( / ( \s + ) / ) ;
153+
154+ for ( const textChunk of chunks ) {
155+ if ( textChunk ) { // skip empty strings
156+ // Text Part: 0:string\n
157+ const textPart = `0:${ JSON . stringify ( textChunk ) } \n` ;
158+ reply . raw . write ( textPart ) ;
159+
160+ // Configurable delay
161+ if ( STREAMING_CONFIG . delayPerToken > 0 ) {
162+ await new Promise ( resolve => setTimeout ( resolve , STREAMING_CONFIG . delayPerToken ) ) ;
163+ }
164+ }
148165 }
149166 }
150167 break ;
0 commit comments