@@ -2,6 +2,14 @@ import type { FastifyInstance } from 'fastify';
22import { classifyIntent , getStreamingAgentByIntent } from '../agents/index.js' ;
33import { createLLMClient , chatWithLLMStreaming } from '../services/llm.js' ;
44
5+ // 流式配置
6+ const STREAMING_CONFIG = {
7+ // 每个token/字符的延迟(毫秒),可以通过环境变量配置
8+ delayPerToken : parseInt ( process . env . STREAMING_DELAY_MS || '0' ) ,
9+ // 是否按字符流式传输,否则按词语
10+ streamByCharacter : process . env . STREAM_BY_CHARACTER === 'true' ,
11+ } ;
12+
513// AI SDK 5.0 compatible types
614interface UIMessagePart {
715 type : 'text' ;
@@ -65,9 +73,9 @@ export default async function chatRoutes(fastify: FastifyInstance) {
6573 let completionTokens = 0 ;
6674
6775 try {
68- // Real streaming from LLM
76+ // Real streaming from LLM with optimized chunking
6977 for await ( const chunk of chatWithLLMStreaming ( llmClient , llmMessages ) ) {
70- // Send each chunk as it arrives from LLM
78+ // Send each chunk as it arrives from LLM - no artificial delays
7179 const textPart = `0:${ JSON . stringify ( chunk ) } \n` ;
7280 reply . raw . write ( textPart ) ;
7381 completionTokens += chunk . length ;
@@ -137,14 +145,23 @@ export default async function chatRoutes(fastify: FastifyInstance) {
137145 case 'assistant_message' : {
138146 if ( chunk . content ) {
139147 fullContent = chunk . content ;
140- // Send text chunks character by character using Text Parts
141- const chars = chunk . content . split ( '' ) ;
142- for ( const char of chars ) {
143- // Text Part: 0:string\n
144- const textPart = `0:${ JSON . stringify ( char ) } \n` ;
145- reply . raw . write ( textPart ) ;
146- // Small delay for streaming effect
147- await new Promise ( resolve => setTimeout ( resolve , 50 ) ) ;
148+
149+ // Smart streaming: choose character-level or word-level chunks based on config
150+ const chunks = STREAMING_CONFIG . streamByCharacter
151+ ? chunk . content . split ( '' )
152+ : chunk . content . split ( / ( \s + ) / ) ;
153+
154+ for ( const textChunk of chunks ) {
155+ if ( textChunk ) { // skip empty strings
156+ // Text Part: 0:string\n
157+ const textPart = `0:${ JSON . stringify ( textChunk ) } \n` ;
158+ reply . raw . write ( textPart ) ;
159+
160+ // Configurable delay
161+ if ( STREAMING_CONFIG . delayPerToken > 0 ) {
162+ await new Promise ( resolve => setTimeout ( resolve , STREAMING_CONFIG . delayPerToken ) ) ;
163+ }
164+ }
148165 }
149166 }
150167 break ;
0 commit comments