mirror of
https://github.com/frappe/gunicorn.git
synced 2026-07-01 18:21:30 +08:00
Add a lightweight chat simulator demonstrating dirty worker streaming: - Token-by-token SSE streaming via async generators - FastAPI endpoint with browser UI - Multiple canned responses based on keywords - Docker deployment with docker-compose - Integration tests for SSE protocol Update docs/content/dirty.md to link to both examples.
272 lines
8.0 KiB
Python
272 lines
8.0 KiB
Python
import json
|
|
from fastapi import FastAPI
|
|
from fastapi.responses import StreamingResponse, HTMLResponse
|
|
from pydantic import BaseModel
|
|
from gunicorn.dirty.client import get_dirty_client_async
|
|
|
|
|
|
app = FastAPI(
|
|
title="Streaming Chat Demo",
|
|
description="Demonstrates dirty worker streaming with simulated LLM responses",
|
|
)
|
|
|
|
|
|
class ChatRequest(BaseModel):
|
|
prompt: str
|
|
thinking: bool = False
|
|
|
|
|
|
class ChatResponse(BaseModel):
|
|
response: str
|
|
|
|
|
|
@app.post("/chat")
|
|
async def chat(request: ChatRequest):
|
|
"""Stream a chat response using Server-Sent Events.
|
|
|
|
The response is streamed token-by-token, simulating LLM inference.
|
|
Each token is sent as an SSE event with JSON data.
|
|
|
|
Args:
|
|
request: Chat request with prompt and optional thinking mode
|
|
|
|
Returns:
|
|
StreamingResponse with text/event-stream content type
|
|
"""
|
|
client = await get_dirty_client_async()
|
|
action = "generate_with_thinking" if request.thinking else "generate"
|
|
|
|
async def stream():
|
|
async for token in client.stream_async(
|
|
"streaming_chat.chat_app:ChatApp",
|
|
action,
|
|
request.prompt
|
|
):
|
|
data = json.dumps({"token": token})
|
|
yield f"data: {data}\n\n"
|
|
yield "data: [DONE]\n\n"
|
|
|
|
return StreamingResponse(
|
|
stream(),
|
|
media_type="text/event-stream",
|
|
headers={
|
|
"Cache-Control": "no-cache",
|
|
"Connection": "keep-alive",
|
|
"X-Accel-Buffering": "no", # Disable nginx buffering
|
|
}
|
|
)
|
|
|
|
|
|
@app.post("/chat/sync", response_model=ChatResponse)
|
|
async def chat_sync(request: ChatRequest):
|
|
"""Non-streaming chat endpoint for comparison.
|
|
|
|
Waits for the complete response before returning.
|
|
Useful for testing or when streaming isn't needed.
|
|
|
|
Args:
|
|
request: Chat request with prompt
|
|
|
|
Returns:
|
|
Complete response as JSON
|
|
"""
|
|
client = await get_dirty_client_async()
|
|
action = "generate_with_thinking" if request.thinking else "generate"
|
|
|
|
tokens = []
|
|
async for token in client.stream_async(
|
|
"streaming_chat.chat_app:ChatApp",
|
|
action,
|
|
request.prompt
|
|
):
|
|
tokens.append(token)
|
|
|
|
return ChatResponse(response="".join(tokens))
|
|
|
|
|
|
@app.get("/health")
|
|
async def health():
|
|
"""Health check endpoint."""
|
|
return {"status": "ok"}
|
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
|
|
async def index():
|
|
"""Simple chat UI for testing streaming."""
|
|
return """
|
|
<!DOCTYPE html>
|
|
<html>
|
|
<head>
|
|
<title>Streaming Chat Demo</title>
|
|
<style>
|
|
body {
|
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
|
max-width: 800px;
|
|
margin: 0 auto;
|
|
padding: 20px;
|
|
background: #1a1a2e;
|
|
color: #eee;
|
|
}
|
|
h1 { color: #00d9ff; }
|
|
.chat-container {
|
|
background: #16213e;
|
|
border-radius: 8px;
|
|
padding: 20px;
|
|
margin: 20px 0;
|
|
}
|
|
#response {
|
|
min-height: 100px;
|
|
padding: 15px;
|
|
background: #0f0f23;
|
|
border-radius: 4px;
|
|
white-space: pre-wrap;
|
|
font-family: 'Monaco', 'Menlo', monospace;
|
|
line-height: 1.6;
|
|
}
|
|
.input-group {
|
|
display: flex;
|
|
gap: 10px;
|
|
margin-top: 15px;
|
|
}
|
|
input[type="text"] {
|
|
flex: 1;
|
|
padding: 12px;
|
|
border: 1px solid #333;
|
|
border-radius: 4px;
|
|
background: #0f0f23;
|
|
color: #eee;
|
|
font-size: 16px;
|
|
}
|
|
button {
|
|
padding: 12px 24px;
|
|
background: #00d9ff;
|
|
color: #000;
|
|
border: none;
|
|
border-radius: 4px;
|
|
cursor: pointer;
|
|
font-weight: bold;
|
|
}
|
|
button:hover { background: #00b8d9; }
|
|
button:disabled { background: #555; cursor: not-allowed; }
|
|
.checkbox-group {
|
|
margin-top: 10px;
|
|
}
|
|
label { cursor: pointer; }
|
|
.suggestions {
|
|
margin-top: 15px;
|
|
display: flex;
|
|
flex-wrap: wrap;
|
|
gap: 8px;
|
|
}
|
|
.suggestion {
|
|
padding: 6px 12px;
|
|
background: #333;
|
|
border-radius: 4px;
|
|
cursor: pointer;
|
|
font-size: 14px;
|
|
}
|
|
.suggestion:hover { background: #444; }
|
|
.cursor {
|
|
display: inline-block;
|
|
width: 8px;
|
|
height: 18px;
|
|
background: #00d9ff;
|
|
animation: blink 1s infinite;
|
|
vertical-align: text-bottom;
|
|
}
|
|
@keyframes blink {
|
|
0%, 50% { opacity: 1; }
|
|
51%, 100% { opacity: 0; }
|
|
}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<h1>Streaming Chat Demo</h1>
|
|
<p>This demo shows token-by-token streaming using Gunicorn's dirty workers.</p>
|
|
|
|
<div class="chat-container">
|
|
<div id="response"></div>
|
|
<div class="input-group">
|
|
<input type="text" id="prompt" placeholder="Type a message..."
|
|
onkeypress="if(event.key==='Enter') sendMessage()">
|
|
<button onclick="sendMessage()" id="sendBtn">Send</button>
|
|
</div>
|
|
<div class="checkbox-group">
|
|
<label>
|
|
<input type="checkbox" id="thinking"> Show thinking phase
|
|
</label>
|
|
</div>
|
|
<div class="suggestions">
|
|
<span class="suggestion" onclick="setPrompt('hello')">hello</span>
|
|
<span class="suggestion" onclick="setPrompt('explain dirty workers')">explain</span>
|
|
<span class="suggestion" onclick="setPrompt('how does streaming work?')">streaming</span>
|
|
<span class="suggestion" onclick="setPrompt('show me code')">code</span>
|
|
</div>
|
|
</div>
|
|
|
|
<script>
|
|
function setPrompt(text) {
|
|
document.getElementById('prompt').value = text;
|
|
sendMessage();
|
|
}
|
|
|
|
async function sendMessage() {
|
|
const promptEl = document.getElementById('prompt');
|
|
const responseEl = document.getElementById('response');
|
|
const sendBtn = document.getElementById('sendBtn');
|
|
const thinking = document.getElementById('thinking').checked;
|
|
|
|
const prompt = promptEl.value.trim();
|
|
if (!prompt) return;
|
|
|
|
sendBtn.disabled = true;
|
|
responseEl.innerHTML = '<span class="cursor"></span>';
|
|
|
|
try {
|
|
const response = await fetch('/chat', {
|
|
method: 'POST',
|
|
headers: {'Content-Type': 'application/json'},
|
|
body: JSON.stringify({prompt, thinking})
|
|
});
|
|
|
|
const reader = response.body.getReader();
|
|
const decoder = new TextDecoder();
|
|
let text = '';
|
|
|
|
while (true) {
|
|
const {done, value} = await reader.read();
|
|
if (done) break;
|
|
|
|
const chunk = decoder.decode(value);
|
|
const lines = chunk.split('\\n');
|
|
|
|
for (const line of lines) {
|
|
if (line.startsWith('data: ')) {
|
|
const data = line.slice(6);
|
|
if (data === '[DONE]') {
|
|
responseEl.textContent = text;
|
|
} else {
|
|
try {
|
|
const parsed = JSON.parse(data);
|
|
text += parsed.token;
|
|
responseEl.innerHTML = text + '<span class="cursor"></span>';
|
|
} catch (e) {}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (error) {
|
|
responseEl.textContent = 'Error: ' + error.message;
|
|
}
|
|
|
|
sendBtn.disabled = false;
|
|
promptEl.value = '';
|
|
promptEl.focus();
|
|
}
|
|
|
|
document.getElementById('prompt').focus();
|
|
</script>
|
|
</body>
|
|
</html>
|
|
"""
|