Benoit Chesneau 95b7ffeeaa chore: prepare release 25.0.2
- Bump version to 25.0.2
- Update copyright year to 2026 in LICENSE and NOTICE
- Add license headers to all Python source files
- Add changelog entry for 25.0.2
2026-02-06 08:21:18 +01:00

276 lines
8.1 KiB
Python

#
# This file is part of gunicorn released under the MIT license.
# See the NOTICE for more information.
import json
from fastapi import FastAPI
from fastapi.responses import StreamingResponse, HTMLResponse
from pydantic import BaseModel
from gunicorn.dirty.client import get_dirty_client_async
app = FastAPI(
title="Streaming Chat Demo",
description="Demonstrates dirty worker streaming with simulated LLM responses",
)
class ChatRequest(BaseModel):
prompt: str
thinking: bool = False
class ChatResponse(BaseModel):
response: str
@app.post("/chat")
async def chat(request: ChatRequest):
"""Stream a chat response using Server-Sent Events.
The response is streamed token-by-token, simulating LLM inference.
Each token is sent as an SSE event with JSON data.
Args:
request: Chat request with prompt and optional thinking mode
Returns:
StreamingResponse with text/event-stream content type
"""
client = await get_dirty_client_async()
action = "generate_with_thinking" if request.thinking else "generate"
async def stream():
async for token in client.stream_async(
"streaming_chat.chat_app:ChatApp",
action,
request.prompt
):
data = json.dumps({"token": token})
yield f"data: {data}\n\n"
yield "data: [DONE]\n\n"
return StreamingResponse(
stream(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
}
)
@app.post("/chat/sync", response_model=ChatResponse)
async def chat_sync(request: ChatRequest):
"""Non-streaming chat endpoint for comparison.
Waits for the complete response before returning.
Useful for testing or when streaming isn't needed.
Args:
request: Chat request with prompt
Returns:
Complete response as JSON
"""
client = await get_dirty_client_async()
action = "generate_with_thinking" if request.thinking else "generate"
tokens = []
async for token in client.stream_async(
"streaming_chat.chat_app:ChatApp",
action,
request.prompt
):
tokens.append(token)
return ChatResponse(response="".join(tokens))
@app.get("/health")
async def health():
"""Health check endpoint."""
return {"status": "ok"}
@app.get("/", response_class=HTMLResponse)
async def index():
"""Simple chat UI for testing streaming."""
return """
<!DOCTYPE html>
<html>
<head>
<title>Streaming Chat Demo</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background: #1a1a2e;
color: #eee;
}
h1 { color: #00d9ff; }
.chat-container {
background: #16213e;
border-radius: 8px;
padding: 20px;
margin: 20px 0;
}
#response {
min-height: 100px;
padding: 15px;
background: #0f0f23;
border-radius: 4px;
white-space: pre-wrap;
font-family: 'Monaco', 'Menlo', monospace;
line-height: 1.6;
}
.input-group {
display: flex;
gap: 10px;
margin-top: 15px;
}
input[type="text"] {
flex: 1;
padding: 12px;
border: 1px solid #333;
border-radius: 4px;
background: #0f0f23;
color: #eee;
font-size: 16px;
}
button {
padding: 12px 24px;
background: #00d9ff;
color: #000;
border: none;
border-radius: 4px;
cursor: pointer;
font-weight: bold;
}
button:hover { background: #00b8d9; }
button:disabled { background: #555; cursor: not-allowed; }
.checkbox-group {
margin-top: 10px;
}
label { cursor: pointer; }
.suggestions {
margin-top: 15px;
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.suggestion {
padding: 6px 12px;
background: #333;
border-radius: 4px;
cursor: pointer;
font-size: 14px;
}
.suggestion:hover { background: #444; }
.cursor {
display: inline-block;
width: 8px;
height: 18px;
background: #00d9ff;
animation: blink 1s infinite;
vertical-align: text-bottom;
}
@keyframes blink {
0%, 50% { opacity: 1; }
51%, 100% { opacity: 0; }
}
</style>
</head>
<body>
<h1>Streaming Chat Demo</h1>
<p>This demo shows token-by-token streaming using Gunicorn's dirty workers.</p>
<div class="chat-container">
<div id="response"></div>
<div class="input-group">
<input type="text" id="prompt" placeholder="Type a message..."
onkeypress="if(event.key==='Enter') sendMessage()">
<button onclick="sendMessage()" id="sendBtn">Send</button>
</div>
<div class="checkbox-group">
<label>
<input type="checkbox" id="thinking"> Show thinking phase
</label>
</div>
<div class="suggestions">
<span class="suggestion" onclick="setPrompt('hello')">hello</span>
<span class="suggestion" onclick="setPrompt('explain dirty workers')">explain</span>
<span class="suggestion" onclick="setPrompt('how does streaming work?')">streaming</span>
<span class="suggestion" onclick="setPrompt('show me code')">code</span>
</div>
</div>
<script>
function setPrompt(text) {
document.getElementById('prompt').value = text;
sendMessage();
}
async function sendMessage() {
const promptEl = document.getElementById('prompt');
const responseEl = document.getElementById('response');
const sendBtn = document.getElementById('sendBtn');
const thinking = document.getElementById('thinking').checked;
const prompt = promptEl.value.trim();
if (!prompt) return;
sendBtn.disabled = true;
responseEl.innerHTML = '<span class="cursor"></span>';
try {
const response = await fetch('/chat', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({prompt, thinking})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
let text = '';
while (true) {
const {done, value} = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data === '[DONE]') {
responseEl.textContent = text;
} else {
try {
const parsed = JSON.parse(data);
text += parsed.token;
responseEl.innerHTML = text + '<span class="cursor"></span>';
} catch (e) {}
}
}
}
}
} catch (error) {
responseEl.textContent = 'Error: ' + error.message;
}
sendBtn.disabled = false;
promptEl.value = '';
promptEl.focus();
}
document.getElementById('prompt').focus();
</script>
</body>
</html>
"""