import os
import sys
from openai import OpenAI
from openai.types.responses import (
ResponseCompletedEvent,
ResponseCreatedEvent,
ResponseErrorEvent,
ResponseOutputItemAddedEvent,
ResponseReasoningSummaryTextDeltaEvent,
ResponseTextDeltaEvent,
)
# Event types after which no further events are sent for the current response.
# "response.failed" and "response.incomplete" end a response just like
# "response.completed" does; without them the reader would keep waiting on the
# connection for events that never arrive.
_TERMINAL_EVENT_TYPES = frozenset(
    {
        "response.completed",
        "response.failed",
        "response.incomplete",
        "error",
    }
)


def collect_events(connection) -> list:
    """Read events from *connection* until the current response terminates.

    Args:
        connection: Any iterable of event objects exposing a ``type``
            attribute (here, an open Responses WebSocket connection).

    Returns:
        The events read, in arrival order, ending with the terminal event.
        If the iterable is exhausted before a terminal event is seen, every
        event that was read is returned.
    """
    events = []
    for event in connection:
        events.append(event)
        # Stop right after the terminal event so the events of the next
        # request on the same connection are left unread.
        if event.type in _TERMINAL_EVENT_TYPES:
            break
    return events
def print_streaming_events(events: list) -> None:
for event in events:
if isinstance(event, ResponseCreatedEvent):
print(f" [created] id={event.response.id}")
elif isinstance(event, ResponseOutputItemAddedEvent):
item_type = event.item.type
if item_type == "reasoning":
print(" [thinking] ", end="", flush=True)
elif item_type == "message":
print("\n [response] ", end="", flush=True)
elif isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
print(event.delta, end="", flush=True)
elif isinstance(event, ResponseTextDeltaEvent):
print(event.delta, end="", flush=True)
elif isinstance(event, ResponseCompletedEvent):
usage = event.response.usage
print(f"\n [completed] tokens: input={usage.input_tokens}, "
f"output={usage.output_tokens}, total={usage.total_tokens}")
elif isinstance(event, ResponseErrorEvent):
print(f" [error] {event.code}: {event.message}")
def main() -> None:
    """Run the six Responses-API WebSocket examples against an LLMSim server.

    The HTTP base URL is read from the ``LLMSIM_URL`` environment variable
    (default ``http://localhost:8080/openai/v1``); the WebSocket base URL is
    derived from it by swapping the scheme. All examples share a single
    connection. Any exception raised while connecting or running the examples
    is reported with a hint on starting the server, and the process exits
    with status 1.
    """
    http_base = os.environ.get("LLMSIM_URL", "http://localhost:8080/openai/v1")
    socket_base = http_base.replace("https://", "wss://").replace("http://", "ws://")

    banner = "=" * 60
    print(banner)
    print("OpenAI Responses API WebSocket Mode + LLMSim Example")
    print(banner)
    print(f"\nConnecting to: {socket_base}/responses")

    client = OpenAI(
        base_url=http_base,
        api_key="not-needed",
        websocket_base_url=socket_base,
    )

    def heading(title: str) -> None:
        # Section title followed by a dashed underline.
        print(title)
        print("-" * 40)

    try:
        with client.responses.connect() as connection:
            print("Connected!\n")

            def exchange(**request) -> list:
                # Send one request and read back the events of its response.
                connection.response.create(**request)
                return collect_events(connection)

            heading("1. Simple Text Input")
            print_streaming_events(
                exchange(model="gpt-5", input="What is the capital of France?")
            )
            print()

            heading("2. Message Array Input")
            print_streaming_events(
                exchange(
                    model="gpt-5",
                    input=[
                        {"type": "message", "role": "user", "content": "Tell me a joke."},
                    ],
                )
            )
            print()

            heading("3. Multi-turn Conversation (previous_response_id)")
            first_turn = exchange(model="gpt-5", input="Hello, I have a question.")
            # Remember the id of the completed response so the second turn
            # can reference it; stays None if no completed event was seen.
            resp_id = None
            for evt in first_turn:
                if not isinstance(evt, ResponseCompletedEvent):
                    continue
                resp_id = evt.response.id
            print(f" Turn 1 completed, response_id={resp_id}")
            second_turn = exchange(
                model="gpt-5",
                input="What was I saying?",
                previous_response_id=resp_id,
            )
            print(" Turn 2:")
            print_streaming_events(second_turn)
            print()

            heading("4. Error: previous_response_id Not Found")
            print_streaming_events(
                exchange(
                    model="gpt-5",
                    input="Hello",
                    previous_response_id="resp_does_not_exist",
                )
            )
            print()

            heading("5. Reasoning Model (o3)")
            print_streaming_events(
                exchange(
                    model="o3",
                    input="What is 123 * 456?",
                    reasoning={"effort": "medium", "summary": "auto"},
                )
            )
            print()

            heading("6. Multiple Models on Same Connection")
            for model in ["gpt-5", "gpt-4o", "gpt-5-mini"]:
                for evt in exchange(model=model, input="Hi!"):
                    if isinstance(evt, ResponseCompletedEvent):
                        totals = evt.response.usage
                        print(f" {model}: output_tokens={totals.output_tokens}")
            print()
    except Exception as exc:
        print(f"\nError: Could not connect to {socket_base}/responses")
        print(f" {exc}")
        print("\nMake sure the llmsim server is running:")
        print(" llmsim serve --port 8080")
        sys.exit(1)

    print(banner)
    print("Examples complete!")
    print(banner)
# Run the examples only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()