import json
import os
import sys
from typing import Any, Iterator
import httpx
def create_response(
    client: httpx.Client,
    model: str,
    input_data: str | list[dict[str, Any]],
    stream: bool = False,
    **kwargs: Any,
) -> dict[str, Any] | Iterator[dict[str, Any]]:
    """Send one request to the ``/openai/v1/responses`` endpoint.

    Args:
        client: HTTP client used to issue the request; the endpoint path is
            relative, so the client is expected to carry the base URL.
        model: Value sent as the ``model`` field of the request body.
        input_data: Value sent as the ``input`` field, either a plain string
            or a list of message dicts.
        stream: When true, the request is delegated to ``_stream_response``
            and the events are returned as an iterator instead of one body.
        **kwargs: Extra top-level fields merged into the request body
            (for example ``instructions`` or ``reasoning``).

    Returns:
        The decoded JSON body for a non-streaming call, or an iterator of
        event dicts for a streaming call.

    Raises:
        Whatever ``raise_for_status()`` raises for an error status on the
        non-streaming path. On the streaming path nothing is sent until the
        returned iterator is consumed, so errors surface during iteration.
    """
    request_body = {"model": model, "input": input_data, "stream": stream}
    # Extra fields go last so they land after (or override) the base fields.
    request_body.update(kwargs)

    if not stream:
        http_response = client.post("/openai/v1/responses", json=request_body)
        http_response.raise_for_status()
        return http_response.json()

    return _stream_response(client, request_body)
def _stream_response(
client: httpx.Client, payload: dict[str, Any]
) -> Iterator[dict[str, Any]]:
with client.stream("POST", "/openai/v1/responses", json=payload) as response:
response.raise_for_status()
buffer = ""
for chunk in response.iter_text():
buffer += chunk
while "\n\n" in buffer:
event_str, buffer = buffer.split("\n\n", 1)
if not event_str.strip():
continue
event = {}
for line in event_str.strip().split("\n"):
if line.startswith("event: "):
event["event"] = line[7:]
elif line.startswith("data: "):
try:
event["data"] = json.loads(line[6:])
except json.JSONDecodeError:
event["data"] = line[6:]
if event:
yield event
def main() -> None:
    """Run the numbered example requests against an LLMSim server.

    The server URL is read from the ``LLMSIM_URL`` environment variable and
    defaults to ``http://localhost:8080``. The first request doubles as a
    connectivity check: if it fails with any httpx error (connection failure
    or error status), a hint on starting the server is printed and the
    process exits with status 1. Later requests are not guarded.
    """
    base_url = os.environ.get("LLMSIM_URL", "http://localhost:8080")
    print("=" * 60)
    print("OpenAI Responses API + LLMSim Example")
    print("=" * 60)
    print(f"\nConnecting to: {base_url}")
    print()

    # The context manager closes the client's connections on every exit
    # path, including the sys.exit() below and any uncaught exception.
    with httpx.Client(base_url=base_url, timeout=60.0) as client:
        print("1. Simple Text Input")
        print("-" * 40)
        try:
            response = create_response(
                client,
                model="gpt-5",
                input_data="What is the capital of France?",
            )
            print(f"Response ID: {response['id']}")
            print(f"Status: {response['status']}")
            print(f"Output: {response.get('output_text', 'N/A')}")
            if response.get("usage"):
                print(f"Tokens: {response['usage']}")
        except httpx.HTTPError as e:
            # HTTPError is the base of both transport errors (e.g. connection
            # refused when the server is down) and error-status responses, so
            # the hint below is shown in the case it was written for.
            print(f"Error: {e}")
            print("\nMake sure the llmsim server is running:")
            print(" llmsim serve --port 8080")
            sys.exit(1)
        print()

        print("2. Message Array Input")
        print("-" * 40)
        response = create_response(
            client,
            model="gpt-5",
            input_data=[
                {"type": "message", "role": "system", "content": "You are a helpful assistant."},
                {"type": "message", "role": "user", "content": "Tell me a joke."},
            ],
        )
        print(f"Response ID: {response['id']}")
        print(f"Output: {response.get('output_text', 'N/A')}")
        print()

        print("3. With Instructions")
        print("-" * 40)
        response = create_response(
            client,
            model="gpt-5",
            input_data="Write something creative.",
            instructions="You are a creative writing assistant. Be poetic and imaginative.",
        )
        print(f"Output: {response.get('output_text', 'N/A')}")
        print()

        print("4. Different Models")
        print("-" * 40)
        models = ["gpt-5-mini", "claude-opus-4.5", "o3-mini"]
        for model in models:
            response = create_response(
                client,
                model=model,
                input_data="Hello!",
            )
            output = response.get("output_text", "")
            print(f"{model}: {output[:60]}...")
        print()

        print("5. Streaming Response")
        print("-" * 40)
        print("Response: ", end="", flush=True)
        for event in create_response(
            client,
            model="gpt-5",
            input_data="Tell me a short story.",
            stream=True,
        ):
            event_type = event.get("event", "")
            data = event.get("data", {})
            if event_type == "response.output_text.delta":
                # Print text fragments as they arrive, without newlines.
                print(data.get("delta", ""), end="", flush=True)
            elif event_type == "response.completed":
                usage = data.get("response", {}).get("usage", {})
                print(f"\n\nTokens: {usage}")
        print()

        print("6. Full Response Structure")
        print("-" * 40)
        response = create_response(
            client,
            model="gpt-5",
            input_data="Hello, world!",
        )
        print(f"ID: {response['id']}")
        print(f"Object: {response['object']}")
        print(f"Model: {response['model']}")
        print(f"Status: {response['status']}")
        print(f"Output items: {len(response.get('output', []))}")
        if response.get("output"):
            item = response["output"][0]
            print(f" - Type: {item.get('type')}")
            print(f" - Role: {item.get('role')}")
            print(f" - Status: {item.get('status')}")
        print()

        print("7. Reasoning / Thinking (non-streaming)")
        print("-" * 40)
        response = create_response(
            client,
            model="o3",
            input_data="What is 2 + 2?",
            reasoning={"effort": "medium", "summary": "auto"},
        )
        print(f"Model: {response['model']}")
        print(f"Output items: {len(response.get('output', []))}")
        for item in response.get("output", []):
            if item["type"] == "reasoning":
                print("\n [Thinking]")
                print(f" ID: {item['id']}")
                print(f" Status: {item['status']}")
                if item.get("summary"):
                    for s in item["summary"]:
                        print(f" Summary: {s['text']}")
                else:
                    print(" (no summary requested)")
            elif item["type"] == "message":
                print("\n [Response]")
                for content in item.get("content", []):
                    if content["type"] == "output_text":
                        print(f" Text: {content['text'][:100]}")
        if response.get("usage"):
            usage = response["usage"]
            reasoning_tokens = usage.get("output_tokens_details", {}).get("reasoning_tokens", 0)
            print(f"\n Tokens: input={usage['input_tokens']}, output={usage['output_tokens']}, "
                  f"reasoning={reasoning_tokens}, total={usage['total_tokens']}")
        print()

        print("8. Reasoning Effort Levels")
        print("-" * 40)
        for effort in ["low", "medium", "high"]:
            response = create_response(
                client,
                model="o3",
                input_data="Explain gravity.",
                reasoning={"effort": effort, "summary": "concise"},
            )
            usage = response.get("usage", {})
            reasoning_tokens = usage.get("output_tokens_details", {}).get("reasoning_tokens", 0)
            print(f" effort={effort:6s} -> reasoning_tokens={reasoning_tokens}, "
                  f"total={usage.get('total_tokens', 0)}")
        print()

        print("9. Streaming with Thinking")
        print("-" * 40)
        for event in create_response(
            client,
            model="o3",
            input_data="Why is the sky blue?",
            stream=True,
            reasoning={"effort": "medium", "summary": "auto"},
        ):
            event_type = event.get("event", "")
            data = event.get("data", {})
            if event_type == "response.output_item.added":
                # A new output item starts: label it by its type.
                item = data.get("item", {})
                if item.get("type") == "reasoning":
                    print("[Thinking] ", end="", flush=True)
                elif item.get("type") == "message":
                    print("\n[Response] ", end="", flush=True)
            elif event_type == "response.reasoning_summary_text.delta":
                print(data.get("delta", ""), end="", flush=True)
            elif event_type == "response.output_text.delta":
                print(data.get("delta", ""), end="", flush=True)
            elif event_type == "response.completed":
                usage = data.get("response", {}).get("usage", {})
                reasoning_tokens = usage.get("output_tokens_details", {}).get("reasoning_tokens", 0)
                print(f"\n\n Tokens: input={usage.get('input_tokens', 0)}, "
                      f"output={usage.get('output_tokens', 0)}, "
                      f"reasoning={reasoning_tokens}, "
                      f"total={usage.get('total_tokens', 0)}")
        print()

    print("=" * 60)
    print("Examples complete!")
    print("=" * 60)
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()