use futures_util::StreamExt;
use openai_oxide::OpenAI;
use openai_oxide::types::responses::*;
use std::time::Instant;
/// Model name passed to every request issued by this benchmark.
const MODEL: &str = "gpt-5.4";
/// Number of timed iterations run for each benchmark scenario.
const ITERATIONS: usize = 5;
/// Sorts `times` in place (ascending) and returns `(median, p95, min, max)`.
///
/// * `median` is the element at index `len / 2`, i.e. the upper of the two
///   middle values when the number of samples is even.
/// * `p95` is the element at index `floor(len * 0.95)`, clamped to the last
///   element.
///
/// An empty input yields `(0, 0, 0, 0)` instead of panicking.
fn stats(times: &mut Vec<u128>) -> (u128, u128, u128, u128) {
    // No samples: there are no order statistics to report, and the indexing
    // below would be out of bounds.
    if times.is_empty() {
        return (0, 0, 0, 0);
    }

    // Stability is irrelevant for plain integers; the unstable sort produces
    // the same order without allocating.
    times.sort_unstable();

    let last = times.len() - 1;
    let min = times[0];
    let max = times[last];
    let median = times[times.len() / 2];
    // The float-to-usize cast truncates toward zero; the clamp keeps the index
    // in range.
    let p95 = times[((times.len() as f64 * 0.95) as usize).min(last)];
    (median, p95, min, max)
}
/// Prints one row of the results table for the scenario named `label`.
///
/// Passes `times` to `stats` (which sorts it in place) and prints the
/// resulting median, p95, min and max in milliseconds, using the same column
/// widths as the table header printed by `main`.
fn print_row(label: &str, times: &mut Vec<u128>) {
    let (med, p95, min, max) = stats(times);
    println!(
        "{:<25} {:>6}ms {:>6}ms {:>6}ms {:>6}ms",
        label, med, p95, min, max
    );
}

/// Runs a series of latency benchmarks against the Responses API and prints a
/// table of median / p95 / min / max wall-clock times per scenario.
///
/// Each scenario is executed `ITERATIONS` times with `MODEL`. Timings are
/// taken with `Instant` around the awaited client call(s) and recorded in
/// whole milliseconds.
///
/// # Errors
///
/// Returns the first error produced by client construction, by any request,
/// by a stream event received before the first text delta, or by parsing the
/// nested structured output as JSON. The benchmark stops at that point.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = OpenAI::from_env()?;

    // Untimed request issued before any measurement starts.
    println!("Warming up (TLS + HTTP/2)...");
    client
        .responses()
        .create(ResponseCreateRequest::new(MODEL).input("ping"))
        .await?;
    println!("Ready.\n");

    println!("=== openai-oxide benchmark ({ITERATIONS} iterations, model: {MODEL}) ===\n");
    println!(
        "{:<25} {:>8} {:>8} {:>8} {:>8}",
        "Test", "Median", "P95", "Min", "Max"
    );
    println!("{}", "-".repeat(65));

    // --- Plain text: a single short completion. The request is built before
    // the timer starts, so only the round-trip is measured.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let req = ResponseCreateRequest::new(MODEL)
            .input("What is the capital of France? One word.")
            .max_output_tokens(16);
        let t0 = Instant::now();
        let _ = client.responses().create(req).await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Plain text", &mut times);

    // --- Structured output: response constrained by a small strict JSON schema.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let req = ResponseCreateRequest::new(MODEL)
            .input("List 3 programming languages with year created")
            .text(ResponseTextConfig {
                format: Some(ResponseTextFormat::JsonSchema {
                    name: "languages".into(),
                    description: None,
                    schema: Some(serde_json::json!({
                        "type": "object",
                        "properties": {
                            "languages": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "name": {"type": "string"},
                                        "year": {"type": "integer"}
                                    },
                                    "required": ["name", "year"],
                                    "additionalProperties": false
                                }
                            }
                        },
                        "required": ["languages"],
                        "additionalProperties": false
                    })),
                    strict: Some(true),
                }),
                verbosity: None,
            })
            .max_output_tokens(200);
        let t0 = Instant::now();
        let _ = client.responses().create(req).await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Structured output", &mut times);

    // --- Function calling: one request that offers a single function tool.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let req = ResponseCreateRequest::new(MODEL)
            .input("What's the weather in Tokyo?")
            .tools(vec![ResponseTool::Function {
                name: "get_weather".into(),
                description: Some("Get weather".into()),
                parameters: Some(serde_json::json!({
                    "type": "object",
                    "properties": {
                        "city": {"type": "string"},
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                    },
                    "required": ["city", "unit"],
                    "additionalProperties": false
                })),
                strict: None,
            }]);
        let t0 = Instant::now();
        let _ = client.responses().create(req).await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Function calling", &mut times);

    // --- Multi-turn: two sequential requests, the second chained to the first
    // through `previous_response_id`. The timer covers both requests and their
    // construction.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let t0 = Instant::now();
        let r1 = client
            .responses()
            .create(
                ResponseCreateRequest::new(MODEL)
                    .input("Remember: the answer is 42.")
                    .store(true)
                    .max_output_tokens(32),
            )
            .await?;
        let _ = client
            .responses()
            .create(
                ResponseCreateRequest::new(MODEL)
                    .input("What is the answer?")
                    .previous_response_id(&r1.id)
                    .max_output_tokens(16),
            )
            .await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Multi-turn (2 reqs)", &mut times);

    // --- Web search: one request with the web-search tool enabled.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let req = ResponseCreateRequest::new(MODEL)
            .input("What is the latest Rust version?")
            .tools(vec![ResponseTool::WebSearch {
                search_context_size: Some("low".into()),
                user_location: None,
            }])
            .max_output_tokens(100);
        let t0 = Instant::now();
        let _ = client.responses().create(req).await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Web search", &mut times);

    // --- Nested structured: a deeper strict schema. The schema value is built
    // once and cloned per request. The timed region includes parsing the
    // returned text as JSON.
    let mut times = Vec::with_capacity(ITERATIONS);
    let complex_schema = serde_json::json!({
        "type": "object",
        "properties": {
            "company": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "founded": {"type": "integer"},
                    "ceo": {"type": "string"},
                    "products": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "name": {"type": "string"},
                                "category": {"type": "string", "enum": ["hardware", "software", "service"]},
                                "revenue_billions": {"type": "number"},
                                "active": {"type": "boolean"}
                            },
                            "required": ["name", "category", "revenue_billions", "active"],
                            "additionalProperties": false
                        }
                    }
                },
                "required": ["name", "founded", "ceo", "products"],
                "additionalProperties": false
            },
            "competitors": {
                "type": "array",
                "items": {"type": "string"}
            },
            "summary": {"type": "string"}
        },
        "required": ["company", "competitors", "summary"],
        "additionalProperties": false
    });
    for _ in 0..ITERATIONS {
        let req = ResponseCreateRequest::new(MODEL)
            .input("Analyze Apple Inc: products with revenue, competitors, summary.")
            .text(ResponseTextConfig {
                format: Some(ResponseTextFormat::JsonSchema {
                    name: "company_analysis".into(),
                    description: None,
                    schema: Some(complex_schema.clone()),
                    strict: Some(true),
                }),
                verbosity: None,
            })
            .max_output_tokens(800);
        let t0 = Instant::now();
        let resp = client.responses().create(req).await?;
        let _: serde_json::Value = serde_json::from_str(&resp.output_text())?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Nested structured", &mut times);

    // --- Agent loop: step 1 offers a function tool; step 2 sends a canned
    // function result back (as a raw JSON request) and asks for a structured
    // recommendation. Both steps are inside the timed region.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let t0 = Instant::now();
        let step1 = client
            .responses()
            .create(
                ResponseCreateRequest::new(MODEL)
                    .input("What's the weather in Tokyo and what should I wear?")
                    .tools(vec![ResponseTool::Function {
                        name: "get_weather".into(),
                        description: Some("Get weather for a city".into()),
                        parameters: Some(serde_json::json!({
                            "type": "object",
                            "properties": {
                                "city": {"type": "string"},
                                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                            },
                            "required": ["city", "unit"],
                            "additionalProperties": false
                        })),
                        strict: None,
                    }])
                    .store(true),
            )
            .await?;
        // Use the call id of the first function call in step 1; if the
        // response contains none, a placeholder id is sent instead.
        let call_id = step1
            .function_calls()
            .first()
            .map(|fc| fc.call_id.clone())
            .unwrap_or_else(|| "call_1".into());
        let _step2: serde_json::Value = client
            .responses()
            .create_raw(&serde_json::json!({
                "model": MODEL,
                "previous_response_id": step1.id,
                "max_output_tokens": 200,
                "input": [{
                    "type": "function_call_output",
                    "call_id": call_id,
                    "output": "{\"temp\":22,\"condition\":\"sunny\",\"humidity\":45}"
                }],
                "text": {
                    "format": {
                        "type": "json_schema",
                        "name": "recommendation",
                        "strict": true,
                        "schema": {
                            "type": "object",
                            "properties": {
                                "outfit": {"type": "string"},
                                "accessories": {"type": "array", "items": {"type": "string"}},
                                "warning": {"type": "string"}
                            },
                            "required": ["outfit", "accessories", "warning"],
                            "additionalProperties": false
                        }
                    }
                }
            }))
            .await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Agent loop (2-step)", &mut times);

    // --- Rapid-fire: five small sequential requests timed as one sample.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let t0 = Instant::now();
        for i in 1..=5 {
            let _ = client
                .responses()
                .create(
                    ResponseCreateRequest::new(MODEL)
                        .input(format!("What is {i}+{i}? Reply with just the number."))
                        .max_output_tokens(16),
                )
                .await?;
        }
        times.push(t0.elapsed().as_millis());
    }
    print_row("Rapid-fire (5 calls)", &mut times);

    // --- Prompt-cached: one untimed request with the same instructions and
    // cache key is sent first; the timed requests then reuse that key.
    let system_prompt = "You are a senior software architect with 20 years of experience in distributed systems, microservices, and cloud-native architectures. Always provide specific, actionable advice with code examples where relevant. Consider scalability, maintainability, and security in every recommendation.";
    let _ = client
        .responses()
        .create(
            ResponseCreateRequest::new(MODEL)
                .instructions(system_prompt)
                .input("ping")
                .prompt_cache_key("bench-architect")
                .max_output_tokens(16),
        )
        .await?;
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let t0 = Instant::now();
        let _ = client
            .responses()
            .create(
                ResponseCreateRequest::new(MODEL)
                    .instructions(system_prompt)
                    .input("How should I design a rate limiter for an API gateway?")
                    .prompt_cache_key("bench-architect")
                    .prompt_cache_retention("24h")
                    .max_output_tokens(200),
            )
            .await?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Prompt-cached", &mut times);

    // --- Streaming TTFT: time from starting the stream request until the
    // first output-text delta event arrives.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let req = ResponseCreateRequest::new(MODEL)
            .input("Explain quicksort in 3 sentences.")
            .max_output_tokens(200);
        let t0 = Instant::now();
        let mut stream = client.responses().create_stream(req).await?;
        while let Some(event) = stream.next().await {
            let ev = event?;
            if matches!(
                ev,
                openai_oxide::types::responses::ResponseStreamEvent::ResponseOutputTextDelta(_)
            ) {
                times.push(t0.elapsed().as_millis());
                break;
            }
        }
        // Consume the remaining events before the next iteration; items
        // (including errors) are discarded here.
        while stream.next().await.is_some() {}
    }
    // A stream that ends without a text delta records no sample, so the
    // sample set can be empty; the row is skipped in that case.
    if !times.is_empty() {
        print_row("Streaming TTFT", &mut times);
    }

    // --- Parallel fan-out: three requests awaited together with
    // `tokio::join!`; the sample is the time until all three have completed.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let t0 = Instant::now();
        let (c1, c2, c3) = (client.clone(), client.clone(), client.clone());
        let (r1, r2, r3) = tokio::join!(
            async {
                c1.responses()
                    .create(
                        ResponseCreateRequest::new(MODEL)
                            .input("Capital of France? One word.")
                            .max_output_tokens(16),
                    )
                    .await
            },
            async {
                c2.responses()
                    .create(
                        ResponseCreateRequest::new(MODEL)
                            .input("Capital of Japan? One word.")
                            .max_output_tokens(16),
                    )
                    .await
            },
            async {
                c3.responses()
                    .create(
                        ResponseCreateRequest::new(MODEL)
                            .input("Capital of Brazil? One word.")
                            .max_output_tokens(16),
                    )
                    .await
            },
        );
        r1?;
        r2?;
        r3?;
        times.push(t0.elapsed().as_millis());
    }
    print_row("Parallel 3x (fan-out)", &mut times);

    // --- Hedged: two identical requests raced with `tokio::select!`; the
    // sample is the time until the first one completes. An error from the
    // branch that completes first aborts the benchmark.
    let mut times = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let t0 = Instant::now();
        let (c1, c2) = (client.clone(), client.clone());
        tokio::select! {
            r = async {
                c1.responses()
                    .create(
                        ResponseCreateRequest::new(MODEL)
                            .input("What is 7*8? Number only.")
                            .max_output_tokens(16),
                    )
                    .await
            } => { r?; }
            r = async {
                c2.responses()
                    .create(
                        ResponseCreateRequest::new(MODEL)
                            .input("What is 7*8? Number only.")
                            .max_output_tokens(16),
                    )
                    .await
            } => { r?; }
        }
        times.push(t0.elapsed().as_millis());
    }
    print_row("Hedged (2x race)", &mut times);

    println!("\n{ITERATIONS} iterations per test. All times include full HTTP round-trip.");
    println!("Client: openai-oxide with reqwest 0.13, gzip, HTTP/2, tcp_nodelay.");
    Ok(())
}