use std::process::Command;
/// Converts the outcome of the `curl` health probe into the line shown to the user.
///
/// * `curl` ran and exited successfully: `Server health: <stdout>`, with stdout
///   decoded lossily as UTF-8.
/// * `curl` could not be launched because the executable was not found: a hint
///   that `curl` itself is missing, so the user is not told to start a server
///   that may already be running.
/// * Any other outcome (non-zero exit status or another spawn error): the hint
///   to start the server.
fn health_report(probe: std::io::Result<std::process::Output>) -> String {
    match probe {
        Ok(output) if output.status.success() => {
            let response = String::from_utf8_lossy(&output.stdout);
            format!("Server health: {response}")
        }
        // Spawning fails with `NotFound` when the program is not on PATH; this
        // says nothing about whether the server is up.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            "curl not found on PATH; install curl to run the health check.".to_string()
        }
        Ok(_) | Err(_) => {
            "Server not running. Start with: apr serve <model> --port 8080".to_string()
        }
    }
}

/// Prints usage instructions for the `X-Trace-Level` header and then probes
/// `http://localhost:8080/health` with `curl`, reporting the result.
fn main() {
    println!("APR Serve Tracing Example");
    println!("=========================");
    println!();
    println!("This example demonstrates the X-Trace-Level header for debugging inference.");
    println!();
    println!("Start the server:");
    println!(" apr serve /path/to/model.gguf --port 8080");
    println!();
    println!("Then use one of these trace levels:");
    println!();
    println!("1. Brick-level (token operations):");
    println!(r#" curl -H "X-Trace-Level: brick" http://localhost:8080/v1/chat/completions ..."#);
    println!();
    println!("2. Step-level (forward pass steps):");
    println!(r#" curl -H "X-Trace-Level: step" http://localhost:8080/v1/chat/completions ..."#);
    println!();
    println!("3. Layer-level (per-layer timing):");
    println!(r#" curl -H "X-Trace-Level: layer" http://localhost:8080/v1/chat/completions ..."#);
    println!();

    // `-s` silences curl's progress meter so only the response body reaches stdout.
    let health = Command::new("curl")
        .args(["-s", "http://localhost:8080/health"])
        .output();
    println!("{}", health_report(health));
}