/// Format a byte count as a human-readable size string (B, KB, MB, or GB).
pub fn format_size(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = KB * 1024;
    const GB: u64 = MB * 1024;
    if bytes >= GB {
        format!("{:.1} GB", bytes as f64 / GB as f64)
    } else if bytes >= MB {
        format!("{:.1} MB", bytes as f64 / MB as f64)
    } else if bytes >= KB {
        format!("{:.1} KB", bytes as f64 / KB as f64)
    } else {
        format!("{bytes} B")
    }
}
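/// Print the detected format and basic metadata for a model file, identifying
/// GGUF, SafeTensors, or APR by file extension and magic bytes.
///
/// # Errors
///
/// Returns an error if the model data cannot be parsed.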
pub fn display_model_info(model_ref: &str, file_data: &[u8]) -> Result<()> {
    use crate::format::{APR_MAGIC, GGUF_MAGIC};
    if model_ref.ends_with(".gguf") || file_data.starts_with(GGUF_MAGIC) {
        use crate::gguf::GGUFModel;
        let gguf = GGUFModel::from_bytes(file_data)?;
        println!(" Format: GGUF v{}", gguf.header.version);
        println!(" Tensors: {}", gguf.header.tensor_count);
    } else if model_ref.ends_with(".safetensors") {
        use crate::safetensors::SafetensorsModel;
        let st = SafetensorsModel::from_bytes(file_data)?;
        println!(" Format: SafeTensors");
        println!(" Tensors: {}", st.tensors.len());
    } else if model_ref.ends_with(".apr") || file_data.starts_with(APR_MAGIC) {
        use crate::model_loader::read_apr_model_type;
        let model_type = read_apr_model_type(file_data).unwrap_or_else(|| "Unknown".to_string());
        println!(" Format: APR (Aprender Native)");
        println!(" Model Type: {model_type}");
    } else {
        println!(" Format: Unknown ({} bytes)", file_data.len());
    }
    Ok(())
}
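/// Render a demo of the trueno-viz benchmark visualizations (sparkline, ASCII
/// histogram, full report, and multi-benchmark comparison) from synthetic
/// latency samples.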
pub fn run_visualization(use_color: bool, samples: usize) {
    use crate::viz::{
        print_benchmark_results, render_ascii_histogram, render_sparkline, BenchmarkData,
    };
    println!("Realizar Benchmark Visualization Demo");
    println!("=====================================");
    println!();
    // Generate synthetic latencies with a simple LCG seeded deterministically,
    // so the demo output is reproducible across runs.
    let mut rng_state = 42u64;
    let latencies: Vec<f64> = (0..samples)
        .map(|_| {
            rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1);
            // Take the high bits as a uniform draw in [0, 1), then map it onto
            // an exponential scale to mimic a latency distribution.
            let uniform = (rng_state >> 33) as f64 / (1u64 << 31) as f64;
            let log_mean = 3.0;
            let log_std = 0.5;
            (log_mean + log_std * (2.0 * uniform - 1.0)).exp()
        })
        .collect();
    println!("1. Sparkline (latency trend)");
    println!(" {}", render_sparkline(&latencies, 60));
    println!();
    println!("2. ASCII Histogram (latency distribution)");
    let hist = render_ascii_histogram(&latencies, 12, 50);
    for line in hist.lines() {
        println!(" {line}");
    }
    println!();
    println!("3. Full Benchmark Report");
    let data = BenchmarkData::new("inference_latency", latencies);
    print_benchmark_results(&data, use_color);
    println!();
    println!("4. Multi-Benchmark Comparison");
    println!();
    let benchmarks = [
        ("tensor_add", 15.2, 18.1),
        ("tensor_mul", 16.8, 20.3),
        ("matmul_128", 145.3, 172.1),
        ("softmax", 23.4, 28.9),
        ("attention", 892.1, 1024.5),
    ];
    println!(
        " {:.<20} {:>10} {:>10} {:>10}",
        "Benchmark", "p50 (us)", "p99 (us)", "Trend"
    );
    println!(" {}", "-".repeat(55));
    for (name, p50, p99) in benchmarks {
        let trend: Vec<f64> = (0..20)
            .map(|i| p50 + (i as f64 / 20.0) * (p99 - p50) * 0.3)
            .collect();
        let sparkline = render_sparkline(&trend, 10);
        println!(" {:.<20} {:>10.1} {:>10.1} {}", name, p50, p99, sparkline);
    }
    println!();
    println!("Visualization powered by trueno-viz");
}
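/// Print the benchmark configuration header (runtime, plus any model, URL, or
/// output path that was provided).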
fn print_bench_config(
    runtime_name: &str,
    model: Option<&str>,
    url: Option<&str>,
    output: Option<&str>,
) {
    println!("Benchmark Configuration:");
    println!(" Runtime: {runtime_name}");
    if let Some(m) = model {
        println!(" Model: {m}");
    }
    if let Some(u) = url {
        println!(" URL: {u}");
    }
    if let Some(o) = output {
        println!(" Output: {o}");
    }
    println!();
}
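/// Write parsed `cargo bench` results, the raw output, and run metadata to a
/// pretty-printed JSON file at `output_path`.
///
/// # Errors
///
/// Returns an error if the output file cannot be created, serialized, or written.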
#[allow(clippy::disallowed_methods)]
fn write_bench_json(
    output_path: &str,
    stdout: &str,
    suite: Option<&str>,
    runtime: Option<&str>,
    model: Option<&str>,
) -> Result<()> {
    use std::fs::File;
    use std::io::Write;
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);
    let results = parse_cargo_bench_output(stdout, suite);
    let json_output = serde_json::json!({
        "version": "1.0",
        "timestamp": timestamp,
        "runtime": runtime.unwrap_or("realizar"),
        "suite": suite,
        "model": model,
        "results": results,
        "raw_output": stdout
    });
    let mut file = File::create(output_path).map_err(|e| RealizarError::IoError {
        message: format!("Failed to create output file {output_path}: {e}"),
    })?;
    let json_str = serde_json::to_string_pretty(&json_output).map_err(|e| {
        RealizarError::IoError {
            message: format!("Failed to serialize benchmark results to JSON: {e}"),
        }
    })?;
    file.write_all(json_str.as_bytes())
        .map_err(|e| RealizarError::IoError {
            message: format!("Failed to write to output file {output_path}: {e}"),
        })?;
    println!();
    println!("Benchmark results written to: {output_path}");
    Ok(())
}
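/// Print the available benchmark suites along with usage examples for local
/// and external runtime benchmarking.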
fn print_bench_usage() {
    println!("Available benchmark suites:");
    println!();
    for (name, description) in BENCHMARK_SUITES {
        println!(" {name:<12} - {description}");
    }
    println!();
    println!("Usage:");
    println!(" realizar bench # Run all benchmarks");
    println!(" realizar bench tensor_ops # Run specific suite");
    println!(" realizar bench --list # List available suites");
    println!(" realizar bench --runtime realizar # Specify runtime");
    println!(" realizar bench --output results.json # Save JSON results");
    println!();
    println!("External Runtime Benchmarking (REAL HTTP calls):");
    println!(" realizar bench --runtime ollama --url http://localhost:11434 --model llama3.2");
    println!(" realizar bench --runtime vllm --url http://localhost:8000 --model meta-llama/Llama-3.2-1B");
    println!(" realizar bench --runtime llama-cpp --url http://localhost:8080");
}
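/// Return `true` if `suite_name` is a known benchmark suite; otherwise print
/// an error listing the available suites and return `false`.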
fn validate_suite_or_error(suite_name: &str) -> bool {
    if BENCHMARK_SUITES.iter().any(|(name, _)| *name == suite_name) {
        return true;
    }
    eprintln!("Error: Unknown benchmark suite '{suite_name}'");
    eprintln!();
    eprintln!("Available suites:");
    for (name, _) in BENCHMARK_SUITES {
        eprintln!(" {name}");
    }
    false
}
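/// Execute the prepared `cargo bench` command, capturing its output when
/// `capture` is true or streaming it to the terminal otherwise.
///
/// # Errors
///
/// Returns an error if the command cannot be spawned, or (in streaming mode)
/// if it exits with a failure status.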
fn execute_cargo_bench(
    cmd: &mut std::process::Command,
    capture: bool,
) -> Result<Option<std::process::Output>> {
    if capture {
        let output = cmd
            .output()
            .map_err(|e| RealizarError::UnsupportedOperation {
                operation: "run_benchmarks".to_string(),
                reason: format!("Failed to execute cargo bench: {e}"),
            })?;
        return Ok(Some(output));
    }
    let status = cmd
        .status()
        .map_err(|e| RealizarError::UnsupportedOperation {
            operation: "run_benchmarks".to_string(),
            reason: format!("Failed to execute cargo bench: {e}"),
        })?;
    if !status.success() {
        return Err(RealizarError::UnsupportedOperation {
            operation: "run_benchmarks".to_string(),
            reason: format!("Benchmarks failed with exit code: {:?}", status.code()),
        });
    }
    Ok(None)
}
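/// Run the `realizar bench` command: list suites, benchmark an external
/// runtime over HTTP when both `--runtime` and `--url` are given, or run the
/// local `cargo bench` suites, optionally writing JSON results.
///
/// # Errors
///
/// Returns an error if benchmark execution fails or results cannot be written.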
pub fn run_benchmarks(
    suite: Option<String>,
    list: bool,
    runtime: Option<String>,
    model: Option<String>,
    url: Option<String>,
    output: Option<String>,
) -> Result<()> {
    if list {
        print_bench_usage();
        return Ok(());
    }
    let runtime_name = runtime.clone().unwrap_or_else(|| "realizar".to_string());
    print_bench_config(
        &runtime_name,
        model.as_deref(),
        url.as_deref(),
        output.as_deref(),
    );
    if let (Some(rt), Some(server_url)) = (&runtime, &url) {
        return run_external_benchmark(rt, server_url, model.as_deref(), output.as_deref());
    }
    let mut cmd = std::process::Command::new("cargo");
    cmd.arg("bench");
    if let Some(ref suite_name) = suite {
        if !validate_suite_or_error(suite_name) {
            std::process::exit(1);
        }
        cmd.arg("--bench").arg(suite_name);
    }
    println!("Running benchmarks...");
    println!();
    let bench_output = match execute_cargo_bench(&mut cmd, output.is_some())? {
        Some(out) => out,
        None => return Ok(()),
    };
    if !bench_output.status.success() {
        eprintln!("{}", String::from_utf8_lossy(&bench_output.stderr));
        return Err(RealizarError::UnsupportedOperation {
            operation: "run_benchmarks".to_string(),
            reason: format!(
                "Benchmarks failed with exit code: {:?}",
                bench_output.status.code()
            ),
        });
    }
    let stdout = String::from_utf8_lossy(&bench_output.stdout);
    print!("{stdout}");
    if let Some(ref output_path) = output {
        write_bench_json(
            output_path,
            &stdout,
            suite.as_deref(),
            runtime.as_deref(),
            model.as_deref(),
        )?;
    }
    Ok(())
}
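/// Parse one `cargo bench` output line containing `bench:` and `ns/iter` into
/// a JSON record with the test name, time in nanoseconds, and suite; returns
/// `None` for lines that do not match.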
#[allow(clippy::disallowed_methods)]
fn parse_bench_line(line: &str, suite: Option<&str>) -> Option<serde_json::Value> {
    if !line.contains("bench:") || !line.contains("ns/iter") {
        return None;
    }
    let parts: Vec<&str> = line.split_whitespace().collect();
    if parts.len() < 5 {
        return None;
    }
    let test_idx = parts.iter().position(|&p| p == "test")?;
    let name = parts.get(test_idx + 1)?;
    let bench_idx = parts.iter().position(|&p| p == "bench:")?;
    let time_str = parts.get(bench_idx + 1)?;
    let time_ns = time_str.replace(',', "").parse::<u64>().ok()?;
    Some(serde_json::json!({
        "name": name,
        "time_ns": time_ns,
        "suite": suite
    }))
}
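/// Collect all benchmark result records from raw `cargo bench` output.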
fn parse_cargo_bench_output(output: &str, suite: Option<&str>) -> Vec<serde_json::Value> {
    output
        .lines()
        .filter_map(|line| parse_bench_line(line, suite))
        .collect()
}
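/// Send a single timed completion request to the selected external runtime
/// (ollama, vllm, or llama-cpp) and return its inference timing.
///
/// # Errors
///
/// Returns an error if the runtime is unknown or the HTTP request fails.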
#[cfg(feature = "bench-http")]
fn execute_runtime_request(
    client: &crate::http_client::ModelHttpClient,
    runtime: &str,
    url: &str,
    model: Option<&str>,
    prompt: &str,
) -> Result<crate::http_client::InferenceTiming> {
    use crate::http_client::{CompletionRequest, OllamaOptions, OllamaRequest};
    match runtime.to_lowercase().as_str() {
        "ollama" => {
            let request = OllamaRequest {
                model: model.unwrap_or("llama3.2").to_string(),
                prompt: prompt.to_string(),
                stream: false,
                options: Some(OllamaOptions {
                    num_predict: Some(50),
                    temperature: Some(0.7),
                }),
            };
            client
                .ollama_generate(url, &request)
                .map_err(|e| RealizarError::ConnectionError(e.to_string()))
        },
        "vllm" => {
            let request = CompletionRequest {
                model: model.unwrap_or("default").to_string(),
                prompt: prompt.to_string(),
                max_tokens: 50,
                temperature: Some(0.7),
                stream: false,
            };
            client
                .openai_completion(url, &request, None)
                .map_err(|e| RealizarError::ConnectionError(e.to_string()))
        },
        "llama-cpp" => {
            let request = CompletionRequest {
                model: "default".to_string(),
                prompt: prompt.to_string(),
                max_tokens: 50,
                temperature: Some(0.7),
                stream: false,
            };
            client
                .llamacpp_completion(url, &request)
                .map_err(|e| RealizarError::ConnectionError(e.to_string()))
        },
        _ => Err(RealizarError::UnsupportedOperation {
            operation: "external_benchmark".to_string(),
            reason: format!("Unknown runtime: {runtime}. Supported: ollama, vllm, llama-cpp"),
        }),
    }
}