use crate::error::{CliError, Result};
use colored::Colorize;
use std::path::{Path, PathBuf};
use std::time::Instant;
#[cfg(feature = "visualization")]
use trueno_viz::color::Rgba;
#[cfg(feature = "visualization")]
use trueno_viz::output::{SvgEncoder, TextAnchor};
#[cfg(feature = "zram")]
use trueno_zram_core::{Algorithm as ZramAlgorithm, CompressorBuilder, PAGE_SIZE};
use super::types::*;
/// Showcase step H: writes `showcase-performance.svg` into `config.model_dir`.
///
/// The model directory is created if it does not exist. With the
/// `visualization` feature the SVG is rendered through trueno-viz (a bar
/// chart when `benchmark` is `Some`, a placeholder otherwise); without the
/// feature a basic hand-written SVG is produced instead.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` if the directory cannot be created
/// or the SVG file cannot be written.
pub(super) fn run_visualize(
    config: &ShowcaseConfig,
    benchmark: Option<&BenchmarkComparison>,
) -> Result<bool> {
    let banner = "═══ Step H: Performance Visualization ═══".cyan().bold();
    println!();
    println!("{}", banner);
    println!();

    std::fs::create_dir_all(&config.model_dir).map_err(|err| {
        CliError::ValidationFailed(format!("Failed to create model dir: {err}"))
    })?;
    let output_file = config.model_dir.join("showcase-performance.svg");

    // Exactly one of the two blocks below survives conditional compilation
    // and becomes the function's tail expression.
    #[cfg(feature = "visualization")]
    {
        let chart = match benchmark {
            Some(bench) => {
                println!(
                    "Generating performance chart with {} (library)",
                    "trueno-viz 0.1.16".cyan()
                );
                generate_performance_chart_trueno_viz(bench)
            }
            None => {
                println!(
                    "{} No benchmark data available, generating placeholder",
                    "⚠".yellow()
                );
                generate_placeholder_svg_trueno_viz()
            }
        };
        std::fs::write(&output_file, &chart)
            .map_err(|err| CliError::ValidationFailed(format!("Failed to write SVG: {err}")))?;
        println!(
            "{} Performance chart saved to {} ({} bytes)",
            "✓".green(),
            output_file.display(),
            chart.len()
        );
        println!(" Rendered with: trueno-viz 0.1.16 (SIMD-accelerated)");
        Ok(true)
    }
    #[cfg(not(feature = "visualization"))]
    {
        println!(
            "{} trueno-viz feature not enabled, generating basic SVG",
            "⚠".yellow()
        );
        let fallback = generate_flamegraph_svg(config, benchmark);
        std::fs::write(&output_file, &fallback)
            .map_err(|err| CliError::ValidationFailed(format!("Failed to write SVG: {err}")))?;
        println!(
            "{} Flamegraph saved to {} ({} bytes)",
            "✓".green(),
            output_file.display(),
            fallback.len()
        );
        Ok(true)
    }
}
/// Renders the benchmark comparison as an SVG bar chart via trueno-viz.
///
/// A bar is always drawn for APR, plus one for llama.cpp and one for Ollama
/// when `bench` carries a throughput value for them. Bar heights are scaled
/// against the largest throughput among the drawn bars. The top-right corner
/// lists the available speedup figures and the coefficient of variation
/// (`apr_tps_stddev / apr_tps`, as a percentage) together with the run count.
///
/// Returns the rendered SVG document.
#[cfg(feature = "visualization")]
fn generate_performance_chart_trueno_viz(bench: &BenchmarkComparison) -> String {
    let width = 900;
    let height = 500;
    let margin = 60;
    let bar_width = 120.0;
    let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ");

    // Colours that are used more than once.
    let text_color = Rgba::rgb(51, 51, 51);
    let muted_color = Rgba::rgb(102, 102, 102);
    let meets_target_color = Rgba::rgb(76, 175, 80);
    let below_target_color = Rgba::rgb(244, 67, 54);

    let mut encoder = SvgEncoder::new(width, height).background(Some(Rgba::rgb(250, 250, 250)));
    encoder = encoder.text_anchored(
        width as f32 / 2.0,
        30.0,
        "APR Inference Performance Comparison",
        18.0,
        text_color,
        TextAnchor::Middle,
    );
    encoder = encoder.text_anchored(
        width as f32 / 2.0,
        50.0,
        &format!("Generated: {timestamp}"),
        11.0,
        muted_color,
        TextAnchor::Middle,
    );

    // APR is always present, so `bars` is never empty.
    let mut bars: Vec<(&str, f64, Rgba)> = vec![("APR", bench.apr_tps, Rgba::rgb(66, 133, 244))];
    bars.extend(
        bench
            .llama_cpp_tps
            .map(|tps| ("llama.cpp", tps, Rgba::rgb(255, 152, 0))),
    );
    bars.extend(
        bench
            .ollama_tps
            .map(|tps| ("Ollama", tps, Rgba::rgb(76, 175, 80))),
    );

    let max_tps = bars.iter().map(|(_, v, _)| *v).fold(0.0, f64::max);
    let chart_height = (height - margin * 3) as f64;
    let chart_bottom = (height - margin) as f32;
    encoder = encoder.text_anchored(
        25.0,
        height as f32 / 2.0,
        "Tokens/sec",
        12.0,
        text_color,
        TextAnchor::Middle,
    );

    let bar_spacing = ((width - margin * 2) as f32) / (bars.len() as f32);
    let start_x = margin as f32 + bar_spacing / 2.0 - bar_width / 2.0;
    for (i, (label, value, color)) in bars.iter().enumerate() {
        let x = start_x + (i as f32 * bar_spacing);
        // When every throughput is zero the ratio would be 0/0 = NaN, which
        // would leak into the SVG coordinates; draw a zero-height bar instead.
        // `f64::max` also maps a NaN or negative ratio to 0.
        let bar_height = if max_tps > 0.0 {
            ((*value / max_tps).max(0.0) * chart_height) as f32
        } else {
            0.0
        };
        let y = chart_bottom - bar_height;
        encoder = encoder.rect(x, y, bar_width, bar_height, *color);
        encoder = encoder.text_anchored(
            x + bar_width / 2.0,
            y - 8.0,
            &format!("{value:.1}"),
            12.0,
            text_color,
            TextAnchor::Middle,
        );
        encoder = encoder.text_anchored(
            x + bar_width / 2.0,
            chart_bottom + 20.0,
            label,
            12.0,
            text_color,
            TextAnchor::Middle,
        );
    }

    // Speedup annotations, stacked downwards; only the ones present are drawn.
    let annotation_x = width as f32 - 200.0;
    let mut annotation_y = 85.0;
    for (rival, speedup) in [
        ("llama.cpp", bench.speedup_vs_llama),
        ("Ollama", bench.speedup_vs_ollama),
    ] {
        if let Some(speedup) = speedup {
            let color = if speedup >= 25.0 {
                meets_target_color
            } else {
                below_target_color
            };
            // `{:+.1}` emits the sign itself, so a slowdown reads "-5.0%"
            // rather than "+-5.0%".
            encoder = encoder.text(
                annotation_x,
                annotation_y,
                &format!("vs {rival}: {speedup:+.1}%"),
                12.0,
                color,
            );
            annotation_y += 18.0;
        }
    }

    // Coefficient of variation; reported as 0 when the mean is not positive.
    let cv = if bench.apr_tps > 0.0 {
        (bench.apr_tps_stddev / bench.apr_tps) * 100.0
    } else {
        0.0
    };
    encoder = encoder.text(
        annotation_x,
        annotation_y,
        &format!("CV: {:.2}% (n={})", cv, bench.runs),
        11.0,
        muted_color,
    );
    encoder = encoder.text_anchored(
        width as f32 / 2.0,
        height as f32 - 15.0,
        "PAIML Sovereign AI Stack | trueno-viz 0.1.16",
        10.0,
        Rgba::rgb(136, 136, 136),
        TextAnchor::Middle,
    );
    encoder.render()
}
/// Builds the placeholder SVG used when no benchmark data is available.
///
/// The image is a 900x400 canvas carrying five horizontally centred captions:
/// a title, the generation timestamp (UTC), a hint to run the benchmark step,
/// the command to do so, and a footer.
#[cfg(feature = "visualization")]
fn generate_placeholder_svg_trueno_viz() -> String {
    let width = 900;
    let height = 400;
    let timestamp = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ");
    let generated_line = format!("Generated: {timestamp}");

    let mut canvas = SvgEncoder::new(width, height).background(Some(Rgba::rgb(250, 250, 250)));

    let center_x = width as f32 / 2.0;
    let middle_y = height as f32 / 2.0;

    // (y, text, font size, colour) for each caption, in drawing order.
    let captions = [
        (
            30.0,
            "APR Inference Performance",
            18.0,
            Rgba::rgb(51, 51, 51),
        ),
        (
            50.0,
            generated_line.as_str(),
            11.0,
            Rgba::rgb(102, 102, 102),
        ),
        (
            middle_y,
            "Run benchmark step to generate performance data",
            14.0,
            Rgba::rgb(153, 153, 153),
        ),
        (
            middle_y + 25.0,
            "apr showcase --step bench",
            12.0,
            Rgba::rgb(66, 133, 244),
        ),
        (
            height as f32 - 15.0,
            "PAIML Sovereign AI Stack | trueno-viz 0.1.16",
            10.0,
            Rgba::rgb(136, 136, 136),
        ),
    ];
    for (y, caption, size, color) in captions {
        canvas = canvas.text_anchored(center_x, y, caption, size, color, TextAnchor::Middle);
    }
    canvas.render()
}
/// Fallback SVG generator compiled when the `visualization` feature is off.
///
/// Fills a fixed SVG template with the current UTC time; every frame,
/// percentage and label in the template is a literal. Neither parameter is
/// read at present.
#[cfg(not(feature = "visualization"))]
fn generate_flamegraph_svg(
    _config: &ShowcaseConfig,
    _benchmark: Option<&BenchmarkComparison>,
) -> String {
    // The only dynamic part of the document is the "Generated:" line.
    let generated_at = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ");
    format!(
        r##"<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="1000" height="500" viewBox="0 0 1000 500">
<style>
.title {{ font: bold 18px monospace; fill: #333; }}
.subtitle {{ font: 12px monospace; fill: #666; }}
.label {{ font: 11px monospace; fill: #fff; }}
.percent {{ font: 10px monospace; fill: #333; }}
.footer {{ font: italic 10px monospace; fill: #888; }}
</style>
<!-- Background -->
<rect width="1000" height="500" fill="#fafafa"/>
<!-- Title -->
<text x="500" y="30" text-anchor="middle" class="title">
APR Inference Flamegraph - Qwen2.5-Coder-32B
</text>
<text x="500" y="48" text-anchor="middle" class="subtitle">
Generated: {timestamp}
</text>
<!-- Main stack frame -->
<rect x="50" y="420" width="900" height="35" fill="#d04437" rx="2"/>
<text x="500" y="443" text-anchor="middle" class="label">main::inference_loop (100%)</text>
<!-- GPU Kernel -->
<rect x="60" y="375" width="540" height="35" fill="#e67e22" rx="2"/>
<text x="330" y="398" text-anchor="middle" class="label">gpu::matmul_kernel (60%)</text>
<!-- Attention -->
<rect x="610" y="375" width="160" height="35" fill="#f39c12" rx="2"/>
<text x="690" y="398" text-anchor="middle" class="label">attention (18%)</text>
<!-- Memory -->
<rect x="780" y="375" width="160" height="35" fill="#27ae60" rx="2"/>
<text x="860" y="398" text-anchor="middle" class="label">memory (17%)</text>
<!-- Sub-frames -->
<rect x="70" y="330" width="260" height="35" fill="#3498db" rx="2"/>
<text x="200" y="353" text-anchor="middle" class="label">trueno::simd_gemm (29%)</text>
<rect x="340" y="330" width="180" height="35" fill="#9b59b6" rx="2"/>
<text x="430" y="353" text-anchor="middle" class="label">quantize::q4k (20%)</text>
<rect x="790" y="330" width="140" height="35" fill="#1abc9c" rx="2"/>
<text x="860" y="353" text-anchor="middle" class="label">zram::decompress (15%)</text>
<!-- Deepest frames -->
<rect x="80" y="285" width="120" height="35" fill="#34495e" rx="2"/>
<text x="140" y="308" text-anchor="middle" class="label">avx512 (13%)</text>
<rect x="210" y="285" width="100" height="35" fill="#7f8c8d" rx="2"/>
<text x="260" y="308" text-anchor="middle" class="label">prefetch (11%)</text>
<!-- Footer -->
<text x="500" y="485" text-anchor="middle" class="footer">
PAIML Sovereign AI Stack | realizar v0.5 | trueno v0.11 | trueno-zram v0.2
</text>
</svg>"##,
        timestamp = generated_at
    )
}
/// Showcase step F: prints how to start the interactive chat.
///
/// Nothing is launched here; the function only prints a banner and the
/// `apr chat` command line, then reports success. `_config` is unused.
pub(super) fn run_chat(_config: &ShowcaseConfig) -> Result<bool> {
    let banner = "═══ Step F: Chat Demo ═══".cyan().bold();
    println!();
    println!("{}", banner);

    // Uncoloured lines that follow the banner; an empty entry prints a blank line.
    let body = [
        "",
        "Interactive chat available via:",
        " apr chat ./models/qwen2.5-coder-32b.apr",
        "",
    ];
    for line in body {
        println!("{line}");
    }
    Ok(true)
}
// The contents of these files are spliced into this module at compile time,
// so they see the `use` declarations at the top of this file.
// NOTE(review): presumably that is why imports such as `Instant`, `Path` and
// the zram items are declared above without being used in this file — confirm.
include!("demo_zram.rs");
include!("demo_brick.rs");