use crate::error::{CliError, Result};
use crossterm::{
event::{self, DisableMouseCapture, EnableMouseCapture, Event, KeyCode, KeyEventKind},
execute,
terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
};
use std::io;
use std::path::PathBuf;
use std::time::Instant;
/// Model file formats that can be recognised from a file extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelFormat {
    /// Files whose extension is `gguf`.
    Gguf,
    /// Files whose extension is `safetensors`.
    SafeTensors,
    /// Files whose extension is `apr`.
    Apr,
}

// `brick_prefix` takes `&self` although the enum is `Copy`; the allow keeps clippy's
// pass-by-reference lint quiet without changing the method's signature.
#[allow(clippy::trivially_copy_pass_by_ref)]
impl ModelFormat {
    /// Detects the format from the extension of `path`, ignoring letter case.
    ///
    /// Returns `None` when the path has no extension, when the extension is not valid
    /// UTF-8, or when it is none of `gguf`, `safetensors`, `apr`.
    pub fn from_path(path: &std::path::Path) -> Option<Self> {
        let lowered = path.extension().and_then(|raw| raw.to_str())?.to_lowercase();
        let known = [
            ("gguf", Self::Gguf),
            ("safetensors", Self::SafeTensors),
            ("apr", Self::Apr),
        ];
        known
            .iter()
            .find(|(name, _)| *name == lowered.as_str())
            .map(|&(_, format)| format)
    }

    /// Returns the short prefix string associated with this format
    /// (`"gguf"`, `"st"` or `"apr"`).
    ///
    /// NOTE(review): how callers use the prefix is not visible in this part of the file.
    pub fn brick_prefix(&self) -> &'static str {
        match *self {
            Self::Apr => "apr",
            Self::SafeTensors => "st",
            Self::Gguf => "gguf",
        }
    }
}
/// Options accepted by [`run`].
///
/// NOTE(review): only `headless`, `simulated`, `model_path`, `model` and `attach` are
/// consumed in the visible part of this file; notes on the remaining fields are inferred
/// from their names and should be confirmed against the code that reads them.
#[derive(Debug, Clone)]
pub struct CbtopConfig {
    /// Forwarded to the TUI as its first argument when `headless` is off.
    pub model: Option<String>,
    /// Forwarded to the TUI as its second argument when `headless` is off.
    pub attach: Option<String>,
    /// Model file for real headless profiling (`--model-path` in the CLI messages).
    pub model_path: Option<PathBuf>,
    /// Selects headless mode instead of the TUI.
    pub headless: bool,
    /// Presumably requests JSON output — TODO confirm.
    pub json: bool,
    /// Presumably a file the report is written to — TODO confirm.
    pub output: Option<PathBuf>,
    /// Presumably enables CI pass/fail gating — TODO confirm.
    pub ci: bool,
    /// Presumably the minimum throughput accepted in CI mode — TODO confirm.
    pub throughput_threshold: Option<f64>,
    /// Presumably the minimum brick score accepted in CI mode — TODO confirm.
    pub brick_score_threshold: Option<u32>,
    /// Presumably the number of warm-up iterations; defaults to 10.
    pub warmup: usize,
    /// Presumably the number of measured iterations; defaults to 100.
    pub iterations: usize,
    /// Presumably toggles speculative decoding — TODO confirm.
    pub speculative: bool,
    /// Presumably the speculation depth `k`; defaults to 4.
    pub speculation_k: usize,
    /// Presumably the draft model used when `speculative` is set — TODO confirm.
    pub draft_model_path: Option<PathBuf>,
    /// Presumably the number of concurrent requests; defaults to 1.
    pub concurrent: usize,
    /// Headless mode uses simulated data when set (`--simulated` in the CLI messages).
    pub simulated: bool,
}

impl Default for CbtopConfig {
    /// Every switch off, every optional value absent, and the sizing knobs set to
    /// `warmup = 10`, `iterations = 100`, `speculation_k = 4`, `concurrent = 1`.
    fn default() -> Self {
        Self {
            // Sizing knobs: the only non-zero defaults.
            warmup: 10,
            iterations: 100,
            speculation_k: 4,
            concurrent: 1,
            // Mode switches all start disabled.
            headless: false,
            json: false,
            ci: false,
            speculative: false,
            simulated: false,
            // Nothing is selected until the caller supplies it.
            model: None,
            attach: None,
            model_path: None,
            output: None,
            throughput_threshold: None,
            brick_score_threshold: None,
            draft_model_path: None,
        }
    }
}
/// Aggregate report made up of hardware, throughput, per-brick and scoring sections.
///
/// NOTE(review): nothing in the visible part of this file builds or reads this type; the
/// field notes are inferred from names and types — confirm against the producer.
#[derive(Debug, Clone)]
pub struct HeadlessReport {
    /// Presumably the name of the profiled model.
    pub model: String,
    /// Timestamp as text; the format is not visible here.
    pub timestamp: String,
    /// Hardware section.
    pub hardware: HardwareInfo,
    /// Throughput and latency section.
    pub throughput: ThroughputMetrics,
    /// One entry per scored brick.
    pub brick_scores: Vec<BrickScore>,
    /// Quality-score section.
    pub pmat_scores: PmatScores,
    /// Falsification tally section.
    pub falsification: FalsificationSummary,
    /// Status text; the set of valid values is not visible here.
    pub status: String,
    /// CI outcome text; the set of valid values is not visible here.
    pub ci_result: String,
}
/// Quality scores carried in a `HeadlessReport`.
///
/// NOTE(review): how each score is computed and its valid range are not visible in this
/// part of the file.
#[derive(Debug, Clone)]
pub struct PmatScores {
    /// Score for the Rust project as a whole.
    pub rust_project_score: f64,
    /// TDG score (acronym not expanded anywhere in the visible code).
    pub tdg_score: f64,
    /// TDG score for the CUDA code, going by the name.
    pub cuda_tdg_score: f64,
    /// Integer brick score.
    pub brick_score: u32,
    /// Grade as text.
    pub grade: String,
}
/// Description of the hardware carried in a `HeadlessReport`.
#[derive(Debug, Clone)]
pub struct HardwareInfo {
    /// GPU description as text.
    pub gpu: String,
    /// CPU description as text.
    pub cpu: String,
    /// Memory size; gigabytes going by the `_gb` suffix — TODO confirm whether this is
    /// system or GPU memory.
    pub memory_gb: u32,
}
/// Throughput and latency figures carried in a `HeadlessReport`.
///
/// NOTE(review): units are taken from the field-name suffixes; confirm against the code
/// that fills this struct.
#[derive(Debug, Clone)]
pub struct ThroughputMetrics {
    /// Tokens per second.
    pub tokens_per_sec: f64,
    /// Presumably time to first token, in milliseconds.
    pub ttft_ms: f64,
    /// Presumably the coefficient of variation, as a percentage.
    pub cv_percent: f64,
    /// Presumably the 50th-percentile latency, in microseconds.
    pub p50_us: f64,
    /// Presumably the 99th-percentile latency, in microseconds.
    pub p99_us: f64,
}
/// Score entry for a single brick, as listed in `HeadlessReport::brick_scores`.
///
/// NOTE(review): the scoring rule is not visible in this part of the file.
#[derive(Debug, Clone)]
pub struct BrickScore {
    /// Name of the brick.
    pub name: String,
    /// Integer score.
    pub score: u32,
    /// Grade as text.
    pub grade: String,
    /// Time budget; microseconds going by the `_us` suffix.
    pub budget_us: f64,
    /// Measured time; microseconds going by the `_us` suffix.
    pub actual_us: f64,
    /// Presumably `actual_us / budget_us`, mirroring `BrickTiming::gap_factor` — confirm.
    pub gap_factor: f64,
}
/// Tally of falsification points carried in a `HeadlessReport`.
///
/// NOTE(review): whether `passed + failed + blocked == total_points` is enforced anywhere
/// is not visible in this part of the file.
#[derive(Debug, Clone)]
pub struct FalsificationSummary {
    /// Total number of points.
    pub total_points: u32,
    /// Number of points counted as passed.
    pub passed: u32,
    /// Number of points counted as failed.
    pub failed: u32,
    /// Number of points counted as blocked.
    pub blocked: u32,
}
/// Timing record for one brick: a fixed time budget plus a rolling window of samples.
#[derive(Debug, Clone)]
struct BrickTiming {
    /// Name of the brick.
    name: &'static str,
    /// Time budget; microseconds going by the `_us` suffix.
    budget_us: f64,
    /// Mean of the samples currently in `samples`; `0.0` until the first sample arrives.
    actual_us: f64,
    /// Most recent samples, oldest first, never more than `MAX_SAMPLES` after `add_sample`.
    samples: Vec<f64>,
}

impl BrickTiming {
    /// Size of the rolling sample window, and the capacity reserved for it up front.
    const MAX_SAMPLES: usize = 100;

    /// Creates a record with the given budget and an empty sample window.
    fn new(name: &'static str, budget_us: f64) -> Self {
        Self {
            name,
            budget_us,
            actual_us: 0.0,
            samples: Vec::with_capacity(Self::MAX_SAMPLES),
        }
    }

    /// Returns `actual_us / budget_us`, or `1.0` when the budget is not positive.
    fn gap_factor(&self) -> f64 {
        if self.budget_us > 0.0 {
            self.actual_us / self.budget_us
        } else {
            1.0
        }
    }

    /// Returns `"✅"` when the mean is within budget (`actual_us <= budget_us`), else `"❌"`.
    fn status(&self) -> &'static str {
        if self.actual_us <= self.budget_us {
            "✅"
        } else {
            "❌"
        }
    }

    /// Returns the mean as a percentage of the budget, capped at 200.
    ///
    /// A non-positive budget yields 100. The float-to-integer cast saturates, so a
    /// negative ratio becomes 0.
    fn percent_of_budget(&self) -> u16 {
        if self.budget_us > 0.0 {
            ((self.actual_us / self.budget_us) * 100.0).min(200.0) as u16
        } else {
            100
        }
    }

    /// Appends `us` to the window, dropping the oldest sample once the window is full,
    /// and refreshes `actual_us` with the mean of the window.
    fn add_sample(&mut self, us: f64) {
        // Evict before pushing: pushing first would momentarily hold MAX_SAMPLES + 1
        // values and force the Vec to grow beyond the capacity reserved in `new`.
        if self.samples.len() >= Self::MAX_SAMPLES {
            self.samples.remove(0);
        }
        self.samples.push(us);
        self.actual_us = self.samples.iter().sum::<f64>() / self.samples.len() as f64;
    }

    /// Returns the window scaled for a sparkline: each sample is multiplied by 10,
    /// capped at 255 and converted to `u64`.
    fn sparkline_data(&self) -> Vec<u64> {
        self.samples
            .iter()
            .map(|&x| (x * 10.0).min(255.0) as u64)
            .collect()
    }
}
/// Per-brick timings plus the aggregate counters derived from them.
#[derive(Debug, Clone)]
struct PipelineState {
    /// Timing record for each brick, in the order `new` creates them.
    bricks: Vec<BrickTiming>,
    /// Starts at 0. NOTE(review): never updated in the visible part of this file.
    layer_idx: usize,
    /// Multiplier applied to the summed brick time to obtain `total_us`.
    total_layers: usize,
    /// Incremented once per `update_demo` call.
    tokens_generated: usize,
    /// Summed brick means multiplied by `total_layers`, as set by `update_demo`.
    total_us: f64,
    /// Starts at 976.0; presumably the throughput target in tokens/s — TODO confirm.
    target_tok_s: f64,
    /// `1_000_000.0 / total_us`, refreshed by `update_demo` whenever `total_us > 0`.
    current_tok_s: f64,
}
impl PipelineState {
    /// Builds the initial state: seven bricks with fixed budgets, 28 layers, a target of
    /// 976.0 tokens/s, and every counter at zero.
    fn new() -> Self {
        // "RmsNorm" appears twice on purpose in this list; the entries are kept in
        // pipeline order rather than deduplicated by name.
        let bricks = vec![
            BrickTiming::new("RmsNorm", 1.5),
            BrickTiming::new("QkvBrick", 6.0),
            BrickTiming::new("RoPE", 1.0),
            BrickTiming::new("Attention", 10.0),
            BrickTiming::new("OProj", 3.5),
            BrickTiming::new("RmsNorm", 1.5),
            BrickTiming::new("FfnBrick", 12.2),
        ];
        Self {
            bricks,
            layer_idx: 0,
            total_layers: 28,
            tokens_generated: 0,
            total_us: 0.0,
            target_tok_s: 976.0,
            current_tok_s: 0.0,
        }
    }

    /// Sum of every brick's budget.
    fn total_budget(&self) -> f64 {
        self.bricks.iter().map(|b| b.budget_us).sum()
    }

    /// Sum of every brick's current mean.
    fn total_actual(&self) -> f64 {
        self.bricks.iter().map(|b| b.actual_us).sum()
    }

    /// Returns the brick with the largest `gap_factor`, or `None` when there are no bricks.
    ///
    /// Incomparable values (NaN) are treated as equal; on ties `max_by` keeps the last
    /// of the maximal bricks.
    fn bottleneck(&self) -> Option<&BrickTiming> {
        self.bricks.iter().max_by(|a, b| {
            a.gap_factor()
                .partial_cmp(&b.gap_factor())
                .unwrap_or(std::cmp::Ordering::Equal)
        })
    }

    /// Feeds every brick one synthetic sample within ±20% of its budget, then refreshes
    /// the token counter, `total_us` and `current_tok_s`.
    fn update_demo(&mut self) {
        use std::time::{SystemTime, UNIX_EPOCH};

        // `duration_since` fails when the system clock is set before the Unix epoch. The
        // value only seeds the jitter, so fall back to 0 instead of panicking.
        let seed = match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_millis() as u64,
            Err(_) => 0,
        };

        for (i, brick) in self.bricks.iter_mut().enumerate() {
            let base = brick.budget_us;
            // Each brick reads its own 8-bit window of the seed, 4 bits further along
            // than the previous one. The shift is reduced modulo the 64-bit width so that
            // a list of more than 16 bricks cannot overflow it.
            let shift = (i * 4) % 64;
            let jitter = ((seed >> shift) & 0xFF) as f64 / 255.0 - 0.5;
            let variance = jitter * base * 0.4;
            brick.add_sample(base + variance);
        }

        self.tokens_generated += 1;
        self.total_us = self.total_actual() * self.total_layers as f64;
        // `current_tok_s` keeps its previous value when there is no time to divide by.
        if self.total_us > 0.0 {
            self.current_tok_s = 1_000_000.0 / self.total_us;
        }
    }
}
/// The views of the TUI, numbered in the order their titles are listed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum View {
    Pipeline = 0,
    Budget = 1,
    Histogram = 2,
    Gpu = 3,
    Memory = 4,
}

impl View {
    /// Returns one title per view, ordered so that `titles()[view.index()]` belongs to `view`.
    ///
    /// NOTE(review): the bracketed letter presumably names the key that selects the view;
    /// the key handling is not visible in this part of the file.
    fn titles() -> Vec<&'static str> {
        const TAB_TITLES: [&str; 5] = [
            "Pipeline [p]",
            "Budget [b]",
            "Histogram [h]",
            "GPU [g]",
            "Memory [m]",
        ];
        TAB_TITLES.to_vec()
    }

    /// Returns the zero-based position of this view.
    fn index(self) -> usize {
        // The discriminants are spelled out on the enum, so the cast is the position.
        self as usize
    }
}
/// State of the interactive application.
struct App {
    /// Name of the model; `App::new` falls back to `"qwen2.5-coder-1.5b"`.
    model_name: String,
    /// Brick timings and the counters derived from them.
    pipeline: PipelineState,
    /// The view currently selected.
    current_view: View,
    /// Index into `pipeline.bricks`, moved by `next_brick` / `prev_brick`.
    selected_brick: usize,
    /// Starts `false`. NOTE(review): presumably set by the event loop to end the
    /// session; that code is not visible in this part of the file.
    should_quit: bool,
    /// When set, `tick` feeds the pipeline synthetic samples via `update_demo`.
    demo_mode: bool,
}
impl App {
    /// Creates the application state for `model`, using `"qwen2.5-coder-1.5b"` when no
    /// model is given.
    ///
    /// The state starts on the pipeline view with the first brick selected and demo
    /// mode switched on.
    fn new(model: Option<&str>) -> Self {
        let model_name = String::from(model.unwrap_or("qwen2.5-coder-1.5b"));
        Self {
            model_name,
            pipeline: PipelineState::new(),
            current_view: View::Pipeline,
            selected_brick: 0,
            should_quit: false,
            demo_mode: true,
        }
    }

    /// Selects the following brick, wrapping from the last one back to the first.
    /// Does nothing when there are no bricks.
    fn next_brick(&mut self) {
        let count = self.pipeline.bricks.len();
        if count == 0 {
            return;
        }
        self.selected_brick = (self.selected_brick + 1) % count;
    }

    /// Selects the preceding brick, wrapping from the first one to the last.
    /// Does nothing when there are no bricks.
    fn prev_brick(&mut self) {
        let count = self.pipeline.bricks.len();
        if count == 0 {
            return;
        }
        self.selected_brick = match self.selected_brick {
            0 => count - 1,
            current => current - 1,
        };
    }

    /// Advances the application by one step; in demo mode this generates one round of
    /// synthetic samples.
    fn tick(&mut self) {
        if !self.demo_mode {
            return;
        }
        self.pipeline.update_demo();
    }
}
/// Runs cbtop with the given configuration.
///
/// With `config.headless` set, the whole configuration is handed to `run_headless`.
/// Otherwise the TUI is started, and only `config.model` and `config.attach` are
/// forwarded to it.
///
/// # Errors
///
/// Returns whatever error the selected mode returns.
// NOTE(review): the contract attribute comes from an external macro crate; what it does
// to this function is not visible here, so the body is left exactly as written.
#[provable_contracts_macros::contract("apr-cli-operations-v1", equation = "long_running_graceful")]
pub fn run(config: CbtopConfig) -> Result<()> {
    if config.headless {
        run_headless(config)
    } else {
        run_tui(config.model.as_deref(), config.attach.as_deref())
    }
}
/// Runs cbtop without the TUI.
///
/// With `config.simulated` set, a warning is printed to stderr and the work is delegated
/// to `run_headless_simulated`, whatever features the binary was built with. Otherwise
/// the outcome depends on the compiled features:
///
/// * `inference` + `cuda`: delegates to `run_headless_real` when `config.model_path` is
///   set, and fails otherwise.
/// * `inference` without `cuda`: always fails.
/// * no `inference`: always fails.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` in every failing case listed above, or the error
/// of the function it delegates to.
fn run_headless(config: CbtopConfig) -> Result<()> {
    // Simulated data is opt-in and is handled before any feature-gated code.
    if config.simulated {
        eprintln!("cbtop: WARNING - Using simulated data (--simulated flag set)");
        eprintln!(" For real profiling: apr cbtop --model-path <FILE> --headless");
        return run_headless_simulated(config);
    }
    // The three cfg conditions below are mutually exclusive and cover every feature
    // combination, so exactly one block is compiled in. Each block ends in `return`.
    #[cfg(all(feature = "inference", feature = "cuda"))]
    {
        if config.model_path.is_some() {
            return run_headless_real(config);
        }
        return Err(CliError::ValidationFailed(
            "Headless mode requires --model-path for real profiling.\n\
For CI testing with simulated data, use: apr cbtop --headless --simulated\n\
For real profiling, use: apr cbtop --model-path <FILE> --headless"
                .to_string(),
        ));
    }
    #[cfg(all(feature = "inference", not(feature = "cuda")))]
    {
        // Borrow `config` once more so this configuration also touches it after the
        // `simulated` check.
        let _ = &config;
        return Err(CliError::ValidationFailed(
            "Headless real profiling requires 'cuda' feature.\n\
For CI testing with simulated data, use: apr cbtop --headless --simulated"
                .to_string(),
        ));
    }
    #[cfg(not(feature = "inference"))]
    {
        return Err(CliError::ValidationFailed(
            "Headless mode requires --model-path and the 'inference' feature.\n\
For CI testing with simulated data, use: apr cbtop --headless --simulated\n\
Rebuild with: cargo build -p apr-cli --features inference"
                .to_string(),
        ));
    }
}
// The rest of this module is spliced in textually from the files below, so their items
// share this file's scope, including the `use` declarations at the top.
// NOTE(review): `run_tui`, `run_headless_simulated` and `run_headless_real` are called
// above but not defined in the visible part of this file — presumably they live in these
// includes; confirm before moving or renaming any of them.
include!("cbtop_get_cpu_memory.rs");
include!("cbtop_report_tui.rs");
include!("gguf.rs");
include!("cbtop_measure_batch.rs");
include!("cbtop_06.rs");