/// Sends `prompt` to the model described by `cell`, using the cell's modality.
///
/// * `Modality::Run` builds an argument list starting with `run` (model URI,
///   `--prompt`, `--max-tokens`, then the optional backend flag and `--trace`)
///   and passes it to `run_apr`.
/// * `Modality::Chat` and `Modality::Serve` delegate to `run_chat_test` and
///   `run_serve_test` respectively, both with `DEFAULT_TIMEOUT`.
///
/// `max_tokens` is only forwarded for `Modality::Run`; the chat and serve
/// helpers are not given it.
///
/// Returns the selected helper's result unchanged.
fn run_modality_test(
    config: &Config,
    cell: &MatrixCell,
    prompt: &str,
    max_tokens: u32,
) -> Result<String, String> {
    match cell.modality {
        Modality::Chat => run_chat_test(
            config,
            &cell.model_uri,
            prompt,
            cell.backend,
            cell.with_trace,
            DEFAULT_TIMEOUT,
        ),
        Modality::Serve => run_serve_test(
            config,
            &cell.model_uri,
            prompt,
            cell.backend,
            cell.with_trace,
            DEFAULT_TIMEOUT,
        ),
        Modality::Run => {
            // The argument vector borrows `&str`s, so the number has to live
            // in a local for as long as the vector does.
            let token_limit = max_tokens.to_string();
            let mut argv: Vec<&str> = vec![
                "run",
                &cell.model_uri,
                "--prompt",
                prompt,
                "--max-tokens",
                &token_limit,
            ];
            // Optional flags go after the fixed arguments: backend first, then trace.
            if let Some(backend_flag) = cell.backend.flag() {
                argv.push(backend_flag);
            }
            if cell.with_trace {
                argv.push("--trace");
            }
            run_apr(config, &argv)
        }
    }
}
/// Returns `s` with ANSI escape sequences removed.
///
/// Two forms are recognised:
///
/// * CSI sequences (`ESC [` ... final byte): everything up to and including
///   the final byte is dropped. The final byte is any character in the range
///   `0x40..=0x7E` (`@` through `~`), so sequences such as `ESC[1~` end where
///   they should instead of swallowing text up to the next letter.
/// * Any other `ESC x` pair: both the escape and the single character after
///   it are dropped.
///
/// An unterminated sequence at the end of the input is dropped entirely.
fn strip_ansi(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            result.push(c);
            continue;
        }
        // `next()` consumes the character after ESC whether or not it is `[`.
        if chars.next() == Some('[') {
            // Skip parameter/intermediate bytes until the CSI final byte.
            for c2 in chars.by_ref() {
                if ('\x40'..='\x7e').contains(&c2) {
                    break;
                }
            }
        }
    }
    result
}
fn extract_output(raw: &str) -> String {
let lines: Vec<&str> = raw.lines().collect();
let mut in_output = false;
let mut content = Vec::new();
for line in lines {
let clean = strip_ansi(line);
if clean.starts_with("Output:") {
in_output = true;
continue;
}
if clean.starts_with("Completed in ") {
break;
}
if in_output {
content.push(strip_ansi(line));
}
}
content.join("\n").trim().to_string()
}
/// Substrings whose presence makes `verify_output` reject the output as garbage.
///
/// NOTE(review): the rationale for the individual entries is not recorded in
/// this file; the per-entry remarks below are best guesses and should be
/// confirmed.
const GARBAGE_PATTERNS: &[&str] = &[
    // U+FFFD REPLACEMENT CHARACTER.
    "\u{FFFD}",
    // Presumably an unknown-token marker leaking into the text.
    "[UNK]",
    // Presumably fragments seen in previously broken generations.
    "akunji",
    "olumbia",
    "专门窗",
    // Presumably placeholder token names.
    "token0",
    "token1",
    // Presumably the start of a raw byte token such as `<0x0A>`.
    "<0x",
];
/// Characters that should never appear in decoded text; `verify_output` and
/// the "No BPE Artifacts" check reject output containing any of them.
///
/// NOTE(review): these look like the byte-level BPE stand-ins for space,
/// newline and tab respectively — confirm against the tokenizer in use.
const BPE_ARTIFACTS: &[char] = &[
    'Ġ',
    'Ċ',
    'ĉ',
];
/// Outcome of checking extracted model output with `verify_output`.
#[derive(Debug)]
enum VerifyResult {
    /// Every check passed; carries the trimmed output.
    // The payload is never read back (`verify_to_test` matches it with `_`),
    // so the dead-code lint is silenced for this variant.
    #[allow(dead_code)]
    Pass(String),
    /// The output was empty after trimming.
    FailEmpty,
    /// The output contained the given entry of `GARBAGE_PATTERNS`.
    FailGarbage(String),
    /// The output contained the given entry of `BPE_ARTIFACTS`.
    FailBpeArtifact(char),
    /// The expected answer was not found as a standalone word; carries a
    /// ready-to-display message.
    FailMissingAnswer(String),
}
/// Checks extracted model output and reports the first problem found.
///
/// Checks run in this order on the trimmed text:
///
/// 1. empty text -> `FailEmpty`;
/// 2. first entry of `GARBAGE_PATTERNS` contained in the text -> `FailGarbage`;
/// 3. first entry of `BPE_ARTIFACTS` contained in the text -> `FailBpeArtifact`;
/// 4. if `expected_contains` is given and `contains_as_word` does not find it
///    -> `FailMissingAnswer`, with a message quoting at most the first 50
///    characters of the text.
///
/// Otherwise returns `Pass` with the trimmed text.
fn verify_output(output: &str, expected_contains: Option<&str>) -> VerifyResult {
    let text = output.trim();
    if text.is_empty() {
        return VerifyResult::FailEmpty;
    }

    if let Some(hit) = GARBAGE_PATTERNS.iter().find(|p| text.contains(**p)) {
        return VerifyResult::FailGarbage((*hit).to_string());
    }

    if let Some(&marker) = BPE_ARTIFACTS.iter().find(|&&c| text.contains(c)) {
        return VerifyResult::FailBpeArtifact(marker);
    }

    match expected_contains {
        Some(expected) if !contains_as_word(text, expected) => {
            // Keep the failure message short: quote only the head of the output.
            let preview: String = text.chars().take(50).collect();
            VerifyResult::FailMissingAnswer(format!(
                "Expected '{expected}' as standalone word, got: {preview}"
            ))
        }
        _ => VerifyResult::Pass(text.to_string()),
    }
}
/// Reports whether `needle` occurs in `haystack` as a standalone word.
///
/// An occurrence counts when the character immediately before it and the
/// character immediately after it are each either absent (start/end of
/// `haystack`) or not alphanumeric. So `"4"` is found in `"is 4."` but not
/// in `"14"` or `"40"`.
///
/// Overlapping occurrences are considered: after a rejected occurrence the
/// search resumes one character (not one byte) later, so the resumed slice
/// always starts on a UTF-8 character boundary even when the occurrence
/// begins with a multi-byte character.
fn contains_as_word(haystack: &str, needle: &str) -> bool {
    // A missing neighbour (string edge) counts as a word boundary.
    let is_boundary = |neighbor: Option<char>| neighbor.map_or(true, |c| !c.is_alphanumeric());

    let mut search_start = 0;
    while let Some(pos) = haystack[search_start..].find(needle) {
        let abs_pos = search_start + pos;
        let end_pos = abs_pos + needle.len();

        let left_ok = is_boundary(haystack[..abs_pos].chars().next_back());
        let right_ok = is_boundary(haystack[end_pos..].chars().next());
        if left_ok && right_ok {
            return true;
        }

        // Advance by the width of the character at the match start; stepping
        // a single byte could land inside a multi-byte character and make the
        // next slice panic.
        let step = haystack[abs_pos..].chars().next().map_or(1, char::len_utf8);
        search_start = abs_pos + step;
        if search_start >= haystack.len() {
            break;
        }
    }
    false
}
/// Converts a `VerifyResult` into a `TestResult` named `name` worth `max_points`.
///
/// `Pass` becomes a passing result with the detail `"Clean output"`; every
/// failure variant becomes a failing result whose detail describes the cause.
fn verify_to_test(name: &'static str, max_points: u32, result: VerifyResult) -> TestResult {
    // Only `Pass` succeeds; all other variants reduce to a failure message.
    let failure_detail = match result {
        VerifyResult::Pass(_) => {
            return TestResult::pass(name, max_points, "Clean output".to_string());
        }
        VerifyResult::FailEmpty => "Empty output".to_string(),
        VerifyResult::FailGarbage(p) => format!("GARBAGE: '{p}'"),
        VerifyResult::FailBpeArtifact(c) => format!("BPE artifact: '{c}'"),
        VerifyResult::FailMissingAnswer(msg) => msg,
    };
    TestResult::fail(name, max_points, failure_detail)
}
/// Runs `prompt` through `run_modality_test` and grades the answer.
///
/// On success the raw output is reduced with `extract_output`, checked with
/// `verify_output` (against `expected`, if given) and converted with
/// `verify_to_test`. If the run itself fails, the error string becomes the
/// detail of a failing `TestResult`.
fn run_verify_test(
    config: &Config,
    cell: &MatrixCell,
    name: &'static str,
    max_points: u32,
    prompt: &str,
    max_tokens: u32,
    expected: Option<&str>,
) -> TestResult {
    let raw = match run_modality_test(config, cell, prompt, max_tokens) {
        Ok(raw) => raw,
        Err(e) => return TestResult::fail(name, max_points, e),
    };
    let answer = extract_output(&raw);
    let verdict = verify_output(&answer, expected);
    verify_to_test(name, max_points, verdict)
}
/// Times one generation and compares the estimated throughput to a target.
///
/// The prompt `"Count from 1 to 20."` is run with a 50-token limit. The token
/// count is estimated as whitespace-separated words in the raw output times
/// 1.3, with a floor of 10, and divided by the wall-clock time of the call.
///
/// The target comes from `config`: `min_cpu_tps` for the CPU backend,
/// `min_gpu_tps_float32` for GPU with `Format::SafeTensors`, `min_gpu_tps`
/// for any other GPU cell. Chat and serve modalities only need half of it.
///
/// Returns a 3-point "Performance" result; a failed run fails the test with
/// the error as detail.
///
/// NOTE(review): the estimate is taken from the raw output rather than from
/// `extract_output`, so any non-answer lines in it count as words too —
/// confirm this is intended.
fn run_perf_test(config: &Config, cell: &MatrixCell) -> TestResult {
    let timer = Instant::now();
    let output = match run_modality_test(config, cell, "Count from 1 to 20.", 50) {
        Ok(text) => text,
        Err(e) => return TestResult::fail("Performance", 3, e),
    };
    let seconds = timer.elapsed().as_secs_f64();

    let word_count = output.split_whitespace().count() as f64;
    let tokens_est = (word_count * 1.3).max(10.0);
    let tps = tokens_est / seconds;

    let base_target = match (cell.backend, cell.format) {
        (Backend::Cpu, _) => config.min_cpu_tps,
        (Backend::Gpu, Format::SafeTensors) => config.min_gpu_tps_float32,
        (Backend::Gpu, _) => config.min_gpu_tps,
    };
    let target = match cell.modality {
        Modality::Chat | Modality::Serve => base_target * 0.5,
        Modality::Run => base_target,
    };

    if tps < target {
        TestResult::fail("Performance", 3, format!("{tps:.1} tok/s < {target:.1}"))
    } else {
        TestResult::pass("Performance", 3, format!("{tps:.1} tok/s >= {target:.1}"))
    }
}
fn run_cell_tests(config: &Config, cell: &MatrixCell) -> CellResult {
let start = Instant::now();
let mut tests = Vec::new();
if cell.backend == Backend::Gpu && !gpu_available() {
tests.push(TestResult::skip(
"All Tests",
15,
"No GPU available".to_string(),
));
return CellResult {
cell: cell.clone(),
tests,
total_points: 0,
max_points: 15,
elapsed: start.elapsed(),
};
}
match run_modality_test(
config,
cell,
"What is 2+2? Answer with just the number.",
10,
) {
Ok(_) => tests.push(TestResult::pass(
"Model Load",
2,
format!("{} via {:?}", cell.model_uri, cell.modality),
)),
Err(e) => {
tests.push(TestResult::fail("Model Load", 2, e));
return CellResult {
cell: cell.clone(),
tests,
total_points: 0,
max_points: 15,
elapsed: start.elapsed(),
};
}
}
tests.push(run_verify_test(
config,
cell,
"Correct Output",
3,
"What is 2+2? Answer with just the number.",
10,
Some("4"),
));
tests.push(run_verify_test(
config,
cell,
"No Garbage",
3,
"Say hello.",
20,
None,
));
match run_modality_test(config, cell, "Say hello.", 20) {
Ok(raw) => {
let output = extract_output(&raw);
let has_bpe = BPE_ARTIFACTS.iter().any(|&c| output.contains(c));
tests.push(if has_bpe {
TestResult::fail("No BPE Artifacts", 2, "Ġ/Ċ/ĉ detected".to_string())
} else {
TestResult::pass("No BPE Artifacts", 2, "Clean tokens".to_string())
});
}
Err(e) => tests.push(TestResult::fail("No BPE Artifacts", 2, e)),
}
let trace_cell = MatrixCell {
with_trace: true,
..cell.clone()
};
match run_modality_test(config, &trace_cell, "Hi", 5) {
Ok(_) => tests.push(TestResult::pass(
"Trace Works",
2,
format!("{:?} + trace accepted", cell.modality),
)),
Err(e) if e.contains("not supported") || e.contains("trace") => {
tests.push(TestResult::skip(
"Trace Works",
2,
format!("Trace not supported for {:?}", cell.modality),
));
}
Err(e) => tests.push(TestResult::fail("Trace Works", 2, e)),
}
tests.push(run_perf_test(config, cell));
let total: u32 = tests.iter().map(|t| t.points).sum();
let max: u32 = tests.iter().map(|t| t.max_points).sum();
CellResult {
cell: cell.clone(),
tests,
total_points: total,
max_points: max,
elapsed: start.elapsed(),
}
}
/// Prints a boxed, colourised summary of one cell's results to stdout.
///
/// Layout: a blank line, a header row with the cell label and overall
/// PASS/FAIL status, one row per test (icon, name, `points/max`, and the
/// first 25 characters of the detail text), and a totals row with the
/// elapsed time in seconds.
fn print_cell_result(result: &CellResult) {
    let verdict = if result.passed() {
        format!("{GREEN}✓ PASS{NC}")
    } else {
        format!("{RED}✗ FAIL{NC}")
    };
    let label = result.cell.label();

    println!();
    println!("{BLUE}┌─────────────────────────────────────────────────────────────┐{NC}");
    println!("{BLUE}│{NC} {BOLD} {label:<42} {verdict:>8} {BLUE}│{NC}");
    println!("{BLUE}├─────────────────────────────────────────────────────────────┤{NC}");

    for test in &result.tests {
        let icon = if test.passed {
            format!("{GREEN}✓{NC}")
        } else {
            format!("{RED}✗{NC}")
        };
        let score = format!("{}/{}", test.points, test.max_points);
        // Cut the detail text to the width of its column.
        let detail: String = test
            .details
            .as_deref()
            .unwrap_or("")
            .chars()
            .take(25)
            .collect();
        println!(
            "{BLUE}│{NC} {icon} {name:<20} {score:>5} {detail:<25}{BLUE}│{NC}",
            name = test.name
        );
    }

    let seconds = result.elapsed.as_secs_f64();
    println!("{BLUE}├─────────────────────────────────────────────────────────────┤{NC}");
    // The empty string padded to 24 columns is filler before the right border.
    println!(
        "{BLUE}│{NC} Total: {}/{} points ({seconds:.1}s) {:>24}{BLUE}│{NC}",
        result.total_points, result.max_points, ""
    );
    println!("{BLUE}└─────────────────────────────────────────────────────────────┘{NC}");
}