#![allow(clippy::disallowed_methods)]
use std::process::{Command, Stdio};
use std::time::{Duration, Instant};
/// Substrings that mark model output as garbage.
///
/// `verify_output` rejects any (trimmed) output that contains one of these
/// anywhere, using plain substring containment, and reports the offending
/// pattern in a `FailGarbage: …` error. Because matching is by substring,
/// `token0`/`token1` also catch longer strings such as `token12345`, and
/// `olumbia` also matches inside ordinary words like "Columbia".
const GARBAGE_PATTERNS: &[&str] = &[
"\u{FFFD}", "[UNK]", "akunji", "olumbia", "专门窗", "token0", "token1", "<0x", ];
/// Characters that mark model output as containing undecoded tokenizer artifacts.
///
/// `verify_output` rejects any (trimmed) output that contains one of these
/// characters and reports it in a `FailBpeArtifact: …` error.
///
/// NOTE(review): these look like byte-level BPE vocabulary markers (presumably
/// the encodings of space, newline and tab) — confirm against the tokenizer.
const BPE_ARTIFACTS: &[char] = &[
'Ġ', 'Ċ', 'ĉ', ];
/// Returns `true` when `needle` occurs in `haystack` as a standalone word.
///
/// An occurrence is "standalone" when the character immediately before it and
/// the character immediately after it are both non-alphanumeric (per
/// [`char::is_alphanumeric`]); the start and end of `haystack` count as
/// boundaries. Every occurrence is examined, including overlapping ones, so
/// `"14 4"` contains `"4"` as a word even though the first `4` is embedded.
///
/// The scan is UTF-8 safe: after a rejected occurrence it resumes at the next
/// character boundary, so multi-byte text in either argument never causes a
/// slicing panic.
fn contains_as_word(haystack: &str, needle: &str) -> bool {
    let mut search_start = 0;
    while let Some(pos) = haystack[search_start..].find(needle) {
        let abs_pos = search_start + pos;
        let end_pos = abs_pos + needle.len();

        // No neighbouring character (match touches either end) counts as a boundary.
        let left_ok = haystack[..abs_pos]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric());
        let right_ok = haystack[end_pos..]
            .chars()
            .next()
            .map_or(true, |c| !c.is_alphanumeric());
        if left_ok && right_ok {
            return true;
        }

        // Resume one *character* (not one byte) past the start of this match.
        // Stepping by a single byte would land inside a multi-byte character
        // and make the next slice panic.
        let step = haystack[abs_pos..]
            .chars()
            .next()
            .map_or(1, char::len_utf8);
        search_start = abs_pos + step;
        if search_start >= haystack.len() {
            break;
        }
    }
    false
}
/// Validates raw model output and returns its trimmed text on success.
///
/// Checks run in this order, and the first failure wins:
/// 1. empty after trimming → `Err("FailEmpty")`
/// 2. contains any entry of `GARBAGE_PATTERNS` → `Err("FailGarbage: <pattern>")`
/// 3. contains any entry of `BPE_ARTIFACTS` → `Err("FailBpeArtifact: <char>")`
/// 4. `expected_contains` is `Some` and is not present as a standalone word
///    (see `contains_as_word`) → `Err("FailMissingAnswer: …")`
///
/// When `expected_contains` is `None`, step 4 is skipped.
fn verify_output(output: &str, expected_contains: Option<&str>) -> Result<String, String> {
    let text = output.trim();
    if text.is_empty() {
        return Err(String::from("FailEmpty"));
    }

    // Tables are scanned in declaration order so the reported entry is the first hit.
    if let Some(hit) = GARBAGE_PATTERNS.iter().find(|&&p| text.contains(p)) {
        return Err(format!("FailGarbage: {}", hit));
    }
    if let Some(hit) = BPE_ARTIFACTS.iter().copied().find(|&c| text.contains(c)) {
        return Err(format!("FailBpeArtifact: {}", hit));
    }

    match expected_contains {
        Some(word) if !contains_as_word(text, word) => Err(format!(
            "FailMissingAnswer: expected '{}' as standalone word",
            word
        )),
        _ => Ok(text.to_owned()),
    }
}
/// Prints `title` to stdout as a section banner.
///
/// The banner is a blank line, a bold-blue horizontal rule, the title in
/// bold yellow, a second rule, and a trailing blank line. Colors are raw ANSI
/// escape sequences.
fn print_section_header(title: &str) {
    const RULE_COLOR: &str = "\x1b[1;34m";
    const TITLE_COLOR: &str = "\x1b[1;33m";
    const RESET: &str = "\x1b[0m";

    println!(
        "\n{}═══════════════════════════════════════════════════════════════{}",
        RULE_COLOR, RESET
    );
    println!("{}{}{}", TITLE_COLOR, title, RESET);
    println!(
        "{}═══════════════════════════════════════════════════════════════{}\n",
        RULE_COLOR, RESET
    );
}
/// Runs each `(input, should_pass, description)` case through `verify_output`
/// (with no expected answer) and prints a colored PASS/FAIL line per case.
///
/// A case passes when `verify_output` succeeding matches `should_pass`; for a
/// failing case the expected outcome and the actual result are printed too.
///
/// Returns `(passed, failed)` counts.
fn run_garbage_cases(cases: &[(&str, bool, &str)]) -> (u32, u32) {
    let mut ok_count = 0u32;
    let mut bad_count = 0u32;

    for (input, should_pass, desc) in cases.iter().copied() {
        let outcome = verify_output(input, None);

        if outcome.is_ok() != should_pass {
            let wanted = if should_pass { "PASS" } else { "FAIL" };
            println!(" {}✗ FAIL{}: {}", "\x1b[31m", "\x1b[0m", desc);
            println!(" Expected: {}, Got: {:?}", wanted, outcome);
            bad_count += 1;
            continue;
        }

        println!(" {}✓ PASS{}: {}", "\x1b[32m", "\x1b[0m", desc);
        ok_count += 1;
    }

    (ok_count, bad_count)
}
/// Falsification test for the garbage detector.
///
/// Feeds a fixed table of clean and deliberately corrupted outputs through
/// `run_garbage_cases`, then prints a pass/total summary and a verdict line
/// that depends on whether any case behaved unexpectedly.
fn test_garbage_detection() {
    print_section_header("TEST 2: GARBAGE DETECTION FALSIFICATION");

    // (input, should verify_output accept it?, description)
    let cases: &[(&str, bool, &str)] = &[
        (
            "This is a token",
            true,
            "Common English word 'token' - should PASS",
        ),
        (
            "token12345",
            false,
            "Raw token ID leak (token0/token1 pattern) - should FAIL",
        ),
        (
            "The symbol Ġ is used in BPE",
            false,
            "BPE artifact Ġ - should FAIL",
        ),
        (
            "I am akunji",
            false,
            "Model collapse hallucination - should FAIL",
        ),
        (
            "Welcome to Columbia University",
            false,
            "Contains 'olumbia' - should FAIL",
        ),
        ("Hello, world!", true, "Clean output - should PASS"),
        ("The answer is 4", true, "Simple answer - should PASS"),
        (
            "token",
            true,
            "Just 'token' without number - should PASS (edge case)",
        ),
        (
            "mytoken0value",
            false,
            "token0 embedded in string - should FAIL",
        ),
        ("<0x0A>newline", false, "Byte token leak - should FAIL"),
        (
            "Test [UNK] marker",
            false,
            "Unknown token marker - should FAIL",
        ),
        ("Unicode: 专门窗 text", false, "CJK garbage - should FAIL"),
    ];

    let (passed, failed) = run_garbage_cases(cases);
    println!("\n Summary: {}/{} tests passed", passed, passed + failed);

    match failed {
        0 => println!(
            " {}✓ Garbage detection held up under testing{}",
            "\x1b[32m", "\x1b[0m"
        ),
        count => println!(
            " {}⚠ FALSIFICATION SUCCESSFUL: Garbage detection has {} edge case failures{}",
            "\x1b[1;31m", count, "\x1b[0m"
        ),
    }
}
/// Evaluates one answer-verification case and prints its outcome.
///
/// `input` is checked with `verify_output` against `expected`; the case
/// behaves as intended when success/failure matches `should_pass`.
///
/// Returns `1` only when the case behaves as intended *and* its description
/// contains `"RISK"` or `"EDGE CASE"` (reported as a brittle case); returns
/// `0` both for ordinary successes and for mismatches.
fn eval_answer_case(input: &str, expected: Option<&str>, should_pass: bool, desc: &str) -> u32 {
    let outcome = verify_output(input, expected);
    let behaved_as_intended = outcome.is_ok() == should_pass;
    let flagged_brittle = ["RISK", "EDGE CASE"]
        .iter()
        .any(|tag| desc.contains(*tag));

    match (behaved_as_intended, flagged_brittle) {
        // A mismatch is reported but not counted as brittle.
        (false, _) => {
            let wanted = if should_pass { "PASS" } else { "FAIL" };
            println!(" {}✗{}: {}", "\x1b[31m", "\x1b[0m", desc);
            println!(" Expected: {}, Got: {:?}", wanted, outcome);
            0
        }
        (true, true) => {
            println!(
                " {}⚠ RISK{}: {} (passes but semantically wrong)",
                "\x1b[33m", "\x1b[0m", desc
            );
            1
        }
        (true, false) => {
            println!(" {}✓{}: {}", "\x1b[32m", "\x1b[0m", desc);
            0
        }
    }
}
/// Audits answer verification for false confidence.
///
/// Runs a fixed table of outputs through `eval_answer_case`, all checked
/// against the expected answer `"4"`, sums the brittle-case counts it returns,
/// and prints either a finding (some brittle cases) or a confirmation that the
/// word-boundary check works.
fn test_answer_verification() {
    print_section_header("TEST 5: FALSE CONFIDENCE AUDIT (Answer Verification)");

    // (input, expected answer, should verify_output accept it?, description)
    let cases: &[(&str, Option<&str>, bool, &str)] = &[
        ("4", Some("4"), true, "Exact answer '4' - should PASS"),
        (
            "The answer is 4.",
            Some("4"),
            true,
            "Answer with context - should PASS",
        ),
        (
            "The answer is not 4, but 5.",
            Some("4"),
            true,
            "Contains standalone '4' - PASSES (acceptable)",
        ),
        ("2+2=4", Some("4"), true, "Equation format - should PASS"),
        (
            "Four",
            Some("4"),
            false,
            "Word 'Four' not digit '4' - should FAIL",
        ),
        ("5", Some("4"), false, "Wrong answer - should FAIL"),
        (
            "The result is 14",
            Some("4"),
            false,
            "FIXED: '14' no longer matches '4' - should FAIL",
        ),
        (
            "I counted 4 apples and 5 oranges",
            Some("4"),
            true,
            "Multiple numbers - should PASS",
        ),
        ("", Some("4"), false, "Empty output - should FAIL"),
        ("forty-four", Some("4"), false, "Spelled out - should FAIL"),
        (
            "answer=4",
            Some("4"),
            true,
            "'4' after '=' is standalone - should PASS",
        ),
        (
            "x4y",
            Some("4"),
            false,
            "'4' embedded in alphanumeric - should FAIL",
        ),
        (
            "4.0",
            Some("4"),
            true,
            "'4' before '.' is standalone - should PASS",
        ),
    ];

    let mut brittle_total = 0u32;
    for &(input, expected, should_pass, desc) in cases {
        brittle_total += eval_answer_case(input, expected, should_pass, desc);
    }

    if brittle_total == 0 {
        println!(
            "\n {}✓ FIX VERIFIED:{} Word boundary check prevents false positives",
            "\x1b[32m", "\x1b[0m"
        );
        for detail in [
            " - 'The result is 14' now correctly FAILS (4 embedded in 14)",
            " - 'x4y' correctly FAILS (4 embedded in alphanumeric)",
            " - 'The answer is 4.' correctly PASSES (4 at word boundary)",
        ] {
            println!("{}", detail);
        }
    } else {
        println!(
            "\n {}⚠ FALSIFICATION FINDING:{} {} brittle cases remain",
            "\x1b[1;33m", "\x1b[0m", brittle_total
        );
    }
}
/// Prints a verdict line for the matrix cell count `num`.
///
/// `27` is reported as matching the claimed size, `21` as a warning that the
/// matrix differs from the documented 27, and anything else as an unexpected
/// count.
fn report_cell_count(num: u32) {
    const GREEN: &str = "\x1b[32m";
    const YELLOW: &str = "\x1b[33m";
    const RESET: &str = "\x1b[0m";

    if num == 27 {
        println!(" {}✓ Matrix claims 27 cells{}", GREEN, RESET);
    } else if num == 21 {
        println!(
            " {}⚠ Matrix has 21 cells (not 27 as documented){}",
            YELLOW, RESET
        );
    } else {
        println!(
            " {}⚠ Matrix has {} cells (unexpected){}",
            YELLOW, num, RESET
        );
    }
}
/// Extracts the matrix cell count from a finished process and reports it.
///
/// Scans stdout first, then stderr (both decoded lossily as UTF-8), for the
/// first line that contains `"FULL MATRIX:"` or both `"Testing"` and
/// `"cell"`. The first whitespace-separated token on that line that parses as
/// a `u32` is passed to `report_cell_count`. If no such line exists, a hint is
/// printed instead; if the line has no numeric token, only the line is echoed.
fn parse_matrix_output(out: &std::process::Output) {
    /// True for lines that announce the matrix size.
    fn mentions_cell_count(line: &str) -> bool {
        line.contains("FULL MATRIX:") || (line.contains("Testing") && line.contains("cell"))
    }

    let stdout_text = String::from_utf8_lossy(&out.stdout);
    let stderr_text = String::from_utf8_lossy(&out.stderr);
    let announcement = stdout_text
        .lines()
        .chain(stderr_text.lines())
        .find(|candidate| mentions_cell_count(candidate));

    match announcement {
        None => {
            println!(" Could not find cell count in output");
            println!(" (This test requires models to be available)");
        }
        Some(found) => {
            println!(" Found: {}", found.trim());
            let count = found
                .split_whitespace()
                .find_map(|token| token.parse::<u32>().ok());
            if let Some(cells) = count {
                report_cell_count(cells);
            }
        }
    }
}
/// Prints the hand-derived breakdown of the expected test matrix size
/// (3 modalities × 7 format/config cells = 21), a note that the documentation
/// now says 21 rather than 27, and instructions for verifying the count manually.
fn print_matrix_calculation() {
    let breakdown = [
        " Expected matrix calculation:",
        " Modalities: Run, Chat, Serve (3)",
        " Formats: GGUF, SafeTensors, APR (3)",
        " Configs per format:",
        " - GGUF: CPU, CPU+trace, GPU = 3 cells",
        " - SafeTensors: CPU, CPU+trace = 2 cells",
        " - APR: CPU, CPU+trace = 2 cells",
        " Per modality: 3 + 2 + 2 = 7 cells",
        " Total: 3 modalities × 7 = 21 cells",
    ];
    for row in breakdown {
        println!("{}", row);
    }
    println!();

    println!(
        " {}NOTE:{} Documentation was updated: 21 cells (not original 27 claim).",
        "\x1b[32m", "\x1b[0m"
    );
    println!();

    let how_to_verify = [
        " To verify, run: cargo run --example qa_run -- --full-matrix --help",
        " And count 'Testing N cell(s)' in the output.",
    ];
    for row in how_to_verify {
        println!("{}", row);
    }
}
/// Checks the real matrix size against the documented calculation.
///
/// Prints the expected calculation, then runs
/// `cargo run --example qa_run --release -- --full-matrix --verbose` with
/// stdout and stderr captured, and hands the captured output to
/// `parse_matrix_output`. A failure to launch the command is reported on
/// stdout rather than treated as fatal.
fn test_matrix_integrity() {
    print_section_header("TEST 4: MATRIX INTEGRITY CHECK");
    print_matrix_calculation();
    println!("\n Running actual matrix count...");

    let cargo_args = [
        "run",
        "--example",
        "qa_run",
        "--release",
        "--",
        "--full-matrix",
        "--verbose",
    ];
    let mut qa_run = Command::new("cargo");
    qa_run
        .args(cargo_args)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    match qa_run.output() {
        Err(launch_error) => println!(" Could not run qa_run: {}", launch_error),
        Ok(captured) => parse_matrix_output(&captured),
    }
}
include!("includes/qa_falsify_include_01.rs");