1use crate::types::VerifyResult;
23
/// Confidence scores below this value trigger fallback to the uncompressed input.
const FALLBACK_THRESHOLD: f64 = 0.6;
26
27pub struct Verifier;
28
29impl Verifier {
30 pub fn verify(original: &str, compressed: &str) -> VerifyResult {
36 let mut passed = Vec::new();
37 let mut failed = Vec::new();
38
39 let retention = if original.is_empty() {
41 1.0
42 } else {
43 compressed.len() as f64 / original.len() as f64
44 };
45 if retention >= 0.10 {
46 passed.push("min_retention".to_string());
47 } else {
48 failed.push((
49 "min_retention".to_string(),
50 format!("output is only {:.1}% of input length", retention * 100.0),
51 ));
52 }
53
54 let error_lines: Vec<&str> = original
56 .lines()
57 .filter(|l| {
58 let lower = l.to_lowercase();
59 lower.contains("error:") || lower.contains("warning:") || lower.contains("fatal:")
60 || lower.contains("panic:") || lower.contains("exception:")
61 })
62 .collect();
63 if error_lines.is_empty() {
64 passed.push("error_lines".to_string());
65 } else {
66 let missing: Vec<&str> = error_lines
67 .iter()
68 .filter(|&&line| !compressed.contains(line.trim()))
69 .copied()
70 .collect();
71 if missing.is_empty() {
72 passed.push("error_lines".to_string());
73 } else {
74 failed.push((
75 "error_lines".to_string(),
76 format!("{} error/warning line(s) missing from output", missing.len()),
77 ));
78 }
79 }
80
81 let path_lines: Vec<&str> = original
83 .lines()
84 .filter(|l| {
85 (l.contains('/') || l.contains('\\'))
86 && l.chars().any(|c| c == '.')
87 && l.len() < 200 })
89 .take(20) .collect();
91 if path_lines.is_empty() {
92 passed.push("file_paths".to_string());
93 } else {
94 let missing_paths = path_lines
95 .iter()
96 .filter(|&&line| {
97 let token = line.split_whitespace()
99 .find(|t| t.contains('/') || t.contains('\\'))
100 .unwrap_or("");
101 !token.is_empty() && !compressed.contains(token)
102 })
103 .count();
104 if missing_paths == 0 {
105 passed.push("file_paths".to_string());
106 } else {
107 failed.push((
108 "file_paths".to_string(),
109 format!("{missing_paths} file path(s) missing from output"),
110 ));
111 }
112 }
113
114 let orig_trimmed = original.trim();
118 if orig_trimmed.starts_with('{') || orig_trimmed.starts_with('[') {
119 if let Ok(orig_val) = serde_json::from_str::<serde_json::Value>(orig_trimmed) {
120 let orig_keys = collect_top_level_keys(&orig_val);
121 if orig_keys.is_empty() {
122 passed.push("json_keys".to_string());
123 } else {
124 let present: usize = orig_keys
125 .iter()
126 .filter(|&&k| compressed.contains(k))
127 .count();
128 let retention_ratio = present as f64 / orig_keys.len() as f64;
129 if retention_ratio >= 0.5 {
131 passed.push("json_keys".to_string());
132 } else {
133 let missing: Vec<&str> = orig_keys
134 .iter()
135 .filter(|&&k| !compressed.contains(k))
136 .copied()
137 .collect();
138 failed.push((
139 "json_keys".to_string(),
140 format!("only {:.0}% of JSON keys retained; missing: {:?}",
141 retention_ratio * 100.0,
142 &missing[..missing.len().min(5)]),
143 ));
144 }
145 }
146 } else {
147 passed.push("json_keys".to_string()); }
149 } else {
150 passed.push("json_keys".to_string()); }
152
153 let hunk_headers: Vec<&str> = original
155 .lines()
156 .filter(|l| l.starts_with("@@"))
157 .collect();
158 if hunk_headers.is_empty() {
159 passed.push("diff_hunks".to_string());
160 } else {
161 let missing_hunks = hunk_headers
162 .iter()
163 .filter(|&&h| !compressed.contains(h))
164 .count();
165 if missing_hunks == 0 {
166 passed.push("diff_hunks".to_string());
167 } else {
168 failed.push((
169 "diff_hunks".to_string(),
170 format!("{missing_hunks} diff hunk header(s) missing"),
171 ));
172 }
173 }
174
175 let numbers: Vec<&str> = original
177 .split(|c: char| !c.is_ascii_digit() && c != '.' && c != '-')
178 .filter(|s| !s.is_empty() && s.len() >= 2 && s.parse::<f64>().is_ok())
179 .take(10)
180 .collect();
181 if numbers.is_empty() {
182 passed.push("numeric_values".to_string());
183 } else {
184 let missing_nums = numbers
185 .iter()
186 .filter(|&&n| !compressed.contains(n))
187 .count();
188 if missing_nums == 0 {
189 passed.push("numeric_values".to_string());
190 } else {
191 failed.push((
192 "numeric_values".to_string(),
193 format!("{missing_nums} numeric value(s) missing from output"),
194 ));
195 }
196 }
197
198 let preservation_tokens = extract_preservation_tokens(original);
210 if preservation_tokens.is_empty() {
211 passed.push("preservation".to_string());
212 } else {
213 let present = preservation_tokens
214 .iter()
215 .filter(|t| compressed.contains(t.as_str()))
216 .count();
217 let total = preservation_tokens.len();
218 let ratio = present as f64 / total as f64;
219 if ratio >= 0.85 {
220 passed.push("preservation".to_string());
221 } else {
222 let missing: Vec<&str> = preservation_tokens
223 .iter()
224 .filter(|t| !compressed.contains(t.as_str()))
225 .take(5)
226 .map(|t| t.as_str())
227 .collect();
228 failed.push((
229 "preservation".to_string(),
230 format!(
231 "only {}/{} preservation tokens retained ({:.0}%); missing: {:?}",
232 present, total, ratio * 100.0, missing,
233 ),
234 ));
235 }
236 }
237
238 let total = passed.len() + failed.len();
248 let mut confidence = if total == 0 {
249 1.0
250 } else {
251 passed.len() as f64 / total as f64
252 };
253 let preservation_failed = failed.iter().any(|(k, _)| k == "preservation");
254 if preservation_failed {
255 confidence = confidence.min(0.5);
256 }
257
258 let fallback_triggered = confidence < FALLBACK_THRESHOLD;
259
260 VerifyResult {
261 passed: failed.is_empty(),
262 confidence,
263 checks_passed: passed,
264 checks_failed: failed,
265 fallback_triggered,
266 }
267 }
268
269 pub fn should_fallback(result: &VerifyResult) -> bool {
271 result.fallback_triggered
272 }
273}
274
275fn collect_top_level_keys(value: &serde_json::Value) -> Vec<&str> {
276 match value {
277 serde_json::Value::Object(map) => map.keys().map(|k| k.as_str()).collect(),
278 _ => vec![],
279 }
280}
281
/// Never scan more than 1 MiB of input when extracting preservation tokens.
const MAX_SCAN_BYTES: usize = 1024 * 1024;
/// Hard cap on the number of unique preservation tokens extracted.
const MAX_TOKENS: usize = 500;
303
304fn extract_preservation_tokens(input: &str) -> Vec<String> {
307 let scan = &input[..input.len().min(MAX_SCAN_BYTES)];
308 let bytes = scan.as_bytes();
309 let mut tokens: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
310
311 let mut i = 0;
312 while i < bytes.len() && tokens.len() < MAX_TOKENS {
313 let b = bytes[i];
314
315 if b == b'`' {
317 if let Some(end) = find_closing(bytes, i + 1, b'`') {
318 let slice = &scan[i + 1..end];
319 if !slice.is_empty()
322 && slice.len() <= 200
323 && is_identifier_or_path_content(slice)
324 {
325 tokens.insert(slice.to_string());
326 }
327 i = end + 1;
328 continue;
329 }
330 }
331
332 if b == b'$' && i + 1 < bytes.len() {
334 let next = bytes[i + 1];
335 if next == b'{' {
336 if let Some(end) = find_closing(bytes, i + 2, b'}') {
337 let slice = &scan[i + 2..end];
338 if is_env_name(slice) {
339 tokens.insert(format!("${{{}}}", slice));
340 }
341 i = end + 1;
342 continue;
343 }
344 } else if next.is_ascii_uppercase() || next == b'_' {
345 let start = i + 1;
347 let mut j = start;
348 while j < bytes.len()
349 && (bytes[j].is_ascii_uppercase()
350 || bytes[j] == b'_'
351 || bytes[j].is_ascii_digit())
352 {
353 j += 1;
354 }
355 if j > start {
356 tokens.insert(format!("${}", &scan[start..j]));
357 i = j;
358 continue;
359 }
360 }
361 }
362
363 if is_url_start(bytes, i) {
365 let end = scan_url_end(bytes, i);
366 if end > i + 8 {
367 tokens.insert(scan[i..end].to_string());
369 i = end;
370 continue;
371 }
372 }
373
374 if is_path_start(bytes, i) {
378 let end = scan_path_end(bytes, i);
379 if end > i {
380 let slice = &scan[i..end];
381 if slice.contains('/') && is_plausible_path(slice) {
383 tokens.insert(slice.to_string());
384 i = end;
385 continue;
386 }
387 }
388 }
389
390 if b.is_ascii_digit() || (b == b'v' && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()) {
392 let end = scan_version_end(bytes, i);
393 if end > i {
394 let slice = &scan[i..end];
395 if is_version(slice) {
396 tokens.insert(slice.to_string());
397 i = end;
398 continue;
399 }
400 }
401 }
402
403 i += 1;
404 }
405
406 tokens.into_iter().collect()
407}
408
/// Finds the index of the next `target` byte at or after `start`, searching
/// at most 256 bytes ahead; `None` when no match is found in that window.
fn find_closing(bytes: &[u8], start: usize, target: u8) -> Option<usize> {
    let window_end = (start + 256).min(bytes.len());
    for (offset, &candidate) in bytes[start..window_end].iter().enumerate() {
        if candidate == target {
            return Some(start + offset);
        }
    }
    None
}
415
/// Whether a backtick-quoted span looks like code (identifier/path) rather
/// than prose: at most three spaces, and at least one identifier-ish byte.
fn is_identifier_or_path_content(s: &str) -> bool {
    let spaces = s.bytes().filter(|&b| b == b' ').count();
    if spaces > 3 {
        return false;
    }
    s.bytes()
        .any(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'/' | b':'))
}
428
/// Whether `s` is a plausible environment-variable name: non-empty, at most
/// 64 bytes, and restricted to uppercase ASCII, digits, and underscores.
fn is_env_name(s: &str) -> bool {
    if s.is_empty() || s.len() > 64 {
        return false;
    }
    s.bytes()
        .all(|b| b == b'_' || b.is_ascii_uppercase() || b.is_ascii_digit())
}
435
/// Whether the bytes at `i` begin with a recognized URL/remote scheme prefix.
fn is_url_start(bytes: &[u8], i: usize) -> bool {
    const PREFIXES: &[&[u8]] = &[
        b"https://", b"http://", b"git://", b"ssh://", b"ftp://",
        b"file://", b"git@", b"ws://", b"wss://",
    ];
    let rest = &bytes[i..];
    for prefix in PREFIXES {
        if rest.starts_with(prefix) {
            return true;
        }
    }
    false
}
443
/// Returns the exclusive end index of a URL starting at `start`.
///
/// The scan is capped at 2048 bytes, stops at whitespace/quote/angle/backtick
/// delimiters, and strips trailing sentence punctuation that is prose rather
/// than part of the URL.
fn scan_url_end(bytes: &[u8], start: usize) -> usize {
    let cap = (start + 2048).min(bytes.len());
    let mut i = start;
    while i < cap {
        let b = bytes[i];
        if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r'
            || b == b'"' || b == b'\'' || b == b'<' || b == b'>'
            || b == b'`'
        {
            break;
        }
        i += 1;
    }
    // If the 2048-byte cap stopped us inside a multi-byte UTF-8 character
    // (0b10xxxxxx continuation bytes), back off to the preceding char
    // boundary so the caller's `&scan[start..end]` slice cannot panic.
    while i > start && i < bytes.len() && bytes[i] & 0xC0 == 0x80 {
        i -= 1;
    }
    // Trim trailing punctuation: "see https://a.io/x." ends at the 'x'.
    while i > start {
        let last = bytes[i - 1];
        if last == b'.' || last == b',' || last == b';' || last == b':'
            || last == b')' || last == b']' || last == b'!' || last == b'?'
        {
            i -= 1;
        } else {
            break;
        }
    }
    i
}
472
/// Whether a file-system path could begin at index `i`: a path-ish byte that
/// is not preceded by an alphanumeric (i.e. not the middle of a word).
fn is_path_start(bytes: &[u8], i: usize) -> bool {
    match bytes.get(i) {
        None => false,
        Some(&b) => {
            if i > 0 && bytes[i - 1].is_ascii_alphanumeric() {
                return false;
            }
            b.is_ascii_alphanumeric() || matches!(b, b'/' | b'.' | b'_' | b'-')
        }
    }
}
484
/// Returns the exclusive end of a path token starting at `start`, scanning at
/// most 512 bytes and dropping trailing dots (sentence punctuation).
fn scan_path_end(bytes: &[u8], start: usize) -> usize {
    let cap = (start + 512).min(bytes.len());
    let mut end = start;
    while end < cap {
        match bytes[end] {
            b'_' | b'-' | b'.' | b'/' => end += 1,
            b if b.is_ascii_alphanumeric() => end += 1,
            _ => break,
        }
    }
    // "edit src/main.rs." — the final dot belongs to the sentence.
    while end > start && bytes[end - 1] == b'.' {
        end -= 1;
    }
    end
}
504
/// Whether `s` looks like a real path: it contains a '/' with content on at
/// least one side, plus at least one ASCII letter (rejects fractions like
/// "3/4" and a bare "/").
fn is_plausible_path(s: &str) -> bool {
    match s.find('/') {
        None => false,
        Some(slash) => {
            let has_side = slash > 0 || slash + 1 < s.len();
            has_side && s.bytes().any(|b| b.is_ascii_alphabetic())
        }
    }
}
521
/// Returns the exclusive end of a version-like token starting at `start`:
/// a run of alphanumerics, dots, and hyphens, capped at 64 bytes.
fn scan_version_end(bytes: &[u8], start: usize) -> usize {
    let cap = (start + 64).min(bytes.len());
    let mut end = start;
    while end < cap && (bytes[end].is_ascii_alphanumeric() || bytes[end] == b'.' || bytes[end] == b'-') {
        end += 1;
    }
    end
}
535
/// Whether `s` is a version number: optional leading 'v', at least two dots,
/// leading digits, and a digit right after the first separator — so "1.2.3"
/// and "v2.0.0-beta.1" match but "1.2" and "..1" do not.
fn is_version(s: &str) -> bool {
    let body = s.strip_prefix('v').unwrap_or(s);
    // Fewer than two dots is too ambiguous to treat as a version.
    if body.bytes().filter(|&b| b == b'.').count() < 2 {
        return false;
    }
    let digit_len = body.bytes().take_while(|b| b.is_ascii_digit()).count();
    if digit_len == 0 {
        return false;
    }
    // The segment after the first separator must also open with a digit.
    match body[digit_len + 1..].bytes().next() {
        Some(b) => b.is_ascii_digit(),
        None => false,
    }
}
553
#[cfg(test)]
mod tests {
    use super::*;

    // ---- end-to-end Verifier::verify behaviour ----

    #[test]
    fn verify_identical_passes_all() {
        let text = "error: something went wrong\nfile: src/main.rs\n";
        let result = Verifier::verify(text, text);
        assert!(result.passed);
        assert!((result.confidence - 1.0).abs() < f64::EPSILON);
        assert!(!result.fallback_triggered);
    }

    #[test]
    fn verify_empty_input_passes() {
        let result = Verifier::verify("", "");
        assert!(result.passed);
    }

    #[test]
    fn verify_detects_missing_error_line() {
        let original = "error: connection refused\nsome other content here\n";
        let compressed = "some other content here\n";
        let result = Verifier::verify(original, compressed);
        assert!(!result.passed);
        assert!(result.checks_failed.iter().any(|(k, _)| k == "error_lines"));
    }

    #[test]
    fn verify_detects_over_compression() {
        let original = "error: critical failure at line 42\n@@ -1,5 +1,5 @@\n/path/to/file.rs\nvalue: 12345\n".repeat(20);
        let compressed = "x";
        let result = Verifier::verify(&original, compressed);
        assert!(!result.passed);
        assert!(result.checks_failed.iter().any(|(k, _)| k == "min_retention"));
        assert!(result.fallback_triggered, "should trigger fallback: confidence={:.2}", result.confidence);
    }

    #[test]
    fn verify_json_keys_preserved() {
        let original = r#"{"id":1,"name":"Alice","status":"active"}"#;
        let compressed = r#"TOON:{id:1,name:"Alice",status:"active"}"#;
        let result = Verifier::verify(original, compressed);
        assert!(result.checks_passed.contains(&"json_keys".to_string()));
    }

    #[test]
    fn verify_detects_missing_json_keys() {
        let original = r#"{"id":1,"name":"Alice","status":"active","role":"admin","email":"a@b.com","created":"2024-01-01"}"#;
        let compressed = r#"TOON:{id:1}"#;
        let result = Verifier::verify(original, compressed);
        assert!(result.checks_failed.iter().any(|(k, _)| k == "json_keys"),
            "should fail json_keys when <50% of keys retained");
    }

    #[test]
    fn verify_diff_hunks_preserved() {
        let original = "@@ -1,5 +1,5 @@\n-old\n+new\n context\n";
        let compressed = "@@ -1,5 +1,5 @@\n-old\n+new\n";
        let result = Verifier::verify(original, compressed);
        assert!(result.checks_passed.contains(&"diff_hunks".to_string()));
    }

    #[test]
    fn verify_detects_missing_diff_hunks() {
        let original = "@@ -1,5 +1,5 @@\n-old\n+new\n";
        let compressed = "-old\n+new\n";
        let result = Verifier::verify(original, compressed);
        assert!(result.checks_failed.iter().any(|(k, _)| k == "diff_hunks"));
    }

    #[test]
    fn fallback_threshold_triggers_correctly() {
        let original = "error: critical failure\n@@ -1,5 +1,5 @@\n/path/to/file.rs:42\n";
        let compressed = "x";
        let result = Verifier::verify(original, compressed);
        assert!(result.fallback_triggered, "should trigger fallback on low confidence");
    }

    // ---- realistic tool-output fixtures ----

    #[test]
    fn verify_cargo_test_output_preserved() {
        let original = "running 47 tests\ntest engine::tests::test_compress ... ok\ntest pipeline::tests::compress_json ... ok\ntest result: ok. 47 passed; 0 failed; 0 ignored; finished in 2.34s\n";
        let compressed = "47 tests\ntest result: ok. 47 passed; 0 failed; finished in 2.34s\n";
        let result = Verifier::verify(original, compressed);
        assert!(result.confidence >= 0.7, "cargo test output should verify well: {:.2}", result.confidence);
    }

    #[test]
    fn verify_rust_compile_error_preserved() {
        let original = "error[E0308]: mismatched types\n --> src/main.rs:42:5\n |\n42 | let x: i32 = \"hello\";\n | ^^^^^^^ expected `i32`, found `&str`\n\nerror: aborting due to previous error\n";
        let compressed = "error[E0308]: mismatched types\n --> src/main.rs:42:5\nerror: aborting due to previous error\n";
        let result = Verifier::verify(original, compressed);
        assert!(result.checks_passed.contains(&"error_lines".to_string()),
            "error lines should be preserved");
    }

    #[test]
    fn verify_git_log_output() {
        let original = "commit a1b2c3d4\nAuthor: Ojus Chugh <ojuschugh@gmail.com>\nDate: Sun Apr 12 10:00:00 2026\n\n feat: Add compression engine\n\ncommit b2c3d4e5\nAuthor: Ojus Chugh <ojuschugh@gmail.com>\nDate: Sat Apr 11 15:30:00 2026\n\n fix: Handle edge case\n";
        let compressed = "commit a1b2c3d4\n feat: Add compression engine\ncommit b2c3d4e5\n fix: Handle edge case\n";
        let result = Verifier::verify(original, compressed);
        assert!(result.confidence >= 0.7, "git log should verify well: {:.2}", result.confidence);
    }

    #[test]
    fn verify_json_api_with_stripped_nulls() {
        let original = r#"{"id":1,"name":"Alice","debug_info":null,"trace_id":null,"status":"active"}"#;
        let compressed = r#"TOON:{id:1,name:"Alice",status:"active"}"#;
        let result = Verifier::verify(original, compressed);
        assert!(result.checks_passed.contains(&"json_keys".to_string()),
            "60% key retention should pass: {:?}", result.checks_failed);
    }

    // ---- extract_preservation_tokens unit tests ----

    #[test]
    fn extract_detects_absolute_paths() {
        let tokens = extract_preservation_tokens("see /etc/myapp/config.yml for details");
        assert!(tokens.contains(&"/etc/myapp/config.yml".to_string()),
            "absolute path should be extracted: {:?}", tokens);
    }

    #[test]
    fn extract_detects_relative_paths() {
        let tokens = extract_preservation_tokens("edit src/main.rs and tests/util.rs");
        assert!(tokens.contains(&"src/main.rs".to_string()), "{:?}", tokens);
        assert!(tokens.contains(&"tests/util.rs".to_string()), "{:?}", tokens);
    }

    #[test]
    fn extract_detects_directory_listing_entries() {
        let input = "drwxr-xr-x user staff 192 Apr 18 packages/\n\
                     drwxr-xr-x user staff 96 Apr 18 configuration/\n";
        let tokens = extract_preservation_tokens(input);
        assert!(tokens.iter().any(|t| t.contains("packages")),
            "should extract packages: {:?}", tokens);
        assert!(tokens.iter().any(|t| t.contains("configuration")),
            "should extract configuration: {:?}", tokens);
    }

    #[test]
    fn extract_detects_urls() {
        let input = "clone from https://github.com/example/repository and \
                     read https://docs.example.com/guide.";
        let tokens = extract_preservation_tokens(input);
        assert!(tokens.contains(&"https://github.com/example/repository".to_string()),
            "{:?}", tokens);
        assert!(tokens.iter().any(|t| t.starts_with("https://docs.example.com")),
            "{:?}", tokens);
    }

    #[test]
    fn extract_detects_backtick_identifiers() {
        let tokens = extract_preservation_tokens(
            "use `SqzEngine::new` and `CompressionPipeline::compress`"
        );
        assert!(tokens.contains(&"SqzEngine::new".to_string()), "{:?}", tokens);
        assert!(tokens.contains(&"CompressionPipeline::compress".to_string()), "{:?}", tokens);
    }

    #[test]
    fn extract_detects_env_vars() {
        let tokens = extract_preservation_tokens("set $HOME and ${FOO_BAR} and $PATH");
        assert!(tokens.contains(&"$HOME".to_string()), "{:?}", tokens);
        assert!(tokens.contains(&"${FOO_BAR}".to_string()), "{:?}", tokens);
        assert!(tokens.contains(&"$PATH".to_string()), "{:?}", tokens);
    }

    #[test]
    fn extract_detects_version_numbers() {
        let tokens = extract_preservation_tokens(
            "upgrade to 1.2.3 from v0.7.0 and pin 2.0.0-beta.1"
        );
        assert!(tokens.iter().any(|t| t.starts_with("1.2.3")), "{:?}", tokens);
        assert!(tokens.iter().any(|t| t.starts_with("v0.7.0")), "{:?}", tokens);
    }

    #[test]
    fn extract_ignores_prose() {
        let tokens = extract_preservation_tokens(
            "The quick brown fox jumps over the lazy dog. Lorem ipsum dolor sit amet."
        );
        assert!(tokens.is_empty(), "prose should yield no preservation tokens: {:?}", tokens);
    }

    #[test]
    fn extract_ignores_fractions_in_prose() {
        let tokens = extract_preservation_tokens("We completed 3/4 of the tasks");
        assert!(tokens.iter().all(|t| !t.contains("3/4")),
            "fraction should not be extracted as path: {:?}", tokens);
    }

    #[test]
    fn extract_caps_at_max_tokens() {
        let mut input = String::new();
        for i in 0..1000 {
            input.push_str(&format!("file_{}/sub_{}.txt ", i, i));
        }
        let tokens = extract_preservation_tokens(&input);
        assert!(tokens.len() <= MAX_TOKENS, "should cap at {MAX_TOKENS}, got {}", tokens.len());
    }

    // ---- preservation check: lossy rewrites must be rejected ----

    #[test]
    fn verify_rejects_packages_to_pkgs_rewrite() {
        let original = "drwxr-xr-x user staff 192 Apr 18 packages/\n\
                        drwxr-xr-x user staff 128 Apr 18 documentation/\n";
        let compressed = "drwxr-xr-x user staff 192 Apr 18 pkgs/\n\
                          drwxr-xr-x user staff 128 Apr 18 docs/\n";
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_failed.iter().any(|(k, _)| k == "preservation"),
            "should fail preservation when packages→pkgs: {:?}", result.checks_failed
        );
    }

    #[test]
    fn verify_rejects_config_path_rewrite() {
        let original = "check /etc/myapp/configuration/default.yml for errors";
        let compressed = "check /etc/myapp/config/default.yml for errors";
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_failed.iter().any(|(k, _)| k == "preservation"),
            "should fail preservation when path segment rewritten: {:?}", result.checks_failed
        );
    }

    #[test]
    fn verify_rejects_github_repo_rewrite() {
        let original = "origin https://github.com/example/repository (fetch)";
        let compressed = "origin https://github.com/example/repo (fetch)";
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_failed.iter().any(|(k, _)| k == "preservation"),
            "should fail preservation when URL path rewritten: {:?}", result.checks_failed
        );
    }

    #[test]
    fn verify_rejects_drops_filenames_entirely() {
        let original = "drwxr-xr-x packages/\n\
                        drwxr-xr-x configuration/\n\
                        drwxr-xr-x documentation/\n\
                        drwxr-xr-x environment/\n";
        let compressed = "drwxr-xr-x ... [×4, varying: 4 unique values]\n";
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_failed.iter().any(|(k, _)| k == "preservation"),
            "should fail preservation when filenames dropped: {:?}", result.checks_failed
        );
    }

    // ---- preservation check: lossless transforms must be accepted ----

    #[test]
    fn verify_accepts_lossless_dedup_output() {
        let original = "see /etc/myapp/default.yml and src/main.rs";
        let compressed = "see /etc/myapp/default.yml and src/main.rs";
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_passed.contains(&"preservation".to_string()),
            "identical content must pass preservation: {:?}", result.checks_failed
        );
    }

    #[test]
    fn verify_accepts_json_null_stripped() {
        let original = r#"{"path":"/etc/foo.yml","debug":null,"log":"/var/log/app.log"}"#;
        let compressed = r#"TOON:{path:"/etc/foo.yml",log:"/var/log/app.log"}"#;
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_passed.contains(&"preservation".to_string()),
            "null-stripping must not trip preservation: {:?}", result.checks_failed
        );
    }

    #[test]
    fn verify_accepts_empty_input() {
        let result = Verifier::verify("", "");
        assert!(result.checks_passed.contains(&"preservation".to_string()));
    }

    #[test]
    fn preservation_failure_triggers_fallback() {
        let original = "commit: check /etc/myapp/configuration/default.yml\n\
                        file: src/main.rs line 42\n";
        let compressed = "commit: check /etc/myapp/config/default.yml\n\
                          file: src/main.rs line 42\n";
        let result = Verifier::verify(original, compressed);
        assert!(
            result.checks_failed.iter().any(|(k, _)| k == "preservation"),
            "preservation should fail"
        );
        assert!(
            result.fallback_triggered,
            "preservation failure alone must trigger fallback (confidence={:.2})",
            result.confidence
        );
    }
}