1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Mutex, OnceLock};
5
6static STATS: OnceLock<VerificationStats> = OnceLock::new();
7
8fn global_stats() -> &'static VerificationStats {
9 STATS.get_or_init(VerificationStats::new)
10}
11
12#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13#[serde(default)]
14pub struct VerificationConfig {
15 pub enabled: Option<bool>,
16 pub mode: Option<String>,
21 pub strict_mode: Option<bool>,
22 pub check_paths: Option<bool>,
23 pub check_identifiers: Option<bool>,
24 pub check_line_numbers: Option<bool>,
25 pub check_structure: Option<bool>,
26}
27
28impl VerificationConfig {
29 pub fn enabled_effective(&self) -> bool {
30 self.enabled.unwrap_or(true)
31 }
32 pub fn strict_mode_effective(&self) -> bool {
33 self.strict_mode.unwrap_or(false)
34 }
35 pub fn check_paths_effective(&self) -> bool {
36 self.check_paths.unwrap_or(true)
37 }
38 pub fn check_identifiers_effective(&self) -> bool {
39 self.check_identifiers.unwrap_or(true)
40 }
41 pub fn check_line_numbers_effective(&self) -> bool {
42 self.check_line_numbers.unwrap_or(false)
43 }
44 pub fn check_structure_effective(&self) -> bool {
45 self.check_structure.unwrap_or(true)
46 }
47}
48
/// How verification findings are acted upon.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VerificationMode {
    /// Verification is switched off.
    Off,
    /// Default mode; also used for any unrecognised mode name.
    Warn,
    /// Strict mode.
    Fail,
}

/// Maps a mode name to a [`VerificationMode`].
///
/// Matching ignores surrounding whitespace and letter case. Names that are neither an
/// "off" alias nor a "fail" alias resolve to [`VerificationMode::Warn`].
fn parse_mode(s: &str) -> VerificationMode {
    const OFF_ALIASES: [&str; 3] = ["off", "disabled", "none"];
    const FAIL_ALIASES: [&str; 3] = ["fail", "strict", "enforce"];

    let normalized = s.trim().to_lowercase();
    let name = normalized.as_str();

    if OFF_ALIASES.contains(&name) {
        VerificationMode::Off
    } else if FAIL_ALIASES.contains(&name) {
        VerificationMode::Fail
    } else {
        VerificationMode::Warn
    }
}
63
64impl VerificationConfig {
65 fn effective_mode(&self) -> VerificationMode {
66 if let Some(m) = self.mode.as_deref() {
67 return parse_mode(m);
68 }
69 if !self.enabled_effective() {
70 return VerificationMode::Off;
71 }
72 if self.strict_mode_effective() {
73 VerificationMode::Fail
74 } else {
75 VerificationMode::Warn
76 }
77 }
78
79 fn is_enabled(&self) -> bool {
80 self.effective_mode() != VerificationMode::Off
81 }
82}
83
84#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
85pub enum WarningKind {
86 MissingPath,
87 MangledIdentifier,
88 LineNumberDrift,
89 TruncatedBlock,
90}
91
92impl std::fmt::Display for WarningKind {
93 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94 match self {
95 Self::MissingPath => write!(f, "missing_path"),
96 Self::MangledIdentifier => write!(f, "mangled_identifier"),
97 Self::LineNumberDrift => write!(f, "line_drift"),
98 Self::TruncatedBlock => write!(f, "truncated_block"),
99 }
100 }
101}
102
/// A single finding produced by one of the verification checks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationWarning {
    /// Category of the finding.
    pub kind: WarningKind,
    /// Human-readable description of what was lost or mismatched.
    pub detail: String,
    /// How serious the finding is.
    pub severity: WarningSeverity,
}
109
/// Severity attached to a [`VerificationWarning`].
///
/// In `verify_output`, any `High` warning makes the result fail, `Medium` warnings make
/// it fail only when the effective mode is `Fail`, and `Low` warnings never affect
/// `pass`. For the information-loss score, `High` counts double, `Medium` counts once
/// and `Low` is not counted.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum WarningSeverity {
    Low,
    Medium,
    High,
}
116
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct VerificationResult {
119 pub pass: bool,
120 pub warnings: Vec<VerificationWarning>,
121 pub info_loss_score: f64,
122 pub paths_checked: usize,
123 pub identifiers_checked: usize,
124}
125
126impl VerificationResult {
127 pub fn ok() -> Self {
128 Self {
129 pass: true,
130 warnings: Vec::new(),
131 info_loss_score: 0.0,
132 paths_checked: 0,
133 identifiers_checked: 0,
134 }
135 }
136
137 pub fn format_compact(&self) -> String {
138 if self.warnings.is_empty() {
139 return "PASS".to_string();
140 }
141 let status = if self.pass { "WARN" } else { "FAIL" };
142 let mut counts = std::collections::BTreeMap::<String, u32>::new();
143 for w in &self.warnings {
144 *counts.entry(w.kind.to_string()).or_insert(0) += 1;
145 }
146 let counts: Vec<String> = counts
147 .into_iter()
148 .map(|(k, v)| format!("{k}={v}"))
149 .collect();
150 format!(
151 "{status}({}) loss={:.1}%",
152 counts.join(", "),
153 self.info_loss_score * 100.0
154 )
155 }
156}
157
158pub fn verify_output(
159 source: &str,
160 compressed: &str,
161 config: &VerificationConfig,
162) -> VerificationResult {
163 if !config.is_enabled() || source.is_empty() || compressed.is_empty() {
164 return VerificationResult::ok();
165 }
166
167 if source == compressed {
169 return VerificationResult::ok();
170 }
171
172 let mut warnings = Vec::new();
173 let mut paths_checked = 0;
174 let mut identifiers_checked = 0;
175
176 if config.check_paths_effective() {
177 let (path_warnings, count) = check_paths(source, compressed);
178 paths_checked = count;
179 warnings.extend(path_warnings);
180 }
181
182 if config.check_identifiers_effective() {
183 let (id_warnings, count) = check_identifiers(source, compressed);
184 identifiers_checked = count;
185 warnings.extend(id_warnings);
186 }
187
188 if config.check_line_numbers_effective() {
189 warnings.extend(check_line_numbers(source, compressed));
190 }
191
192 if config.check_structure_effective() {
193 warnings.extend(check_structure(source, compressed));
194 }
195
196 let total_checks = (paths_checked + identifiers_checked).max(1);
197 let loss_items = warnings
198 .iter()
199 .filter(|w| w.severity == WarningSeverity::High)
200 .count() as f64
201 * 2.0
202 + warnings
203 .iter()
204 .filter(|w| w.severity == WarningSeverity::Medium)
205 .count() as f64;
206 let info_loss_score = (loss_items / total_checks as f64).min(1.0);
207
208 let mode = config.effective_mode();
209 let pass = if mode == VerificationMode::Fail {
210 !warnings
211 .iter()
212 .any(|w| w.severity == WarningSeverity::High || w.severity == WarningSeverity::Medium)
213 } else {
214 !warnings.iter().any(|w| w.severity == WarningSeverity::High)
215 };
216
217 let result = VerificationResult {
218 pass,
219 warnings,
220 info_loss_score,
221 paths_checked,
222 identifiers_checked,
223 };
224
225 record_result(&result);
226 result
227}
228
229fn check_paths(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
230 let paths = extract_file_paths(source);
231 let mut warnings = Vec::new();
232
233 for path in &paths {
234 let basename = path.rsplit('/').next().unwrap_or(path);
235 if !compressed.contains(basename) {
236 warnings.push(VerificationWarning {
237 kind: WarningKind::MissingPath,
238 detail: format!("Path reference lost: {path}"),
239 severity: WarningSeverity::Medium,
240 });
241 }
242 }
243
244 (warnings, paths.len())
245}
246
247fn check_identifiers(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
248 let identifiers = extract_identifiers(source);
249 let mut warnings = Vec::new();
250 let significant: Vec<&str> = identifiers
251 .iter()
252 .filter(|id| id.len() >= 4)
253 .map(String::as_str)
254 .collect();
255
256 for id in &significant {
257 if !compressed.contains(id) {
258 warnings.push(VerificationWarning {
259 kind: WarningKind::MangledIdentifier,
260 detail: format!("Identifier lost: {id}"),
261 severity: if id.len() >= 8 {
262 WarningSeverity::High
263 } else {
264 WarningSeverity::Low
265 },
266 });
267 }
268 }
269
270 (warnings, significant.len())
271}
272
273fn check_line_numbers(source: &str, compressed: &str) -> Vec<VerificationWarning> {
274 let source_max = source.lines().count();
275 let mut warnings = Vec::new();
276
277 let re_like = Regex::new(r"(?:line\s+|L|:)(\d{1,6})")
278 .ok()
279 .or_else(|| Regex::new(r"(\d+)").ok());
280
281 if let Some(re_like) = re_like {
282 for cap in re_like.captures_iter(compressed) {
283 if let Some(m) = cap.get(1) {
284 if let Ok(n) = m.as_str().parse::<usize>() {
285 if n > source_max && n < 999_999 {
286 warnings.push(VerificationWarning {
287 kind: WarningKind::LineNumberDrift,
288 detail: format!("Line {n} exceeds source max {source_max}"),
289 severity: WarningSeverity::Low,
290 });
291 }
292 }
293 }
294 }
295 }
296
297 warnings
298}
299
300fn check_structure(source: &str, compressed: &str) -> Vec<VerificationWarning> {
301 let mut warnings = Vec::new();
302
303 let src_opens: usize = source.chars().filter(|&c| c == '{').count();
304 let src_closes: usize = source.chars().filter(|&c| c == '}').count();
305 let src_diff = (src_opens as i64 - src_closes as i64).unsigned_abs();
306
307 let opens: usize = compressed.chars().filter(|&c| c == '{').count();
308 let closes: usize = compressed.chars().filter(|&c| c == '}').count();
309 if opens > 0 || closes > 0 {
310 let diff = (opens as i64 - closes as i64).unsigned_abs();
311 if diff > (src_diff + 2) && diff > 2 {
313 warnings.push(VerificationWarning {
314 kind: WarningKind::TruncatedBlock,
315 detail: format!("Brace mismatch: {{ {opens} vs }} {closes}"),
316 severity: WarningSeverity::Medium,
317 });
318 }
319 }
320
321 let src_parens_open: usize = source.chars().filter(|&c| c == '(').count();
322 let src_parens_close: usize = source.chars().filter(|&c| c == ')').count();
323 let src_parens_diff = (src_parens_open as i64 - src_parens_close as i64).unsigned_abs();
324
325 let parens_open: usize = compressed.chars().filter(|&c| c == '(').count();
326 let parens_close: usize = compressed.chars().filter(|&c| c == ')').count();
327 if parens_open > 0 || parens_close > 0 {
328 let diff = (parens_open as i64 - parens_close as i64).unsigned_abs();
329 if diff > (src_parens_diff + 3) && diff > 3 {
330 warnings.push(VerificationWarning {
331 kind: WarningKind::TruncatedBlock,
332 detail: format!("Paren mismatch: ( {parens_open} vs ) {parens_close}"),
333 severity: WarningSeverity::Low,
334 });
335 }
336 }
337
338 warnings
339}
340
341fn extract_file_paths(text: &str) -> Vec<String> {
342 let mut paths = Vec::new();
343 let re = Regex::new(
344 r#"(?:^|[\s"'`(,])([a-zA-Z0-9_./-]{2,}\.(?:rs|ts|tsx|js|jsx|py|go|java|rb|cpp|c|h|toml|yaml|yml|json|md))\b"#
345 )
346 .ok()
347 .or_else(|| Regex::new(r"(\S+\.\w+)").ok());
348
349 if let Some(re) = re {
350 for cap in re.captures_iter(text) {
351 if let Some(m) = cap.get(1) {
352 let p = m.as_str().to_string();
353 if !paths.contains(&p) && p.len() < 200 {
354 paths.push(p);
355 }
356 }
357 }
358 }
359 paths
360}
361
362fn extract_identifiers(text: &str) -> Vec<String> {
363 let mut ids = Vec::new();
364 let re = Regex::new(
365 r"\b(fn|struct|enum|trait|type|class|function|const|let|var|def|pub)\s+([a-zA-Z_][a-zA-Z0-9_]*)"
366 )
367 .ok()
368 .or_else(|| Regex::new(r"([a-zA-Z_]\w+)").ok());
369
370 if let Some(re) = re {
371 for cap in re.captures_iter(text) {
372 if let Some(m) = cap.get(2) {
373 let id = m.as_str().to_string();
374 if !ids.contains(&id) {
375 ids.push(id);
376 }
377 }
378 }
379 }
380 ids
381}
382
383struct VerificationStats {
384 pass_count: AtomicU64,
385 warn_run_count: AtomicU64,
386 warn_item_count: AtomicU64,
387 total_count: AtomicU64,
388 sum_info_loss_score_ppm: AtomicU64,
389 last_info_loss_score_ppm: AtomicU64,
390 recent_warnings: Mutex<Vec<VerificationWarning>>,
391}
392
393impl VerificationStats {
394 fn new() -> Self {
395 Self {
396 pass_count: AtomicU64::new(0),
397 warn_run_count: AtomicU64::new(0),
398 warn_item_count: AtomicU64::new(0),
399 total_count: AtomicU64::new(0),
400 sum_info_loss_score_ppm: AtomicU64::new(0),
401 last_info_loss_score_ppm: AtomicU64::new(0),
402 recent_warnings: Mutex::new(Vec::new()),
403 }
404 }
405}
406
407fn record_result(result: &VerificationResult) {
408 let stats = global_stats();
409 stats.total_count.fetch_add(1, Ordering::Relaxed);
410 if result.warnings.is_empty() {
411 stats.pass_count.fetch_add(1, Ordering::Relaxed);
412 } else {
413 stats.warn_run_count.fetch_add(1, Ordering::Relaxed);
414 stats
415 .warn_item_count
416 .fetch_add(result.warnings.len() as u64, Ordering::Relaxed);
417 }
418 let ppm = (result.info_loss_score.clamp(0.0, 1.0) * 1_000_000.0).round() as u64;
419 stats
420 .sum_info_loss_score_ppm
421 .fetch_add(ppm, Ordering::Relaxed);
422 stats.last_info_loss_score_ppm.store(ppm, Ordering::Relaxed);
423
424 if !result.warnings.is_empty() {
425 if let Ok(mut recent) = stats.recent_warnings.lock() {
426 for w in &result.warnings {
427 recent.push(w.clone());
428 }
429 if recent.len() > 200 {
430 let excess = recent.len() - 200;
431 recent.drain(..excess);
432 }
433 }
434
435 for w in &result.warnings {
436 crate::core::events::emit_verification_warning(
437 &w.kind.to_string(),
438 &w.detail,
439 &format!("{:?}", w.severity),
440 );
441 }
442 }
443}
444
445pub fn stats_snapshot() -> VerificationSnapshot {
446 let s = global_stats();
447 let total = s.total_count.load(Ordering::Relaxed);
448 let pass = s.pass_count.load(Ordering::Relaxed);
449 let warn_runs = s.warn_run_count.load(Ordering::Relaxed);
450 let warn_items = s.warn_item_count.load(Ordering::Relaxed);
451 let sum_ppm = s.sum_info_loss_score_ppm.load(Ordering::Relaxed);
452 let last_ppm = s.last_info_loss_score_ppm.load(Ordering::Relaxed);
453 let recent = s
454 .recent_warnings
455 .lock()
456 .map(|r| r.clone())
457 .unwrap_or_default();
458 VerificationSnapshot {
459 total,
460 pass,
461 warn_runs,
462 warn_items,
463 pass_rate: if total > 0 {
464 pass as f64 / total as f64
465 } else {
466 1.0
467 },
468 avg_info_loss_score: if total > 0 {
469 (sum_ppm as f64 / total as f64) / 1_000_000.0
470 } else {
471 0.0
472 },
473 last_info_loss_score: (last_ppm as f64) / 1_000_000.0,
474 recent_warnings: recent,
475 }
476}
477
478#[derive(Debug, Clone, Serialize)]
479pub struct VerificationSnapshot {
480 pub total: u64,
481 pub pass: u64,
482 pub warn_runs: u64,
483 pub warn_items: u64,
484 pub pass_rate: f64,
485 pub avg_info_loss_score: f64,
486 pub last_info_loss_score: f64,
487 pub recent_warnings: Vec<VerificationWarning>,
488}
489
490impl VerificationSnapshot {
491 pub fn format_compact(&self) -> String {
492 format!(
493 "Verification: {}/{} pass ({:.0}%), warn_runs={}, warn_items={}, loss(avg)={:.1}%",
494 self.pass,
495 self.total,
496 self.pass_rate * 100.0,
497 self.warn_runs,
498 self.warn_items,
499 self.avg_info_loss_score * 100.0
500 )
501 }
502}
503
// Unit tests for the verification checks, the extraction helpers and the compact
// formatting.
#[cfg(test)]
mod tests {
    use super::*;

    // Default configuration: every option left unset.
    fn cfg() -> VerificationConfig {
        VerificationConfig::default()
    }

    // Empty source/compressed text short-circuits to a passing result.
    #[test]
    fn empty_input_passes() {
        let r = verify_output("", "", &cfg());
        assert!(r.pass);
    }

    // Identical texts short-circuit to a passing result with no warnings.
    #[test]
    fn identical_passes() {
        let src = "fn hello() { println!(\"world\"); }";
        let r = verify_output(src, src, &cfg());
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    // A path whose file name vanishes from the compressed text is reported.
    #[test]
    fn detects_missing_path() {
        let src = "import { foo } from src/utils/helper.ts";
        let compressed = "import foo";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::MissingPath));
    }

    // A declared function name that is shortened in the compressed text is reported.
    #[test]
    fn detects_lost_identifier() {
        let src = "fn calculate_monthly_revenue(data: &[f64]) -> f64 { data.iter().sum() }";
        let compressed = "fn calc() -> f64 { sum }";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::MangledIdentifier));
    }

    // The source is balanced; the compressed text has 5 `{` and no `}`, which exceeds
    // the allowed brace gap.
    #[test]
    fn detects_brace_mismatch() {
        let src = "fn a() { if true { b(); } } fn c() { d(); } fn e() { f(); }";
        let compressed = "fn a() { if true { b(); fn c() { d(); fn e() { f();";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::TruncatedBlock));
    }

    // Dropping only the body keeps the declared name, so no identifier warning is raised.
    #[test]
    fn preserved_identifiers_pass() {
        let src = "fn process_data(input: Vec<u8>) -> Result<()> { Ok(()) }";
        let compressed = "fn process_data(input: Vec<u8>) -> Result<()>";
        let r = verify_output(src, compressed, &cfg());
        let mangled = r
            .warnings
            .iter()
            .filter(|w| w.kind == WarningKind::MangledIdentifier)
            .count();
        assert_eq!(mangled, 0);
    }

    // Paths embedded in prose are picked up for both `.rs` and `.py`.
    #[test]
    fn extract_paths_finds_common_extensions() {
        let text = "see src/core/auth.rs and lib/utils.py for details";
        let paths = extract_file_paths(text);
        assert!(paths.iter().any(|p| p.contains("auth.rs")));
        assert!(paths.iter().any(|p| p.contains("utils.py")));
    }

    // Names following `fn` and `struct` are both extracted.
    #[test]
    fn extract_identifiers_finds_functions() {
        let text = "fn calculate_total(x: i32) -> i32 { x }\nstruct UserProfile { name: String }";
        let ids = extract_identifiers(text);
        assert!(ids.contains(&"calculate_total".to_string()));
        assert!(ids.contains(&"UserProfile".to_string()));
    }

    // Even when every identifier is lost, the score stays within 0.0..=1.0.
    #[test]
    fn info_loss_score_bounded() {
        let src = "fn very_long_function_name_here() {}\nfn another_significant_fn() {}";
        let compressed = "compressed";
        let r = verify_output(src, compressed, &cfg());
        assert!(r.info_loss_score >= 0.0);
        assert!(r.info_loss_score <= 1.0);
    }

    // Only asserts that the reported pass rate is a valid ratio.
    #[test]
    fn snapshot_starts_clean() {
        let snap = stats_snapshot();
        assert!(snap.pass_rate >= 0.0);
        assert!(snap.pass_rate <= 1.0);
    }

    // With `enabled = false` and no explicit mode, verification is skipped entirely.
    #[test]
    fn disabled_config_passes() {
        let mut c = cfg();
        c.enabled = Some(false);
        let r = verify_output("fn foo() {}", "bar", &c);
        assert!(r.pass);
    }

    // A missing path is a Medium warning, which fails the run only in strict mode.
    #[test]
    fn strict_mode_fails_on_medium() {
        let mut c = cfg();
        c.strict_mode = Some(true);
        let src = "import { foo } from src/utils/helper.ts";
        let compressed = "import foo";
        let r = verify_output(src, compressed, &c);
        assert!(!r.pass, "strict mode should FAIL on medium warnings");
        assert!(
            r.format_compact().starts_with("FAIL("),
            "compact should show FAIL: {}",
            r.format_compact()
        );
    }

    // Kind labels in the compact summary must appear in sorted order.
    #[test]
    fn compact_format_is_deterministic_and_sorted() {
        let src = "fn calculate_monthly_revenue() {} see src/utils/helper.ts";
        let compressed = "compressed";
        let r = verify_output(src, compressed, &cfg());
        let s = r.format_compact();
        let want_order = ["mangled_identifier", "missing_path"];
        // Each label that is present must start at or after the previous one.
        let mut idx = 0usize;
        for k in want_order {
            if let Some(pos) = s.find(k) {
                assert!(pos >= idx, "expected sorted keys in: {s}");
                idx = pos;
            }
        }
    }
}