1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Mutex, OnceLock};
5
6static STATS: OnceLock<VerificationStats> = OnceLock::new();
7
8fn global_stats() -> &'static VerificationStats {
9 STATS.get_or_init(VerificationStats::new)
10}
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct VerificationConfig {
14 pub enabled: bool,
15 pub strict_mode: bool,
16 pub check_paths: bool,
17 pub check_identifiers: bool,
18 pub check_line_numbers: bool,
19 pub check_structure: bool,
20}
21
22impl Default for VerificationConfig {
23 fn default() -> Self {
24 Self {
25 enabled: true,
26 strict_mode: false,
27 check_paths: true,
28 check_identifiers: true,
29 check_line_numbers: false,
30 check_structure: true,
31 }
32 }
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
36pub enum WarningKind {
37 MissingPath,
38 MangledIdentifier,
39 LineNumberDrift,
40 TruncatedBlock,
41}
42
43impl std::fmt::Display for WarningKind {
44 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45 match self {
46 Self::MissingPath => write!(f, "missing_path"),
47 Self::MangledIdentifier => write!(f, "mangled_identifier"),
48 Self::LineNumberDrift => write!(f, "line_drift"),
49 Self::TruncatedBlock => write!(f, "truncated_block"),
50 }
51 }
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct VerificationWarning {
56 pub kind: WarningKind,
57 pub detail: String,
58 pub severity: WarningSeverity,
59}
60
61#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
62pub enum WarningSeverity {
63 Low,
64 Medium,
65 High,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct VerificationResult {
70 pub pass: bool,
71 pub warnings: Vec<VerificationWarning>,
72 pub info_loss_score: f64,
73 pub paths_checked: usize,
74 pub identifiers_checked: usize,
75}
76
77impl VerificationResult {
78 pub fn ok() -> Self {
79 Self {
80 pass: true,
81 warnings: Vec::new(),
82 info_loss_score: 0.0,
83 paths_checked: 0,
84 identifiers_checked: 0,
85 }
86 }
87
88 pub fn format_compact(&self) -> String {
89 if self.warnings.is_empty() {
90 return "PASS".to_string();
91 }
92 let status = if self.pass { "WARN" } else { "FAIL" };
93 let counts: Vec<String> = self
94 .warnings
95 .iter()
96 .fold(std::collections::HashMap::new(), |mut acc, w| {
97 *acc.entry(w.kind.to_string()).or_insert(0u32) += 1;
98 acc
99 })
100 .into_iter()
101 .map(|(k, v)| format!("{k}={v}"))
102 .collect();
103 format!(
104 "{status}({}) loss={:.1}%",
105 counts.join(", "),
106 self.info_loss_score * 100.0
107 )
108 }
109}
110
111pub fn verify_output(
112 source: &str,
113 compressed: &str,
114 config: &VerificationConfig,
115) -> VerificationResult {
116 if !config.enabled || source.is_empty() || compressed.is_empty() {
117 return VerificationResult::ok();
118 }
119
120 if source == compressed {
122 return VerificationResult::ok();
123 }
124
125 let mut warnings = Vec::new();
126 let mut paths_checked = 0;
127 let mut identifiers_checked = 0;
128
129 if config.check_paths {
130 let (path_warnings, count) = check_paths(source, compressed);
131 paths_checked = count;
132 warnings.extend(path_warnings);
133 }
134
135 if config.check_identifiers {
136 let (id_warnings, count) = check_identifiers(source, compressed);
137 identifiers_checked = count;
138 warnings.extend(id_warnings);
139 }
140
141 if config.check_line_numbers {
142 warnings.extend(check_line_numbers(source, compressed));
143 }
144
145 if config.check_structure {
146 warnings.extend(check_structure(source, compressed));
147 }
148
149 let total_checks = (paths_checked + identifiers_checked).max(1);
150 let loss_items = warnings
151 .iter()
152 .filter(|w| w.severity == WarningSeverity::High)
153 .count() as f64
154 * 2.0
155 + warnings
156 .iter()
157 .filter(|w| w.severity == WarningSeverity::Medium)
158 .count() as f64;
159 let info_loss_score = (loss_items / total_checks as f64).min(1.0);
160
161 let pass = if config.strict_mode {
162 !warnings
163 .iter()
164 .any(|w| w.severity == WarningSeverity::High || w.severity == WarningSeverity::Medium)
165 } else {
166 !warnings.iter().any(|w| w.severity == WarningSeverity::High)
167 };
168
169 let result = VerificationResult {
170 pass,
171 warnings,
172 info_loss_score,
173 paths_checked,
174 identifiers_checked,
175 };
176
177 record_result(&result);
178 result
179}
180
181fn check_paths(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
182 let paths = extract_file_paths(source);
183 let mut warnings = Vec::new();
184
185 for path in &paths {
186 let basename = path.rsplit('/').next().unwrap_or(path);
187 if !compressed.contains(basename) {
188 warnings.push(VerificationWarning {
189 kind: WarningKind::MissingPath,
190 detail: format!("Path reference lost: {path}"),
191 severity: WarningSeverity::Medium,
192 });
193 }
194 }
195
196 (warnings, paths.len())
197}
198
199fn check_identifiers(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
200 let identifiers = extract_identifiers(source);
201 let mut warnings = Vec::new();
202 let significant: Vec<&str> = identifiers
203 .iter()
204 .filter(|id| id.len() >= 4)
205 .map(String::as_str)
206 .collect();
207
208 for id in &significant {
209 if !compressed.contains(id) {
210 warnings.push(VerificationWarning {
211 kind: WarningKind::MangledIdentifier,
212 detail: format!("Identifier lost: {id}"),
213 severity: if id.len() >= 8 {
214 WarningSeverity::High
215 } else {
216 WarningSeverity::Low
217 },
218 });
219 }
220 }
221
222 (warnings, significant.len())
223}
224
225fn check_line_numbers(source: &str, compressed: &str) -> Vec<VerificationWarning> {
226 let source_max = source.lines().count();
227 let mut warnings = Vec::new();
228
229 let re_like = Regex::new(r"(?:line\s+|L|:)(\d{1,6})")
230 .ok()
231 .or_else(|| Regex::new(r"(\d+)").ok());
232
233 if let Some(re_like) = re_like {
234 for cap in re_like.captures_iter(compressed) {
235 if let Some(m) = cap.get(1) {
236 if let Ok(n) = m.as_str().parse::<usize>() {
237 if n > source_max && n < 999_999 {
238 warnings.push(VerificationWarning {
239 kind: WarningKind::LineNumberDrift,
240 detail: format!("Line {n} exceeds source max {source_max}"),
241 severity: WarningSeverity::Low,
242 });
243 }
244 }
245 }
246 }
247 }
248
249 warnings
250}
251
252fn check_structure(source: &str, compressed: &str) -> Vec<VerificationWarning> {
253 let mut warnings = Vec::new();
254
255 let src_opens: usize = source.chars().filter(|&c| c == '{').count();
256 let src_closes: usize = source.chars().filter(|&c| c == '}').count();
257 let src_diff = (src_opens as i64 - src_closes as i64).unsigned_abs();
258
259 let opens: usize = compressed.chars().filter(|&c| c == '{').count();
260 let closes: usize = compressed.chars().filter(|&c| c == '}').count();
261 if opens > 0 || closes > 0 {
262 let diff = (opens as i64 - closes as i64).unsigned_abs();
263 if diff > (src_diff + 2) && diff > 2 {
265 warnings.push(VerificationWarning {
266 kind: WarningKind::TruncatedBlock,
267 detail: format!("Brace mismatch: {{ {opens} vs }} {closes}"),
268 severity: WarningSeverity::Medium,
269 });
270 }
271 }
272
273 let src_parens_open: usize = source.chars().filter(|&c| c == '(').count();
274 let src_parens_close: usize = source.chars().filter(|&c| c == ')').count();
275 let src_parens_diff = (src_parens_open as i64 - src_parens_close as i64).unsigned_abs();
276
277 let parens_open: usize = compressed.chars().filter(|&c| c == '(').count();
278 let parens_close: usize = compressed.chars().filter(|&c| c == ')').count();
279 if parens_open > 0 || parens_close > 0 {
280 let diff = (parens_open as i64 - parens_close as i64).unsigned_abs();
281 if diff > (src_parens_diff + 3) && diff > 3 {
282 warnings.push(VerificationWarning {
283 kind: WarningKind::TruncatedBlock,
284 detail: format!("Paren mismatch: ( {parens_open} vs ) {parens_close}"),
285 severity: WarningSeverity::Low,
286 });
287 }
288 }
289
290 warnings
291}
292
293fn extract_file_paths(text: &str) -> Vec<String> {
294 let mut paths = Vec::new();
295 let re = Regex::new(
296 r#"(?:^|[\s"'`(,])([a-zA-Z0-9_./-]{2,}\.(?:rs|ts|tsx|js|jsx|py|go|java|rb|cpp|c|h|toml|yaml|yml|json|md))\b"#
297 )
298 .ok()
299 .or_else(|| Regex::new(r"(\S+\.\w+)").ok());
300
301 if let Some(re) = re {
302 for cap in re.captures_iter(text) {
303 if let Some(m) = cap.get(1) {
304 let p = m.as_str().to_string();
305 if !paths.contains(&p) && p.len() < 200 {
306 paths.push(p);
307 }
308 }
309 }
310 }
311 paths
312}
313
314fn extract_identifiers(text: &str) -> Vec<String> {
315 let mut ids = Vec::new();
316 let re = Regex::new(
317 r"\b(fn|struct|enum|trait|type|class|function|const|let|var|def|pub)\s+([a-zA-Z_][a-zA-Z0-9_]*)"
318 )
319 .ok()
320 .or_else(|| Regex::new(r"([a-zA-Z_]\w+)").ok());
321
322 if let Some(re) = re {
323 for cap in re.captures_iter(text) {
324 if let Some(m) = cap.get(2) {
325 let id = m.as_str().to_string();
326 if !ids.contains(&id) {
327 ids.push(id);
328 }
329 }
330 }
331 }
332 ids
333}
334
335struct VerificationStats {
336 pass_count: AtomicU64,
337 warn_run_count: AtomicU64,
338 warn_item_count: AtomicU64,
339 total_count: AtomicU64,
340 sum_info_loss_score_ppm: AtomicU64,
341 last_info_loss_score_ppm: AtomicU64,
342 recent_warnings: Mutex<Vec<VerificationWarning>>,
343}
344
345impl VerificationStats {
346 fn new() -> Self {
347 Self {
348 pass_count: AtomicU64::new(0),
349 warn_run_count: AtomicU64::new(0),
350 warn_item_count: AtomicU64::new(0),
351 total_count: AtomicU64::new(0),
352 sum_info_loss_score_ppm: AtomicU64::new(0),
353 last_info_loss_score_ppm: AtomicU64::new(0),
354 recent_warnings: Mutex::new(Vec::new()),
355 }
356 }
357}
358
359fn record_result(result: &VerificationResult) {
360 let stats = global_stats();
361 stats.total_count.fetch_add(1, Ordering::Relaxed);
362 if result.warnings.is_empty() {
363 stats.pass_count.fetch_add(1, Ordering::Relaxed);
364 } else {
365 stats.warn_run_count.fetch_add(1, Ordering::Relaxed);
366 stats
367 .warn_item_count
368 .fetch_add(result.warnings.len() as u64, Ordering::Relaxed);
369 }
370 let ppm = (result.info_loss_score.clamp(0.0, 1.0) * 1_000_000.0).round() as u64;
371 stats
372 .sum_info_loss_score_ppm
373 .fetch_add(ppm, Ordering::Relaxed);
374 stats.last_info_loss_score_ppm.store(ppm, Ordering::Relaxed);
375
376 if !result.warnings.is_empty() {
377 if let Ok(mut recent) = stats.recent_warnings.lock() {
378 for w in &result.warnings {
379 recent.push(w.clone());
380 }
381 if recent.len() > 200 {
382 let excess = recent.len() - 200;
383 recent.drain(..excess);
384 }
385 }
386
387 for w in &result.warnings {
388 crate::core::events::emit_verification_warning(
389 &w.kind.to_string(),
390 &w.detail,
391 &format!("{:?}", w.severity),
392 );
393 }
394 }
395}
396
397pub fn stats_snapshot() -> VerificationSnapshot {
398 let s = global_stats();
399 let total = s.total_count.load(Ordering::Relaxed);
400 let pass = s.pass_count.load(Ordering::Relaxed);
401 let warn_runs = s.warn_run_count.load(Ordering::Relaxed);
402 let warn_items = s.warn_item_count.load(Ordering::Relaxed);
403 let sum_ppm = s.sum_info_loss_score_ppm.load(Ordering::Relaxed);
404 let last_ppm = s.last_info_loss_score_ppm.load(Ordering::Relaxed);
405 let recent = s
406 .recent_warnings
407 .lock()
408 .map(|r| r.clone())
409 .unwrap_or_default();
410 VerificationSnapshot {
411 total,
412 pass,
413 warn_runs,
414 warn_items,
415 pass_rate: if total > 0 {
416 pass as f64 / total as f64
417 } else {
418 1.0
419 },
420 avg_info_loss_score: if total > 0 {
421 (sum_ppm as f64 / total as f64) / 1_000_000.0
422 } else {
423 0.0
424 },
425 last_info_loss_score: (last_ppm as f64) / 1_000_000.0,
426 recent_warnings: recent,
427 }
428}
429
430#[derive(Debug, Clone, Serialize)]
431pub struct VerificationSnapshot {
432 pub total: u64,
433 pub pass: u64,
434 pub warn_runs: u64,
435 pub warn_items: u64,
436 pub pass_rate: f64,
437 pub avg_info_loss_score: f64,
438 pub last_info_loss_score: f64,
439 pub recent_warnings: Vec<VerificationWarning>,
440}
441
442impl VerificationSnapshot {
443 pub fn format_compact(&self) -> String {
444 format!(
445 "Verification: {}/{} pass ({:.0}%), warn_runs={}, warn_items={}, loss(avg)={:.1}%",
446 self.pass,
447 self.total,
448 self.pass_rate * 100.0,
449 self.warn_runs,
450 self.warn_items,
451 self.avg_info_loss_score * 100.0
452 )
453 }
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration shared by the tests.
    fn cfg() -> VerificationConfig {
        VerificationConfig::default()
    }

    /// Number of warnings of `kind` carried by `result`.
    fn count_kind(result: &VerificationResult, kind: WarningKind) -> usize {
        result.warnings.iter().filter(|w| w.kind == kind).count()
    }

    #[test]
    fn empty_input_passes() {
        assert!(verify_output("", "", &cfg()).pass);
    }

    #[test]
    fn identical_passes() {
        let text = "fn hello() { println!(\"world\"); }";
        let result = verify_output(text, text, &cfg());
        assert!(result.pass);
        assert!(result.warnings.is_empty());
    }

    #[test]
    fn detects_missing_path() {
        let result = verify_output(
            "import { foo } from src/utils/helper.ts",
            "import foo",
            &cfg(),
        );
        assert!(count_kind(&result, WarningKind::MissingPath) > 0);
    }

    #[test]
    fn detects_lost_identifier() {
        let result = verify_output(
            "fn calculate_monthly_revenue(data: &[f64]) -> f64 { data.iter().sum() }",
            "fn calc() -> f64 { sum }",
            &cfg(),
        );
        assert!(count_kind(&result, WarningKind::MangledIdentifier) > 0);
    }

    #[test]
    fn detects_brace_mismatch() {
        let result = verify_output(
            "fn a() { if true { b(); } } fn c() { d(); } fn e() { f(); }",
            "fn a() { if true { b(); fn c() { d(); fn e() { f();",
            &cfg(),
        );
        assert!(count_kind(&result, WarningKind::TruncatedBlock) > 0);
    }

    #[test]
    fn preserved_identifiers_pass() {
        let result = verify_output(
            "fn process_data(input: Vec<u8>) -> Result<()> { Ok(()) }",
            "fn process_data(input: Vec<u8>) -> Result<()>",
            &cfg(),
        );
        assert_eq!(count_kind(&result, WarningKind::MangledIdentifier), 0);
    }

    #[test]
    fn extract_paths_finds_common_extensions() {
        let found = extract_file_paths("see src/core/auth.rs and lib/utils.py for details");
        for expected in ["auth.rs", "utils.py"] {
            assert!(found.iter().any(|p| p.contains(expected)));
        }
    }

    #[test]
    fn extract_identifiers_finds_functions() {
        let found = extract_identifiers(
            "fn calculate_total(x: i32) -> i32 { x }\nstruct UserProfile { name: String }",
        );
        for expected in ["calculate_total", "UserProfile"] {
            assert!(found.contains(&expected.to_string()));
        }
    }

    #[test]
    fn info_loss_score_bounded() {
        let result = verify_output(
            "fn very_long_function_name_here() {}\nfn another_significant_fn() {}",
            "compressed",
            &cfg(),
        );
        assert!((0.0..=1.0).contains(&result.info_loss_score));
    }

    #[test]
    fn snapshot_starts_clean() {
        let snapshot = stats_snapshot();
        assert!((0.0..=1.0).contains(&snapshot.pass_rate));
    }

    #[test]
    fn disabled_config_passes() {
        let disabled = VerificationConfig {
            enabled: false,
            ..cfg()
        };
        assert!(verify_output("fn foo() {}", "bar", &disabled).pass);
    }
}