1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Mutex, OnceLock};
5
/// Process-wide verification counters; empty until the first call to
/// [`global_stats`].
static STATS: OnceLock<VerificationStats> = OnceLock::new();

/// Returns the shared [`VerificationStats`], building it with
/// [`VerificationStats::new`] the first time it is requested.
fn global_stats() -> &'static VerificationStats {
    STATS.get_or_init(VerificationStats::new)
}
11
/// Switches controlling which checks [`verify_output`] runs and how strictly
/// the outcome is judged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationConfig {
    /// When `false`, `verify_output` returns a passing result immediately.
    pub enabled: bool,
    /// When `true`, a `Medium` warning fails the run as well as a `High` one.
    pub strict_mode: bool,
    /// Run the file-path preservation check.
    pub check_paths: bool,
    /// Run the declared-identifier preservation check.
    pub check_identifiers: bool,
    /// Run the line-number range check.
    pub check_line_numbers: bool,
    /// Run the brace / parenthesis balance check.
    pub check_structure: bool,
}
21
22impl Default for VerificationConfig {
23 fn default() -> Self {
24 Self {
25 enabled: true,
26 strict_mode: false,
27 check_paths: true,
28 check_identifiers: true,
29 check_line_numbers: false,
30 check_structure: true,
31 }
32 }
33}
34
/// Category of a problem found while comparing compressed output with its
/// source.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum WarningKind {
    /// A file path found in the source has no basename match in the output
    /// (emitted by `check_paths`).
    MissingPath,
    /// A declared identifier from the source is absent from the output
    /// (emitted by `check_identifiers`).
    MangledIdentifier,
    /// The output mentions a line number larger than the source's line count
    /// (emitted by `check_line_numbers`).
    LineNumberDrift,
    /// Brace or parenthesis counts in the output are out of balance
    /// (emitted by `check_structure`).
    TruncatedBlock,
}
42
43impl std::fmt::Display for WarningKind {
44 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45 match self {
46 Self::MissingPath => write!(f, "missing_path"),
47 Self::MangledIdentifier => write!(f, "mangled_identifier"),
48 Self::LineNumberDrift => write!(f, "line_drift"),
49 Self::TruncatedBlock => write!(f, "truncated_block"),
50 }
51 }
52}
53
/// A single finding produced by one of the verification checks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationWarning {
    /// Which check category raised the warning.
    pub kind: WarningKind,
    /// Human-readable description of what was lost or mismatched.
    pub detail: String,
    /// How much weight the warning carries in pass/fail and loss scoring.
    pub severity: WarningSeverity,
}
60
/// Weight of a [`VerificationWarning`] as interpreted by `verify_output`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum WarningSeverity {
    /// Reported only; affects neither the pass flag nor the loss score.
    Low,
    /// Counts once toward the loss score; fails the run only in strict mode.
    Medium,
    /// Counts twice toward the loss score; always fails the run.
    High,
}
67
/// Outcome of one `verify_output` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationResult {
    /// `true` when no warning severe enough to fail the run was raised.
    pub pass: bool,
    /// Every warning collected from the enabled checks.
    pub warnings: Vec<VerificationWarning>,
    /// Severity-weighted warnings divided by the number of checked items,
    /// capped at 1.0.
    pub info_loss_score: f64,
    /// Number of distinct file paths examined by the path check.
    pub paths_checked: usize,
    /// Number of identifiers examined by the identifier check.
    pub identifiers_checked: usize,
}
76
77impl VerificationResult {
78 pub fn ok() -> Self {
79 Self {
80 pass: true,
81 warnings: Vec::new(),
82 info_loss_score: 0.0,
83 paths_checked: 0,
84 identifiers_checked: 0,
85 }
86 }
87
88 pub fn format_compact(&self) -> String {
89 if self.pass {
90 return "PASS".to_string();
91 }
92 let counts: Vec<String> = self
93 .warnings
94 .iter()
95 .fold(std::collections::HashMap::new(), |mut acc, w| {
96 *acc.entry(w.kind.to_string()).or_insert(0u32) += 1;
97 acc
98 })
99 .into_iter()
100 .map(|(k, v)| format!("{k}={v}"))
101 .collect();
102 format!(
103 "WARN({}) loss={:.1}%",
104 counts.join(", "),
105 self.info_loss_score * 100.0
106 )
107 }
108}
109
110pub fn verify_output(
111 source: &str,
112 compressed: &str,
113 config: &VerificationConfig,
114) -> VerificationResult {
115 if !config.enabled || source.is_empty() || compressed.is_empty() {
116 return VerificationResult::ok();
117 }
118
119 let mut warnings = Vec::new();
120 let mut paths_checked = 0;
121 let mut identifiers_checked = 0;
122
123 if config.check_paths {
124 let (path_warnings, count) = check_paths(source, compressed);
125 paths_checked = count;
126 warnings.extend(path_warnings);
127 }
128
129 if config.check_identifiers {
130 let (id_warnings, count) = check_identifiers(source, compressed);
131 identifiers_checked = count;
132 warnings.extend(id_warnings);
133 }
134
135 if config.check_line_numbers {
136 warnings.extend(check_line_numbers(source, compressed));
137 }
138
139 if config.check_structure {
140 warnings.extend(check_structure(compressed));
141 }
142
143 let total_checks = (paths_checked + identifiers_checked).max(1);
144 let loss_items = warnings
145 .iter()
146 .filter(|w| w.severity == WarningSeverity::High)
147 .count() as f64
148 * 2.0
149 + warnings
150 .iter()
151 .filter(|w| w.severity == WarningSeverity::Medium)
152 .count() as f64;
153 let info_loss_score = (loss_items / total_checks as f64).min(1.0);
154
155 let pass = if config.strict_mode {
156 !warnings
157 .iter()
158 .any(|w| w.severity == WarningSeverity::High || w.severity == WarningSeverity::Medium)
159 } else {
160 !warnings.iter().any(|w| w.severity == WarningSeverity::High)
161 };
162
163 let result = VerificationResult {
164 pass,
165 warnings,
166 info_loss_score,
167 paths_checked,
168 identifiers_checked,
169 };
170
171 record_result(&result);
172 result
173}
174
175fn check_paths(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
176 let paths = extract_file_paths(source);
177 let mut warnings = Vec::new();
178
179 for path in &paths {
180 let basename = path.rsplit('/').next().unwrap_or(path);
181 if !compressed.contains(basename) {
182 warnings.push(VerificationWarning {
183 kind: WarningKind::MissingPath,
184 detail: format!("Path reference lost: {path}"),
185 severity: WarningSeverity::Medium,
186 });
187 }
188 }
189
190 (warnings, paths.len())
191}
192
193fn check_identifiers(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
194 let identifiers = extract_identifiers(source);
195 let mut warnings = Vec::new();
196 let significant: Vec<&str> = identifiers
197 .iter()
198 .filter(|id| id.len() >= 4)
199 .map(String::as_str)
200 .collect();
201
202 for id in &significant {
203 if !compressed.contains(id) {
204 warnings.push(VerificationWarning {
205 kind: WarningKind::MangledIdentifier,
206 detail: format!("Identifier lost: {id}"),
207 severity: if id.len() >= 8 {
208 WarningSeverity::High
209 } else {
210 WarningSeverity::Low
211 },
212 });
213 }
214 }
215
216 (warnings, significant.len())
217}
218
219fn check_line_numbers(source: &str, compressed: &str) -> Vec<VerificationWarning> {
220 let source_max = source.lines().count();
221 let mut warnings = Vec::new();
222
223 let re_like = Regex::new(r"(?:line\s+|L|:)(\d{1,6})")
224 .ok()
225 .or_else(|| Regex::new(r"(\d+)").ok());
226
227 if let Some(re_like) = re_like {
228 for cap in re_like.captures_iter(compressed) {
229 if let Some(m) = cap.get(1) {
230 if let Ok(n) = m.as_str().parse::<usize>() {
231 if n > source_max && n < 999_999 {
232 warnings.push(VerificationWarning {
233 kind: WarningKind::LineNumberDrift,
234 detail: format!("Line {n} exceeds source max {source_max}"),
235 severity: WarningSeverity::Low,
236 });
237 }
238 }
239 }
240 }
241 }
242
243 warnings
244}
245
246fn check_structure(compressed: &str) -> Vec<VerificationWarning> {
247 let mut warnings = Vec::new();
248
249 let opens: usize = compressed.chars().filter(|&c| c == '{').count();
250 let closes: usize = compressed.chars().filter(|&c| c == '}').count();
251 if opens > 0 || closes > 0 {
252 let diff = (opens as i64 - closes as i64).unsigned_abs();
253 if diff > 2 {
254 warnings.push(VerificationWarning {
255 kind: WarningKind::TruncatedBlock,
256 detail: format!("Brace mismatch: {{ {opens} vs }} {closes}"),
257 severity: WarningSeverity::Medium,
258 });
259 }
260 }
261
262 let parens_open: usize = compressed.chars().filter(|&c| c == '(').count();
263 let parens_close: usize = compressed.chars().filter(|&c| c == ')').count();
264 if parens_open > 0 || parens_close > 0 {
265 let diff = (parens_open as i64 - parens_close as i64).unsigned_abs();
266 if diff > 3 {
267 warnings.push(VerificationWarning {
268 kind: WarningKind::TruncatedBlock,
269 detail: format!("Paren mismatch: ( {parens_open} vs ) {parens_close}"),
270 severity: WarningSeverity::Low,
271 });
272 }
273 }
274
275 warnings
276}
277
278fn extract_file_paths(text: &str) -> Vec<String> {
279 let mut paths = Vec::new();
280 let re = Regex::new(
281 r#"(?:^|[\s"'`(,])([a-zA-Z0-9_./-]{2,}\.(?:rs|ts|tsx|js|jsx|py|go|java|rb|cpp|c|h|toml|yaml|yml|json|md))\b"#
282 )
283 .ok()
284 .or_else(|| Regex::new(r"(\S+\.\w+)").ok());
285
286 if let Some(re) = re {
287 for cap in re.captures_iter(text) {
288 if let Some(m) = cap.get(1) {
289 let p = m.as_str().to_string();
290 if !paths.contains(&p) && p.len() < 200 {
291 paths.push(p);
292 }
293 }
294 }
295 }
296 paths
297}
298
299fn extract_identifiers(text: &str) -> Vec<String> {
300 let mut ids = Vec::new();
301 let re = Regex::new(
302 r"\b(fn|struct|enum|trait|type|class|function|const|let|var|def|pub)\s+([a-zA-Z_][a-zA-Z0-9_]*)"
303 )
304 .ok()
305 .or_else(|| Regex::new(r"([a-zA-Z_]\w+)").ok());
306
307 if let Some(re) = re {
308 for cap in re.captures_iter(text) {
309 if let Some(m) = cap.get(2) {
310 let id = m.as_str().to_string();
311 if !ids.contains(&id) {
312 ids.push(id);
313 }
314 }
315 }
316 }
317 ids
318}
319
/// Running counters over every result passed to `record_result`.
struct VerificationStats {
    /// Runs that produced no warnings at all.
    pass_count: AtomicU64,
    /// Runs that produced at least one warning.
    warn_run_count: AtomicU64,
    /// Total number of warnings across all runs.
    warn_item_count: AtomicU64,
    /// Total number of recorded runs.
    total_count: AtomicU64,
    /// Sum of every run's loss score, stored in parts per million.
    sum_info_loss_score_ppm: AtomicU64,
    /// Loss score of the most recently recorded run, in parts per million.
    last_info_loss_score_ppm: AtomicU64,
    /// Most recent warnings; `record_result` trims this to the newest 200.
    recent_warnings: Mutex<Vec<VerificationWarning>>,
}
329
330impl VerificationStats {
331 fn new() -> Self {
332 Self {
333 pass_count: AtomicU64::new(0),
334 warn_run_count: AtomicU64::new(0),
335 warn_item_count: AtomicU64::new(0),
336 total_count: AtomicU64::new(0),
337 sum_info_loss_score_ppm: AtomicU64::new(0),
338 last_info_loss_score_ppm: AtomicU64::new(0),
339 recent_warnings: Mutex::new(Vec::new()),
340 }
341 }
342}
343
344fn record_result(result: &VerificationResult) {
345 let stats = global_stats();
346 stats.total_count.fetch_add(1, Ordering::Relaxed);
347 if result.warnings.is_empty() {
348 stats.pass_count.fetch_add(1, Ordering::Relaxed);
349 } else {
350 stats.warn_run_count.fetch_add(1, Ordering::Relaxed);
351 stats
352 .warn_item_count
353 .fetch_add(result.warnings.len() as u64, Ordering::Relaxed);
354 }
355 let ppm = (result.info_loss_score.clamp(0.0, 1.0) * 1_000_000.0).round() as u64;
356 stats
357 .sum_info_loss_score_ppm
358 .fetch_add(ppm, Ordering::Relaxed);
359 stats.last_info_loss_score_ppm.store(ppm, Ordering::Relaxed);
360
361 if !result.warnings.is_empty() {
362 if let Ok(mut recent) = stats.recent_warnings.lock() {
363 for w in &result.warnings {
364 recent.push(w.clone());
365 }
366 if recent.len() > 200 {
367 let excess = recent.len() - 200;
368 recent.drain(..excess);
369 }
370 }
371
372 for w in &result.warnings {
373 crate::core::events::emit_verification_warning(
374 &w.kind.to_string(),
375 &w.detail,
376 &format!("{:?}", w.severity),
377 );
378 }
379 }
380}
381
382pub fn stats_snapshot() -> VerificationSnapshot {
383 let s = global_stats();
384 let total = s.total_count.load(Ordering::Relaxed);
385 let pass = s.pass_count.load(Ordering::Relaxed);
386 let warn_runs = s.warn_run_count.load(Ordering::Relaxed);
387 let warn_items = s.warn_item_count.load(Ordering::Relaxed);
388 let sum_ppm = s.sum_info_loss_score_ppm.load(Ordering::Relaxed);
389 let last_ppm = s.last_info_loss_score_ppm.load(Ordering::Relaxed);
390 let recent = s
391 .recent_warnings
392 .lock()
393 .map(|r| r.clone())
394 .unwrap_or_default();
395 VerificationSnapshot {
396 total,
397 pass,
398 warn_runs,
399 warn_items,
400 pass_rate: if total > 0 {
401 pass as f64 / total as f64
402 } else {
403 1.0
404 },
405 avg_info_loss_score: if total > 0 {
406 (sum_ppm as f64 / total as f64) / 1_000_000.0
407 } else {
408 0.0
409 },
410 last_info_loss_score: (last_ppm as f64) / 1_000_000.0,
411 recent_warnings: recent,
412 }
413}
414
/// Point-in-time copy of the global verification statistics, as produced by
/// `stats_snapshot`.
#[derive(Debug, Clone, Serialize)]
pub struct VerificationSnapshot {
    /// Number of recorded runs.
    pub total: u64,
    /// Number of runs that produced no warnings.
    pub pass: u64,
    /// Number of runs that produced at least one warning.
    pub warn_runs: u64,
    /// Total number of warnings across all runs.
    pub warn_items: u64,
    /// `pass / total`, or 1.0 when nothing has been recorded.
    pub pass_rate: f64,
    /// Mean loss score over all runs (0.0 when nothing has been recorded).
    pub avg_info_loss_score: f64,
    /// Loss score of the most recently recorded run.
    pub last_info_loss_score: f64,
    /// Copy of the retained recent warnings.
    pub recent_warnings: Vec<VerificationWarning>,
}
426
427impl VerificationSnapshot {
428 pub fn format_compact(&self) -> String {
429 format!(
430 "Verification: {}/{} pass ({:.0}%), warn_runs={}, warn_items={}, loss(avg)={:.1}%",
431 self.pass,
432 self.total,
433 self.pass_rate * 100.0,
434 self.warn_runs,
435 self.warn_items,
436 self.avg_info_loss_score * 100.0
437 )
438 }
439}
440
// Unit tests for the verification checks and the statistics snapshot.
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration used by most tests.
    fn cfg() -> VerificationConfig {
        VerificationConfig::default()
    }

    // Empty inputs take the early-return path and pass.
    #[test]
    fn empty_input_passes() {
        let r = verify_output("", "", &cfg());
        assert!(r.pass);
    }

    // Output identical to the source must raise no warnings.
    #[test]
    fn identical_passes() {
        let src = "fn hello() { println!(\"world\"); }";
        let r = verify_output(src, src, &cfg());
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    // A path in the source whose basename is missing from the output is flagged.
    #[test]
    fn detects_missing_path() {
        let src = "import { foo } from src/utils/helper.ts";
        let compressed = "import foo";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::MissingPath));
    }

    // A declared function name that is absent from the output is flagged.
    #[test]
    fn detects_lost_identifier() {
        let src = "fn calculate_monthly_revenue(data: &[f64]) -> f64 { data.iter().sum() }";
        let compressed = "fn calc() -> f64 { sum }";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::MangledIdentifier));
    }

    // Five opening braces against none closing exceeds the tolerated imbalance.
    #[test]
    fn detects_brace_mismatch() {
        let src = "fn a() { if true { b(); } } fn c() { d(); } fn e() { f(); }";
        let compressed = "fn a() { if true { b(); fn c() { d(); fn e() { f();";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::TruncatedBlock));
    }

    // Dropping the body while keeping the signature loses no identifiers.
    #[test]
    fn preserved_identifiers_pass() {
        let src = "fn process_data(input: Vec<u8>) -> Result<()> { Ok(()) }";
        let compressed = "fn process_data(input: Vec<u8>) -> Result<()>";
        let r = verify_output(src, compressed, &cfg());
        let mangled = r
            .warnings
            .iter()
            .filter(|w| w.kind == WarningKind::MangledIdentifier)
            .count();
        assert_eq!(mangled, 0);
    }

    // Paths embedded in prose are found for more than one extension.
    #[test]
    fn extract_paths_finds_common_extensions() {
        let text = "see src/core/auth.rs and lib/utils.py for details";
        let paths = extract_file_paths(text);
        assert!(paths.iter().any(|p| p.contains("auth.rs")));
        assert!(paths.iter().any(|p| p.contains("utils.py")));
    }

    // Both `fn` and `struct` declarations contribute their names.
    #[test]
    fn extract_identifiers_finds_functions() {
        let text = "fn calculate_total(x: i32) -> i32 { x }\nstruct UserProfile { name: String }";
        let ids = extract_identifiers(text);
        assert!(ids.contains(&"calculate_total".to_string()));
        assert!(ids.contains(&"UserProfile".to_string()));
    }

    // The loss score stays within [0, 1] even when every identifier is lost.
    #[test]
    fn info_loss_score_bounded() {
        let src = "fn very_long_function_name_here() {}\nfn another_significant_fn() {}";
        let compressed = "compressed";
        let r = verify_output(src, compressed, &cfg());
        assert!(r.info_loss_score >= 0.0);
        assert!(r.info_loss_score <= 1.0);
    }

    // The statistics are global and shared with the other tests, so only the
    // pass-rate range is asserted here.
    #[test]
    fn snapshot_starts_clean() {
        let snap = stats_snapshot();
        assert!(snap.pass_rate >= 0.0);
        assert!(snap.pass_rate <= 1.0);
    }

    // With verification disabled, even clearly lossy output passes.
    #[test]
    fn disabled_config_passes() {
        let mut c = cfg();
        c.enabled = false;
        let r = verify_output("fn foo() {}", "bar", &c);
        assert!(r.pass);
    }
}