Skip to main content

lean_ctx/core/
output_verification.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Mutex, OnceLock};
5
6static STATS: OnceLock<VerificationStats> = OnceLock::new();
7
8fn global_stats() -> &'static VerificationStats {
9    STATS.get_or_init(VerificationStats::new)
10}
11
12#[derive(Debug, Clone, Serialize, Deserialize, Default)]
13#[serde(default)]
14pub struct VerificationConfig {
15    pub enabled: Option<bool>,
16    /// Optional explicit verification mode.
17    /// - "off": disable verifier entirely
18    /// - "warn": fail only on High severity warnings
19    /// - "fail": fail on Medium+High warnings (strict)
20    pub mode: Option<String>,
21    pub strict_mode: Option<bool>,
22    pub check_paths: Option<bool>,
23    pub check_identifiers: Option<bool>,
24    pub check_line_numbers: Option<bool>,
25    pub check_structure: Option<bool>,
26}
27
28impl VerificationConfig {
29    pub fn enabled_effective(&self) -> bool {
30        self.enabled.unwrap_or(true)
31    }
32    pub fn strict_mode_effective(&self) -> bool {
33        self.strict_mode.unwrap_or(false)
34    }
35    pub fn check_paths_effective(&self) -> bool {
36        self.check_paths.unwrap_or(true)
37    }
38    pub fn check_identifiers_effective(&self) -> bool {
39        self.check_identifiers.unwrap_or(true)
40    }
41    pub fn check_line_numbers_effective(&self) -> bool {
42        self.check_line_numbers.unwrap_or(false)
43    }
44    pub fn check_structure_effective(&self) -> bool {
45        self.check_structure.unwrap_or(true)
46    }
47}
48
/// Resolved verification behaviour after combining the explicit `mode`
/// string with the legacy `enabled` / `strict_mode` flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VerificationMode {
    /// Verification is skipped entirely.
    Off,
    /// Only High severity warnings make a result fail.
    Warn,
    /// Medium and High severity warnings make a result fail.
    Fail,
}
55
56fn parse_mode(s: &str) -> VerificationMode {
57    match s.trim().to_lowercase().as_str() {
58        "off" | "disabled" | "none" => VerificationMode::Off,
59        "fail" | "strict" | "enforce" => VerificationMode::Fail,
60        _ => VerificationMode::Warn,
61    }
62}
63
64impl VerificationConfig {
65    fn effective_mode(&self) -> VerificationMode {
66        if let Some(m) = self.mode.as_deref() {
67            return parse_mode(m);
68        }
69        if !self.enabled_effective() {
70            return VerificationMode::Off;
71        }
72        if self.strict_mode_effective() {
73            VerificationMode::Fail
74        } else {
75            VerificationMode::Warn
76        }
77    }
78
79    fn is_enabled(&self) -> bool {
80        self.effective_mode() != VerificationMode::Off
81    }
82}
83
84#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
85pub enum WarningKind {
86    MissingPath,
87    MangledIdentifier,
88    LineNumberDrift,
89    TruncatedBlock,
90}
91
92impl std::fmt::Display for WarningKind {
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        match self {
95            Self::MissingPath => write!(f, "missing_path"),
96            Self::MangledIdentifier => write!(f, "mangled_identifier"),
97            Self::LineNumberDrift => write!(f, "line_drift"),
98            Self::TruncatedBlock => write!(f, "truncated_block"),
99        }
100    }
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct VerificationWarning {
105    pub kind: WarningKind,
106    pub detail: String,
107    pub severity: WarningSeverity,
108}
109
110#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
111pub enum WarningSeverity {
112    Low,
113    Medium,
114    High,
115}
116
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct VerificationResult {
119    pub pass: bool,
120    pub warnings: Vec<VerificationWarning>,
121    pub info_loss_score: f64,
122    pub paths_checked: usize,
123    pub identifiers_checked: usize,
124}
125
126impl VerificationResult {
127    pub fn ok() -> Self {
128        Self {
129            pass: true,
130            warnings: Vec::new(),
131            info_loss_score: 0.0,
132            paths_checked: 0,
133            identifiers_checked: 0,
134        }
135    }
136
137    pub fn format_compact(&self) -> String {
138        if self.warnings.is_empty() {
139            return "PASS".to_string();
140        }
141        let status = if self.pass { "WARN" } else { "FAIL" };
142        let mut counts = std::collections::BTreeMap::<String, u32>::new();
143        for w in &self.warnings {
144            *counts.entry(w.kind.to_string()).or_insert(0) += 1;
145        }
146        let counts: Vec<String> = counts
147            .into_iter()
148            .map(|(k, v)| format!("{k}={v}"))
149            .collect();
150        format!(
151            "{status}({}) loss={:.1}%",
152            counts.join(", "),
153            self.info_loss_score * 100.0
154        )
155    }
156}
157
158pub fn verify_output(
159    source: &str,
160    compressed: &str,
161    config: &VerificationConfig,
162) -> VerificationResult {
163    if !config.is_enabled() || source.is_empty() || compressed.is_empty() {
164        return VerificationResult::ok();
165    }
166
167    // No-op compression should never produce warnings.
168    if source == compressed {
169        return VerificationResult::ok();
170    }
171
172    let mut warnings = Vec::new();
173    let mut paths_checked = 0;
174    let mut identifiers_checked = 0;
175
176    if config.check_paths_effective() {
177        let (path_warnings, count) = check_paths(source, compressed);
178        paths_checked = count;
179        warnings.extend(path_warnings);
180    }
181
182    if config.check_identifiers_effective() {
183        let (id_warnings, count) = check_identifiers(source, compressed);
184        identifiers_checked = count;
185        warnings.extend(id_warnings);
186    }
187
188    if config.check_line_numbers_effective() {
189        warnings.extend(check_line_numbers(source, compressed));
190    }
191
192    if config.check_structure_effective() {
193        warnings.extend(check_structure(source, compressed));
194    }
195
196    let total_checks = (paths_checked + identifiers_checked).max(1);
197    let loss_items = warnings
198        .iter()
199        .filter(|w| w.severity == WarningSeverity::High)
200        .count() as f64
201        * 2.0
202        + warnings
203            .iter()
204            .filter(|w| w.severity == WarningSeverity::Medium)
205            .count() as f64;
206    let info_loss_score = (loss_items / total_checks as f64).min(1.0);
207
208    let mode = config.effective_mode();
209    let pass = if mode == VerificationMode::Fail {
210        !warnings
211            .iter()
212            .any(|w| w.severity == WarningSeverity::High || w.severity == WarningSeverity::Medium)
213    } else {
214        !warnings.iter().any(|w| w.severity == WarningSeverity::High)
215    };
216
217    let result = VerificationResult {
218        pass,
219        warnings,
220        info_loss_score,
221        paths_checked,
222        identifiers_checked,
223    };
224
225    record_result(&result);
226    result
227}
228
229fn check_paths(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
230    let paths = extract_file_paths(source);
231    let mut warnings = Vec::new();
232
233    for path in &paths {
234        let basename = path.rsplit('/').next().unwrap_or(path);
235        if !compressed.contains(basename) {
236            warnings.push(VerificationWarning {
237                kind: WarningKind::MissingPath,
238                detail: format!("Path reference lost: {path}"),
239                severity: WarningSeverity::Medium,
240            });
241        }
242    }
243
244    (warnings, paths.len())
245}
246
247fn check_identifiers(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
248    let identifiers = extract_identifiers(source);
249    let mut warnings = Vec::new();
250    let significant: Vec<&str> = identifiers
251        .iter()
252        .filter(|id| id.len() >= 4)
253        .map(String::as_str)
254        .collect();
255
256    for id in &significant {
257        if !compressed.contains(id) {
258            warnings.push(VerificationWarning {
259                kind: WarningKind::MangledIdentifier,
260                detail: format!("Identifier lost: {id}"),
261                severity: if id.len() >= 8 {
262                    WarningSeverity::High
263                } else {
264                    WarningSeverity::Low
265                },
266            });
267        }
268    }
269
270    (warnings, significant.len())
271}
272
273fn check_line_numbers(source: &str, compressed: &str) -> Vec<VerificationWarning> {
274    let source_max = source.lines().count();
275    let mut warnings = Vec::new();
276
277    let re_like = Regex::new(r"(?:line\s+|L|:)(\d{1,6})")
278        .ok()
279        .or_else(|| Regex::new(r"(\d+)").ok());
280
281    if let Some(re_like) = re_like {
282        for cap in re_like.captures_iter(compressed) {
283            if let Some(m) = cap.get(1) {
284                if let Ok(n) = m.as_str().parse::<usize>() {
285                    if n > source_max && n < 999_999 {
286                        warnings.push(VerificationWarning {
287                            kind: WarningKind::LineNumberDrift,
288                            detail: format!("Line {n} exceeds source max {source_max}"),
289                            severity: WarningSeverity::Low,
290                        });
291                    }
292                }
293            }
294        }
295    }
296
297    warnings
298}
299
300fn check_structure(source: &str, compressed: &str) -> Vec<VerificationWarning> {
301    let mut warnings = Vec::new();
302
303    let src_opens: usize = source.chars().filter(|&c| c == '{').count();
304    let src_closes: usize = source.chars().filter(|&c| c == '}').count();
305    let src_diff = (src_opens as i64 - src_closes as i64).unsigned_abs();
306
307    let opens: usize = compressed.chars().filter(|&c| c == '{').count();
308    let closes: usize = compressed.chars().filter(|&c| c == '}').count();
309    if opens > 0 || closes > 0 {
310        let diff = (opens as i64 - closes as i64).unsigned_abs();
311        // Only warn if compression materially worsened structural balance.
312        if diff > (src_diff + 2) && diff > 2 {
313            warnings.push(VerificationWarning {
314                kind: WarningKind::TruncatedBlock,
315                detail: format!("Brace mismatch: {{ {opens} vs }} {closes}"),
316                severity: WarningSeverity::Medium,
317            });
318        }
319    }
320
321    let src_parens_open: usize = source.chars().filter(|&c| c == '(').count();
322    let src_parens_close: usize = source.chars().filter(|&c| c == ')').count();
323    let src_parens_diff = (src_parens_open as i64 - src_parens_close as i64).unsigned_abs();
324
325    let parens_open: usize = compressed.chars().filter(|&c| c == '(').count();
326    let parens_close: usize = compressed.chars().filter(|&c| c == ')').count();
327    if parens_open > 0 || parens_close > 0 {
328        let diff = (parens_open as i64 - parens_close as i64).unsigned_abs();
329        if diff > (src_parens_diff + 3) && diff > 3 {
330            warnings.push(VerificationWarning {
331                kind: WarningKind::TruncatedBlock,
332                detail: format!("Paren mismatch: ( {parens_open} vs ) {parens_close}"),
333                severity: WarningSeverity::Low,
334            });
335        }
336    }
337
338    warnings
339}
340
341fn extract_file_paths(text: &str) -> Vec<String> {
342    let mut paths = Vec::new();
343    let re = Regex::new(
344        r#"(?:^|[\s"'`(,])([a-zA-Z0-9_./-]{2,}\.(?:rs|ts|tsx|js|jsx|py|go|java|rb|cpp|c|h|toml|yaml|yml|json|md))\b"#
345    )
346    .ok()
347    .or_else(|| Regex::new(r"(\S+\.\w+)").ok());
348
349    if let Some(re) = re {
350        for cap in re.captures_iter(text) {
351            if let Some(m) = cap.get(1) {
352                let p = m.as_str().to_string();
353                if !paths.contains(&p) && p.len() < 200 {
354                    paths.push(p);
355                }
356            }
357        }
358    }
359    paths
360}
361
362fn extract_identifiers(text: &str) -> Vec<String> {
363    let mut ids = Vec::new();
364    let re = Regex::new(
365        r"\b(fn|struct|enum|trait|type|class|function|const|let|var|def|pub)\s+([a-zA-Z_][a-zA-Z0-9_]*)"
366    )
367    .ok()
368    .or_else(|| Regex::new(r"([a-zA-Z_]\w+)").ok());
369
370    if let Some(re) = re {
371        for cap in re.captures_iter(text) {
372            if let Some(m) = cap.get(2) {
373                let id = m.as_str().to_string();
374                if !ids.contains(&id) {
375                    ids.push(id);
376                }
377            }
378        }
379    }
380    ids
381}
382
383struct VerificationStats {
384    pass_count: AtomicU64,
385    warn_run_count: AtomicU64,
386    warn_item_count: AtomicU64,
387    total_count: AtomicU64,
388    sum_info_loss_score_ppm: AtomicU64,
389    last_info_loss_score_ppm: AtomicU64,
390    recent_warnings: Mutex<Vec<VerificationWarning>>,
391}
392
393impl VerificationStats {
394    fn new() -> Self {
395        Self {
396            pass_count: AtomicU64::new(0),
397            warn_run_count: AtomicU64::new(0),
398            warn_item_count: AtomicU64::new(0),
399            total_count: AtomicU64::new(0),
400            sum_info_loss_score_ppm: AtomicU64::new(0),
401            last_info_loss_score_ppm: AtomicU64::new(0),
402            recent_warnings: Mutex::new(Vec::new()),
403        }
404    }
405}
406
407fn record_result(result: &VerificationResult) {
408    let stats = global_stats();
409    stats.total_count.fetch_add(1, Ordering::Relaxed);
410    if result.warnings.is_empty() {
411        stats.pass_count.fetch_add(1, Ordering::Relaxed);
412    } else {
413        stats.warn_run_count.fetch_add(1, Ordering::Relaxed);
414        stats
415            .warn_item_count
416            .fetch_add(result.warnings.len() as u64, Ordering::Relaxed);
417    }
418    let ppm = (result.info_loss_score.clamp(0.0, 1.0) * 1_000_000.0).round() as u64;
419    stats
420        .sum_info_loss_score_ppm
421        .fetch_add(ppm, Ordering::Relaxed);
422    stats.last_info_loss_score_ppm.store(ppm, Ordering::Relaxed);
423
424    if !result.warnings.is_empty() {
425        if let Ok(mut recent) = stats.recent_warnings.lock() {
426            for w in &result.warnings {
427                recent.push(w.clone());
428            }
429            if recent.len() > 200 {
430                let excess = recent.len() - 200;
431                recent.drain(..excess);
432            }
433        }
434
435        for w in &result.warnings {
436            crate::core::events::emit_verification_warning(
437                &w.kind.to_string(),
438                &w.detail,
439                &format!("{:?}", w.severity),
440            );
441        }
442    }
443}
444
445pub fn stats_snapshot() -> VerificationSnapshot {
446    let s = global_stats();
447    let total = s.total_count.load(Ordering::Relaxed);
448    let pass = s.pass_count.load(Ordering::Relaxed);
449    let warn_runs = s.warn_run_count.load(Ordering::Relaxed);
450    let warn_items = s.warn_item_count.load(Ordering::Relaxed);
451    let sum_ppm = s.sum_info_loss_score_ppm.load(Ordering::Relaxed);
452    let last_ppm = s.last_info_loss_score_ppm.load(Ordering::Relaxed);
453    let recent = s
454        .recent_warnings
455        .lock()
456        .map(|r| r.clone())
457        .unwrap_or_default();
458    VerificationSnapshot {
459        total,
460        pass,
461        warn_runs,
462        warn_items,
463        pass_rate: if total > 0 {
464            pass as f64 / total as f64
465        } else {
466            1.0
467        },
468        avg_info_loss_score: if total > 0 {
469            (sum_ppm as f64 / total as f64) / 1_000_000.0
470        } else {
471            0.0
472        },
473        last_info_loss_score: (last_ppm as f64) / 1_000_000.0,
474        recent_warnings: recent,
475    }
476}
477
478#[derive(Debug, Clone, Serialize)]
479pub struct VerificationSnapshot {
480    pub total: u64,
481    pub pass: u64,
482    pub warn_runs: u64,
483    pub warn_items: u64,
484    pub pass_rate: f64,
485    pub avg_info_loss_score: f64,
486    pub last_info_loss_score: f64,
487    pub recent_warnings: Vec<VerificationWarning>,
488}
489
490impl VerificationSnapshot {
491    pub fn format_compact(&self) -> String {
492        format!(
493            "Verification: {}/{} pass ({:.0}%), warn_runs={}, warn_items={}, loss(avg)={:.1}%",
494            self.pass,
495            self.total,
496            self.pass_rate * 100.0,
497            self.warn_runs,
498            self.warn_items,
499            self.avg_info_loss_score * 100.0
500        )
501    }
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration: verifier on, non-strict.
    fn cfg() -> VerificationConfig {
        VerificationConfig::default()
    }

    #[test]
    fn empty_input_passes() {
        let r = verify_output("", "", &cfg());
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    #[test]
    fn identical_passes() {
        let src = "fn hello() { println!(\"world\"); }";
        let r = verify_output(src, src, &cfg());
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    #[test]
    fn detects_missing_path() {
        let src = "import { foo } from src/utils/helper.ts";
        let compressed = "import foo";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::MissingPath));
    }

    #[test]
    fn detects_lost_identifier() {
        let src = "fn calculate_monthly_revenue(data: &[f64]) -> f64 { data.iter().sum() }";
        let compressed = "fn calc() -> f64 { sum }";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::MangledIdentifier));
    }

    #[test]
    fn detects_brace_mismatch() {
        let src = "fn a() { if true { b(); } } fn c() { d(); } fn e() { f(); }";
        let compressed = "fn a() { if true { b(); fn c() { d(); fn e() { f();";
        let r = verify_output(src, compressed, &cfg());
        assert!(r
            .warnings
            .iter()
            .any(|w| w.kind == WarningKind::TruncatedBlock));
    }

    #[test]
    fn preserved_identifiers_pass() {
        let src = "fn process_data(input: Vec<u8>) -> Result<()> { Ok(()) }";
        let compressed = "fn process_data(input: Vec<u8>) -> Result<()>";
        let r = verify_output(src, compressed, &cfg());
        let mangled = r
            .warnings
            .iter()
            .filter(|w| w.kind == WarningKind::MangledIdentifier)
            .count();
        assert_eq!(mangled, 0);
    }

    #[test]
    fn extract_paths_finds_common_extensions() {
        let text = "see src/core/auth.rs and lib/utils.py for details";
        let paths = extract_file_paths(text);
        assert!(paths.iter().any(|p| p.contains("auth.rs")));
        assert!(paths.iter().any(|p| p.contains("utils.py")));
    }

    #[test]
    fn extract_identifiers_finds_functions() {
        let text = "fn calculate_total(x: i32) -> i32 { x }\nstruct UserProfile { name: String }";
        let ids = extract_identifiers(text);
        assert!(ids.contains(&"calculate_total".to_string()));
        assert!(ids.contains(&"UserProfile".to_string()));
    }

    #[test]
    fn info_loss_score_bounded() {
        let src = "fn very_long_function_name_here() {}\nfn another_significant_fn() {}";
        let compressed = "compressed";
        let r = verify_output(src, compressed, &cfg());
        assert!(r.info_loss_score >= 0.0);
        assert!(r.info_loss_score <= 1.0);
    }

    #[test]
    fn snapshot_starts_clean() {
        // The statistics are process-global and shared with the other tests,
        // so only the invariant bounds can be asserted here.
        let snap = stats_snapshot();
        assert!(snap.pass_rate >= 0.0);
        assert!(snap.pass_rate <= 1.0);
    }

    #[test]
    fn disabled_config_passes() {
        let mut c = cfg();
        c.enabled = Some(false);
        let r = verify_output("fn foo() {}", "bar", &c);
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    #[test]
    fn explicit_mode_off_disables_checks() {
        let mut c = cfg();
        c.mode = Some("off".to_string());
        let r = verify_output("fn calculate_monthly_revenue() {}", "bar", &c);
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    #[test]
    fn explicit_mode_fail_is_strict_without_legacy_flag() {
        let mut c = cfg();
        // Mode strings are trimmed and matched case-insensitively.
        c.mode = Some(" FAIL ".to_string());
        let src = "import { foo } from src/utils/helper.ts";
        let compressed = "import foo";
        let r = verify_output(src, compressed, &c);
        assert!(!r.pass, "explicit fail mode should FAIL on medium warnings");
    }

    #[test]
    fn strict_mode_fails_on_medium() {
        let mut c = cfg();
        c.strict_mode = Some(true);
        let src = "import { foo } from src/utils/helper.ts";
        let compressed = "import foo";
        let r = verify_output(src, compressed, &c);
        assert!(!r.pass, "strict mode should FAIL on medium warnings");
        assert!(
            r.format_compact().starts_with("FAIL("),
            "compact should show FAIL: {}",
            r.format_compact()
        );
    }

    #[test]
    fn compact_format_is_deterministic_and_sorted() {
        let src = "fn calculate_monthly_revenue() {} see src/utils/helper.ts";
        let compressed = "compressed";
        let r = verify_output(src, compressed, &cfg());
        let s = r.format_compact();
        // Both kinds must be present; otherwise the ordering check below
        // would pass without checking anything.
        let mangled = s
            .find("mangled_identifier")
            .unwrap_or_else(|| panic!("expected mangled_identifier in: {s}"));
        let missing = s
            .find("missing_path")
            .unwrap_or_else(|| panic!("expected missing_path in: {s}"));
        // Stable ordering for parsing: keys are lexicographically sorted.
        assert!(mangled < missing, "expected sorted keys in: {s}");
        assert_eq!(s, r.format_compact(), "formatting must be deterministic");
    }
}