// lean_ctx/core/output_verification.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Mutex, OnceLock};
5
6static STATS: OnceLock<VerificationStats> = OnceLock::new();
7
8fn global_stats() -> &'static VerificationStats {
9    STATS.get_or_init(VerificationStats::new)
10}
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct VerificationConfig {
14    pub enabled: bool,
15    pub strict_mode: bool,
16    pub check_paths: bool,
17    pub check_identifiers: bool,
18    pub check_line_numbers: bool,
19    pub check_structure: bool,
20}
21
22impl Default for VerificationConfig {
23    fn default() -> Self {
24        Self {
25            enabled: true,
26            strict_mode: false,
27            check_paths: true,
28            check_identifiers: true,
29            check_line_numbers: false,
30            check_structure: true,
31        }
32    }
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
36pub enum WarningKind {
37    MissingPath,
38    MangledIdentifier,
39    LineNumberDrift,
40    TruncatedBlock,
41}
42
43impl std::fmt::Display for WarningKind {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        match self {
46            Self::MissingPath => write!(f, "missing_path"),
47            Self::MangledIdentifier => write!(f, "mangled_identifier"),
48            Self::LineNumberDrift => write!(f, "line_drift"),
49            Self::TruncatedBlock => write!(f, "truncated_block"),
50        }
51    }
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct VerificationWarning {
56    pub kind: WarningKind,
57    pub detail: String,
58    pub severity: WarningSeverity,
59}
60
61#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
62pub enum WarningSeverity {
63    Low,
64    Medium,
65    High,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct VerificationResult {
70    pub pass: bool,
71    pub warnings: Vec<VerificationWarning>,
72    pub info_loss_score: f64,
73    pub paths_checked: usize,
74    pub identifiers_checked: usize,
75}
76
77impl VerificationResult {
78    pub fn ok() -> Self {
79        Self {
80            pass: true,
81            warnings: Vec::new(),
82            info_loss_score: 0.0,
83            paths_checked: 0,
84            identifiers_checked: 0,
85        }
86    }
87
88    pub fn format_compact(&self) -> String {
89        if self.warnings.is_empty() {
90            return "PASS".to_string();
91        }
92        let status = if self.pass { "WARN" } else { "FAIL" };
93        let counts: Vec<String> = self
94            .warnings
95            .iter()
96            .fold(std::collections::HashMap::new(), |mut acc, w| {
97                *acc.entry(w.kind.to_string()).or_insert(0u32) += 1;
98                acc
99            })
100            .into_iter()
101            .map(|(k, v)| format!("{k}={v}"))
102            .collect();
103        format!(
104            "{status}({}) loss={:.1}%",
105            counts.join(", "),
106            self.info_loss_score * 100.0
107        )
108    }
109}
110
111pub fn verify_output(
112    source: &str,
113    compressed: &str,
114    config: &VerificationConfig,
115) -> VerificationResult {
116    if !config.enabled || source.is_empty() || compressed.is_empty() {
117        return VerificationResult::ok();
118    }
119
120    // No-op compression should never produce warnings.
121    if source == compressed {
122        return VerificationResult::ok();
123    }
124
125    let mut warnings = Vec::new();
126    let mut paths_checked = 0;
127    let mut identifiers_checked = 0;
128
129    if config.check_paths {
130        let (path_warnings, count) = check_paths(source, compressed);
131        paths_checked = count;
132        warnings.extend(path_warnings);
133    }
134
135    if config.check_identifiers {
136        let (id_warnings, count) = check_identifiers(source, compressed);
137        identifiers_checked = count;
138        warnings.extend(id_warnings);
139    }
140
141    if config.check_line_numbers {
142        warnings.extend(check_line_numbers(source, compressed));
143    }
144
145    if config.check_structure {
146        warnings.extend(check_structure(source, compressed));
147    }
148
149    let total_checks = (paths_checked + identifiers_checked).max(1);
150    let loss_items = warnings
151        .iter()
152        .filter(|w| w.severity == WarningSeverity::High)
153        .count() as f64
154        * 2.0
155        + warnings
156            .iter()
157            .filter(|w| w.severity == WarningSeverity::Medium)
158            .count() as f64;
159    let info_loss_score = (loss_items / total_checks as f64).min(1.0);
160
161    let pass = if config.strict_mode {
162        !warnings
163            .iter()
164            .any(|w| w.severity == WarningSeverity::High || w.severity == WarningSeverity::Medium)
165    } else {
166        !warnings.iter().any(|w| w.severity == WarningSeverity::High)
167    };
168
169    let result = VerificationResult {
170        pass,
171        warnings,
172        info_loss_score,
173        paths_checked,
174        identifiers_checked,
175    };
176
177    record_result(&result);
178    result
179}
180
181fn check_paths(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
182    let paths = extract_file_paths(source);
183    let mut warnings = Vec::new();
184
185    for path in &paths {
186        let basename = path.rsplit('/').next().unwrap_or(path);
187        if !compressed.contains(basename) {
188            warnings.push(VerificationWarning {
189                kind: WarningKind::MissingPath,
190                detail: format!("Path reference lost: {path}"),
191                severity: WarningSeverity::Medium,
192            });
193        }
194    }
195
196    (warnings, paths.len())
197}
198
199fn check_identifiers(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
200    let identifiers = extract_identifiers(source);
201    let mut warnings = Vec::new();
202    let significant: Vec<&str> = identifiers
203        .iter()
204        .filter(|id| id.len() >= 4)
205        .map(String::as_str)
206        .collect();
207
208    for id in &significant {
209        if !compressed.contains(id) {
210            warnings.push(VerificationWarning {
211                kind: WarningKind::MangledIdentifier,
212                detail: format!("Identifier lost: {id}"),
213                severity: if id.len() >= 8 {
214                    WarningSeverity::High
215                } else {
216                    WarningSeverity::Low
217                },
218            });
219        }
220    }
221
222    (warnings, significant.len())
223}
224
225fn check_line_numbers(source: &str, compressed: &str) -> Vec<VerificationWarning> {
226    let source_max = source.lines().count();
227    let mut warnings = Vec::new();
228
229    let re_like = Regex::new(r"(?:line\s+|L|:)(\d{1,6})")
230        .ok()
231        .or_else(|| Regex::new(r"(\d+)").ok());
232
233    if let Some(re_like) = re_like {
234        for cap in re_like.captures_iter(compressed) {
235            if let Some(m) = cap.get(1) {
236                if let Ok(n) = m.as_str().parse::<usize>() {
237                    if n > source_max && n < 999_999 {
238                        warnings.push(VerificationWarning {
239                            kind: WarningKind::LineNumberDrift,
240                            detail: format!("Line {n} exceeds source max {source_max}"),
241                            severity: WarningSeverity::Low,
242                        });
243                    }
244                }
245            }
246        }
247    }
248
249    warnings
250}
251
252fn check_structure(source: &str, compressed: &str) -> Vec<VerificationWarning> {
253    let mut warnings = Vec::new();
254
255    let src_opens: usize = source.chars().filter(|&c| c == '{').count();
256    let src_closes: usize = source.chars().filter(|&c| c == '}').count();
257    let src_diff = (src_opens as i64 - src_closes as i64).unsigned_abs();
258
259    let opens: usize = compressed.chars().filter(|&c| c == '{').count();
260    let closes: usize = compressed.chars().filter(|&c| c == '}').count();
261    if opens > 0 || closes > 0 {
262        let diff = (opens as i64 - closes as i64).unsigned_abs();
263        // Only warn if compression materially worsened structural balance.
264        if diff > (src_diff + 2) && diff > 2 {
265            warnings.push(VerificationWarning {
266                kind: WarningKind::TruncatedBlock,
267                detail: format!("Brace mismatch: {{ {opens} vs }} {closes}"),
268                severity: WarningSeverity::Medium,
269            });
270        }
271    }
272
273    let src_parens_open: usize = source.chars().filter(|&c| c == '(').count();
274    let src_parens_close: usize = source.chars().filter(|&c| c == ')').count();
275    let src_parens_diff = (src_parens_open as i64 - src_parens_close as i64).unsigned_abs();
276
277    let parens_open: usize = compressed.chars().filter(|&c| c == '(').count();
278    let parens_close: usize = compressed.chars().filter(|&c| c == ')').count();
279    if parens_open > 0 || parens_close > 0 {
280        let diff = (parens_open as i64 - parens_close as i64).unsigned_abs();
281        if diff > (src_parens_diff + 3) && diff > 3 {
282            warnings.push(VerificationWarning {
283                kind: WarningKind::TruncatedBlock,
284                detail: format!("Paren mismatch: ( {parens_open} vs ) {parens_close}"),
285                severity: WarningSeverity::Low,
286            });
287        }
288    }
289
290    warnings
291}
292
293fn extract_file_paths(text: &str) -> Vec<String> {
294    let mut paths = Vec::new();
295    let re = Regex::new(
296        r#"(?:^|[\s"'`(,])([a-zA-Z0-9_./-]{2,}\.(?:rs|ts|tsx|js|jsx|py|go|java|rb|cpp|c|h|toml|yaml|yml|json|md))\b"#
297    )
298    .ok()
299    .or_else(|| Regex::new(r"(\S+\.\w+)").ok());
300
301    if let Some(re) = re {
302        for cap in re.captures_iter(text) {
303            if let Some(m) = cap.get(1) {
304                let p = m.as_str().to_string();
305                if !paths.contains(&p) && p.len() < 200 {
306                    paths.push(p);
307                }
308            }
309        }
310    }
311    paths
312}
313
314fn extract_identifiers(text: &str) -> Vec<String> {
315    let mut ids = Vec::new();
316    let re = Regex::new(
317        r"\b(fn|struct|enum|trait|type|class|function|const|let|var|def|pub)\s+([a-zA-Z_][a-zA-Z0-9_]*)"
318    )
319    .ok()
320    .or_else(|| Regex::new(r"([a-zA-Z_]\w+)").ok());
321
322    if let Some(re) = re {
323        for cap in re.captures_iter(text) {
324            if let Some(m) = cap.get(2) {
325                let id = m.as_str().to_string();
326                if !ids.contains(&id) {
327                    ids.push(id);
328                }
329            }
330        }
331    }
332    ids
333}
334
335struct VerificationStats {
336    pass_count: AtomicU64,
337    warn_run_count: AtomicU64,
338    warn_item_count: AtomicU64,
339    total_count: AtomicU64,
340    sum_info_loss_score_ppm: AtomicU64,
341    last_info_loss_score_ppm: AtomicU64,
342    recent_warnings: Mutex<Vec<VerificationWarning>>,
343}
344
345impl VerificationStats {
346    fn new() -> Self {
347        Self {
348            pass_count: AtomicU64::new(0),
349            warn_run_count: AtomicU64::new(0),
350            warn_item_count: AtomicU64::new(0),
351            total_count: AtomicU64::new(0),
352            sum_info_loss_score_ppm: AtomicU64::new(0),
353            last_info_loss_score_ppm: AtomicU64::new(0),
354            recent_warnings: Mutex::new(Vec::new()),
355        }
356    }
357}
358
359fn record_result(result: &VerificationResult) {
360    let stats = global_stats();
361    stats.total_count.fetch_add(1, Ordering::Relaxed);
362    if result.warnings.is_empty() {
363        stats.pass_count.fetch_add(1, Ordering::Relaxed);
364    } else {
365        stats.warn_run_count.fetch_add(1, Ordering::Relaxed);
366        stats
367            .warn_item_count
368            .fetch_add(result.warnings.len() as u64, Ordering::Relaxed);
369    }
370    let ppm = (result.info_loss_score.clamp(0.0, 1.0) * 1_000_000.0).round() as u64;
371    stats
372        .sum_info_loss_score_ppm
373        .fetch_add(ppm, Ordering::Relaxed);
374    stats.last_info_loss_score_ppm.store(ppm, Ordering::Relaxed);
375
376    if !result.warnings.is_empty() {
377        if let Ok(mut recent) = stats.recent_warnings.lock() {
378            for w in &result.warnings {
379                recent.push(w.clone());
380            }
381            if recent.len() > 200 {
382                let excess = recent.len() - 200;
383                recent.drain(..excess);
384            }
385        }
386
387        for w in &result.warnings {
388            crate::core::events::emit_verification_warning(
389                &w.kind.to_string(),
390                &w.detail,
391                &format!("{:?}", w.severity),
392            );
393        }
394    }
395}
396
397pub fn stats_snapshot() -> VerificationSnapshot {
398    let s = global_stats();
399    let total = s.total_count.load(Ordering::Relaxed);
400    let pass = s.pass_count.load(Ordering::Relaxed);
401    let warn_runs = s.warn_run_count.load(Ordering::Relaxed);
402    let warn_items = s.warn_item_count.load(Ordering::Relaxed);
403    let sum_ppm = s.sum_info_loss_score_ppm.load(Ordering::Relaxed);
404    let last_ppm = s.last_info_loss_score_ppm.load(Ordering::Relaxed);
405    let recent = s
406        .recent_warnings
407        .lock()
408        .map(|r| r.clone())
409        .unwrap_or_default();
410    VerificationSnapshot {
411        total,
412        pass,
413        warn_runs,
414        warn_items,
415        pass_rate: if total > 0 {
416            pass as f64 / total as f64
417        } else {
418            1.0
419        },
420        avg_info_loss_score: if total > 0 {
421            (sum_ppm as f64 / total as f64) / 1_000_000.0
422        } else {
423            0.0
424        },
425        last_info_loss_score: (last_ppm as f64) / 1_000_000.0,
426        recent_warnings: recent,
427    }
428}
429
430#[derive(Debug, Clone, Serialize)]
431pub struct VerificationSnapshot {
432    pub total: u64,
433    pub pass: u64,
434    pub warn_runs: u64,
435    pub warn_items: u64,
436    pub pass_rate: f64,
437    pub avg_info_loss_score: f64,
438    pub last_info_loss_score: f64,
439    pub recent_warnings: Vec<VerificationWarning>,
440}
441
442impl VerificationSnapshot {
443    pub fn format_compact(&self) -> String {
444        format!(
445            "Verification: {}/{} pass ({:.0}%), warn_runs={}, warn_items={}, loss(avg)={:.1}%",
446            self.pass,
447            self.total,
448            self.pass_rate * 100.0,
449            self.warn_runs,
450            self.warn_items,
451            self.avg_info_loss_score * 100.0
452        )
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration shared by the tests.
    fn cfg() -> VerificationConfig {
        VerificationConfig::default()
    }

    /// True when `result` carries at least one warning of the given kind.
    fn has_warning(result: &VerificationResult, kind: WarningKind) -> bool {
        result.warnings.iter().any(|w| w.kind == kind)
    }

    #[test]
    fn empty_input_passes() {
        assert!(verify_output("", "", &cfg()).pass);
    }

    #[test]
    fn identical_passes() {
        let text = "fn hello() { println!(\"world\"); }";
        let outcome = verify_output(text, text, &cfg());
        assert!(outcome.pass);
        assert!(outcome.warnings.is_empty());
    }

    #[test]
    fn detects_missing_path() {
        let outcome = verify_output(
            "import { foo } from src/utils/helper.ts",
            "import foo",
            &cfg(),
        );
        assert!(has_warning(&outcome, WarningKind::MissingPath));
    }

    #[test]
    fn detects_lost_identifier() {
        let outcome = verify_output(
            "fn calculate_monthly_revenue(data: &[f64]) -> f64 { data.iter().sum() }",
            "fn calc() -> f64 { sum }",
            &cfg(),
        );
        assert!(has_warning(&outcome, WarningKind::MangledIdentifier));
    }

    #[test]
    fn detects_brace_mismatch() {
        let outcome = verify_output(
            "fn a() { if true { b(); } } fn c() { d(); } fn e() { f(); }",
            "fn a() { if true { b(); fn c() { d(); fn e() { f();",
            &cfg(),
        );
        assert!(has_warning(&outcome, WarningKind::TruncatedBlock));
    }

    #[test]
    fn preserved_identifiers_pass() {
        let outcome = verify_output(
            "fn process_data(input: Vec<u8>) -> Result<()> { Ok(()) }",
            "fn process_data(input: Vec<u8>) -> Result<()>",
            &cfg(),
        );
        assert!(!has_warning(&outcome, WarningKind::MangledIdentifier));
    }

    #[test]
    fn extract_paths_finds_common_extensions() {
        let found = extract_file_paths("see src/core/auth.rs and lib/utils.py for details");
        for expected in ["auth.rs", "utils.py"] {
            assert!(found.iter().any(|p| p.contains(expected)));
        }
    }

    #[test]
    fn extract_identifiers_finds_functions() {
        let found = extract_identifiers(
            "fn calculate_total(x: i32) -> i32 { x }\nstruct UserProfile { name: String }",
        );
        for expected in ["calculate_total", "UserProfile"] {
            assert!(found.contains(&expected.to_string()));
        }
    }

    #[test]
    fn info_loss_score_bounded() {
        let outcome = verify_output(
            "fn very_long_function_name_here() {}\nfn another_significant_fn() {}",
            "compressed",
            &cfg(),
        );
        assert!(outcome.info_loss_score >= 0.0);
        assert!(outcome.info_loss_score <= 1.0);
    }

    #[test]
    fn snapshot_starts_clean() {
        // Other tests share the global statistics, so only the range is checked.
        let snapshot = stats_snapshot();
        assert!(snapshot.pass_rate >= 0.0);
        assert!(snapshot.pass_rate <= 1.0);
    }

    #[test]
    fn disabled_config_passes() {
        let config = VerificationConfig {
            enabled: false,
            ..cfg()
        };
        assert!(verify_output("fn foo() {}", "bar", &config).pass);
    }
}