Skip to main content

lean_ctx/core/
output_verification.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Mutex, OnceLock};
5
6static STATS: OnceLock<VerificationStats> = OnceLock::new();
7
8fn global_stats() -> &'static VerificationStats {
9    STATS.get_or_init(VerificationStats::new)
10}
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct VerificationConfig {
14    pub enabled: bool,
15    pub strict_mode: bool,
16    pub check_paths: bool,
17    pub check_identifiers: bool,
18    pub check_line_numbers: bool,
19    pub check_structure: bool,
20}
21
22impl Default for VerificationConfig {
23    fn default() -> Self {
24        Self {
25            enabled: true,
26            strict_mode: false,
27            check_paths: true,
28            check_identifiers: true,
29            check_line_numbers: false,
30            check_structure: true,
31        }
32    }
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
36pub enum WarningKind {
37    MissingPath,
38    MangledIdentifier,
39    LineNumberDrift,
40    TruncatedBlock,
41}
42
43impl std::fmt::Display for WarningKind {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        match self {
46            Self::MissingPath => write!(f, "missing_path"),
47            Self::MangledIdentifier => write!(f, "mangled_identifier"),
48            Self::LineNumberDrift => write!(f, "line_drift"),
49            Self::TruncatedBlock => write!(f, "truncated_block"),
50        }
51    }
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct VerificationWarning {
56    pub kind: WarningKind,
57    pub detail: String,
58    pub severity: WarningSeverity,
59}
60
61#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
62pub enum WarningSeverity {
63    Low,
64    Medium,
65    High,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct VerificationResult {
70    pub pass: bool,
71    pub warnings: Vec<VerificationWarning>,
72    pub info_loss_score: f64,
73    pub paths_checked: usize,
74    pub identifiers_checked: usize,
75}
76
77impl VerificationResult {
78    pub fn ok() -> Self {
79        Self {
80            pass: true,
81            warnings: Vec::new(),
82            info_loss_score: 0.0,
83            paths_checked: 0,
84            identifiers_checked: 0,
85        }
86    }
87
88    pub fn format_compact(&self) -> String {
89        if self.pass {
90            return "PASS".to_string();
91        }
92        let counts: Vec<String> = self
93            .warnings
94            .iter()
95            .fold(std::collections::HashMap::new(), |mut acc, w| {
96                *acc.entry(w.kind.to_string()).or_insert(0u32) += 1;
97                acc
98            })
99            .into_iter()
100            .map(|(k, v)| format!("{k}={v}"))
101            .collect();
102        format!(
103            "WARN({}) loss={:.1}%",
104            counts.join(", "),
105            self.info_loss_score * 100.0
106        )
107    }
108}
109
110pub fn verify_output(
111    source: &str,
112    compressed: &str,
113    config: &VerificationConfig,
114) -> VerificationResult {
115    if !config.enabled || source.is_empty() || compressed.is_empty() {
116        return VerificationResult::ok();
117    }
118
119    let mut warnings = Vec::new();
120    let mut paths_checked = 0;
121    let mut identifiers_checked = 0;
122
123    if config.check_paths {
124        let (path_warnings, count) = check_paths(source, compressed);
125        paths_checked = count;
126        warnings.extend(path_warnings);
127    }
128
129    if config.check_identifiers {
130        let (id_warnings, count) = check_identifiers(source, compressed);
131        identifiers_checked = count;
132        warnings.extend(id_warnings);
133    }
134
135    if config.check_line_numbers {
136        warnings.extend(check_line_numbers(source, compressed));
137    }
138
139    if config.check_structure {
140        warnings.extend(check_structure(compressed));
141    }
142
143    let total_checks = (paths_checked + identifiers_checked).max(1);
144    let loss_items = warnings
145        .iter()
146        .filter(|w| w.severity == WarningSeverity::High)
147        .count() as f64
148        * 2.0
149        + warnings
150            .iter()
151            .filter(|w| w.severity == WarningSeverity::Medium)
152            .count() as f64;
153    let info_loss_score = (loss_items / total_checks as f64).min(1.0);
154
155    let pass = if config.strict_mode {
156        !warnings
157            .iter()
158            .any(|w| w.severity == WarningSeverity::High || w.severity == WarningSeverity::Medium)
159    } else {
160        !warnings.iter().any(|w| w.severity == WarningSeverity::High)
161    };
162
163    let result = VerificationResult {
164        pass,
165        warnings,
166        info_loss_score,
167        paths_checked,
168        identifiers_checked,
169    };
170
171    record_result(&result);
172    result
173}
174
175fn check_paths(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
176    let paths = extract_file_paths(source);
177    let mut warnings = Vec::new();
178
179    for path in &paths {
180        let basename = path.rsplit('/').next().unwrap_or(path);
181        if !compressed.contains(basename) {
182            warnings.push(VerificationWarning {
183                kind: WarningKind::MissingPath,
184                detail: format!("Path reference lost: {path}"),
185                severity: WarningSeverity::Medium,
186            });
187        }
188    }
189
190    (warnings, paths.len())
191}
192
193fn check_identifiers(source: &str, compressed: &str) -> (Vec<VerificationWarning>, usize) {
194    let identifiers = extract_identifiers(source);
195    let mut warnings = Vec::new();
196    let significant: Vec<&str> = identifiers
197        .iter()
198        .filter(|id| id.len() >= 4)
199        .map(String::as_str)
200        .collect();
201
202    for id in &significant {
203        if !compressed.contains(id) {
204            warnings.push(VerificationWarning {
205                kind: WarningKind::MangledIdentifier,
206                detail: format!("Identifier lost: {id}"),
207                severity: if id.len() >= 8 {
208                    WarningSeverity::High
209                } else {
210                    WarningSeverity::Low
211                },
212            });
213        }
214    }
215
216    (warnings, significant.len())
217}
218
219fn check_line_numbers(source: &str, compressed: &str) -> Vec<VerificationWarning> {
220    let source_max = source.lines().count();
221    let mut warnings = Vec::new();
222
223    let re_like = Regex::new(r"(?:line\s+|L|:)(\d{1,6})")
224        .ok()
225        .or_else(|| Regex::new(r"(\d+)").ok());
226
227    if let Some(re_like) = re_like {
228        for cap in re_like.captures_iter(compressed) {
229            if let Some(m) = cap.get(1) {
230                if let Ok(n) = m.as_str().parse::<usize>() {
231                    if n > source_max && n < 999_999 {
232                        warnings.push(VerificationWarning {
233                            kind: WarningKind::LineNumberDrift,
234                            detail: format!("Line {n} exceeds source max {source_max}"),
235                            severity: WarningSeverity::Low,
236                        });
237                    }
238                }
239            }
240        }
241    }
242
243    warnings
244}
245
246fn check_structure(compressed: &str) -> Vec<VerificationWarning> {
247    let mut warnings = Vec::new();
248
249    let opens: usize = compressed.chars().filter(|&c| c == '{').count();
250    let closes: usize = compressed.chars().filter(|&c| c == '}').count();
251    if opens > 0 || closes > 0 {
252        let diff = (opens as i64 - closes as i64).unsigned_abs();
253        if diff > 2 {
254            warnings.push(VerificationWarning {
255                kind: WarningKind::TruncatedBlock,
256                detail: format!("Brace mismatch: {{ {opens} vs }} {closes}"),
257                severity: WarningSeverity::Medium,
258            });
259        }
260    }
261
262    let parens_open: usize = compressed.chars().filter(|&c| c == '(').count();
263    let parens_close: usize = compressed.chars().filter(|&c| c == ')').count();
264    if parens_open > 0 || parens_close > 0 {
265        let diff = (parens_open as i64 - parens_close as i64).unsigned_abs();
266        if diff > 3 {
267            warnings.push(VerificationWarning {
268                kind: WarningKind::TruncatedBlock,
269                detail: format!("Paren mismatch: ( {parens_open} vs ) {parens_close}"),
270                severity: WarningSeverity::Low,
271            });
272        }
273    }
274
275    warnings
276}
277
278fn extract_file_paths(text: &str) -> Vec<String> {
279    let mut paths = Vec::new();
280    let re = Regex::new(
281        r#"(?:^|[\s"'`(,])([a-zA-Z0-9_./-]{2,}\.(?:rs|ts|tsx|js|jsx|py|go|java|rb|cpp|c|h|toml|yaml|yml|json|md))\b"#
282    )
283    .ok()
284    .or_else(|| Regex::new(r"(\S+\.\w+)").ok());
285
286    if let Some(re) = re {
287        for cap in re.captures_iter(text) {
288            if let Some(m) = cap.get(1) {
289                let p = m.as_str().to_string();
290                if !paths.contains(&p) && p.len() < 200 {
291                    paths.push(p);
292                }
293            }
294        }
295    }
296    paths
297}
298
299fn extract_identifiers(text: &str) -> Vec<String> {
300    let mut ids = Vec::new();
301    let re = Regex::new(
302        r"\b(fn|struct|enum|trait|type|class|function|const|let|var|def|pub)\s+([a-zA-Z_][a-zA-Z0-9_]*)"
303    )
304    .ok()
305    .or_else(|| Regex::new(r"([a-zA-Z_]\w+)").ok());
306
307    if let Some(re) = re {
308        for cap in re.captures_iter(text) {
309            if let Some(m) = cap.get(2) {
310                let id = m.as_str().to_string();
311                if !ids.contains(&id) {
312                    ids.push(id);
313                }
314            }
315        }
316    }
317    ids
318}
319
320struct VerificationStats {
321    pass_count: AtomicU64,
322    warn_run_count: AtomicU64,
323    warn_item_count: AtomicU64,
324    total_count: AtomicU64,
325    sum_info_loss_score_ppm: AtomicU64,
326    last_info_loss_score_ppm: AtomicU64,
327    recent_warnings: Mutex<Vec<VerificationWarning>>,
328}
329
330impl VerificationStats {
331    fn new() -> Self {
332        Self {
333            pass_count: AtomicU64::new(0),
334            warn_run_count: AtomicU64::new(0),
335            warn_item_count: AtomicU64::new(0),
336            total_count: AtomicU64::new(0),
337            sum_info_loss_score_ppm: AtomicU64::new(0),
338            last_info_loss_score_ppm: AtomicU64::new(0),
339            recent_warnings: Mutex::new(Vec::new()),
340        }
341    }
342}
343
344fn record_result(result: &VerificationResult) {
345    let stats = global_stats();
346    stats.total_count.fetch_add(1, Ordering::Relaxed);
347    if result.warnings.is_empty() {
348        stats.pass_count.fetch_add(1, Ordering::Relaxed);
349    } else {
350        stats.warn_run_count.fetch_add(1, Ordering::Relaxed);
351        stats
352            .warn_item_count
353            .fetch_add(result.warnings.len() as u64, Ordering::Relaxed);
354    }
355    let ppm = (result.info_loss_score.clamp(0.0, 1.0) * 1_000_000.0).round() as u64;
356    stats
357        .sum_info_loss_score_ppm
358        .fetch_add(ppm, Ordering::Relaxed);
359    stats.last_info_loss_score_ppm.store(ppm, Ordering::Relaxed);
360
361    if !result.warnings.is_empty() {
362        if let Ok(mut recent) = stats.recent_warnings.lock() {
363            for w in &result.warnings {
364                recent.push(w.clone());
365            }
366            if recent.len() > 200 {
367                let excess = recent.len() - 200;
368                recent.drain(..excess);
369            }
370        }
371
372        for w in &result.warnings {
373            crate::core::events::emit_verification_warning(
374                &w.kind.to_string(),
375                &w.detail,
376                &format!("{:?}", w.severity),
377            );
378        }
379    }
380}
381
382pub fn stats_snapshot() -> VerificationSnapshot {
383    let s = global_stats();
384    let total = s.total_count.load(Ordering::Relaxed);
385    let pass = s.pass_count.load(Ordering::Relaxed);
386    let warn_runs = s.warn_run_count.load(Ordering::Relaxed);
387    let warn_items = s.warn_item_count.load(Ordering::Relaxed);
388    let sum_ppm = s.sum_info_loss_score_ppm.load(Ordering::Relaxed);
389    let last_ppm = s.last_info_loss_score_ppm.load(Ordering::Relaxed);
390    let recent = s
391        .recent_warnings
392        .lock()
393        .map(|r| r.clone())
394        .unwrap_or_default();
395    VerificationSnapshot {
396        total,
397        pass,
398        warn_runs,
399        warn_items,
400        pass_rate: if total > 0 {
401            pass as f64 / total as f64
402        } else {
403            1.0
404        },
405        avg_info_loss_score: if total > 0 {
406            (sum_ppm as f64 / total as f64) / 1_000_000.0
407        } else {
408            0.0
409        },
410        last_info_loss_score: (last_ppm as f64) / 1_000_000.0,
411        recent_warnings: recent,
412    }
413}
414
415#[derive(Debug, Clone, Serialize)]
416pub struct VerificationSnapshot {
417    pub total: u64,
418    pub pass: u64,
419    pub warn_runs: u64,
420    pub warn_items: u64,
421    pub pass_rate: f64,
422    pub avg_info_loss_score: f64,
423    pub last_info_loss_score: f64,
424    pub recent_warnings: Vec<VerificationWarning>,
425}
426
427impl VerificationSnapshot {
428    pub fn format_compact(&self) -> String {
429        format!(
430            "Verification: {}/{} pass ({:.0}%), warn_runs={}, warn_items={}, loss(avg)={:.1}%",
431            self.pass,
432            self.total,
433            self.pass_rate * 100.0,
434            self.warn_runs,
435            self.warn_items,
436            self.avg_info_loss_score * 100.0
437        )
438    }
439}
440
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration shared by the tests.
    fn cfg() -> VerificationConfig {
        VerificationConfig::default()
    }

    /// True when `result` carries at least one warning of the given kind.
    fn has_kind(result: &VerificationResult, kind: WarningKind) -> bool {
        result.warnings.iter().any(|w| w.kind == kind)
    }

    #[test]
    fn empty_input_passes() {
        assert!(verify_output("", "", &cfg()).pass);
    }

    #[test]
    fn identical_passes() {
        let src = "fn hello() { println!(\"world\"); }";
        let r = verify_output(src, src, &cfg());
        assert!(r.pass);
        assert!(r.warnings.is_empty());
    }

    #[test]
    fn detects_missing_path() {
        let r = verify_output(
            "import { foo } from src/utils/helper.ts",
            "import foo",
            &cfg(),
        );
        assert!(has_kind(&r, WarningKind::MissingPath));
    }

    #[test]
    fn detects_lost_identifier() {
        let r = verify_output(
            "fn calculate_monthly_revenue(data: &[f64]) -> f64 { data.iter().sum() }",
            "fn calc() -> f64 { sum }",
            &cfg(),
        );
        assert!(has_kind(&r, WarningKind::MangledIdentifier));
    }

    #[test]
    fn detects_brace_mismatch() {
        let r = verify_output(
            "fn a() { if true { b(); } } fn c() { d(); } fn e() { f(); }",
            "fn a() { if true { b(); fn c() { d(); fn e() { f();",
            &cfg(),
        );
        assert!(has_kind(&r, WarningKind::TruncatedBlock));
    }

    #[test]
    fn preserved_identifiers_pass() {
        let r = verify_output(
            "fn process_data(input: Vec<u8>) -> Result<()> { Ok(()) }",
            "fn process_data(input: Vec<u8>) -> Result<()>",
            &cfg(),
        );
        let mangled = r
            .warnings
            .iter()
            .filter(|w| w.kind == WarningKind::MangledIdentifier)
            .count();
        assert_eq!(mangled, 0);
    }

    #[test]
    fn extract_paths_finds_common_extensions() {
        let paths = extract_file_paths("see src/core/auth.rs and lib/utils.py for details");
        for expected in ["auth.rs", "utils.py"] {
            assert!(paths.iter().any(|p| p.contains(expected)));
        }
    }

    #[test]
    fn extract_identifiers_finds_functions() {
        let ids = extract_identifiers(
            "fn calculate_total(x: i32) -> i32 { x }\nstruct UserProfile { name: String }",
        );
        for expected in ["calculate_total", "UserProfile"] {
            assert!(ids.contains(&expected.to_string()));
        }
    }

    #[test]
    fn info_loss_score_bounded() {
        let r = verify_output(
            "fn very_long_function_name_here() {}\nfn another_significant_fn() {}",
            "compressed",
            &cfg(),
        );
        assert!(r.info_loss_score >= 0.0);
        assert!(r.info_loss_score <= 1.0);
    }

    #[test]
    fn snapshot_starts_clean() {
        // Only the range is asserted; other tests may have recorded runs
        // in the shared global statistics.
        let snap = stats_snapshot();
        assert!(snap.pass_rate >= 0.0);
        assert!(snap.pass_rate <= 1.0);
    }

    #[test]
    fn disabled_config_passes() {
        let c = VerificationConfig {
            enabled: false,
            ..cfg()
        };
        assert!(verify_output("fn foo() {}", "bar", &c).pass);
    }
}