Skip to main content

oxilean_codegen/opt_cache/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use crate::lcnf::{LcnfExpr, LcnfFunDecl, LcnfLetValue};
6
7use std::collections::{HashMap, HashSet, VecDeque};
8
9/// Summary of data locality characteristics for a function or loop body.
10#[derive(Debug, Clone)]
11pub struct DataLocalityInfo {
12    /// All memory accesses captured for this scope.
13    pub accesses: Vec<MemoryAccess>,
14    /// Estimated working set size in bytes.
15    pub working_set_bytes: u64,
16    /// The cache level that best accommodates the working set.
17    pub best_cache_level: CacheLevel,
18    /// Estimated average reuse distance (in number of intervening accesses).
19    pub reuse_distance: f64,
20}
21impl DataLocalityInfo {
22    /// Returns `true` if the working set fits in L1 cache.
23    pub fn fits_in_l1(&self) -> bool {
24        self.working_set_bytes <= CacheLevel::L1.capacity_bytes()
25    }
26    /// Returns `true` if the working set fits in L2 cache.
27    pub fn fits_in_l2(&self) -> bool {
28        self.working_set_bytes <= CacheLevel::L2.capacity_bytes()
29    }
30    /// Returns the fraction of accesses that are cache-friendly.
31    pub fn cache_friendly_fraction(&self) -> f64 {
32        if self.accesses.is_empty() {
33            return 1.0;
34        }
35        let friendly = self
36            .accesses
37            .iter()
38            .filter(|a| a.is_cache_friendly())
39            .count();
40        friendly as f64 / self.accesses.len() as f64
41    }
42}
43#[allow(dead_code)]
44#[derive(Debug, Clone)]
45pub struct OCAnalysisCache {
46    pub(super) entries: std::collections::HashMap<String, OCCacheEntry>,
47    pub(super) max_size: usize,
48    pub(super) hits: u64,
49    pub(super) misses: u64,
50}
51impl OCAnalysisCache {
52    #[allow(dead_code)]
53    pub fn new(max_size: usize) -> Self {
54        OCAnalysisCache {
55            entries: std::collections::HashMap::new(),
56            max_size,
57            hits: 0,
58            misses: 0,
59        }
60    }
61    #[allow(dead_code)]
62    pub fn get(&mut self, key: &str) -> Option<&OCCacheEntry> {
63        if self.entries.contains_key(key) {
64            self.hits += 1;
65            self.entries.get(key)
66        } else {
67            self.misses += 1;
68            None
69        }
70    }
71    #[allow(dead_code)]
72    pub fn insert(&mut self, key: String, data: Vec<u8>) {
73        if self.entries.len() >= self.max_size {
74            if let Some(oldest) = self.entries.keys().next().cloned() {
75                self.entries.remove(&oldest);
76            }
77        }
78        self.entries.insert(
79            key.clone(),
80            OCCacheEntry {
81                key,
82                data,
83                timestamp: 0,
84                valid: true,
85            },
86        );
87    }
88    #[allow(dead_code)]
89    pub fn invalidate(&mut self, key: &str) {
90        if let Some(entry) = self.entries.get_mut(key) {
91            entry.valid = false;
92        }
93    }
94    #[allow(dead_code)]
95    pub fn clear(&mut self) {
96        self.entries.clear();
97    }
98    #[allow(dead_code)]
99    pub fn hit_rate(&self) -> f64 {
100        let total = self.hits + self.misses;
101        if total == 0 {
102            return 0.0;
103        }
104        self.hits as f64 / total as f64
105    }
106    #[allow(dead_code)]
107    pub fn size(&self) -> usize {
108        self.entries.len()
109    }
110}
/// Pass execution phase for OCacheExt.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OCacheExtPassPhase {
    Early,
    Middle,
    Late,
    Finalize,
}
impl OCacheExtPassPhase {
    /// `true` for the `Early` phase.
    #[allow(dead_code)]
    pub fn is_early(&self) -> bool {
        self == &Self::Early
    }
    /// `true` for the `Middle` phase.
    #[allow(dead_code)]
    pub fn is_middle(&self) -> bool {
        self == &Self::Middle
    }
    /// `true` for the `Late` phase.
    #[allow(dead_code)]
    pub fn is_late(&self) -> bool {
        self == &Self::Late
    }
    /// `true` for the `Finalize` phase.
    #[allow(dead_code)]
    pub fn is_finalize(&self) -> bool {
        self == &Self::Finalize
    }
    /// Ordinal position of this phase in execution order (0..=3).
    #[allow(dead_code)]
    pub fn order(&self) -> u32 {
        match self {
            OCacheExtPassPhase::Early => 0,
            OCacheExtPassPhase::Middle => 1,
            OCacheExtPassPhase::Late => 2,
            OCacheExtPassPhase::Finalize => 3,
        }
    }
    /// Inverse of [`OCacheExtPassPhase::order`]; `None` for out-of-range input.
    #[allow(dead_code)]
    pub fn from_order(n: u32) -> Option<Self> {
        let ordered = [Self::Early, Self::Middle, Self::Late, Self::Finalize];
        ordered.get(n as usize).cloned()
    }
}
/// Top-level configuration for the cache optimization pass.
///
/// No `Default` impl is visible here; callers are expected to construct it
/// explicitly with values matching the target microarchitecture.
#[derive(Debug, Clone)]
pub struct CacheOptConfig {
    /// Cache line size in bytes (typically 64 on x86-64).
    pub cache_line_size: u64,
    /// How many iterations ahead to prefetch (in units of loop iterations).
    pub prefetch_distance: u64,
    /// Whether to insert software prefetch hints.
    pub enable_prefetch: bool,
    /// Loop tiling sub-configuration; see `LoopTilingConfig`.
    pub tiling: LoopTilingConfig,
}
169#[allow(dead_code)]
170#[derive(Debug, Clone)]
171pub struct OCWorklist {
172    pub(super) items: std::collections::VecDeque<u32>,
173    pub(super) in_worklist: std::collections::HashSet<u32>,
174}
175impl OCWorklist {
176    #[allow(dead_code)]
177    pub fn new() -> Self {
178        OCWorklist {
179            items: std::collections::VecDeque::new(),
180            in_worklist: std::collections::HashSet::new(),
181        }
182    }
183    #[allow(dead_code)]
184    pub fn push(&mut self, item: u32) -> bool {
185        if self.in_worklist.insert(item) {
186            self.items.push_back(item);
187            true
188        } else {
189            false
190        }
191    }
192    #[allow(dead_code)]
193    pub fn pop(&mut self) -> Option<u32> {
194        let item = self.items.pop_front()?;
195        self.in_worklist.remove(&item);
196        Some(item)
197    }
198    #[allow(dead_code)]
199    pub fn is_empty(&self) -> bool {
200        self.items.is_empty()
201    }
202    #[allow(dead_code)]
203    pub fn len(&self) -> usize {
204        self.items.len()
205    }
206    #[allow(dead_code)]
207    pub fn contains(&self, item: u32) -> bool {
208        self.in_worklist.contains(&item)
209    }
210}
/// Pass execution phase for OCacheX2.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OCacheX2PassPhase {
    Early,
    Middle,
    Late,
    Finalize,
}
impl OCacheX2PassPhase {
    /// `true` for the `Early` phase.
    #[allow(dead_code)]
    pub fn is_early(&self) -> bool {
        *self == OCacheX2PassPhase::Early
    }
    /// `true` for the `Middle` phase.
    #[allow(dead_code)]
    pub fn is_middle(&self) -> bool {
        *self == OCacheX2PassPhase::Middle
    }
    /// `true` for the `Late` phase.
    #[allow(dead_code)]
    pub fn is_late(&self) -> bool {
        *self == OCacheX2PassPhase::Late
    }
    /// `true` for the `Finalize` phase.
    #[allow(dead_code)]
    pub fn is_finalize(&self) -> bool {
        *self == OCacheX2PassPhase::Finalize
    }
    /// Ordinal position of this phase in execution order (0..=3).
    #[allow(dead_code)]
    pub fn order(&self) -> u32 {
        match self {
            Self::Early => 0,
            Self::Middle => 1,
            Self::Late => 2,
            Self::Finalize => 3,
        }
    }
    /// Inverse of [`OCacheX2PassPhase::order`]; `None` for `n > 3`.
    #[allow(dead_code)]
    pub fn from_order(n: u32) -> Option<Self> {
        let phases = [Self::Early, Self::Middle, Self::Late, Self::Finalize];
        phases.get(n as usize).cloned()
    }
}
/// A summary report produced after the cache optimization pass completes.
#[derive(Debug, Clone, Default)]
pub struct CacheOptReport {
    /// Number of loops that had tiling applied.
    pub num_loops_tiled: usize,
    /// Number of prefetch hints inserted.
    pub num_prefetches_inserted: usize,
    /// Estimated reduction in cache misses as a fraction in [0, 1].
    pub estimated_cache_miss_reduction: f64,
}
impl CacheOptReport {
    /// Builds a single-line, human-readable summary of the pass results.
    pub fn summary(&self) -> String {
        let reduction_pct = self.estimated_cache_miss_reduction * 100.0;
        format!(
            "CacheOpt: {} loops tiled, {} prefetches inserted, estimated {:.1}% cache-miss reduction",
            self.num_loops_tiled, self.num_prefetches_inserted, reduction_pct,
        )
    }
}
/// Liveness analysis for OCacheExt.
///
/// Each field is indexed by block id; per-block variable sets are stored as
/// plain `Vec<usize>` with manual de-duplication on insert.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCacheExtLiveness {
    pub live_in: Vec<Vec<usize>>,
    pub live_out: Vec<Vec<usize>>,
    pub defs: Vec<Vec<usize>>,
    pub uses: Vec<Vec<usize>>,
}
impl OCacheExtLiveness {
    /// Allocates empty per-block sets for `n` blocks.
    #[allow(dead_code)]
    pub fn new(n: usize) -> Self {
        let table = vec![Vec::new(); n];
        Self {
            live_in: table.clone(),
            live_out: table.clone(),
            defs: table.clone(),
            uses: table,
        }
    }
    /// `true` if block `b` exists and its set in `table` contains `v`.
    #[allow(dead_code)]
    fn block_has(table: &[Vec<usize>], b: usize, v: usize) -> bool {
        table.get(b).is_some_and(|set| set.contains(&v))
    }
    /// Appends `v` to block `b`'s set, ignoring duplicates and bad indices.
    #[allow(dead_code)]
    fn push_unique(table: &mut [Vec<usize>], b: usize, v: usize) {
        if let Some(set) = table.get_mut(b) {
            if !set.contains(&v) {
                set.push(v);
            }
        }
    }
    /// Is variable `v` live on entry to block `b`?
    #[allow(dead_code)]
    pub fn live_in(&self, b: usize, v: usize) -> bool {
        Self::block_has(&self.live_in, b, v)
    }
    /// Is variable `v` live on exit from block `b`?
    #[allow(dead_code)]
    pub fn live_out(&self, b: usize, v: usize) -> bool {
        Self::block_has(&self.live_out, b, v)
    }
    /// Records that block `b` defines variable `v` (no-op if `b` is invalid).
    #[allow(dead_code)]
    pub fn add_def(&mut self, b: usize, v: usize) {
        Self::push_unique(&mut self.defs, b, v);
    }
    /// Records that block `b` uses variable `v` (no-op if `b` is invalid).
    #[allow(dead_code)]
    pub fn add_use(&mut self, b: usize, v: usize) {
        Self::push_unique(&mut self.uses, b, v);
    }
    /// Does block `b` use variable `v`?
    #[allow(dead_code)]
    pub fn var_is_used_in_block(&self, b: usize, v: usize) -> bool {
        Self::block_has(&self.uses, b, v)
    }
    /// Does block `b` define variable `v`?
    #[allow(dead_code)]
    pub fn var_is_def_in_block(&self, b: usize, v: usize) -> bool {
        Self::block_has(&self.defs, b, v)
    }
}
334/// Pass registry for OCacheExt.
335#[allow(dead_code)]
336#[derive(Debug, Default)]
337pub struct OCacheExtPassRegistry {
338    pub(super) configs: Vec<OCacheExtPassConfig>,
339    pub(super) stats: Vec<OCacheExtPassStats>,
340}
341impl OCacheExtPassRegistry {
342    #[allow(dead_code)]
343    pub fn new() -> Self {
344        Self::default()
345    }
346    #[allow(dead_code)]
347    pub fn register(&mut self, c: OCacheExtPassConfig) {
348        self.stats.push(OCacheExtPassStats::new());
349        self.configs.push(c);
350    }
351    #[allow(dead_code)]
352    pub fn len(&self) -> usize {
353        self.configs.len()
354    }
355    #[allow(dead_code)]
356    pub fn is_empty(&self) -> bool {
357        self.configs.is_empty()
358    }
359    #[allow(dead_code)]
360    pub fn get(&self, i: usize) -> Option<&OCacheExtPassConfig> {
361        self.configs.get(i)
362    }
363    #[allow(dead_code)]
364    pub fn get_stats(&self, i: usize) -> Option<&OCacheExtPassStats> {
365        self.stats.get(i)
366    }
367    #[allow(dead_code)]
368    pub fn enabled_passes(&self) -> Vec<&OCacheExtPassConfig> {
369        self.configs.iter().filter(|c| c.enabled).collect()
370    }
371    #[allow(dead_code)]
372    pub fn passes_in_phase(&self, ph: &OCacheExtPassPhase) -> Vec<&OCacheExtPassConfig> {
373        self.configs
374            .iter()
375            .filter(|c| c.enabled && &c.phase == ph)
376            .collect()
377    }
378    #[allow(dead_code)]
379    pub fn total_nodes_visited(&self) -> usize {
380        self.stats.iter().map(|s| s.nodes_visited).sum()
381    }
382    #[allow(dead_code)]
383    pub fn any_changed(&self) -> bool {
384        self.stats.iter().any(|s| s.changed)
385    }
386}
/// Represents a level of the memory hierarchy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CacheLevel {
    /// L1 data cache: typically ~32 KB, ~4 cycle latency.
    L1,
    /// L2 unified cache: typically ~256 KB, ~12 cycle latency.
    L2,
    /// L3 last-level cache: typically ~8 MB, ~40 cycle latency.
    L3,
    /// Main memory (RAM): 100+ cycle latency.
    Ram,
}
impl CacheLevel {
    /// Typical capacity in bytes for this cache level; RAM is treated as
    /// effectively unbounded (`u64::MAX`).
    pub fn capacity_bytes(&self) -> u64 {
        match self {
            Self::L1 => 32 << 10,
            Self::L2 => 256 << 10,
            Self::L3 => 8 << 20,
            Self::Ram => u64::MAX,
        }
    }
    /// Typical access latency in cycles for this cache level.
    pub fn latency_cycles(&self) -> u32 {
        match self {
            Self::L1 => 4,
            Self::L2 => 12,
            Self::L3 => 40,
            Self::Ram => 200,
        }
    }
}
419/// Constant folding helper for OCacheExt.
420#[allow(dead_code)]
421#[derive(Debug, Clone, Default)]
422pub struct OCacheExtConstFolder {
423    pub(super) folds: usize,
424    pub(super) failures: usize,
425    pub(super) enabled: bool,
426}
427impl OCacheExtConstFolder {
428    #[allow(dead_code)]
429    pub fn new() -> Self {
430        Self {
431            folds: 0,
432            failures: 0,
433            enabled: true,
434        }
435    }
436    #[allow(dead_code)]
437    pub fn add_i64(&mut self, a: i64, b: i64) -> Option<i64> {
438        self.folds += 1;
439        a.checked_add(b)
440    }
441    #[allow(dead_code)]
442    pub fn sub_i64(&mut self, a: i64, b: i64) -> Option<i64> {
443        self.folds += 1;
444        a.checked_sub(b)
445    }
446    #[allow(dead_code)]
447    pub fn mul_i64(&mut self, a: i64, b: i64) -> Option<i64> {
448        self.folds += 1;
449        a.checked_mul(b)
450    }
451    #[allow(dead_code)]
452    pub fn div_i64(&mut self, a: i64, b: i64) -> Option<i64> {
453        if b == 0 {
454            self.failures += 1;
455            None
456        } else {
457            self.folds += 1;
458            a.checked_div(b)
459        }
460    }
461    #[allow(dead_code)]
462    pub fn rem_i64(&mut self, a: i64, b: i64) -> Option<i64> {
463        if b == 0 {
464            self.failures += 1;
465            None
466        } else {
467            self.folds += 1;
468            a.checked_rem(b)
469        }
470    }
471    #[allow(dead_code)]
472    pub fn neg_i64(&mut self, a: i64) -> Option<i64> {
473        self.folds += 1;
474        a.checked_neg()
475    }
476    #[allow(dead_code)]
477    pub fn shl_i64(&mut self, a: i64, s: u32) -> Option<i64> {
478        if s >= 64 {
479            self.failures += 1;
480            None
481        } else {
482            self.folds += 1;
483            a.checked_shl(s)
484        }
485    }
486    #[allow(dead_code)]
487    pub fn shr_i64(&mut self, a: i64, s: u32) -> Option<i64> {
488        if s >= 64 {
489            self.failures += 1;
490            None
491        } else {
492            self.folds += 1;
493            a.checked_shr(s)
494        }
495    }
496    #[allow(dead_code)]
497    pub fn and_i64(&mut self, a: i64, b: i64) -> i64 {
498        self.folds += 1;
499        a & b
500    }
501    #[allow(dead_code)]
502    pub fn or_i64(&mut self, a: i64, b: i64) -> i64 {
503        self.folds += 1;
504        a | b
505    }
506    #[allow(dead_code)]
507    pub fn xor_i64(&mut self, a: i64, b: i64) -> i64 {
508        self.folds += 1;
509        a ^ b
510    }
511    #[allow(dead_code)]
512    pub fn not_i64(&mut self, a: i64) -> i64 {
513        self.folds += 1;
514        !a
515    }
516    #[allow(dead_code)]
517    pub fn fold_count(&self) -> usize {
518        self.folds
519    }
520    #[allow(dead_code)]
521    pub fn failure_count(&self) -> usize {
522        self.failures
523    }
524    #[allow(dead_code)]
525    pub fn enable(&mut self) {
526        self.enabled = true;
527    }
528    #[allow(dead_code)]
529    pub fn disable(&mut self) {
530        self.enabled = false;
531    }
532    #[allow(dead_code)]
533    pub fn is_enabled(&self) -> bool {
534        self.enabled
535    }
536}
537/// Analysis cache for OCacheExt.
538#[allow(dead_code)]
539#[derive(Debug)]
540pub struct OCacheExtCache {
541    pub(super) entries: Vec<(u64, Vec<u8>, bool, u32)>,
542    pub(super) cap: usize,
543    pub(super) total_hits: u64,
544    pub(super) total_misses: u64,
545}
546impl OCacheExtCache {
547    #[allow(dead_code)]
548    pub fn new(cap: usize) -> Self {
549        Self {
550            entries: Vec::new(),
551            cap,
552            total_hits: 0,
553            total_misses: 0,
554        }
555    }
556    #[allow(dead_code)]
557    pub fn get(&mut self, key: u64) -> Option<&[u8]> {
558        for e in self.entries.iter_mut() {
559            if e.0 == key && e.2 {
560                e.3 += 1;
561                self.total_hits += 1;
562                return Some(&e.1);
563            }
564        }
565        self.total_misses += 1;
566        None
567    }
568    #[allow(dead_code)]
569    pub fn put(&mut self, key: u64, data: Vec<u8>) {
570        if self.entries.len() >= self.cap {
571            self.entries.retain(|e| e.2);
572            if self.entries.len() >= self.cap {
573                self.entries.remove(0);
574            }
575        }
576        self.entries.push((key, data, true, 0));
577    }
578    #[allow(dead_code)]
579    pub fn invalidate(&mut self) {
580        for e in self.entries.iter_mut() {
581            e.2 = false;
582        }
583    }
584    #[allow(dead_code)]
585    pub fn hit_rate(&self) -> f64 {
586        let t = self.total_hits + self.total_misses;
587        if t == 0 {
588            0.0
589        } else {
590            self.total_hits as f64 / t as f64
591        }
592    }
593    #[allow(dead_code)]
594    pub fn live_count(&self) -> usize {
595        self.entries.iter().filter(|e| e.2).count()
596    }
597}
598/// Configuration for OCacheX2 passes.
599#[allow(dead_code)]
600#[derive(Debug, Clone)]
601pub struct OCacheX2PassConfig {
602    pub name: String,
603    pub phase: OCacheX2PassPhase,
604    pub enabled: bool,
605    pub max_iterations: usize,
606    pub debug: u32,
607    pub timeout_ms: Option<u64>,
608}
609impl OCacheX2PassConfig {
610    #[allow(dead_code)]
611    pub fn new(name: impl Into<String>) -> Self {
612        Self {
613            name: name.into(),
614            phase: OCacheX2PassPhase::Middle,
615            enabled: true,
616            max_iterations: 100,
617            debug: 0,
618            timeout_ms: None,
619        }
620    }
621    #[allow(dead_code)]
622    pub fn with_phase(mut self, phase: OCacheX2PassPhase) -> Self {
623        self.phase = phase;
624        self
625    }
626    #[allow(dead_code)]
627    pub fn with_max_iter(mut self, n: usize) -> Self {
628        self.max_iterations = n;
629        self
630    }
631    #[allow(dead_code)]
632    pub fn with_debug(mut self, d: u32) -> Self {
633        self.debug = d;
634        self
635    }
636    #[allow(dead_code)]
637    pub fn disabled(mut self) -> Self {
638        self.enabled = false;
639        self
640    }
641    #[allow(dead_code)]
642    pub fn with_timeout(mut self, ms: u64) -> Self {
643        self.timeout_ms = Some(ms);
644        self
645    }
646    #[allow(dead_code)]
647    pub fn is_debug_enabled(&self) -> bool {
648        self.debug > 0
649    }
650}
651#[allow(dead_code)]
652#[derive(Debug, Clone)]
653pub struct OCPassConfig {
654    pub phase: OCPassPhase,
655    pub enabled: bool,
656    pub max_iterations: u32,
657    pub debug_output: bool,
658    pub pass_name: String,
659}
660impl OCPassConfig {
661    #[allow(dead_code)]
662    pub fn new(name: impl Into<String>, phase: OCPassPhase) -> Self {
663        OCPassConfig {
664            phase,
665            enabled: true,
666            max_iterations: 10,
667            debug_output: false,
668            pass_name: name.into(),
669        }
670    }
671    #[allow(dead_code)]
672    pub fn disabled(mut self) -> Self {
673        self.enabled = false;
674        self
675    }
676    #[allow(dead_code)]
677    pub fn with_debug(mut self) -> Self {
678        self.debug_output = true;
679        self
680    }
681    #[allow(dead_code)]
682    pub fn max_iter(mut self, n: u32) -> Self {
683        self.max_iterations = n;
684        self
685    }
686}
/// Configuration parameters for loop tiling (cache blocking).
///
/// Tile sizes are expressed in elements, not bytes, so the resulting byte
/// footprint depends on the element type of the loop being tiled.
#[derive(Debug, Clone)]
pub struct LoopTilingConfig {
    /// Tile size targeting L1 cache reuse, in elements.
    pub tile_size_l1: u64,
    /// Tile size targeting L2 cache reuse, in elements.
    pub tile_size_l2: u64,
    /// Whether to apply L1-level tiling.
    pub enable_l1_tiling: bool,
    /// Whether to apply L2-level tiling.
    pub enable_l2_tiling: bool,
}
/// Describes the memory layout of a struct, used for field reordering analysis.
#[derive(Debug, Clone, PartialEq)]
pub struct StructLayout {
    /// The name of the struct.
    pub struct_name: String,
    /// Ordered list of (field_name, field_size_bytes) pairs.
    pub fields: Vec<(String, u64)>,
    /// Total size of the struct in bytes (including padding).
    pub total_size: u64,
    /// Required alignment of the struct in bytes.
    pub alignment: u64,
}
impl StructLayout {
    /// Cache-line size in bytes assumed by the layout heuristics.
    const CACHE_LINE: u64 = 64;
    /// `true` when the struct is cache-line aligned and occupies whole
    /// cache lines (total size is a multiple of 64 bytes).
    pub fn is_cache_aligned(&self) -> bool {
        self.alignment >= Self::CACHE_LINE && self.total_size % Self::CACHE_LINE == 0
    }
    /// Number of padding bytes: total size minus the sum of field sizes
    /// (saturating, so inconsistent inputs never underflow).
    pub fn padding_bytes(&self) -> u64 {
        let payload: u64 = self.fields.iter().map(|(_, size)| size).sum();
        self.total_size.saturating_sub(payload)
    }
    /// Number of cache lines the struct spans (rounded up).
    pub fn cache_lines_used(&self) -> u64 {
        self.total_size.div_ceil(Self::CACHE_LINE)
    }
}
728/// Configuration for OCacheExt passes.
729#[allow(dead_code)]
730#[derive(Debug, Clone)]
731pub struct OCacheExtPassConfig {
732    pub name: String,
733    pub phase: OCacheExtPassPhase,
734    pub enabled: bool,
735    pub max_iterations: usize,
736    pub debug: u32,
737    pub timeout_ms: Option<u64>,
738}
739impl OCacheExtPassConfig {
740    #[allow(dead_code)]
741    pub fn new(name: impl Into<String>) -> Self {
742        Self {
743            name: name.into(),
744            phase: OCacheExtPassPhase::Middle,
745            enabled: true,
746            max_iterations: 100,
747            debug: 0,
748            timeout_ms: None,
749        }
750    }
751    #[allow(dead_code)]
752    pub fn with_phase(mut self, phase: OCacheExtPassPhase) -> Self {
753        self.phase = phase;
754        self
755    }
756    #[allow(dead_code)]
757    pub fn with_max_iter(mut self, n: usize) -> Self {
758        self.max_iterations = n;
759        self
760    }
761    #[allow(dead_code)]
762    pub fn with_debug(mut self, d: u32) -> Self {
763        self.debug = d;
764        self
765    }
766    #[allow(dead_code)]
767    pub fn disabled(mut self) -> Self {
768        self.enabled = false;
769        self
770    }
771    #[allow(dead_code)]
772    pub fn with_timeout(mut self, ms: u64) -> Self {
773        self.timeout_ms = Some(ms);
774        self
775    }
776    #[allow(dead_code)]
777    pub fn is_debug_enabled(&self) -> bool {
778        self.debug > 0
779    }
780}
781/// Analyzes struct/constructor layouts in an LCNF function and produces
782/// `StructLayout` records that describe each distinct constructor shape.
783pub struct FieldReorderingAnalysis;
784impl FieldReorderingAnalysis {
785    /// Inspect `decl` and collect layout information for every constructor
786    /// referenced in the body.  The heuristic assigns 8 bytes per field
787    /// (pointer-sized) and rounds the total up to the nearest 8-byte boundary.
788    pub fn analyze(decl: &LcnfFunDecl) -> Vec<StructLayout> {
789        let mut layouts: Vec<StructLayout> = Vec::new();
790        Self::collect_layouts(&decl.body, &mut layouts);
791        layouts
792    }
793    pub(super) fn collect_layouts(expr: &LcnfExpr, out: &mut Vec<StructLayout>) {
794        match expr {
795            LcnfExpr::Let { value, body, .. } => {
796                if let LcnfLetValue::Ctor(name, _tag, args) = value {
797                    if !out.iter().any(|l| &l.struct_name == name) {
798                        let field_count = args.len() as u64;
799                        let fields: Vec<(String, u64)> = (0..field_count)
800                            .map(|i| (format!("field_{}", i), 8u64))
801                            .collect();
802                        let fields_total = field_count * 8;
803                        let total_size = if fields_total == 0 {
804                            8
805                        } else {
806                            fields_total.next_multiple_of(8)
807                        };
808                        out.push(StructLayout {
809                            struct_name: name.clone(),
810                            fields,
811                            total_size,
812                            alignment: 8,
813                        });
814                    }
815                }
816                Self::collect_layouts(body, out);
817            }
818            LcnfExpr::Case { alts, default, .. } => {
819                for alt in alts {
820                    Self::collect_layouts(&alt.body, out);
821                }
822                if let Some(def) = default {
823                    Self::collect_layouts(def, out);
824                }
825            }
826            LcnfExpr::Return(_) | LcnfExpr::Unreachable | LcnfExpr::TailCall(_, _) => {}
827        }
828    }
829}
/// Immediate-dominator tree over nodes indexed 0..size.
///
/// A node with `idom == None` has no recorded parent; a node whose idom is
/// itself is treated as a root during `dominates` walks.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCDominatorTree {
    pub idom: Vec<Option<u32>>,
    pub dom_children: Vec<Vec<u32>>,
    pub dom_depth: Vec<u32>,
}
impl OCDominatorTree {
    /// Allocates an empty tree for `size` nodes (no idoms, depth 0).
    #[allow(dead_code)]
    pub fn new(size: usize) -> Self {
        Self {
            idom: vec![None; size],
            dom_children: vec![Vec::new(); size],
            dom_depth: vec![0; size],
        }
    }
    /// Records `idom` as the immediate dominator of `node`.
    /// Panics if `node` is out of range.
    #[allow(dead_code)]
    pub fn set_idom(&mut self, node: usize, idom: u32) {
        self.idom[node] = Some(idom);
    }
    /// `true` if `a` dominates `b` (every node dominates itself).
    /// Walks `b`'s idom chain toward the root.
    #[allow(dead_code)]
    pub fn dominates(&self, a: usize, b: usize) -> bool {
        if a == b {
            return true;
        }
        let mut walker = b;
        while let Some(parent) = self.idom[walker] {
            let parent = parent as usize;
            if parent == a {
                return true;
            }
            // Self-referential idom marks the root: `a` was never reached.
            if parent == walker {
                return false;
            }
            walker = parent;
        }
        false
    }
    /// Depth of `node` in the dominator tree; 0 for unknown nodes.
    #[allow(dead_code)]
    pub fn depth(&self, node: usize) -> u32 {
        match self.dom_depth.get(node) {
            Some(&d) => d,
            None => 0,
        }
    }
}
/// Stateless constant-folding helpers over `i64`, `f64`, and `bool`.
///
/// Integer operations use the `checked_*` family so overflow and invalid
/// operands fold to `None` instead of panicking.
#[allow(dead_code)]
pub struct OCConstantFoldingHelper;
impl OCConstantFoldingHelper {
    /// Checked addition; `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_add_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_add(b)
    }
    /// Checked subtraction; `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_sub_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_sub(b)
    }
    /// Checked multiplication; `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_mul_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_mul(b)
    }
    /// Checked division; `None` when `b == 0` or on `i64::MIN / -1`
    /// overflow (both handled by `checked_div`).
    #[allow(dead_code)]
    pub fn fold_div_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_div(b)
    }
    /// IEEE-754 addition (never fails).
    #[allow(dead_code)]
    pub fn fold_add_f64(a: f64, b: f64) -> f64 {
        a + b
    }
    /// IEEE-754 multiplication (never fails).
    #[allow(dead_code)]
    pub fn fold_mul_f64(a: f64, b: f64) -> f64 {
        a * b
    }
    /// Checked negation; `None` for `i64::MIN`.
    #[allow(dead_code)]
    pub fn fold_neg_i64(a: i64) -> Option<i64> {
        a.checked_neg()
    }
    /// Boolean NOT.
    #[allow(dead_code)]
    pub fn fold_not_bool(a: bool) -> bool {
        !a
    }
    /// Boolean AND.
    #[allow(dead_code)]
    pub fn fold_and_bool(a: bool, b: bool) -> bool {
        a && b
    }
    /// Boolean OR.
    #[allow(dead_code)]
    pub fn fold_or_bool(a: bool, b: bool) -> bool {
        a || b
    }
    /// Checked left shift; `None` when `b >= 64`.
    #[allow(dead_code)]
    pub fn fold_shl_i64(a: i64, b: u32) -> Option<i64> {
        a.checked_shl(b)
    }
    /// Checked right shift; `None` when `b >= 64`.
    #[allow(dead_code)]
    pub fn fold_shr_i64(a: i64, b: u32) -> Option<i64> {
        a.checked_shr(b)
    }
    /// Checked remainder; `None` when `b == 0` or for `i64::MIN % -1`.
    ///
    /// Fix: the previous raw `a % b` could panic/overflow on
    /// `i64::MIN % -1`; `checked_rem` folds that case to `None`, matching
    /// `OCacheExtConstFolder::rem_i64` and `OCacheX2ConstFolder::rem_i64`.
    #[allow(dead_code)]
    pub fn fold_rem_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_rem(b)
    }
    /// Bitwise AND (never fails).
    #[allow(dead_code)]
    pub fn fold_bitand_i64(a: i64, b: i64) -> i64 {
        a & b
    }
    /// Bitwise OR (never fails).
    #[allow(dead_code)]
    pub fn fold_bitor_i64(a: i64, b: i64) -> i64 {
        a | b
    }
    /// Bitwise XOR (never fails).
    #[allow(dead_code)]
    pub fn fold_bitxor_i64(a: i64, b: i64) -> i64 {
        a ^ b
    }
    /// Bitwise NOT (never fails).
    #[allow(dead_code)]
    pub fn fold_bitnot_i64(a: i64) -> i64 {
        !a
    }
}
/// A single entry stored in `OCAnalysisCache`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCCacheEntry {
    // Lookup key this entry is stored under (duplicated from the map key).
    pub key: String,
    // Opaque cached payload bytes.
    pub data: Vec<u8>,
    // Insertion timestamp; `OCAnalysisCache::insert` currently always writes 0.
    pub timestamp: u64,
    // Cleared by `OCAnalysisCache::invalidate` to mark the entry stale.
    pub valid: bool,
}
958/// Constant folding helper for OCacheX2.
959#[allow(dead_code)]
960#[derive(Debug, Clone, Default)]
961pub struct OCacheX2ConstFolder {
962    pub(super) folds: usize,
963    pub(super) failures: usize,
964    pub(super) enabled: bool,
965}
966impl OCacheX2ConstFolder {
967    #[allow(dead_code)]
968    pub fn new() -> Self {
969        Self {
970            folds: 0,
971            failures: 0,
972            enabled: true,
973        }
974    }
975    #[allow(dead_code)]
976    pub fn add_i64(&mut self, a: i64, b: i64) -> Option<i64> {
977        self.folds += 1;
978        a.checked_add(b)
979    }
980    #[allow(dead_code)]
981    pub fn sub_i64(&mut self, a: i64, b: i64) -> Option<i64> {
982        self.folds += 1;
983        a.checked_sub(b)
984    }
985    #[allow(dead_code)]
986    pub fn mul_i64(&mut self, a: i64, b: i64) -> Option<i64> {
987        self.folds += 1;
988        a.checked_mul(b)
989    }
990    #[allow(dead_code)]
991    pub fn div_i64(&mut self, a: i64, b: i64) -> Option<i64> {
992        if b == 0 {
993            self.failures += 1;
994            None
995        } else {
996            self.folds += 1;
997            a.checked_div(b)
998        }
999    }
1000    #[allow(dead_code)]
1001    pub fn rem_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1002        if b == 0 {
1003            self.failures += 1;
1004            None
1005        } else {
1006            self.folds += 1;
1007            a.checked_rem(b)
1008        }
1009    }
1010    #[allow(dead_code)]
1011    pub fn neg_i64(&mut self, a: i64) -> Option<i64> {
1012        self.folds += 1;
1013        a.checked_neg()
1014    }
1015    #[allow(dead_code)]
1016    pub fn shl_i64(&mut self, a: i64, s: u32) -> Option<i64> {
1017        if s >= 64 {
1018            self.failures += 1;
1019            None
1020        } else {
1021            self.folds += 1;
1022            a.checked_shl(s)
1023        }
1024    }
1025    #[allow(dead_code)]
1026    pub fn shr_i64(&mut self, a: i64, s: u32) -> Option<i64> {
1027        if s >= 64 {
1028            self.failures += 1;
1029            None
1030        } else {
1031            self.folds += 1;
1032            a.checked_shr(s)
1033        }
1034    }
1035    #[allow(dead_code)]
1036    pub fn and_i64(&mut self, a: i64, b: i64) -> i64 {
1037        self.folds += 1;
1038        a & b
1039    }
1040    #[allow(dead_code)]
1041    pub fn or_i64(&mut self, a: i64, b: i64) -> i64 {
1042        self.folds += 1;
1043        a | b
1044    }
1045    #[allow(dead_code)]
1046    pub fn xor_i64(&mut self, a: i64, b: i64) -> i64 {
1047        self.folds += 1;
1048        a ^ b
1049    }
1050    #[allow(dead_code)]
1051    pub fn not_i64(&mut self, a: i64) -> i64 {
1052        self.folds += 1;
1053        !a
1054    }
1055    #[allow(dead_code)]
1056    pub fn fold_count(&self) -> usize {
1057        self.folds
1058    }
1059    #[allow(dead_code)]
1060    pub fn failure_count(&self) -> usize {
1061        self.failures
1062    }
1063    #[allow(dead_code)]
1064    pub fn enable(&mut self) {
1065        self.enabled = true;
1066    }
1067    #[allow(dead_code)]
1068    pub fn disable(&mut self) {
1069        self.enabled = false;
1070    }
1071    #[allow(dead_code)]
1072    pub fn is_enabled(&self) -> bool {
1073        self.enabled
1074    }
1075}
/// Dependency graph for OCacheExt.
///
/// Fixed node count `n` with forward (`adj`) and reverse (`rev`) adjacency
/// lists kept in sync; parallel edges are rejected on insertion.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCacheExtDepGraph {
    /// Number of nodes; valid ids are `0..n`.
    pub(super) n: usize,
    /// Forward adjacency: `adj[u]` lists successors of `u`.
    pub(super) adj: Vec<Vec<usize>>,
    /// Reverse adjacency: `rev[v]` lists predecessors of `v`.
    pub(super) rev: Vec<Vec<usize>>,
    /// Count of distinct edges added via `add_edge`.
    pub(super) edge_count: usize,
}
impl OCacheExtDepGraph {
    /// Creates a graph with `n` nodes and no edges.
    #[allow(dead_code)]
    pub fn new(n: usize) -> Self {
        Self {
            n,
            adj: vec![Vec::new(); n],
            rev: vec![Vec::new(); n],
            edge_count: 0,
        }
    }
    /// Adds a directed edge `from -> to`; out-of-range endpoints and
    /// duplicate edges are silently ignored.
    #[allow(dead_code)]
    pub fn add_edge(&mut self, from: usize, to: usize) {
        if from < self.n && to < self.n {
            // Linear duplicate check keeps both adjacency lists in sync
            // and duplicate-free.
            if !self.adj[from].contains(&to) {
                self.adj[from].push(to);
                self.rev[to].push(from);
                self.edge_count += 1;
            }
        }
    }
    /// Successors of `n` (empty slice when `n` is out of range).
    #[allow(dead_code)]
    pub fn succs(&self, n: usize) -> &[usize] {
        self.adj.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    /// Predecessors of `n` (empty slice when `n` is out of range).
    #[allow(dead_code)]
    pub fn preds(&self, n: usize) -> &[usize] {
        self.rev.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    /// Kahn's algorithm: returns a topological order of all nodes, or
    /// `None` if the graph contains a cycle.
    #[allow(dead_code)]
    pub fn topo_sort(&self) -> Option<Vec<usize>> {
        // In-degree of each node, derived from the reverse adjacency lists.
        let mut deg: Vec<usize> = (0..self.n).map(|i| self.rev[i].len()).collect();
        let mut q: std::collections::VecDeque<usize> =
            (0..self.n).filter(|&i| deg[i] == 0).collect();
        let mut out = Vec::with_capacity(self.n);
        while let Some(u) = q.pop_front() {
            out.push(u);
            for &v in &self.adj[u] {
                deg[v] -= 1;
                if deg[v] == 0 {
                    q.push_back(v);
                }
            }
        }
        // Any node never reaching in-degree 0 is on (or behind) a cycle.
        if out.len() == self.n {
            Some(out)
        } else {
            None
        }
    }
    /// Returns `true` if the graph has at least one directed cycle.
    #[allow(dead_code)]
    pub fn has_cycle(&self) -> bool {
        self.topo_sort().is_none()
    }
    /// All nodes reachable from `start` (including `start` itself when in
    /// range), via iterative DFS.
    #[allow(dead_code)]
    pub fn reachable(&self, start: usize) -> Vec<usize> {
        let mut vis = vec![false; self.n];
        let mut stk = vec![start];
        let mut out = Vec::new();
        while let Some(u) = stk.pop() {
            if u < self.n && !vis[u] {
                vis[u] = true;
                out.push(u);
                for &v in &self.adj[u] {
                    if !vis[v] {
                        stk.push(v);
                    }
                }
            }
        }
        out
    }
    /// Strongly connected components via Kosaraju's algorithm.
    #[allow(dead_code)]
    pub fn scc(&self) -> Vec<Vec<usize>> {
        // Pass 1: iterative DFS over the forward graph, recording nodes in
        // postorder. Each stack frame is (node, next-successor-index) so
        // the walk can resume after "returning" from a child.
        let mut visited = vec![false; self.n];
        let mut order = Vec::new();
        for i in 0..self.n {
            if !visited[i] {
                let mut stk = vec![(i, 0usize)];
                while let Some((u, idx)) = stk.last_mut() {
                    if !visited[*u] {
                        visited[*u] = true;
                    }
                    if *idx < self.adj[*u].len() {
                        let v = self.adj[*u][*idx];
                        *idx += 1;
                        if !visited[v] {
                            stk.push((v, 0));
                        }
                    } else {
                        // All successors explored: emit u in postorder.
                        order.push(*u);
                        stk.pop();
                    }
                }
            }
        }
        // Pass 2: DFS over the reverse graph, seeded in reverse postorder;
        // each tree discovered here is one strongly connected component.
        let mut comp = vec![usize::MAX; self.n];
        let mut components: Vec<Vec<usize>> = Vec::new();
        for &start in order.iter().rev() {
            if comp[start] == usize::MAX {
                let cid = components.len();
                let mut component = Vec::new();
                let mut stk = vec![start];
                while let Some(u) = stk.pop() {
                    if comp[u] == usize::MAX {
                        comp[u] = cid;
                        component.push(u);
                        for &v in &self.rev[u] {
                            if comp[v] == usize::MAX {
                                stk.push(v);
                            }
                        }
                    }
                }
                components.push(component);
            }
        }
        components
    }
    /// Number of nodes.
    #[allow(dead_code)]
    pub fn node_count(&self) -> usize {
        self.n
    }
    /// Number of distinct edges.
    #[allow(dead_code)]
    pub fn edge_count(&self) -> usize {
        self.edge_count
    }
}
/// Statistics for OCacheX2 passes.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCacheX2PassStats {
    /// Iterations executed by the pass.
    pub iterations: usize,
    /// True once the pass has modified at least one node.
    pub changed: bool,
    /// Total nodes inspected.
    pub nodes_visited: usize,
    /// Total nodes rewritten.
    pub nodes_modified: usize,
    /// Accumulated runtime in milliseconds.
    pub time_ms: u64,
    /// Memory attributed to the pass, in bytes (merged via max).
    pub memory_bytes: usize,
    /// Errors encountered.
    pub errors: usize,
}
impl OCacheX2PassStats {
    /// Creates zeroed statistics.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }
    /// Counts one visited node.
    #[allow(dead_code)]
    pub fn visit(&mut self) {
        self.nodes_visited += 1;
    }
    /// Counts one modified node and flags the stats as changed.
    #[allow(dead_code)]
    pub fn modify(&mut self) {
        self.changed = true;
        self.nodes_modified += 1;
    }
    /// Counts one pass iteration.
    #[allow(dead_code)]
    pub fn iterate(&mut self) {
        self.iterations += 1;
    }
    /// Counts one error.
    #[allow(dead_code)]
    pub fn error(&mut self) {
        self.errors += 1;
    }
    /// Ratio of modified to visited nodes (0.0 when nothing was visited).
    #[allow(dead_code)]
    pub fn efficiency(&self) -> f64 {
        match self.nodes_visited {
            0 => 0.0,
            visited => self.nodes_modified as f64 / visited as f64,
        }
    }
    /// Accumulates `other` into `self`: counters add, `changed` ORs, and
    /// `memory_bytes` keeps the maximum.
    #[allow(dead_code)]
    pub fn merge(&mut self, other: &OCacheX2PassStats) {
        self.changed |= other.changed;
        self.iterations += other.iterations;
        self.nodes_visited += other.nodes_visited;
        self.nodes_modified += other.nodes_modified;
        self.errors += other.errors;
        self.time_ms += other.time_ms;
        self.memory_bytes = self.memory_bytes.max(other.memory_bytes);
    }
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCLivenessInfo {
    /// Variables live on entry to each block.
    pub live_in: Vec<std::collections::HashSet<u32>>,
    /// Variables live on exit from each block.
    pub live_out: Vec<std::collections::HashSet<u32>>,
    /// Variables defined within each block.
    pub defs: Vec<std::collections::HashSet<u32>>,
    /// Variables used within each block.
    pub uses: Vec<std::collections::HashSet<u32>>,
}
impl OCLivenessInfo {
    /// Allocates empty per-block liveness sets for `block_count` blocks.
    #[allow(dead_code)]
    pub fn new(block_count: usize) -> Self {
        let empty_sets = || vec![std::collections::HashSet::new(); block_count];
        OCLivenessInfo {
            live_in: empty_sets(),
            live_out: empty_sets(),
            defs: empty_sets(),
            uses: empty_sets(),
        }
    }
    /// Records that `block` defines `var`; out-of-range blocks are ignored.
    #[allow(dead_code)]
    pub fn add_def(&mut self, block: usize, var: u32) {
        if let Some(set) = self.defs.get_mut(block) {
            set.insert(var);
        }
    }
    /// Records that `block` uses `var`; out-of-range blocks are ignored.
    #[allow(dead_code)]
    pub fn add_use(&mut self, block: usize, var: u32) {
        if let Some(set) = self.uses.get_mut(block) {
            set.insert(var);
        }
    }
    /// Returns `true` if `var` is live on entry to `block`.
    #[allow(dead_code)]
    pub fn is_live_in(&self, block: usize, var: u32) -> bool {
        self.live_in.get(block).map_or(false, |s| s.contains(&var))
    }
    /// Returns `true` if `var` is live on exit from `block`.
    #[allow(dead_code)]
    pub fn is_live_out(&self, block: usize, var: u32) -> bool {
        self.live_out.get(block).map_or(false, |s| s.contains(&var))
    }
}
1310/// Worklist for OCacheExt.
1311#[allow(dead_code)]
1312#[derive(Debug, Clone)]
1313pub struct OCacheExtWorklist {
1314    pub(super) items: std::collections::VecDeque<usize>,
1315    pub(super) present: Vec<bool>,
1316}
1317impl OCacheExtWorklist {
1318    #[allow(dead_code)]
1319    pub fn new(capacity: usize) -> Self {
1320        Self {
1321            items: std::collections::VecDeque::new(),
1322            present: vec![false; capacity],
1323        }
1324    }
1325    #[allow(dead_code)]
1326    pub fn push(&mut self, id: usize) {
1327        if id < self.present.len() && !self.present[id] {
1328            self.present[id] = true;
1329            self.items.push_back(id);
1330        }
1331    }
1332    #[allow(dead_code)]
1333    pub fn push_front(&mut self, id: usize) {
1334        if id < self.present.len() && !self.present[id] {
1335            self.present[id] = true;
1336            self.items.push_front(id);
1337        }
1338    }
1339    #[allow(dead_code)]
1340    pub fn pop(&mut self) -> Option<usize> {
1341        let id = self.items.pop_front()?;
1342        if id < self.present.len() {
1343            self.present[id] = false;
1344        }
1345        Some(id)
1346    }
1347    #[allow(dead_code)]
1348    pub fn is_empty(&self) -> bool {
1349        self.items.is_empty()
1350    }
1351    #[allow(dead_code)]
1352    pub fn len(&self) -> usize {
1353        self.items.len()
1354    }
1355    #[allow(dead_code)]
1356    pub fn contains(&self, id: usize) -> bool {
1357        id < self.present.len() && self.present[id]
1358    }
1359    #[allow(dead_code)]
1360    pub fn drain_all(&mut self) -> Vec<usize> {
1361        let v: Vec<usize> = self.items.drain(..).collect();
1362        for &id in &v {
1363            if id < self.present.len() {
1364                self.present[id] = false;
1365            }
1366        }
1367        v
1368    }
1369}
/// Dependency graph for OCacheX2.
///
/// Fixed node count `n` with forward (`adj`) and reverse (`rev`) adjacency
/// lists kept in sync; parallel edges are rejected on insertion.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCacheX2DepGraph {
    /// Number of nodes; valid ids are `0..n`.
    pub(super) n: usize,
    /// Forward adjacency: `adj[u]` lists successors of `u`.
    pub(super) adj: Vec<Vec<usize>>,
    /// Reverse adjacency: `rev[v]` lists predecessors of `v`.
    pub(super) rev: Vec<Vec<usize>>,
    /// Count of distinct edges added via `add_edge`.
    pub(super) edge_count: usize,
}
impl OCacheX2DepGraph {
    /// Creates a graph with `n` nodes and no edges.
    #[allow(dead_code)]
    pub fn new(n: usize) -> Self {
        Self {
            n,
            adj: vec![Vec::new(); n],
            rev: vec![Vec::new(); n],
            edge_count: 0,
        }
    }
    /// Adds a directed edge `from -> to`; out-of-range endpoints and
    /// duplicate edges are silently ignored.
    #[allow(dead_code)]
    pub fn add_edge(&mut self, from: usize, to: usize) {
        if from < self.n && to < self.n {
            // Linear duplicate check keeps both adjacency lists in sync
            // and duplicate-free.
            if !self.adj[from].contains(&to) {
                self.adj[from].push(to);
                self.rev[to].push(from);
                self.edge_count += 1;
            }
        }
    }
    /// Successors of `n` (empty slice when `n` is out of range).
    #[allow(dead_code)]
    pub fn succs(&self, n: usize) -> &[usize] {
        self.adj.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    /// Predecessors of `n` (empty slice when `n` is out of range).
    #[allow(dead_code)]
    pub fn preds(&self, n: usize) -> &[usize] {
        self.rev.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    /// Kahn's algorithm: returns a topological order of all nodes, or
    /// `None` if the graph contains a cycle.
    #[allow(dead_code)]
    pub fn topo_sort(&self) -> Option<Vec<usize>> {
        // In-degree of each node, derived from the reverse adjacency lists.
        let mut deg: Vec<usize> = (0..self.n).map(|i| self.rev[i].len()).collect();
        let mut q: std::collections::VecDeque<usize> =
            (0..self.n).filter(|&i| deg[i] == 0).collect();
        let mut out = Vec::with_capacity(self.n);
        while let Some(u) = q.pop_front() {
            out.push(u);
            for &v in &self.adj[u] {
                deg[v] -= 1;
                if deg[v] == 0 {
                    q.push_back(v);
                }
            }
        }
        // Any node never reaching in-degree 0 is on (or behind) a cycle.
        if out.len() == self.n {
            Some(out)
        } else {
            None
        }
    }
    /// Returns `true` if the graph has at least one directed cycle.
    #[allow(dead_code)]
    pub fn has_cycle(&self) -> bool {
        self.topo_sort().is_none()
    }
    /// All nodes reachable from `start` (including `start` itself when in
    /// range), via iterative DFS.
    #[allow(dead_code)]
    pub fn reachable(&self, start: usize) -> Vec<usize> {
        let mut vis = vec![false; self.n];
        let mut stk = vec![start];
        let mut out = Vec::new();
        while let Some(u) = stk.pop() {
            if u < self.n && !vis[u] {
                vis[u] = true;
                out.push(u);
                for &v in &self.adj[u] {
                    if !vis[v] {
                        stk.push(v);
                    }
                }
            }
        }
        out
    }
    /// Strongly connected components via Kosaraju's algorithm.
    #[allow(dead_code)]
    pub fn scc(&self) -> Vec<Vec<usize>> {
        // Pass 1: iterative DFS over the forward graph, recording nodes in
        // postorder. Each stack frame is (node, next-successor-index) so
        // the walk can resume after "returning" from a child.
        let mut visited = vec![false; self.n];
        let mut order = Vec::new();
        for i in 0..self.n {
            if !visited[i] {
                let mut stk = vec![(i, 0usize)];
                while let Some((u, idx)) = stk.last_mut() {
                    if !visited[*u] {
                        visited[*u] = true;
                    }
                    if *idx < self.adj[*u].len() {
                        let v = self.adj[*u][*idx];
                        *idx += 1;
                        if !visited[v] {
                            stk.push((v, 0));
                        }
                    } else {
                        // All successors explored: emit u in postorder.
                        order.push(*u);
                        stk.pop();
                    }
                }
            }
        }
        // Pass 2: DFS over the reverse graph, seeded in reverse postorder;
        // each tree discovered here is one strongly connected component.
        let mut comp = vec![usize::MAX; self.n];
        let mut components: Vec<Vec<usize>> = Vec::new();
        for &start in order.iter().rev() {
            if comp[start] == usize::MAX {
                let cid = components.len();
                let mut component = Vec::new();
                let mut stk = vec![start];
                while let Some(u) = stk.pop() {
                    if comp[u] == usize::MAX {
                        comp[u] = cid;
                        component.push(u);
                        for &v in &self.rev[u] {
                            if comp[v] == usize::MAX {
                                stk.push(v);
                            }
                        }
                    }
                }
                components.push(component);
            }
        }
        components
    }
    /// Number of nodes.
    #[allow(dead_code)]
    pub fn node_count(&self) -> usize {
        self.n
    }
    /// Number of distinct edges.
    #[allow(dead_code)]
    pub fn edge_count(&self) -> usize {
        self.edge_count
    }
}
1506/// Pass registry for OCacheX2.
1507#[allow(dead_code)]
1508#[derive(Debug, Default)]
1509pub struct OCacheX2PassRegistry {
1510    pub(super) configs: Vec<OCacheX2PassConfig>,
1511    pub(super) stats: Vec<OCacheX2PassStats>,
1512}
1513impl OCacheX2PassRegistry {
1514    #[allow(dead_code)]
1515    pub fn new() -> Self {
1516        Self::default()
1517    }
1518    #[allow(dead_code)]
1519    pub fn register(&mut self, c: OCacheX2PassConfig) {
1520        self.stats.push(OCacheX2PassStats::new());
1521        self.configs.push(c);
1522    }
1523    #[allow(dead_code)]
1524    pub fn len(&self) -> usize {
1525        self.configs.len()
1526    }
1527    #[allow(dead_code)]
1528    pub fn is_empty(&self) -> bool {
1529        self.configs.is_empty()
1530    }
1531    #[allow(dead_code)]
1532    pub fn get(&self, i: usize) -> Option<&OCacheX2PassConfig> {
1533        self.configs.get(i)
1534    }
1535    #[allow(dead_code)]
1536    pub fn get_stats(&self, i: usize) -> Option<&OCacheX2PassStats> {
1537        self.stats.get(i)
1538    }
1539    #[allow(dead_code)]
1540    pub fn enabled_passes(&self) -> Vec<&OCacheX2PassConfig> {
1541        self.configs.iter().filter(|c| c.enabled).collect()
1542    }
1543    #[allow(dead_code)]
1544    pub fn passes_in_phase(&self, ph: &OCacheX2PassPhase) -> Vec<&OCacheX2PassConfig> {
1545        self.configs
1546            .iter()
1547            .filter(|c| c.enabled && &c.phase == ph)
1548            .collect()
1549    }
1550    #[allow(dead_code)]
1551    pub fn total_nodes_visited(&self) -> usize {
1552        self.stats.iter().map(|s| s.nodes_visited).sum()
1553    }
1554    #[allow(dead_code)]
1555    pub fn any_changed(&self) -> bool {
1556        self.stats.iter().any(|s| s.changed)
1557    }
1558}
/// Describes how a single loop variable should be tiled.
#[derive(Debug, Clone, PartialEq)]
pub struct LoopTile {
    /// The original (un-tiled) loop variable name.
    pub original_var: String,
    /// The outer (tile-index) variable name generated by tiling.
    pub tile_var: String,
    /// The inner (intra-tile) variable name generated by tiling.
    pub intra_var: String,
    /// The tile size in elements.
    pub tile_size: u64,
}
impl LoopTile {
    /// Creates a new `LoopTile`, deriving the `<var>_tile` (outer) and
    /// `<var>_intra` (inner) names from the original loop variable.
    pub fn new(original_var: impl Into<String>, tile_size: u64) -> Self {
        let original_var = original_var.into();
        LoopTile {
            tile_var: format!("{}_tile", original_var),
            intra_var: format!("{}_intra", original_var),
            original_var,
            tile_size,
        }
    }
}
1585/// A single memory access event captured during analysis.
1586#[derive(Debug, Clone, PartialEq)]
1587pub struct MemoryAccess {
1588    /// The variable being accessed.
1589    pub var_name: String,
1590    /// Byte offset from the base of the variable (0 = base address).
1591    pub offset: i64,
1592    /// The access pattern observed or inferred.
1593    pub pattern: AccessPattern,
1594    /// For strided accesses, the stride in bytes.
1595    pub stride: Option<i64>,
1596    /// Estimated number of times this access occurs.
1597    pub count: u64,
1598}
1599impl MemoryAccess {
1600    /// Creates a new `MemoryAccess` with the given parameters.
1601    pub fn new(
1602        var_name: impl Into<String>,
1603        offset: i64,
1604        pattern: AccessPattern,
1605        stride: Option<i64>,
1606        count: u64,
1607    ) -> Self {
1608        MemoryAccess {
1609            var_name: var_name.into(),
1610            offset,
1611            pattern,
1612            stride,
1613            count,
1614        }
1615    }
1616    /// Returns `true` if this access is likely to produce cache hits.
1617    pub fn is_cache_friendly(&self) -> bool {
1618        self.pattern.is_cache_friendly()
1619    }
1620}
/// The kind of memory access the prefetch prepares for.
///
/// Used by `PrefetchHint` to classify generated prefetch hints.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrefetchType {
    /// Prefetch for a future read.
    Read,
    /// Prefetch for a future write (exclusive ownership hint).
    Write,
    /// Non-temporal prefetch: bypass caches (for streaming data not reused).
    NonTemporal,
}
/// Statistics for OCacheExt passes.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCacheExtPassStats {
    /// Iterations executed by the pass.
    pub iterations: usize,
    /// True once the pass has modified at least one node.
    pub changed: bool,
    /// Total nodes inspected.
    pub nodes_visited: usize,
    /// Total nodes rewritten.
    pub nodes_modified: usize,
    /// Accumulated runtime in milliseconds.
    pub time_ms: u64,
    /// Memory attributed to the pass, in bytes (merged via max).
    pub memory_bytes: usize,
    /// Errors encountered.
    pub errors: usize,
}
impl OCacheExtPassStats {
    /// Zeroed statistics.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }
    /// Record an inspected node.
    #[allow(dead_code)]
    pub fn visit(&mut self) {
        self.nodes_visited += 1;
    }
    /// Record a rewritten node; marks the stats changed.
    #[allow(dead_code)]
    pub fn modify(&mut self) {
        self.nodes_modified += 1;
        self.changed = true;
    }
    /// Record one iteration of the pass.
    #[allow(dead_code)]
    pub fn iterate(&mut self) {
        self.iterations += 1;
    }
    /// Record an error.
    #[allow(dead_code)]
    pub fn error(&mut self) {
        self.errors += 1;
    }
    /// Modified-to-visited ratio; 0.0 before any node is visited.
    #[allow(dead_code)]
    pub fn efficiency(&self) -> f64 {
        if self.nodes_visited == 0 {
            return 0.0;
        }
        self.nodes_modified as f64 / self.nodes_visited as f64
    }
    /// Fold `rhs` into `self`: sums counters, ORs `changed`, keeps the
    /// larger `memory_bytes`.
    #[allow(dead_code)]
    pub fn merge(&mut self, rhs: &OCacheExtPassStats) {
        self.iterations += rhs.iterations;
        self.nodes_visited += rhs.nodes_visited;
        self.nodes_modified += rhs.nodes_modified;
        self.time_ms += rhs.time_ms;
        self.errors += rhs.errors;
        self.memory_bytes = self.memory_bytes.max(rhs.memory_bytes);
        self.changed = self.changed || rhs.changed;
    }
}
1684/// Dominator tree for OCacheX2.
1685#[allow(dead_code)]
1686#[derive(Debug, Clone)]
1687pub struct OCacheX2DomTree {
1688    pub(super) idom: Vec<Option<usize>>,
1689    pub(super) children: Vec<Vec<usize>>,
1690    pub(super) depth: Vec<usize>,
1691}
1692impl OCacheX2DomTree {
1693    #[allow(dead_code)]
1694    pub fn new(n: usize) -> Self {
1695        Self {
1696            idom: vec![None; n],
1697            children: vec![Vec::new(); n],
1698            depth: vec![0; n],
1699        }
1700    }
1701    #[allow(dead_code)]
1702    pub fn set_idom(&mut self, node: usize, dom: usize) {
1703        if node < self.idom.len() {
1704            self.idom[node] = Some(dom);
1705            if dom < self.children.len() {
1706                self.children[dom].push(node);
1707            }
1708            self.depth[node] = if dom < self.depth.len() {
1709                self.depth[dom] + 1
1710            } else {
1711                1
1712            };
1713        }
1714    }
1715    #[allow(dead_code)]
1716    pub fn dominates(&self, a: usize, mut b: usize) -> bool {
1717        if a == b {
1718            return true;
1719        }
1720        let n = self.idom.len();
1721        for _ in 0..n {
1722            match self.idom.get(b).copied().flatten() {
1723                None => return false,
1724                Some(p) if p == a => return true,
1725                Some(p) if p == b => return false,
1726                Some(p) => b = p,
1727            }
1728        }
1729        false
1730    }
1731    #[allow(dead_code)]
1732    pub fn children_of(&self, n: usize) -> &[usize] {
1733        self.children.get(n).map(|v| v.as_slice()).unwrap_or(&[])
1734    }
1735    #[allow(dead_code)]
1736    pub fn depth_of(&self, n: usize) -> usize {
1737        self.depth.get(n).copied().unwrap_or(0)
1738    }
1739    #[allow(dead_code)]
1740    pub fn lca(&self, mut a: usize, mut b: usize) -> usize {
1741        let n = self.idom.len();
1742        for _ in 0..(2 * n) {
1743            if a == b {
1744                return a;
1745            }
1746            if self.depth_of(a) > self.depth_of(b) {
1747                a = self.idom.get(a).and_then(|x| *x).unwrap_or(a);
1748            } else {
1749                b = self.idom.get(b).and_then(|x| *x).unwrap_or(b);
1750            }
1751        }
1752        0
1753    }
1754}
1755#[allow(dead_code)]
1756#[derive(Debug, Clone)]
1757pub struct OCDepGraph {
1758    pub(super) nodes: Vec<u32>,
1759    pub(super) edges: Vec<(u32, u32)>,
1760}
1761impl OCDepGraph {
1762    #[allow(dead_code)]
1763    pub fn new() -> Self {
1764        OCDepGraph {
1765            nodes: Vec::new(),
1766            edges: Vec::new(),
1767        }
1768    }
1769    #[allow(dead_code)]
1770    pub fn add_node(&mut self, id: u32) {
1771        if !self.nodes.contains(&id) {
1772            self.nodes.push(id);
1773        }
1774    }
1775    #[allow(dead_code)]
1776    pub fn add_dep(&mut self, dep: u32, dependent: u32) {
1777        self.add_node(dep);
1778        self.add_node(dependent);
1779        self.edges.push((dep, dependent));
1780    }
1781    #[allow(dead_code)]
1782    pub fn dependents_of(&self, node: u32) -> Vec<u32> {
1783        self.edges
1784            .iter()
1785            .filter(|(d, _)| *d == node)
1786            .map(|(_, dep)| *dep)
1787            .collect()
1788    }
1789    #[allow(dead_code)]
1790    pub fn dependencies_of(&self, node: u32) -> Vec<u32> {
1791        self.edges
1792            .iter()
1793            .filter(|(_, dep)| *dep == node)
1794            .map(|(d, _)| *d)
1795            .collect()
1796    }
1797    #[allow(dead_code)]
1798    pub fn topological_sort(&self) -> Vec<u32> {
1799        let mut in_degree: std::collections::HashMap<u32, u32> = std::collections::HashMap::new();
1800        for &n in &self.nodes {
1801            in_degree.insert(n, 0);
1802        }
1803        for (_, dep) in &self.edges {
1804            *in_degree.entry(*dep).or_insert(0) += 1;
1805        }
1806        let mut queue: std::collections::VecDeque<u32> = self
1807            .nodes
1808            .iter()
1809            .filter(|&&n| in_degree[&n] == 0)
1810            .copied()
1811            .collect();
1812        let mut result = Vec::new();
1813        while let Some(node) = queue.pop_front() {
1814            result.push(node);
1815            for dep in self.dependents_of(node) {
1816                let cnt = in_degree.entry(dep).or_insert(0);
1817                *cnt = cnt.saturating_sub(1);
1818                if *cnt == 0 {
1819                    queue.push_back(dep);
1820                }
1821            }
1822        }
1823        result
1824    }
1825    #[allow(dead_code)]
1826    pub fn has_cycle(&self) -> bool {
1827        self.topological_sort().len() < self.nodes.len()
1828    }
1829}
1830/// A software prefetch hint to be emitted before the actual access.
1831#[derive(Debug, Clone, PartialEq)]
1832pub struct PrefetchHint {
1833    /// Expression string representing the address to prefetch.
1834    pub address_expr: String,
1835    /// How many iterations ahead this prefetch is issued.
1836    pub distance: u64,
1837    /// The type of prefetch hint.
1838    pub hint_type: PrefetchType,
1839}
1840impl PrefetchHint {
1841    /// Creates a new `PrefetchHint`.
1842    pub fn new(address_expr: impl Into<String>, distance: u64, hint_type: PrefetchType) -> Self {
1843        PrefetchHint {
1844            address_expr: address_expr.into(),
1845            distance,
1846            hint_type,
1847        }
1848    }
1849}
1850#[allow(dead_code)]
1851pub struct OCPassRegistry {
1852    pub(super) configs: Vec<OCPassConfig>,
1853    pub(super) stats: std::collections::HashMap<String, OCPassStats>,
1854}
1855impl OCPassRegistry {
1856    #[allow(dead_code)]
1857    pub fn new() -> Self {
1858        OCPassRegistry {
1859            configs: Vec::new(),
1860            stats: std::collections::HashMap::new(),
1861        }
1862    }
1863    #[allow(dead_code)]
1864    pub fn register(&mut self, config: OCPassConfig) {
1865        self.stats
1866            .insert(config.pass_name.clone(), OCPassStats::new());
1867        self.configs.push(config);
1868    }
1869    #[allow(dead_code)]
1870    pub fn enabled_passes(&self) -> Vec<&OCPassConfig> {
1871        self.configs.iter().filter(|c| c.enabled).collect()
1872    }
1873    #[allow(dead_code)]
1874    pub fn get_stats(&self, name: &str) -> Option<&OCPassStats> {
1875        self.stats.get(name)
1876    }
1877    #[allow(dead_code)]
1878    pub fn total_passes(&self) -> usize {
1879        self.configs.len()
1880    }
1881    #[allow(dead_code)]
1882    pub fn enabled_count(&self) -> usize {
1883        self.enabled_passes().len()
1884    }
1885    #[allow(dead_code)]
1886    pub fn update_stats(&mut self, name: &str, changes: u64, time_ms: u64, iter: u32) {
1887        if let Some(stats) = self.stats.get_mut(name) {
1888            stats.record_run(changes, time_ms, iter);
1889        }
1890    }
1891}
1892/// Analysis cache for OCacheX2.
1893#[allow(dead_code)]
1894#[derive(Debug)]
1895pub struct OCacheX2Cache {
1896    pub(super) entries: Vec<(u64, Vec<u8>, bool, u32)>,
1897    pub(super) cap: usize,
1898    pub(super) total_hits: u64,
1899    pub(super) total_misses: u64,
1900}
1901impl OCacheX2Cache {
1902    #[allow(dead_code)]
1903    pub fn new(cap: usize) -> Self {
1904        Self {
1905            entries: Vec::new(),
1906            cap,
1907            total_hits: 0,
1908            total_misses: 0,
1909        }
1910    }
1911    #[allow(dead_code)]
1912    pub fn get(&mut self, key: u64) -> Option<&[u8]> {
1913        for e in self.entries.iter_mut() {
1914            if e.0 == key && e.2 {
1915                e.3 += 1;
1916                self.total_hits += 1;
1917                return Some(&e.1);
1918            }
1919        }
1920        self.total_misses += 1;
1921        None
1922    }
1923    #[allow(dead_code)]
1924    pub fn put(&mut self, key: u64, data: Vec<u8>) {
1925        if self.entries.len() >= self.cap {
1926            self.entries.retain(|e| e.2);
1927            if self.entries.len() >= self.cap {
1928                self.entries.remove(0);
1929            }
1930        }
1931        self.entries.push((key, data, true, 0));
1932    }
1933    #[allow(dead_code)]
1934    pub fn invalidate(&mut self) {
1935        for e in self.entries.iter_mut() {
1936            e.2 = false;
1937        }
1938    }
1939    #[allow(dead_code)]
1940    pub fn hit_rate(&self) -> f64 {
1941        let t = self.total_hits + self.total_misses;
1942        if t == 0 {
1943            0.0
1944        } else {
1945            self.total_hits as f64 / t as f64
1946        }
1947    }
1948    #[allow(dead_code)]
1949    pub fn live_count(&self) -> usize {
1950        self.entries.iter().filter(|e| e.2).count()
1951    }
1952}
/// The top-level cache-aware / data-locality optimization pass.
///
/// Usage:
/// ```rust
/// use oxilean_codegen::opt_cache::{CacheOptConfig, CacheOptPass};
/// let mut pass = CacheOptPass::new(CacheOptConfig::default());
/// // pass.run(&mut decls);
/// ```
pub struct CacheOptPass {
    /// Configuration for this pass.
    pub config: CacheOptConfig,
    /// Accumulated report from the last `run` call; reset to its default
    /// at the start of each `run`.
    pub report: CacheOptReport,
}
impl CacheOptPass {
    /// Creates a new `CacheOptPass` with the given configuration.
    pub fn new(config: CacheOptConfig) -> Self {
        CacheOptPass {
            config,
            report: CacheOptReport::default(),
        }
    }
    /// Runs all cache optimizations over the provided function declarations.
    ///
    /// For each declaration: analyses locality, optionally applies loop
    /// tiling and prefetch hints, then records field-reordering metadata.
    /// Finally estimates the overall miss reduction across all declarations.
    pub fn run(&mut self, decls: &mut [LcnfFunDecl]) {
        // Start from a clean report so repeated `run` calls do not accumulate.
        self.report = CacheOptReport::default();
        for decl in decls.iter_mut() {
            let info = self.analyze_locality(decl);
            // Tiling is only attempted when the working set spills out of L1.
            if self.config.tiling.enable_l1_tiling && !info.fits_in_l1() {
                let tiles = self.propose_tiles(decl, &info);
                if !tiles.is_empty() {
                    let n = tiles.len();
                    self.apply_loop_tiling(decl, &tiles);
                    self.report.num_loops_tiled += n;
                }
            }
            // A large average reuse distance suggests prefetching can hide
            // latency; the 8.0 threshold is a heuristic cutoff.
            if self.config.enable_prefetch && info.reuse_distance > 8.0 {
                let hints = self.generate_prefetch_hints(&info);
                let n = hints.len();
                self.insert_prefetch_hints(decl, &hints);
                self.report.num_prefetches_inserted += n;
            }
            self.reorder_data_structures(decl);
        }
        if !decls.is_empty() {
            self.report.estimated_cache_miss_reduction = self.estimate_miss_reduction(decls);
        }
    }
    /// Analyses data locality for a single function declaration.
    pub fn analyze_locality(&self, decl: &LcnfFunDecl) -> DataLocalityInfo {
        let mut accesses: Vec<MemoryAccess> = Vec::new();
        Self::collect_accesses(&decl.body, &mut accesses);
        let working_set_bytes = self.estimate_working_set(&accesses);
        let best_cache_level = self.classify_cache_level(working_set_bytes);
        let reuse_distance = self.compute_reuse_distance(&accesses);
        DataLocalityInfo {
            accesses,
            working_set_bytes,
            best_cache_level,
            reuse_distance,
        }
    }
    /// Recursively collects `MemoryAccess` records from an LCNF expression tree.
    pub(super) fn collect_accesses(expr: &LcnfExpr, out: &mut Vec<MemoryAccess>) {
        match expr {
            LcnfExpr::Let {
                value, body, id, ..
            } => {
                match value {
                    // Field projection: modelled as a single sequential
                    // 8-byte read at slot `idx` within `src`.
                    LcnfLetValue::Proj(field, idx, src) => {
                        out.push(MemoryAccess::new(
                            format!("{}.{}", src, field),
                            (*idx as i64) * 8,
                            AccessPattern::Sequential,
                            Some(8),
                            1,
                        ));
                    }
                    // Constructor: each argument is modelled as a sequential
                    // 8-byte slot of the freshly built value (named by the
                    // ctor name and the binding id).
                    LcnfLetValue::Ctor(name, _, args) => {
                        for (i, _arg) in args.iter().enumerate() {
                            out.push(MemoryAccess::new(
                                format!("{}#{}", name, id),
                                (i as i64) * 8,
                                AccessPattern::Sequential,
                                Some(8),
                                1,
                            ));
                        }
                    }
                    // Function application: arguments are treated as random
                    // accesses of unknown size (the callee is opaque here).
                    LcnfLetValue::App(_, args) => {
                        for (i, _arg) in args.iter().enumerate() {
                            out.push(MemoryAccess::new(
                                format!("arg_{}", i),
                                0,
                                AccessPattern::Random,
                                None,
                                1,
                            ));
                        }
                    }
                    _ => {}
                }
                Self::collect_accesses(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    Self::collect_accesses(&alt.body, out);
                }
                if let Some(def) = default {
                    Self::collect_accesses(def, out);
                }
            }
            // Leaf expressions contribute no modelled memory accesses.
            LcnfExpr::Return(_) | LcnfExpr::Unreachable | LcnfExpr::TailCall(_, _) => {}
        }
    }
    /// Estimates the working set size from the collected accesses.
    ///
    /// Heuristic: one cache line per distinct accessed variable name.
    pub(super) fn estimate_working_set(&self, accesses: &[MemoryAccess]) -> u64 {
        use std::collections::HashSet;
        let distinct: HashSet<&str> = accesses.iter().map(|a| a.var_name.as_str()).collect();
        (distinct.len() as u64) * self.config.cache_line_size
    }
    /// Classifies the working set into the most appropriate cache level.
    pub(super) fn classify_cache_level(&self, working_set_bytes: u64) -> CacheLevel {
        if working_set_bytes <= CacheLevel::L1.capacity_bytes() {
            CacheLevel::L1
        } else if working_set_bytes <= CacheLevel::L2.capacity_bytes() {
            CacheLevel::L2
        } else if working_set_bytes <= CacheLevel::L3.capacity_bytes() {
            CacheLevel::L3
        } else {
            CacheLevel::Ram
        }
    }
    /// Computes a heuristic reuse distance from the access list.
    ///
    /// Reuse distance is approximated as the average number of distinct
    /// variables accessed between consecutive accesses to the same variable.
    pub(super) fn compute_reuse_distance(&self, accesses: &[MemoryAccess]) -> f64 {
        if accesses.len() < 2 {
            return 0.0;
        }
        use std::collections::HashMap;
        let mut last_seen: HashMap<&str, usize> = HashMap::new();
        let mut total_distance: f64 = 0.0;
        let mut reuse_count: usize = 0;
        for (i, acc) in accesses.iter().enumerate() {
            if let Some(&prev) = last_seen.get(acc.var_name.as_str()) {
                total_distance += (i - prev) as f64;
                reuse_count += 1;
            }
            last_seen.insert(&acc.var_name, i);
        }
        if reuse_count == 0 {
            // No variable was touched twice: report the pessimistic maximum
            // (the whole access stream).
            accesses.len() as f64
        } else {
            total_distance / reuse_count as f64
        }
    }
    /// Proposes a set of loop tiles based on locality analysis.
    ///
    /// A tile is proposed for each parameter that appears in at least one
    /// sequential access.
    pub(super) fn propose_tiles(
        &self,
        decl: &LcnfFunDecl,
        info: &DataLocalityInfo,
    ) -> Vec<LoopTile> {
        let mut tiles = Vec::new();
        for param in &decl.params {
            let used_in_sequential = info.accesses.iter().any(|a| {
                a.var_name.contains(&param.name) && matches!(a.pattern, AccessPattern::Sequential)
            });
            if used_in_sequential {
                // Pick the configured L1 tile size when the working set
                // fits in L1, otherwise the L2 tile size.
                let tile_size = if info.fits_in_l1() {
                    self.config.tiling.tile_size_l1
                } else {
                    self.config.tiling.tile_size_l2
                };
                tiles.push(LoopTile::new(&param.name, tile_size));
            }
        }
        tiles
    }
    /// Applies loop tiling annotations to a function declaration.
    ///
    /// In the LCNF IR there are no explicit loop constructs, so this pass
    /// records the tiling decision metadata for downstream backends to act on.
    /// The body is traversed and `Proj` accesses on tiled variables are
    /// annotated via a comment in the surrounding let binding name hint.
    pub fn apply_loop_tiling(&self, decl: &mut LcnfFunDecl, tiles: &[LoopTile]) {
        Self::annotate_tiling(&mut decl.body, tiles);
    }
    /// Walks the expression tree, suffixing names touched by tiling.
    pub(super) fn annotate_tiling(expr: &mut LcnfExpr, tiles: &[LoopTile]) {
        match expr {
            LcnfExpr::Let {
                name, value, body, ..
            } => {
                // Mark bindings of tiled variables with a `_tiled` suffix;
                // the `ends_with` check keeps this idempotent.
                if tiles.iter().any(|t| name.contains(&t.original_var)) && !name.ends_with("_tiled")
                {
                    *name = format!("{}_tiled", name);
                }
                // Mark projections whose source is a tiled variable so
                // backends can treat the field as tile-resident.
                if let LcnfLetValue::Proj(field, _idx, src) = value {
                    if tiles
                        .iter()
                        .any(|t| src.to_string().contains(&t.original_var))
                    {
                        *field = format!("{}_tile_cached", field);
                    }
                }
                Self::annotate_tiling(body, tiles);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts.iter_mut() {
                    Self::annotate_tiling(&mut alt.body, tiles);
                }
                if let Some(def) = default {
                    Self::annotate_tiling(def, tiles);
                }
            }
            LcnfExpr::Return(_) | LcnfExpr::Unreachable | LcnfExpr::TailCall(_, _) => {}
        }
    }
    /// Generates software prefetch hints based on locality info.
    pub(super) fn generate_prefetch_hints(&self, info: &DataLocalityInfo) -> Vec<PrefetchHint> {
        let mut hints = Vec::new();
        for acc in &info.accesses {
            if !acc.is_cache_friendly() {
                // Irregular access: issue a plain read prefetch ahead of use.
                hints.push(PrefetchHint::new(
                    format!("&{}[{}]", acc.var_name, self.config.prefetch_distance),
                    self.config.prefetch_distance,
                    PrefetchType::Read,
                ));
            } else if matches!(acc.pattern, AccessPattern::Sequential) && acc.count > 8 {
                // Long sequential stream: use a non-temporal prefetch so
                // streamed data does not displace the resident working set.
                hints.push(PrefetchHint::new(
                    format!("&{}[{}]", acc.var_name, self.config.prefetch_distance),
                    self.config.prefetch_distance,
                    PrefetchType::NonTemporal,
                ));
            }
        }
        hints
    }
    /// Records prefetch hint metadata in the function declaration name hint.
    ///
    /// The hint count is encoded as a `__prefetch_N` name suffix; the
    /// `contains` check keeps the annotation idempotent across runs.
    pub(super) fn insert_prefetch_hints(&self, decl: &mut LcnfFunDecl, hints: &[PrefetchHint]) {
        if hints.is_empty() {
            return;
        }
        let annotation = format!("__prefetch_{}", hints.len());
        if !decl.name.contains(&annotation) {
            decl.name = format!("{}{}", decl.name, annotation);
        }
    }
    /// Reorders struct fields for improved spatial locality.
    ///
    /// Uses `FieldReorderingAnalysis` to collect layout information, then
    /// records the reordering decision as metadata on the declaration.
    pub fn reorder_data_structures(&self, decl: &mut LcnfFunDecl) {
        let layouts = FieldReorderingAnalysis::analyze(decl);
        for layout in &layouts {
            // Only layouts with padding or misalignment are worth flagging;
            // the decision is recorded as a `__reorder_<name>` name suffix.
            if layout.padding_bytes() > 0 || !layout.is_cache_aligned() {
                let annotation = format!("__reorder_{}", layout.struct_name);
                if !decl.name.contains(&annotation) {
                    decl.name = format!("{}{}", decl.name, annotation);
                }
            }
        }
    }
    /// Estimates the overall cache miss reduction across all declarations.
    ///
    /// Returns the average cache-unfriendly fraction of accesses scaled by
    /// a heuristic tiling factor (0.4 with L1 tiling enabled, else 0.2).
    pub(super) fn estimate_miss_reduction(&self, decls: &[LcnfFunDecl]) -> f64 {
        if decls.is_empty() {
            return 0.0;
        }
        let total_friendly: f64 = decls
            .iter()
            .map(|d| {
                let info = self.analyze_locality(d);
                info.cache_friendly_fraction()
            })
            .sum();
        let avg_friendly = total_friendly / decls.len() as f64;
        let tiling_factor = if self.config.tiling.enable_l1_tiling {
            0.4
        } else {
            0.2
        };
        (1.0 - avg_friendly) * tiling_factor
    }
}
/// Liveness analysis for OCacheX2.
///
/// Per-block def/use and live-in/live-out sets, stored as small vectors of
/// variable ids.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCacheX2Liveness {
    /// Variables live on entry to each block.
    pub live_in: Vec<Vec<usize>>,
    /// Variables live on exit from each block.
    pub live_out: Vec<Vec<usize>>,
    /// Variables defined in each block.
    pub defs: Vec<Vec<usize>>,
    /// Variables used in each block.
    pub uses: Vec<Vec<usize>>,
}
impl OCacheX2Liveness {
    /// Creates empty liveness sets for `n` blocks.
    #[allow(dead_code)]
    pub fn new(n: usize) -> Self {
        let blank = vec![Vec::new(); n];
        Self {
            live_in: blank.clone(),
            live_out: blank.clone(),
            defs: blank.clone(),
            uses: blank,
        }
    }
    /// Returns `true` if variable `v` is live on entry to block `b`.
    #[allow(dead_code)]
    pub fn live_in(&self, b: usize, v: usize) -> bool {
        self.live_in.get(b).map_or(false, |set| set.contains(&v))
    }
    /// Returns `true` if variable `v` is live on exit from block `b`.
    #[allow(dead_code)]
    pub fn live_out(&self, b: usize, v: usize) -> bool {
        self.live_out.get(b).map_or(false, |set| set.contains(&v))
    }
    /// Records that block `b` defines `v`; duplicates and out-of-range
    /// block ids are silently ignored.
    #[allow(dead_code)]
    pub fn add_def(&mut self, b: usize, v: usize) {
        match self.defs.get_mut(b) {
            Some(set) if !set.contains(&v) => set.push(v),
            _ => {}
        }
    }
    /// Records that block `b` uses `v`; duplicates and out-of-range block
    /// ids are silently ignored.
    #[allow(dead_code)]
    pub fn add_use(&mut self, b: usize, v: usize) {
        match self.uses.get_mut(b) {
            Some(set) if !set.contains(&v) => set.push(v),
            _ => {}
        }
    }
    /// Returns `true` if variable `v` is used in block `b`.
    #[allow(dead_code)]
    pub fn var_is_used_in_block(&self, b: usize, v: usize) -> bool {
        self.uses.get(b).map_or(false, |set| set.contains(&v))
    }
    /// Returns `true` if variable `v` is defined in block `b`.
    #[allow(dead_code)]
    pub fn var_is_def_in_block(&self, b: usize, v: usize) -> bool {
        self.defs.get(b).map_or(false, |set| set.contains(&v))
    }
}
/// Dominator tree for OCacheExt.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCacheExtDomTree {
    /// Immediate dominator of each node; `None` for roots / unset nodes.
    pub(super) idom: Vec<Option<usize>>,
    /// Nodes immediately dominated by each node.
    pub(super) children: Vec<Vec<usize>>,
    /// Depth of each node in the tree (roots have depth 0).
    pub(super) depth: Vec<usize>,
}
2300impl OCacheExtDomTree {
2301    #[allow(dead_code)]
2302    pub fn new(n: usize) -> Self {
2303        Self {
2304            idom: vec![None; n],
2305            children: vec![Vec::new(); n],
2306            depth: vec![0; n],
2307        }
2308    }
2309    #[allow(dead_code)]
2310    pub fn set_idom(&mut self, node: usize, dom: usize) {
2311        if node < self.idom.len() {
2312            self.idom[node] = Some(dom);
2313            if dom < self.children.len() {
2314                self.children[dom].push(node);
2315            }
2316            self.depth[node] = if dom < self.depth.len() {
2317                self.depth[dom] + 1
2318            } else {
2319                1
2320            };
2321        }
2322    }
2323    #[allow(dead_code)]
2324    pub fn dominates(&self, a: usize, mut b: usize) -> bool {
2325        if a == b {
2326            return true;
2327        }
2328        let n = self.idom.len();
2329        for _ in 0..n {
2330            match self.idom.get(b).copied().flatten() {
2331                None => return false,
2332                Some(p) if p == a => return true,
2333                Some(p) if p == b => return false,
2334                Some(p) => b = p,
2335            }
2336        }
2337        false
2338    }
2339    #[allow(dead_code)]
2340    pub fn children_of(&self, n: usize) -> &[usize] {
2341        self.children.get(n).map(|v| v.as_slice()).unwrap_or(&[])
2342    }
2343    #[allow(dead_code)]
2344    pub fn depth_of(&self, n: usize) -> usize {
2345        self.depth.get(n).copied().unwrap_or(0)
2346    }
2347    #[allow(dead_code)]
2348    pub fn lca(&self, mut a: usize, mut b: usize) -> usize {
2349        let n = self.idom.len();
2350        for _ in 0..(2 * n) {
2351            if a == b {
2352                return a;
2353            }
2354            if self.depth_of(a) > self.depth_of(b) {
2355                a = self.idom.get(a).and_then(|x| *x).unwrap_or(a);
2356            } else {
2357                b = self.idom.get(b).and_then(|x| *x).unwrap_or(b);
2358            }
2359        }
2360        0
2361    }
2362}
/// Aggregate statistics for an optimization pass across runs.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCPassStats {
    /// Total number of recorded runs.
    pub total_runs: u32,
    /// Runs recorded as successful.
    pub successful_runs: u32,
    /// Total changes accumulated over all runs.
    pub total_changes: u64,
    /// Accumulated time in milliseconds.
    pub time_ms: u64,
    /// Iteration count of the most recent run.
    pub iterations_used: u32,
}
impl OCPassStats {
    /// Creates zeroed statistics.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }
    /// Records one run. Note: every recorded run counts as successful —
    /// there is no failure variant in this API.
    #[allow(dead_code)]
    pub fn record_run(&mut self, changes: u64, time_ms: u64, iterations: u32) {
        self.total_runs += 1;
        self.successful_runs += 1;
        self.total_changes += changes;
        self.time_ms += time_ms;
        // Deliberately overwritten rather than accumulated: last run wins.
        self.iterations_used = iterations;
    }
    /// Mean number of changes per run; `0.0` before any run.
    #[allow(dead_code)]
    pub fn average_changes_per_run(&self) -> f64 {
        match self.total_runs {
            0 => 0.0,
            runs => self.total_changes as f64 / runs as f64,
        }
    }
    /// Fraction of runs recorded as successful; `0.0` before any run.
    #[allow(dead_code)]
    pub fn success_rate(&self) -> f64 {
        match self.total_runs {
            0 => 0.0,
            runs => self.successful_runs as f64 / runs as f64,
        }
    }
    /// One-line human-readable summary of the counters.
    #[allow(dead_code)]
    pub fn format_summary(&self) -> String {
        format!(
            "Runs: {}/{}, Changes: {}, Time: {}ms",
            self.successful_runs, self.total_runs, self.total_changes, self.time_ms
        )
    }
}
/// Worklist for OCacheX2.
///
/// A FIFO queue of ids in `0..capacity` with a membership bitmap so each
/// id is queued at most once at a time.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCacheX2Worklist {
    /// Queued ids; the front is the next to be popped.
    pub(super) items: std::collections::VecDeque<usize>,
    /// `present[id]` is `true` while `id` sits in `items`.
    pub(super) present: Vec<bool>,
}
2414impl OCacheX2Worklist {
2415    #[allow(dead_code)]
2416    pub fn new(capacity: usize) -> Self {
2417        Self {
2418            items: std::collections::VecDeque::new(),
2419            present: vec![false; capacity],
2420        }
2421    }
2422    #[allow(dead_code)]
2423    pub fn push(&mut self, id: usize) {
2424        if id < self.present.len() && !self.present[id] {
2425            self.present[id] = true;
2426            self.items.push_back(id);
2427        }
2428    }
2429    #[allow(dead_code)]
2430    pub fn push_front(&mut self, id: usize) {
2431        if id < self.present.len() && !self.present[id] {
2432            self.present[id] = true;
2433            self.items.push_front(id);
2434        }
2435    }
2436    #[allow(dead_code)]
2437    pub fn pop(&mut self) -> Option<usize> {
2438        let id = self.items.pop_front()?;
2439        if id < self.present.len() {
2440            self.present[id] = false;
2441        }
2442        Some(id)
2443    }
2444    #[allow(dead_code)]
2445    pub fn is_empty(&self) -> bool {
2446        self.items.is_empty()
2447    }
2448    #[allow(dead_code)]
2449    pub fn len(&self) -> usize {
2450        self.items.len()
2451    }
2452    #[allow(dead_code)]
2453    pub fn contains(&self, id: usize) -> bool {
2454        id < self.present.len() && self.present[id]
2455    }
2456    #[allow(dead_code)]
2457    pub fn drain_all(&mut self) -> Vec<usize> {
2458        let v: Vec<usize> = self.items.drain(..).collect();
2459        for &id in &v {
2460            if id < self.present.len() {
2461                self.present[id] = false;
2462            }
2463        }
2464        v
2465    }
2466}
/// Lifecycle phases of an optimization-cache pass.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub enum OCPassPhase {
    /// Non-modifying analysis phase.
    Analysis,
    /// IR-modifying transformation phase.
    Transformation,
    /// Non-modifying verification phase.
    Verification,
    /// IR-modifying cleanup phase.
    Cleanup,
}
impl OCPassPhase {
    /// Lower-case display name of the phase.
    #[allow(dead_code)]
    pub fn name(&self) -> &str {
        match self {
            Self::Analysis => "analysis",
            Self::Transformation => "transformation",
            Self::Verification => "verification",
            Self::Cleanup => "cleanup",
        }
    }
    /// Returns `true` for phases that are allowed to mutate the IR.
    #[allow(dead_code)]
    pub fn is_modifying(&self) -> bool {
        match self {
            Self::Transformation | Self::Cleanup => true,
            Self::Analysis | Self::Verification => false,
        }
    }
}
/// Describes how memory is accessed in a loop or computation.
#[derive(Debug, Clone, PartialEq)]
pub enum AccessPattern {
    /// Sequential access: elements accessed one after another (a\[0\], a\[1\], ...).
    Sequential,
    /// Strided access: elements accessed with a fixed stride (a\[0\], a\[s\], a\[2s\], ...).
    Strided(i64),
    /// Random (irregular) access: no discernible pattern.
    Random,
    /// Broadcast: same address read many times.
    Broadcast,
}
impl AccessPattern {
    /// Returns `true` if the pattern is cache-friendly (sequential or small stride).
    ///
    /// A stride counts as small when its absolute value is at most 64.
    pub fn is_cache_friendly(&self) -> bool {
        match self {
            // Contiguous or repeated accesses have strong locality.
            Self::Sequential | Self::Broadcast => true,
            // Small strides still exhibit spatial locality.
            Self::Strided(stride) => stride.unsigned_abs() <= 64,
            Self::Random => false,
        }
    }
}