// oxilean_codegen/opt_parallel/types.rs
//! Auto-generated module
//!
//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)

use std::collections::{HashMap, HashSet, VecDeque};

use crate::lcnf::{LcnfArg, LcnfExpr, LcnfFunDecl, LcnfLetValue, LcnfVarId};

use super::functions::*;
/// Per-basic-block liveness sets; blocks are indexed positionally and
/// variables are identified by `u32` ids.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ParLivenessInfo {
    /// Variables live on entry to each block.
    pub live_in: Vec<HashSet<u32>>,
    /// Variables live on exit from each block.
    pub live_out: Vec<HashSet<u32>>,
    /// Variables defined (written) in each block.
    pub defs: Vec<HashSet<u32>>,
    /// Variables used (read) in each block.
    pub uses: Vec<HashSet<u32>>,
}
impl ParLivenessInfo {
    /// Create empty liveness sets for `block_count` blocks.
    #[allow(dead_code)]
    pub fn new(block_count: usize) -> Self {
        ParLivenessInfo {
            live_in: vec![HashSet::new(); block_count],
            live_out: vec![HashSet::new(); block_count],
            defs: vec![HashSet::new(); block_count],
            uses: vec![HashSet::new(); block_count],
        }
    }
    /// Record that `var` is defined in `block`; out-of-range blocks are ignored.
    #[allow(dead_code)]
    pub fn add_def(&mut self, block: usize, var: u32) {
        if let Some(set) = self.defs.get_mut(block) {
            set.insert(var);
        }
    }
    /// Record that `var` is used in `block`; out-of-range blocks are ignored.
    #[allow(dead_code)]
    pub fn add_use(&mut self, block: usize, var: u32) {
        if let Some(set) = self.uses.get_mut(block) {
            set.insert(var);
        }
    }
    /// Whether `var` is live on entry to `block` (false for out-of-range blocks).
    #[allow(dead_code)]
    pub fn is_live_in(&self, block: usize, var: u32) -> bool {
        self.live_in.get(block).map_or(false, |s| s.contains(&var))
    }
    /// Whether `var` is live on exit from `block` (false for out-of-range blocks).
    #[allow(dead_code)]
    pub fn is_live_out(&self, block: usize, var: u32) -> bool {
        self.live_out.get(block).map_or(false, |s| s.contains(&var))
    }
}
/// Aggregated statistics for a parallel-optimisation pass.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct ParPassStats {
    /// Total number of recorded runs.
    pub total_runs: u32,
    /// Number of runs that completed successfully.
    pub successful_runs: u32,
    /// Sum of changes across all runs.
    pub total_changes: u64,
    /// Accumulated wall-clock time in milliseconds.
    pub time_ms: u64,
    /// Iteration count reported by the most recent run.
    pub iterations_used: u32,
}
impl ParPassStats {
    /// All-zero statistics.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }
    /// Fold one (successful) run into the totals.
    #[allow(dead_code)]
    pub fn record_run(&mut self, changes: u64, time_ms: u64, iterations: u32) {
        self.total_runs += 1;
        self.successful_runs += 1;
        self.total_changes += changes;
        self.time_ms += time_ms;
        // Last-writer-wins: iteration count is not accumulated.
        self.iterations_used = iterations;
    }
    /// Mean number of changes per run; 0.0 when nothing was recorded.
    #[allow(dead_code)]
    pub fn average_changes_per_run(&self) -> f64 {
        match self.total_runs {
            0 => 0.0,
            n => self.total_changes as f64 / n as f64,
        }
    }
    /// Fraction of runs that succeeded; 0.0 when nothing was recorded.
    #[allow(dead_code)]
    pub fn success_rate(&self) -> f64 {
        match self.total_runs {
            0 => 0.0,
            n => self.successful_runs as f64 / n as f64,
        }
    }
    /// One-line human-readable summary of the accumulated statistics.
    #[allow(dead_code)]
    pub fn format_summary(&self) -> String {
        format!(
            "Runs: {}/{}, Changes: {}, Time: {}ms",
            self.successful_runs, self.total_runs, self.total_changes, self.time_ms
        )
    }
}
/// A single cached artifact keyed by `key`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ParCacheEntry {
    // Cache lookup key.
    pub key: String,
    // Serialized payload; format decided by the producer — TODO confirm at use sites.
    pub data: Vec<u8>,
    // Creation/update time; unit (seconds vs ms) not established in this file — confirm.
    pub timestamp: u64,
    // Whether the entry may still be used (false = invalidated).
    pub valid: bool,
}
108/// A code region that has been identified as amenable to parallel execution.
109#[derive(Debug, Clone)]
110pub struct ParallelRegion {
111    /// Name of the LCNF function that represents this region.
112    pub func_name: String,
113    /// Index of the first basic block in the parallel body.
114    pub start_block: usize,
115    /// Index of the last basic block in the parallel body (inclusive).
116    pub end_block: usize,
117    /// Inferred parallel-execution model.
118    pub kind: ParallelKind,
119    /// Specific algorithmic pattern detected.
120    pub pattern: ParallelPattern,
121    /// Estimated parallel speed-up (ideal / Amdahl-limited).
122    pub estimated_speedup: f64,
123    /// Variables that are shared across parallel threads.
124    pub shared_vars: Vec<String>,
125    /// Variables that are private to each parallel thread / iteration.
126    pub private_vars: Vec<String>,
127    /// Trip count of the parallel loop (if statically known).
128    pub trip_count: Option<u64>,
129    /// Dependence analysis result for this region.
130    pub dependence_info: DependenceInfo,
131}
132impl ParallelRegion {
133    /// Create a minimal parallel region descriptor.
134    pub fn new(func_name: impl Into<String>, kind: ParallelKind, pattern: ParallelPattern) -> Self {
135        ParallelRegion {
136            func_name: func_name.into(),
137            start_block: 0,
138            end_block: 0,
139            kind,
140            pattern,
141            estimated_speedup: 1.0,
142            shared_vars: Vec::new(),
143            private_vars: Vec::new(),
144            trip_count: None,
145            dependence_info: DependenceInfo::default(),
146        }
147    }
148    /// Whether this region is worth parallelising (speedup > threshold).
149    pub fn is_profitable(&self, threshold: f64) -> bool {
150        self.estimated_speedup > threshold && self.dependence_info.is_parallelizable()
151    }
152}
153/// A diagnostic message from a OPar pass.
154#[derive(Debug, Clone)]
155pub struct OParDiagMsg {
156    pub severity: OParDiagSeverity,
157    pub pass: String,
158    pub message: String,
159}
160impl OParDiagMsg {
161    pub fn error(pass: impl Into<String>, msg: impl Into<String>) -> Self {
162        OParDiagMsg {
163            severity: OParDiagSeverity::Error,
164            pass: pass.into(),
165            message: msg.into(),
166        }
167    }
168    pub fn warning(pass: impl Into<String>, msg: impl Into<String>) -> Self {
169        OParDiagMsg {
170            severity: OParDiagSeverity::Warning,
171            pass: pass.into(),
172            message: msg.into(),
173        }
174    }
175    pub fn note(pass: impl Into<String>, msg: impl Into<String>) -> Self {
176        OParDiagMsg {
177            severity: OParDiagSeverity::Note,
178            pass: pass.into(),
179            message: msg.into(),
180        }
181    }
182}
183/// Collects OPar diagnostics.
184#[derive(Debug, Default)]
185pub struct OParDiagCollector {
186    pub(super) msgs: Vec<OParDiagMsg>,
187}
188impl OParDiagCollector {
189    pub fn new() -> Self {
190        OParDiagCollector::default()
191    }
192    pub fn emit(&mut self, d: OParDiagMsg) {
193        self.msgs.push(d);
194    }
195    pub fn has_errors(&self) -> bool {
196        self.msgs
197            .iter()
198            .any(|d| d.severity == OParDiagSeverity::Error)
199    }
200    pub fn errors(&self) -> Vec<&OParDiagMsg> {
201        self.msgs
202            .iter()
203            .filter(|d| d.severity == OParDiagSeverity::Error)
204            .collect()
205    }
206    pub fn warnings(&self) -> Vec<&OParDiagMsg> {
207        self.msgs
208            .iter()
209            .filter(|d| d.severity == OParDiagSeverity::Warning)
210            .collect()
211    }
212    pub fn len(&self) -> usize {
213        self.msgs.len()
214    }
215    pub fn is_empty(&self) -> bool {
216        self.msgs.is_empty()
217    }
218    pub fn clear(&mut self) {
219        self.msgs.clear();
220    }
221}
222#[allow(dead_code)]
223#[derive(Debug, Clone)]
224pub struct ParPassConfig {
225    pub phase: ParPassPhase,
226    pub enabled: bool,
227    pub max_iterations: u32,
228    pub debug_output: bool,
229    pub pass_name: String,
230}
231impl ParPassConfig {
232    #[allow(dead_code)]
233    pub fn new(name: impl Into<String>, phase: ParPassPhase) -> Self {
234        ParPassConfig {
235            phase,
236            enabled: true,
237            max_iterations: 10,
238            debug_output: false,
239            pass_name: name.into(),
240        }
241    }
242    #[allow(dead_code)]
243    pub fn disabled(mut self) -> Self {
244        self.enabled = false;
245        self
246    }
247    #[allow(dead_code)]
248    pub fn with_debug(mut self) -> Self {
249        self.debug_output = true;
250        self
251    }
252    #[allow(dead_code)]
253    pub fn max_iter(mut self, n: u32) -> Self {
254        self.max_iterations = n;
255        self
256    }
257}
/// Severity of an OPar diagnostic.
///
/// Declaration order matters: the derived `Ord` gives `Note < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OParDiagSeverity {
    // Informational only.
    Note,
    // Suspicious but not an error.
    Warning,
    // Most severe level; `OParDiagCollector::has_errors` keys off this.
    Error,
}
/// Dependency edge between two LCNF variables (or array accesses).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DepEdge {
    /// Source variable or access.
    pub from: String,
    /// Destination variable or access.
    pub to: String,
    /// Distance vector (simplified to a scalar for 1-D loops).
    /// A value >= 1 marks a loop-carried dependence (see
    /// `DependenceInfo::loop_carried_deps`); any further sign convention is
    /// not established in this file — confirm against the analyser.
    pub distance: i64,
}
/// Summary statistics produced by `ParallelPass::report()`.
#[derive(Debug, Clone, Default)]
pub struct ParallelReport {
    /// Number of parallel regions identified.
    pub regions_found: usize,
    /// Number of regions actually transformed.
    pub regions_transformed: usize,
    /// Estimated combined speed-up.
    /// NOTE(review): `ParallelPass::report()` currently fills this with the
    /// *maximum* profitable region speed-up (fold with `f64::max`), not a
    /// product as previously documented — confirm which is intended.
    pub estimated_total_speedup: f64,
    /// Number of data-race conditions detected (prevents transformation).
    pub race_conditions_detected: usize,
}
287/// Loop-dependence information for a single function/loop nest.
288///
289/// We use a simplified version of the standard three-class model:
290/// - **True dependence** (RAW): iteration J reads a value written by iteration I < J.
291/// - **Anti dependence** (WAR): iteration J writes a location read by iteration I < J.
292/// - **Output dependence** (WAW): two iterations write the same location.
293/// - **Loop-carried**: the dependence crosses loop iterations (distance > 0).
294#[derive(Debug, Clone, Default)]
295pub struct DependenceInfo {
296    /// Read-after-write (true) dependences.
297    pub true_deps: Vec<DepEdge>,
298    /// Write-after-read (anti) dependences.
299    pub anti_deps: Vec<DepEdge>,
300    /// Write-after-write (output) dependences.
301    pub output_deps: Vec<DepEdge>,
302    /// Subset of the above that are loop-carried (distance >= 1).
303    pub loop_carried_deps: Vec<DepEdge>,
304}
305impl DependenceInfo {
306    /// Returns `true` when no loop-carried dependences exist (Bernstein safe).
307    pub fn is_parallelizable(&self) -> bool {
308        self.loop_carried_deps.is_empty()
309    }
310    /// Total number of dependence edges.
311    pub fn total_deps(&self) -> usize {
312        self.true_deps.len() + self.anti_deps.len() + self.output_deps.len()
313    }
314}
/// Thread-safety analysis result for a parallel region.
#[derive(Debug, Clone)]
pub struct ThreadSafetyInfo {
    /// Whether the region is provably free of data races.
    pub is_thread_safe: bool,
    /// Pairs of (write, read/write) accesses that may race.
    pub race_conditions: Vec<(String, String)>,
    /// Variables that need atomic operations to be safe.
    pub atomic_ops_needed: Vec<String>,
}
impl ThreadSafetyInfo {
    /// A trivially safe result: no races, no atomics required.
    pub fn safe() -> Self {
        Self {
            is_thread_safe: true,
            race_conditions: Vec::new(),
            atomic_ops_needed: Vec::new(),
        }
    }
    /// An unsafe result carrying a single (write, read) race pair.
    pub fn unsafe_race(write: impl Into<String>, read: impl Into<String>) -> Self {
        let mut info = Self::safe();
        info.is_thread_safe = false;
        info.race_conditions.push((write.into(), read.into()));
        info
    }
}
/// Configuration for the parallelism pass.
///
/// NOTE(review): `ParallelPass::default_pass` calls `ParallelConfig::default()`,
/// so a `Default` impl must exist elsewhere in the crate (it is not in this file).
#[derive(Debug, Clone)]
pub struct ParallelConfig {
    /// Minimum estimated speedup below which a region is not transformed.
    pub min_speedup_threshold: f64,
    /// Maximum number of functions analysed (prevents runaway).
    pub max_functions: usize,
    /// Enable speculative parallelism (may be unsound).
    pub allow_speculative: bool,
    /// Assumed number of hardware threads for speed-up estimation.
    pub hardware_threads: u32,
}
/// Statistics for OParExt passes.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OParExtPassStats {
    /// Fixpoint iterations performed.
    pub iterations: usize,
    /// Whether the pass modified anything.
    pub changed: bool,
    /// Nodes examined.
    pub nodes_visited: usize,
    /// Nodes rewritten.
    pub nodes_modified: usize,
    /// Wall-clock time in milliseconds.
    pub time_ms: u64,
    /// Memory use in bytes (treated as a high-water mark by `merge`).
    pub memory_bytes: usize,
    /// Errors encountered.
    pub errors: usize,
}
impl OParExtPassStats {
    /// All-zero statistics.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }
    /// Count one visited node.
    #[allow(dead_code)]
    pub fn visit(&mut self) {
        self.nodes_visited += 1;
    }
    /// Count one modified node and mark the pass as having changed something.
    #[allow(dead_code)]
    pub fn modify(&mut self) {
        self.nodes_modified += 1;
        self.changed = true;
    }
    /// Count one fixpoint iteration.
    #[allow(dead_code)]
    pub fn iterate(&mut self) {
        self.iterations += 1;
    }
    /// Count one error.
    #[allow(dead_code)]
    pub fn error(&mut self) {
        self.errors += 1;
    }
    /// Modified-to-visited ratio; 0.0 when nothing was visited.
    #[allow(dead_code)]
    pub fn efficiency(&self) -> f64 {
        match self.nodes_visited {
            0 => 0.0,
            n => self.nodes_modified as f64 / n as f64,
        }
    }
    /// Fold another stats object into this one.
    #[allow(dead_code)]
    pub fn merge(&mut self, o: &OParExtPassStats) {
        self.changed |= o.changed;
        self.iterations += o.iterations;
        self.nodes_visited += o.nodes_visited;
        self.nodes_modified += o.nodes_modified;
        self.time_ms += o.time_ms;
        self.errors += o.errors;
        // Peak, not sum: memory is tracked as a high-water mark.
        self.memory_bytes = self.memory_bytes.max(o.memory_bytes);
    }
}
/// The main parallelism optimisation pass.
pub struct ParallelPass {
    /// Configuration.
    pub config: ParallelConfig,
    /// Regions identified during analysis.
    pub regions: Vec<ParallelRegion>,
    /// Memoised thread-safety results, keyed by function name.
    pub(super) safety_cache: HashMap<String, ThreadSafetyInfo>,
}
impl ParallelPass {
    /// Create a new pass with the given configuration.
    pub fn new(config: ParallelConfig) -> Self {
        ParallelPass {
            config,
            regions: Vec::new(),
            safety_cache: HashMap::new(),
        }
    }
    /// Create a pass with default configuration.
    ///
    /// NOTE(review): relies on `ParallelConfig: Default`, which is not defined
    /// in this file — presumably implemented elsewhere in the crate; confirm.
    pub fn default_pass() -> Self {
        Self::new(ParallelConfig::default())
    }
    /// Run analysis + transformation over all function declarations.
    ///
    /// Clears state from any previous run first, so one pass object can be reused.
    pub fn run(&mut self, decls: &mut [LcnfFunDecl]) {
        self.regions.clear();
        self.safety_cache.clear();
        self.analyze_parallelism(decls);
        self.transform_to_parallel(decls);
    }
    /// Analyse all declarations and populate `self.regions`.
    ///
    /// Examines at most `config.max_functions` declarations.
    pub fn analyze_parallelism(&mut self, decls: &[LcnfFunDecl]) {
        for decl in decls.iter().take(self.config.max_functions) {
            if let Some(region) = self.analyse_decl(decl) {
                self.regions.push(region);
            }
        }
    }
    /// Transform declarations that have profitable parallel regions.
    ///
    /// Currently adds an annotation to the function body (via `LcnfExpr::Let`
    /// with a sentinel `Ctor` value) so that downstream code generators can
    /// see the annotation.  A full production implementation would rewrite the
    /// loop body to use a parallel runtime API.
    pub fn transform_to_parallel(&mut self, decls: &mut [LcnfFunDecl]) {
        // Names of functions with at least one region above the speed-up bar.
        let profitable: HashSet<String> = self
            .regions
            .iter()
            .filter(|r| r.is_profitable(self.config.min_speedup_threshold))
            .map(|r| r.func_name.clone())
            .collect();
        for decl in decls.iter_mut() {
            if profitable.contains(&decl.name) {
                // Move the body out without cloning by parking `Unreachable`
                // in its place; it is immediately overwritten below.
                let old_body = std::mem::replace(&mut decl.body, LcnfExpr::Unreachable);
                decl.body = LcnfExpr::Let {
                    // u64::MAX is a sentinel id marking a synthetic binding.
                    id: LcnfVarId(u64::MAX),
                    name: "__parallel_annotation__".to_string(),
                    ty: crate::lcnf::LcnfType::Unit,
                    value: LcnfLetValue::Ctor("parallel_region".to_string(), 0, vec![]),
                    body: Box::new(old_body),
                };
            }
        }
    }
    /// Produce a summary report of the pass results.
    pub fn report(&self) -> ParallelReport {
        // Sum race counts over every memoised safety result.
        let races: usize = self
            .safety_cache
            .values()
            .map(|s| s.race_conditions.len())
            .sum();
        let transformed = self
            .regions
            .iter()
            .filter(|r| r.is_profitable(self.config.min_speedup_threshold))
            .count();
        let total_speedup = if self.regions.is_empty() {
            1.0
        } else {
            // NOTE(review): this takes the *maximum* profitable speed-up,
            // although `ParallelReport::estimated_total_speedup` was described
            // as a product of speed-ups — confirm which is intended.
            self.regions
                .iter()
                .filter(|r| r.is_profitable(self.config.min_speedup_threshold))
                .map(|r| r.estimated_speedup)
                .fold(1.0_f64, f64::max)
        };
        ParallelReport {
            regions_found: self.regions.len(),
            regions_transformed: transformed,
            estimated_total_speedup: total_speedup,
            race_conditions_detected: races,
        }
    }
    /// Analyse one declaration; `Some(region)` only when a parallel pattern is
    /// detected AND every safety gate below passes.
    pub(super) fn analyse_decl(&mut self, decl: &LcnfFunDecl) -> Option<ParallelRegion> {
        // Gate 1: must match a known parallel pattern.
        let pattern = PatternDetector::new(decl).detect()?;
        // Gate 2: loop-carried dependences forbid parallelisation.
        let dep_info = DependenceAnalyser::new(decl).analyse();
        if !dep_info.is_parallelizable() {
            return None;
        }
        let kind = self.infer_kind(pattern, decl);
        // Gate 3: speculative execution only when explicitly enabled.
        // (Latent: `infer_kind` currently never returns `SpeculativeParallel`.)
        if kind == ParallelKind::SpeculativeParallel && !self.config.allow_speculative {
            return None;
        }
        let trip_count = self.estimate_trip_count(decl);
        let speedup = estimate_speedup_for_pattern(pattern, trip_count);
        // Gate 4: race detection; memoise the result per function name.
        let detector = RaceDetector::new();
        let safety = detector.analyse_decl(decl);
        self.safety_cache.insert(decl.name.clone(), safety.clone());
        if !safety.is_thread_safe && kind == ParallelKind::DataParallel {
            return None;
        }
        let shared_vars = self.collect_shared_vars(decl);
        let private_vars = self.collect_private_vars(decl);
        Some(ParallelRegion {
            func_name: decl.name.clone(),
            start_block: 0,
            // NOTE(review): deriving end_block from the parameter count looks
            // like a placeholder rather than a real block index — confirm.
            end_block: decl.params.len().saturating_sub(1),
            kind,
            pattern,
            estimated_speedup: speedup,
            shared_vars,
            private_vars,
            trip_count,
            dependence_info: dep_info,
        })
    }
    /// Map a detected pattern to an execution model: element-wise patterns and
    /// reductions are data-parallel; scans are pipelined.
    pub(super) fn infer_kind(&self, pattern: ParallelPattern, _decl: &LcnfFunDecl) -> ParallelKind {
        match pattern {
            ParallelPattern::Map
            | ParallelPattern::Filter
            | ParallelPattern::Gather
            | ParallelPattern::Scatter
            | ParallelPattern::Stencil
            | ParallelPattern::ParallelFor => ParallelKind::DataParallel,
            ParallelPattern::Reduce => ParallelKind::DataParallel,
            ParallelPattern::Scan => ParallelKind::PipelineParallel,
        }
    }
    /// Heuristic trip-count estimate: a conventionally-named size parameter
    /// (`n`, `len`, `size`) yields a fixed guess of 256; otherwise unknown.
    pub(super) fn estimate_trip_count(&self, decl: &LcnfFunDecl) -> Option<u64> {
        for param in &decl.params {
            if param.name == "n" || param.name == "len" || param.name == "size" {
                return Some(256);
            }
        }
        None
    }
    /// Heuristic: parameters whose names start with `arr` or `buf` are assumed
    /// to be buffers shared across threads.
    pub(super) fn collect_shared_vars(&self, decl: &LcnfFunDecl) -> Vec<String> {
        decl.params
            .iter()
            .filter(|p| p.name.starts_with("arr") || p.name.starts_with("buf"))
            .map(|p| p.name.clone())
            .collect()
    }
    /// Every `let`-bound name in the body is treated as thread-private.
    pub(super) fn collect_private_vars(&self, decl: &LcnfFunDecl) -> Vec<String> {
        let mut privates = Vec::new();
        Self::collect_let_names(&decl.body, &mut privates);
        privates
    }
    /// Recursively collect `let` binder names from an LCNF expression tree
    /// (descends into let bodies and all case alternatives/defaults).
    pub(super) fn collect_let_names(expr: &LcnfExpr, out: &mut Vec<String>) {
        match expr {
            LcnfExpr::Let { name, body, .. } => {
                out.push(name.clone());
                Self::collect_let_names(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    Self::collect_let_names(&alt.body, out);
                }
                if let Some(d) = default {
                    Self::collect_let_names(d, out);
                }
            }
            _ => {}
        }
    }
}
/// Pass execution phase for OParExt.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OParExtPassPhase {
    Early,
    Middle,
    Late,
    Finalize,
}
impl OParExtPassPhase {
    /// True for the `Early` phase.
    #[allow(dead_code)]
    pub fn is_early(&self) -> bool {
        self == &Self::Early
    }
    /// True for the `Middle` phase.
    #[allow(dead_code)]
    pub fn is_middle(&self) -> bool {
        self == &Self::Middle
    }
    /// True for the `Late` phase.
    #[allow(dead_code)]
    pub fn is_late(&self) -> bool {
        self == &Self::Late
    }
    /// True for the `Finalize` phase.
    #[allow(dead_code)]
    pub fn is_finalize(&self) -> bool {
        self == &Self::Finalize
    }
    /// Numeric execution order: Early=0, Middle=1, Late=2, Finalize=3.
    #[allow(dead_code)]
    pub fn order(&self) -> u32 {
        match self {
            Self::Early => 0,
            Self::Middle => 1,
            Self::Late => 2,
            Self::Finalize => 3,
        }
    }
    /// Inverse of `order`; `None` for values above 3.
    #[allow(dead_code)]
    pub fn from_order(n: u32) -> Option<Self> {
        let phase = match n {
            0 => Self::Early,
            1 => Self::Middle,
            2 => Self::Late,
            3 => Self::Finalize,
            _ => return None,
        };
        Some(phase)
    }
}
628/// Pass registry for OParExt.
629#[allow(dead_code)]
630#[derive(Debug, Default)]
631pub struct OParExtPassRegistry {
632    pub(super) configs: Vec<OParExtPassConfig>,
633    pub(super) stats: Vec<OParExtPassStats>,
634}
635impl OParExtPassRegistry {
636    #[allow(dead_code)]
637    pub fn new() -> Self {
638        Self::default()
639    }
640    #[allow(dead_code)]
641    pub fn register(&mut self, c: OParExtPassConfig) {
642        self.stats.push(OParExtPassStats::new());
643        self.configs.push(c);
644    }
645    #[allow(dead_code)]
646    pub fn len(&self) -> usize {
647        self.configs.len()
648    }
649    #[allow(dead_code)]
650    pub fn is_empty(&self) -> bool {
651        self.configs.is_empty()
652    }
653    #[allow(dead_code)]
654    pub fn get(&self, i: usize) -> Option<&OParExtPassConfig> {
655        self.configs.get(i)
656    }
657    #[allow(dead_code)]
658    pub fn get_stats(&self, i: usize) -> Option<&OParExtPassStats> {
659        self.stats.get(i)
660    }
661    #[allow(dead_code)]
662    pub fn enabled_passes(&self) -> Vec<&OParExtPassConfig> {
663        self.configs.iter().filter(|c| c.enabled).collect()
664    }
665    #[allow(dead_code)]
666    pub fn passes_in_phase(&self, ph: &OParExtPassPhase) -> Vec<&OParExtPassConfig> {
667        self.configs
668            .iter()
669            .filter(|c| c.enabled && &c.phase == ph)
670            .collect()
671    }
672    #[allow(dead_code)]
673    pub fn total_nodes_visited(&self) -> usize {
674        self.stats.iter().map(|s| s.nodes_visited).sum()
675    }
676    #[allow(dead_code)]
677    pub fn any_changed(&self) -> bool {
678        self.stats.iter().any(|s| s.changed)
679    }
680}
681/// Worklist for OParExt.
682#[allow(dead_code)]
683#[derive(Debug, Clone)]
684pub struct OParExtWorklist {
685    pub(super) items: std::collections::VecDeque<usize>,
686    pub(super) present: Vec<bool>,
687}
688impl OParExtWorklist {
689    #[allow(dead_code)]
690    pub fn new(capacity: usize) -> Self {
691        Self {
692            items: std::collections::VecDeque::new(),
693            present: vec![false; capacity],
694        }
695    }
696    #[allow(dead_code)]
697    pub fn push(&mut self, id: usize) {
698        if id < self.present.len() && !self.present[id] {
699            self.present[id] = true;
700            self.items.push_back(id);
701        }
702    }
703    #[allow(dead_code)]
704    pub fn push_front(&mut self, id: usize) {
705        if id < self.present.len() && !self.present[id] {
706            self.present[id] = true;
707            self.items.push_front(id);
708        }
709    }
710    #[allow(dead_code)]
711    pub fn pop(&mut self) -> Option<usize> {
712        let id = self.items.pop_front()?;
713        if id < self.present.len() {
714            self.present[id] = false;
715        }
716        Some(id)
717    }
718    #[allow(dead_code)]
719    pub fn is_empty(&self) -> bool {
720        self.items.is_empty()
721    }
722    #[allow(dead_code)]
723    pub fn len(&self) -> usize {
724        self.items.len()
725    }
726    #[allow(dead_code)]
727    pub fn contains(&self, id: usize) -> bool {
728        id < self.present.len() && self.present[id]
729    }
730    #[allow(dead_code)]
731    pub fn drain_all(&mut self) -> Vec<usize> {
732        let v: Vec<usize> = self.items.drain(..).collect();
733        for &id in &v {
734            if id < self.present.len() {
735                self.present[id] = false;
736            }
737        }
738        v
739    }
740}
741/// Dominator tree for OParExt.
742#[allow(dead_code)]
743#[derive(Debug, Clone)]
744pub struct OParExtDomTree {
745    pub(super) idom: Vec<Option<usize>>,
746    pub(super) children: Vec<Vec<usize>>,
747    pub(super) depth: Vec<usize>,
748}
749impl OParExtDomTree {
750    #[allow(dead_code)]
751    pub fn new(n: usize) -> Self {
752        Self {
753            idom: vec![None; n],
754            children: vec![Vec::new(); n],
755            depth: vec![0; n],
756        }
757    }
758    #[allow(dead_code)]
759    pub fn set_idom(&mut self, node: usize, dom: usize) {
760        if node < self.idom.len() {
761            self.idom[node] = Some(dom);
762            if dom < self.children.len() {
763                self.children[dom].push(node);
764            }
765            self.depth[node] = if dom < self.depth.len() {
766                self.depth[dom] + 1
767            } else {
768                1
769            };
770        }
771    }
772    #[allow(dead_code)]
773    pub fn dominates(&self, a: usize, mut b: usize) -> bool {
774        if a == b {
775            return true;
776        }
777        let n = self.idom.len();
778        for _ in 0..n {
779            match self.idom.get(b).copied().flatten() {
780                None => return false,
781                Some(p) if p == a => return true,
782                Some(p) if p == b => return false,
783                Some(p) => b = p,
784            }
785        }
786        false
787    }
788    #[allow(dead_code)]
789    pub fn children_of(&self, n: usize) -> &[usize] {
790        self.children.get(n).map(|v| v.as_slice()).unwrap_or(&[])
791    }
792    #[allow(dead_code)]
793    pub fn depth_of(&self, n: usize) -> usize {
794        self.depth.get(n).copied().unwrap_or(0)
795    }
796    #[allow(dead_code)]
797    pub fn lca(&self, mut a: usize, mut b: usize) -> usize {
798        let n = self.idom.len();
799        for _ in 0..(2 * n) {
800            if a == b {
801                return a;
802            }
803            if self.depth_of(a) > self.depth_of(b) {
804                a = self.idom.get(a).and_then(|x| *x).unwrap_or(a);
805            } else {
806                b = self.idom.get(b).and_then(|x| *x).unwrap_or(b);
807            }
808        }
809        0
810    }
811}
812#[allow(dead_code)]
813#[derive(Debug, Clone)]
814pub struct ParDepGraph {
815    pub(super) nodes: Vec<u32>,
816    pub(super) edges: Vec<(u32, u32)>,
817}
818impl ParDepGraph {
819    #[allow(dead_code)]
820    pub fn new() -> Self {
821        ParDepGraph {
822            nodes: Vec::new(),
823            edges: Vec::new(),
824        }
825    }
826    #[allow(dead_code)]
827    pub fn add_node(&mut self, id: u32) {
828        if !self.nodes.contains(&id) {
829            self.nodes.push(id);
830        }
831    }
832    #[allow(dead_code)]
833    pub fn add_dep(&mut self, dep: u32, dependent: u32) {
834        self.add_node(dep);
835        self.add_node(dependent);
836        self.edges.push((dep, dependent));
837    }
838    #[allow(dead_code)]
839    pub fn dependents_of(&self, node: u32) -> Vec<u32> {
840        self.edges
841            .iter()
842            .filter(|(d, _)| *d == node)
843            .map(|(_, dep)| *dep)
844            .collect()
845    }
846    #[allow(dead_code)]
847    pub fn dependencies_of(&self, node: u32) -> Vec<u32> {
848        self.edges
849            .iter()
850            .filter(|(_, dep)| *dep == node)
851            .map(|(d, _)| *d)
852            .collect()
853    }
854    #[allow(dead_code)]
855    pub fn topological_sort(&self) -> Vec<u32> {
856        let mut in_degree: std::collections::HashMap<u32, u32> = std::collections::HashMap::new();
857        for &n in &self.nodes {
858            in_degree.insert(n, 0);
859        }
860        for (_, dep) in &self.edges {
861            *in_degree.entry(*dep).or_insert(0) += 1;
862        }
863        let mut queue: std::collections::VecDeque<u32> = self
864            .nodes
865            .iter()
866            .filter(|&&n| in_degree[&n] == 0)
867            .copied()
868            .collect();
869        let mut result = Vec::new();
870        while let Some(node) = queue.pop_front() {
871            result.push(node);
872            for dep in self.dependents_of(node) {
873                let cnt = in_degree.entry(dep).or_insert(0);
874                *cnt = cnt.saturating_sub(1);
875                if *cnt == 0 {
876                    queue.push_back(dep);
877                }
878            }
879        }
880        result
881    }
882    #[allow(dead_code)]
883    pub fn has_cycle(&self) -> bool {
884        self.topological_sort().len() < self.nodes.len()
885    }
886}
887/// A text buffer for building OPar output source code.
888#[derive(Debug, Default)]
889pub struct OParSourceBuffer {
890    pub(super) buf: String,
891    pub(super) indent_level: usize,
892    pub(super) indent_str: String,
893}
894impl OParSourceBuffer {
895    pub fn new() -> Self {
896        OParSourceBuffer {
897            buf: String::new(),
898            indent_level: 0,
899            indent_str: "    ".to_string(),
900        }
901    }
902    pub fn with_indent(mut self, indent: impl Into<String>) -> Self {
903        self.indent_str = indent.into();
904        self
905    }
906    pub fn push_line(&mut self, line: &str) {
907        for _ in 0..self.indent_level {
908            self.buf.push_str(&self.indent_str);
909        }
910        self.buf.push_str(line);
911        self.buf.push('\n');
912    }
913    pub fn push_raw(&mut self, s: &str) {
914        self.buf.push_str(s);
915    }
916    pub fn indent(&mut self) {
917        self.indent_level += 1;
918    }
919    pub fn dedent(&mut self) {
920        self.indent_level = self.indent_level.saturating_sub(1);
921    }
922    pub fn as_str(&self) -> &str {
923        &self.buf
924    }
925    pub fn len(&self) -> usize {
926        self.buf.len()
927    }
928    pub fn is_empty(&self) -> bool {
929        self.buf.is_empty()
930    }
931    pub fn line_count(&self) -> usize {
932        self.buf.lines().count()
933    }
934    pub fn into_string(self) -> String {
935        self.buf
936    }
937    pub fn reset(&mut self) {
938        self.buf.clear();
939        self.indent_level = 0;
940    }
941}
/// A fixed-capacity ring buffer of strings (for recent-event logging in OPar).
#[derive(Debug)]
pub struct OParEventLog {
    // Events in arrival order; the oldest entry sits at the front.
    pub(super) entries: std::collections::VecDeque<String>,
    // Maximum number of events retained.
    pub(super) capacity: usize,
}
948impl OParEventLog {
949    pub fn new(capacity: usize) -> Self {
950        OParEventLog {
951            entries: std::collections::VecDeque::with_capacity(capacity),
952            capacity,
953        }
954    }
955    pub fn push(&mut self, event: impl Into<String>) {
956        if self.entries.len() >= self.capacity {
957            self.entries.pop_front();
958        }
959        self.entries.push_back(event.into());
960    }
961    pub fn iter(&self) -> impl Iterator<Item = &String> {
962        self.entries.iter()
963    }
964    pub fn len(&self) -> usize {
965        self.entries.len()
966    }
967    pub fn is_empty(&self) -> bool {
968        self.entries.is_empty()
969    }
970    pub fn capacity(&self) -> usize {
971        self.capacity
972    }
973    pub fn clear(&mut self) {
974        self.entries.clear();
975    }
976}
/// Analysis cache for OParExt.
#[allow(dead_code)]
#[derive(Debug)]
pub struct OParExtCache {
    // Cached results as (key, payload, is-valid flag, per-entry hit count).
    pub(super) entries: Vec<(u64, Vec<u8>, bool, u32)>,
    // Upper bound on the number of entries kept.
    pub(super) cap: usize,
    // Lifetime hit counter across all lookups.
    pub(super) total_hits: u64,
    // Lifetime miss counter across all lookups.
    pub(super) total_misses: u64,
}
986impl OParExtCache {
987    #[allow(dead_code)]
988    pub fn new(cap: usize) -> Self {
989        Self {
990            entries: Vec::new(),
991            cap,
992            total_hits: 0,
993            total_misses: 0,
994        }
995    }
996    #[allow(dead_code)]
997    pub fn get(&mut self, key: u64) -> Option<&[u8]> {
998        for e in self.entries.iter_mut() {
999            if e.0 == key && e.2 {
1000                e.3 += 1;
1001                self.total_hits += 1;
1002                return Some(&e.1);
1003            }
1004        }
1005        self.total_misses += 1;
1006        None
1007    }
1008    #[allow(dead_code)]
1009    pub fn put(&mut self, key: u64, data: Vec<u8>) {
1010        if self.entries.len() >= self.cap {
1011            self.entries.retain(|e| e.2);
1012            if self.entries.len() >= self.cap {
1013                self.entries.remove(0);
1014            }
1015        }
1016        self.entries.push((key, data, true, 0));
1017    }
1018    #[allow(dead_code)]
1019    pub fn invalidate(&mut self) {
1020        for e in self.entries.iter_mut() {
1021            e.2 = false;
1022        }
1023    }
1024    #[allow(dead_code)]
1025    pub fn hit_rate(&self) -> f64 {
1026        let t = self.total_hits + self.total_misses;
1027        if t == 0 {
1028            0.0
1029        } else {
1030            self.total_hits as f64 / t as f64
1031        }
1032    }
1033    #[allow(dead_code)]
1034    pub fn live_count(&self) -> usize {
1035        self.entries.iter().filter(|e| e.2).count()
1036    }
1037}
/// Analyses loop-carried dependences for a single LCNF function.
pub struct DependenceAnalyser<'a> {
    // The function declaration whose body is scanned for affine accesses.
    pub(super) decl: &'a LcnfFunDecl,
}
impl<'a> DependenceAnalyser<'a> {
    /// Wraps a function declaration for analysis.
    pub(super) fn new(decl: &'a LcnfFunDecl) -> Self {
        DependenceAnalyser { decl }
    }
    /// Classifies every dependent pair of affine accesses in the body.
    ///
    /// Accesses are compared pairwise in collection order; for a pair
    /// `(a, b)` with `a` preceding `b` that is not provably independent:
    /// write-then-read is recorded as a true (RAW) dependence,
    /// read-then-write as an anti (WAR) dependence, and write-then-write as
    /// an output (WAW) dependence. Any dependent pair with differing
    /// offsets is additionally recorded as loop-carried.
    pub(super) fn analyse(&self) -> DependenceInfo {
        let accesses = self.collect_accesses(&self.decl.body);
        let mut info = DependenceInfo::default();
        for i in 0..accesses.len() {
            for j in (i + 1)..accesses.len() {
                let a = &accesses[i];
                let b = &accesses[j];
                if a.independent_from(b) {
                    continue;
                }
                // Edge endpoints are labelled "<base><offset>" for reporting.
                let edge = DepEdge {
                    from: format!("{}{}", a.base, a.offset),
                    to: format!("{}{}", b.base, b.offset),
                    distance: (b.offset - a.offset).abs(),
                };
                // Distance zero means the same element; a non-zero distance
                // means the dependence crosses iterations.
                let is_loop_carried = edge.distance > 0;
                if a.is_write && !b.is_write {
                    info.true_deps.push(edge.clone());
                } else if !a.is_write && b.is_write {
                    info.anti_deps.push(edge.clone());
                } else if a.is_write && b.is_write {
                    info.output_deps.push(edge.clone());
                }
                if is_loop_carried {
                    info.loop_carried_deps.push(edge);
                }
            }
        }
        info
    }
    /// Collects all affine accesses appearing in `expr`, in traversal order.
    pub(super) fn collect_accesses(&self, expr: &LcnfExpr) -> Vec<AffineAccess> {
        let mut out = Vec::new();
        self.collect_accesses_inner(expr, &mut out);
        out
    }
    /// Recursive worker for `collect_accesses`.
    ///
    /// Walks `Let` chains and all `Case` arms; other expression forms
    /// contribute no accesses. Applications are modelled as reads and
    /// constructor allocations as writes. The coefficient/offset encodings
    /// (argument count, function id, ctor tag) are heuristic —
    /// NOTE(review): confirm them against the consumers of `AffineAccess`
    /// before relying on exact dependence distances.
    pub(super) fn collect_accesses_inner(&self, expr: &LcnfExpr, out: &mut Vec<AffineAccess>) {
        match expr {
            LcnfExpr::Let {
                value, body, name, ..
            } => {
                match value {
                    LcnfLetValue::App(LcnfArg::Var(fid), args) => {
                        // Stride 1 for multi-argument calls, otherwise 0
                        // (treated as a scalar access by the GCD test).
                        let coeff = if args.len() > 1 { 1 } else { 0 };
                        out.push(AffineAccess {
                            base: name.clone(),
                            coeff,
                            offset: fid.0 as i64,
                            is_write: false,
                        });
                    }
                    LcnfLetValue::Ctor(ctor_name, tag, _) => {
                        out.push(AffineAccess {
                            base: ctor_name.clone(),
                            coeff: 1,
                            offset: *tag as i64,
                            is_write: true,
                        });
                    }
                    _ => {}
                }
                self.collect_accesses_inner(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    self.collect_accesses_inner(&alt.body, out);
                }
                if let Some(d) = default {
                    self.collect_accesses_inner(d, out);
                }
            }
            _ => {}
        }
    }
}
/// Heuristic freshness key for OPar incremental compilation.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OParIncrKey {
    pub content_hash: u64,
    pub config_hash: u64,
}
impl OParIncrKey {
    /// Builds a key from a content hash and a configuration hash.
    pub fn new(content: u64, config: u64) -> Self {
        Self {
            content_hash: content,
            config_hash: config,
        }
    }
    /// Mixes both hashes into a single value (golden-ratio multiply of the
    /// content hash, XORed with the config hash).
    pub fn combined_hash(&self) -> u64 {
        let mixed = self.content_hash.wrapping_mul(0x9e3779b97f4a7c15);
        mixed ^ self.config_hash
    }
    /// True when both component hashes are identical.
    pub fn matches(&self, other: &OParIncrKey) -> bool {
        (self.content_hash, self.config_hash) == (other.content_hash, other.config_hash)
    }
}
/// A feature flag set for OPar capabilities.
#[derive(Debug, Clone, Default)]
pub struct OParFeatures {
    // Names of the currently enabled feature flags.
    pub(super) flags: std::collections::HashSet<String>,
}
1145impl OParFeatures {
1146    pub fn new() -> Self {
1147        OParFeatures::default()
1148    }
1149    pub fn enable(&mut self, flag: impl Into<String>) {
1150        self.flags.insert(flag.into());
1151    }
1152    pub fn disable(&mut self, flag: &str) {
1153        self.flags.remove(flag);
1154    }
1155    pub fn is_enabled(&self, flag: &str) -> bool {
1156        self.flags.contains(flag)
1157    }
1158    pub fn len(&self) -> usize {
1159        self.flags.len()
1160    }
1161    pub fn is_empty(&self) -> bool {
1162        self.flags.is_empty()
1163    }
1164    pub fn union(&self, other: &OParFeatures) -> OParFeatures {
1165        OParFeatures {
1166            flags: self.flags.union(&other.flags).cloned().collect(),
1167        }
1168    }
1169    pub fn intersection(&self, other: &OParFeatures) -> OParFeatures {
1170        OParFeatures {
1171            flags: self.flags.intersection(&other.flags).cloned().collect(),
1172        }
1173    }
1174}
/// A monotonically increasing ID generator for OPar.
#[derive(Debug, Default)]
pub struct OParIdGen {
    // The next ID that will be handed out.
    pub(super) next: u32,
}
1180impl OParIdGen {
1181    pub fn new() -> Self {
1182        OParIdGen::default()
1183    }
1184    pub fn next_id(&mut self) -> u32 {
1185        let id = self.next;
1186        self.next += 1;
1187        id
1188    }
1189    pub fn peek_next(&self) -> u32 {
1190        self.next
1191    }
1192    pub fn reset(&mut self) {
1193        self.next = 0;
1194    }
1195    pub fn skip(&mut self, n: u32) {
1196        self.next += n;
1197    }
1198}
/// Registry of parallelisation passes together with per-pass run statistics.
#[allow(dead_code)]
pub struct ParPassRegistry {
    // Registered pass configurations, in registration order.
    pub(super) configs: Vec<ParPassConfig>,
    // Run statistics keyed by pass name.
    pub(super) stats: std::collections::HashMap<String, ParPassStats>,
}
1204impl ParPassRegistry {
1205    #[allow(dead_code)]
1206    pub fn new() -> Self {
1207        ParPassRegistry {
1208            configs: Vec::new(),
1209            stats: std::collections::HashMap::new(),
1210        }
1211    }
1212    #[allow(dead_code)]
1213    pub fn register(&mut self, config: ParPassConfig) {
1214        self.stats
1215            .insert(config.pass_name.clone(), ParPassStats::new());
1216        self.configs.push(config);
1217    }
1218    #[allow(dead_code)]
1219    pub fn enabled_passes(&self) -> Vec<&ParPassConfig> {
1220        self.configs.iter().filter(|c| c.enabled).collect()
1221    }
1222    #[allow(dead_code)]
1223    pub fn get_stats(&self, name: &str) -> Option<&ParPassStats> {
1224        self.stats.get(name)
1225    }
1226    #[allow(dead_code)]
1227    pub fn total_passes(&self) -> usize {
1228        self.configs.len()
1229    }
1230    #[allow(dead_code)]
1231    pub fn enabled_count(&self) -> usize {
1232        self.enabled_passes().len()
1233    }
1234    #[allow(dead_code)]
1235    pub fn update_stats(&mut self, name: &str, changes: u64, time_ms: u64, iter: u32) {
1236        if let Some(stats) = self.stats.get_mut(name) {
1237            stats.record_run(changes, time_ms, iter);
1238        }
1239    }
1240}
/// Dominator tree over `u32`-indexed CFG nodes.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ParDominatorTree {
    /// Immediate dominator of each node (`None` when unset).
    pub idom: Vec<Option<u32>>,
    /// Per-node child lists of the dominator tree.
    pub dom_children: Vec<Vec<u32>>,
    /// Per-node depth in the dominator tree.
    pub dom_depth: Vec<u32>,
}
impl ParDominatorTree {
    /// Creates a tree for `size` nodes with no dominator edges yet.
    #[allow(dead_code)]
    pub fn new(size: usize) -> Self {
        Self {
            idom: vec![None; size],
            dom_children: vec![Vec::new(); size],
            dom_depth: vec![0; size],
        }
    }
    /// Records `idom` as the immediate dominator of `node`.
    #[allow(dead_code)]
    pub fn set_idom(&mut self, node: usize, idom: u32) {
        self.idom[node] = Some(idom);
    }
    /// True when `a` dominates `b`, i.e. `a` lies on the idom chain above
    /// `b` (every node dominates itself).
    #[allow(dead_code)]
    pub fn dominates(&self, a: usize, b: usize) -> bool {
        if a == b {
            return true;
        }
        let mut cur = b;
        while let Some(parent) = self.idom[cur] {
            let parent = parent as usize;
            if parent == a {
                return true;
            }
            // A self-referential idom terminates the chain.
            if parent == cur {
                return false;
            }
            cur = parent;
        }
        false
    }
    /// Depth of `node` in the dominator tree; 0 for out-of-range nodes.
    #[allow(dead_code)]
    pub fn depth(&self, node: usize) -> u32 {
        match self.dom_depth.get(node) {
            Some(&d) => d,
            None => 0,
        }
    }
}
/// FIFO worklist of `u32` items with O(1) duplicate suppression.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ParWorklist {
    // Pending items in FIFO order.
    pub(super) items: std::collections::VecDeque<u32>,
    // Mirror set used to reject duplicate enqueues.
    pub(super) in_worklist: std::collections::HashSet<u32>,
}
1287impl ParWorklist {
1288    #[allow(dead_code)]
1289    pub fn new() -> Self {
1290        ParWorklist {
1291            items: std::collections::VecDeque::new(),
1292            in_worklist: std::collections::HashSet::new(),
1293        }
1294    }
1295    #[allow(dead_code)]
1296    pub fn push(&mut self, item: u32) -> bool {
1297        if self.in_worklist.insert(item) {
1298            self.items.push_back(item);
1299            true
1300        } else {
1301            false
1302        }
1303    }
1304    #[allow(dead_code)]
1305    pub fn pop(&mut self) -> Option<u32> {
1306        let item = self.items.pop_front()?;
1307        self.in_worklist.remove(&item);
1308        Some(item)
1309    }
1310    #[allow(dead_code)]
1311    pub fn is_empty(&self) -> bool {
1312        self.items.is_empty()
1313    }
1314    #[allow(dead_code)]
1315    pub fn len(&self) -> usize {
1316        self.items.len()
1317    }
1318    #[allow(dead_code)]
1319    pub fn contains(&self, item: u32) -> bool {
1320        self.in_worklist.contains(&item)
1321    }
1322}
/// Configuration for OParExt passes.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OParExtPassConfig {
    /// Human-readable pass name.
    pub name: String,
    /// Pipeline phase this pass runs in.
    pub phase: OParExtPassPhase,
    /// Whether the pass should run at all.
    pub enabled: bool,
    /// Upper bound on fixed-point iterations.
    pub max_iterations: usize,
    /// Debug verbosity; 0 means debug output is off.
    pub debug: u32,
    /// Optional per-pass time budget in milliseconds.
    pub timeout_ms: Option<u64>,
}
1334impl OParExtPassConfig {
1335    #[allow(dead_code)]
1336    pub fn new(name: impl Into<String>) -> Self {
1337        Self {
1338            name: name.into(),
1339            phase: OParExtPassPhase::Middle,
1340            enabled: true,
1341            max_iterations: 100,
1342            debug: 0,
1343            timeout_ms: None,
1344        }
1345    }
1346    #[allow(dead_code)]
1347    pub fn with_phase(mut self, phase: OParExtPassPhase) -> Self {
1348        self.phase = phase;
1349        self
1350    }
1351    #[allow(dead_code)]
1352    pub fn with_max_iter(mut self, n: usize) -> Self {
1353        self.max_iterations = n;
1354        self
1355    }
1356    #[allow(dead_code)]
1357    pub fn with_debug(mut self, d: u32) -> Self {
1358        self.debug = d;
1359        self
1360    }
1361    #[allow(dead_code)]
1362    pub fn disabled(mut self) -> Self {
1363        self.enabled = false;
1364        self
1365    }
1366    #[allow(dead_code)]
1367    pub fn with_timeout(mut self, ms: u64) -> Self {
1368        self.timeout_ms = Some(ms);
1369        self
1370    }
1371    #[allow(dead_code)]
1372    pub fn is_debug_enabled(&self) -> bool {
1373        self.debug > 0
1374    }
1375}
/// The phases a parallelisation pass can belong to.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub enum ParPassPhase {
    Analysis,
    Transformation,
    Verification,
    Cleanup,
}
impl ParPassPhase {
    /// Lowercase display name of the phase.
    #[allow(dead_code)]
    pub fn name(&self) -> &str {
        match self {
            Self::Analysis => "analysis",
            Self::Transformation => "transformation",
            Self::Verification => "verification",
            Self::Cleanup => "cleanup",
        }
    }
    /// Whether this phase may mutate the IR (transformation and cleanup do;
    /// analysis and verification do not).
    #[allow(dead_code)]
    pub fn is_modifying(&self) -> bool {
        match self {
            Self::Transformation | Self::Cleanup => true,
            Self::Analysis | Self::Verification => false,
        }
    }
}
/// A single affine access of the form `base + coeff * loop_var + offset`.
///
/// A zero `coeff` is treated as a scalar (loop-invariant) access by
/// `independent_from`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AffineAccess {
    /// Base array / variable name.
    pub base: String,
    /// Coefficient of the loop induction variable.
    pub coeff: i64,
    /// Constant offset.
    pub offset: i64,
    /// Whether this is a write access.
    pub is_write: bool,
}
1411impl AffineAccess {
1412    /// Two accesses are independent under Bernstein's conditions when their
1413    /// access sets are provably disjoint, i.e., there is no integer `i`, `j`
1414    /// (with `i != j` for loop-carried deps) satisfying both access functions.
1415    ///
1416    /// For 1-D affine accesses `base + c1*i + o1` vs `base + c2*j + o2` we
1417    /// require `c1 == c2` (same stride) to get a GCD test: `gcd(c1,c2) | (o1-o2)`.
1418    pub fn independent_from(&self, other: &AffineAccess) -> bool {
1419        if self.base != other.base {
1420            return true;
1421        }
1422        if !self.is_write && !other.is_write {
1423            return true;
1424        }
1425        let g = gcd(self.coeff.unsigned_abs(), other.coeff.unsigned_abs()) as i64;
1426        if g == 0 {
1427            return self.offset != other.offset;
1428        }
1429        (self.offset - other.offset) % g != 0
1430    }
1431}
/// The concrete algorithmic pattern recognised in a parallel region.
///
/// Derives `Copy`/`Eq`/`Hash` so a pattern can be passed by value and used
/// as a cheap lookup key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParallelPattern {
    /// `out[i] = f(in[i])` — embarrassingly parallel.
    Map,
    /// `out = [x for x in xs if p(x)]` — stream compaction.
    Filter,
    /// `acc = fold(f, init, xs)` — can use tree-reduction.
    Reduce,
    /// `out[i] = prefix_op(out[0..i])` — parallel-prefix.
    Scan,
    /// `out[i] = f(neighbours(in, i))` — finite-difference / convolution.
    Stencil,
    /// Generic counted loop with independent iterations.
    ParallelFor,
    /// Indexed scatter: `out[idx[i]] = val[i]`.
    Scatter,
    /// Indexed gather: `out[i] = in[idx[i]]`.
    Gather,
}
/// Constant folding helper for OParExt.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OParExtConstFolder {
    // Number of fold attempts recorded.
    pub(super) folds: usize,
    // Number of rejected folds (zero divisors, oversized shift amounts).
    pub(super) failures: usize,
    // On/off switch. NOTE(review): `new()` starts with this true while the
    // derived `Default` leaves it false — confirm which is intended.
    pub(super) enabled: bool,
}
1460impl OParExtConstFolder {
1461    #[allow(dead_code)]
1462    pub fn new() -> Self {
1463        Self {
1464            folds: 0,
1465            failures: 0,
1466            enabled: true,
1467        }
1468    }
1469    #[allow(dead_code)]
1470    pub fn add_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1471        self.folds += 1;
1472        a.checked_add(b)
1473    }
1474    #[allow(dead_code)]
1475    pub fn sub_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1476        self.folds += 1;
1477        a.checked_sub(b)
1478    }
1479    #[allow(dead_code)]
1480    pub fn mul_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1481        self.folds += 1;
1482        a.checked_mul(b)
1483    }
1484    #[allow(dead_code)]
1485    pub fn div_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1486        if b == 0 {
1487            self.failures += 1;
1488            None
1489        } else {
1490            self.folds += 1;
1491            a.checked_div(b)
1492        }
1493    }
1494    #[allow(dead_code)]
1495    pub fn rem_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1496        if b == 0 {
1497            self.failures += 1;
1498            None
1499        } else {
1500            self.folds += 1;
1501            a.checked_rem(b)
1502        }
1503    }
1504    #[allow(dead_code)]
1505    pub fn neg_i64(&mut self, a: i64) -> Option<i64> {
1506        self.folds += 1;
1507        a.checked_neg()
1508    }
1509    #[allow(dead_code)]
1510    pub fn shl_i64(&mut self, a: i64, s: u32) -> Option<i64> {
1511        if s >= 64 {
1512            self.failures += 1;
1513            None
1514        } else {
1515            self.folds += 1;
1516            a.checked_shl(s)
1517        }
1518    }
1519    #[allow(dead_code)]
1520    pub fn shr_i64(&mut self, a: i64, s: u32) -> Option<i64> {
1521        if s >= 64 {
1522            self.failures += 1;
1523            None
1524        } else {
1525            self.folds += 1;
1526            a.checked_shr(s)
1527        }
1528    }
1529    #[allow(dead_code)]
1530    pub fn and_i64(&mut self, a: i64, b: i64) -> i64 {
1531        self.folds += 1;
1532        a & b
1533    }
1534    #[allow(dead_code)]
1535    pub fn or_i64(&mut self, a: i64, b: i64) -> i64 {
1536        self.folds += 1;
1537        a | b
1538    }
1539    #[allow(dead_code)]
1540    pub fn xor_i64(&mut self, a: i64, b: i64) -> i64 {
1541        self.folds += 1;
1542        a ^ b
1543    }
1544    #[allow(dead_code)]
1545    pub fn not_i64(&mut self, a: i64) -> i64 {
1546        self.folds += 1;
1547        !a
1548    }
1549    #[allow(dead_code)]
1550    pub fn fold_count(&self) -> usize {
1551        self.folds
1552    }
1553    #[allow(dead_code)]
1554    pub fn failure_count(&self) -> usize {
1555        self.failures
1556    }
1557    #[allow(dead_code)]
1558    pub fn enable(&mut self) {
1559        self.enabled = true;
1560    }
1561    #[allow(dead_code)]
1562    pub fn disable(&mut self) {
1563        self.enabled = false;
1564    }
1565    #[allow(dead_code)]
1566    pub fn is_enabled(&self) -> bool {
1567        self.enabled
1568    }
1569}
/// Detects potential data races using a simplified Lamport happened-before model.
///
/// We conservatively flag any pair of accesses to the same variable where at
/// least one is a write and there is no ordering edge between them.
struct RaceDetector {
    /// Known ordering edges: `(a, b)` means `a` happens-before `b`.
    ///
    /// Stored as given; no transitive closure is computed, so `may_race`
    /// only sees direct edges.
    pub(super) happens_before: HashSet<(LcnfVarId, LcnfVarId)>,
}
impl RaceDetector {
    /// Creates a detector with no known ordering edges.
    pub(super) fn new() -> Self {
        RaceDetector {
            happens_before: HashSet::new(),
        }
    }
    /// Records that `before` happens-before `after` (direct edge only;
    /// no transitive closure is derived).
    pub(super) fn add_ordering(&mut self, before: LcnfVarId, after: LcnfVarId) {
        self.happens_before.insert((before, after));
    }
    /// Conservatively reports whether accesses to `a` and `b` may race:
    /// at least one must be a write, and no direct ordering edge may exist
    /// in either direction.
    pub(super) fn may_race(
        &self,
        a: LcnfVarId,
        b: LcnfVarId,
        a_is_write: bool,
        b_is_write: bool,
    ) -> bool {
        // Two reads never race.
        if !a_is_write && !b_is_write {
            return false;
        }
        !self.happens_before.contains(&(a, b)) && !self.happens_before.contains(&(b, a))
    }
    /// Scans a function body and reports write/read race pairs plus the
    /// variables whose write/write conflicts would need atomic operations.
    ///
    /// Write/write pairs are examined in both orders, so each conflict is
    /// seen twice; the duplicated names are removed by the sort/dedup below.
    pub(super) fn analyse_decl(&self, decl: &LcnfFunDecl) -> ThreadSafetyInfo {
        let mut races = Vec::new();
        let mut atomics = Vec::new();
        let accesses = Self::collect_var_accesses(&decl.body);
        let writes: Vec<_> = accesses
            .iter()
            .filter(|(_, is_write, _)| *is_write)
            .collect();
        let reads: Vec<_> = accesses
            .iter()
            .filter(|(_, is_write, _)| !*is_write)
            .collect();
        for (wid, _, wname) in &writes {
            for (rid, _, rname) in &reads {
                if wid != rid && self.may_race(*wid, *rid, true, false) {
                    races.push((wname.clone(), rname.clone()));
                }
            }
            for (wid2, _, wname2) in &writes {
                if wid != wid2 && self.may_race(*wid, *wid2, true, true) {
                    atomics.push(wname.clone());
                    atomics.push(wname2.clone());
                }
            }
        }
        atomics.sort();
        atomics.dedup();
        ThreadSafetyInfo {
            is_thread_safe: races.is_empty() && atomics.is_empty(),
            race_conditions: races,
            atomic_ops_needed: atomics,
        }
    }
    /// Collects `(var id, is-write, name)` triples for every `Let` binding
    /// reachable in `expr`, in traversal order.
    pub(super) fn collect_var_accesses(expr: &LcnfExpr) -> Vec<(LcnfVarId, bool, String)> {
        let mut out = Vec::new();
        Self::collect_inner(expr, &mut out);
        out
    }
    /// Recursive worker for `collect_var_accesses`.
    ///
    /// A binding counts as a write when its value is a constructor whose
    /// name contains "write" or "store" — a purely name-based heuristic.
    pub(super) fn collect_inner(expr: &LcnfExpr, out: &mut Vec<(LcnfVarId, bool, String)>) {
        match expr {
            LcnfExpr::Let {
                id,
                name,
                value,
                body,
                ..
            } => {
                let is_write = matches!(
                    value, LcnfLetValue::Ctor(n, _, _) if n.contains("write") || n
                    .contains("store")
                );
                out.push((*id, is_write, name.clone()));
                Self::collect_inner(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    Self::collect_inner(&alt.body, out);
                }
                if let Some(d) = default {
                    Self::collect_inner(d, out);
                }
            }
            _ => {}
        }
    }
}
/// A generic key-value configuration store for OPar.
#[derive(Debug, Clone, Default)]
pub struct OParConfig {
    // Raw string settings keyed by name; typed access goes through
    // `get_bool` / `get_int`.
    pub(super) entries: std::collections::HashMap<String, String>,
}
1670impl OParConfig {
1671    pub fn new() -> Self {
1672        OParConfig::default()
1673    }
1674    pub fn set(&mut self, key: impl Into<String>, value: impl Into<String>) {
1675        self.entries.insert(key.into(), value.into());
1676    }
1677    pub fn get(&self, key: &str) -> Option<&str> {
1678        self.entries.get(key).map(|s| s.as_str())
1679    }
1680    pub fn get_bool(&self, key: &str) -> bool {
1681        matches!(self.get(key), Some("true") | Some("1") | Some("yes"))
1682    }
1683    pub fn get_int(&self, key: &str) -> Option<i64> {
1684        self.get(key)?.parse().ok()
1685    }
1686    pub fn len(&self) -> usize {
1687        self.entries.len()
1688    }
1689    pub fn is_empty(&self) -> bool {
1690        self.entries.is_empty()
1691    }
1692}
/// Size-bounded cache of analysis results with hit/miss accounting.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ParAnalysisCache {
    // Cached analysis results keyed by name.
    pub(super) entries: std::collections::HashMap<String, ParCacheEntry>,
    // Upper bound on the number of entries kept.
    pub(super) max_size: usize,
    // Lifetime lookup-hit counter.
    pub(super) hits: u64,
    // Lifetime lookup-miss counter.
    pub(super) misses: u64,
}
1701impl ParAnalysisCache {
1702    #[allow(dead_code)]
1703    pub fn new(max_size: usize) -> Self {
1704        ParAnalysisCache {
1705            entries: std::collections::HashMap::new(),
1706            max_size,
1707            hits: 0,
1708            misses: 0,
1709        }
1710    }
1711    #[allow(dead_code)]
1712    pub fn get(&mut self, key: &str) -> Option<&ParCacheEntry> {
1713        if self.entries.contains_key(key) {
1714            self.hits += 1;
1715            self.entries.get(key)
1716        } else {
1717            self.misses += 1;
1718            None
1719        }
1720    }
1721    #[allow(dead_code)]
1722    pub fn insert(&mut self, key: String, data: Vec<u8>) {
1723        if self.entries.len() >= self.max_size {
1724            if let Some(oldest) = self.entries.keys().next().cloned() {
1725                self.entries.remove(&oldest);
1726            }
1727        }
1728        self.entries.insert(
1729            key.clone(),
1730            ParCacheEntry {
1731                key,
1732                data,
1733                timestamp: 0,
1734                valid: true,
1735            },
1736        );
1737    }
1738    #[allow(dead_code)]
1739    pub fn invalidate(&mut self, key: &str) {
1740        if let Some(entry) = self.entries.get_mut(key) {
1741            entry.valid = false;
1742        }
1743    }
1744    #[allow(dead_code)]
1745    pub fn clear(&mut self) {
1746        self.entries.clear();
1747    }
1748    #[allow(dead_code)]
1749    pub fn hit_rate(&self) -> f64 {
1750        let total = self.hits + self.misses;
1751        if total == 0 {
1752            return 0.0;
1753        }
1754        self.hits as f64 / total as f64
1755    }
1756    #[allow(dead_code)]
1757    pub fn size(&self) -> usize {
1758        self.entries.len()
1759    }
1760}
/// Dependency graph for OParExt.
///
/// A fixed-size directed graph over node indices `0..n`, storing both
/// forward and reverse adjacency so it can be traversed in either direction.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OParExtDepGraph {
    // Number of nodes; all stored indices are < n.
    pub(super) n: usize,
    // Forward adjacency: adj[u] lists v for each edge u -> v.
    pub(super) adj: Vec<Vec<usize>>,
    // Reverse adjacency: rev[v] lists u for each edge u -> v.
    pub(super) rev: Vec<Vec<usize>>,
    // Count of distinct edges inserted.
    pub(super) edge_count: usize,
}
1770impl OParExtDepGraph {
1771    #[allow(dead_code)]
1772    pub fn new(n: usize) -> Self {
1773        Self {
1774            n,
1775            adj: vec![Vec::new(); n],
1776            rev: vec![Vec::new(); n],
1777            edge_count: 0,
1778        }
1779    }
1780    #[allow(dead_code)]
1781    pub fn add_edge(&mut self, from: usize, to: usize) {
1782        if from < self.n && to < self.n {
1783            if !self.adj[from].contains(&to) {
1784                self.adj[from].push(to);
1785                self.rev[to].push(from);
1786                self.edge_count += 1;
1787            }
1788        }
1789    }
1790    #[allow(dead_code)]
1791    pub fn succs(&self, n: usize) -> &[usize] {
1792        self.adj.get(n).map(|v| v.as_slice()).unwrap_or(&[])
1793    }
1794    #[allow(dead_code)]
1795    pub fn preds(&self, n: usize) -> &[usize] {
1796        self.rev.get(n).map(|v| v.as_slice()).unwrap_or(&[])
1797    }
1798    #[allow(dead_code)]
1799    pub fn topo_sort(&self) -> Option<Vec<usize>> {
1800        let mut deg: Vec<usize> = (0..self.n).map(|i| self.rev[i].len()).collect();
1801        let mut q: std::collections::VecDeque<usize> =
1802            (0..self.n).filter(|&i| deg[i] == 0).collect();
1803        let mut out = Vec::with_capacity(self.n);
1804        while let Some(u) = q.pop_front() {
1805            out.push(u);
1806            for &v in &self.adj[u] {
1807                deg[v] -= 1;
1808                if deg[v] == 0 {
1809                    q.push_back(v);
1810                }
1811            }
1812        }
1813        if out.len() == self.n {
1814            Some(out)
1815        } else {
1816            None
1817        }
1818    }
1819    #[allow(dead_code)]
1820    pub fn has_cycle(&self) -> bool {
1821        self.topo_sort().is_none()
1822    }
1823    #[allow(dead_code)]
1824    pub fn reachable(&self, start: usize) -> Vec<usize> {
1825        let mut vis = vec![false; self.n];
1826        let mut stk = vec![start];
1827        let mut out = Vec::new();
1828        while let Some(u) = stk.pop() {
1829            if u < self.n && !vis[u] {
1830                vis[u] = true;
1831                out.push(u);
1832                for &v in &self.adj[u] {
1833                    if !vis[v] {
1834                        stk.push(v);
1835                    }
1836                }
1837            }
1838        }
1839        out
1840    }
    /// Computes strongly connected components using Kosaraju's
    /// algorithm, fully iteratively (no recursion).
    ///
    /// Phase 1: a DFS over the forward edges records every node in
    /// post-order finish order. Phase 2: nodes are taken in reverse
    /// finish order; each not-yet-assigned node seeds a DFS over the
    /// reverse edges (`self.rev`), and everything it reaches forms one
    /// component. Components are returned in discovery order.
    #[allow(dead_code)]
    pub fn scc(&self) -> Vec<Vec<usize>> {
        let mut visited = vec![false; self.n];
        let mut order = Vec::new();
        for i in 0..self.n {
            if !visited[i] {
                // Explicit DFS stack: each entry is (node, index of the
                // next child to explore), emulating the recursion frame.
                let mut stk = vec![(i, 0usize)];
                while let Some((u, idx)) = stk.last_mut() {
                    if !visited[*u] {
                        visited[*u] = true;
                    }
                    if *idx < self.adj[*u].len() {
                        let v = self.adj[*u][*idx];
                        *idx += 1;
                        if !visited[v] {
                            stk.push((v, 0));
                        }
                    } else {
                        // All children done: record post-order finish.
                        order.push(*u);
                        stk.pop();
                    }
                }
            }
        }
        // usize::MAX marks "not yet assigned to any component".
        let mut comp = vec![usize::MAX; self.n];
        let mut components: Vec<Vec<usize>> = Vec::new();
        for &start in order.iter().rev() {
            if comp[start] == usize::MAX {
                let cid = components.len();
                let mut component = Vec::new();
                let mut stk = vec![start];
                // DFS on the transposed graph collects one SCC.
                while let Some(u) = stk.pop() {
                    if comp[u] == usize::MAX {
                        comp[u] = cid;
                        component.push(u);
                        for &v in &self.rev[u] {
                            if comp[v] == usize::MAX {
                                stk.push(v);
                            }
                        }
                    }
                }
                components.push(component);
            }
        }
        components
    }
    /// Number of nodes in the graph.
    #[allow(dead_code)]
    pub fn node_count(&self) -> usize {
        self.n
    }
    /// Number of edges recorded in the graph.
    #[allow(dead_code)]
    pub fn edge_count(&self) -> usize {
        self.edge_count
    }
1896}
/// Emission statistics for OPar.
#[derive(Debug, Clone, Default)]
pub struct OParEmitStats {
    pub bytes_emitted: usize,
    pub items_emitted: usize,
    pub errors: usize,
    pub warnings: usize,
    pub elapsed_ms: u64,
}
impl OParEmitStats {
    /// Creates a zeroed statistics record.
    pub fn new() -> Self {
        Self::default()
    }
    /// Emission throughput in bytes per second. Returns 0.0 when no
    /// time has been recorded, avoiding a division by zero.
    pub fn throughput_bps(&self) -> f64 {
        match self.elapsed_ms {
            0 => 0.0,
            ms => self.bytes_emitted as f64 / (ms as f64 / 1000.0),
        }
    }
    /// True when emission produced no errors (warnings are allowed).
    pub fn is_clean(&self) -> bool {
        self.errors == 0
    }
}
/// A version tag for OPar output artifacts.
///
/// NOTE(review): the derived `Ord` sorts `pre: None` before `Some(_)`,
/// which is the opposite of semver pre-release ordering (where
/// `1.0.0-alpha < 1.0.0`) — confirm callers do not rely on semver
/// comparison semantics before changing or depending on the ordering.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OParVersion {
    pub major: u32,
    pub minor: u32,
    pub patch: u32,
    pub pre: Option<String>,
}
impl OParVersion {
    /// Builds a stable version (no pre-release tag).
    pub fn new(major: u32, minor: u32, patch: u32) -> Self {
        Self { major, minor, patch, pre: None }
    }
    /// Attaches a pre-release tag, consuming and returning `self`
    /// builder-style.
    pub fn with_pre(mut self, pre: impl Into<String>) -> Self {
        self.pre = Some(pre.into());
        self
    }
    /// A version is stable exactly when it carries no pre-release tag.
    pub fn is_stable(&self) -> bool {
        self.pre.is_none()
    }
    /// Compatible when the majors match and our minor is at least the
    /// other's (i.e. we may only add, never remove, minor features).
    pub fn is_compatible_with(&self, other: &OParVersion) -> bool {
        self.major == other.major && self.minor >= other.minor
    }
}
1949/// Tracks declared names for OPar scope analysis.
1950#[derive(Debug, Default)]
1951pub struct OParNameScope {
1952    pub(super) declared: std::collections::HashSet<String>,
1953    pub(super) depth: usize,
1954    pub(super) parent: Option<Box<OParNameScope>>,
1955}
1956impl OParNameScope {
1957    pub fn new() -> Self {
1958        OParNameScope::default()
1959    }
1960    pub fn declare(&mut self, name: impl Into<String>) -> bool {
1961        self.declared.insert(name.into())
1962    }
1963    pub fn is_declared(&self, name: &str) -> bool {
1964        self.declared.contains(name)
1965    }
1966    pub fn push_scope(self) -> Self {
1967        OParNameScope {
1968            declared: std::collections::HashSet::new(),
1969            depth: self.depth + 1,
1970            parent: Some(Box::new(self)),
1971        }
1972    }
1973    pub fn pop_scope(self) -> Self {
1974        *self.parent.unwrap_or_default()
1975    }
1976    pub fn depth(&self) -> usize {
1977        self.depth
1978    }
1979    pub fn len(&self) -> usize {
1980        self.declared.len()
1981    }
1982}
/// Pass-timing record for OPar profiler.
#[derive(Debug, Clone)]
pub struct OParPassTiming {
    pub pass_name: String,
    pub elapsed_us: u64,
    pub items_processed: usize,
    pub bytes_before: usize,
    pub bytes_after: usize,
}
impl OParPassTiming {
    /// Builds one timing record for a single pass execution.
    pub fn new(
        pass_name: impl Into<String>,
        elapsed_us: u64,
        items: usize,
        before: usize,
        after: usize,
    ) -> Self {
        Self {
            pass_name: pass_name.into(),
            elapsed_us,
            items_processed: items,
            bytes_before: before,
            bytes_after: after,
        }
    }
    /// Items processed per second; 0.0 when no time was recorded
    /// (guards the division by zero).
    pub fn throughput_mps(&self) -> f64 {
        match self.elapsed_us {
            0 => 0.0,
            us => self.items_processed as f64 / (us as f64 / 1_000_000.0),
        }
    }
    /// Output/input size ratio; an empty input counts as ratio 1.0.
    pub fn size_ratio(&self) -> f64 {
        match self.bytes_before {
            0 => 1.0,
            before => self.bytes_after as f64 / before as f64,
        }
    }
    /// A pass is considered profitable when it grew its output by at
    /// most 5% (ratio <= 1.05).
    pub fn is_profitable(&self) -> bool {
        self.size_ratio() <= 1.05
    }
}
2026/// Pipeline profiler for OPar.
2027#[derive(Debug, Default)]
2028pub struct OParProfiler {
2029    pub(super) timings: Vec<OParPassTiming>,
2030}
2031impl OParProfiler {
2032    pub fn new() -> Self {
2033        OParProfiler::default()
2034    }
2035    pub fn record(&mut self, t: OParPassTiming) {
2036        self.timings.push(t);
2037    }
2038    pub fn total_elapsed_us(&self) -> u64 {
2039        self.timings.iter().map(|t| t.elapsed_us).sum()
2040    }
2041    pub fn slowest_pass(&self) -> Option<&OParPassTiming> {
2042        self.timings.iter().max_by_key(|t| t.elapsed_us)
2043    }
2044    pub fn num_passes(&self) -> usize {
2045        self.timings.len()
2046    }
2047    pub fn profitable_passes(&self) -> Vec<&OParPassTiming> {
2048        self.timings.iter().filter(|t| t.is_profitable()).collect()
2049    }
2050}
/// High-level classification of a parallel execution model.
///
/// Carries no payload and derives `Copy`, `Eq`, and `Hash`, so it can
/// be passed by value and used as a cheap tag or map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParallelKind {
    /// Independent iterations can run simultaneously (SIMD / OpenMP parallel-for).
    DataParallel,
    /// Independent sub-computations (futures / async tasks).
    TaskParallel,
    /// Producer–consumer stages overlap in time.
    PipelineParallel,
    /// Evaluate multiple branches speculatively and discard losers.
    SpeculativeParallel,
}
/// Liveness analysis for OParExt.
///
/// Per-block sets are stored as plain `Vec<usize>` kept duplicate-free
/// by insertion-time checks; all query methods treat an out-of-range
/// block index as "not present" rather than panicking.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OParExtLiveness {
    pub live_in: Vec<Vec<usize>>,
    pub live_out: Vec<Vec<usize>>,
    pub defs: Vec<Vec<usize>>,
    pub uses: Vec<Vec<usize>>,
}
impl OParExtLiveness {
    /// Appends `v` to `slot` unless already present, keeping the
    /// per-block vectors duplicate-free.
    #[allow(dead_code)]
    fn push_unique(slot: &mut Vec<usize>, v: usize) {
        if !slot.contains(&v) {
            slot.push(v);
        }
    }
    /// Allocates empty liveness sets for `n` basic blocks.
    #[allow(dead_code)]
    pub fn new(n: usize) -> Self {
        Self {
            live_in: vec![Vec::new(); n],
            live_out: vec![Vec::new(); n],
            defs: vec![Vec::new(); n],
            uses: vec![Vec::new(); n],
        }
    }
    /// True when variable `v` is live on entry to block `b`.
    #[allow(dead_code)]
    pub fn live_in(&self, b: usize, v: usize) -> bool {
        self.live_in.get(b).map_or(false, |set| set.contains(&v))
    }
    /// True when variable `v` is live on exit from block `b`.
    #[allow(dead_code)]
    pub fn live_out(&self, b: usize, v: usize) -> bool {
        self.live_out.get(b).map_or(false, |set| set.contains(&v))
    }
    /// Records a definition of `v` in block `b`; out-of-range blocks
    /// are silently ignored.
    #[allow(dead_code)]
    pub fn add_def(&mut self, b: usize, v: usize) {
        if let Some(slot) = self.defs.get_mut(b) {
            Self::push_unique(slot, v);
        }
    }
    /// Records a use of `v` in block `b`; out-of-range blocks are
    /// silently ignored.
    #[allow(dead_code)]
    pub fn add_use(&mut self, b: usize, v: usize) {
        if let Some(slot) = self.uses.get_mut(b) {
            Self::push_unique(slot, v);
        }
    }
    /// True when `v` was recorded as used in block `b`.
    #[allow(dead_code)]
    pub fn var_is_used_in_block(&self, b: usize, v: usize) -> bool {
        self.uses.get(b).map_or(false, |set| set.contains(&v))
    }
    /// True when `v` was recorded as defined in block `b`.
    #[allow(dead_code)]
    pub fn var_is_def_in_block(&self, b: usize, v: usize) -> bool {
        self.defs.get(b).map_or(false, |set| set.contains(&v))
    }
}
/// Constant-folding helpers shared by the parallel optimizer.
///
/// Integer helpers return `Option`: `None` means the fold is not
/// representable (overflow, division by zero, shift out of range) and
/// the caller should leave the expression unfolded. Float and
/// bitwise/boolean helpers are total and return plain values.
#[allow(dead_code)]
pub struct ParConstantFoldingHelper;
impl ParConstantFoldingHelper {
    /// `a + b`, or `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_add_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_add(b)
    }
    /// `a - b`, or `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_sub_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_sub(b)
    }
    /// `a * b`, or `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_mul_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_mul(b)
    }
    /// `a / b`, or `None` when `b == 0` or on overflow
    /// (`i64::MIN / -1`). `checked_div` already handles the zero
    /// divisor, so no separate check is needed.
    #[allow(dead_code)]
    pub fn fold_div_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_div(b)
    }
    /// `a + b` under IEEE-754; never fails.
    #[allow(dead_code)]
    pub fn fold_add_f64(a: f64, b: f64) -> f64 {
        a + b
    }
    /// `a * b` under IEEE-754; never fails.
    #[allow(dead_code)]
    pub fn fold_mul_f64(a: f64, b: f64) -> f64 {
        a * b
    }
    /// `-a`, or `None` on overflow (`i64::MIN` has no negation).
    #[allow(dead_code)]
    pub fn fold_neg_i64(a: i64) -> Option<i64> {
        a.checked_neg()
    }
    /// Logical negation.
    #[allow(dead_code)]
    pub fn fold_not_bool(a: bool) -> bool {
        !a
    }
    /// Logical AND.
    #[allow(dead_code)]
    pub fn fold_and_bool(a: bool, b: bool) -> bool {
        a && b
    }
    /// Logical OR.
    #[allow(dead_code)]
    pub fn fold_or_bool(a: bool, b: bool) -> bool {
        a || b
    }
    /// `a << b`, or `None` when `b >= 64`.
    #[allow(dead_code)]
    pub fn fold_shl_i64(a: i64, b: u32) -> Option<i64> {
        a.checked_shl(b)
    }
    /// `a >> b` (arithmetic shift), or `None` when `b >= 64`.
    #[allow(dead_code)]
    pub fn fold_shr_i64(a: i64, b: u32) -> Option<i64> {
        a.checked_shr(b)
    }
    /// `a % b`, or `None` when `b == 0` or on overflow.
    ///
    /// Fix: the previous implementation computed `a % b` directly,
    /// which panics in debug builds on `i64::MIN % -1`; `checked_rem`
    /// returns `None` for that case (and for `b == 0`) instead, so
    /// the fold is simply skipped.
    #[allow(dead_code)]
    pub fn fold_rem_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_rem(b)
    }
    /// Bitwise AND (total).
    #[allow(dead_code)]
    pub fn fold_bitand_i64(a: i64, b: i64) -> i64 {
        a & b
    }
    /// Bitwise OR (total).
    #[allow(dead_code)]
    pub fn fold_bitor_i64(a: i64, b: i64) -> i64 {
        a | b
    }
    /// Bitwise XOR (total).
    #[allow(dead_code)]
    pub fn fold_bitxor_i64(a: i64, b: i64) -> i64 {
        a ^ b
    }
    /// Bitwise NOT (total).
    #[allow(dead_code)]
    pub fn fold_bitnot_i64(a: i64) -> i64 {
        !a
    }
}
/// Heuristic detector for common parallel patterns over LCNF.
///
/// We walk the function body looking for structural clues:
/// - A tail-recursive function whose accumulator is updated unconditionally
///   is a `Reduce`.
/// - A tail-recursive function that writes an output array at index `i` and
///   reads only from the input at `i` is a `Map`.
/// - Otherwise, if the loop body is side-effect-free we classify as `ParallelFor`.
pub struct PatternDetector<'a> {
    // Function under analysis; borrowed immutably for the detector's lifetime.
    pub(super) decl: &'a LcnfFunDecl,
}
impl<'a> PatternDetector<'a> {
    /// Wraps a function declaration for pattern inspection.
    pub(super) fn new(decl: &'a LcnfFunDecl) -> Self {
        PatternDetector { decl }
    }
    /// Classifies the function body, or returns `None` for
    /// non-recursive functions (only recursion is treated as a loop).
    ///
    /// Branch order encodes priority: Reduce > Stencil/Map > Scatter >
    /// Gather > Filter > Scan > ParallelFor. NOTE(review): the type
    /// doc above says side-effect-free bodies become `ParallelFor`,
    /// but no purity check is performed here — `ParallelFor` is the
    /// unconditional fallback; confirm this is intended.
    pub(super) fn detect(&self) -> Option<ParallelPattern> {
        if !self.decl.is_recursive {
            return None;
        }
        let reads = self.collect_reads(&self.decl.body);
        let writes = self.collect_writes(&self.decl.body);
        let has_accumulator = self.has_accumulator_update(&self.decl.body);
        let has_index_write = self.has_index_write(&self.decl.body);
        let has_index_read = !reads.is_empty();
        if has_accumulator && !has_index_write {
            return Some(ParallelPattern::Reduce);
        }
        if has_index_write && has_index_read {
            // Reading and writing exactly the same bases suggests a
            // neighborhood computation (stencil); otherwise treat it as
            // an element-wise map.
            let write_bases: HashSet<&str> = writes.iter().map(|s| s.as_str()).collect();
            let read_bases: HashSet<&str> = reads.iter().map(|s| s.as_str()).collect();
            if write_bases == read_bases {
                return Some(ParallelPattern::Stencil);
            }
            return Some(ParallelPattern::Map);
        }
        if has_index_write && !has_index_read {
            return Some(ParallelPattern::Scatter);
        }
        if !has_index_write && has_index_read {
            return Some(ParallelPattern::Gather);
        }
        if self.has_filter_pattern(&self.decl.body) {
            return Some(ParallelPattern::Filter);
        }
        if self.has_scan_pattern(&self.decl.body) {
            return Some(ParallelPattern::Scan);
        }
        Some(ParallelPattern::ParallelFor)
    }
    /// Collects read markers from `expr` (see `collect_reads_inner`).
    pub(super) fn collect_reads(&self, expr: &LcnfExpr) -> Vec<String> {
        let mut out = Vec::new();
        self.collect_reads_inner(expr, &mut out);
        out
    }
    /// Pushes a `read_<id>` marker for every let-bound application of
    /// a variable, recursing through let bodies and all case arms.
    ///
    /// NOTE(review): every `App` of a variable counts as a "read",
    /// not just array indexing — this is a coarse heuristic proxy.
    pub(super) fn collect_reads_inner(&self, expr: &LcnfExpr, out: &mut Vec<String>) {
        match expr {
            LcnfExpr::Let { value, body, .. } => {
                if let LcnfLetValue::App(LcnfArg::Var(id), _args) = value {
                    out.push(format!("read_{}", id.0));
                }
                self.collect_reads_inner(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    self.collect_reads_inner(&alt.body, out);
                }
                if let Some(d) = default {
                    self.collect_reads_inner(d, out);
                }
            }
            // Leaves carry no nested expressions to scan.
            LcnfExpr::Return(_) | LcnfExpr::TailCall(_, _) | LcnfExpr::Unreachable => {}
        }
    }
    /// Collects write markers from `expr` (see `collect_writes_inner`).
    pub(super) fn collect_writes(&self, expr: &LcnfExpr) -> Vec<String> {
        let mut out = Vec::new();
        self.collect_writes_inner(expr, &mut out);
        out
    }
    /// Records the binder name of every let whose constructor name
    /// contains "write" or "store" — a textual heuristic for memory
    /// writes — recursing through let bodies and all case arms.
    pub(super) fn collect_writes_inner(&self, expr: &LcnfExpr, out: &mut Vec<String>) {
        match expr {
            LcnfExpr::Let {
                value, body, name, ..
            } => {
                if let LcnfLetValue::Ctor(ctor_name, _, _) = value {
                    if ctor_name.contains("write") || ctor_name.contains("store") {
                        out.push(name.clone());
                    }
                }
                self.collect_writes_inner(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    self.collect_writes_inner(&alt.body, out);
                }
                if let Some(d) = default {
                    self.collect_writes_inner(d, out);
                }
            }
            LcnfExpr::Return(_) | LcnfExpr::TailCall(_, _) | LcnfExpr::Unreachable => {}
        }
    }
    /// True when any let-binding applies a variable to exactly two
    /// arguments with a low function id — taken to be an arithmetic
    /// primitive updating an accumulator.
    ///
    /// NOTE(review): assumes var ids < 16 denote arithmetic
    /// primitives — confirm against the LcnfVarId allocation scheme.
    pub(super) fn has_accumulator_update(&self, expr: &LcnfExpr) -> bool {
        match expr {
            LcnfExpr::Let { value, body, .. } => {
                if let LcnfLetValue::App(LcnfArg::Var(fid), args) = value {
                    let is_binop = args.len() == 2;
                    let is_arith = fid.0 < 16;
                    if is_binop && is_arith {
                        return true;
                    }
                }
                self.has_accumulator_update(body)
            }
            LcnfExpr::Case { alts, default, .. } => {
                alts.iter().any(|a| self.has_accumulator_update(&a.body))
                    || default
                        .as_ref()
                        .map(|d| self.has_accumulator_update(d))
                        .unwrap_or(false)
            }
            _ => false,
        }
    }
    /// True when any let binds a constructor whose name contains
    /// "write" or "store" — the same textual write heuristic as
    /// `collect_writes_inner`, but as a pure predicate.
    pub(super) fn has_index_write(&self, expr: &LcnfExpr) -> bool {
        match expr {
            LcnfExpr::Let { value, body, .. } => {
                matches!(
                    value, LcnfLetValue::Ctor(n, _, _) if n.contains("write") || n
                    .contains("store")
                ) || self.has_index_write(body)
            }
            LcnfExpr::Case { alts, default, .. } => {
                alts.iter().any(|a| self.has_index_write(&a.body))
                    || default
                        .as_ref()
                        .map(|d| self.has_index_write(d))
                        .unwrap_or(false)
            }
            _ => false,
        }
    }
    /// True when a two-armed `Case` appears along the leading chain of
    /// lets — read as a keep/discard decision per element.
    ///
    /// NOTE(review): only the let spine is searched; a two-armed case
    /// nested inside another case arm is not found — confirm intended.
    pub(super) fn has_filter_pattern(&self, expr: &LcnfExpr) -> bool {
        matches!(expr, LcnfExpr::Case { alts, .. } if alts.len() == 2)
            || match expr {
                LcnfExpr::Let { body, .. } => self.has_filter_pattern(body),
                _ => false,
            }
    }
    /// Scan = accumulator update combined with an indexed write
    /// (running value is both folded and emitted per element).
    pub(super) fn has_scan_pattern(&self, expr: &LcnfExpr) -> bool {
        self.has_accumulator_update(expr) && self.has_index_write(expr)
    }
}
2349}