1use crate::lcnf::{LcnfExpr, LcnfFunDecl, LcnfLetValue};
6
7use std::collections::{HashMap, HashSet, VecDeque};
8
9#[derive(Debug, Clone)]
11pub struct DataLocalityInfo {
12 pub accesses: Vec<MemoryAccess>,
14 pub working_set_bytes: u64,
16 pub best_cache_level: CacheLevel,
18 pub reuse_distance: f64,
20}
21impl DataLocalityInfo {
22 pub fn fits_in_l1(&self) -> bool {
24 self.working_set_bytes <= CacheLevel::L1.capacity_bytes()
25 }
26 pub fn fits_in_l2(&self) -> bool {
28 self.working_set_bytes <= CacheLevel::L2.capacity_bytes()
29 }
30 pub fn cache_friendly_fraction(&self) -> f64 {
32 if self.accesses.is_empty() {
33 return 1.0;
34 }
35 let friendly = self
36 .accesses
37 .iter()
38 .filter(|a| a.is_cache_friendly())
39 .count();
40 friendly as f64 / self.accesses.len() as f64
41 }
42}
43#[allow(dead_code)]
44#[derive(Debug, Clone)]
45pub struct OCAnalysisCache {
46 pub(super) entries: std::collections::HashMap<String, OCCacheEntry>,
47 pub(super) max_size: usize,
48 pub(super) hits: u64,
49 pub(super) misses: u64,
50}
51impl OCAnalysisCache {
52 #[allow(dead_code)]
53 pub fn new(max_size: usize) -> Self {
54 OCAnalysisCache {
55 entries: std::collections::HashMap::new(),
56 max_size,
57 hits: 0,
58 misses: 0,
59 }
60 }
61 #[allow(dead_code)]
62 pub fn get(&mut self, key: &str) -> Option<&OCCacheEntry> {
63 if self.entries.contains_key(key) {
64 self.hits += 1;
65 self.entries.get(key)
66 } else {
67 self.misses += 1;
68 None
69 }
70 }
71 #[allow(dead_code)]
72 pub fn insert(&mut self, key: String, data: Vec<u8>) {
73 if self.entries.len() >= self.max_size {
74 if let Some(oldest) = self.entries.keys().next().cloned() {
75 self.entries.remove(&oldest);
76 }
77 }
78 self.entries.insert(
79 key.clone(),
80 OCCacheEntry {
81 key,
82 data,
83 timestamp: 0,
84 valid: true,
85 },
86 );
87 }
88 #[allow(dead_code)]
89 pub fn invalidate(&mut self, key: &str) {
90 if let Some(entry) = self.entries.get_mut(key) {
91 entry.valid = false;
92 }
93 }
94 #[allow(dead_code)]
95 pub fn clear(&mut self) {
96 self.entries.clear();
97 }
98 #[allow(dead_code)]
99 pub fn hit_rate(&self) -> f64 {
100 let total = self.hits + self.misses;
101 if total == 0 {
102 return 0.0;
103 }
104 self.hits as f64 / total as f64
105 }
106 #[allow(dead_code)]
107 pub fn size(&self) -> usize {
108 self.entries.len()
109 }
110}
/// The scheduling phase a cache-extension pass runs in.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OCacheExtPassPhase {
    Early,
    Middle,
    Late,
    Finalize,
}
impl OCacheExtPassPhase {
    /// True for [`Self::Early`].
    #[allow(dead_code)]
    pub fn is_early(&self) -> bool {
        *self == Self::Early
    }
    /// True for [`Self::Middle`].
    #[allow(dead_code)]
    pub fn is_middle(&self) -> bool {
        *self == Self::Middle
    }
    /// True for [`Self::Late`].
    #[allow(dead_code)]
    pub fn is_late(&self) -> bool {
        *self == Self::Late
    }
    /// True for [`Self::Finalize`].
    #[allow(dead_code)]
    pub fn is_finalize(&self) -> bool {
        *self == Self::Finalize
    }
    /// Position of this phase in the pipeline (0 = first).
    #[allow(dead_code)]
    pub fn order(&self) -> u32 {
        match self {
            OCacheExtPassPhase::Early => 0,
            OCacheExtPassPhase::Middle => 1,
            OCacheExtPassPhase::Late => 2,
            OCacheExtPassPhase::Finalize => 3,
        }
    }
    /// Inverse of [`Self::order`]; `None` for out-of-range values.
    #[allow(dead_code)]
    pub fn from_order(n: u32) -> Option<Self> {
        [Self::Early, Self::Middle, Self::Late, Self::Finalize]
            .into_iter()
            .nth(n as usize)
    }
}
/// Tunable parameters for the cache-optimisation pass.
#[derive(Debug, Clone)]
pub struct CacheOptConfig {
    /// Cache-line size in bytes assumed by the optimiser.
    pub cache_line_size: u64,
    /// Lookahead distance used when inserting prefetches
    /// (units not visible here — presumably bytes or iterations; TODO confirm at the use site).
    pub prefetch_distance: u64,
    /// Whether prefetch insertion is performed at all.
    pub enable_prefetch: bool,
    /// Loop-tiling sub-configuration.
    pub tiling: LoopTilingConfig,
}
169#[allow(dead_code)]
170#[derive(Debug, Clone)]
171pub struct OCWorklist {
172 pub(super) items: std::collections::VecDeque<u32>,
173 pub(super) in_worklist: std::collections::HashSet<u32>,
174}
175impl OCWorklist {
176 #[allow(dead_code)]
177 pub fn new() -> Self {
178 OCWorklist {
179 items: std::collections::VecDeque::new(),
180 in_worklist: std::collections::HashSet::new(),
181 }
182 }
183 #[allow(dead_code)]
184 pub fn push(&mut self, item: u32) -> bool {
185 if self.in_worklist.insert(item) {
186 self.items.push_back(item);
187 true
188 } else {
189 false
190 }
191 }
192 #[allow(dead_code)]
193 pub fn pop(&mut self) -> Option<u32> {
194 let item = self.items.pop_front()?;
195 self.in_worklist.remove(&item);
196 Some(item)
197 }
198 #[allow(dead_code)]
199 pub fn is_empty(&self) -> bool {
200 self.items.is_empty()
201 }
202 #[allow(dead_code)]
203 pub fn len(&self) -> usize {
204 self.items.len()
205 }
206 #[allow(dead_code)]
207 pub fn contains(&self, item: u32) -> bool {
208 self.in_worklist.contains(&item)
209 }
210}
/// The scheduling phase an X2 pass runs in.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OCacheX2PassPhase {
    Early,
    Middle,
    Late,
    Finalize,
}
impl OCacheX2PassPhase {
    /// True for [`Self::Early`].
    #[allow(dead_code)]
    pub fn is_early(&self) -> bool {
        *self == Self::Early
    }
    /// True for [`Self::Middle`].
    #[allow(dead_code)]
    pub fn is_middle(&self) -> bool {
        *self == Self::Middle
    }
    /// True for [`Self::Late`].
    #[allow(dead_code)]
    pub fn is_late(&self) -> bool {
        *self == Self::Late
    }
    /// True for [`Self::Finalize`].
    #[allow(dead_code)]
    pub fn is_finalize(&self) -> bool {
        *self == Self::Finalize
    }
    /// Position of this phase in the pipeline (0 = first).
    #[allow(dead_code)]
    pub fn order(&self) -> u32 {
        match self {
            OCacheX2PassPhase::Early => 0,
            OCacheX2PassPhase::Middle => 1,
            OCacheX2PassPhase::Late => 2,
            OCacheX2PassPhase::Finalize => 3,
        }
    }
    /// Inverse of [`Self::order`]; `None` for out-of-range values.
    #[allow(dead_code)]
    pub fn from_order(n: u32) -> Option<Self> {
        [Self::Early, Self::Middle, Self::Late, Self::Finalize]
            .into_iter()
            .nth(n as usize)
    }
}
/// Aggregate statistics produced by the cache-optimisation pass.
#[derive(Debug, Clone, Default)]
pub struct CacheOptReport {
    /// Number of loops that were tiled.
    pub num_loops_tiled: usize,
    /// Number of prefetch instructions inserted.
    pub num_prefetches_inserted: usize,
    /// Estimated cache-miss reduction as a fraction (0.25 = 25%).
    pub estimated_cache_miss_reduction: f64,
}
impl CacheOptReport {
    /// Renders a one-line human-readable summary of the report.
    pub fn summary(&self) -> String {
        let pct = self.estimated_cache_miss_reduction * 100.0;
        format!(
            "CacheOpt: {} loops tiled, {} prefetches inserted, \
             estimated {:.1}% cache-miss reduction",
            self.num_loops_tiled, self.num_prefetches_inserted, pct,
        )
    }
}
/// Per-block liveness sets, stored as unsorted `Vec`s of variable ids.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCacheExtLiveness {
    pub live_in: Vec<Vec<usize>>,
    pub live_out: Vec<Vec<usize>>,
    pub defs: Vec<Vec<usize>>,
    pub uses: Vec<Vec<usize>>,
}
impl OCacheExtLiveness {
    /// Membership test shared by all the query methods below; out-of-range
    /// block indices answer `false`.
    fn set_has(sets: &[Vec<usize>], b: usize, v: usize) -> bool {
        sets.get(b).is_some_and(|s| s.contains(&v))
    }
    /// Insert-if-absent shared by `add_def` / `add_use`; out-of-range
    /// block indices are silently ignored.
    fn set_add(sets: &mut [Vec<usize>], b: usize, v: usize) {
        if let Some(s) = sets.get_mut(b) {
            if !s.contains(&v) {
                s.push(v);
            }
        }
    }
    /// Creates empty sets for `n` blocks.
    #[allow(dead_code)]
    pub fn new(n: usize) -> Self {
        Self {
            live_in: vec![Vec::new(); n],
            live_out: vec![Vec::new(); n],
            defs: vec![Vec::new(); n],
            uses: vec![Vec::new(); n],
        }
    }
    /// True when `v` is live on entry to block `b`.
    #[allow(dead_code)]
    pub fn live_in(&self, b: usize, v: usize) -> bool {
        Self::set_has(&self.live_in, b, v)
    }
    /// True when `v` is live on exit from block `b`.
    #[allow(dead_code)]
    pub fn live_out(&self, b: usize, v: usize) -> bool {
        Self::set_has(&self.live_out, b, v)
    }
    /// Records that block `b` defines `v` (idempotent).
    #[allow(dead_code)]
    pub fn add_def(&mut self, b: usize, v: usize) {
        Self::set_add(&mut self.defs, b, v);
    }
    /// Records that block `b` uses `v` (idempotent).
    #[allow(dead_code)]
    pub fn add_use(&mut self, b: usize, v: usize) {
        Self::set_add(&mut self.uses, b, v);
    }
    /// True when block `b` uses `v`.
    #[allow(dead_code)]
    pub fn var_is_used_in_block(&self, b: usize, v: usize) -> bool {
        Self::set_has(&self.uses, b, v)
    }
    /// True when block `b` defines `v`.
    #[allow(dead_code)]
    pub fn var_is_def_in_block(&self, b: usize, v: usize) -> bool {
        Self::set_has(&self.defs, b, v)
    }
}
334#[allow(dead_code)]
336#[derive(Debug, Default)]
337pub struct OCacheExtPassRegistry {
338 pub(super) configs: Vec<OCacheExtPassConfig>,
339 pub(super) stats: Vec<OCacheExtPassStats>,
340}
341impl OCacheExtPassRegistry {
342 #[allow(dead_code)]
343 pub fn new() -> Self {
344 Self::default()
345 }
346 #[allow(dead_code)]
347 pub fn register(&mut self, c: OCacheExtPassConfig) {
348 self.stats.push(OCacheExtPassStats::new());
349 self.configs.push(c);
350 }
351 #[allow(dead_code)]
352 pub fn len(&self) -> usize {
353 self.configs.len()
354 }
355 #[allow(dead_code)]
356 pub fn is_empty(&self) -> bool {
357 self.configs.is_empty()
358 }
359 #[allow(dead_code)]
360 pub fn get(&self, i: usize) -> Option<&OCacheExtPassConfig> {
361 self.configs.get(i)
362 }
363 #[allow(dead_code)]
364 pub fn get_stats(&self, i: usize) -> Option<&OCacheExtPassStats> {
365 self.stats.get(i)
366 }
367 #[allow(dead_code)]
368 pub fn enabled_passes(&self) -> Vec<&OCacheExtPassConfig> {
369 self.configs.iter().filter(|c| c.enabled).collect()
370 }
371 #[allow(dead_code)]
372 pub fn passes_in_phase(&self, ph: &OCacheExtPassPhase) -> Vec<&OCacheExtPassConfig> {
373 self.configs
374 .iter()
375 .filter(|c| c.enabled && &c.phase == ph)
376 .collect()
377 }
378 #[allow(dead_code)]
379 pub fn total_nodes_visited(&self) -> usize {
380 self.stats.iter().map(|s| s.nodes_visited).sum()
381 }
382 #[allow(dead_code)]
383 pub fn any_changed(&self) -> bool {
384 self.stats.iter().any(|s| s.changed)
385 }
386}
/// Levels of the memory hierarchy used by the locality model.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CacheLevel {
    L1,
    L2,
    L3,
    Ram,
}
impl CacheLevel {
    /// Modelled capacity of this level in bytes (`u64::MAX` for RAM).
    ///
    /// These are fixed, representative figures (32 KiB L1, 256 KiB L2,
    /// 8 MiB L3), not probed from the host machine.
    pub fn capacity_bytes(&self) -> u64 {
        match self {
            Self::L1 => 32 << 10,
            Self::L2 => 256 << 10,
            Self::L3 => 8 << 20,
            Self::Ram => u64::MAX,
        }
    }
    /// Modelled access latency of this level in CPU cycles.
    pub fn latency_cycles(&self) -> u32 {
        match self {
            Self::L1 => 4,
            Self::L2 => 12,
            Self::L3 => 40,
            Self::Ram => 200,
        }
    }
}
419#[allow(dead_code)]
421#[derive(Debug, Clone, Default)]
422pub struct OCacheExtConstFolder {
423 pub(super) folds: usize,
424 pub(super) failures: usize,
425 pub(super) enabled: bool,
426}
427impl OCacheExtConstFolder {
428 #[allow(dead_code)]
429 pub fn new() -> Self {
430 Self {
431 folds: 0,
432 failures: 0,
433 enabled: true,
434 }
435 }
436 #[allow(dead_code)]
437 pub fn add_i64(&mut self, a: i64, b: i64) -> Option<i64> {
438 self.folds += 1;
439 a.checked_add(b)
440 }
441 #[allow(dead_code)]
442 pub fn sub_i64(&mut self, a: i64, b: i64) -> Option<i64> {
443 self.folds += 1;
444 a.checked_sub(b)
445 }
446 #[allow(dead_code)]
447 pub fn mul_i64(&mut self, a: i64, b: i64) -> Option<i64> {
448 self.folds += 1;
449 a.checked_mul(b)
450 }
451 #[allow(dead_code)]
452 pub fn div_i64(&mut self, a: i64, b: i64) -> Option<i64> {
453 if b == 0 {
454 self.failures += 1;
455 None
456 } else {
457 self.folds += 1;
458 a.checked_div(b)
459 }
460 }
461 #[allow(dead_code)]
462 pub fn rem_i64(&mut self, a: i64, b: i64) -> Option<i64> {
463 if b == 0 {
464 self.failures += 1;
465 None
466 } else {
467 self.folds += 1;
468 a.checked_rem(b)
469 }
470 }
471 #[allow(dead_code)]
472 pub fn neg_i64(&mut self, a: i64) -> Option<i64> {
473 self.folds += 1;
474 a.checked_neg()
475 }
476 #[allow(dead_code)]
477 pub fn shl_i64(&mut self, a: i64, s: u32) -> Option<i64> {
478 if s >= 64 {
479 self.failures += 1;
480 None
481 } else {
482 self.folds += 1;
483 a.checked_shl(s)
484 }
485 }
486 #[allow(dead_code)]
487 pub fn shr_i64(&mut self, a: i64, s: u32) -> Option<i64> {
488 if s >= 64 {
489 self.failures += 1;
490 None
491 } else {
492 self.folds += 1;
493 a.checked_shr(s)
494 }
495 }
496 #[allow(dead_code)]
497 pub fn and_i64(&mut self, a: i64, b: i64) -> i64 {
498 self.folds += 1;
499 a & b
500 }
501 #[allow(dead_code)]
502 pub fn or_i64(&mut self, a: i64, b: i64) -> i64 {
503 self.folds += 1;
504 a | b
505 }
506 #[allow(dead_code)]
507 pub fn xor_i64(&mut self, a: i64, b: i64) -> i64 {
508 self.folds += 1;
509 a ^ b
510 }
511 #[allow(dead_code)]
512 pub fn not_i64(&mut self, a: i64) -> i64 {
513 self.folds += 1;
514 !a
515 }
516 #[allow(dead_code)]
517 pub fn fold_count(&self) -> usize {
518 self.folds
519 }
520 #[allow(dead_code)]
521 pub fn failure_count(&self) -> usize {
522 self.failures
523 }
524 #[allow(dead_code)]
525 pub fn enable(&mut self) {
526 self.enabled = true;
527 }
528 #[allow(dead_code)]
529 pub fn disable(&mut self) {
530 self.enabled = false;
531 }
532 #[allow(dead_code)]
533 pub fn is_enabled(&self) -> bool {
534 self.enabled
535 }
536}
537#[allow(dead_code)]
539#[derive(Debug)]
540pub struct OCacheExtCache {
541 pub(super) entries: Vec<(u64, Vec<u8>, bool, u32)>,
542 pub(super) cap: usize,
543 pub(super) total_hits: u64,
544 pub(super) total_misses: u64,
545}
546impl OCacheExtCache {
547 #[allow(dead_code)]
548 pub fn new(cap: usize) -> Self {
549 Self {
550 entries: Vec::new(),
551 cap,
552 total_hits: 0,
553 total_misses: 0,
554 }
555 }
556 #[allow(dead_code)]
557 pub fn get(&mut self, key: u64) -> Option<&[u8]> {
558 for e in self.entries.iter_mut() {
559 if e.0 == key && e.2 {
560 e.3 += 1;
561 self.total_hits += 1;
562 return Some(&e.1);
563 }
564 }
565 self.total_misses += 1;
566 None
567 }
568 #[allow(dead_code)]
569 pub fn put(&mut self, key: u64, data: Vec<u8>) {
570 if self.entries.len() >= self.cap {
571 self.entries.retain(|e| e.2);
572 if self.entries.len() >= self.cap {
573 self.entries.remove(0);
574 }
575 }
576 self.entries.push((key, data, true, 0));
577 }
578 #[allow(dead_code)]
579 pub fn invalidate(&mut self) {
580 for e in self.entries.iter_mut() {
581 e.2 = false;
582 }
583 }
584 #[allow(dead_code)]
585 pub fn hit_rate(&self) -> f64 {
586 let t = self.total_hits + self.total_misses;
587 if t == 0 {
588 0.0
589 } else {
590 self.total_hits as f64 / t as f64
591 }
592 }
593 #[allow(dead_code)]
594 pub fn live_count(&self) -> usize {
595 self.entries.iter().filter(|e| e.2).count()
596 }
597}
598#[allow(dead_code)]
600#[derive(Debug, Clone)]
601pub struct OCacheX2PassConfig {
602 pub name: String,
603 pub phase: OCacheX2PassPhase,
604 pub enabled: bool,
605 pub max_iterations: usize,
606 pub debug: u32,
607 pub timeout_ms: Option<u64>,
608}
609impl OCacheX2PassConfig {
610 #[allow(dead_code)]
611 pub fn new(name: impl Into<String>) -> Self {
612 Self {
613 name: name.into(),
614 phase: OCacheX2PassPhase::Middle,
615 enabled: true,
616 max_iterations: 100,
617 debug: 0,
618 timeout_ms: None,
619 }
620 }
621 #[allow(dead_code)]
622 pub fn with_phase(mut self, phase: OCacheX2PassPhase) -> Self {
623 self.phase = phase;
624 self
625 }
626 #[allow(dead_code)]
627 pub fn with_max_iter(mut self, n: usize) -> Self {
628 self.max_iterations = n;
629 self
630 }
631 #[allow(dead_code)]
632 pub fn with_debug(mut self, d: u32) -> Self {
633 self.debug = d;
634 self
635 }
636 #[allow(dead_code)]
637 pub fn disabled(mut self) -> Self {
638 self.enabled = false;
639 self
640 }
641 #[allow(dead_code)]
642 pub fn with_timeout(mut self, ms: u64) -> Self {
643 self.timeout_ms = Some(ms);
644 self
645 }
646 #[allow(dead_code)]
647 pub fn is_debug_enabled(&self) -> bool {
648 self.debug > 0
649 }
650}
651#[allow(dead_code)]
652#[derive(Debug, Clone)]
653pub struct OCPassConfig {
654 pub phase: OCPassPhase,
655 pub enabled: bool,
656 pub max_iterations: u32,
657 pub debug_output: bool,
658 pub pass_name: String,
659}
660impl OCPassConfig {
661 #[allow(dead_code)]
662 pub fn new(name: impl Into<String>, phase: OCPassPhase) -> Self {
663 OCPassConfig {
664 phase,
665 enabled: true,
666 max_iterations: 10,
667 debug_output: false,
668 pass_name: name.into(),
669 }
670 }
671 #[allow(dead_code)]
672 pub fn disabled(mut self) -> Self {
673 self.enabled = false;
674 self
675 }
676 #[allow(dead_code)]
677 pub fn with_debug(mut self) -> Self {
678 self.debug_output = true;
679 self
680 }
681 #[allow(dead_code)]
682 pub fn max_iter(mut self, n: u32) -> Self {
683 self.max_iterations = n;
684 self
685 }
686}
/// Parameters controlling loop tiling for the L1 and L2 caches.
#[derive(Debug, Clone)]
pub struct LoopTilingConfig {
    /// Tile size targeting the L1 cache
    /// (units not visible here — presumably bytes or iterations; TODO confirm at the use site).
    pub tile_size_l1: u64,
    /// Tile size targeting the L2 cache (same units as `tile_size_l1`).
    pub tile_size_l2: u64,
    /// Whether tiling for L1 is performed.
    pub enable_l1_tiling: bool,
    /// Whether tiling for L2 is performed.
    pub enable_l2_tiling: bool,
}
/// Computed memory layout of a constructor/struct.
#[derive(Debug, Clone, PartialEq)]
pub struct StructLayout {
    /// Name of the struct (constructor) this layout describes.
    pub struct_name: String,
    /// `(field name, size in bytes)` pairs in declaration order.
    pub fields: Vec<(String, u64)>,
    /// Total size in bytes, including any padding.
    pub total_size: u64,
    /// Alignment requirement in bytes.
    pub alignment: u64,
}
impl StructLayout {
    /// Cache-line size in bytes assumed by this analysis.
    const CACHE_LINE: u64 = 64;
    /// True when the struct both starts and ends on cache-line
    /// boundaries (alignment >= 64 and size a multiple of 64).
    pub fn is_cache_aligned(&self) -> bool {
        self.alignment >= Self::CACHE_LINE && self.total_size % Self::CACHE_LINE == 0
    }
    /// Bytes of padding: total size minus the sum of field sizes
    /// (saturating, so inconsistent inputs yield 0 rather than wrap).
    pub fn padding_bytes(&self) -> u64 {
        let payload: u64 = self.fields.iter().map(|&(_, size)| size).sum();
        self.total_size.saturating_sub(payload)
    }
    /// Number of cache lines the struct spans (rounded up).
    pub fn cache_lines_used(&self) -> u64 {
        self.total_size.div_ceil(Self::CACHE_LINE)
    }
}
728#[allow(dead_code)]
730#[derive(Debug, Clone)]
731pub struct OCacheExtPassConfig {
732 pub name: String,
733 pub phase: OCacheExtPassPhase,
734 pub enabled: bool,
735 pub max_iterations: usize,
736 pub debug: u32,
737 pub timeout_ms: Option<u64>,
738}
739impl OCacheExtPassConfig {
740 #[allow(dead_code)]
741 pub fn new(name: impl Into<String>) -> Self {
742 Self {
743 name: name.into(),
744 phase: OCacheExtPassPhase::Middle,
745 enabled: true,
746 max_iterations: 100,
747 debug: 0,
748 timeout_ms: None,
749 }
750 }
751 #[allow(dead_code)]
752 pub fn with_phase(mut self, phase: OCacheExtPassPhase) -> Self {
753 self.phase = phase;
754 self
755 }
756 #[allow(dead_code)]
757 pub fn with_max_iter(mut self, n: usize) -> Self {
758 self.max_iterations = n;
759 self
760 }
761 #[allow(dead_code)]
762 pub fn with_debug(mut self, d: u32) -> Self {
763 self.debug = d;
764 self
765 }
766 #[allow(dead_code)]
767 pub fn disabled(mut self) -> Self {
768 self.enabled = false;
769 self
770 }
771 #[allow(dead_code)]
772 pub fn with_timeout(mut self, ms: u64) -> Self {
773 self.timeout_ms = Some(ms);
774 self
775 }
776 #[allow(dead_code)]
777 pub fn is_debug_enabled(&self) -> bool {
778 self.debug > 0
779 }
780}
781pub struct FieldReorderingAnalysis;
784impl FieldReorderingAnalysis {
785 pub fn analyze(decl: &LcnfFunDecl) -> Vec<StructLayout> {
789 let mut layouts: Vec<StructLayout> = Vec::new();
790 Self::collect_layouts(&decl.body, &mut layouts);
791 layouts
792 }
793 pub(super) fn collect_layouts(expr: &LcnfExpr, out: &mut Vec<StructLayout>) {
794 match expr {
795 LcnfExpr::Let { value, body, .. } => {
796 if let LcnfLetValue::Ctor(name, _tag, args) = value {
797 if !out.iter().any(|l| &l.struct_name == name) {
798 let field_count = args.len() as u64;
799 let fields: Vec<(String, u64)> = (0..field_count)
800 .map(|i| (format!("field_{}", i), 8u64))
801 .collect();
802 let fields_total = field_count * 8;
803 let total_size = if fields_total == 0 {
804 8
805 } else {
806 fields_total.next_multiple_of(8)
807 };
808 out.push(StructLayout {
809 struct_name: name.clone(),
810 fields,
811 total_size,
812 alignment: 8,
813 });
814 }
815 }
816 Self::collect_layouts(body, out);
817 }
818 LcnfExpr::Case { alts, default, .. } => {
819 for alt in alts {
820 Self::collect_layouts(&alt.body, out);
821 }
822 if let Some(def) = default {
823 Self::collect_layouts(def, out);
824 }
825 }
826 LcnfExpr::Return(_) | LcnfExpr::Unreachable | LcnfExpr::TailCall(_, _) => {}
827 }
828 }
829}
/// Immediate-dominator tree over block ids (`u32`), indexed by `usize`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCDominatorTree {
    /// Immediate dominator of each node (`None` until computed).
    pub idom: Vec<Option<u32>>,
    /// Children of each node in the dominator tree.
    pub dom_children: Vec<Vec<u32>>,
    /// Depth of each node in the dominator tree.
    pub dom_depth: Vec<u32>,
}
impl OCDominatorTree {
    /// Creates an empty tree for `size` nodes.
    #[allow(dead_code)]
    pub fn new(size: usize) -> Self {
        Self {
            idom: vec![None; size],
            dom_children: vec![Vec::new(); size],
            dom_depth: vec![0; size],
        }
    }
    /// Records `idom` as the immediate dominator of `node`.
    /// Panics if `node` is out of range.
    #[allow(dead_code)]
    pub fn set_idom(&mut self, node: usize, idom: u32) {
        self.idom[node] = Some(idom);
    }
    /// True when `a` dominates `b` (every node dominates itself).
    ///
    /// Walks the idom chain upward from `b`; a self-referential idom
    /// (the conventional root encoding) terminates the walk.
    #[allow(dead_code)]
    pub fn dominates(&self, a: usize, b: usize) -> bool {
        if a == b {
            return true;
        }
        let mut node = b;
        while let Some(parent) = self.idom[node] {
            let parent = parent as usize;
            if parent == a {
                return true;
            }
            if parent == node {
                // Reached a self-loop root without meeting `a`.
                return false;
            }
            node = parent;
        }
        false
    }
    /// Depth of `node`, or 0 when out of range.
    #[allow(dead_code)]
    pub fn depth(&self, node: usize) -> u32 {
        self.dom_depth.get(node).copied().unwrap_or(0)
    }
}
/// Stateless constant-folding helpers for `i64` / `f64` / `bool` primitives.
/// Checked integer variants return `None` instead of overflowing/trapping.
#[allow(dead_code)]
pub struct OCConstantFoldingHelper;
impl OCConstantFoldingHelper {
    /// `a + b`, or `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_add_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_add(b)
    }
    /// `a - b`, or `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_sub_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_sub(b)
    }
    /// `a * b`, or `None` on overflow.
    #[allow(dead_code)]
    pub fn fold_mul_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_mul(b)
    }
    /// `a / b`, or `None` on division by zero or `i64::MIN / -1`.
    #[allow(dead_code)]
    pub fn fold_div_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_div(b)
    }
    /// `a + b` in IEEE-754 double precision; never fails.
    #[allow(dead_code)]
    pub fn fold_add_f64(a: f64, b: f64) -> f64 {
        a + b
    }
    /// `a * b` in IEEE-754 double precision; never fails.
    #[allow(dead_code)]
    pub fn fold_mul_f64(a: f64, b: f64) -> f64 {
        a * b
    }
    /// `-a`, or `None` for `i64::MIN`.
    #[allow(dead_code)]
    pub fn fold_neg_i64(a: i64) -> Option<i64> {
        a.checked_neg()
    }
    /// Logical NOT.
    #[allow(dead_code)]
    pub fn fold_not_bool(a: bool) -> bool {
        !a
    }
    /// Logical AND.
    #[allow(dead_code)]
    pub fn fold_and_bool(a: bool, b: bool) -> bool {
        a && b
    }
    /// Logical OR.
    #[allow(dead_code)]
    pub fn fold_or_bool(a: bool, b: bool) -> bool {
        a || b
    }
    /// `a << b`, or `None` when `b >= 64`.
    #[allow(dead_code)]
    pub fn fold_shl_i64(a: i64, b: u32) -> Option<i64> {
        a.checked_shl(b)
    }
    /// `a >> b` (arithmetic shift), or `None` when `b >= 64`.
    #[allow(dead_code)]
    pub fn fold_shr_i64(a: i64, b: u32) -> Option<i64> {
        a.checked_shr(b)
    }
    /// `a % b`, or `None` on zero divisor or `i64::MIN % -1`.
    ///
    /// Fix: the previous raw `a % b` panicked on `i64::MIN % -1`
    /// ("attempt to calculate the remainder with overflow");
    /// `checked_rem` returns `None`, matching `fold_div_i64`.
    #[allow(dead_code)]
    pub fn fold_rem_i64(a: i64, b: i64) -> Option<i64> {
        a.checked_rem(b)
    }
    /// Bitwise AND; always succeeds.
    #[allow(dead_code)]
    pub fn fold_bitand_i64(a: i64, b: i64) -> i64 {
        a & b
    }
    /// Bitwise OR; always succeeds.
    #[allow(dead_code)]
    pub fn fold_bitor_i64(a: i64, b: i64) -> i64 {
        a | b
    }
    /// Bitwise XOR; always succeeds.
    #[allow(dead_code)]
    pub fn fold_bitxor_i64(a: i64, b: i64) -> i64 {
        a ^ b
    }
    /// Bitwise NOT; always succeeds.
    #[allow(dead_code)]
    pub fn fold_bitnot_i64(a: i64) -> i64 {
        !a
    }
}
/// A single cached analysis result stored by `OCAnalysisCache`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCCacheEntry {
    // Cache key (duplicated from the owning map's key in `insert`).
    pub key: String,
    // Opaque serialized payload.
    pub data: Vec<u8>,
    // Insertion timestamp. NOTE(review): always written as 0 by
    // `OCAnalysisCache::insert`, so it currently carries no information.
    pub timestamp: u64,
    // False once the entry has been invalidated.
    pub valid: bool,
}
958#[allow(dead_code)]
960#[derive(Debug, Clone, Default)]
961pub struct OCacheX2ConstFolder {
962 pub(super) folds: usize,
963 pub(super) failures: usize,
964 pub(super) enabled: bool,
965}
966impl OCacheX2ConstFolder {
967 #[allow(dead_code)]
968 pub fn new() -> Self {
969 Self {
970 folds: 0,
971 failures: 0,
972 enabled: true,
973 }
974 }
975 #[allow(dead_code)]
976 pub fn add_i64(&mut self, a: i64, b: i64) -> Option<i64> {
977 self.folds += 1;
978 a.checked_add(b)
979 }
980 #[allow(dead_code)]
981 pub fn sub_i64(&mut self, a: i64, b: i64) -> Option<i64> {
982 self.folds += 1;
983 a.checked_sub(b)
984 }
985 #[allow(dead_code)]
986 pub fn mul_i64(&mut self, a: i64, b: i64) -> Option<i64> {
987 self.folds += 1;
988 a.checked_mul(b)
989 }
990 #[allow(dead_code)]
991 pub fn div_i64(&mut self, a: i64, b: i64) -> Option<i64> {
992 if b == 0 {
993 self.failures += 1;
994 None
995 } else {
996 self.folds += 1;
997 a.checked_div(b)
998 }
999 }
1000 #[allow(dead_code)]
1001 pub fn rem_i64(&mut self, a: i64, b: i64) -> Option<i64> {
1002 if b == 0 {
1003 self.failures += 1;
1004 None
1005 } else {
1006 self.folds += 1;
1007 a.checked_rem(b)
1008 }
1009 }
1010 #[allow(dead_code)]
1011 pub fn neg_i64(&mut self, a: i64) -> Option<i64> {
1012 self.folds += 1;
1013 a.checked_neg()
1014 }
1015 #[allow(dead_code)]
1016 pub fn shl_i64(&mut self, a: i64, s: u32) -> Option<i64> {
1017 if s >= 64 {
1018 self.failures += 1;
1019 None
1020 } else {
1021 self.folds += 1;
1022 a.checked_shl(s)
1023 }
1024 }
1025 #[allow(dead_code)]
1026 pub fn shr_i64(&mut self, a: i64, s: u32) -> Option<i64> {
1027 if s >= 64 {
1028 self.failures += 1;
1029 None
1030 } else {
1031 self.folds += 1;
1032 a.checked_shr(s)
1033 }
1034 }
1035 #[allow(dead_code)]
1036 pub fn and_i64(&mut self, a: i64, b: i64) -> i64 {
1037 self.folds += 1;
1038 a & b
1039 }
1040 #[allow(dead_code)]
1041 pub fn or_i64(&mut self, a: i64, b: i64) -> i64 {
1042 self.folds += 1;
1043 a | b
1044 }
1045 #[allow(dead_code)]
1046 pub fn xor_i64(&mut self, a: i64, b: i64) -> i64 {
1047 self.folds += 1;
1048 a ^ b
1049 }
1050 #[allow(dead_code)]
1051 pub fn not_i64(&mut self, a: i64) -> i64 {
1052 self.folds += 1;
1053 !a
1054 }
1055 #[allow(dead_code)]
1056 pub fn fold_count(&self) -> usize {
1057 self.folds
1058 }
1059 #[allow(dead_code)]
1060 pub fn failure_count(&self) -> usize {
1061 self.failures
1062 }
1063 #[allow(dead_code)]
1064 pub fn enable(&mut self) {
1065 self.enabled = true;
1066 }
1067 #[allow(dead_code)]
1068 pub fn disable(&mut self) {
1069 self.enabled = false;
1070 }
1071 #[allow(dead_code)]
1072 pub fn is_enabled(&self) -> bool {
1073 self.enabled
1074 }
1075}
1076#[allow(dead_code)]
1078#[derive(Debug, Clone)]
1079pub struct OCacheExtDepGraph {
1080 pub(super) n: usize,
1081 pub(super) adj: Vec<Vec<usize>>,
1082 pub(super) rev: Vec<Vec<usize>>,
1083 pub(super) edge_count: usize,
1084}
1085impl OCacheExtDepGraph {
1086 #[allow(dead_code)]
1087 pub fn new(n: usize) -> Self {
1088 Self {
1089 n,
1090 adj: vec![Vec::new(); n],
1091 rev: vec![Vec::new(); n],
1092 edge_count: 0,
1093 }
1094 }
1095 #[allow(dead_code)]
1096 pub fn add_edge(&mut self, from: usize, to: usize) {
1097 if from < self.n && to < self.n {
1098 if !self.adj[from].contains(&to) {
1099 self.adj[from].push(to);
1100 self.rev[to].push(from);
1101 self.edge_count += 1;
1102 }
1103 }
1104 }
1105 #[allow(dead_code)]
1106 pub fn succs(&self, n: usize) -> &[usize] {
1107 self.adj.get(n).map(|v| v.as_slice()).unwrap_or(&[])
1108 }
1109 #[allow(dead_code)]
1110 pub fn preds(&self, n: usize) -> &[usize] {
1111 self.rev.get(n).map(|v| v.as_slice()).unwrap_or(&[])
1112 }
1113 #[allow(dead_code)]
1114 pub fn topo_sort(&self) -> Option<Vec<usize>> {
1115 let mut deg: Vec<usize> = (0..self.n).map(|i| self.rev[i].len()).collect();
1116 let mut q: std::collections::VecDeque<usize> =
1117 (0..self.n).filter(|&i| deg[i] == 0).collect();
1118 let mut out = Vec::with_capacity(self.n);
1119 while let Some(u) = q.pop_front() {
1120 out.push(u);
1121 for &v in &self.adj[u] {
1122 deg[v] -= 1;
1123 if deg[v] == 0 {
1124 q.push_back(v);
1125 }
1126 }
1127 }
1128 if out.len() == self.n {
1129 Some(out)
1130 } else {
1131 None
1132 }
1133 }
1134 #[allow(dead_code)]
1135 pub fn has_cycle(&self) -> bool {
1136 self.topo_sort().is_none()
1137 }
1138 #[allow(dead_code)]
1139 pub fn reachable(&self, start: usize) -> Vec<usize> {
1140 let mut vis = vec![false; self.n];
1141 let mut stk = vec![start];
1142 let mut out = Vec::new();
1143 while let Some(u) = stk.pop() {
1144 if u < self.n && !vis[u] {
1145 vis[u] = true;
1146 out.push(u);
1147 for &v in &self.adj[u] {
1148 if !vis[v] {
1149 stk.push(v);
1150 }
1151 }
1152 }
1153 }
1154 out
1155 }
1156 #[allow(dead_code)]
1157 pub fn scc(&self) -> Vec<Vec<usize>> {
1158 let mut visited = vec![false; self.n];
1159 let mut order = Vec::new();
1160 for i in 0..self.n {
1161 if !visited[i] {
1162 let mut stk = vec![(i, 0usize)];
1163 while let Some((u, idx)) = stk.last_mut() {
1164 if !visited[*u] {
1165 visited[*u] = true;
1166 }
1167 if *idx < self.adj[*u].len() {
1168 let v = self.adj[*u][*idx];
1169 *idx += 1;
1170 if !visited[v] {
1171 stk.push((v, 0));
1172 }
1173 } else {
1174 order.push(*u);
1175 stk.pop();
1176 }
1177 }
1178 }
1179 }
1180 let mut comp = vec![usize::MAX; self.n];
1181 let mut components: Vec<Vec<usize>> = Vec::new();
1182 for &start in order.iter().rev() {
1183 if comp[start] == usize::MAX {
1184 let cid = components.len();
1185 let mut component = Vec::new();
1186 let mut stk = vec![start];
1187 while let Some(u) = stk.pop() {
1188 if comp[u] == usize::MAX {
1189 comp[u] = cid;
1190 component.push(u);
1191 for &v in &self.rev[u] {
1192 if comp[v] == usize::MAX {
1193 stk.push(v);
1194 }
1195 }
1196 }
1197 }
1198 components.push(component);
1199 }
1200 }
1201 components
1202 }
1203 #[allow(dead_code)]
1204 pub fn node_count(&self) -> usize {
1205 self.n
1206 }
1207 #[allow(dead_code)]
1208 pub fn edge_count(&self) -> usize {
1209 self.edge_count
1210 }
1211}
/// Counters gathered while running a single X2 pass.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct OCacheX2PassStats {
    pub iterations: usize,
    pub changed: bool,
    pub nodes_visited: usize,
    pub nodes_modified: usize,
    pub time_ms: u64,
    pub memory_bytes: usize,
    pub errors: usize,
}
impl OCacheX2PassStats {
    /// All-zero stats.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Default::default()
    }
    /// Counts one visited node.
    #[allow(dead_code)]
    pub fn visit(&mut self) {
        self.nodes_visited += 1;
    }
    /// Counts one modified node and flags the pass as having made changes.
    #[allow(dead_code)]
    pub fn modify(&mut self) {
        self.nodes_modified += 1;
        self.changed = true;
    }
    /// Counts one completed iteration.
    #[allow(dead_code)]
    pub fn iterate(&mut self) {
        self.iterations += 1;
    }
    /// Counts one error.
    #[allow(dead_code)]
    pub fn error(&mut self) {
        self.errors += 1;
    }
    /// Modified/visited ratio; 0.0 when nothing was visited.
    #[allow(dead_code)]
    pub fn efficiency(&self) -> f64 {
        match self.nodes_visited {
            0 => 0.0,
            visited => self.nodes_modified as f64 / visited as f64,
        }
    }
    /// Accumulates `o` into `self`: counters add, `changed` ORs, and
    /// `memory_bytes` takes the maximum (peak, not sum).
    #[allow(dead_code)]
    pub fn merge(&mut self, o: &OCacheX2PassStats) {
        self.iterations += o.iterations;
        self.changed |= o.changed;
        self.nodes_visited += o.nodes_visited;
        self.nodes_modified += o.nodes_modified;
        self.time_ms += o.time_ms;
        self.memory_bytes = self.memory_bytes.max(o.memory_bytes);
        self.errors += o.errors;
    }
}
/// Per-block liveness sets keyed by `u32` variable ids, using hash sets.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct OCLivenessInfo {
    pub live_in: Vec<std::collections::HashSet<u32>>,
    pub live_out: Vec<std::collections::HashSet<u32>>,
    pub defs: Vec<std::collections::HashSet<u32>>,
    pub uses: Vec<std::collections::HashSet<u32>>,
}
impl OCLivenessInfo {
    /// Creates empty sets for `block_count` blocks.
    #[allow(dead_code)]
    pub fn new(block_count: usize) -> Self {
        let empty = || vec![std::collections::HashSet::new(); block_count];
        OCLivenessInfo {
            live_in: empty(),
            live_out: empty(),
            defs: empty(),
            uses: empty(),
        }
    }
    /// Records that `block` defines `var`; out-of-range blocks are ignored.
    #[allow(dead_code)]
    pub fn add_def(&mut self, block: usize, var: u32) {
        if let Some(set) = self.defs.get_mut(block) {
            set.insert(var);
        }
    }
    /// Records that `block` uses `var`; out-of-range blocks are ignored.
    #[allow(dead_code)]
    pub fn add_use(&mut self, block: usize, var: u32) {
        if let Some(set) = self.uses.get_mut(block) {
            set.insert(var);
        }
    }
    /// True when `var` is live on entry to `block`.
    #[allow(dead_code)]
    pub fn is_live_in(&self, block: usize, var: u32) -> bool {
        self.live_in.get(block).is_some_and(|s| s.contains(&var))
    }
    /// True when `var` is live on exit from `block`.
    #[allow(dead_code)]
    pub fn is_live_out(&self, block: usize, var: u32) -> bool {
        self.live_out.get(block).is_some_and(|s| s.contains(&var))
    }
}
1310#[allow(dead_code)]
1312#[derive(Debug, Clone)]
1313pub struct OCacheExtWorklist {
1314 pub(super) items: std::collections::VecDeque<usize>,
1315 pub(super) present: Vec<bool>,
1316}
1317impl OCacheExtWorklist {
1318 #[allow(dead_code)]
1319 pub fn new(capacity: usize) -> Self {
1320 Self {
1321 items: std::collections::VecDeque::new(),
1322 present: vec![false; capacity],
1323 }
1324 }
1325 #[allow(dead_code)]
1326 pub fn push(&mut self, id: usize) {
1327 if id < self.present.len() && !self.present[id] {
1328 self.present[id] = true;
1329 self.items.push_back(id);
1330 }
1331 }
1332 #[allow(dead_code)]
1333 pub fn push_front(&mut self, id: usize) {
1334 if id < self.present.len() && !self.present[id] {
1335 self.present[id] = true;
1336 self.items.push_front(id);
1337 }
1338 }
1339 #[allow(dead_code)]
1340 pub fn pop(&mut self) -> Option<usize> {
1341 let id = self.items.pop_front()?;
1342 if id < self.present.len() {
1343 self.present[id] = false;
1344 }
1345 Some(id)
1346 }
1347 #[allow(dead_code)]
1348 pub fn is_empty(&self) -> bool {
1349 self.items.is_empty()
1350 }
1351 #[allow(dead_code)]
1352 pub fn len(&self) -> usize {
1353 self.items.len()
1354 }
1355 #[allow(dead_code)]
1356 pub fn contains(&self, id: usize) -> bool {
1357 id < self.present.len() && self.present[id]
1358 }
1359 #[allow(dead_code)]
1360 pub fn drain_all(&mut self) -> Vec<usize> {
1361 let v: Vec<usize> = self.items.drain(..).collect();
1362 for &id in &v {
1363 if id < self.present.len() {
1364 self.present[id] = false;
1365 }
1366 }
1367 v
1368 }
1369}
#[allow(dead_code)]
/// Dense dependency graph over node ids `0..n` keeping both forward and
/// reverse adjacency lists, so successor and predecessor queries are cheap.
#[derive(Debug, Clone)]
pub struct OCacheX2DepGraph {
    /// Number of nodes; methods silently ignore ids outside `0..n`.
    pub(super) n: usize,
    /// Forward edges: `adj[u]` holds the successors of `u`.
    pub(super) adj: Vec<Vec<usize>>,
    /// Reverse edges: `rev[v]` holds the predecessors of `v`.
    pub(super) rev: Vec<Vec<usize>>,
    /// Number of distinct edges inserted so far.
    pub(super) edge_count: usize,
}
impl OCacheX2DepGraph {
    #[allow(dead_code)]
    /// Create a graph with `n` nodes and no edges.
    pub fn new(n: usize) -> Self {
        Self {
            n,
            adj: vec![Vec::new(); n],
            rev: vec![Vec::new(); n],
            edge_count: 0,
        }
    }
    #[allow(dead_code)]
    /// Insert edge `from -> to`, keeping both adjacency lists in sync.
    /// Duplicates are dropped (linear-scan check on `adj[from]`);
    /// out-of-range endpoints are ignored.
    pub fn add_edge(&mut self, from: usize, to: usize) {
        if from < self.n && to < self.n {
            if !self.adj[from].contains(&to) {
                self.adj[from].push(to);
                self.rev[to].push(from);
                self.edge_count += 1;
            }
        }
    }
    #[allow(dead_code)]
    /// Successors of `n` (empty slice when out of range).
    pub fn succs(&self, n: usize) -> &[usize] {
        self.adj.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    #[allow(dead_code)]
    /// Predecessors of `n` (empty slice when out of range).
    pub fn preds(&self, n: usize) -> &[usize] {
        self.rev.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    #[allow(dead_code)]
    /// Kahn's-algorithm topological sort: `Some(order)` covering every node,
    /// or `None` when the graph contains a cycle.
    pub fn topo_sort(&self) -> Option<Vec<usize>> {
        // In-degree of each node, taken from the reverse adjacency lists.
        let mut deg: Vec<usize> = (0..self.n).map(|i| self.rev[i].len()).collect();
        let mut q: std::collections::VecDeque<usize> =
            (0..self.n).filter(|&i| deg[i] == 0).collect();
        let mut out = Vec::with_capacity(self.n);
        while let Some(u) = q.pop_front() {
            out.push(u);
            for &v in &self.adj[u] {
                deg[v] -= 1;
                if deg[v] == 0 {
                    q.push_back(v);
                }
            }
        }
        // Nodes left unemitted are exactly those trapped in cycles.
        if out.len() == self.n {
            Some(out)
        } else {
            None
        }
    }
    #[allow(dead_code)]
    /// True when no topological order exists.
    pub fn has_cycle(&self) -> bool {
        self.topo_sort().is_none()
    }
    #[allow(dead_code)]
    /// Nodes reachable from `start` (including `start` when in range),
    /// found by an iterative DFS; returned in discovery order.
    pub fn reachable(&self, start: usize) -> Vec<usize> {
        let mut vis = vec![false; self.n];
        let mut stk = vec![start];
        let mut out = Vec::new();
        while let Some(u) = stk.pop() {
            if u < self.n && !vis[u] {
                vis[u] = true;
                out.push(u);
                for &v in &self.adj[u] {
                    if !vis[v] {
                        stk.push(v);
                    }
                }
            }
        }
        out
    }
    #[allow(dead_code)]
    /// Strongly connected components via Kosaraju's algorithm:
    /// phase 1 records a DFS post-order on the forward graph; phase 2 sweeps
    /// that order in reverse over the reverse graph, one component per sweep.
    pub fn scc(&self) -> Vec<Vec<usize>> {
        let mut visited = vec![false; self.n];
        let mut order = Vec::new();
        for i in 0..self.n {
            if !visited[i] {
                // Explicit stack of (node, next-successor-index) pairs
                // emulates recursive DFS, pushing nodes in post-order.
                let mut stk = vec![(i, 0usize)];
                while let Some((u, idx)) = stk.last_mut() {
                    if !visited[*u] {
                        visited[*u] = true;
                    }
                    if *idx < self.adj[*u].len() {
                        let v = self.adj[*u][*idx];
                        *idx += 1;
                        if !visited[v] {
                            stk.push((v, 0));
                        }
                    } else {
                        // All successors explored: emit in post-order.
                        order.push(*u);
                        stk.pop();
                    }
                }
            }
        }
        // Phase 2: reverse-graph DFS in reverse post-order; `comp[u]` is the
        // component id, `usize::MAX` meaning "not yet assigned".
        let mut comp = vec![usize::MAX; self.n];
        let mut components: Vec<Vec<usize>> = Vec::new();
        for &start in order.iter().rev() {
            if comp[start] == usize::MAX {
                let cid = components.len();
                let mut component = Vec::new();
                let mut stk = vec![start];
                while let Some(u) = stk.pop() {
                    if comp[u] == usize::MAX {
                        comp[u] = cid;
                        component.push(u);
                        for &v in &self.rev[u] {
                            if comp[v] == usize::MAX {
                                stk.push(v);
                            }
                        }
                    }
                }
                components.push(component);
            }
        }
        components
    }
    #[allow(dead_code)]
    /// Number of nodes the graph was created with.
    pub fn node_count(&self) -> usize {
        self.n
    }
    #[allow(dead_code)]
    /// Number of distinct edges inserted.
    pub fn edge_count(&self) -> usize {
        self.edge_count
    }
}
1506#[allow(dead_code)]
1508#[derive(Debug, Default)]
1509pub struct OCacheX2PassRegistry {
1510 pub(super) configs: Vec<OCacheX2PassConfig>,
1511 pub(super) stats: Vec<OCacheX2PassStats>,
1512}
1513impl OCacheX2PassRegistry {
1514 #[allow(dead_code)]
1515 pub fn new() -> Self {
1516 Self::default()
1517 }
1518 #[allow(dead_code)]
1519 pub fn register(&mut self, c: OCacheX2PassConfig) {
1520 self.stats.push(OCacheX2PassStats::new());
1521 self.configs.push(c);
1522 }
1523 #[allow(dead_code)]
1524 pub fn len(&self) -> usize {
1525 self.configs.len()
1526 }
1527 #[allow(dead_code)]
1528 pub fn is_empty(&self) -> bool {
1529 self.configs.is_empty()
1530 }
1531 #[allow(dead_code)]
1532 pub fn get(&self, i: usize) -> Option<&OCacheX2PassConfig> {
1533 self.configs.get(i)
1534 }
1535 #[allow(dead_code)]
1536 pub fn get_stats(&self, i: usize) -> Option<&OCacheX2PassStats> {
1537 self.stats.get(i)
1538 }
1539 #[allow(dead_code)]
1540 pub fn enabled_passes(&self) -> Vec<&OCacheX2PassConfig> {
1541 self.configs.iter().filter(|c| c.enabled).collect()
1542 }
1543 #[allow(dead_code)]
1544 pub fn passes_in_phase(&self, ph: &OCacheX2PassPhase) -> Vec<&OCacheX2PassConfig> {
1545 self.configs
1546 .iter()
1547 .filter(|c| c.enabled && &c.phase == ph)
1548 .collect()
1549 }
1550 #[allow(dead_code)]
1551 pub fn total_nodes_visited(&self) -> usize {
1552 self.stats.iter().map(|s| s.nodes_visited).sum()
1553 }
1554 #[allow(dead_code)]
1555 pub fn any_changed(&self) -> bool {
1556 self.stats.iter().any(|s| s.changed)
1557 }
1558}
/// Variable names and size for one tiled loop dimension.
#[derive(Debug, Clone, PartialEq)]
pub struct LoopTile {
    /// Induction variable of the original (untiled) loop.
    pub original_var: String,
    /// Derived variable stepping over tiles (`<var>_tile`).
    pub tile_var: String,
    /// Derived variable stepping within a tile (`<var>_intra`).
    pub intra_var: String,
    /// Number of iterations per tile.
    pub tile_size: u64,
}
impl LoopTile {
    /// Build a tile descriptor, deriving the tile/intra variable names from
    /// `original_var` by appending `_tile` and `_intra`.
    pub fn new(original_var: impl Into<String>, tile_size: u64) -> Self {
        let base: String = original_var.into();
        LoopTile {
            tile_var: format!("{}_tile", base),
            intra_var: format!("{}_intra", base),
            original_var: base,
            tile_size,
        }
    }
}
1585#[derive(Debug, Clone, PartialEq)]
1587pub struct MemoryAccess {
1588 pub var_name: String,
1590 pub offset: i64,
1592 pub pattern: AccessPattern,
1594 pub stride: Option<i64>,
1596 pub count: u64,
1598}
1599impl MemoryAccess {
1600 pub fn new(
1602 var_name: impl Into<String>,
1603 offset: i64,
1604 pattern: AccessPattern,
1605 stride: Option<i64>,
1606 count: u64,
1607 ) -> Self {
1608 MemoryAccess {
1609 var_name: var_name.into(),
1610 offset,
1611 pattern,
1612 stride,
1613 count,
1614 }
1615 }
1616 pub fn is_cache_friendly(&self) -> bool {
1618 self.pattern.is_cache_friendly()
1619 }
1620}
/// Flavor of prefetch to emit for a prefetch hint.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrefetchType {
    /// Prefetch in anticipation of a read.
    Read,
    /// Prefetch in anticipation of a write.
    Write,
    /// Non-temporal prefetch: data not expected to be reused soon.
    NonTemporal,
}
#[allow(dead_code)]
/// Counters accumulated while a cache-ext pass runs.
#[derive(Debug, Clone, Default)]
pub struct OCacheExtPassStats {
    pub iterations: usize,
    pub changed: bool,
    pub nodes_visited: usize,
    pub nodes_modified: usize,
    pub time_ms: u64,
    pub memory_bytes: usize,
    pub errors: usize,
}
impl OCacheExtPassStats {
    #[allow(dead_code)]
    /// Fresh, all-zero stats.
    pub fn new() -> Self {
        Self::default()
    }
    #[allow(dead_code)]
    /// Count one visited node.
    pub fn visit(&mut self) {
        self.nodes_visited += 1;
    }
    #[allow(dead_code)]
    /// Count one modified node and mark the pass as having changed something.
    pub fn modify(&mut self) {
        self.nodes_modified += 1;
        self.changed = true;
    }
    #[allow(dead_code)]
    /// Count one fixed-point iteration.
    pub fn iterate(&mut self) {
        self.iterations += 1;
    }
    #[allow(dead_code)]
    /// Count one error.
    pub fn error(&mut self) {
        self.errors += 1;
    }
    #[allow(dead_code)]
    /// Modified/visited ratio; 0.0 when nothing was visited.
    pub fn efficiency(&self) -> f64 {
        match self.nodes_visited {
            0 => 0.0,
            visited => self.nodes_modified as f64 / visited as f64,
        }
    }
    #[allow(dead_code)]
    /// Fold `other` into `self`: counters add up, `changed` ORs,
    /// and `memory_bytes` keeps the maximum (peak usage).
    pub fn merge(&mut self, other: &OCacheExtPassStats) {
        self.iterations += other.iterations;
        self.changed |= other.changed;
        self.nodes_visited += other.nodes_visited;
        self.nodes_modified += other.nodes_modified;
        self.time_ms += other.time_ms;
        self.memory_bytes = self.memory_bytes.max(other.memory_bytes);
        self.errors += other.errors;
    }
}
#[allow(dead_code)]
/// Dominator tree stored as immediate-dominator links plus derived child
/// lists and node depths.
#[derive(Debug, Clone)]
pub struct OCacheX2DomTree {
    /// `idom[v]` is the immediate dominator of `v`; `None` for roots/unset nodes.
    pub(super) idom: Vec<Option<usize>>,
    /// Children of each node in the dominator tree.
    pub(super) children: Vec<Vec<usize>>,
    /// Depth of each node (roots at depth 0).
    pub(super) depth: Vec<usize>,
}
impl OCacheX2DomTree {
    #[allow(dead_code)]
    /// Allocate a tree for `n` nodes with no idom links set.
    pub fn new(n: usize) -> Self {
        Self {
            idom: vec![None; n],
            children: vec![Vec::new(); n],
            depth: vec![0; n],
        }
    }
    #[allow(dead_code)]
    /// Set `dom` as the immediate dominator of `node` and refresh its depth.
    /// NOTE(review): calling this twice for the same `node` appends a
    /// duplicate child entry — callers are assumed to set each idom once.
    pub fn set_idom(&mut self, node: usize, dom: usize) {
        if node < self.idom.len() {
            self.idom[node] = Some(dom);
            if dom < self.children.len() {
                self.children[dom].push(node);
            }
            // Depth is parent depth + 1; an out-of-range parent defaults to 1.
            self.depth[node] = if dom < self.depth.len() {
                self.depth[dom] + 1
            } else {
                1
            };
        }
    }
    #[allow(dead_code)]
    /// Does `a` dominate `b`? Walks the idom chain upward from `b`, bounded
    /// by the node count to guard against malformed (cyclic) idom links.
    pub fn dominates(&self, a: usize, mut b: usize) -> bool {
        if a == b {
            return true;
        }
        let n = self.idom.len();
        for _ in 0..n {
            match self.idom.get(b).copied().flatten() {
                None => return false,
                Some(p) if p == a => return true,
                // Self-loop in the idom chain: bail out.
                Some(p) if p == b => return false,
                Some(p) => b = p,
            }
        }
        false
    }
    #[allow(dead_code)]
    /// Dominator-tree children of `n` (empty slice when out of range).
    pub fn children_of(&self, n: usize) -> &[usize] {
        self.children.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    #[allow(dead_code)]
    /// Depth of `n`; out-of-range nodes report 0.
    pub fn depth_of(&self, n: usize) -> usize {
        self.depth.get(n).copied().unwrap_or(0)
    }
    #[allow(dead_code)]
    /// Lowest common ancestor of `a` and `b`: repeatedly lift the deeper node
    /// through its idom. Bounded by 2n steps; returns 0 if the walk fails to
    /// converge (malformed tree).
    pub fn lca(&self, mut a: usize, mut b: usize) -> usize {
        let n = self.idom.len();
        for _ in 0..(2 * n) {
            if a == b {
                return a;
            }
            if self.depth_of(a) > self.depth_of(b) {
                a = self.idom.get(a).and_then(|x| *x).unwrap_or(a);
            } else {
                b = self.idom.get(b).and_then(|x| *x).unwrap_or(b);
            }
        }
        0
    }
}
#[allow(dead_code)]
/// Edge-list dependency graph keyed by opaque `u32` node ids.
#[derive(Debug, Clone)]
pub struct OCDepGraph {
    /// Distinct node ids, in insertion order.
    pub(super) nodes: Vec<u32>,
    /// Directed edges stored as `(dependency, dependent)` pairs; duplicates allowed.
    pub(super) edges: Vec<(u32, u32)>,
}
impl OCDepGraph {
    #[allow(dead_code)]
    /// Empty graph.
    pub fn new() -> Self {
        OCDepGraph {
            nodes: Vec::new(),
            edges: Vec::new(),
        }
    }
    #[allow(dead_code)]
    /// Insert `id` if not already present (linear-scan dedupe).
    pub fn add_node(&mut self, id: u32) {
        if !self.nodes.contains(&id) {
            self.nodes.push(id);
        }
    }
    #[allow(dead_code)]
    /// Add edge `dep -> dependent`, registering both endpoints as nodes.
    /// Duplicate edges are NOT deduplicated; in-degrees count each copy.
    pub fn add_dep(&mut self, dep: u32, dependent: u32) {
        self.add_node(dep);
        self.add_node(dependent);
        self.edges.push((dep, dependent));
    }
    #[allow(dead_code)]
    /// Nodes that depend on `node` (O(E) scan of the edge list).
    pub fn dependents_of(&self, node: u32) -> Vec<u32> {
        self.edges
            .iter()
            .filter(|(d, _)| *d == node)
            .map(|(_, dep)| *dep)
            .collect()
    }
    #[allow(dead_code)]
    /// Nodes that `node` depends on (O(E) scan of the edge list).
    pub fn dependencies_of(&self, node: u32) -> Vec<u32> {
        self.edges
            .iter()
            .filter(|(_, dep)| *dep == node)
            .map(|(d, _)| *d)
            .collect()
    }
    #[allow(dead_code)]
    /// Kahn's-algorithm topological order. When a cycle exists, only nodes
    /// outside the cycle are emitted, so the result may be shorter than
    /// `nodes` (which is what `has_cycle` relies on).
    pub fn topological_sort(&self) -> Vec<u32> {
        let mut in_degree: std::collections::HashMap<u32, u32> = std::collections::HashMap::new();
        for &n in &self.nodes {
            in_degree.insert(n, 0);
        }
        for (_, dep) in &self.edges {
            *in_degree.entry(*dep).or_insert(0) += 1;
        }
        // Seed with all nodes that have no incoming dependency edges.
        let mut queue: std::collections::VecDeque<u32> = self
            .nodes
            .iter()
            .filter(|&&n| in_degree[&n] == 0)
            .copied()
            .collect();
        let mut result = Vec::new();
        while let Some(node) = queue.pop_front() {
            result.push(node);
            for dep in self.dependents_of(node) {
                let cnt = in_degree.entry(dep).or_insert(0);
                // saturating_sub: defensive against inconsistent edge data.
                *cnt = cnt.saturating_sub(1);
                if *cnt == 0 {
                    queue.push_back(dep);
                }
            }
        }
        result
    }
    #[allow(dead_code)]
    /// A cycle exists iff the topological sort could not emit every node.
    pub fn has_cycle(&self) -> bool {
        self.topological_sort().len() < self.nodes.len()
    }
}
1830#[derive(Debug, Clone, PartialEq)]
1832pub struct PrefetchHint {
1833 pub address_expr: String,
1835 pub distance: u64,
1837 pub hint_type: PrefetchType,
1839}
1840impl PrefetchHint {
1841 pub fn new(address_expr: impl Into<String>, distance: u64, hint_type: PrefetchType) -> Self {
1843 PrefetchHint {
1844 address_expr: address_expr.into(),
1845 distance,
1846 hint_type,
1847 }
1848 }
1849}
1850#[allow(dead_code)]
1851pub struct OCPassRegistry {
1852 pub(super) configs: Vec<OCPassConfig>,
1853 pub(super) stats: std::collections::HashMap<String, OCPassStats>,
1854}
1855impl OCPassRegistry {
1856 #[allow(dead_code)]
1857 pub fn new() -> Self {
1858 OCPassRegistry {
1859 configs: Vec::new(),
1860 stats: std::collections::HashMap::new(),
1861 }
1862 }
1863 #[allow(dead_code)]
1864 pub fn register(&mut self, config: OCPassConfig) {
1865 self.stats
1866 .insert(config.pass_name.clone(), OCPassStats::new());
1867 self.configs.push(config);
1868 }
1869 #[allow(dead_code)]
1870 pub fn enabled_passes(&self) -> Vec<&OCPassConfig> {
1871 self.configs.iter().filter(|c| c.enabled).collect()
1872 }
1873 #[allow(dead_code)]
1874 pub fn get_stats(&self, name: &str) -> Option<&OCPassStats> {
1875 self.stats.get(name)
1876 }
1877 #[allow(dead_code)]
1878 pub fn total_passes(&self) -> usize {
1879 self.configs.len()
1880 }
1881 #[allow(dead_code)]
1882 pub fn enabled_count(&self) -> usize {
1883 self.enabled_passes().len()
1884 }
1885 #[allow(dead_code)]
1886 pub fn update_stats(&mut self, name: &str, changes: u64, time_ms: u64, iter: u32) {
1887 if let Some(stats) = self.stats.get_mut(name) {
1888 stats.record_run(changes, time_ms, iter);
1889 }
1890 }
1891}
1892#[allow(dead_code)]
1894#[derive(Debug)]
1895pub struct OCacheX2Cache {
1896 pub(super) entries: Vec<(u64, Vec<u8>, bool, u32)>,
1897 pub(super) cap: usize,
1898 pub(super) total_hits: u64,
1899 pub(super) total_misses: u64,
1900}
1901impl OCacheX2Cache {
1902 #[allow(dead_code)]
1903 pub fn new(cap: usize) -> Self {
1904 Self {
1905 entries: Vec::new(),
1906 cap,
1907 total_hits: 0,
1908 total_misses: 0,
1909 }
1910 }
1911 #[allow(dead_code)]
1912 pub fn get(&mut self, key: u64) -> Option<&[u8]> {
1913 for e in self.entries.iter_mut() {
1914 if e.0 == key && e.2 {
1915 e.3 += 1;
1916 self.total_hits += 1;
1917 return Some(&e.1);
1918 }
1919 }
1920 self.total_misses += 1;
1921 None
1922 }
1923 #[allow(dead_code)]
1924 pub fn put(&mut self, key: u64, data: Vec<u8>) {
1925 if self.entries.len() >= self.cap {
1926 self.entries.retain(|e| e.2);
1927 if self.entries.len() >= self.cap {
1928 self.entries.remove(0);
1929 }
1930 }
1931 self.entries.push((key, data, true, 0));
1932 }
1933 #[allow(dead_code)]
1934 pub fn invalidate(&mut self) {
1935 for e in self.entries.iter_mut() {
1936 e.2 = false;
1937 }
1938 }
1939 #[allow(dead_code)]
1940 pub fn hit_rate(&self) -> f64 {
1941 let t = self.total_hits + self.total_misses;
1942 if t == 0 {
1943 0.0
1944 } else {
1945 self.total_hits as f64 / t as f64
1946 }
1947 }
1948 #[allow(dead_code)]
1949 pub fn live_count(&self) -> usize {
1950 self.entries.iter().filter(|e| e.2).count()
1951 }
1952}
/// Cache-optimization pass: analyzes data locality per function and applies
/// loop-tiling annotations, prefetch hints, and field-reordering annotations.
pub struct CacheOptPass {
    /// Tunables (tiling sizes, prefetch distance, cache line size, toggles).
    pub config: CacheOptConfig,
    /// Aggregated results of the most recent `run`.
    pub report: CacheOptReport,
}
impl CacheOptPass {
    /// Create a pass with the given configuration and an empty report.
    pub fn new(config: CacheOptConfig) -> Self {
        CacheOptPass {
            config,
            report: CacheOptReport::default(),
        }
    }
    /// Run the pass over all declarations, resetting then filling `self.report`.
    pub fn run(&mut self, decls: &mut [LcnfFunDecl]) {
        self.report = CacheOptReport::default();
        for decl in decls.iter_mut() {
            let info = self.analyze_locality(decl);
            // Tile only when L1 tiling is enabled and the working set overflows L1.
            if self.config.tiling.enable_l1_tiling && !info.fits_in_l1() {
                let tiles = self.propose_tiles(decl, &info);
                if !tiles.is_empty() {
                    let n = tiles.len();
                    self.apply_loop_tiling(decl, &tiles);
                    self.report.num_loops_tiled += n;
                }
            }
            // Prefetch only when reuse is far apart (> 8 accesses on average).
            if self.config.enable_prefetch && info.reuse_distance > 8.0 {
                let hints = self.generate_prefetch_hints(&info);
                let n = hints.len();
                self.insert_prefetch_hints(decl, &hints);
                self.report.num_prefetches_inserted += n;
            }
            self.reorder_data_structures(decl);
        }
        if !decls.is_empty() {
            self.report.estimated_cache_miss_reduction = self.estimate_miss_reduction(decls);
        }
    }
    /// Collect a function's memory accesses and summarize them into a
    /// `DataLocalityInfo` (working set, best cache level, mean reuse distance).
    pub fn analyze_locality(&self, decl: &LcnfFunDecl) -> DataLocalityInfo {
        let mut accesses: Vec<MemoryAccess> = Vec::new();
        Self::collect_accesses(&decl.body, &mut accesses);
        let working_set_bytes = self.estimate_working_set(&accesses);
        let best_cache_level = self.classify_cache_level(working_set_bytes);
        let reuse_distance = self.compute_reuse_distance(&accesses);
        DataLocalityInfo {
            accesses,
            working_set_bytes,
            best_cache_level,
            reuse_distance,
        }
    }
    /// Walk an LCNF expression, appending one `MemoryAccess` per memory-touching
    /// let-binding. Field widths of 8 bytes are assumed throughout — TODO confirm
    /// against the runtime object layout.
    pub(super) fn collect_accesses(expr: &LcnfExpr, out: &mut Vec<MemoryAccess>) {
        match expr {
            LcnfExpr::Let {
                value, body, id, ..
            } => {
                match value {
                    // Projection `src.field` reads one 8-byte slot at idx*8.
                    LcnfLetValue::Proj(field, idx, src) => {
                        out.push(MemoryAccess::new(
                            format!("{}.{}", src, field),
                            (*idx as i64) * 8,
                            AccessPattern::Sequential,
                            Some(8),
                            1,
                        ));
                    }
                    // Constructor writes one slot per argument; the access is
                    // keyed by "<ctor>#<let id>" so each allocation is distinct.
                    LcnfLetValue::Ctor(name, _, args) => {
                        for (i, _arg) in args.iter().enumerate() {
                            out.push(MemoryAccess::new(
                                format!("{}#{}", name, id),
                                (i as i64) * 8,
                                AccessPattern::Sequential,
                                Some(8),
                                1,
                            ));
                        }
                    }
                    // Calls are modeled as one random access per argument.
                    LcnfLetValue::App(_, args) => {
                        for (i, _arg) in args.iter().enumerate() {
                            out.push(MemoryAccess::new(
                                format!("arg_{}", i),
                                0,
                                AccessPattern::Random,
                                None,
                                1,
                            ));
                        }
                    }
                    _ => {}
                }
                Self::collect_accesses(body, out);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts {
                    Self::collect_accesses(&alt.body, out);
                }
                if let Some(def) = default {
                    Self::collect_accesses(def, out);
                }
            }
            // Leaves touch no memory in this model.
            LcnfExpr::Return(_) | LcnfExpr::Unreachable | LcnfExpr::TailCall(_, _) => {}
        }
    }
    /// Working-set estimate: one cache line per distinct variable name.
    pub(super) fn estimate_working_set(&self, accesses: &[MemoryAccess]) -> u64 {
        use std::collections::HashSet;
        let distinct: HashSet<&str> = accesses.iter().map(|a| a.var_name.as_str()).collect();
        (distinct.len() as u64) * self.config.cache_line_size
    }
    /// Smallest cache level whose capacity holds the working set (Ram otherwise).
    pub(super) fn classify_cache_level(&self, working_set_bytes: u64) -> CacheLevel {
        if working_set_bytes <= CacheLevel::L1.capacity_bytes() {
            CacheLevel::L1
        } else if working_set_bytes <= CacheLevel::L2.capacity_bytes() {
            CacheLevel::L2
        } else if working_set_bytes <= CacheLevel::L3.capacity_bytes() {
            CacheLevel::L3
        } else {
            CacheLevel::Ram
        }
    }
    /// Mean distance (in number of intervening accesses) between successive
    /// accesses to the same variable. Returns 0.0 for fewer than two accesses,
    /// and the total access count when no variable is ever reused.
    pub(super) fn compute_reuse_distance(&self, accesses: &[MemoryAccess]) -> f64 {
        if accesses.len() < 2 {
            return 0.0;
        }
        use std::collections::HashMap;
        let mut last_seen: HashMap<&str, usize> = HashMap::new();
        let mut total_distance: f64 = 0.0;
        let mut reuse_count: usize = 0;
        for (i, acc) in accesses.iter().enumerate() {
            if let Some(&prev) = last_seen.get(acc.var_name.as_str()) {
                total_distance += (i - prev) as f64;
                reuse_count += 1;
            }
            last_seen.insert(&acc.var_name, i);
        }
        if reuse_count == 0 {
            accesses.len() as f64
        } else {
            total_distance / reuse_count as f64
        }
    }
    /// Propose one tile per parameter that appears in a sequential access.
    /// NOTE(review): `run` only calls this when the working set does NOT fit
    /// in L1, so the `tile_size_l1` branch is live only for direct callers.
    pub(super) fn propose_tiles(
        &self,
        decl: &LcnfFunDecl,
        info: &DataLocalityInfo,
    ) -> Vec<LoopTile> {
        let mut tiles = Vec::new();
        for param in &decl.params {
            let used_in_sequential = info.accesses.iter().any(|a| {
                a.var_name.contains(&param.name) && matches!(a.pattern, AccessPattern::Sequential)
            });
            if used_in_sequential {
                let tile_size = if info.fits_in_l1() {
                    self.config.tiling.tile_size_l1
                } else {
                    self.config.tiling.tile_size_l2
                };
                tiles.push(LoopTile::new(&param.name, tile_size));
            }
        }
        tiles
    }
    /// Apply tiling to a function body (annotation-only; see `annotate_tiling`).
    pub fn apply_loop_tiling(&self, decl: &mut LcnfFunDecl, tiles: &[LoopTile]) {
        Self::annotate_tiling(&mut decl.body, tiles);
    }
    /// Annotate bindings touched by tiling: binding names gain a `_tiled`
    /// suffix (idempotent via the `ends_with` guard) and projections from
    /// tiled sources gain a `_tile_cached` field suffix.
    pub(super) fn annotate_tiling(expr: &mut LcnfExpr, tiles: &[LoopTile]) {
        match expr {
            LcnfExpr::Let {
                name, value, body, ..
            } => {
                if tiles.iter().any(|t| name.contains(&t.original_var)) && !name.ends_with("_tiled")
                {
                    *name = format!("{}_tiled", name);
                }
                if let LcnfLetValue::Proj(field, _idx, src) = value {
                    if tiles
                        .iter()
                        .any(|t| src.to_string().contains(&t.original_var))
                    {
                        *field = format!("{}_tile_cached", field);
                    }
                }
                Self::annotate_tiling(body, tiles);
            }
            LcnfExpr::Case { alts, default, .. } => {
                for alt in alts.iter_mut() {
                    Self::annotate_tiling(&mut alt.body, tiles);
                }
                if let Some(def) = default {
                    Self::annotate_tiling(def, tiles);
                }
            }
            LcnfExpr::Return(_) | LcnfExpr::Unreachable | LcnfExpr::TailCall(_, _) => {}
        }
    }
    /// Emit a Read prefetch for every cache-unfriendly access, and a
    /// NonTemporal prefetch for hot (count > 8) sequential streams.
    pub(super) fn generate_prefetch_hints(&self, info: &DataLocalityInfo) -> Vec<PrefetchHint> {
        let mut hints = Vec::new();
        for acc in &info.accesses {
            if !acc.is_cache_friendly() {
                hints.push(PrefetchHint::new(
                    format!("&{}[{}]", acc.var_name, self.config.prefetch_distance),
                    self.config.prefetch_distance,
                    PrefetchType::Read,
                ));
            } else if matches!(acc.pattern, AccessPattern::Sequential) && acc.count > 8 {
                hints.push(PrefetchHint::new(
                    format!("&{}[{}]", acc.var_name, self.config.prefetch_distance),
                    self.config.prefetch_distance,
                    PrefetchType::NonTemporal,
                ));
            }
        }
        hints
    }
    /// Record the prefetch decision by appending a `__prefetch_<n>` marker to
    /// the function name (idempotent via the `contains` guard); no code is
    /// rewritten here.
    pub(super) fn insert_prefetch_hints(&self, decl: &mut LcnfFunDecl, hints: &[PrefetchHint]) {
        if hints.is_empty() {
            return;
        }
        let annotation = format!("__prefetch_{}", hints.len());
        if !decl.name.contains(&annotation) {
            decl.name = format!("{}{}", decl.name, annotation);
        }
    }
    /// For every struct layout with padding or misalignment, append a
    /// `__reorder_<struct>` marker to the function name (idempotent).
    pub fn reorder_data_structures(&self, decl: &mut LcnfFunDecl) {
        let layouts = FieldReorderingAnalysis::analyze(decl);
        for layout in &layouts {
            if layout.padding_bytes() > 0 || !layout.is_cache_aligned() {
                let annotation = format!("__reorder_{}", layout.struct_name);
                if !decl.name.contains(&annotation) {
                    decl.name = format!("{}{}", decl.name, annotation);
                }
            }
        }
    }
    /// Heuristic miss-reduction estimate: the cache-unfriendly fraction of
    /// accesses (averaged over all functions) scaled by 0.4 when L1 tiling is
    /// enabled, 0.2 otherwise.
    pub(super) fn estimate_miss_reduction(&self, decls: &[LcnfFunDecl]) -> f64 {
        if decls.is_empty() {
            return 0.0;
        }
        let total_friendly: f64 = decls
            .iter()
            .map(|d| {
                let info = self.analyze_locality(d);
                info.cache_friendly_fraction()
            })
            .sum();
        let avg_friendly = total_friendly / decls.len() as f64;
        let tiling_factor = if self.config.tiling.enable_l1_tiling {
            0.4
        } else {
            0.2
        };
        (1.0 - avg_friendly) * tiling_factor
    }
}
#[allow(dead_code)]
/// Per-block liveness sets stored as small unsorted, duplicate-free vectors
/// of variable ids.
#[derive(Debug, Clone, Default)]
pub struct OCacheX2Liveness {
    pub live_in: Vec<Vec<usize>>,
    pub live_out: Vec<Vec<usize>>,
    pub defs: Vec<Vec<usize>>,
    pub uses: Vec<Vec<usize>>,
}
impl OCacheX2Liveness {
    #[allow(dead_code)]
    /// Allocate empty in/out/def/use sets for `n` blocks.
    pub fn new(n: usize) -> Self {
        Self {
            live_in: vec![Vec::new(); n],
            live_out: vec![Vec::new(); n],
            defs: vec![Vec::new(); n],
            uses: vec![Vec::new(); n],
        }
    }
    #[allow(dead_code)]
    /// Is `v` live on entry to block `b`? Out-of-range blocks report `false`.
    pub fn live_in(&self, b: usize, v: usize) -> bool {
        self.live_in.get(b).map_or(false, |set| set.contains(&v))
    }
    #[allow(dead_code)]
    /// Is `v` live on exit from block `b`? Out-of-range blocks report `false`.
    pub fn live_out(&self, b: usize, v: usize) -> bool {
        self.live_out.get(b).map_or(false, |set| set.contains(&v))
    }
    #[allow(dead_code)]
    /// Record a definition of `v` in block `b`, keeping the set duplicate-free.
    pub fn add_def(&mut self, b: usize, v: usize) {
        if let Some(set) = self.defs.get_mut(b) {
            if !set.contains(&v) {
                set.push(v);
            }
        }
    }
    #[allow(dead_code)]
    /// Record a use of `v` in block `b`, keeping the set duplicate-free.
    pub fn add_use(&mut self, b: usize, v: usize) {
        if let Some(set) = self.uses.get_mut(b) {
            if !set.contains(&v) {
                set.push(v);
            }
        }
    }
    #[allow(dead_code)]
    /// Whether `v` appears in block `b`'s use set.
    pub fn var_is_used_in_block(&self, b: usize, v: usize) -> bool {
        self.uses.get(b).map_or(false, |set| set.contains(&v))
    }
    #[allow(dead_code)]
    /// Whether `v` appears in block `b`'s def set.
    pub fn var_is_def_in_block(&self, b: usize, v: usize) -> bool {
        self.defs.get(b).map_or(false, |set| set.contains(&v))
    }
}
#[allow(dead_code)]
/// Dominator tree stored as immediate-dominator links plus derived child
/// lists and node depths.
/// NOTE(review): byte-for-byte duplicate of `OCacheX2DomTree` — consider unifying.
#[derive(Debug, Clone)]
pub struct OCacheExtDomTree {
    /// `idom[v]` is the immediate dominator of `v`; `None` for roots/unset nodes.
    pub(super) idom: Vec<Option<usize>>,
    /// Children of each node in the dominator tree.
    pub(super) children: Vec<Vec<usize>>,
    /// Depth of each node (roots at depth 0).
    pub(super) depth: Vec<usize>,
}
impl OCacheExtDomTree {
    #[allow(dead_code)]
    /// Allocate a tree for `n` nodes with no idom links set.
    pub fn new(n: usize) -> Self {
        Self {
            idom: vec![None; n],
            children: vec![Vec::new(); n],
            depth: vec![0; n],
        }
    }
    #[allow(dead_code)]
    /// Set `dom` as the immediate dominator of `node` and refresh its depth.
    /// NOTE(review): calling this twice for the same `node` appends a
    /// duplicate child entry — callers are assumed to set each idom once.
    pub fn set_idom(&mut self, node: usize, dom: usize) {
        if node < self.idom.len() {
            self.idom[node] = Some(dom);
            if dom < self.children.len() {
                self.children[dom].push(node);
            }
            // Depth is parent depth + 1; an out-of-range parent defaults to 1.
            self.depth[node] = if dom < self.depth.len() {
                self.depth[dom] + 1
            } else {
                1
            };
        }
    }
    #[allow(dead_code)]
    /// Does `a` dominate `b`? Walks the idom chain upward from `b`, bounded
    /// by the node count to guard against malformed (cyclic) idom links.
    pub fn dominates(&self, a: usize, mut b: usize) -> bool {
        if a == b {
            return true;
        }
        let n = self.idom.len();
        for _ in 0..n {
            match self.idom.get(b).copied().flatten() {
                None => return false,
                Some(p) if p == a => return true,
                // Self-loop in the idom chain: bail out.
                Some(p) if p == b => return false,
                Some(p) => b = p,
            }
        }
        false
    }
    #[allow(dead_code)]
    /// Dominator-tree children of `n` (empty slice when out of range).
    pub fn children_of(&self, n: usize) -> &[usize] {
        self.children.get(n).map(|v| v.as_slice()).unwrap_or(&[])
    }
    #[allow(dead_code)]
    /// Depth of `n`; out-of-range nodes report 0.
    pub fn depth_of(&self, n: usize) -> usize {
        self.depth.get(n).copied().unwrap_or(0)
    }
    #[allow(dead_code)]
    /// Lowest common ancestor of `a` and `b`: repeatedly lift the deeper node
    /// through its idom. Bounded by 2n steps; returns 0 if the walk fails to
    /// converge (malformed tree).
    pub fn lca(&self, mut a: usize, mut b: usize) -> usize {
        let n = self.idom.len();
        for _ in 0..(2 * n) {
            if a == b {
                return a;
            }
            if self.depth_of(a) > self.depth_of(b) {
                a = self.idom.get(a).and_then(|x| *x).unwrap_or(a);
            } else {
                b = self.idom.get(b).and_then(|x| *x).unwrap_or(b);
            }
        }
        0
    }
}
#[allow(dead_code)]
/// Cumulative statistics for a single optimization pass.
#[derive(Debug, Clone, Default)]
pub struct OCPassStats {
    pub total_runs: u32,
    pub successful_runs: u32,
    pub total_changes: u64,
    pub time_ms: u64,
    pub iterations_used: u32,
}
impl OCPassStats {
    #[allow(dead_code)]
    /// Fresh, all-zero stats.
    pub fn new() -> Self {
        Self::default()
    }
    #[allow(dead_code)]
    /// Record one completed run.
    ///
    /// NOTE(review): every recorded run counts as successful, so
    /// `success_rate` is 1.0 once any run is recorded — confirm intended.
    pub fn record_run(&mut self, changes: u64, time_ms: u64, iterations: u32) {
        self.total_runs += 1;
        self.successful_runs += 1;
        self.total_changes += changes;
        self.time_ms += time_ms;
        // Last run's iteration count, not a running total.
        self.iterations_used = iterations;
    }
    #[allow(dead_code)]
    /// Mean number of changes across recorded runs (0.0 when none).
    pub fn average_changes_per_run(&self) -> f64 {
        match self.total_runs {
            0 => 0.0,
            runs => self.total_changes as f64 / runs as f64,
        }
    }
    #[allow(dead_code)]
    /// Fraction of runs marked successful (0.0 before any run).
    pub fn success_rate(&self) -> f64 {
        match self.total_runs {
            0 => 0.0,
            runs => self.successful_runs as f64 / runs as f64,
        }
    }
    #[allow(dead_code)]
    /// One-line human-readable summary.
    pub fn format_summary(&self) -> String {
        format!(
            "Runs: {}/{}, Changes: {}, Time: {}ms",
            self.successful_runs, self.total_runs, self.total_changes, self.time_ms
        )
    }
}
2407#[allow(dead_code)]
2409#[derive(Debug, Clone)]
2410pub struct OCacheX2Worklist {
2411 pub(super) items: std::collections::VecDeque<usize>,
2412 pub(super) present: Vec<bool>,
2413}
2414impl OCacheX2Worklist {
2415 #[allow(dead_code)]
2416 pub fn new(capacity: usize) -> Self {
2417 Self {
2418 items: std::collections::VecDeque::new(),
2419 present: vec![false; capacity],
2420 }
2421 }
2422 #[allow(dead_code)]
2423 pub fn push(&mut self, id: usize) {
2424 if id < self.present.len() && !self.present[id] {
2425 self.present[id] = true;
2426 self.items.push_back(id);
2427 }
2428 }
2429 #[allow(dead_code)]
2430 pub fn push_front(&mut self, id: usize) {
2431 if id < self.present.len() && !self.present[id] {
2432 self.present[id] = true;
2433 self.items.push_front(id);
2434 }
2435 }
2436 #[allow(dead_code)]
2437 pub fn pop(&mut self) -> Option<usize> {
2438 let id = self.items.pop_front()?;
2439 if id < self.present.len() {
2440 self.present[id] = false;
2441 }
2442 Some(id)
2443 }
2444 #[allow(dead_code)]
2445 pub fn is_empty(&self) -> bool {
2446 self.items.is_empty()
2447 }
2448 #[allow(dead_code)]
2449 pub fn len(&self) -> usize {
2450 self.items.len()
2451 }
2452 #[allow(dead_code)]
2453 pub fn contains(&self, id: usize) -> bool {
2454 id < self.present.len() && self.present[id]
2455 }
2456 #[allow(dead_code)]
2457 pub fn drain_all(&mut self) -> Vec<usize> {
2458 let v: Vec<usize> = self.items.drain(..).collect();
2459 for &id in &v {
2460 if id < self.present.len() {
2461 self.present[id] = false;
2462 }
2463 }
2464 v
2465 }
2466}
#[allow(dead_code)]
/// Lifecycle phase an optimization pass runs in.
#[derive(Debug, Clone, PartialEq)]
pub enum OCPassPhase {
    Analysis,
    Transformation,
    Verification,
    Cleanup,
}
impl OCPassPhase {
    #[allow(dead_code)]
    /// Lowercase phase name for logs and diagnostics.
    pub fn name(&self) -> &str {
        match self {
            Self::Analysis => "analysis",
            Self::Transformation => "transformation",
            Self::Verification => "verification",
            Self::Cleanup => "cleanup",
        }
    }
    #[allow(dead_code)]
    /// True for phases that may mutate the program.
    pub fn is_modifying(&self) -> bool {
        match self {
            Self::Transformation | Self::Cleanup => true,
            Self::Analysis | Self::Verification => false,
        }
    }
}
/// How a variable's memory is accessed; used to judge cache friendliness.
#[derive(Debug, Clone, PartialEq)]
pub enum AccessPattern {
    /// Consecutive addresses.
    Sequential,
    /// Fixed stride between accesses (presumably bytes — TODO confirm).
    Strided(i64),
    /// No discernible pattern.
    Random,
    /// Broadcast-style access (treated as cache friendly).
    Broadcast,
}
impl AccessPattern {
    /// Sequential and broadcast accesses are friendly; strided accesses are
    /// friendly when the absolute stride is at most 64; random never is.
    pub fn is_cache_friendly(&self) -> bool {
        match self {
            Self::Sequential | Self::Broadcast => true,
            Self::Strided(stride) => stride.unsigned_abs() <= 64,
            Self::Random => false,
        }
    }
}