
vyre_foundation/ir_inner/model/program/meta.rs

use std::hash::{Hash, Hasher as _};
use std::sync::atomic::Ordering;
use std::sync::Arc;

use rustc_hash::FxHasher;
use vyre_spec::bin_op::OpIntensity;

use crate::ir::{Expr, Node};
use crate::ir_inner::model::expr::Ident;
use crate::ir_inner::model::types::BufferAccess;
use crate::transform::visit::{walk_nodes_and_exprs, ExprVisitor, NodeVisitor};

use super::Program;

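/// Mix any `Hash`-implementing value into the BLAKE3 stream by first folding
/// it through a 64-bit `FxHasher` digest.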
fn mix_wire_fallback_hashable<T: Hash>(hasher: &mut blake3::Hasher, value: &T) {
    let mut state = FxHasher::default();
    value.hash(&mut state);
    hasher.update(&state.finish().to_le_bytes());
}

/// Bounded IR structure digest for wire-hash fallback (never formats full IR via `Debug`).
struct FallbackWireHasher<'a>(&'a mut blake3::Hasher);

impl NodeVisitor for FallbackWireHasher<'_> {
    fn visit_node(&mut self, node: &Node) {
        let h = &mut *self.0;
        match node {
            Node::Let { name, .. } => {
                h.update(b"n:Let\0");
                h.update(name.as_bytes());
            }
            Node::Assign { name, .. } => {
                h.update(b"n:Assign\0");
                h.update(name.as_bytes());
            }
            Node::Store { buffer, .. } => {
                h.update(b"n:Store\0");
                h.update(buffer.as_bytes());
            }
            Node::If { .. } => {
                h.update(b"n:If\0");
            }
            Node::Loop { var, .. } => {
                h.update(b"n:Loop\0");
                h.update(var.as_bytes());
            }
            Node::IndirectDispatch {
                count_buffer,
                count_offset,
            } => {
                h.update(b"n:IndirectDispatch\0");
                h.update(count_buffer.as_bytes());
                h.update(&count_offset.to_le_bytes());
            }
            Node::AsyncLoad {
                source,
                destination,
                tag,
                ..
            } => {
                h.update(b"n:AsyncLoad\0");
                h.update(source.as_bytes());
                h.update(destination.as_bytes());
                h.update(tag.as_bytes());
            }
            Node::AsyncStore {
                source,
                destination,
                tag,
                ..
            } => {
                h.update(b"n:AsyncStore\0");
                h.update(source.as_bytes());
                h.update(destination.as_bytes());
                h.update(tag.as_bytes());
            }
            Node::AsyncWait { tag } => {
                h.update(b"n:AsyncWait\0");
                h.update(tag.as_bytes());
            }
            Node::Trap { tag, .. } => {
                h.update(b"n:Trap\0");
                h.update(tag.as_bytes());
            }
            Node::Resume { tag } => {
                h.update(b"n:Resume\0");
                h.update(tag.as_bytes());
            }
            Node::Return => {
                h.update(b"n:Return\0");
            }
            Node::Barrier { ordering } => {
                h.update(b"n:Barrier\0");
                mix_wire_fallback_hashable(h, ordering);
            }
            Node::Block(_) => {
                h.update(b"n:Block\0");
            }
            Node::Region {
                generator,
                source_region,
                ..
            } => {
                h.update(b"n:Region\0");
                h.update(generator.as_bytes());
                if let Some(region) = source_region {
                    h.update(region.name.as_bytes());
                }
            }
            Node::Opaque(ext) => {
                h.update(b"n:Opaque\0");
                h.update(ext.extension_kind().as_bytes());
            }
        }
    }
}

impl ExprVisitor for FallbackWireHasher<'_> {
    fn visit_expr(&mut self, expr: &Expr) {
        let h = &mut *self.0;
        match expr {
            Expr::LitU32(v) => {
                h.update(b"e:LitU32\0");
                h.update(&v.to_le_bytes());
            }
            Expr::LitI32(v) => {
                h.update(b"e:LitI32\0");
                h.update(&v.to_le_bytes());
            }
            Expr::LitF32(v) => {
                h.update(b"e:LitF32\0");
                h.update(&v.to_le_bytes());
            }
            Expr::LitBool(v) => {
                h.update(b"e:LitBool\0");
                h.update(&[u8::from(*v)]);
            }
            Expr::Var(name) => {
                h.update(b"e:Var\0");
                h.update(name.as_bytes());
            }
            Expr::Load { buffer, .. } => {
                h.update(b"e:Load\0");
                h.update(buffer.as_bytes());
            }
            Expr::BufLen { buffer } => {
                h.update(b"e:BufLen\0");
                h.update(buffer.as_bytes());
            }
            Expr::InvocationId { axis } => {
                h.update(b"e:InvocationId\0");
                h.update(&[*axis]);
            }
            Expr::WorkgroupId { axis } => {
                h.update(b"e:WorkgroupId\0");
                h.update(&[*axis]);
            }
            Expr::LocalId { axis } => {
                h.update(b"e:LocalId\0");
                h.update(&[*axis]);
            }
            Expr::BinOp { op, .. } => {
                h.update(b"e:BinOp\0");
                mix_wire_fallback_hashable(h, op);
            }
            Expr::UnOp { op, .. } => {
                h.update(b"e:UnOp\0");
                mix_wire_fallback_hashable(h, op);
            }
            Expr::Call { op_id, .. } => {
                h.update(b"e:Call\0");
                h.update(op_id.as_bytes());
            }
            Expr::Select { .. } => {
                h.update(b"e:Select\0");
            }
            Expr::Cast { target, .. } => {
                h.update(b"e:Cast\0");
                mix_wire_fallback_hashable(h, target);
            }
            Expr::Fma { .. } => {
                h.update(b"e:Fma\0");
            }
            Expr::Atomic {
                op,
                buffer,
                ordering,
                ..
            } => {
                h.update(b"e:Atomic\0");
                mix_wire_fallback_hashable(h, op);
                h.update(buffer.as_bytes());
                mix_wire_fallback_hashable(h, ordering);
            }
            Expr::SubgroupBallot { .. } => {
                h.update(b"e:SubgroupBallot\0");
            }
            Expr::SubgroupShuffle { .. } => {
                h.update(b"e:SubgroupShuffle\0");
            }
            Expr::SubgroupAdd { .. } => {
                h.update(b"e:SubgroupAdd\0");
            }
            Expr::SubgroupLocalId => {
                h.update(b"e:SubgroupLocalId\0");
            }
            Expr::SubgroupSize => {
                h.update(b"e:SubgroupSize\0");
            }
            Expr::Opaque(ext) => {
                h.update(b"e:Opaque\0");
                h.update(ext.extension_kind().as_bytes());
            }
        }
    }
}

impl Program {
    /// Re-apply the same top-level `Node::Region` contract as
    /// [`Program::wrapped`].
    ///
    /// The [`region_inline_engine`](crate::optimizer::passes::cleanup::region_inline_engine)
    /// pass flattens small Category-A regions so CSE/DCE can see a single
    /// function-shaped body, which can leave a statement-shaped entry list. The
    /// standard optimizer run ends with this helper so the program remains in
    /// a runnable, validator/reference-interpreter–compatible form while
    /// still benefiting from the inline pass.
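    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doc-test; `program` is assumed to
    /// be a [`Program`] whose top level was flattened by an inline pass):
    ///
    /// ```ignore
    /// let program = program.reconcile_runnable_top_level();
    /// assert!(program.is_top_level_region_wrapped());
    /// ```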
    #[must_use]
    pub fn reconcile_runnable_top_level(self) -> Self {
        if self.is_top_level_region_wrapped() {
            return self;
        }
        let new_entry = Self::wrap_entry(self.entry().to_vec());
        self.with_rewritten_entry(new_entry)
    }

    /// Look up a buffer declaration by name.
    #[must_use]
    #[inline]
    pub fn buffer(&self, name: &str) -> Option<&super::BufferDecl> {
        self.buffer_index
            .get(name)
            .and_then(|&index| self.buffers.get(index))
    }

    /// Declared buffers.
    #[must_use]
    #[inline]
    pub fn buffers(&self) -> &[super::BufferDecl] {
        self.buffers.as_ref()
    }

    /// Access the buffer declaration Arc directly for identity checks.
    #[must_use]
    #[inline]
    #[cfg(test)]
    pub(crate) fn buffers_arc(&self) -> &Arc<[super::BufferDecl]> {
        &self.buffers
    }

    /// Compare two programs by observable IR structure.
    ///
    /// This walk intentionally ignores buffer declaration order and never
    /// consults arena-local allocation identity. Two programs are structurally
    /// equal when they declare the same buffers, workgroup size, optional entry
    /// op id, and entry body semantics.
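    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doc-test; `program` is any
    /// already-constructed [`Program`]): a clone shares every inner `Arc`
    /// with its source, so this comparison resolves through the identity
    /// fast path.
    ///
    /// ```ignore
    /// let copy = program.clone();
    /// assert!(program.structural_eq(&copy));
    /// ```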
    #[must_use]
    #[inline]
    pub fn structural_eq(&self, other: &Self) -> bool {
        // Identity short-circuit: Program::clone shares all the
        // inner Arcs, so comparing a cloned program against its
        // source (the common optimizer-pipeline pattern) is pure
        // pointer comparison.
        if std::ptr::eq(self, other)
            || (Arc::ptr_eq(&self.buffers, &other.buffers)
                && Arc::ptr_eq(&self.entry, &other.entry)
                && self.entry_op_id == other.entry_op_id
                && self.non_composable_with_self == other.non_composable_with_self
                && self.workgroup_size == other.workgroup_size)
        {
            return true;
        }
        self.entry_op_id == other.entry_op_id
            && self.non_composable_with_self == other.non_composable_with_self
            && buffers_equal_ignoring_declaration_order(&self.buffers, &other.buffers)
            && self.workgroup_size == other.workgroup_size
            && self.entry == other.entry
    }

    /// Workgroup dimensions.
    #[must_use]
    #[inline]
    pub fn workgroup_size(&self) -> [u32; 3] {
        self.workgroup_size
    }

    /// Substrate-neutral alias for [`workgroup_size`](Self::workgroup_size).
    ///
    /// Naming: "parallel region" avoids committing to any single target
    /// substrate's term for one dispatch's invocation grouping.
    #[must_use]
    #[inline]
    pub fn parallel_region_size(&self) -> [u32; 3] {
        self.workgroup_size
    }

    /// Return true when this program must not be fused with another copy
    /// of itself in the same megakernel.
    #[must_use]
    #[inline]
    pub fn is_non_composable_with_self(&self) -> bool {
        self.non_composable_with_self
    }

    /// Mark this program as non-composable with itself.
    #[must_use]
    #[inline]
    pub fn with_non_composable_with_self(mut self, flag: bool) -> Self {
        self.non_composable_with_self = flag;
        self.invalidate_caches();
        self
    }

    /// Set the workgroup dimensions in place. Used by harnesses that
    /// need to clone-and-rewrite a program's workgroup size for fallback
    /// dispatch; reconstructing the entire Program would be unnecessarily
    /// expensive when only one field changes.
    #[inline]
    pub fn set_workgroup_size(&mut self, workgroup_size: [u32; 3]) {
        self.workgroup_size = workgroup_size;
        self.invalidate_caches();
    }

    /// Substrate-neutral alias for [`set_workgroup_size`](Self::set_workgroup_size).
    #[inline]
    pub fn set_parallel_region_size(&mut self, parallel_region_size: [u32; 3]) {
        self.workgroup_size = parallel_region_size;
        self.invalidate_caches();
    }

    /// Entry-point nodes.
    #[must_use]
    #[inline]
    pub fn entry(&self) -> &[Node] {
        self.entry.as_ref().as_slice()
    }

    /// Shared entry-point body Arc for identity checks.
    #[must_use]
    #[inline]
    pub fn entry_arc(&self) -> &Arc<Vec<Node>> {
        &self.entry
    }

    /// Return true when this Program is the canonical no-op shape produced by
    /// [`Program::empty`]: no buffers and a single empty root Region.
    #[must_use]
    #[inline]
    pub fn is_explicit_noop(&self) -> bool {
        self.buffers().is_empty()
            && matches!(self.entry(), [Node::Region { body, .. }] if body.is_empty())
    }

    /// Return true when the program satisfies the top-level region-chain
    /// invariant: at least one top-level node, and every top-level node is a
    /// `Node::Region`.
    #[must_use]
    #[inline]
    pub fn is_top_level_region_wrapped(&self) -> bool {
        !self.entry.is_empty()
            && self
                .entry()
                .iter()
                .all(|node| matches!(node, Node::Region { .. }))
    }

    /// Actionable error text describing why the top-level region invariant
    /// failed, or `None` when the entry is valid.
    #[must_use]
    pub fn top_level_region_violation(&self) -> Option<String> {
        if self.entry().is_empty() {
            return Some(
                "program entry has no top-level Region. Fix: construct runnable programs with Program::wrapped(...) or wrap the body in Node::Region before validation, interpretation, or dispatch."
                    .to_string(),
            );
        }

        self.entry()
            .iter()
            .enumerate()
            .find(|(_, node)| !matches!(node, Node::Region { .. }))
            .map(|(index, node)| {
                format!(
                    "program entry node {index} is `{}` instead of `Node::Region`. Fix: construct runnable programs with Program::wrapped(...) or wrap the top-level body in Node::Region; raw Program::new is reserved for wire decode and negative tests.",
                    Self::top_level_node_name(node)
                )
            })
    }

    /// Mutable entry-point nodes for transformation passes.
    #[must_use]
    #[inline]
    pub fn entry_mut(&mut self) -> &mut Vec<Node> {
        self.invalidate_caches();
        Arc::make_mut(&mut self.entry)
    }

    /// Stable BLAKE3 fingerprint of the canonical wire-format bytes.
    #[must_use]
    #[inline]
    pub fn fingerprint(&self) -> [u8; 32] {
        *self.fingerprint.get_or_init(|| {
            let hash = self.compute_wire_hash();
            let _ = self.hash.set(hash);
            *hash.as_bytes()
        })
    }

    /// VSA-style hypervector fingerprint of the canonical wire-format
    /// bytes. Each `u32` lane is one segment of the program's BLAKE3
    /// hash; together they form an 8-lane hypervector suitable for
    /// approximate similarity search via Hamming distance.
    ///
    /// Use as the canonical cache key for approximate-match caches
    /// (e.g. validation cache, AOT artifact dedup); use
    /// [`Self::fingerprint`] for exact-match lookups.
    ///
    /// Wires the substrate's #29 hypervector primitive into Program
    /// itself: every Program carries its own VSA fingerprint without
    /// callers having to reach into the substrate explicitly.
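    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doc-test; `a` and `b` are assumed
    /// to be already-constructed [`Program`]s) measuring approximate
    /// similarity as the Hamming distance over the 8 lanes (256 bits):
    ///
    /// ```ignore
    /// let distance: u32 = a
    ///     .vsa_fingerprint()
    ///     .iter()
    ///     .zip(b.vsa_fingerprint())
    ///     .map(|(x, y)| (x ^ y).count_ones())
    ///     .sum();
    /// ```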
    #[must_use]
    pub fn vsa_fingerprint(&self) -> Vec<u32> {
        self.fingerprint()
            .chunks_exact(core::mem::size_of::<u32>())
            .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
            .collect()
    }

    /// Indices of read-write buffers in `buffers()` order.
    #[must_use]
    #[inline]
    pub fn output_buffer_indices(&self) -> &[u32] {
        self.output_buffer_index
            .get_or_init(|| {
                Arc::new(
                    self.buffers()
                        .iter()
                        .enumerate()
                        .filter_map(|(index, buffer)| {
                            (buffer.access() == BufferAccess::ReadWrite).then_some(index as u32)
                        })
                        .collect(),
                )
            })
            .as_slice()
    }

    /// True when the entry walk discovers any indirect dispatch node.
    #[must_use]
    #[inline]
    pub fn has_indirect_dispatch(&self) -> bool {
        *self.has_indirect_dispatch.get_or_init(|| {
            let mut stack: Vec<&Node> = self.entry().iter().rev().collect();
            while let Some(node) = stack.pop() {
                match node {
                    Node::IndirectDispatch { .. } => return true,
                    Node::If {
                        then, otherwise, ..
                    } => {
                        stack.extend(otherwise.iter().rev());
                        stack.extend(then.iter().rev());
                    }
                    Node::Loop { body, .. } | Node::Block(body) => {
                        stack.extend(body.iter().rev());
                    }
                    Node::Region { body, .. } => {
                        stack.extend(body.iter().rev());
                    }
                    Node::Let { .. }
                    | Node::Assign { .. }
                    | Node::Store { .. }
                    | Node::Return
                    | Node::Barrier { .. }
                    | Node::AsyncLoad { .. }
                    | Node::AsyncStore { .. }
                    | Node::AsyncWait { .. }
                    | Node::Trap { .. }
                    | Node::Resume { .. }
                    | Node::Opaque(_) => {}
                }
            }
            false
        })
    }

    /// Check whether a named buffer exists.
    #[must_use]
    #[inline]
    pub fn has_buffer(&self, name: &str) -> bool {
        self.buffer_index.contains_key(name)
    }

    /// Number of declared buffers.
    #[must_use]
    #[inline]
    pub fn buffer_count(&self) -> usize {
        self.buffers.len()
    }

    #[inline]
    pub(super) fn build_buffer_index(
        buffers: &[super::BufferDecl],
    ) -> rustc_hash::FxHashMap<Arc<str>, usize> {
        let mut index = rustc_hash::FxHashMap::default();
        index.reserve(buffers.len());
        for (buffer_index, buffer) in buffers.iter().enumerate() {
            index
                .entry(Arc::clone(&buffer.name))
                .or_insert(buffer_index);
        }
        index
    }

    /// Mark this program as successfully validated structurally.
    #[inline]
    pub fn mark_structurally_validated(&self) {
        self.structural_validated.store(true, Ordering::Release);
    }

    /// Return true once structural validation has succeeded for this program shape.
    #[must_use]
    #[inline]
    pub fn is_structurally_validated(&self) -> bool {
        self.structural_validated.load(Ordering::Acquire)
    }

    /// Mark this program as successfully validated for a specific backend.
    #[inline]
    pub fn mark_validated_on(&self, backend_id: &str) {
        self.validation_set
            .insert(Arc::from(self.validation_cache_key(backend_id)));
    }

    /// Return true if this program has been validated for the given backend.
    #[must_use]
    #[inline]
    pub fn is_validated_on(&self, backend_id: &str) -> bool {
        self.validation_set
            .contains(self.validation_cache_key(backend_id).as_str())
    }

    /// Deprecated: use `is_structurally_validated` or `is_validated_on`.
    #[deprecated(note = "use is_structurally_validated or is_validated_on")]
    #[must_use]
    #[inline]
    pub fn is_validated(&self) -> bool {
        self.is_structurally_validated()
    }

    /// Deprecated: use `mark_structurally_validated` or `mark_validated_on`.
    #[deprecated(note = "use mark_structurally_validated or mark_validated_on")]
    #[inline]
    pub fn mark_validated(&self) {
        self.mark_structurally_validated();
    }

    /// Validate the program and cache the successful result on the program.
    ///
    /// # Errors
    ///
    /// Returns [`crate::Error::WireFormatValidation`] with every validation
    /// message joined when the structural validator rejects the program.
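    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doc-test; `program` is any
    /// already-constructed [`Program`]):
    ///
    /// ```ignore
    /// program.validate()?;
    /// assert!(program.is_structurally_validated());
    /// ```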
    pub fn validate(&self) -> crate::error::Result<()> {
        if self.is_structurally_validated() {
            return Ok(());
        }
        let errors = crate::validate::validate(self);
        if errors.is_empty() {
            self.mark_structurally_validated();
            return Ok(());
        }
        let mut message = String::new();
        for (index, error) in errors.into_iter().enumerate() {
            if index > 0 {
                message.push_str("; ");
            }
            message.push_str(error.message());
        }
        Err(crate::error::Error::WireFormatValidation { message })
    }

    /// Estimate the peak VRAM byte size of this Program.
    ///
    /// Innovation I.11: Static VRAM Pressure Analysis.
    /// Returns the total bytes required by all storage and uniform buffers
    /// declared in the Program. Optimizer passes use this to automatically
    /// partition workloads if they would exceed a backend-specific safety
    /// margin.
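    ///
    /// # Example
    ///
    /// A worked sketch (not compiled as a doc-test): a program declaring one
    /// 1024-element `f32` buffer and one 256-element `u32` buffer would
    /// report `1024 * 4 + 256 * 4 = 5120` bytes.
    ///
    /// ```ignore
    /// assert_eq!(program.estimate_peak_vram_bytes(), 5120);
    /// ```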
    #[must_use]
    #[inline]
    pub fn estimate_peak_vram_bytes(&self) -> u64 {
        self.buffers
            .iter()
            .map(|buffer| {
                let element_size = buffer.element.size_bytes().unwrap_or(4);
                (buffer.count as u64) * (element_size as u64)
            })
            .sum()
    }

    /// Return the peak computational intensity found in any instruction.
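    ///
    /// # Example
    ///
    /// A sketch of typical use (not compiled as a doc-test; the `Heavy`
    /// threshold here is illustrative, not a library-defined cutoff):
    ///
    /// ```ignore
    /// if program.peak_intensity() >= OpIntensity::Heavy {
    ///     // Candidate for partitioning or a different schedule.
    /// }
    /// ```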
    #[must_use]
    pub fn peak_intensity(&self) -> OpIntensity {
        let mut peak = OpIntensity::Free;
        for node in self.entry().iter() {
            peak = peak.max(self.node_intensity(node));
        }
        peak
    }

    fn node_intensity(&self, node: &crate::ir::Node) -> OpIntensity {
        use crate::ir::Node;
        match node {
            Node::Let { value, .. } | Node::Assign { value, .. } => self.expr_intensity(value),
            Node::Store { index, value, .. } => {
                self.expr_intensity(index).max(self.expr_intensity(value))
            }
            Node::If {
                cond,
                then,
                otherwise,
            } => {
                let mut p = self.expr_intensity(cond);
                for n in then {
                    p = p.max(self.node_intensity(n));
                }
                for n in otherwise {
                    p = p.max(self.node_intensity(n));
                }
                p
            }
            Node::Loop { from, to, body, .. } => {
                let mut p = self.expr_intensity(from).max(self.expr_intensity(to));
                for n in body.iter() {
                    p = p.max(self.node_intensity(n));
                }
                p
            }
            Node::Block(nodes) => {
                let mut p = OpIntensity::Free;
                for n in nodes {
                    p = p.max(self.node_intensity(n));
                }
                p
            }
            Node::Region { body, .. } => {
                let mut p = OpIntensity::Free;
                for n in body.iter() {
                    p = p.max(self.node_intensity(n));
                }
                p
            }
            _ => OpIntensity::Free,
        }
    }

    #[allow(clippy::only_used_in_recursion)]
    fn expr_intensity(&self, expr: &crate::ir::Expr) -> OpIntensity {
        use crate::ir::Expr;
        match expr {
            Expr::BinOp { op, left, right } => op
                .intensity()
                .max(self.expr_intensity(left))
                .max(self.expr_intensity(right)),
            Expr::UnOp { operand, .. } => self.expr_intensity(operand),
            Expr::Load { index, .. } => self.expr_intensity(index),
            Expr::Select {
                cond,
                true_val,
                false_val,
            } => self
                .expr_intensity(cond)
                .max(self.expr_intensity(true_val))
                .max(self.expr_intensity(false_val)),
            Expr::Cast { value, .. } => self.expr_intensity(value),
            Expr::Fma { a, b, c } => self
                .expr_intensity(a)
                .max(self.expr_intensity(b))
                .max(self.expr_intensity(c)),
            Expr::Atomic {
                index,
                value,
                expected,
                ..
            } => {
                let mut p = self.expr_intensity(index).max(self.expr_intensity(value));
                if let Some(e) = expected {
                    p = p.max(self.expr_intensity(e));
                }
                p.max(OpIntensity::Heavy)
            }
            Expr::SubgroupBallot { cond } => self.expr_intensity(cond).max(OpIntensity::Heavy),
            Expr::SubgroupShuffle { value, lane } => self
                .expr_intensity(value)
                .max(self.expr_intensity(lane))
                .max(OpIntensity::Heavy),
            Expr::SubgroupAdd { value } => self.expr_intensity(value).max(OpIntensity::Heavy),
            _ => OpIntensity::Free,
        }
    }

    fn compute_wire_hash(&self) -> blake3::Hash {
        match self.canonical_wire_hash() {
            Ok(hash) => hash,
            Err(error) => {
                let structural = self.structural_fingerprint_fallback();
                let err_msg = error.to_string();
                let mut fallback = Vec::with_capacity(96 + err_msg.len() + structural.len());
                fallback.extend_from_slice(b"VYRE-PROGRAM-CANONICAL-WIRE-HASH-ERROR\0");
                fallback.extend_from_slice(err_msg.as_bytes());
                fallback.push(0);
                fallback.extend_from_slice(structural.as_bytes());
                blake3::hash(&fallback)
            }
        }
    }

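    /// Order-independent structural digest used when canonical wire encoding
    /// fails: mixes the entry op id, workgroup size, self-composability flag,
    /// sorted canonical buffer keys, and a bounded node/expr walk into one
    /// BLAKE3 hex string.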
    fn structural_fingerprint_fallback(&self) -> String {
        let mut hasher = blake3::Hasher::new();
        hasher.update(b"VYRE-WIRE-FALLBACK-V4\0");
        if let Some(id) = self.entry_op_id.as_deref() {
            hasher.update(id.as_bytes());
        }
        hasher.update(b"\0");
        for axis in &self.workgroup_size {
            hasher.update(&axis.to_le_bytes());
        }
        hasher.update(&[u8::from(self.non_composable_with_self)]);
        let mut keys: Vec<Vec<u8>> = self
            .buffers()
            .iter()
            .map(buffer_decl_canonical_key)
            .collect();
        keys.sort_unstable();
        for key in keys {
            hasher.update(&key);
        }
        let mut visitor = FallbackWireHasher(&mut hasher);
        walk_nodes_and_exprs(self, &mut visitor);
        hasher.finalize().to_hex().to_string()
    }

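    /// Per-backend validation cache key in the form
    /// `"<backend_id>:<64 lowercase hex chars>"`, where the hex suffix is the
    /// 32-byte program fingerprint.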
    fn validation_cache_key(&self, backend_id: &str) -> String {
        let fingerprint = self.fingerprint();
        let mut key = String::with_capacity(backend_id.len() + 1 + 64);
        key.push_str(backend_id);
        key.push(':');
        for byte in fingerprint {
            use std::fmt::Write as _;
            let _ = write!(&mut key, "{byte:02x}");
        }
        key
    }

    #[inline]
    pub(super) fn invalidate_caches(&mut self) {
        self.structural_validated.store(false, Ordering::Release);
        self.validation_set.clear();
        let _ = self.hash.take();
        let _ = self.fingerprint.take();
        let _ = self.output_buffer_index.take();
        let _ = self.has_indirect_dispatch.take();
        let _ = self.stats.take();
    }

    #[inline]
    pub(super) fn wrap_entry(entry: Vec<Node>) -> Vec<Node> {
        if !Self::entry_needs_root_region(&entry) {
            return entry;
        }
        vec![Node::Region {
            generator: Ident::from(Self::ROOT_REGION_GENERATOR),
            source_region: None,
            body: Arc::new(entry),
        }]
    }

    #[inline]
    fn entry_needs_root_region(entry: &[Node]) -> bool {
        entry.is_empty()
            || entry
                .iter()
                .any(|node| !matches!(node, Node::Region { .. }))
    }

    #[inline]
    fn top_level_node_name(node: &Node) -> &'static str {
        match node {
            Node::Let { .. } => "Let",
            Node::Assign { .. } => "Assign",
            Node::Store { .. } => "Store",
            Node::If { .. } => "If",
            Node::Loop { .. } => "Loop",
            Node::Return => "Return",
            Node::Block(_) => "Block",
            Node::Barrier { .. } => "Barrier",
            Node::Region { .. } => "Region",
            Node::IndirectDispatch { .. } => "IndirectDispatch",
            Node::AsyncLoad { .. } => "AsyncLoad",
            Node::AsyncStore { .. } => "AsyncStore",
            Node::AsyncWait { .. } => "AsyncWait",
            Node::Trap { .. } => "Trap",
            Node::Resume { .. } => "Resume",
            Node::Opaque(_) => "Opaque",
        }
    }
}

pub(crate) fn buffers_equal_ignoring_declaration_order(
    left: &[super::BufferDecl],
    right: &[super::BufferDecl],
) -> bool {
    if left.len() != right.len() {
        return false;
    }

    // VYRE_IR_HOTSPOTS HIGH (meta.rs:360-379): previous impl allocated
    // two Vec<Vec<u8>> then sorted on every equality call. Fast-path:
    // if the slices compare equal in-place (declaration orders match)
    // we skip the key-materialization entirely. This catches every
    // Program::clone(prog) == prog and every `Arc::clone`-equivalent
    // comparison, which dominate the call distribution.
    if left == right {
        return true;
    }

    let mut left_keys = Vec::with_capacity(left.len());
    left_keys.extend(left.iter().map(buffer_decl_canonical_key));
    let mut right_keys = Vec::with_capacity(right.len());
    right_keys.extend(right.iter().map(buffer_decl_canonical_key));
    left_keys.sort_unstable();
    right_keys.sort_unstable();
    left_keys == right_keys
}

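/// Serialize one buffer declaration into an order-independent canonical byte
/// key. Encoding errors are folded into the key bytes instead of being
/// propagated, so key construction itself never fails.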
pub(super) fn buffer_decl_canonical_key(buffer: &super::BufferDecl) -> Vec<u8> {
    use crate::serial::wire::framing::{put_len_u32, put_u32, put_u8};
    use crate::serial::wire::tags::put_data_type;

    let mut key = Vec::with_capacity(96);
    if let Err(error) = put_len_u32(&mut key, buffer.name.len(), "buffer name length") {
        key.extend_from_slice(b"\0name-length-error\0");
        key.extend_from_slice(error.as_bytes());
    }
    key.extend_from_slice(buffer.name.as_bytes());
    put_u32(&mut key, buffer.binding);
    match crate::serial::wire::tags::access_tag::access_tag(buffer.access.clone()) {
        Ok(tag) => put_u8(&mut key, tag),
        Err(error) => {
            put_u8(&mut key, u8::MAX);
            key.extend_from_slice(error.as_bytes());
        }
    }
    put_u8(
        &mut key,
        match buffer.kind {
            super::MemoryKind::Global => 0,
            super::MemoryKind::Shared => 1,
            super::MemoryKind::Uniform => 2,
            super::MemoryKind::Local => 3,
            super::MemoryKind::Readonly => 4,
            super::MemoryKind::Persistent => 5,
            super::MemoryKind::Push => 6,
        },
    );
    if let Err(error) = put_data_type(&mut key, &buffer.element) {
        key.extend_from_slice(b"\0dtype-error\0");
        key.extend_from_slice(error.as_bytes());
    }
    put_u32(&mut key, buffer.count);
    put_u8(&mut key, u8::from(buffer.is_output));
    put_u8(&mut key, u8::from(buffer.pipeline_live_out));
    match &buffer.output_byte_range {
        Some(range) => {
            put_u8(&mut key, 1);
            match u32::try_from(range.start) {
                Ok(start) => put_u32(&mut key, start),
                Err(error) => {
                    put_u32(&mut key, u32::MAX);
                    key.extend_from_slice(error.to_string().as_bytes());
                }
            }
            match u32::try_from(range.end) {
                Ok(end) => put_u32(&mut key, end),
                Err(error) => {
                    put_u32(&mut key, u32::MAX);
                    key.extend_from_slice(error.to_string().as_bytes());
                }
            }
        }
        None => put_u8(&mut key, 0),
    }
    match buffer.hints.coalesce_axis {
        Some(axis) => {
            put_u8(&mut key, 1);
            put_u8(&mut key, axis);
        }
        None => put_u8(&mut key, 0),
    }
    put_u32(&mut key, buffer.hints.preferred_alignment);
    put_u8(
        &mut key,
        match buffer.hints.cache_locality {
            super::CacheLocality::Streaming => 0,
            super::CacheLocality::Temporal => 1,
            super::CacheLocality::Random => 2,
        },
    );
    put_u8(&mut key, u8::from(buffer.bytes_extraction));
    key
}