Skip to main content

vyre_foundation/dispatch/
dialect_lookup.rs

//! Dialect lookup contract shared by foundation-side consumers.
//!
//! This module is the dependency-inversion boundary between the reference
//! interpreter and the driver registry. Reference code may ask for op ids and
//! frozen op definitions through `DialectLookup`, but it must not depend on
//! `vyre-driver` or the `vyre` meta crate.
//!
//! The trait is deliberately sealed through its `private::Sealed` supertrait,
//! which lives in a `#[doc(hidden)]` module. Downstream crates can consume a
//! lookup, but the only sanctioned implementations are installed by vyre driver
//! crates so this surface can grow through additive default methods without
//! breaking external implementors.
12
13use crate::ir_inner::model::program::Program;
14use lasso::ThreadedRodeo;
15use std::sync::{Arc, OnceLock};
16use vyre_spec::{AlgebraicLaw, CpuFn};
17
/// Compact handle standing in for an interned operation-id string.
///
/// This is a plain newtype over `u32`; dialect lookups accept it as the key
/// when resolving an operation definition.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct InternedOpId(pub u32);
21
22fn get_interner() -> &'static ThreadedRodeo {
23    static INTERNER: OnceLock<ThreadedRodeo> = OnceLock::new();
24    INTERNER.get_or_init(ThreadedRodeo::new)
25}
26
27/// Intern a stable operation-id string into a compact process-local id.
28#[must_use]
29pub fn intern_string(s: &str) -> InternedOpId {
30    let interner = get_interner();
31    let key = interner.get_or_intern(s);
32    InternedOpId(key.into_inner().get())
33}
34
35/// Function pointer used by reference-backend lowerings.
36pub type ReferenceKind = CpuFn;
37
/// Context value passed by reference to every lowering builder.
///
/// It carries no data of its own and is retained for source compatibility.
#[derive(Clone, Debug, Default)]
pub struct LoweringCtx<'a> {
    /// Zero-sized marker that ties the context to the lifetime of the call.
    pub unused: std::marker::PhantomData<&'a ()>,
}
44
/// Text module produced by a native lowering builder.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TextModule {
    /// Assembly text for the backend.
    pub asm: String,
    /// Format version the builder encoded the text with.
    pub version: u32,
}
53
/// Native module descriptor produced by a native lowering builder.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NativeModule {
    /// Serialized AST payload; the format is owned by the backend.
    pub ast: Vec<u8>,
    /// Name of the module's entry point.
    pub entry: String,
}
62
63/// Reserved builder type for the primary text lowering slot.
64pub type PrimaryTextBuilder = fn(&LoweringCtx<'_>) -> Result<(), String>;
65/// Reserved builder type for the primary binary lowering slot.
66pub type PrimaryBinaryBuilder = fn(&LoweringCtx<'_>) -> Vec<u32>;
67/// Builder type for the secondary text lowering slot.
68pub type SecondaryTextBuilder = fn(&LoweringCtx<'_>) -> TextModule;
69/// Builder type for native-module lowering.
70pub type NativeModuleBuilder = fn(&LoweringCtx<'_>) -> NativeModule;
71/// Builder-type erased payload for any out-of-tree backend.
72///
73/// Extension lowerings register a function that reads the shared
74/// [`LoweringCtx`] and writes backend-specific bytes into an opaque
75/// output buffer. The caller backend owns the payload format; the
76/// core dialect registry does not interpret the bytes  -  it only
77/// dispatches to the right builder by `BackendId`.
78///
79/// This is the extensibility lever: a concrete backend appends a new
80/// lowering *without*
81/// editing vyre-foundation, vyre-driver, or vyre-spec. The core
82/// surface remains frozen.
83pub type ExtensionLoweringFn =
84    fn(&LoweringCtx<'_>) -> Result<std::vec::Vec<u8>, std::string::String>;
85
86/// Lowering function table attached to an operation definition.
87///
88/// The named fields are terminal 0.6 in-tree slots. `extensions` is
89/// the open-ended slot: any
90/// out-of-tree backend registers its builder under its stable
91/// backend-id string. Look up by id via
92/// [`LoweringTable::extension`].
93///
94/// Not `#[non_exhaustive]` so static registrations can use functional
95/// record update (`..LoweringTable::empty()`) from `inventory::submit!`
96/// closures. Additive fields must carry defaults so the spread form
97/// keeps working without a breaking change.
98#[derive(Clone)]
99pub struct LoweringTable {
100    /// Portable CPU reference implementation.
101    pub cpu_ref: ReferenceKind,
102    /// Primary text builder. `None` in v0.4.1 pure-IR ops.
103    pub primary_text: Option<PrimaryTextBuilder>,
104    /// Primary binary builder. `None` in v0.4.1 pure-IR ops.
105    pub primary_binary: Option<PrimaryBinaryBuilder>,
106    /// Secondary text builder. `None` unless a concrete backend owns it.
107    pub secondary_text: Option<SecondaryTextBuilder>,
108    /// Native native-module builder. `None` until native-module support lands.
109    pub native_module: Option<NativeModuleBuilder>,
110    /// Open extension map for out-of-tree backends. Keyed by backend
111    /// id (matches the string a `VyreBackend::id` returns). Builders
112    /// are by-value function pointers so lookup is allocation-free
113    /// and the map stays `Clone + Send + Sync` without interior
114    /// locking.
115    pub extensions: rustc_hash::FxHashMap<&'static str, ExtensionLoweringFn>,
116}
117
118impl Default for LoweringTable {
119    fn default() -> Self {
120        Self::empty()
121    }
122}
123
124impl LoweringTable {
125    /// Build a lowering table with only the explicit CPU reference oracle
126    /// populated. Production execution still requires a concrete backend
127    /// lowering (`primary_*`, `secondary_text`, `native_module`, or an
128    /// extension); this constructor is for parity/conformance surfaces and
129    /// incremental backend registration.
130    #[must_use]
131    pub fn new(cpu_ref: ReferenceKind) -> Self {
132        Self {
133            cpu_ref,
134            primary_text: None,
135            primary_binary: None,
136            secondary_text: None,
137            native_module: None,
138            extensions: rustc_hash::FxHashMap::default(),
139        }
140    }
141
142    /// Empty table whose reference-oracle slot is the structured-intrinsic
143    /// sentinel. Invoking that slot panics after clearing output so missing
144    /// reference adapters cannot masquerade as empty CPU results. This is not
145    /// a production fallback path.
146    #[must_use]
147    pub fn empty() -> Self {
148        #[allow(deprecated)]
149        let cpu_ref = crate::cpu_op::structured_intrinsic_cpu;
150        Self {
151            cpu_ref,
152            primary_text: None,
153            primary_binary: None,
154            secondary_text: None,
155            native_module: None,
156            extensions: rustc_hash::FxHashMap::default(),
157        }
158    }
159
160    /// Register an out-of-tree backend's lowering. Stable backend id
161    /// is the key `DialectRegistry::get_lowering` uses for lookup; pick it
162    /// carefully, it is a wire-like identifier.
163    #[must_use]
164    pub fn with_extension(
165        mut self,
166        backend_id: &'static str,
167        builder: ExtensionLoweringFn,
168    ) -> Self {
169        self.extensions.insert(backend_id, builder);
170        self
171    }
172
173    /// Look up an extension builder by backend id.
174    #[must_use]
175    pub fn extension(&self, backend_id: &str) -> Option<ExtensionLoweringFn> {
176        self.extensions.get(backend_id).copied()
177    }
178}
179
180impl std::fmt::Debug for LoweringTable {
181    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182        f.debug_struct("LoweringTable")
183            .field("cpu_ref", &"<fn>")
184            .field("primary_text", &self.primary_text.map(|_| "<fn>"))
185            .field("primary_binary", &self.primary_binary.map(|_| "<fn>"))
186            .field("secondary_text", &self.secondary_text.map(|_| "<fn>"))
187            .field("native_module", &self.native_module.map(|_| "<fn>"))
188            .field(
189                "extensions",
190                &self
191                    .extensions
192                    .keys()
193                    .copied()
194                    .collect::<std::vec::Vec<_>>(),
195            )
196            .finish()
197    }
198}
199
/// Value type an operation schema declares for one of its attributes.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum AttrType {
    /// 32-bit unsigned integer.
    U32,
    /// 32-bit signed integer.
    I32,
    /// IEEE-754 binary32 floating-point value.
    F32,
    /// Boolean flag.
    Bool,
    /// Opaque string of bytes.
    Bytes,
    /// UTF-8 text.
    String,
    /// String restricted to the listed spellings.
    Enum(&'static [&'static str]),
    /// Extension attribute whose type is not known here.
    Unknown,
}
221
222/// Attribute schema entry.
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct AttrSchema {
225    /// Attribute name.
226    pub name: &'static str,
227    /// Attribute value type.
228    pub ty: AttrType,
229    /// Optional default value.
230    pub default: Option<&'static str>,
231}
232
/// A named, typed parameter on the input or output side of an operation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TypedParam {
    /// Name of the parameter.
    pub name: &'static str,
    /// Stable spelling of the parameter's type.
    pub ty: &'static str,
}
241
242/// Operation signature contract.
243#[derive(Debug, Clone, PartialEq, Eq)]
244pub struct Signature {
245    /// Input parameters.
246    pub inputs: &'static [TypedParam],
247    /// Output parameters.
248    pub outputs: &'static [TypedParam],
249    /// Attribute parameters.
250    pub attrs: &'static [AttrSchema],
251    /// True when this op may read `DataType::Bytes` buffers.
252    pub bytes_extraction: bool,
253}
254
255impl Signature {
256    /// Construct a signature for an op that performs bytes extraction.
257    #[must_use]
258    pub const fn bytes_extractor(
259        inputs: &'static [TypedParam],
260        outputs: &'static [TypedParam],
261        attrs: &'static [AttrSchema],
262    ) -> Self {
263        Self {
264            inputs,
265            outputs,
266            attrs,
267            bytes_extraction: true,
268        }
269    }
270}
271
/// Category an operation belongs to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Category {
    /// Op defined as a composition over IR.
    Composite,
    /// Extension op contributed by another crate.
    Extension,
    /// Intrinsic op provided by a backend or a primitive table.
    Intrinsic,
}
282
283/// Frozen operation definition.
284#[derive(Debug, Clone)]
285pub struct OpDef {
286    /// Stable operation id.
287    pub id: &'static str,
288    /// Stable dialect namespace.
289    pub dialect: &'static str,
290    /// Operation category.
291    pub category: Category,
292    /// Operation signature.
293    pub signature: Signature,
294    /// Backend lowering entries.
295    pub lowerings: LoweringTable,
296    /// Algebraic laws declared for conformance.
297    pub laws: &'static [AlgebraicLaw],
298    /// Composition-inlinable program builder.
299    pub compose: Option<fn() -> Program>,
300}
301
302impl OpDef {
303    /// Stable operation id.
304    #[must_use]
305    pub const fn id(&self) -> &'static str {
306        self.id
307    }
308
309    /// Build the canonical composition program when the operation has one.
310    #[must_use]
311    pub fn program(&self) -> Option<Program> {
312        self.compose
313            .map(|compose| compose().with_entry_op_id(self.id))
314    }
315}
316
317impl Default for OpDef {
318    fn default() -> Self {
319        Self {
320            id: "",
321            dialect: "",
322            category: Category::Intrinsic,
323            signature: Signature {
324                inputs: &[],
325                outputs: &[],
326                attrs: &[],
327                bytes_extraction: false,
328            },
329            lowerings: LoweringTable::empty(),
330            laws: &[],
331            compose: None,
332        }
333    }
334}
335
/// Home of the sealing marker; hidden from the rendered documentation.
#[doc(hidden)]
pub mod private {
    /// Marker supertrait required by `DialectLookup`. It has no methods.
    pub trait Sealed {}
}
340
341/// Minimal lookup surface consumed by foundation-side reference code.
342pub trait DialectLookup: private::Sealed + Send + Sync {
343    /// Stable identifier naming the provider implementation.
344    ///
345    /// Two installs sharing the same `provider_id` are treated as the same
346    /// logical provider  -  a second install is an idempotent no-op. Two
347    /// installs with different ids are a conflict returned from
348    /// [`install_dialect_lookup`] so callers can fail their own setup without
349    /// panicking inside foundation.
350    fn provider_id(&self) -> &'static str;
351
352    /// Intern a stable operation id.
353    fn intern_op(&self, name: &str) -> InternedOpId;
354
355    /// Resolve an interned operation id to its frozen definition.
356    fn lookup(&self, id: InternedOpId) -> Option<&'static OpDef>;
357}
358
359static DIALECT_LOOKUP: OnceLock<Arc<dyn DialectLookup>> = OnceLock::new();
360
361/// Install the process-wide dialect lookup provider.
362///
363/// First caller wins. A second install from a provider that reports the
364/// same [`DialectLookup::provider_id`] is a silent no-op so harnesses can
365/// defensively call this at the top of every test without racing. A second
366/// install from a provider reporting a DIFFERENT `provider_id` returns an error with
367/// both ids named, because two divergent providers mapping the same op ids
368/// would corrupt every lookup-dependent pass (validator, reference, shadow
369/// diff, conformance matrix) in ways that are hard to attribute back to the
370/// install site. Failing here keeps the 60-second root-cause trace from
371/// LAW 4 intact.
372///
373/// # Errors
374///
375/// Returns an actionable error when a different provider is already installed
376/// or when the process-global lookup reaches an impossible `OnceLock` state.
377pub fn install_dialect_lookup(lookup: Arc<dyn DialectLookup>) -> Result<(), String> {
378    match DIALECT_LOOKUP.get() {
379        Some(existing) => {
380            let existing_id = existing.provider_id();
381            let incoming_id = lookup.provider_id();
382            ensure_same_provider(existing_id, incoming_id)?;
383        }
384        None => {
385            if let Err(lookup) = DIALECT_LOOKUP.set(lookup) {
386                // Lost a race with another thread; still need to validate
387                // idempotency so a concurrent install with a different id
388                // does not silently corrupt the process-wide lookup.
389                let Some(existing) = DIALECT_LOOKUP.get() else {
390                    return Err(
391                        "dialect lookup install lost the value after OnceLock::set failed. Fix: report this impossible OnceLock state."
392                            .to_string(),
393                    );
394                };
395                let existing_id = existing.provider_id();
396                let incoming_id = lookup.provider_id();
397                ensure_same_provider(existing_id, incoming_id)?;
398            }
399        }
400    }
401    Ok(())
402}
403
/// Checks that a second installer names the same provider as the first.
///
/// Returns `Ok(())` when the ids are equal; otherwise returns an error
/// message that names both ids and explains why replacement is refused.
fn ensure_same_provider(existing_id: &str, incoming_id: &str) -> Result<(), String> {
    if existing_id != incoming_id {
        return Err(format!(
            "dialect lookup already installed by provider `{existing_id}`; second installer `{incoming_id}` reports a different id. Fix: pick one provider for the process or reuse the first provider's id. Silent replacement is refused because two divergent lookups would mis-resolve op ids at runtime."
        ));
    }
    Ok(())
}
413
414/// Return the installed process-wide dialect lookup provider.
415#[must_use]
416pub fn dialect_lookup() -> Option<&'static dyn DialectLookup> {
417    DIALECT_LOOKUP.get().map(Arc::as_ref)
418}
419
420#[cfg(test)]
421mod tests;