Skip to main content

vyre_driver/
lib.rs

1#![forbid(unsafe_code)]
2#![allow(unused_imports)]
3#![allow(
4    clippy::only_used_in_recursion,
5    clippy::result_unit_err,
6    clippy::module_inception
7)]
8//! vyre-driver  -  substrate-agnostic backend machinery.
9//!
10//! Registry, runtime, pipeline, routing, diagnostics, and the VyreBackend
11//! trait. Concrete backend crates depend on this crate and contribute
12//! lowerings via the inventory collection mechanism.
13
14// missing_docs is enforced workspace-wide via [workspace.lints.rust].
15// vyre-driver inherits that floor; do not re-allow it here.
16
17/// Backend-neutral checked arithmetic and atomic accounting primitives.
18pub mod accounting;
19/// Backend-neutral fallible allocation reservation helpers.
20pub mod allocation;
21/// Backend-neutral ahead-of-time emission registry.
22pub mod aot;
23/// Independent-arm detection for queue-parallel dispatch (ROADMAP D2).
24/// Pure set arithmetic over (reads, writes) summaries; the dispatcher
25/// uses `can_dispatch_concurrently` to decide whether two megakernel
26/// arms can launch on independent backend queues or streams.
27pub mod arm_independence;
28/// Async-copy / kernel-overlap decision policy (ROADMAP D3). Pure
29/// per-slot read/write conflict check that decides whether an H2D
30/// copy can run on a side stream concurrently with a downstream
31/// kernel.
32pub mod async_copy_overlap;
33/// Persistent autotuning record store (ROADMAP I3).
34pub mod autotune_store;
35/// VyreBackend trait, BackendError, capability records, validation.
36pub mod backend;
37/// Backend-neutral benchmark-driven optimization pass selection.
38pub mod benchmark_pass_selection;
39/// Backend-neutral program binding plans.
40pub mod binding;
41/// Bindless buffers / textures decision policy (ROADMAP D9). Decides
42/// whether to use a bindless descriptor array or traditional per-
43/// resource bindings, given the kernel's resource count and the
44/// backend's bindless support level (Full / Static / Unsupported).
45pub mod bindless_policy;
46/// Backend-neutral cache eviction policy.
47pub mod cache_eviction;
48/// N5 substrate: spec-cache eviction with frequency × recency heat
49/// decay. Used by F1/F3 cache layers when capacity pressure
50/// triggers  -  `entries_to_evict(stats, capacity, now)` returns the
51/// evictable IDs in eviction order (lowest heat first).
52pub mod cache_eviction_heat;
53/// Backend-neutral cache invalidation policy.
54pub mod cache_invalidation;
55/// Pre-recorded command reuse decision policy (ROADMAP D4). Decides
56/// whether to record a native command sequence once and replay it for
57/// repeated identical dispatches, based on per-launch overhead vs
58/// record + replay overhead.
59pub mod command_reuse_policy;
60/// Backend-neutral device-side convergence planning.
61pub mod device_convergence;
62/// Backend-neutral device diagnostic aggregation planning.
63pub mod device_diagnostic_aggregation;
64/// Device-conditioned e-graph extraction helpers.
65pub mod device_extraction;
66/// Backend-neutral device capability profile and projections.
67pub mod device_profile;
68/// Tier-B device signature TOML loader.
69pub mod device_signature;
70/// Backend-neutral device-side work queue planning.
71pub mod device_work_queue;
72/// Structured, machine-readable diagnostic rendering.
73pub mod diagnostics;
74/// Bundled D-series + I2 policy invocation. One-shot eval of every
75/// dispatch-side decision substrate so the runtime threads a single
76/// `DispatchPolicyVerdict` instead of six per-substrate verdicts.
77pub mod dispatch_policy;
78/// Backend-neutral dispatch-shape comparison helpers.
79pub mod dispatch_shape;
80/// Backend-neutral evidence bundles and source provenance.
81pub mod evidence;
82/// Device-profile-aware extraction cost helpers (ROADMAP A7).
83pub mod extraction_cost;
84/// Backend-neutral fixpoint-iteration resolution.
85pub mod fixpoint_iterations;
86/// Cross-dispatch fusion decision types and pure analysis.
87pub mod fusion;
88/// Backend-neutral replayable graph-capture binding planning.
89pub mod graph_capture;
90/// Backend-neutral exact-input identity keys for replay caches.
91pub mod input_identity;
92/// Backend-neutral monotonic ordering helpers for staging hot paths.
93pub mod ordering;
94/// Backend-neutral fallible output-slot vector management.
95pub mod output_slots;
96/// Push-constant / tiny-param inlining decision policy (ROADMAP D7).
97/// Backends consume `decide_param_inlining` to choose between inlined
98/// launch metadata and a uniform buffer upload, based on a per-backend
99/// [`crate::param_inlining::ParamInliningPolicy`].
100pub mod param_inlining;
101/// Persistent-kernel-mode decision policy (ROADMAP D1). Decides
102/// whether to replace N small kernel launches with one persistent
103/// kernel that polls a device-side work queue, based on measured
104/// per-launch overhead and persistent-setup cost. Pure decision,
105/// no Program walk.
106pub mod persistent_kernel_policy;
107/// Compiled-pipeline cache, dispatch config, batched dispatch.
108pub mod pipeline;
109/// N4 substrate: cross-pipeline disjoint-binding fusion analysis.
110/// Lifts D2's in-megakernel-arm independence check to the
111/// cross-dispatch boundary so consecutive pipelines with disjoint
112/// reads/writes can fuse into one launch with a workgroup-bounded
113/// fence instead of a full grid-sync.
114pub mod pipeline_fusion;
115/// Dialect registry, OpDef registration, lowering tables, and interner.
116pub mod registry;
117/// Backend-neutral reservation policy adapters.
118pub mod reservation_policy;
119/// Backend-neutral resident-resource reuse telemetry.
120pub mod residency;
121/// Backend-neutral resident transfer interval fusion.
122pub mod resident_transfer_fusion;
123/// Backend-neutral compact result readback planning.
124pub mod result_compaction;
125/// Runtime routing: profile-guided variant selection, algorithm heuristics.
126pub mod routing;
127/// Sampled CPU-reference shadow execution of live dispatches.
128pub mod shadow;
129/// N8 substrate: predicted-next-shape fingerprint API. Records
130/// recent dispatch fingerprints and predicts the next via repeat /
131/// short-cycle detection so the async dispatch path can prefetch
132/// the predicted pipeline cache key during the GPU wait window.
133pub mod shape_prediction;
134/// Backend-neutral shader specialization values and cache key inputs.
135pub mod specialization;
136/// N2 substrate (foundation half): per-rewrite speculation-as-substrate
137/// decision policy. Given baseline + speculative dispatch observations +
138/// side-compile cost, returns Adopt / Reject / KeepRacing.
139pub mod speculation_substrate;
140/// Canonical subgroup operation taxonomy and capability records.
141pub mod subgroup;
142/// Trace-based JIT specialization decision policy (ROADMAP I2).
143/// Decides whether the dispatcher should fire a speculative
144/// pre-spec on a predicted shape, weighted by recent hit count and
145/// prediction confidence vs the speculative spec cost.
146pub mod trace_jit_policy;
147/// Backend-neutral checked transfer accounting policy.
148pub mod transfer_accounting;
149/// Backend-neutral autotuner framework.
150pub mod tuner;
151/// Shared validation caches and launch-geometry contracts.
152pub mod validation;
153
154/// Backend-specific lowering strategies (Layer 2 of the two-layer
155/// optimization architecture). Target-dependent emission decisions
156/// that don't change what a program computes but change how it's
157/// emitted for a specific chip/API.
158///
159/// See the [module docs](crate::strategy) for the full architecture.
160pub mod strategy;
161
162/// Pure [`vyre_foundation::ir::Program`] analysis shared by all backends.
163pub mod program_walks;
164
165/// Driver-tier observability surface (P-OBS-1). Substrate-call
166/// counters, cache hit rates, and a Prometheus exposition format.
167pub mod observability;
168
169/// G6: speculative rule evaluation with commit/rollback. Runs the
170/// expensive confirmer on every tile, commits only tiles whose
171/// pre-filter passed. Hides gather latency + improves subgroup
172/// uniformity. Scaffold.
173pub mod speculate;
174
175/// Cross-grid synchronization: kernel-split fallback for backends
176/// that lack a native cooperative-launch grid barrier. Splits a
177/// `Program` at every `Node::Barrier { ordering: GridSync }` and
178/// dispatches the segments in sequence  -  the kernel-launch boundary
179/// itself is the grid-level fence.
180pub mod grid_sync;
181/// Backend-neutral launch preparation and program fingerprint wrappers.
182pub mod launch;
183/// Backend-neutral adjacent-stage launch fusion planning.
184pub mod launch_fusion;
185/// Backend-neutral megakernel wave barrier planning.
186pub mod megakernel_barrier;
187/// Backend-neutral persistent megakernel execution planning.
188pub mod megakernel_execution;
189/// Backend-neutral megakernel frontier memory planning.
190pub mod megakernel_frontier;
191/// Backend-neutral resident-graph multi-query execution planning.
192pub mod multi_query_execution;
193/// Backend-neutral numeric boundary conversions.
194pub mod numeric;
195/// G7: persistent-thread engine + device-side work queue.
196/// Eliminates per-file kernel-launch overhead for streams of
197/// many small scan jobs.
198pub mod persistent;
199/// Re-exports the unified vyre error type from `vyre-foundation`.
200pub use vyre_foundation::error;
201
202pub use aot::{emit_aot_target, registered_aot_emitters, AotEmitter, AotTargetId};
203pub use backend::{
204    borrowed_input_slices, default_dispatch_with_device_buffers,
205    replace_output_buffers_preserving_slots, validate_buffer_ownership,
206    validate_program_for_backend, BackendError, BackendRegistration, CompiledPipeline,
207    DeviceBuffer, DispatchConfig, Executable, HostShimBuffer, Memory, MemoryRef, OutputBuffers,
208    PendingDispatch, ResidentDispatchStep, ResidentReadRange, ResidentSequenceTiming, Resource,
209    TimedDispatchResult, TypedDispatchExt, VyreBackend, DEVICE_BUFFER_FEATURE,
210};
211pub use binding::{
212    binding_plans_share_layout, BackendLayoutClass, BackendLayoutFingerprint, BackendLayoutSlot,
213    Binding, BindingPlan, BindingRole, BindingSetFingerprint,
214};
215pub use device_extraction::{
216    extract_best_for_device, extract_best_for_devices, DeviceExtraction, ExtractionDevice,
217};
218pub use device_profile::{DeviceProfile, DeviceTimingQuality};
219pub use device_signature::{DeviceSignature, DeviceSignatureTable};
220pub use diagnostics::{Diagnostic, DiagnosticCode, OpLocation, Severity};
221pub use dispatch_shape::{
222    borrowed_input_batch_shapes_match, borrowed_input_shapes_match,
223    dispatch_configs_share_launch_shape,
224};
225pub use evidence::{
226    capture_git_info, capture_git_info_at, source_fingerprint, source_tree_fingerprint,
227    source_tree_fingerprint_at, DispatchTimingEvidence, EvidenceArtifact, EvidenceBundle,
228    ReplayEvidence, SourceProvenance,
229};
230pub use error::Error;
231pub use fixpoint_iterations::{resolve_fixpoint_iterations, resolve_fixpoint_iterations_usize};
232pub use launch::{program_vsa_fingerprint, program_vsa_fingerprint_words, LaunchPlan};
233pub use pipeline::{
234    compile, compile_owned, compile_owned_with_telemetry, compile_shared,
235    compile_shared_with_telemetry, compile_with_telemetry, hex_encode, hex_short,
236    CompiledPipelineBuild, DiskPipelineCache, PipelineCacheIdentity, PipelineCacheKey,
237    PipelineCacheMissEvidence, PipelineCacheMissReason, PipelineCacheSnapshot, PipelineDeviceFingerprint,
238    PipelineFeatureFlags, CURRENT_PIPELINE_CACHE_KEY_VERSION,
239};
240pub use program_walks::{
241    coerce_to_pow2_with_tail_mask, dispatch_element_count, dispatch_element_count_for_program,
242    dispatch_param_words, dispatch_param_words_into, element_size_bytes,
243    enforce_actual_output_budget, find_indirect_dispatch, infer_dispatch_grid,
244    infer_dispatch_grid_for_count, output_binding_layout, output_binding_layouts,
245    output_layout_from_program, try_coerce_to_pow2_with_tail_mask, try_dispatch_param_words,
246    try_dispatch_param_words_into, IndirectDispatch, OutputBindingLayout, OutputLayout,
247    TailMaskPolicy,
248};
249pub use registry::{
250    default_validator, intern_string, AttrSchema, AttrType, Category, Chain, Dialect,
251    DialectRegistration, DialectRegistry, DuplicateOpIdError, EnforceGate, EnforceVerdict,
252    InternedOpId, LoweringCtx, LoweringTable, MutationClass, NativeModule, NativeModuleBuilder,
253    OpBackendTarget, OpDef, OpDefRegistration, PrimaryBinaryBuilder, PrimaryTextBuilder,
254    ReferenceKind, SecondaryTextBuilder, Signature, Target, TextModule, TypedParam,
255};
256pub use residency::{ResidentGraphReuseTelemetry, ResidentGraphReuseTelemetryError};
257pub use routing::{select_sort_backend, Distribution, RoutingTable, SortBackend};
258pub use specialization::{SpecCacheKey, SpecMap, SpecValue};
259pub use speculate::{
260    record_speculative_variant_race, SpeculativeVariantDecision, SpeculativeVariantKeys,
261    SpeculativeVariantKind, SpeculativeVariantRace,
262};
263pub use subgroup::{SubgroupCaps, SubgroupOp};