Skip to main content

Crate vyre_driver

Crate vyre_driver 

Source
Expand description

vyre-driver - substrate-agnostic backend machinery.

Registry, runtime, pipeline, routing, diagnostics, and the VyreBackend trait. Concrete backend crates depend on this crate and contribute lowerings via the inventory collection mechanism.

Re-exports§

pub use aot::emit_aot_target;
pub use aot::registered_aot_emitters;
pub use aot::AotEmitter;
pub use aot::AotTargetId;
pub use backend::borrowed_input_slices;
pub use backend::default_dispatch_with_device_buffers;
pub use backend::replace_output_buffers_preserving_slots;
pub use backend::validate_buffer_ownership;
pub use backend::validate_program_for_backend;
pub use backend::BackendError;
pub use backend::BackendRegistration;
pub use backend::CompiledPipeline;
pub use backend::DeviceBuffer;
pub use backend::DispatchConfig;
pub use backend::Executable;
pub use backend::HostShimBuffer;
pub use backend::Memory;
pub use backend::MemoryRef;
pub use backend::OutputBuffers;
pub use backend::PendingDispatch;
pub use backend::ResidentDispatchStep;
pub use backend::ResidentReadRange;
pub use backend::ResidentSequenceTiming;
pub use backend::Resource;
pub use backend::TimedDispatchResult;
pub use backend::TypedDispatchExt;
pub use backend::VyreBackend;
pub use backend::DEVICE_BUFFER_FEATURE;
pub use binding::binding_plans_share_layout;
pub use binding::BackendLayoutClass;
pub use binding::BackendLayoutFingerprint;
pub use binding::BackendLayoutSlot;
pub use binding::Binding;
pub use binding::BindingPlan;
pub use binding::BindingRole;
pub use binding::BindingSetFingerprint;
pub use device_extraction::extract_best_for_device;
pub use device_extraction::extract_best_for_devices;
pub use device_extraction::DeviceExtraction;
pub use device_extraction::ExtractionDevice;
pub use device_profile::DeviceProfile;
pub use device_profile::DeviceTimingQuality;
pub use device_signature::DeviceSignature;
pub use device_signature::DeviceSignatureTable;
pub use diagnostics::Diagnostic;
pub use diagnostics::DiagnosticCode;
pub use diagnostics::OpLocation;
pub use diagnostics::Severity;
pub use dispatch_shape::borrowed_input_batch_shapes_match;
pub use dispatch_shape::borrowed_input_shapes_match;
pub use dispatch_shape::dispatch_configs_share_launch_shape;
pub use evidence::capture_git_info;
pub use evidence::capture_git_info_at;
pub use evidence::source_fingerprint;
pub use evidence::source_tree_fingerprint;
pub use evidence::source_tree_fingerprint_at;
pub use evidence::DispatchTimingEvidence;
pub use evidence::EvidenceArtifact;
pub use evidence::EvidenceBundle;
pub use evidence::ReplayEvidence;
pub use evidence::SourceProvenance;
pub use fixpoint_iterations::resolve_fixpoint_iterations;
pub use fixpoint_iterations::resolve_fixpoint_iterations_usize;
pub use launch::program_vsa_fingerprint;
pub use launch::program_vsa_fingerprint_words;
pub use launch::LaunchPlan;
pub use pipeline::compile;
pub use pipeline::compile_owned;
pub use pipeline::compile_owned_with_telemetry;
pub use pipeline::compile_shared;
pub use pipeline::compile_shared_with_telemetry;
pub use pipeline::compile_with_telemetry;
pub use pipeline::hex_encode;
pub use pipeline::hex_short;
pub use pipeline::CompiledPipelineBuild;
pub use pipeline::DiskPipelineCache;
pub use pipeline::PipelineCacheIdentity;
pub use pipeline::PipelineCacheKey;
pub use pipeline::PipelineCacheMissEvidence;
pub use pipeline::PipelineCacheMissReason;
pub use pipeline::PipelineCacheSnapshot;
pub use pipeline::PipelineDeviceFingerprint;
pub use pipeline::PipelineFeatureFlags;
pub use pipeline::CURRENT_PIPELINE_CACHE_KEY_VERSION;
pub use program_walks::coerce_to_pow2_with_tail_mask;
pub use program_walks::dispatch_element_count;
pub use program_walks::dispatch_element_count_for_program;
pub use program_walks::dispatch_param_words;
pub use program_walks::dispatch_param_words_into;
pub use program_walks::element_size_bytes;
pub use program_walks::enforce_actual_output_budget;
pub use program_walks::find_indirect_dispatch;
pub use program_walks::infer_dispatch_grid;
pub use program_walks::infer_dispatch_grid_for_count;
pub use program_walks::output_binding_layout;
pub use program_walks::output_binding_layouts;
pub use program_walks::output_layout_from_program;
pub use program_walks::try_coerce_to_pow2_with_tail_mask;
pub use program_walks::try_dispatch_param_words;
pub use program_walks::try_dispatch_param_words_into;
pub use program_walks::IndirectDispatch;
pub use program_walks::OutputBindingLayout;
pub use program_walks::OutputLayout;
pub use program_walks::TailMaskPolicy;
pub use registry::default_validator;
pub use registry::Chain;
pub use registry::Dialect;
pub use registry::DialectRegistration;
pub use registry::DialectRegistry;
pub use registry::DuplicateOpIdError;
pub use registry::EnforceGate;
pub use registry::EnforceVerdict;
pub use registry::MutationClass;
pub use registry::OpBackendTarget;
pub use registry::OpDefRegistration;
pub use registry::Target;
pub use residency::ResidentGraphReuseTelemetry;
pub use residency::ResidentGraphReuseTelemetryError;
pub use routing::select_sort_backend;
pub use routing::Distribution;
pub use routing::RoutingTable;
pub use routing::SortBackend;
pub use specialization::SpecCacheKey;
pub use specialization::SpecMap;
pub use specialization::SpecValue;
pub use speculate::record_speculative_variant_race;
pub use speculate::SpeculativeVariantDecision;
pub use speculate::SpeculativeVariantKeys;
pub use speculate::SpeculativeVariantKind;
pub use speculate::SpeculativeVariantRace;
pub use subgroup::SubgroupCaps;
pub use subgroup::SubgroupOp;

Modules§

accounting
Backend-neutral checked arithmetic and atomic accounting primitives.
allocation
Backend-neutral fallible allocation reservation helpers.
aot
Backend-neutral ahead-of-time emission registry. Backend-neutral AOT emission and launcher registries.
arm_independence
Independent-arm detection for queue-parallel dispatch (ROADMAP D2). Pure set arithmetic over (reads, writes) summaries; the dispatcher uses can_dispatch_concurrently to decide whether two megakernel arms can launch on independent backend queues or streams. D2 substrate: independent-arm detection for queue-parallel dispatch.
async_copy_overlap
Async-copy / kernel-overlap decision policy (ROADMAP D3). Pure per-slot read/write conflict check that decides whether an H2D copy can run on a side stream concurrently with a downstream kernel. D3 substrate: async-copy / kernel-overlap decision policy.
autotune_store
Persistent autotuning record store (ROADMAP I3).
backend
VyreBackend trait, BackendError, capability records, validation. Frozen backend extension contract.
benchmark_pass_selection
Backend-neutral benchmark-driven optimization pass selection. Benchmark-driven optimization pass selection.
binding
Backend-neutral program binding plans. Backend-neutral binding-plan construction for VYRE programs.
bindless_policy
Bindless buffers / textures decision policy (ROADMAP D9). Decides whether to use a bindless descriptor array or traditional per-resource bindings, given the kernel’s resource count and the backend’s bindless support level (Full / Static / Unsupported). D9 substrate: bindless buffers / textures decision policy.
cache_eviction
Backend-neutral cache eviction policy.
cache_eviction_heat
N5 substrate: spec-cache eviction with frequency × recency heat decay. Used by F1/F3 cache layers when capacity pressure triggers - entries_to_evict(stats, capacity, now) returns the evictable IDs in eviction order (lowest heat first). N5 substrate: spec-cache eviction policy with frequency × recency heat decay.
cache_invalidation
Backend-neutral cache invalidation policy. Backend-neutral pipeline-cache invalidation helpers.
command_reuse_policy
Pre-recorded command reuse decision policy (ROADMAP D4). Decides whether to record a native command sequence once and replay it for repeated identical dispatches, based on per-launch overhead vs record + replay overhead. D4 substrate: pre-recorded command reuse policy.
device_convergence
Backend-neutral device-side convergence planning for iterative analyses.
device_diagnostic_aggregation
Backend-neutral device diagnostic aggregation planning. Backend-neutral device-side diagnostic aggregation planning.
device_extraction
Device-conditioned e-graph extraction helpers.
device_profile
Backend-neutral device capability profile and projections. Backend-neutral device capability profile.
device_signature
Tier-B device signature TOML loader. Tier-B device signature loader.
device_work_queue
Backend-neutral device-side work queue planning. Backend-neutral device-side work queue planning for dependent dataflow execution.
diagnostics
Structured, machine-readable diagnostic rendering. Structured, machine-readable diagnostics.
dispatch_policy
Bundled D-series + I2 policy invocation. One-shot eval of every dispatch-side decision substrate so the runtime threads a single DispatchPolicyVerdict instead of six per-substrate verdicts.
dispatch_shape
Backend-neutral dispatch-shape comparison helpers.
error
Re-exports the unified vyre error type from vyre-foundation. Unified error type for validation, wire format, lowering, and execution. Error types for IR validation, wire-format decoding, and GPU operations.
evidence
Backend-neutral evidence bundles and source provenance. Backend-neutral evidence, provenance, and replay metadata.
extraction_cost
Device-profile-aware extraction cost helpers (ROADMAP A7). Device-profile-aware cost helpers for vyre_foundation::optimizer::eqsat::extract_best.
fixpoint_iterations
Backend-neutral fixpoint-iteration resolution.
fusion
Cross-dispatch fusion decision types and pure analysis. Cross-dispatch fusion decisions shared by concrete backends.
graph_capture
Backend-neutral replayable graph-capture binding planning. Backend-neutral planning for replayable graph-capture dispatch paths.
grid_sync
Cross-grid synchronization: kernel-split fallback for backends that lack a native cooperative-launch grid barrier. Splits a Program at every Node::Barrier { ordering: GridSync } and dispatches the segments in sequence - the kernel-launch boundary itself is the grid-level fence. Grid-sync kernel splitting.
input_identity
Backend-neutral exact-input identity keys for replay caches. Exact-input identity keys shared by replay and materialized-output caches.
launch
Backend-neutral launch preparation and program fingerprint wrappers. Backend-neutral dispatch launch preparation.
launch_fusion
Backend-neutral adjacent-stage launch fusion planning.
megakernel_barrier
Backend-neutral megakernel wave barrier planning. Backend-neutral megakernel barrier planning for dependency-typed waves.
megakernel_execution
Backend-neutral persistent megakernel execution planning. Backend-neutral execution planning for persistent megakernel waves.
megakernel_frontier
Backend-neutral megakernel frontier memory planning. Backend-neutral frontier memory planning for dependency-aware megakernels.
multi_query_execution
Backend-neutral resident-graph multi-query execution planning. Backend-neutral multi-query execution planning over one resident graph.
numeric
Backend-neutral numeric boundary conversions.
observability
Driver-tier observability surface (P-OBS-1). Substrate-call counters, cache hit rates, and a Prometheus exposition format.
ordering
Backend-neutral monotonic ordering helpers for staging hot paths.
output_slots
Backend-neutral fallible output-slot vector management.
param_inlining
Push-constant / tiny-param inlining decision policy (ROADMAP D7). Backends consume decide_param_inlining to choose between inlined launch metadata and a uniform buffer upload, based on a per-backend crate::param_inlining::ParamInliningPolicy. D7 substrate: push-constant / tiny-param inlining policy.
persistent
G7: persistent-thread engine + device-side work queue. Eliminates per-file kernel-launch overhead for streams of many small scan jobs. Persistent-thread engine + host-side work queue (G7).
persistent_kernel_policy
Persistent-kernel-mode decision policy (ROADMAP D1). Decides whether to replace N small kernel launches with one persistent kernel that polls a device-side work queue, based on measured per-launch overhead and persistent-setup cost. Pure decision, no Program walk. D1 substrate: persistent-kernel-mode decision policy.
pipeline
Compiled-pipeline cache, dispatch config, batched dispatch. Pipeline mode - pre-compile a Program once, dispatch repeatedly with new inputs.
pipeline_fusion
N4 substrate: cross-pipeline disjoint-binding fusion analysis. Lifts D2’s in-megakernel-arm independence check to the cross-dispatch boundary so consecutive pipelines with disjoint reads/writes can fuse into one launch with a workgroup-bounded fence instead of a full grid-sync. N4 substrate: cross-pipeline disjoint-binding fusion analysis.
program_walks
Pure vyre_foundation::ir::Program analysis shared by all backends. Pure IR walks over vyre_foundation::ir::Program shared by all backends.
registry
Dialect registry, OpDef registration, lowering tables, and interner. Dialect registry, OpDef registration, and TOML loader.
reservation_policy
Backend-neutral reservation policy adapters.
residency
Backend-neutral resident-resource reuse telemetry.
resident_transfer_fusion
Backend-neutral resident transfer interval fusion.
result_compaction
Backend-neutral compact result readback planning.
routing
Runtime routing: profile-guided variant selection, algorithm heuristics. Runtime distribution-aware algorithm routing.
shadow
Sampled CPU-reference shadow execution of live dispatches. Exhaustive CPU-vs-backend conformance for compiled pipelines.
shape_prediction
N8 substrate: predicted-next-shape fingerprint API. Records recent dispatch fingerprints and predicts the next via repeat / short-cycle detection so the async dispatch path can prefetch the predicted pipeline cache key during the GPU wait window. N8 substrate: predicted-next-shape fingerprint API.
specialization
Backend-neutral shader specialization values and cache key inputs. Backend-neutral specialization values and cache key inputs.
speculate
G6: speculative rule evaluation with commit/rollback. Runs the expensive confirmer on every tile, commits only tiles whose pre-filter passed. Hides gather latency + improves subgroup uniformity. Scaffold. Speculative rule evaluation with commit/rollback (G6).
speculation_substrate
N2 substrate (foundation half): per-rewrite speculation-as-substrate decision policy. Given baseline + speculative dispatch observations
strategy
Backend-specific lowering strategies (Layer 2 of the two-layer optimization architecture). Target-dependent emission decisions that don’t change what a program computes but change how it’s emitted for a specific chip/API.
subgroup
Canonical subgroup operation taxonomy and capability records. Backend-neutral subgroup operation taxonomy.
trace_jit_policy
Trace-based JIT specialization decision policy (ROADMAP I2). Decides whether the dispatcher should fire a speculative pre-spec on a predicted shape, weighted by recent hit count and prediction confidence vs the speculative spec cost. I2 substrate: trace-based JIT specialization decision policy.
transfer_accounting
Backend-neutral checked transfer accounting policy. Backend-neutral transfer accounting policy.
tuner
Backend-neutral autotuner framework. Backend-neutral autotuner framework and cache metadata.
validation
Shared validation caches and launch-geometry contracts. Shared validation caches and launch-geometry checks for concrete drivers.

Structs§

AttrSchema
Attribute schema entry.
InternedOpId
Interned operation identifier used by every dialect lookup.
LoweringCtx
Backend lowering context retained for source compatibility.
LoweringTable
Lowering function table attached to an operation definition.
NativeModule
Native-module descriptor used by native lowering builders.
OpDef
Frozen operation definition.
Signature
Operation signature contract.
TextModule
Backend text module descriptor used by native lowering builders.
TypedParam
Typed input or output parameter.

Enums§

AttrType
Attribute value type declared by an operation schema.
Category
Operation category.
Error
The unified failure enum for every vyre operation.

Functions§

intern_string
Intern a stable operation-id string into a compact process-local id.

Type Aliases§

NativeModuleBuilder
Builder type for native-module lowering.
PrimaryBinaryBuilder
Reserved builder type for the primary binary lowering slot.
PrimaryTextBuilder
Reserved builder type for the primary text lowering slot.
ReferenceKind
Function pointer used by reference-backend lowerings.
SecondaryTextBuilder
Builder type for the secondary text lowering slot.