Skip to main content

ExecutableGraph

Trait ExecutableGraph 

Source
pub trait ExecutableGraph: Send {
Show 24 methods // Required methods fn set_param(&mut self, name: &str, data: &[f32]); fn run(&mut self, inputs: &[(&str, &[f32])]) -> Vec<Vec<f32>>; // Provided methods fn clone_box(&self) -> Box<dyn ExecutableGraph> { ... } fn run_raw(&mut self, inputs: &[(&str, &[f32])]) -> Vec<(*const f32, usize)> { ... } fn run_slots(&mut self, _inputs: &[&[f32]]) -> &[(usize, usize)] { ... } fn arena_ptr(&self) -> *const u8 { ... } fn set_active_extent(&mut self, extent: Option<(usize, usize)>) { ... } fn set_moe_resident_experts(&mut self, _mask: &[bool]) { ... } fn set_moe_resident_experts_per_layer(&mut self, _masks: &[&[bool]]) { ... } fn enable_moe_topk_capture(&mut self, _num_experts: usize) -> bool { ... } fn take_moe_topk_capture(&mut self) -> Option<Vec<Vec<u32>>> { ... } fn take_moe_residency_stats(&mut self) -> Option<MoeResidencyStats> { ... } fn bind_handle(&mut self, _name: &str, _data: &[f32]) -> bool { ... } fn read_handle(&self, _name: &str) -> Option<Vec<f32>> { ... } fn bind_gpu_handle(&mut self, _name: &str, _data: &[f32]) -> bool { ... } fn has_gpu_handle(&self, _name: &str) -> bool { ... } fn set_gpu_handle_feed( &mut self, _handle_name: &str, _output_index: usize, ) -> bool { ... } fn read_gpu_handle(&self, _name: &str) -> Option<Vec<f32>> { ... } fn run_feed_gpu_handle( &mut self, inputs: &[(&str, &[f32])], _handle_name: &str, _output_index: usize, ) -> Option<Vec<f32>> { ... } fn commit_no_wait(&mut self, inputs: &[(&str, &[f32])]) { ... } fn sync_pending(&mut self) { ... } fn run_pipelined( &mut self, input_sets: &[Vec<(&str, &[f32])>], ) -> Vec<Vec<Vec<f32>>> { ... } fn set_param_typed(&mut self, name: &str, data: &[u8], dtype: DType) { ... } fn run_typed( &mut self, inputs: &[(&str, &[u8], DType)], ) -> Vec<(Vec<u8>, DType)> { ... }
}
Expand description

A compiled, ready-to-execute graph on a specific backend.

Required Methods§

Source

fn set_param(&mut self, name: &str, data: &[f32])

Set a named parameter (weight) buffer.

Source

fn run(&mut self, inputs: &[(&str, &[f32])]) -> Vec<Vec<f32>>

Execute the graph with named inputs. Returns output data (copies from arena).

Provided Methods§

Source

fn clone_box(&self) -> Box<dyn ExecutableGraph>

Deep-clone this executable into a fresh Box. Lets CompiledGraph implement Clone so callers (e.g. eda-mna’s SensitivityContext) can spin up N independent executor copies for thread-parallel dispatch without paying the full graph-compile cost N times. Default implementation panics; backends that support cloning override.

Source

fn run_raw(&mut self, inputs: &[(&str, &[f32])]) -> Vec<(*const f32, usize)>

Execute and return raw pointers to output data in arena (zero-copy).

Source

fn run_slots(&mut self, _inputs: &[&[f32]]) -> &[(usize, usize)]

Fastest path: inputs are passed by slot index, and the return value is a slice of `(offset, len)` pairs, one per output. Read each output from the arena via `arena_ptr().add(offset)`.

Source

fn arena_ptr(&self) -> *const u8

Get the raw arena buffer pointer for reading outputs after run_slots.

Source

fn set_active_extent(&mut self, extent: Option<(usize, usize)>)

Hint the executor that subsequent `run` calls should process only the first `actual` rows along the bucket axis (out of `upper`, the extent the graph was compiled at). Backends that support per-kernel active-extent dispatch honor this; others ignore it and process the full compiled extent.

Pass None to clear the hint. The hint is sticky — set it before each run and clear it after, or maintain it across runs at your discretion.

Even when the hint is honored, callers must not rely on the contents of the output past the first `actual` rows — that region may contain stale data from earlier runs (kernels skip it).

Default: no-op. See BucketedCompileCache::run_padded for the canonical caller; backends opt in by overriding this method.

Source

fn set_moe_resident_experts(&mut self, _mask: &[bool])

TIDE merged placement mask (union across MoE layers). CPU: stats + host path.

Source

fn set_moe_resident_experts_per_layer(&mut self, _masks: &[&[bool]])

Per-MoE-layer placement masks (`masks[layer][expert]`). Preferred over the merged mask on CPU.

Source

fn enable_moe_topk_capture(&mut self, _num_experts: usize) -> bool

Capture MoE router TopK indices on the next CPU forward (TIDE refresh).

Source

fn take_moe_topk_capture(&mut self) -> Option<Vec<Vec<u32>>>

Take captured per-layer expert indices (one vec per MoE TopK in order).

Source

fn take_moe_residency_stats(&mut self) -> Option<MoeResidencyStats>

MoE GroupedMatMul residency accounting from the last forward (CPU).

Source

fn bind_handle(&mut self, _name: &str, _data: &[f32]) -> bool

Bind a persistent buffer handle (KV-cache, training state, etc.). The buffer lives across run() calls and is not in the arena. Returns true if the backend supports persistent handles.

Source

fn read_handle(&self, _name: &str) -> Option<Vec<f32>>

Read a persistent buffer’s current contents.

Source

fn bind_gpu_handle(&mut self, _name: &str, _data: &[f32]) -> bool

GPU-resident input (MLX): upload once, reuse across runs.

Source

fn has_gpu_handle(&self, _name: &str) -> bool

Source

fn set_gpu_handle_feed( &mut self, _handle_name: &str, _output_index: usize, ) -> bool

Source

fn read_gpu_handle(&self, _name: &str) -> Option<Vec<f32>>

Source

fn run_feed_gpu_handle( &mut self, inputs: &[(&str, &[f32])], _handle_name: &str, _output_index: usize, ) -> Option<Vec<f32>>

Run and refresh a GPU handle from output_index; returns that output on host.

Source

fn commit_no_wait(&mut self, inputs: &[(&str, &[f32])])

Encode + commit a forward pass without waiting for completion.

Outputs of intermediate calls are stomped — use run_pipelined if you need outputs from each individual commit. Pair with sync_pending to drain.

Default: synchronous fallback (calls run, discards output). CPU uses this default since BLAS is synchronous anyway.

Source

fn sync_pending(&mut self)

Wait for every command queued by commit_no_wait. Default: no-op (synchronous backends have nothing pending).

Source

fn run_pipelined( &mut self, input_sets: &[Vec<(&str, &[f32])>], ) -> Vec<Vec<Vec<f32>>>

Issue a batch of forward passes pipelined, returning per-run outputs.

The Metal impl encodes a per-commit blit so each in-flight run’s outputs survive subsequent commits stomping the shared arena. The CPU default is just sequential runs — equally correct, no perf penalty (CPU has no GPU sync cost to amortize).

Returns out[run_idx][output_idx][element_idx].

Source

fn set_param_typed(&mut self, name: &str, data: &[u8], dtype: DType)

Set a named parameter from raw bytes in the given dtype.

Source

fn run_typed( &mut self, inputs: &[(&str, &[u8], DType)], ) -> Vec<(Vec<u8>, DType)>

Run with typed inputs and typed outputs. Returns (bytes, dtype) per output; the dtype is whatever the graph’s output node was declared as.

Dyn Compatibility§

This trait is dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety".

Implementors§