Trait ExecutableGraph

Source

pub trait ExecutableGraph: Send {
Show 39 methods
    // Required methods
    fn set_param(&mut self, name: &str, data: &[f32]);
    fn run(&mut self, inputs: &[(&str, &[f32])]) -> Vec<Vec<f32>>;

    // Provided methods
    fn finalize_params(&mut self) { ... }
    fn clone_box(&self) -> Box<dyn ExecutableGraph> { ... }
    fn run_read_outputs(
        &mut self,
        inputs: &[(&str, &[f32])],
        read_indices: Option<&[usize]>,
    ) -> Vec<Vec<f32>> { ... }
    fn run_raw(&mut self, inputs: &[(&str, &[f32])]) -> Vec<(*const f32, usize)> { ... }
    fn run_slots(&mut self, _inputs: &[&[f32]]) -> &[(usize, usize)] { ... }
    fn arena_ptr(&self) -> *const u8 { ... }
    fn set_active_extent(&mut self, extent: Option<(usize, usize)>) { ... }
    fn set_rng(&mut self, rng: RngOptions) { ... }
    fn rng(&self) -> RngOptions { ... }
    fn set_moe_resident_experts(&mut self, _mask: &[bool]) { ... }
    fn set_moe_resident_experts_per_layer(&mut self, _masks: &[&[bool]]) { ... }
    fn enable_moe_topk_capture(&mut self, _num_experts: usize) -> bool { ... }
    fn take_moe_topk_capture(&mut self) -> Option<Vec<Vec<u32>>> { ... }
    fn take_moe_residency_stats(&mut self) -> Option<MoeResidencyStats> { ... }
    fn bind_handle(&mut self, _name: &str, _data: &[f32]) -> bool { ... }
    fn read_handle(&self, _name: &str) -> Option<Vec<f32>> { ... }
    fn bind_gpu_handle(&mut self, _name: &str, _data: &[f32]) -> bool { ... }
    fn has_gpu_handle(&self, _name: &str) -> bool { ... }
    fn set_gpu_handle_feed(
        &mut self,
        _handle_name: &str,
        _output_index: usize,
    ) -> bool { ... }
    fn read_gpu_handle(&self, _name: &str) -> Option<Vec<f32>> { ... }
    fn read_gpu_handle_row(
        &self,
        _name: &str,
        _row: usize,
        _row_inner: usize,
    ) -> Option<Vec<f32>> { ... }
    fn register_kv_row_feed(
        &mut self,
        _handle_name: &str,
        _output_index: usize,
    ) -> bool { ... }
    fn feed_kv_row(
        &mut self,
        _src_row: usize,
        _dst_row: usize,
        _row_elems: usize,
    ) -> bool { ... }
    fn prepare_resident_gpu_handle(&mut self, _name: &str) -> bool { ... }
    fn stage_bound_gpu_handles_to_arena(&mut self) { ... }
    fn seed_resident_kv_prefix_from(
        &mut self,
        _src: &dyn ExecutableGraph,
        _prefix_tokens: usize,
        _outgoing_upper: usize,
        _kv_dim: usize,
        _n_layers: usize,
    ) -> bool { ... }
    fn copy_resident_kv_rows_from(
        &mut self,
        _src: &dyn ExecutableGraph,
        _from_row: usize,
        _to_row: usize,
        _outgoing_upper: usize,
        _kv_dim: usize,
        _n_layers: usize,
    ) -> bool { ... }
    fn copy_params_from(&mut self, src: &dyn ExecutableGraph) -> bool { ... }
    fn executable_as_any(&self) -> Option<&dyn Any> { ... }
    fn executable_as_any_mut(&mut self) -> Option<&mut dyn Any> { ... }
    fn read_output_row(
        &self,
        _out_idx: usize,
        _row: usize,
        _row_inner: usize,
    ) -> Option<Vec<f32>> { ... }
    fn run_feed_gpu_handle(
        &mut self,
        inputs: &[(&str, &[f32])],
        _handle_name: &str,
        _output_index: usize,
    ) -> Option<Vec<f32>> { ... }
    fn commit_no_wait(&mut self, inputs: &[(&str, &[f32])]) { ... }
    fn sync_pending(&mut self) { ... }
    fn run_pipelined(
        &mut self,
        input_sets: &[Vec<(&str, &[f32])>],
    ) -> Vec<Vec<Vec<f32>>> { ... }
    fn set_param_typed(&mut self, name: &str, data: &[u8], dtype: DType) { ... }
    fn run_typed(
        &mut self,
        inputs: &[(&str, &[u8], DType)],
    ) -> Vec<(Vec<u8>, DType)> { ... }
}

Expand description

A compiled, ready-to-execute graph on a specific backend.

Required Methods§

Source

fn set_param(&mut self, name: &str, data: &[f32])

Set a named parameter (weight) buffer.

Source

fn run(&mut self, inputs: &[(&str, &[f32])]) -> Vec<Vec<f32>>

Execute the graph with named inputs. Returns output data (copies from arena).

Provided Methods§

Source

fn finalize_params(&mut self)

Called after all params are uploaded (set_param / set_param_typed). Backends may warm caches (e.g. Metal QMatMul weight dequant).

Source

fn clone_box(&self) -> Box<dyn ExecutableGraph>

Deep-clone this executable into a fresh Box. Lets CompiledGraph implement Clone so callers (e.g. eda-mna’s SensitivityContext) can spin up N independent executor copies for thread-parallel dispatch without paying the full graph-compile cost N times. Default implementation panics; backends that support cloning override.

Source

fn run_read_outputs( &mut self, inputs: &[(&str, &[f32])], read_indices: Option<&[usize]>, ) -> Vec<Vec<f32>>

Like `Self::run`, but reads back only the outputs at `read_indices`. GPU handle feeds still update for every output. Default: all outputs.

Source

fn run_raw(&mut self, inputs: &[(&str, &[f32])]) -> Vec<(*const f32, usize)>

Execute and return raw pointers to output data in arena (zero-copy).

Source

fn run_slots(&mut self, _inputs: &[&[f32]]) -> &[(usize, usize)]

Fastest path: takes inputs by slot index and returns output `(offset, len)` pairs. Read each output from the arena via `arena_ptr().add(offset)`.

Source

fn arena_ptr(&self) -> *const u8

Get the raw arena buffer pointer for reading outputs after run_slots.

Source

fn set_active_extent(&mut self, extent: Option<(usize, usize)>)

Hint the executor that subsequent `run` calls should process only the first `actual` rows along the bucket axis (out of `upper`, the extent the graph was compiled at). Backends that support per-kernel active-extent dispatch honor this; others ignore it and process the full compiled extent.

Pass None to clear the hint. The hint is sticky — set it before each run and clear it after, or maintain it across runs at your discretion.

Even when honored, callers must not rely on the contents of the output past the first `actual` rows — that region may contain stale data from earlier runs (kernels skip it).

Default: no-op. See BucketedCompileCache::run_padded for the canonical caller; backends opt in by overriding this method.

Source

fn set_rng(&mut self, rng: RngOptions)

Override RNG policy for in-graph random ops without recompiling.

Source

fn rng(&self) -> RngOptions

Current RNG policy (default when the backend does not override).

Source

fn set_moe_resident_experts(&mut self, _mask: &[bool])

TIDE merged placement mask (union across MoE layers). CPU: stats + host path.

Source

fn set_moe_resident_experts_per_layer(&mut self, _masks: &[&[bool]])

Per MoE layer placement (masks[layer][expert]). Preferred over merged on CPU.

Source

fn enable_moe_topk_capture(&mut self, _num_experts: usize) -> bool

Capture MoE router TopK indices on the next CPU forward (TIDE refresh).

Source

fn take_moe_topk_capture(&mut self) -> Option<Vec<Vec<u32>>>

Take captured per-layer expert indices (one vec per MoE TopK in order).

Source

fn take_moe_residency_stats(&mut self) -> Option<MoeResidencyStats>

MoE GroupedMatMul residency accounting from the last forward (CPU).

Source

fn bind_handle(&mut self, _name: &str, _data: &[f32]) -> bool

Bind a persistent buffer handle (KV-cache, training state, etc.). The buffer lives across run() calls and is not in the arena. Returns true if the backend supports persistent handles.

Source

fn read_handle(&self, _name: &str) -> Option<Vec<f32>>

Read a persistent buffer’s current contents.

Source

fn bind_gpu_handle(&mut self, _name: &str, _data: &[f32]) -> bool

GPU-resident input (MLX): upload once, reuse across runs.

Source

fn has_gpu_handle(&self, _name: &str) -> bool

Source

fn set_gpu_handle_feed( &mut self, _handle_name: &str, _output_index: usize, ) -> bool

Source

fn read_gpu_handle(&self, _name: &str) -> Option<Vec<f32>>

Source

fn read_gpu_handle_row( &self, _name: &str, _row: usize, _row_inner: usize, ) -> Option<Vec<f32>>

Read one row from a resident GPU input handle without full-tensor D2H.

Source

fn register_kv_row_feed( &mut self, _handle_name: &str, _output_index: usize, ) -> bool

Register a targeted row feed for resident KV decode (graphs that emit the new token at the last bucket-padded output row). Returns false when the backend has no GPU-resident handle support. See `feed_kv_row`.

Source

fn feed_kv_row( &mut self, _src_row: usize, _dst_row: usize, _row_elems: usize, ) -> bool

Fold each registered row feed’s new-token row (src_row of its output) into the resident handle slot at dst_row (row_elems = kv_dim), in-place on device. Call after a logits-only run. Returns false when unsupported (caller keeps the host KV path).

Source

fn prepare_resident_gpu_handle(&mut self, _name: &str) -> bool

Mark a graph input as a device-resident handle with no host mirror.

Source

fn stage_bound_gpu_handles_to_arena(&mut self)

Upload bound (non-resident) GPU handle mirrors into the arena.

Source

fn seed_resident_kv_prefix_from( &mut self, _src: &dyn ExecutableGraph, _prefix_tokens: usize, _outgoing_upper: usize, _kv_dim: usize, _n_layers: usize, ) -> bool

D2D seed of resident past_k_* / past_v_* from another executable’s resident prefix (bucket rollover without host DRAM round-trip).

Source

fn copy_resident_kv_rows_from( &mut self, _src: &dyn ExecutableGraph, _from_row: usize, _to_row: usize, _outgoing_upper: usize, _kv_dim: usize, _n_layers: usize, ) -> bool

D2D copy resident KV rows [from_row..to_row) from another executable.

Source

fn copy_params_from(&mut self, src: &dyn ExecutableGraph) -> bool

Copy named parameter storage from another executable on the same backend. Used to avoid re-uploading packed U8 weights when compiling decode buckets.

Source

fn executable_as_any(&self) -> Option<&dyn Any>

Downcast hook for Self::copy_params_from. Backends override when supported.

Source

fn executable_as_any_mut(&mut self) -> Option<&mut dyn Any>

Mutable downcast hook for Self::copy_params_from.

Source

fn read_output_row( &self, _out_idx: usize, _row: usize, _row_inner: usize, ) -> Option<Vec<f32>>

Read one row from a row-major graph output after run / run_read_outputs. Metal reads a single row from the arena; default returns None (caller falls back).

Source

fn run_feed_gpu_handle( &mut self, inputs: &[(&str, &[f32])], _handle_name: &str, _output_index: usize, ) -> Option<Vec<f32>>

Run and refresh a GPU handle from output_index; returns that output on host.

Source

fn commit_no_wait(&mut self, inputs: &[(&str, &[f32])])

Encode + commit a forward pass without waiting for completion.

Outputs of intermediate calls are stomped — use run_pipelined if you need outputs from each individual commit. Pair with sync_pending to drain.

Default: synchronous fallback (calls run, discards output). CPU uses this default since BLAS is synchronous anyway.

Source

fn sync_pending(&mut self)

Wait for every command queued by commit_no_wait. Default: no-op (synchronous backends have nothing pending).

Source

fn run_pipelined( &mut self, input_sets: &[Vec<(&str, &[f32])>], ) -> Vec<Vec<Vec<f32>>>

Issue a batch of forward passes pipelined, returning per-run outputs.

The Metal impl encodes a per-commit blit so each in-flight run’s outputs survive subsequent commits stomping the shared arena. The CPU default is just sequential runs — equally correct, no perf penalty (CPU has no GPU sync cost to amortize).

Returns out[run_idx][output_idx][element_idx].

Source

fn set_param_typed(&mut self, name: &str, data: &[u8], dtype: DType)

Set a named parameter from raw bytes in the given dtype.

Source

fn run_typed( &mut self, inputs: &[(&str, &[u8], DType)], ) -> Vec<(Vec<u8>, DType)>

Run with typed inputs and typed outputs. Returns (bytes, dtype) per output; the dtype is whatever the graph’s output node was declared as.

Dyn Compatibility§

This trait is dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety".

ExecutableGraph

Trait ExecutableGraph Copy item path

Required Methods§

fn set_param(&mut self, name: &str, data: &[f32])

fn run(&mut self, inputs: &[(&str, &[f32])]) -> Vec<Vec<f32>>

Provided Methods§

fn finalize_params(&mut self)

fn clone_box(&self) -> Box<dyn ExecutableGraph>

fn run_read_outputs( &mut self, inputs: &[(&str, &[f32])], read_indices: Option<&[usize]>, ) -> Vec<Vec<f32>>

fn run_raw(&mut self, inputs: &[(&str, &[f32])]) -> Vec<(*const f32, usize)>

fn run_slots(&mut self, _inputs: &[&[f32]]) -> &[(usize, usize)]

fn arena_ptr(&self) -> *const u8

fn set_active_extent(&mut self, extent: Option<(usize, usize)>)

fn set_rng(&mut self, rng: RngOptions)

fn rng(&self) -> RngOptions

fn set_moe_resident_experts(&mut self, _mask: &[bool])

fn set_moe_resident_experts_per_layer(&mut self, _masks: &[&[bool]])

fn enable_moe_topk_capture(&mut self, _num_experts: usize) -> bool

fn take_moe_topk_capture(&mut self) -> Option<Vec<Vec<u32>>>

fn take_moe_residency_stats(&mut self) -> Option<MoeResidencyStats>

fn bind_handle(&mut self, _name: &str, _data: &[f32]) -> bool

fn read_handle(&self, _name: &str) -> Option<Vec<f32>>

fn bind_gpu_handle(&mut self, _name: &str, _data: &[f32]) -> bool

fn has_gpu_handle(&self, _name: &str) -> bool

fn set_gpu_handle_feed( &mut self, _handle_name: &str, _output_index: usize, ) -> bool

fn read_gpu_handle(&self, _name: &str) -> Option<Vec<f32>>

fn read_gpu_handle_row( &self, _name: &str, _row: usize, _row_inner: usize, ) -> Option<Vec<f32>>

fn register_kv_row_feed( &mut self, _handle_name: &str, _output_index: usize, ) -> bool

fn feed_kv_row( &mut self, _src_row: usize, _dst_row: usize, _row_elems: usize, ) -> bool

fn prepare_resident_gpu_handle(&mut self, _name: &str) -> bool

fn stage_bound_gpu_handles_to_arena(&mut self)

fn seed_resident_kv_prefix_from( &mut self, _src: &dyn ExecutableGraph, _prefix_tokens: usize, _outgoing_upper: usize, _kv_dim: usize, _n_layers: usize, ) -> bool

fn copy_resident_kv_rows_from( &mut self, _src: &dyn ExecutableGraph, _from_row: usize, _to_row: usize, _outgoing_upper: usize, _kv_dim: usize, _n_layers: usize, ) -> bool

fn copy_params_from(&mut self, src: &dyn ExecutableGraph) -> bool

fn executable_as_any(&self) -> Option<&dyn Any>

fn executable_as_any_mut(&mut self) -> Option<&mut dyn Any>

fn read_output_row( &self, _out_idx: usize, _row: usize, _row_inner: usize, ) -> Option<Vec<f32>>

fn run_feed_gpu_handle( &mut self, inputs: &[(&str, &[f32])], _handle_name: &str, _output_index: usize, ) -> Option<Vec<f32>>

fn commit_no_wait(&mut self, inputs: &[(&str, &[f32])])

fn sync_pending(&mut self)

fn run_pipelined( &mut self, input_sets: &[Vec<(&str, &[f32])>], ) -> Vec<Vec<Vec<f32>>>

fn set_param_typed(&mut self, name: &str, data: &[u8], dtype: DType)

fn run_typed( &mut self, inputs: &[(&str, &[u8], DType)], ) -> Vec<(Vec<u8>, DType)>

Dyn Compatibility§

Implementors§

Trait ExecutableGraph