Struct MockModelExecutor

Source

pub struct MockModelExecutor { /* private fields */ }

Expand description

Mock model executor that simulates prefill/decode with configurable latency. No model weights, no GPU — pure async simulation.

Implementations§

Source §

impl MockModelExecutor

Source

pub fn new( vocab_size: usize, prefill_latency: Duration, decode_latency: Duration, ) -> Self

Source

pub fn instant(vocab_size: usize) -> Self

Create with zero latency (for fast unit tests).

Source

pub fn prefill_count(&self) -> u64

Source

pub fn decode_count(&self) -> u64

Trait Implementations§

Source §

impl ModelExecutor for MockModelExecutor

Source §

fn info(&self) -> &ModelInfo

Get model information and metadata

Source §

fn prefill<'life0, 'life1, 'async_trait>( &'life0 self, input: &'life1 PrefillInput, ) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Execute prefill phase (process initial prompt)

Source §

fn decode<'life0, 'life1, 'async_trait>( &'life0 self, input: &'life1 DecodeInput, ) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Execute decode phase (generate next token)

Source §

fn capabilities(&self) -> ExecutorCapabilities

Get executor capabilities

Source §

fn status(&self) -> ExecutorStatus

Get current executor status

Source §

fn batch_decode<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Batch decode: process multiple sequences in one forward pass. Read more

Source §

fn forward<'life0, 'life1, 'async_trait>( &'life0 self, _input: &'life1 Arc<dyn TensorLike>, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Optional: full forward pass (for non-autoregressive use cases)

Source §

fn truncate_kv<'life0, 'life1, 'async_trait>( &'life0 self, _kv_cache: &'life1 Arc<dyn KvCacheHandle>, _new_len: usize, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Roll the KV cache for this executor’s sequence back to new_len. Used by speculative decoding on partial rejection so the next iteration sees a KV prefix that matches the accepted token stream. Default: Ok(()) — executors that don’t cache per-sequence state (stub, mock) are inherently tolerant; real LLM executors override.

Source §

fn forward_verify<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Multi-position decode-verify: one forward over N+1 tokens, producing one logits row per position. Used by speculative decoding’s target path so we don’t pay N+1 sequential forwards. Read more

Source §

fn warmup<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, Self: 'async_trait,

Warm up executor (load model, allocate memory, etc.)

Source §

fn shutdown<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, Self: 'async_trait,

Shutdown executor gracefully

Source §

fn release_cache(&self, _cache_id: &str)

Release KV cache and state for a completed sequence. Read more

Auto Trait Implementations§

§

impl UnwindSafe for MockModelExecutor

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Struct MockModelExecutor Copy item path

Implementations§

impl MockModelExecutor

pub fn new( vocab_size: usize, prefill_latency: Duration, decode_latency: Duration, ) -> Self

pub fn instant(vocab_size: usize) -> Self

pub fn prefill_count(&self) -> u64

pub fn decode_count(&self) -> u64

Trait Implementations§

impl ModelExecutor for MockModelExecutor

fn info(&self) -> &ModelInfo

fn prefill<'life0, 'life1, 'async_trait>( &'life0 self, input: &'life1 PrefillInput, ) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn decode<'life0, 'life1, 'async_trait>( &'life0 self, input: &'life1 DecodeInput, ) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn capabilities(&self) -> ExecutorCapabilities

fn status(&self) -> ExecutorStatus

fn batch_decode<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>> where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn forward<'life0, 'life1, 'async_trait>( &'life0 self, _input: &'life1 Arc<dyn TensorLike>, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>> where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn truncate_kv<'life0, 'life1, 'async_trait>( &'life0 self, _kv_cache: &'life1 Arc<dyn KvCacheHandle>, _new_len: usize, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>> where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn forward_verify<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>> where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn warmup<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>> where 'life0: 'async_trait, Self: 'async_trait,

fn shutdown<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>> where 'life0: 'async_trait, Self: 'async_trait,

fn release_cache(&self, _cache_id: &str)

Auto Trait Implementations§

impl !Freeze for MockModelExecutor

impl RefUnwindSafe for MockModelExecutor

impl Send for MockModelExecutor

impl Sync for MockModelExecutor

impl Unpin for MockModelExecutor

impl UnsafeUnpin for MockModelExecutor

impl UnwindSafe for MockModelExecutor

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct MockModelExecutor

fn prefill<'life0, 'life1, 'async_trait>( &'life0 self, input: &'life1 PrefillInput, ) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn decode<'life0, 'life1, 'async_trait>( &'life0 self, input: &'life1 DecodeInput, ) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

fn batch_decode<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn forward<'life0, 'life1, 'async_trait>( &'life0 self, _input: &'life1 Arc<dyn TensorLike>, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn truncate_kv<'life0, 'life1, 'async_trait>( &'life0 self, _kv_cache: &'life1 Arc<dyn KvCacheHandle>, _new_len: usize, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn forward_verify<'life0, 'life1, 'async_trait>( &'life0 self, inputs: &'life1 [DecodeInput], ) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

fn warmup<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, Self: 'async_trait,

fn shutdown<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>
where 'life0: 'async_trait, Self: 'async_trait,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,