pub struct Llama32Flow<'a> { /* private fields */ }
Expand description
Fluent LLaMA-3.2 flow builder — reads the config once, chains modifiers, then builds.
use rlx_models::llama32::{Llama32Config, Llama32Flow};
let built = Llama32Flow::new(&cfg)
.prefill()
.batch(1)
.seq(128)
.lm_head()
.last_token_logits()
.build(&mut weights)?;
Implementations§
Source§impl<'a> Llama32Flow<'a>
impl<'a> Llama32Flow<'a>
pub fn new(cfg: &'a Llama32Config) -> Llama32Flow<'a>
Sourcepub fn for_prefill(
cfg: &'a Llama32Config,
batch: usize,
seq: usize,
) -> Llama32Flow<'a>
pub fn for_prefill( cfg: &'a Llama32Config, batch: usize, seq: usize, ) -> Llama32Flow<'a>
Prefill recipe with common batch/seq defaults.
Sourcepub fn for_decode(
cfg: &'a Llama32Config,
batch: usize,
past_seq: usize,
) -> Llama32Flow<'a>
pub fn for_decode( cfg: &'a Llama32Config, batch: usize, past_seq: usize, ) -> Llama32Flow<'a>
Decode recipe with common batch/past defaults (includes LM head).
pub fn prefill(self) -> Llama32Flow<'a>
pub fn decode(self) -> Llama32Flow<'a>
pub fn batch(self, batch: usize) -> Llama32Flow<'a>
Sourcepub fn seq(self, seq: usize) -> Llama32Flow<'a>
pub fn seq(self, seq: usize) -> Llama32Flow<'a>
Prefill sequence length (ignored in decode mode).
Sourcepub fn past(self, past_seq: usize) -> Llama32Flow<'a>
pub fn past(self, past_seq: usize) -> Llama32Flow<'a>
Decode past length (ignored in prefill mode).
Sourcepub fn dynamic_seq(self) -> Llama32Flow<'a>
pub fn dynamic_seq(self) -> Llama32Flow<'a>
Symbolic sequence dim (sym::SEQ) for dynamic prefill specialization.
Sourcepub fn dynamic_past(self) -> Llama32Flow<'a>
pub fn dynamic_past(self) -> Llama32Flow<'a>
Symbolic past dim (sym::PAST_SEQ) for dynamic decode specialization.
pub fn lm_head(self) -> Llama32Flow<'a>
Hidden states only — skip the LM head (the default for prefill unless .lm_head() is called).
pub fn last_token_logits(self) -> Llama32Flow<'a>
pub fn export_kv(self) -> Llama32Flow<'a>
pub fn custom_mask(self) -> Llama32Flow<'a>
pub fn profile(self, profile: CompileProfile) -> Llama32Flow<'a>
Sourcepub fn profile_prefill(self) -> Llama32Flow<'a>
pub fn profile_prefill(self) -> Llama32Flow<'a>
Fusion-first prefill profile preset.
Sourcepub fn profile_decode(self) -> Llama32Flow<'a>
pub fn profile_decode(self) -> Llama32Flow<'a>
Decode / KV-cache profile preset (Fusable lowering).
pub fn profile_near(self, weights_path: &Path) -> Llama32Flow<'a>
Sourcepub fn before_layers(
self,
stages: impl IntoIterator<Item = FlowStage>,
) -> Llama32Flow<'a>
pub fn before_layers( self, stages: impl IntoIterator<Item = FlowStage>, ) -> Llama32Flow<'a>
Insert custom stages after embedding, before the layer stack.
Sourcepub fn after_layers(
self,
stages: impl IntoIterator<Item = FlowStage>,
) -> Llama32Flow<'a>
pub fn after_layers( self, stages: impl IntoIterator<Item = FlowStage>, ) -> Llama32Flow<'a>
Insert custom stages after the layer stack, before final norm / LM head.
Sourcepub fn layer<F>(self, f: F) -> Llama32Flow<'a>
pub fn layer<F>(self, f: F) -> Llama32Flow<'a>
Override per-layer construction (prefill or decode depending on mode).
Call LlamaLayerCtx::default_stage to keep stock blocks for unmodified layers.
Sourcepub fn patch_flow<F>(self, f: F) -> Llama32Flow<'a>
pub fn patch_flow<F>(self, f: F) -> Llama32Flow<'a>
Patch the assembled ModelFlow before build — full flexibility escape hatch.
pub fn build(self, weights: &mut dyn WeightLoader) -> Result<BuiltModel, Error>
Trait Implementations§
Source§impl<'a> Clone for Llama32Flow<'a>
impl<'a> Clone for Llama32Flow<'a>
Source§fn clone(&self) -> Llama32Flow<'a>
fn clone(&self) -> Llama32Flow<'a>
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Auto Trait Implementations§
impl<'a> !RefUnwindSafe for Llama32Flow<'a>
impl<'a> !UnwindSafe for Llama32Flow<'a>
impl<'a> Freeze for Llama32Flow<'a>
impl<'a> Send for Llama32Flow<'a>
impl<'a> Sync for Llama32Flow<'a>
impl<'a> Unpin for Llama32Flow<'a>
impl<'a> UnsafeUnpin for Llama32Flow<'a>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more