Struct Config

Source

pub struct Config { // 19 fields
    pub hidden_size: usize,
    pub intermediate_size: usize,
    pub vocab_size: usize,
    pub num_hidden_layers: usize,
    pub num_attention_heads: usize,
    pub num_key_value_heads: usize,
    pub rms_norm_eps: f64,
    pub rope_theta: f32,
    pub bos_token_id: Option<u32>,
    pub eos_token_id: Option<EosTokenId>,
    pub rope_scaling: Option<RopeScaling>,
    pub tie_word_embeddings: bool,
    pub max_seq_len: usize,
    pub use_qkv_bias: bool,
    pub model_prefix: String,
    pub head_dim: Option<usize>,
    pub partial_rotary_factor: f32,
    pub linear_attn: Option<LinearAttnConfig>,
    pub residual_rms_norm: bool,
}

Expand description

Generalized LLM configuration shared by all decoder-only text models.

Fields§

§hidden_size: usize
§intermediate_size: usize
§vocab_size: usize
§num_hidden_layers: usize
§num_attention_heads: usize
§num_key_value_heads: usize
§rms_norm_eps: f64
§rope_theta: f32
§bos_token_id: Option<u32>
§eos_token_id: Option<EosTokenId>
§rope_scaling: Option<RopeScaling>
§tie_word_embeddings: bool
§max_seq_len: usize
§use_qkv_bias: bool

Whether Q/K/V projections use bias (true for Qwen2, false for LLaMA).

§model_prefix: String

Weight tensor prefix for the transformer stack (e.g. “model” or “model.language_model”).

§head_dim: Option<usize>

Explicit head dimension when it differs from hidden_size / num_attention_heads.

§partial_rotary_factor: f32

Fraction of head dims to apply rotary embeddings to (1.0 = all, 0.25 = first quarter).

§linear_attn: Option<LinearAttnConfig>

Linear attention configuration (None for pure softmax-attention models).

§residual_rms_norm: bool

Whether the RMS norm uses a residual weight, i.e. `(1 + weight) * norm(x)` instead of `weight * norm(x)`. True for Qwen3.5, whose norm weights are initialized to zero with the +1 applied at runtime.

Config

Struct Config Copy item path

Fields§

Trait Implementations§

impl Clone for Config

fn clone(&self) -> Config

fn clone_from(&mut self, source: &Self)

impl Debug for Config

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for Config

impl RefUnwindSafe for Config

impl Send for Config

impl Sync for Config

impl Unpin for Config

impl UnsafeUnpin for Config

impl UnwindSafe for Config

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> ErasedDestructor for T where T: 'static,

Struct Config

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> ErasedDestructor for T
where T: 'static,