Struct Qwen3Config

Source

pub struct Qwen3Config {
    pub vocab_size: usize,
    pub hidden_size: usize,
    pub intermediate_size: usize,
    pub num_hidden_layers: usize,
    pub num_attention_heads: usize,
    pub num_key_value_heads: usize,
    pub head_dim: usize,
    pub max_position_embeddings: usize,
    pub rms_norm_eps: f64,
    pub rope_theta: f64,
    pub hidden_act: String,
    pub tie_word_embeddings: bool,
    pub attention_bias: bool,
    pub qk_norm: bool,
    pub sliding_window: Option<usize>,
    pub max_window_layers: usize,
    pub use_sliding_window: bool,
    pub num_experts: usize,
    pub num_experts_used: usize,
    pub expert_ffn_size: usize,
    pub shared_expert_ffn_size: usize,
    pub expert_weights_scale: f32,
}

Fields§

§vocab_size: usize

§hidden_size: usize

§intermediate_size: usize

§num_hidden_layers: usize

§num_attention_heads: usize

§num_key_value_heads: usize

§head_dim: usize

§max_position_embeddings: usize

§rms_norm_eps: f64

§rope_theta: f64

§hidden_act: String

§tie_word_embeddings: bool

§attention_bias: bool

§qk_norm: bool

Whether the model uses per-head RMS-norm on Q/K before RoPE (a.k.a. “QK-norm”). Qwen 3 has it; Qwen 2 does NOT. Defaults to true to match the historical Qwen 3 build path.

§sliding_window: Option<usize>

Sliding-window size; None (or absent) means full causal.

§max_window_layers: usize

Number of leading layers that use full causal attention; layers [max_window_layers, num_hidden_layers) use sliding window when use_sliding_window is true. HF default: all layers full.

§use_sliding_window: bool

§num_experts: usize

Total number of routed experts per MoE layer (0 = dense model). HF key: num_experts / n_routed_experts. GGUF key: qwen3.expert_count.

§num_experts_used: usize

Number of experts activated per token (top-k routing). HF key: num_experts_per_tok. GGUF key: qwen3.expert_used_count.

§expert_ffn_size: usize

FFN inner width for each routed expert. When 0, falls back to intermediate_size / num_experts_used to match upstream defaults. GGUF key: qwen3.expert_feed_forward_length.

§shared_expert_ffn_size: usize

FFN inner width for the always-on shared expert (0 = no shared expert). GGUF key: qwen3.expert_shared_feed_forward_length.

§expert_weights_scale: f32

Multiplier applied to routed-expert logits before softmax (default 1.0). GGUF key: qwen3.expert_weights_scale.

Struct Qwen3Config Copy item path

Fields§

Implementations§

impl Qwen3Config

pub fn from_file(path: &Path) -> Result<Qwen3Config, Error>

pub fn kv_group_size(&self) -> usize

pub fn q_proj_dim(&self) -> usize

pub fn kv_proj_dim(&self) -> usize

pub fn is_moe(&self) -> bool

pub fn expert_ffn_dim(&self) -> usize

pub fn shared_expert_ffn_dim(&self) -> usize

pub fn layer_uses_swa(&self, idx: usize) -> bool

Trait Implementations§

impl Clone for Qwen3Config

fn clone(&self) -> Qwen3Config

fn clone_from(&mut self, source: &Self)

impl Debug for Qwen3Config

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

impl<'de> Deserialize<'de> for Qwen3Config

fn deserialize<__D>( __deserializer: __D, ) -> Result<Qwen3Config, <__D as Deserializer<'de>>::Error> where __D: Deserializer<'de>,

Auto Trait Implementations§

impl Freeze for Qwen3Config

impl RefUnwindSafe for Qwen3Config

impl Send for Qwen3Config

impl Sync for Qwen3Config

impl Unpin for Qwen3Config

impl UnsafeUnpin for Qwen3Config

impl UnwindSafe for Qwen3Config

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct Qwen3Config

fn deserialize<__D>( __deserializer: __D, ) -> Result<Qwen3Config, <__D as Deserializer<'de>>::Error>
where __D: Deserializer<'de>,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,