Struct QuantizationConfig

Source

pub struct QuantizationConfig {
    pub bits: u8,
    pub group_size: usize,
    pub per_tensor: HashMap<String, TensorQuantConfig>,
}

Expand description

Per-tensor quantization configuration from quantization_config.json.

This mirrors the JSON structure produced by hf2q’s --quant auto mode, where each tensor may have a different bit-width and group size.

Fields§

§bits: u8

Default bit-width applied when a tensor has no per-tensor override.

§group_size: usize

Default group size applied when a tensor has no per-tensor override.

§per_tensor: HashMap<String, TensorQuantConfig>

Per-tensor overrides keyed by tensor name pattern. Each entry maps a tensor name (or glob pattern) to its quant config.

Implementations§

Source §

impl QuantizationConfig

Source

pub fn from_file(path: &Path) -> Result<Self>

Load and parse a quantization_config.json file from disk.

§Errors

Returns MlxError::IoError if the file cannot be read, or MlxError::QuantConfigError if the JSON is malformed.

Source

pub fn from_json(json: &str) -> Result<Self>

Parse a QuantizationConfig from a JSON string.

§Errors

Returns MlxError::QuantConfigError if the JSON is malformed.

Source

pub fn from_model_config_json(json: &str) -> Result<Self>

Parse per-tensor quantization overrides from the "quantization" section of an MLX model’s config.json.

In this format, the quantization section contains flat keys for tensor names alongside the default bits and group_size:

{
  "quantization": {
    "bits": 4,
    "group_size": 64,
    "model.layers.0.mlp.down_proj": {"bits": 8, "group_size": 64}
  }
}

This parses the entire "quantization" object, extracting bits and group_size as defaults, and any nested objects as per-tensor overrides.

Source

pub fn from_model_config_file(path: &Path) -> Result<Self>

Parse per-tensor overrides from a config.json file on disk.

Source

pub fn config_for_tensor(&self, tensor_name: &str) -> (u8, usize)

Look up the quantization parameters for a specific tensor name.

Matching strategy (in order):

1. Exact match in per_tensor.
2. Strip .weight / .scales / .biases suffix, then exact match.
3. Strip language_model. prefix (with or without suffix), then match.
4. Add language_model. prefix (with or without suffix), then match.

If no override matches, returns the default bits and group_size.

Trait Implementations§

Source §

impl Clone for QuantizationConfig

Source §

fn clone(&self) -> QuantizationConfig

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for QuantizationConfig

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl<'de> Deserialize<'de> for QuantizationConfig

Source §

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more

Auto Trait Implementations§

§

impl UnwindSafe for QuantizationConfig

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬 This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

Struct QuantizationConfig Copy item path

Fields§

Implementations§

impl QuantizationConfig

pub fn from_file(path: &Path) -> Result<Self>

§Errors

pub fn from_json(json: &str) -> Result<Self>

§Errors

pub fn from_model_config_json(json: &str) -> Result<Self>

pub fn from_model_config_file(path: &Path) -> Result<Self>

pub fn config_for_tensor(&self, tensor_name: &str) -> (u8, usize)

Trait Implementations§

impl Clone for QuantizationConfig

fn clone(&self) -> QuantizationConfig

fn clone_from(&mut self, source: &Self)

impl Debug for QuantizationConfig

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<'de> Deserialize<'de> for QuantizationConfig

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

Auto Trait Implementations§

impl Freeze for QuantizationConfig

impl RefUnwindSafe for QuantizationConfig

impl Send for QuantizationConfig

impl Sync for QuantizationConfig

impl Unpin for QuantizationConfig

impl UnsafeUnpin for QuantizationConfig

impl UnwindSafe for QuantizationConfig

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Struct QuantizationConfig

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,