llm_models 0.0.3

use super::*;

/// Identifier for one of the built-in GGUF model presets.
///
/// Each variant corresponds to one `GgufPreset` constant; use
/// `GgufPresetId::preset` to obtain it and `GgufPresetId::model_id` for the
/// model's id string.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(serde::Serialize, serde::Deserialize)]
pub enum GgufPresetId {
    // Llama family
    Llama321BInstruct,
    Llama323BInstruct,
    Llama318BInstructGGUF,

    // Mistral family
    Mixtral8x7BInstructV01,
    Mistral7BInstructV03,
    MistralNemoInstruct2407,
    MistralSmall24BInstruct2501,
    MistralSmallInstruct2409,

    // StableLM
    Stablelm212bChat,

    // Qwen 2.5 family
    Qwen257BInstruct,
    Qwen2532BInstruct,
    Qwen2514BInstruct,
    Qwen253BInstruct,

    // Granite 3.0 family
    Granite308bInstruct,
    Granite302bInstruct,

    SuperNovaMedius,

    // Nemotron / Minitron
    Llama31Nemotron70BInstruct,
    Llama31Nemotron51BInstruct,
    MistralNeMoMinitron8BInstruct,

    // Phi family
    Phi35MiniInstruct,
    Phi3Medium4kInstruct,
    Phi4MiniInstruct,
    Phi3Mini4kInstruct,
    Phi4,
    Phi35MoEInstruct,
}
impl GgufPresetId {
    /// Returns the `GgufPreset` constant that this id refers to.
    ///
    /// The match is exhaustive, so adding a variant without a preset is a
    /// compile error.
    pub fn preset(&self) -> GgufPreset {
        match self {
            Self::Llama321BInstruct => GgufPreset::LLAMA_3_2_1B_INSTRUCT,
            Self::Llama323BInstruct => GgufPreset::LLAMA_3_2_3B_INSTRUCT,
            Self::Llama318BInstructGGUF => GgufPreset::LLAMA_3_1_8B_INSTRUCT,
            Self::Mixtral8x7BInstructV01 => GgufPreset::MIXTRAL_8X7B_INSTRUCT_V0_1,
            Self::Mistral7BInstructV03 => GgufPreset::MISTRAL_7B_INSTRUCT_V0_3,
            Self::MistralNemoInstruct2407 => GgufPreset::MISTRAL_NEMO_INSTRUCT_2407,
            Self::MistralSmall24BInstruct2501 => GgufPreset::MISTRAL_SMALL_24B_INSTRUCT_2501,
            Self::MistralSmallInstruct2409 => GgufPreset::MISTRAL_SMALL_INSTRUCT_2409,
            Self::Stablelm212bChat => GgufPreset::STABLE_LM_2_12B_CHAT,
            Self::Qwen257BInstruct => GgufPreset::QWEN2_5_7B_INSTRUCT,
            Self::Qwen2532BInstruct => GgufPreset::QWEN2_5_32B_INSTRUCT,
            Self::Qwen2514BInstruct => GgufPreset::QWEN2_5_14B_INSTRUCT,
            Self::Qwen253BInstruct => GgufPreset::QWEN2_5_3B_INSTRUCT,
            Self::Granite308bInstruct => GgufPreset::GRANITE_3_0_8B_INSTRUCT,
            Self::Granite302bInstruct => GgufPreset::GRANITE_3_0_2B_INSTRUCT,
            Self::SuperNovaMedius => GgufPreset::SUPERNOVA_MEDIUS,
            Self::Llama31Nemotron70BInstruct => GgufPreset::LLAMA_3_1_NEMOTRON_70B_INSTRUCT,
            Self::Llama31Nemotron51BInstruct => GgufPreset::LLAMA_3_1_NEMOTRON_51B_INSTRUCT,
            Self::MistralNeMoMinitron8BInstruct => GgufPreset::MISTRAL_NEMO_MINITRON_8B_INSTRUCT,
            Self::Phi35MiniInstruct => GgufPreset::PHI_3_5_MINI_INSTRUCT,
            Self::Phi3Medium4kInstruct => GgufPreset::PHI_3_MEDIUM_4K_INSTRUCT,
            // The trailing underscore is part of the constant's actual name.
            Self::Phi4MiniInstruct => GgufPreset::PHI_4_MINI_INSTRUCT_,
            Self::Phi3Mini4kInstruct => GgufPreset::PHI_3_MINI_4K_INSTRUCT,
            Self::Phi4 => GgufPreset::PHI_4,
            Self::Phi35MoEInstruct => GgufPreset::PHI_3_5_MOE_INSTRUCT,
        }
    }

    /// Returns the model id string for this preset id.
    ///
    /// The returned string is a `'static` literal and never contains leading
    /// or trailing whitespace.
    pub fn model_id(&self) -> &'static str {
        match self {
            Self::Llama321BInstruct => "Llama-3.2-1B-Instruct",
            Self::Llama323BInstruct => "Llama-3.2-3B-Instruct",
            Self::Llama318BInstructGGUF => "Llama-3.1-8B-Instruct-GGUF",
            Self::Mixtral8x7BInstructV01 => "Mixtral-8x7B-Instruct-v0.1",
            Self::Mistral7BInstructV03 => "Mistral-7B-Instruct-v0.3",
            Self::MistralNemoInstruct2407 => "Mistral-Nemo-Instruct-2407",
            Self::MistralSmall24BInstruct2501 => "Mistral-Small-24B-Instruct-2501",
            Self::MistralSmallInstruct2409 => "Mistral-Small-Instruct-2409",
            Self::Stablelm212bChat => "stablelm-2-12b-chat",
            Self::Qwen257BInstruct => "Qwen2.5-7B-Instruct",
            Self::Qwen2532BInstruct => "Qwen2.5-32B-Instruct",
            Self::Qwen2514BInstruct => "Qwen2.5-14B-Instruct",
            Self::Qwen253BInstruct => "Qwen2.5-3B-Instruct",
            Self::Granite308bInstruct => "granite-3.0-8b-instruct",
            Self::Granite302bInstruct => "granite-3.0-2b-instruct",
            Self::SuperNovaMedius => "SuperNova-Medius",
            Self::Llama31Nemotron70BInstruct => "Llama-3.1-Nemotron-70B-Instruct",
            Self::Llama31Nemotron51BInstruct => "Llama-3_1-Nemotron-51B-Instruct",
            Self::MistralNeMoMinitron8BInstruct => "Mistral-NeMo-Minitron-8B-Instruct",
            Self::Phi35MiniInstruct => "Phi-3.5-mini-instruct",
            Self::Phi3Medium4kInstruct => "Phi-3-medium-4k-instruct",
            // Previously "phi-4-mini-instruct " with a stray trailing space, which
            // breaks exact-match lookups and any path/URL built from the id.
            // NOTE(review): confirm `GgufPreset::PHI_4_MINI_INSTRUCT_`'s
            // `model_base.model_id` carries no trailing space either.
            Self::Phi4MiniInstruct => "phi-4-mini-instruct",
            Self::Phi3Mini4kInstruct => "Phi-3-mini-4k-instruct",
            Self::Phi4 => "phi-4",
            Self::Phi35MoEInstruct => "Phi-3.5-MoE-instruct",
        }
    }
}
impl GgufPreset {
    pub const ALL_MODELS: [GgufPreset; 25usize] = [
        Self::LLAMA_3_2_1B_INSTRUCT,
        Self::LLAMA_3_2_3B_INSTRUCT,
        Self::LLAMA_3_1_8B_INSTRUCT,
        Self::MIXTRAL_8X7B_INSTRUCT_V0_1,
        Self::MISTRAL_7B_INSTRUCT_V0_3,
        Self::MISTRAL_NEMO_INSTRUCT_2407,
        Self::MISTRAL_SMALL_24B_INSTRUCT_2501,
        Self::MISTRAL_SMALL_INSTRUCT_2409,
        Self::STABLE_LM_2_12B_CHAT,
        Self::QWEN2_5_7B_INSTRUCT,
        Self::QWEN2_5_32B_INSTRUCT,
        Self::QWEN2_5_14B_INSTRUCT,
        Self::QWEN2_5_3B_INSTRUCT,
        Self::GRANITE_3_0_8B_INSTRUCT,
        Self::GRANITE_3_0_2B_INSTRUCT,
        Self::SUPERNOVA_MEDIUS,
        Self::LLAMA_3_1_NEMOTRON_70B_INSTRUCT,
        Self::LLAMA_3_1_NEMOTRON_51B_INSTRUCT,
        Self::MISTRAL_NEMO_MINITRON_8B_INSTRUCT,
        Self::PHI_3_5_MINI_INSTRUCT,
        Self::PHI_3_MEDIUM_4K_INSTRUCT,
        Self::PHI_4_MINI_INSTRUCT_,
        Self::PHI_3_MINI_4K_INSTRUCT,
        Self::PHI_4,
        Self::PHI_3_5_MOE_INSTRUCT,
    ];
    /// Preset for Meta's Llama 3.2 1B Instruct, with GGUF files taken from
    /// `bartowski/Llama-3.2-1B-Instruct-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const LLAMA_3_2_1B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::META,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Llama-3.2-1B-Instruct"),
            friendly_name: Cow::Borrowed("Llama 3.2 1B Instruct"),
            model_ctx_size: 131072u64,
            inference_ctx_size: 131072u64,
        },
        model_repo_id: Cow::Borrowed("meta-llama/Llama-3.2-1B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Llama-3.2-1B-Instruct-GGUF"),
        // Was 3 (copied from the 3B preset). This is the 1B model: its Q8_0
        // file below is only ~1.3 GB, consistent with roughly one billion weights.
        number_of_parameters: 1f64,
        tokenizer_file_name: Some(Cow::Borrowed("meta.llama3.1.8b.instruct.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 131072u64,
            embedding_length: 2048u64,
            feed_forward_length: Some(8192u64),
            head_count: 32u64,
            head_count_kv: Some(8u64),
            block_count: 16u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 128256u64,
            architecture: Cow::Borrowed("llama"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Llama-3.2-1B-Instruct-IQ3_M.gguf"),
                total_bytes: 657289344u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Llama-3.2-1B-Instruct-Q4_K_M.gguf"),
                total_bytes: 807694464u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Llama-3.2-1B-Instruct-Q5_K_M.gguf"),
                total_bytes: 911503488u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Llama-3.2-1B-Instruct-Q6_K.gguf"),
                total_bytes: 1021800576u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Llama-3.2-1B-Instruct-Q8_0.gguf"),
                total_bytes: 1321083008u64,
            },
        ]),
        preset_llm_id: GgufPresetId::Llama321BInstruct,
    };
    /// Preset for Meta's Llama 3.2 3B Instruct, with GGUF files taken from
    /// `bartowski/Llama-3.2-3B-Instruct-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const LLAMA_3_2_3B_INSTRUCT: GgufPreset = GgufPreset {
        preset_llm_id: GgufPresetId::Llama323BInstruct,
        organization: LocalLlmOrganization::META,
        model_repo_id: Cow::Borrowed("meta-llama/Llama-3.2-3B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Llama-3.2-3B-Instruct-GGUF"),
        tokenizer_file_name: Some(Cow::Borrowed("meta.llama3.1.8b.instruct.tokenizer.json")),
        number_of_parameters: 3.0,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Llama-3.2-3B-Instruct"),
            friendly_name: Cow::Borrowed("Llama 3.2 3B Instruct"),
            model_ctx_size: 131072,
            inference_ctx_size: 131072,
        },
        config: GgufPresetConfig {
            architecture: Cow::Borrowed("llama"),
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 128256,
            context_length: 131072,
            embedding_length: 3072,
            feed_forward_length: Some(8192),
            block_count: 28,
            head_count: 24,
            head_count_kv: Some(8),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                fname: Cow::Borrowed("Llama-3.2-3B-Instruct-IQ3_M.gguf"),
                q_lvl: 3,
                total_bytes: 1599668768,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Llama-3.2-3B-Instruct-Q4_K_M.gguf"),
                q_lvl: 4,
                total_bytes: 2019377696,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Llama-3.2-3B-Instruct-Q5_K_M.gguf"),
                q_lvl: 5,
                total_bytes: 2322154016,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Llama-3.2-3B-Instruct-Q6_K.gguf"),
                q_lvl: 6,
                total_bytes: 2643853856,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Llama-3.2-3B-Instruct-Q8_0.gguf"),
                q_lvl: 8,
                total_bytes: 3421899296,
            },
        ]),
    };
    /// Preset for Meta's Llama 3.1 8B Instruct, with GGUF files taken from
    /// `bartowski/Meta-Llama-3.1-8B-Instruct-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const LLAMA_3_1_8B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::META,
        model_base: LlmModelBase {
            // NOTE(review): the "-GGUF" suffix looks accidental, but it is kept
            // because it must stay equal to `GgufPresetId::model_id()`.
            model_id: Cow::Borrowed("Llama-3.1-8B-Instruct-GGUF"),
            friendly_name: Cow::Borrowed("Llama 3.1 8B Instruct"),
            model_ctx_size: 131072u64,
            inference_ctx_size: 131072u64,
        },
        // Was "meta-llama/Llama-3.1-8B-Instruct-GGUF". The upstream (non-GGUF)
        // model repo has no "-GGUF" suffix, matching the sibling Llama presets;
        // the GGUF files live in `gguf_repo_id`.
        model_repo_id: Cow::Borrowed("meta-llama/Llama-3.1-8B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"),
        number_of_parameters: 8f64,
        tokenizer_file_name: Some(Cow::Borrowed("meta.llama3.1.8b.instruct.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 131072u64,
            embedding_length: 4096u64,
            feed_forward_length: Some(14336u64),
            head_count: 32u64,
            head_count_kv: Some(8u64),
            block_count: 32u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 128256u64,
            architecture: Cow::Borrowed("llama"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2u8,
                fname: Cow::Borrowed("Meta-Llama-3.1-8B-Instruct-Q2_K.gguf"),
                total_bytes: 3179136416u64,
            },
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Meta-Llama-3.1-8B-Instruct-Q3_K_M.gguf"),
                total_bytes: 4018922912u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"),
                total_bytes: 4920739232u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf"),
                total_bytes: 5732992416u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Meta-Llama-3.1-8B-Instruct-Q6_K.gguf"),
                total_bytes: 6596011424u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"),
                total_bytes: 8540775840u64,
            },
        ]),
        preset_llm_id: GgufPresetId::Llama318BInstructGGUF,
    };
    /// Preset for Mistral AI's Mixtral 8x7B Instruct v0.1, with GGUF files
    /// taken from `MaziyarPanahi/Mixtral-8x7B-Instruct-v0.1-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const MIXTRAL_8X7B_INSTRUCT_V0_1: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MISTRAL,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1"),
            friendly_name: Cow::Borrowed("Mixtral 8x7B Instruct v0.1"),
            model_ctx_size: 32768u64,
            inference_ctx_size: 32768u64,
        },
        // Was "nvidia/Mixtral-8x7B-Instruct-v0.1". Mixtral is published by
        // Mistral AI (see `organization` above), whose Hugging Face namespace
        // is `mistralai`.
        model_repo_id: Cow::Borrowed("mistralai/Mixtral-8x7B-Instruct-v0.1"),
        gguf_repo_id: Cow::Borrowed("MaziyarPanahi/Mixtral-8x7B-Instruct-v0.1-GGUF"),
        number_of_parameters: 56f64,
        tokenizer_file_name: Some(Cow::Borrowed(
            "mistral.mixtral8x7b.instruct.v0.1.tokenizer.json",
        )),
        config: GgufPresetConfig {
            context_length: 32768u64,
            embedding_length: 4096u64,
            feed_forward_length: Some(14336u64),
            head_count: 32u64,
            head_count_kv: Some(8u64),
            block_count: 32u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32768u64,
            architecture: Cow::Borrowed("mistral"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2u8,
                fname: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1.Q2_K.gguf"),
                total_bytes: 17309173632u64,
            },
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1.Q3_K_M.gguf"),
                total_bytes: 22544394112u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1.Q4_K_M.gguf"),
                total_bytes: 28446410624u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1.Q5_K_M.gguf"),
                total_bytes: 33227523968u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1.Q6_K.gguf"),
                total_bytes: 38378760064u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Mixtral-8x7B-Instruct-v0.1.Q8_0.gguf"),
                total_bytes: 49624262528u64,
            },
        ]),
        preset_llm_id: GgufPresetId::Mixtral8x7BInstructV01,
    };
    /// Preset for Mistral AI's Mistral 7B Instruct v0.3, with GGUF files
    /// taken from `MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const MISTRAL_7B_INSTRUCT_V0_3: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MISTRAL,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Mistral-7B-Instruct-v0.3"),
            friendly_name: Cow::Borrowed("Mistral 7B Instruct v0.3"),
            model_ctx_size: 32768u64,
            inference_ctx_size: 32768u64,
        },
        // Was "mistral/...". Mistral AI's Hugging Face namespace is
        // `mistralai`, so the old id did not name the upstream repo.
        model_repo_id: Cow::Borrowed("mistralai/Mistral-7B-Instruct-v0.3"),
        gguf_repo_id: Cow::Borrowed("MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF"),
        number_of_parameters: 7f64,
        tokenizer_file_name: Some(Cow::Borrowed(
            "mistral.mistral7b.instruct.v0.3.tokenizer.json",
        )),
        config: GgufPresetConfig {
            context_length: 32768u64,
            embedding_length: 4096u64,
            feed_forward_length: Some(14336u64),
            head_count: 32u64,
            head_count_kv: Some(8u64),
            block_count: 32u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32768u64,
            architecture: Cow::Borrowed("mistral"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 1u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.IQ1_M.gguf"),
                total_bytes: 1757663392u64,
            },
            GgufPresetQuant {
                q_lvl: 2u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.Q2_K.gguf"),
                total_bytes: 2722877600u64,
            },
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.Q3_K_M.gguf"),
                total_bytes: 3522941088u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"),
                total_bytes: 4372811936u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.Q5_K_M.gguf"),
                total_bytes: 5136175264u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.Q6_K.gguf"),
                total_bytes: 5947248800u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Mistral-7B-Instruct-v0.3.Q8_0.gguf"),
                total_bytes: 7702565024u64,
            },
        ]),
        preset_llm_id: GgufPresetId::Mistral7BInstructV03,
    };
    /// Preset for Mistral AI's Mistral Nemo Instruct 2407, with GGUF files
    /// taken from `bartowski/Mistral-Nemo-Instruct-2407-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const MISTRAL_NEMO_INSTRUCT_2407: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MISTRAL,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Mistral-Nemo-Instruct-2407"),
            friendly_name: Cow::Borrowed("Mistral Nemo Instruct 2407"),
            model_ctx_size: 1024000u64,
            inference_ctx_size: 1024000u64,
        },
        // Was "mistral/...". Mistral AI's Hugging Face namespace is
        // `mistralai`, so the old id did not name the upstream repo.
        model_repo_id: Cow::Borrowed("mistralai/Mistral-Nemo-Instruct-2407"),
        gguf_repo_id: Cow::Borrowed("bartowski/Mistral-Nemo-Instruct-2407-GGUF"),
        number_of_parameters: 12f64,
        tokenizer_file_name: Some(Cow::Borrowed(
            "mistral.mistral.nemo.instruct.2407.tokenizer.json",
        )),
        config: GgufPresetConfig {
            context_length: 1024000u64,
            embedding_length: 5120u64,
            feed_forward_length: Some(14336u64),
            head_count: 32u64,
            head_count_kv: Some(8u64),
            block_count: 40u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 131072u64,
            architecture: Cow::Borrowed("mistral"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2u8,
                fname: Cow::Borrowed("Mistral-Nemo-Instruct-2407-Q2_K.gguf"),
                total_bytes: 4791051392u64,
            },
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Mistral-Nemo-Instruct-2407-Q3_K_M.gguf"),
                total_bytes: 6083093632u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Mistral-Nemo-Instruct-2407-Q4_K_M.gguf"),
                total_bytes: 7477208192u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Mistral-Nemo-Instruct-2407-Q5_K_M.gguf"),
                total_bytes: 8727635072u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Mistral-Nemo-Instruct-2407-Q6_K.gguf"),
                total_bytes: 10056213632u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Mistral-Nemo-Instruct-2407-Q8_0.gguf"),
                total_bytes: 13022372992u64,
            },
        ]),
        preset_llm_id: GgufPresetId::MistralNemoInstruct2407,
    };
    /// Preset for Mistral AI's Mistral Small 24B Instruct 2501, with GGUF
    /// files taken from `bartowski/Mistral-Small-24B-Instruct-2501-GGUF`.
    ///
    /// No tokenizer file name is recorded for this preset
    /// (`tokenizer_file_name` is `None`). `quants` holds one entry per GGUF
    /// file, in ascending `q_lvl` order.
    pub const MISTRAL_SMALL_24B_INSTRUCT_2501: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MISTRAL,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Mistral-Small-24B-Instruct-2501"),
            friendly_name: Cow::Borrowed("Mistral Small 24B Instruct 2501"),
            model_ctx_size: 32768u64,
            inference_ctx_size: 32768u64,
        },
        // Was "mistral/...". Mistral AI's Hugging Face namespace is
        // `mistralai`, so the old id did not name the upstream repo.
        model_repo_id: Cow::Borrowed("mistralai/Mistral-Small-24B-Instruct-2501"),
        gguf_repo_id: Cow::Borrowed("bartowski/Mistral-Small-24B-Instruct-2501-GGUF"),
        number_of_parameters: 24f64,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 32768u64,
            embedding_length: 5120u64,
            feed_forward_length: Some(32768u64),
            head_count: 32u64,
            head_count_kv: Some(8u64),
            block_count: 40u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 131072u64,
            architecture: Cow::Borrowed("mistral"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2u8,
                fname: Cow::Borrowed("Mistral-Small-24B-Instruct-2501-Q2_K.gguf"),
                total_bytes: 8890324672u64,
            },
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Mistral-Small-24B-Instruct-2501-Q3_K_M.gguf"),
                total_bytes: 11474081472u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf"),
                total_bytes: 14333908672u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Mistral-Small-24B-Instruct-2501-Q5_K_M.gguf"),
                total_bytes: 16763983552u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Mistral-Small-24B-Instruct-2501-Q6_K.gguf"),
                total_bytes: 19345938112u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Mistral-Small-24B-Instruct-2501-Q8_0.gguf"),
                total_bytes: 25054779072u64,
            },
        ]),
        preset_llm_id: GgufPresetId::MistralSmall24BInstruct2501,
    };
    /// Preset for Mistral AI's Mistral Small Instruct 2409, with GGUF files
    /// taken from `bartowski/Mistral-Small-Instruct-2409-GGUF`.
    ///
    /// `quants` holds one entry per GGUF file, in ascending `q_lvl` order.
    pub const MISTRAL_SMALL_INSTRUCT_2409: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MISTRAL,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Mistral-Small-Instruct-2409"),
            friendly_name: Cow::Borrowed("Mistral Small Instruct 2409"),
            model_ctx_size: 131072u64,
            inference_ctx_size: 131072u64,
        },
        // Was "mistral/...". Mistral AI's Hugging Face namespace is
        // `mistralai`, so the old id did not name the upstream repo.
        model_repo_id: Cow::Borrowed("mistralai/Mistral-Small-Instruct-2409"),
        gguf_repo_id: Cow::Borrowed("bartowski/Mistral-Small-Instruct-2409-GGUF"),
        // Was 12 (copied from the Nemo preset). The layer shape in `config`
        // (56 blocks, 6144-wide embeddings, 16384-wide feed-forward) and the
        // ~23.6 GB Q8_0 file below both correspond to a ~22B-parameter model.
        number_of_parameters: 22f64,
        tokenizer_file_name: Some(Cow::Borrowed(
            "mistral.mistral.small.instruct.2409.tokenizer.json",
        )),
        config: GgufPresetConfig {
            context_length: 131072u64,
            embedding_length: 6144u64,
            feed_forward_length: Some(16384u64),
            head_count: 48u64,
            head_count_kv: Some(8u64),
            block_count: 56u64,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32768u64,
            architecture: Cow::Borrowed("mistral"),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2u8,
                fname: Cow::Borrowed("Mistral-Small-Instruct-2409-Q2_K.gguf"),
                total_bytes: 8272098304u64,
            },
            GgufPresetQuant {
                q_lvl: 3u8,
                fname: Cow::Borrowed("Mistral-Small-Instruct-2409-Q3_K_M.gguf"),
                total_bytes: 10756830208u64,
            },
            GgufPresetQuant {
                q_lvl: 4u8,
                fname: Cow::Borrowed("Mistral-Small-Instruct-2409-Q4_K_M.gguf"),
                total_bytes: 13341242368u64,
            },
            GgufPresetQuant {
                q_lvl: 5u8,
                fname: Cow::Borrowed("Mistral-Small-Instruct-2409-Q5_K_M.gguf"),
                total_bytes: 15722558464u64,
            },
            GgufPresetQuant {
                q_lvl: 6u8,
                fname: Cow::Borrowed("Mistral-Small-Instruct-2409-Q6_K.gguf"),
                total_bytes: 18252706816u64,
            },
            GgufPresetQuant {
                q_lvl: 8u8,
                fname: Cow::Borrowed("Mistral-Small-Instruct-2409-Q8_0.gguf"),
                total_bytes: 23640552448u64,
            },
        ]),
        preset_llm_id: GgufPresetId::MistralSmallInstruct2409,
    };
    /// Preset for Stability AI's Stable LM 2 12B Chat, with GGUF files taken
    /// from `second-state/stablelm-2-12b-chat-GGUF`.
    ///
    /// No tokenizer file name is recorded for this preset
    /// (`tokenizer_file_name` is `None`). `quants` holds one entry per GGUF
    /// file, in ascending `q_lvl` order.
    pub const STABLE_LM_2_12B_CHAT: GgufPreset = GgufPreset {
        preset_llm_id: GgufPresetId::Stablelm212bChat,
        organization: LocalLlmOrganization::STABILITY_AI,
        model_repo_id: Cow::Borrowed("stabilityai/stablelm-2-12b-chat"),
        gguf_repo_id: Cow::Borrowed("second-state/stablelm-2-12b-chat-GGUF"),
        tokenizer_file_name: None,
        number_of_parameters: 12.0,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("stablelm-2-12b-chat"),
            friendly_name: Cow::Borrowed("Stable LM 2 12B Chat"),
            model_ctx_size: 4096,
            inference_ctx_size: 4096,
        },
        config: GgufPresetConfig {
            architecture: Cow::Borrowed("stablelm"),
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 100352,
            context_length: 4096,
            embedding_length: 5120,
            feed_forward_length: Some(13824),
            block_count: 40,
            head_count: 32,
            head_count_kv: Some(8),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                fname: Cow::Borrowed("stablelm-2-12b-chat-Q2_K.gguf"),
                q_lvl: 2,
                total_bytes: 4698894176,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("stablelm-2-12b-chat-Q3_K_M.gguf"),
                q_lvl: 3,
                total_bytes: 5993885536,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("stablelm-2-12b-chat-Q4_K_M.gguf"),
                q_lvl: 4,
                total_bytes: 7367642976,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("stablelm-2-12b-chat-Q5_K_M.gguf"),
                q_lvl: 5,
                total_bytes: 8627900256,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("stablelm-2-12b-chat-Q6_K.gguf"),
                q_lvl: 6,
                total_bytes: 9966923616,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("stablelm-2-12b-chat-Q8_0.gguf"),
                q_lvl: 8,
                total_bytes: 12907687776,
            },
        ]),
    };
    /// Preset for Alibaba's Qwen2.5 7B Instruct, with GGUF files taken from
    /// `bartowski/Qwen2.5-7B-Instruct-GGUF`.
    ///
    /// No tokenizer file name is recorded for this preset
    /// (`tokenizer_file_name` is `None`). `quants` holds one entry per GGUF
    /// file, in ascending `q_lvl` order.
    pub const QWEN2_5_7B_INSTRUCT: GgufPreset = GgufPreset {
        preset_llm_id: GgufPresetId::Qwen257BInstruct,
        organization: LocalLlmOrganization::ALIBABA,
        model_repo_id: Cow::Borrowed("Qwen/Qwen2.5-7B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Qwen2.5-7B-Instruct-GGUF"),
        tokenizer_file_name: None,
        number_of_parameters: 7.0,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Qwen2.5-7B-Instruct"),
            friendly_name: Cow::Borrowed("Qwen2.5 7B Instruct"),
            model_ctx_size: 32768,
            inference_ctx_size: 32768,
        },
        config: GgufPresetConfig {
            architecture: Cow::Borrowed("qwen2"),
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 152064,
            context_length: 32768,
            embedding_length: 3584,
            feed_forward_length: Some(18944),
            block_count: 28,
            head_count: 28,
            head_count_kv: Some(4),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-7B-Instruct-Q2_K.gguf"),
                q_lvl: 2,
                total_bytes: 3015940800,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-7B-Instruct-Q3_K_M.gguf"),
                q_lvl: 3,
                total_bytes: 3808391872,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-7B-Instruct-Q4_K_M.gguf"),
                q_lvl: 4,
                total_bytes: 4683074240,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-7B-Instruct-Q5_K_M.gguf"),
                q_lvl: 5,
                total_bytes: 5444831936,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-7B-Instruct-Q6_K.gguf"),
                q_lvl: 6,
                total_bytes: 6254199488,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-7B-Instruct-Q8_0.gguf"),
                q_lvl: 8,
                total_bytes: 8098525888,
            },
        ]),
    };
    /// Preset for Alibaba's Qwen2.5 32B Instruct, with GGUF files taken from
    /// `bartowski/Qwen2.5-32B-Instruct-GGUF`.
    ///
    /// No tokenizer file name is recorded for this preset
    /// (`tokenizer_file_name` is `None`). `quants` holds one entry per GGUF
    /// file, in ascending `q_lvl` order.
    pub const QWEN2_5_32B_INSTRUCT: GgufPreset = GgufPreset {
        preset_llm_id: GgufPresetId::Qwen2532BInstruct,
        organization: LocalLlmOrganization::ALIBABA,
        model_repo_id: Cow::Borrowed("Qwen/Qwen2.5-32B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Qwen2.5-32B-Instruct-GGUF"),
        tokenizer_file_name: None,
        number_of_parameters: 32.0,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Qwen2.5-32B-Instruct"),
            friendly_name: Cow::Borrowed("Qwen2.5 32B Instruct"),
            model_ctx_size: 32768,
            inference_ctx_size: 32768,
        },
        config: GgufPresetConfig {
            architecture: Cow::Borrowed("qwen2"),
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 152064,
            context_length: 32768,
            embedding_length: 5120,
            feed_forward_length: Some(27648),
            block_count: 64,
            head_count: 40,
            head_count_kv: Some(8),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-32B-Instruct-Q2_K.gguf"),
                q_lvl: 2,
                total_bytes: 12313099136,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-32B-Instruct-Q3_K_M.gguf"),
                q_lvl: 3,
                total_bytes: 15935048576,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-32B-Instruct-Q4_K_M.gguf"),
                q_lvl: 4,
                total_bytes: 19851336576,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-32B-Instruct-Q5_K_M.gguf"),
                q_lvl: 5,
                total_bytes: 23262157696,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-32B-Instruct-Q6_K.gguf"),
                q_lvl: 6,
                total_bytes: 26886155136,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-32B-Instruct-Q8_0.gguf"),
                q_lvl: 8,
                total_bytes: 34820885376,
            },
        ]),
    };
    /// Preset for Alibaba's Qwen2.5 14B Instruct, with GGUF files taken from
    /// `bartowski/Qwen2.5-14B-Instruct-GGUF`.
    ///
    /// No tokenizer file name is recorded for this preset
    /// (`tokenizer_file_name` is `None`). `quants` holds one entry per GGUF
    /// file, in ascending `q_lvl` order.
    pub const QWEN2_5_14B_INSTRUCT: GgufPreset = GgufPreset {
        preset_llm_id: GgufPresetId::Qwen2514BInstruct,
        organization: LocalLlmOrganization::ALIBABA,
        model_repo_id: Cow::Borrowed("Qwen/Qwen2.5-14B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Qwen2.5-14B-Instruct-GGUF"),
        tokenizer_file_name: None,
        number_of_parameters: 14.0,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Qwen2.5-14B-Instruct"),
            friendly_name: Cow::Borrowed("Qwen2.5 14B Instruct"),
            model_ctx_size: 32768,
            inference_ctx_size: 32768,
        },
        config: GgufPresetConfig {
            architecture: Cow::Borrowed("qwen2"),
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 152064,
            context_length: 32768,
            embedding_length: 5120,
            feed_forward_length: Some(13824),
            block_count: 48,
            head_count: 40,
            head_count_kv: Some(8),
            model_size_bytes: None,
        },
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-14B-Instruct-Q2_K.gguf"),
                q_lvl: 2,
                total_bytes: 5770498176,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-14B-Instruct-Q3_K_M.gguf"),
                q_lvl: 3,
                total_bytes: 7339204736,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-14B-Instruct-Q4_K_M.gguf"),
                q_lvl: 4,
                total_bytes: 8988110976,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-14B-Instruct-Q5_K_M.gguf"),
                q_lvl: 5,
                total_bytes: 10508873856,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-14B-Instruct-Q6_K.gguf"),
                q_lvl: 6,
                total_bytes: 12124684416,
            },
            GgufPresetQuant {
                fname: Cow::Borrowed("Qwen2.5-14B-Instruct-Q8_0.gguf"),
                q_lvl: 8,
                total_bytes: 15701598336,
            },
        ]),
    };
    /// Preset for Qwen2.5 3B Instruct (model repo `Qwen/Qwen2.5-3B-Instruct`).
    ///
    /// The GGUF repo id is `bartowski/Qwen2.5-3B-Instruct-GGUF`; one file is
    /// listed for each of the quantization levels 2, 3, 4, 5, 6 and 8. No
    /// tokenizer file name is set.
    pub const QWEN2_5_3B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::ALIBABA,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Qwen2.5-3B-Instruct"),
            friendly_name: Cow::Borrowed("Qwen2.5 3B Instruct"),
            model_ctx_size: 32_768,
            inference_ctx_size: 32_768,
        },
        model_repo_id: Cow::Borrowed("Qwen/Qwen2.5-3B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Qwen2.5-3B-Instruct-GGUF"),
        number_of_parameters: 3.0,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 32_768,
            embedding_length: 2048,
            feed_forward_length: Some(11_008),
            head_count: 16,
            head_count_kv: Some(2),
            block_count: 36,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 151_936,
            architecture: Cow::Borrowed("qwen2"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Qwen2.5-3B-Instruct-Q2_K.gguf"),
                total_bytes: 1_274_756_256,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Qwen2.5-3B-Instruct-Q3_K_M.gguf"),
                total_bytes: 1_590_475_936,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Qwen2.5-3B-Instruct-Q4_K_M.gguf"),
                total_bytes: 1_929_903_264,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Qwen2.5-3B-Instruct-Q5_K_M.gguf"),
                total_bytes: 2_224_815_264,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Qwen2.5-3B-Instruct-Q6_K.gguf"),
                total_bytes: 2_538_159_264,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("Qwen2.5-3B-Instruct-Q8_0.gguf"),
                total_bytes: 3_285_476_512,
            },
        ]),
        preset_llm_id: GgufPresetId::Qwen253BInstruct,
    };
    /// Preset for Granite 3.0 8b instruct (model repo `ibm/granite-3.0-8b-instruct`).
    ///
    /// The GGUF repo id is `bartowski/granite-3.0-8b-instruct-GGUF`; one file is
    /// listed for each of the quantization levels 2, 3, 4, 5, 6 and 8, where the
    /// level-3 entry is the `Q3_K_L` file. No tokenizer file name is set.
    pub const GRANITE_3_0_8B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::IBM,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("granite-3.0-8b-instruct"),
            friendly_name: Cow::Borrowed("Granite 3.0 8b instruct"),
            model_ctx_size: 4096,
            inference_ctx_size: 4096,
        },
        // NOTE(review): the Granite models may be published under the
        // `ibm-granite` organization rather than `ibm` — confirm this id resolves.
        model_repo_id: Cow::Borrowed("ibm/granite-3.0-8b-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/granite-3.0-8b-instruct-GGUF"),
        number_of_parameters: 8.0,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 4096,
            embedding_length: 4096,
            feed_forward_length: Some(12_800),
            head_count: 32,
            head_count_kv: Some(8),
            block_count: 40,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 49_155,
            architecture: Cow::Borrowed("granite"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("granite-3.0-8b-instruct-Q2_K.gguf"),
                total_bytes: 3_103_588_576,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("granite-3.0-8b-instruct-Q3_K_L.gguf"),
                total_bytes: 4_349_427_936,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("granite-3.0-8b-instruct-Q4_K_M.gguf"),
                total_bytes: 4_942_856_416,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("granite-3.0-8b-instruct-Q5_K_M.gguf"),
                total_bytes: 5_797_445_856,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("granite-3.0-8b-instruct-Q6_K.gguf"),
                total_bytes: 6_705_447_136,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("granite-3.0-8b-instruct-Q8_0.gguf"),
                total_bytes: 8_684_244_096,
            },
        ]),
        preset_llm_id: GgufPresetId::Granite308bInstruct,
    };
    /// Preset for Granite 3.0 2b instruct (model repo `ibm/granite-3.0-2b-instruct`).
    ///
    /// The GGUF repo id is `bartowski/granite-3.0-2b-instruct-GGUF`; one file is
    /// listed for each of the quantization levels 2, 3, 4, 5, 6 and 8, where the
    /// level-3 entry is the `Q3_K_L` file. No tokenizer file name is set.
    pub const GRANITE_3_0_2B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::IBM,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("granite-3.0-2b-instruct"),
            friendly_name: Cow::Borrowed("Granite 3.0 2b instruct"),
            model_ctx_size: 4096,
            inference_ctx_size: 4096,
        },
        // NOTE(review): the Granite models may be published under the
        // `ibm-granite` organization rather than `ibm` — confirm this id resolves.
        model_repo_id: Cow::Borrowed("ibm/granite-3.0-2b-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/granite-3.0-2b-instruct-GGUF"),
        number_of_parameters: 2.0,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 4096,
            embedding_length: 2048,
            feed_forward_length: Some(8192),
            head_count: 32,
            head_count_kv: Some(8),
            block_count: 40,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 49_155,
            architecture: Cow::Borrowed("granite"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("granite-3.0-2b-instruct-Q2_K.gguf"),
                total_bytes: 1_011_275_040,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("granite-3.0-2b-instruct-Q3_K_L.gguf"),
                total_bytes: 1_400_625_056,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("granite-3.0-2b-instruct-Q4_K_M.gguf"),
                total_bytes: 1_601_919_680,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("granite-3.0-2b-instruct-Q5_K_M.gguf"),
                total_bytes: 1_874_025_920,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("granite-3.0-2b-instruct-Q6_K.gguf"),
                total_bytes: 2_163_138_816,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("granite-3.0-2b-instruct-Q8_0.gguf"),
                total_bytes: 2_801_069_184,
            },
        ]),
        preset_llm_id: GgufPresetId::Granite302bInstruct,
    };
    /// Preset for SuperNova Medius (model repo `arcee-ai/SuperNova-Medius`).
    ///
    /// The GGUF repo id is `arcee-ai/SuperNova-Medius-GGUF`; one file is listed
    /// for each of the quantization levels 2, 3, 4, 5, 6 and 8. The config uses
    /// the `qwen2` architecture. No tokenizer file name is set.
    pub const SUPERNOVA_MEDIUS: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::ARCEE_AI,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("SuperNova-Medius"),
            friendly_name: Cow::Borrowed("SuperNova Medius"),
            model_ctx_size: 131_072,
            inference_ctx_size: 131_072,
        },
        // Repo ids take the form `<organization>/<name>`; the organization
        // segment must appear exactly once.
        model_repo_id: Cow::Borrowed("arcee-ai/SuperNova-Medius"),
        gguf_repo_id: Cow::Borrowed("arcee-ai/SuperNova-Medius-GGUF"),
        number_of_parameters: 13.0,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 131_072,
            embedding_length: 5120,
            feed_forward_length: Some(13_824),
            head_count: 40,
            head_count_kv: Some(8),
            block_count: 48,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 152_064,
            architecture: Cow::Borrowed("qwen2"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("SuperNova-Medius-Q2_K.gguf"),
                total_bytes: 5_770_498_592,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("SuperNova-Medius-Q3_K_M.gguf"),
                total_bytes: 7_339_205_152,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("SuperNova-Medius-Q4_K_M.gguf"),
                total_bytes: 8_988_111_392,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("SuperNova-Medius-Q5_K_M.gguf"),
                total_bytes: 10_508_874_272,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("SuperNova-Medius-Q6_K.gguf"),
                total_bytes: 12_124_684_832,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("SuperNova-Medius-Q8_0.gguf"),
                total_bytes: 15_701_598_752,
            },
        ]),
        preset_llm_id: GgufPresetId::SuperNovaMedius,
    };
    /// Preset for Llama 3.1 Nemotron 70B Instruct
    /// (model repo `nvidia/Llama-3.1-Nemotron-70B-Instruct-HF`).
    ///
    /// The GGUF repo id is `bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF`.
    /// Only quantization levels 2, 3, 4 and 5 are listed, and the level-5 entry
    /// is the `Q5_K_S` file. The tokenizer file name is
    /// `meta.llama3.1.8b.instruct.tokenizer.json`.
    pub const LLAMA_3_1_NEMOTRON_70B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::NVIDIA,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Llama-3.1-Nemotron-70B-Instruct"),
            friendly_name: Cow::Borrowed("Llama 3.1 Nemotron 70B Instruct"),
            model_ctx_size: 131_072,
            inference_ctx_size: 131_072,
        },
        model_repo_id: Cow::Borrowed("nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"),
        gguf_repo_id: Cow::Borrowed("bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF"),
        number_of_parameters: 70.0,
        tokenizer_file_name: Some(Cow::Borrowed("meta.llama3.1.8b.instruct.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 131_072,
            embedding_length: 8192,
            feed_forward_length: Some(28_672),
            head_count: 64,
            head_count_kv: Some(8),
            block_count: 80,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 128_256,
            architecture: Cow::Borrowed("llama"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Llama-3.1-Nemotron-70B-Instruct-HF-Q2_K.gguf"),
                total_bytes: 26_375_113_632,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Llama-3.1-Nemotron-70B-Instruct-HF-Q3_K_M.gguf"),
                total_bytes: 34_267_499_424,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Llama-3.1-Nemotron-70B-Instruct-HF-Q4_K_M.gguf"),
                total_bytes: 42_520_398_752,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Llama-3.1-Nemotron-70B-Instruct-HF-Q5_K_S.gguf"),
                total_bytes: 48_657_451_936,
            },
        ]),
        preset_llm_id: GgufPresetId::Llama31Nemotron70BInstruct,
    };
    /// Preset for Llama 3.1 Nemotron 51B Instruct
    /// (model repo `nvidia/Llama-3_1-Nemotron-51B-Instruct`).
    ///
    /// The GGUF repo id is `bartowski/Llama-3_1-Nemotron-51B-Instruct-GGUF`.
    /// Quantization levels 2, 3, 4, 5 and 6 are listed. The config uses the
    /// `nemotron-nas` architecture and carries no `feed_forward_length`. The
    /// tokenizer file name is `meta.llama3.1.8b.instruct.tokenizer.json`.
    pub const LLAMA_3_1_NEMOTRON_51B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::NVIDIA,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Llama-3_1-Nemotron-51B-Instruct"),
            friendly_name: Cow::Borrowed("Llama 3.1 Nemotron 51B Instruct"),
            model_ctx_size: 131_072,
            inference_ctx_size: 131_072,
        },
        model_repo_id: Cow::Borrowed("nvidia/Llama-3_1-Nemotron-51B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Llama-3_1-Nemotron-51B-Instruct-GGUF"),
        number_of_parameters: 52.0,
        tokenizer_file_name: Some(Cow::Borrowed("meta.llama3.1.8b.instruct.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 131_072,
            embedding_length: 8192,
            feed_forward_length: None,
            head_count: 64,
            head_count_kv: Some(8),
            block_count: 80,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 128_256,
            architecture: Cow::Borrowed("nemotron-nas"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Llama-3_1-Nemotron-51B-Instruct-Q2_K.gguf"),
                total_bytes: 19_418_642_688,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Llama-3_1-Nemotron-51B-Instruct-Q3_K_M.gguf"),
                total_bytes: 25_182_345_472,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Llama-3_1-Nemotron-51B-Instruct-Q4_K_M.gguf"),
                total_bytes: 31_037_307_136,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Llama-3_1-Nemotron-51B-Instruct-Q5_K_M.gguf"),
                total_bytes: 36_465_391_872,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Llama-3_1-Nemotron-51B-Instruct-Q6_K.gguf"),
                total_bytes: 42_258_774_272,
            },
        ]),
        preset_llm_id: GgufPresetId::Llama31Nemotron51BInstruct,
    };
    /// Preset for Mistral NeMo Minitron 8B Instruct
    /// (model repo `nvidia/Mistral-NeMo-Minitron-8B-Instruct`).
    ///
    /// The GGUF repo id is `bartowski/Mistral-NeMo-Minitron-8B-Instruct-GGUF`;
    /// one file is listed for each of the quantization levels 2, 3, 4, 5, 6
    /// and 8. No tokenizer file name is set.
    pub const MISTRAL_NEMO_MINITRON_8B_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::NVIDIA,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct"),
            friendly_name: Cow::Borrowed("Mistral NeMo Minitron 8B Instruct"),
            model_ctx_size: 8192,
            inference_ctx_size: 8192,
        },
        model_repo_id: Cow::Borrowed("nvidia/Mistral-NeMo-Minitron-8B-Instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Mistral-NeMo-Minitron-8B-Instruct-GGUF"),
        number_of_parameters: 8.0,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 8192,
            embedding_length: 4096,
            feed_forward_length: Some(11_520),
            head_count: 32,
            head_count_kv: Some(8),
            block_count: 40,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 131_072,
            architecture: Cow::Borrowed("mistral"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct-Q2_K.gguf"),
                total_bytes: 3_333_392_064,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct-Q3_K_M.gguf"),
                total_bytes: 4_209_149_632,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct-Q4_K_M.gguf"),
                total_bytes: 5_145_298_624,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct-Q5_K_M.gguf"),
                total_bytes: 6_001_460_928,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct-Q6_K.gguf"),
                total_bytes: 6_911_133_376,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("Mistral-NeMo-Minitron-8B-Instruct-Q8_0.gguf"),
                total_bytes: 8_948_844_224,
            },
        ]),
        preset_llm_id: GgufPresetId::MistralNeMoMinitron8BInstruct,
    };
    /// Preset for Phi 3.5 mini instruct (model repo `microsoft/Phi-3.5-mini-instruct`).
    ///
    /// The GGUF repo id is `bartowski/Phi-3.5-mini-instruct-GGUF`; one file is
    /// listed for each of the quantization levels 2, 3, 4, 5, 6 and 8. The
    /// tokenizer file name is `microsoft.tokenizer.json`.
    pub const PHI_3_5_MINI_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MICROSOFT,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Phi-3.5-mini-instruct"),
            friendly_name: Cow::Borrowed("Phi 3.5 mini instruct"),
            model_ctx_size: 131_072,
            inference_ctx_size: 131_072,
        },
        model_repo_id: Cow::Borrowed("microsoft/Phi-3.5-mini-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Phi-3.5-mini-instruct-GGUF"),
        number_of_parameters: 4.0,
        tokenizer_file_name: Some(Cow::Borrowed("microsoft.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 131_072,
            embedding_length: 3072,
            feed_forward_length: Some(8192),
            head_count: 32,
            head_count_kv: Some(32),
            block_count: 32,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32_064,
            architecture: Cow::Borrowed("phi3"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Phi-3.5-mini-instruct-Q2_K.gguf"),
                total_bytes: 1_416_204_576,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Phi-3.5-mini-instruct-Q3_K_M.gguf"),
                total_bytes: 1_955_477_280,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Phi-3.5-mini-instruct-Q4_K_M.gguf"),
                total_bytes: 2_393_232_672,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Phi-3.5-mini-instruct-Q5_K_M.gguf"),
                total_bytes: 2_815_276_320,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Phi-3.5-mini-instruct-Q6_K.gguf"),
                total_bytes: 3_135_853_344,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("Phi-3.5-mini-instruct-Q8_0.gguf"),
                total_bytes: 4_061_222_688,
            },
        ]),
        preset_llm_id: GgufPresetId::Phi35MiniInstruct,
    };
    /// Preset for Phi 3 medium 4k instruct
    /// (model repo `microsoft/Phi-3-medium-4k-instruct`).
    ///
    /// The GGUF repo id is `bartowski/Phi-3-medium-4k-instruct-GGUF`; one file
    /// is listed for each of the quantization levels 2, 3, 4, 5, 6 and 8. The
    /// tokenizer file name is `microsoft.tokenizer.json`.
    pub const PHI_3_MEDIUM_4K_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MICROSOFT,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Phi-3-medium-4k-instruct"),
            friendly_name: Cow::Borrowed("Phi 3 medium 4k instruct"),
            model_ctx_size: 4096,
            inference_ctx_size: 4096,
        },
        model_repo_id: Cow::Borrowed("microsoft/Phi-3-medium-4k-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Phi-3-medium-4k-instruct-GGUF"),
        number_of_parameters: 14.0,
        tokenizer_file_name: Some(Cow::Borrowed("microsoft.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 4096,
            embedding_length: 5120,
            feed_forward_length: Some(17_920),
            head_count: 40,
            head_count_kv: Some(10),
            block_count: 40,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32_064,
            architecture: Cow::Borrowed("phi3"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Phi-3-medium-4k-instruct-Q2_K.gguf"),
                total_bytes: 5_143_000_448,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Phi-3-medium-4k-instruct-Q3_K_M.gguf"),
                total_bytes: 6_923_411_328,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Phi-3-medium-4k-instruct-Q4_K_M.gguf"),
                total_bytes: 8_566_821_248,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Phi-3-medium-4k-instruct-Q5_K_M.gguf"),
                total_bytes: 10_074_190_208,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Phi-3-medium-4k-instruct-Q6_K.gguf"),
                total_bytes: 11_453_817_728,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("Phi-3-medium-4k-instruct-Q8_0.gguf"),
                total_bytes: 14_834_712_448,
            },
        ]),
        preset_llm_id: GgufPresetId::Phi3Medium4kInstruct,
    };
    /// Preset for Phi-4-mini-instruct (model repo `microsoft/Phi-4-mini-instruct`).
    ///
    /// The GGUF repo id is `bartowski/microsoft_Phi-4-mini-instruct-GGUF`; one
    /// file is listed for each of the quantization levels 2, 3, 4, 5, 6 and 8.
    /// No tokenizer file name is set.
    ///
    /// The trailing underscore in the constant's name is retained so that
    /// existing references to `GgufPreset::PHI_4_MINI_INSTRUCT_` keep compiling.
    pub const PHI_4_MINI_INSTRUCT_: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MICROSOFT,
        model_base: LlmModelBase {
            // No trailing whitespace: `preset_from_str` matches `model_id` by
            // exact string equality, so a stray space makes the preset
            // unreachable under its natural id.
            model_id: Cow::Borrowed("phi-4-mini-instruct"),
            friendly_name: Cow::Borrowed("Phi-4-mini-instruct"),
            model_ctx_size: 131_072,
            inference_ctx_size: 131_072,
        },
        model_repo_id: Cow::Borrowed("microsoft/Phi-4-mini-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/microsoft_Phi-4-mini-instruct-GGUF"),
        number_of_parameters: 3.84,
        tokenizer_file_name: None,
        config: GgufPresetConfig {
            context_length: 131_072,
            embedding_length: 3072,
            feed_forward_length: Some(8192),
            head_count: 24,
            head_count_kv: Some(8),
            block_count: 32,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 200_064,
            architecture: Cow::Borrowed("phi3"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("microsoft_Phi-4-mini-instruct-Q2_K.gguf"),
                total_bytes: 1_682_636_160,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("microsoft_Phi-4-mini-instruct-Q3_K_M.gguf"),
                total_bytes: 2_117_533_056,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("microsoft_Phi-4-mini-instruct-Q4_K_M.gguf"),
                total_bytes: 2_491_874_688,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("microsoft_Phi-4-mini-instruct-Q5_K_M.gguf"),
                total_bytes: 2_848_128_384,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("microsoft_Phi-4-mini-instruct-Q6_K.gguf"),
                total_bytes: 3_155_623_296,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("microsoft_Phi-4-mini-instruct-Q8_0.gguf"),
                total_bytes: 4_084_611_456,
            },
        ]),
        preset_llm_id: GgufPresetId::Phi4MiniInstruct,
    };
    /// Preset for Phi 3 mini 4k instruct (model repo `microsoft/Phi-3-mini-4k-instruct`).
    ///
    /// The GGUF repo id is `bartowski/Phi-3-mini-4k-instruct-GGUF`. Besides
    /// quantization levels 2, 3, 4, 5, 6 and 8, this preset also lists a
    /// level-1 entry (the `IQ1_M` file). The tokenizer file name is
    /// `microsoft.tokenizer.json`.
    pub const PHI_3_MINI_4K_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MICROSOFT,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Phi-3-mini-4k-instruct"),
            friendly_name: Cow::Borrowed("Phi 3 mini 4k instruct"),
            model_ctx_size: 4096,
            inference_ctx_size: 4096,
        },
        model_repo_id: Cow::Borrowed("microsoft/Phi-3-mini-4k-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Phi-3-mini-4k-instruct-GGUF"),
        number_of_parameters: 4.0,
        tokenizer_file_name: Some(Cow::Borrowed("microsoft.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 4096,
            embedding_length: 3072,
            feed_forward_length: Some(8192),
            head_count: 32,
            head_count_kv: Some(32),
            block_count: 32,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32_064,
            architecture: Cow::Borrowed("phi3"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 1,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-IQ1_M.gguf"),
                total_bytes: 917_106_176,
            },
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-Q2_K.gguf"),
                total_bytes: 1_416_203_264,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-Q3_K_M.gguf"),
                total_bytes: 1_955_475_968,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-Q4_K_M.gguf"),
                total_bytes: 2_393_231_360,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-Q5_K_M.gguf"),
                total_bytes: 2_815_275_008,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-Q6_K.gguf"),
                total_bytes: 3_135_852_032,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("Phi-3-mini-4k-instruct-Q8_0.gguf"),
                total_bytes: 4_061_221_376,
            },
        ]),
        preset_llm_id: GgufPresetId::Phi3Mini4kInstruct,
    };
    /// Preset for Phi-4 (model repo `microsoft/phi-4`).
    ///
    /// The GGUF repo id is `bartowski/phi-4-GGUF`; one file is listed for each
    /// of the quantization levels 2, 3, 4, 5, 6 and 8. The tokenizer file name
    /// is `microsoft.phi4.tokenizer.json`.
    pub const PHI_4: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MICROSOFT,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("phi-4"),
            friendly_name: Cow::Borrowed("Phi-4"),
            model_ctx_size: 16_384,
            inference_ctx_size: 16_384,
        },
        model_repo_id: Cow::Borrowed("microsoft/phi-4"),
        gguf_repo_id: Cow::Borrowed("bartowski/phi-4-GGUF"),
        number_of_parameters: 14.0,
        tokenizer_file_name: Some(Cow::Borrowed("microsoft.phi4.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 16_384,
            embedding_length: 5120,
            feed_forward_length: Some(17_920),
            head_count: 40,
            head_count_kv: Some(10),
            block_count: 40,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 100_352,
            architecture: Cow::Borrowed("phi3"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("phi-4-Q2_K.gguf"),
                total_bytes: 5_547_348_416,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("phi-4-Q3_K_M.gguf"),
                total_bytes: 7_363_269_056,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("phi-4-Q4_K_M.gguf"),
                total_bytes: 9_053_114_816,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("phi-4-Q5_K_M.gguf"),
                total_bytes: 10_604_188_096,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("phi-4-Q6_K.gguf"),
                total_bytes: 12_030_251_456,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("phi-4-Q8_0.gguf"),
                total_bytes: 15_580_500_416,
            },
        ]),
        preset_llm_id: GgufPresetId::Phi4,
    };
    /// Preset for Phi 3.5 MoE instruct (model repo `microsoft/Phi-3.5-MoE-instruct`).
    ///
    /// The GGUF repo id is `bartowski/Phi-3.5-MoE-instruct-GGUF`; one file is
    /// listed for each of the quantization levels 2, 3, 4, 5, 6 and 8. The
    /// config uses the `phimoe` architecture. The tokenizer file name is
    /// `microsoft.tokenizer.json`.
    pub const PHI_3_5_MOE_INSTRUCT: GgufPreset = GgufPreset {
        organization: LocalLlmOrganization::MICROSOFT,
        model_base: LlmModelBase {
            model_id: Cow::Borrowed("Phi-3.5-MoE-instruct"),
            friendly_name: Cow::Borrowed("Phi 3.5 MoE instruct"),
            model_ctx_size: 131_072,
            inference_ctx_size: 131_072,
        },
        model_repo_id: Cow::Borrowed("microsoft/Phi-3.5-MoE-instruct"),
        gguf_repo_id: Cow::Borrowed("bartowski/Phi-3.5-MoE-instruct-GGUF"),
        // NOTE(review): 7 is small next to the file sizes below (the level-8
        // file is ~44.5 GB); presumably this is an active-parameter count for a
        // mixture-of-experts model — confirm which count consumers expect.
        number_of_parameters: 7.0,
        tokenizer_file_name: Some(Cow::Borrowed("microsoft.tokenizer.json")),
        config: GgufPresetConfig {
            context_length: 131_072,
            embedding_length: 4096,
            feed_forward_length: Some(6400),
            head_count: 32,
            head_count_kv: Some(8),
            block_count: 32,
            torch_dtype: Cow::Borrowed("bfloat16"),
            vocab_size: 32_064,
            architecture: Cow::Borrowed("phimoe"),
            model_size_bytes: None,
        },
        // Entries are ordered by ascending quantization level.
        quants: Cow::Borrowed(&[
            GgufPresetQuant {
                q_lvl: 2,
                fname: Cow::Borrowed("Phi-3.5-MoE-instruct-Q2_K.gguf"),
                total_bytes: 15_265_136_480,
            },
            GgufPresetQuant {
                q_lvl: 3,
                fname: Cow::Borrowed("Phi-3.5-MoE-instruct-Q3_K_M.gguf"),
                total_bytes: 20_032_718_688,
            },
            GgufPresetQuant {
                q_lvl: 4,
                fname: Cow::Borrowed("Phi-3.5-MoE-instruct-Q4_K_M.gguf"),
                total_bytes: 25_345_994_592,
            },
            GgufPresetQuant {
                q_lvl: 5,
                fname: Cow::Borrowed("Phi-3.5-MoE-instruct-Q5_K_M.gguf"),
                total_bytes: 29_716_098_912,
            },
            GgufPresetQuant {
                q_lvl: 6,
                fname: Cow::Borrowed("Phi-3.5-MoE-instruct-Q6_K.gguf"),
                total_bytes: 34_359_334_752,
            },
            GgufPresetQuant {
                q_lvl: 8,
                fname: Cow::Borrowed("Phi-3.5-MoE-instruct-Q8_0.gguf"),
                total_bytes: 44_499_765_088,
            },
        ]),
        preset_llm_id: GgufPresetId::Phi35MoEInstruct,
    };
}
pub trait GgufPresetTrait {
    /// Gives mutable access to the implementor's [`GgufPreset`].
    ///
    /// The provided methods of this trait select a model by overwriting the
    /// value behind the returned reference.
    fn preset(&mut self) -> &mut GgufPreset;
    /// Selects the preset whose `model_base.model_id` equals `selected_model_id`.
    ///
    /// The comparison is an exact string match against every entry of
    /// `GgufPreset::ALL_MODELS`; the first match is stored through
    /// [`preset`](Self::preset) and `self` is handed back.
    ///
    /// # Errors
    ///
    /// Returns an `Invalid selected_model_id: …` error when no preset matches.
    fn preset_from_str(mut self, selected_model_id: &str) -> crate::Result<Self>
    where
        Self: Sized,
    {
        let mut candidates = GgufPreset::ALL_MODELS.into_iter();
        let matched =
            candidates.find(|candidate| candidate.model_base.model_id == selected_model_id);
        let chosen = matched
            .ok_or_else(|| crate::anyhow!("Invalid selected_model_id: {}", selected_model_id))?;

        let slot = self.preset();
        *slot = chosen;
        Ok(self)
    }
    fn llama_3_2_1b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::LLAMA_3_2_1B_INSTRUCT;
        self
    }
    fn llama_3_2_3b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::LLAMA_3_2_3B_INSTRUCT;
        self
    }
    fn llama_3_1_8b_instruct_gguf(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::LLAMA_3_1_8B_INSTRUCT;
        self
    }
    fn mixtral_8x7b_instruct_v0_1(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::MIXTRAL_8X7B_INSTRUCT_V0_1;
        self
    }
    fn mistral_7b_instruct_v0_3(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::MISTRAL_7B_INSTRUCT_V0_3;
        self
    }
    fn mistral_nemo_instruct_2407(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::MISTRAL_NEMO_INSTRUCT_2407;
        self
    }
    fn mistral_small_24b_instruct_2501(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::MISTRAL_SMALL_24B_INSTRUCT_2501;
        self
    }
    fn mistral_small_instruct_2409(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::MISTRAL_SMALL_INSTRUCT_2409;
        self
    }
    fn stablelm_2_12b_chat(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::STABLE_LM_2_12B_CHAT;
        self
    }
    fn qwen2_5_7b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::QWEN2_5_7B_INSTRUCT;
        self
    }
    fn qwen2_5_32b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::QWEN2_5_32B_INSTRUCT;
        self
    }
    fn qwen2_5_14b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::QWEN2_5_14B_INSTRUCT;
        self
    }
    fn qwen2_5_3b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::QWEN2_5_3B_INSTRUCT;
        self
    }
    fn granite_3_0_8b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::GRANITE_3_0_8B_INSTRUCT;
        self
    }
    fn granite_3_0_2b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::GRANITE_3_0_2B_INSTRUCT;
        self
    }
    fn supernova_medius(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::SUPERNOVA_MEDIUS;
        self
    }
    fn llama_3_1_nemotron_70b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::LLAMA_3_1_NEMOTRON_70B_INSTRUCT;
        self
    }
    fn llama_3_1_nemotron_51b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::LLAMA_3_1_NEMOTRON_51B_INSTRUCT;
        self
    }
    fn mistral_nemo_minitron_8b_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::MISTRAL_NEMO_MINITRON_8B_INSTRUCT;
        self
    }
    fn phi_3_5_mini_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::PHI_3_5_MINI_INSTRUCT;
        self
    }
    fn phi_3_medium_4k_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::PHI_3_MEDIUM_4K_INSTRUCT;
        self
    }
    fn phi_4_mini_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::PHI_4_MINI_INSTRUCT_;
        self
    }
    fn phi_3_mini_4k_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::PHI_3_MINI_4K_INSTRUCT;
        self
    }
    fn phi_4(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::PHI_4;
        self
    }
    fn phi_3_5_moe_instruct(mut self) -> Self
    where
        Self: Sized,
    {
        *self.preset() = GgufPreset::PHI_3_5_MOE_INSTRUCT;
        self
    }
}