// Naming convention used by the constants below:
//   * `<PUBLISHER>_..._GGUF`  - repo id string in `owner/name` form.
//   * `..._GGUF_FILE`         - name of the `.gguf` file for that model
//                               (all of them name a `Q4_K_M` variant).
//   * `..._TOK_MODEL_ID`      - id handed back by `tok_model_id_for_repo` for
//                               the matching repo (presumably where the
//                               tokenizer is fetched from; confirm with callers).

// ---------------------------------------------------------------------------
// Qwen 2.5 (chat)
// ---------------------------------------------------------------------------

/// GGUF repo id for Qwen 2.5 1.5B Instruct.
pub const BARTOWSKI_QWEN25_1_5B_INSTRUCT_GGUF: &str = "bartowski/Qwen2.5-1.5B-Instruct-GGUF";
/// GGUF file name for Qwen 2.5 1.5B Instruct.
pub const QWEN25_1_5B_GGUF_FILE: &str = "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf";
/// Tokenizer-side model id paired with the Qwen 2.5 1.5B Instruct GGUF repo.
pub const QWEN25_1_5B_TOK_MODEL_ID: &str = "Qwen/Qwen2.5-1.5B-Instruct";

/// GGUF repo id for Qwen 2.5 3B Instruct.
pub const BARTOWSKI_QWEN25_3B_INSTRUCT_GGUF: &str = "bartowski/Qwen2.5-3B-Instruct-GGUF";
/// GGUF file name for Qwen 2.5 3B Instruct.
pub const QWEN25_3B_GGUF_FILE: &str = "Qwen2.5-3B-Instruct-Q4_K_M.gguf";
/// Tokenizer-side model id paired with the Qwen 2.5 3B Instruct GGUF repo.
pub const QWEN25_3B_TOK_MODEL_ID: &str = "Qwen/Qwen2.5-3B-Instruct";

// ---------------------------------------------------------------------------
// Qwen 2.5 Coder
// ---------------------------------------------------------------------------

/// Qwen 2.5 Coder 7B Instruct id without the GGUF suffix.
/// Holds the same string as `QWEN25_CODER_7B_TOK_MODEL_ID`.
pub const QWEN25_CODER_7B_INSTRUCT: &str = "Qwen/Qwen2.5-Coder-7B-Instruct";

/// GGUF repo id for Qwen 2.5 Coder 1.5B Instruct.
pub const BARTOWSKI_QWEN25_CODER_1_5B_INSTRUCT_GGUF: &str =
    "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF";
/// GGUF file name for Qwen 2.5 Coder 1.5B Instruct.
pub const QWEN25_CODER_1_5B_GGUF_FILE: &str = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf";
/// Tokenizer-side model id paired with the Qwen 2.5 Coder 1.5B Instruct GGUF repo.
pub const QWEN25_CODER_1_5B_TOK_MODEL_ID: &str = "Qwen/Qwen2.5-Coder-1.5B-Instruct";

/// GGUF repo id for Qwen 2.5 Coder 3B Instruct.
pub const BARTOWSKI_QWEN25_CODER_3B_INSTRUCT_GGUF: &str =
    "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF";
/// GGUF file name for Qwen 2.5 Coder 3B Instruct.
pub const QWEN25_CODER_3B_GGUF_FILE: &str = "Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf";
/// Tokenizer-side model id paired with the Qwen 2.5 Coder 3B Instruct GGUF repo.
pub const QWEN25_CODER_3B_TOK_MODEL_ID: &str = "Qwen/Qwen2.5-Coder-3B-Instruct";

/// GGUF repo id for Qwen 2.5 Coder 7B Instruct.
pub const BARTOWSKI_QWEN25_CODER_7B_INSTRUCT_GGUF: &str =
    "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF";
/// GGUF file name for Qwen 2.5 Coder 7B Instruct.
pub const QWEN25_CODER_7B_GGUF_FILE: &str = "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf";
/// Tokenizer-side model id paired with the Qwen 2.5 Coder 7B Instruct GGUF repo.
pub const QWEN25_CODER_7B_TOK_MODEL_ID: &str = "Qwen/Qwen2.5-Coder-7B-Instruct";

// ---------------------------------------------------------------------------
// Qwen 3 (no `..._TOK_MODEL_ID` constants are declared for this family)
// ---------------------------------------------------------------------------

/// GGUF repo id for Qwen 3 1.7B.
pub const BARTOWSKI_QWEN3_1_7B_GGUF: &str = "bartowski/Qwen_Qwen3-1.7B-GGUF";
/// GGUF file name for Qwen 3 1.7B.
pub const QWEN3_1_7B_GGUF_FILE: &str = "Qwen_Qwen3-1.7B-Q4_K_M.gguf";

/// GGUF repo id for Qwen 3 4B.
pub const BARTOWSKI_QWEN3_4B_GGUF: &str = "bartowski/Qwen_Qwen3-4B-GGUF";
/// GGUF file name for Qwen 3 4B.
pub const QWEN3_4B_GGUF_FILE: &str = "Qwen_Qwen3-4B-Q4_K_M.gguf";

/// GGUF repo id for Qwen 3 8B.
pub const BARTOWSKI_QWEN3_8B_GGUF: &str = "bartowski/Qwen_Qwen3-8B-GGUF";
/// GGUF file name for Qwen 3 8B.
pub const QWEN3_8B_GGUF_FILE: &str = "Qwen_Qwen3-8B-Q4_K_M.gguf";

/// GGUF repo id for Qwen 3 14B.
pub const BARTOWSKI_QWEN3_14B_GGUF: &str = "bartowski/Qwen_Qwen3-14B-GGUF";
/// GGUF file name for Qwen 3 14B.
pub const QWEN3_14B_GGUF_FILE: &str = "Qwen_Qwen3-14B-Q4_K_M.gguf";

// ---------------------------------------------------------------------------
// DeepSeek Coder
// ---------------------------------------------------------------------------

/// GGUF repo id for DeepSeek Coder 6.7B Instruct.
pub const THEBLOKE_DEEPSEEK_CODER_6_7B_INSTRUCT_GGUF: &str =
    "TheBloke/deepseek-coder-6.7B-instruct-GGUF";
/// GGUF file name for DeepSeek Coder 6.7B Instruct.
pub const DEEPSEEK_CODER_6_7B_GGUF_FILE: &str = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf";
/// Tokenizer-side model id paired with the DeepSeek Coder 6.7B Instruct GGUF repo.
pub const DEEPSEEK_CODER_6_7B_TOK_MODEL_ID: &str = "deepseek-ai/deepseek-coder-6.7b-instruct";
/// Repo ids of the models treated as supported.
///
/// The entries appear in the same order as the records in
/// `SUPPORTED_MODEL_INFO`. The Qwen 2.5 Coder 1.5B and 3B repo constants are
/// declared in this file but are not part of this list.
pub const SUPPORTED_MODELS: &[&str] = &[
    // Qwen 2.5 chat models.
    BARTOWSKI_QWEN25_1_5B_INSTRUCT_GGUF,
    BARTOWSKI_QWEN25_3B_INSTRUCT_GGUF,
    // Qwen 3 models.
    BARTOWSKI_QWEN3_4B_GGUF,
    BARTOWSKI_QWEN3_8B_GGUF,
    BARTOWSKI_QWEN3_14B_GGUF,
    BARTOWSKI_QWEN3_1_7B_GGUF,
    // Code-oriented models.
    BARTOWSKI_QWEN25_CODER_7B_INSTRUCT_GGUF,
    THEBLOKE_DEEPSEEK_CODER_6_7B_INSTRUCT_GGUF,
];
/// Static, human-facing metadata describing one supported model.
///
/// Every field is either a `'static` string slice or an integer, so the type
/// is cheap to copy. The common traits are derived so values can be logged
/// with `{:?}`, compared in tests, and passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SupportedModelInfo {
    /// Repo id of the model; the records in `SUPPORTED_MODEL_INFO` use the
    /// `*_GGUF` repo constants here.
    pub id: &'static str,
    /// Short display name, e.g. `"Qwen 2.5 1.5B (GGUF)"`.
    pub name: &'static str,
    /// Organization credited for the model, e.g. `"Qwen / Alibaba"`.
    pub org: &'static str,
    /// Free-form description text for the model.
    pub description: &'static str,
    /// Expected size in bytes (presumably of the model's GGUF file; how it is
    /// checked is not visible from this definition).
    pub expected_size_bytes: u64,
}
/// Descriptive records for the supported models, one per repo id in
/// `SUPPORTED_MODELS` and listed in the same order.
///
/// The "~N GB" figures inside each `description` are hand-written text and
/// are not derived from `expected_size_bytes`; keep the two in step when
/// editing an entry.
pub const SUPPORTED_MODEL_INFO: &[SupportedModelInfo] = &[
    // ---- Qwen 2.5 chat models ------------------------------------------------
    SupportedModelInfo {
        id: BARTOWSKI_QWEN25_1_5B_INSTRUCT_GGUF,
        name: "Qwen 2.5 1.5B (GGUF)",
        org: "Qwen / Alibaba",
        description: "Lightest pre-quantized chat model — ideal for iOS & Android (~941 MB). \
                      Fits comfortably within iOS memory limits (iPhone 16e, 8 GB RAM).",
        expected_size_bytes: 986_048_768,
    },
    SupportedModelInfo {
        id: BARTOWSKI_QWEN25_3B_INSTRUCT_GGUF,
        name: "Qwen 2.5 3B (GGUF)",
        org: "Qwen / Alibaba",
        description:
            "Pre-quantized chat model for macOS & Android — balanced quality and size (~1.93 GB). \
             Not recommended as default on iOS due to memory constraints.",
        expected_size_bytes: 1_929_903_264,
    },
    // ---- Qwen 3 models -------------------------------------------------------
    SupportedModelInfo {
        id: BARTOWSKI_QWEN3_4B_GGUF,
        name: "Qwen 3 4B (GGUF)",
        org: "Qwen / Alibaba",
        description: "Full tool-calling support with extended reasoning mode (~2.7 GB). \
                      Recommended for siGit Code on macOS, Linux, and Windows.",
        expected_size_bytes: 2_596_306_912,
    },
    // NOTE(review): this 8B entry advertises "24+ GB RAM" while the larger 14B
    // entry below advertises "16+ GB RAM". Confirm the two figures are not
    // swapped before changing either string.
    SupportedModelInfo {
        id: BARTOWSKI_QWEN3_8B_GGUF,
        name: "Qwen 3 8B (GGUF)",
        org: "Qwen / Alibaba",
        description: "Strong tool-calling model with extended thinking (~5 GB). \
                      Best balance of quality and memory for macOS with 24+ GB RAM.",
        expected_size_bytes: 5_131_567_104,
    },
    SupportedModelInfo {
        id: BARTOWSKI_QWEN3_14B_GGUF,
        name: "Qwen 3 14B (GGUF)",
        org: "Qwen / Alibaba",
        description: "Strong reasoning and tool-calling model with extended thinking (~8.4 GB). \
                      Best all-around model for macOS with 16+ GB RAM.",
        expected_size_bytes: 9_001_753_632,
    },
    SupportedModelInfo {
        id: BARTOWSKI_QWEN3_1_7B_GGUF,
        name: "Qwen 3 1.7B (GGUF)",
        org: "Qwen / Alibaba",
        description: "Lightweight tool-calling model for mobile (~1.3 GB). \
                      Smallest Qwen 3 variant with tool calling support.",
        expected_size_bytes: 1_282_439_584,
    },
    // ---- Code-oriented models ------------------------------------------------
    SupportedModelInfo {
        id: BARTOWSKI_QWEN25_CODER_7B_INSTRUCT_GGUF,
        name: "Qwen 2.5 Coder 7B (GGUF)",
        org: "Qwen / Alibaba",
        description: "Strong coding model with tool calling support (~4.4 GB). \
                      Trained on 5.5T code tokens. Requires 8+ GB RAM.",
        expected_size_bytes: 4_683_074_336,
    },
    SupportedModelInfo {
        id: THEBLOKE_DEEPSEEK_CODER_6_7B_INSTRUCT_GGUF,
        name: "DeepSeek Coder 6.7B (GGUF)",
        org: "DeepSeek AI",
        description: "Strong code generation model using the llama architecture (~3.8 GB). \
                      Requires 8+ GB RAM. Not recommended for mobile devices.",
        expected_size_bytes: 4_083_015_904,
    },
];
pub fn tok_model_id_for_repo(hf_repo_id: &str) -> Option<&'static str> {
match hf_repo_id {
BARTOWSKI_QWEN25_1_5B_INSTRUCT_GGUF => Some(QWEN25_1_5B_TOK_MODEL_ID),
BARTOWSKI_QWEN25_3B_INSTRUCT_GGUF => Some(QWEN25_3B_TOK_MODEL_ID),
BARTOWSKI_QWEN25_CODER_1_5B_INSTRUCT_GGUF => Some(QWEN25_CODER_1_5B_TOK_MODEL_ID),
BARTOWSKI_QWEN25_CODER_3B_INSTRUCT_GGUF => Some(QWEN25_CODER_3B_TOK_MODEL_ID),
BARTOWSKI_QWEN25_CODER_7B_INSTRUCT_GGUF => Some(QWEN25_CODER_7B_TOK_MODEL_ID),
THEBLOKE_DEEPSEEK_CODER_6_7B_INSTRUCT_GGUF => Some(DEEPSEEK_CODER_6_7B_TOK_MODEL_ID),
_ => None,
}
}