use anyhow::{Context, Result, bail};
use rlx_gguf::{GgufFile, MetaValue};
use std::path::{Path, PathBuf};
/// Model families whose runners load GGUF weights directly.
///
/// Each family carries the names used in user-facing hints and the set of
/// `general.architecture` tags its runner accepts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GgufModelFamily {
    /// Accepts the `qwen3` / `qwen2` / `qwen25` / `qwen2_5` tags.
    Qwen3,
    /// Accepts the `qwen35` / `qwen36` tags and their `moe` variants.
    Qwen35,
    /// Accepts `llama` plus several other decoder tags (see `accepts_arch`).
    Llama32,
    /// Accepts the `gemma`, `gemma2`, `gemma3` and `gemma3n` tags.
    Gemma,
    /// Accepts the `lfm`, `lfm2`, `lfm25` and `lfm2_5` tags.
    Lfm,
}

impl GgufModelFamily {
    /// Command-line tool name quoted in "use this instead" messages.
    pub fn cli_name(self) -> &'static str {
        self.labels().0
    }

    /// Short runner identifier for this family.
    pub fn runner_name(self) -> &'static str {
        self.labels().1
    }

    /// Back-ticked builder expression suggested to library users.
    pub fn runner_hint(self) -> &'static str {
        self.labels().2
    }

    /// Single lookup table behind the three name accessors:
    /// `(cli_name, runner_name, runner_hint)`.
    fn labels(self) -> (&'static str, &'static str, &'static str) {
        match self {
            Self::Qwen3 => (
                "rlx-qwen3",
                "qwen3",
                "`rlx_models::Qwen3Runner::builder()...build()`",
            ),
            Self::Qwen35 => (
                "rlx-qwen35",
                "qwen35",
                "`rlx_models::Qwen35Runner::builder()...build()`",
            ),
            Self::Llama32 => (
                "rlx-llama32",
                "llama32",
                "`rlx_models::Llama32Runner::builder()...build()`",
            ),
            Self::Gemma => (
                "rlx-gemma",
                "gemma",
                "`rlx_models::GemmaRunner::builder()...build()`",
            ),
            Self::Lfm => (
                "rlx-lfm",
                "lfm",
                "`rlx_lfm::LfmRunner::builder()...build()`",
            ),
        }
    }

    /// Returns `true` when `arch` (a `general.architecture` value) is one of
    /// the tags this family's runner accepts. Matching is exact and
    /// case-sensitive.
    fn accepts_arch(self, arch: &str) -> bool {
        let accepted: &[&str] = match self {
            Self::Qwen3 => &["qwen3", "qwen2", "qwen25", "qwen2_5"],
            Self::Qwen35 => &["qwen35", "qwen35moe", "qwen36", "qwen36moe"],
            Self::Llama32 => &[
                "llama",
                "mistral3",
                "mistral4",
                "phi3",
                "granite",
                "granitemoe",
                "granitehybrid",
                "command-r",
                "cohere2",
            ],
            Self::Gemma => &["gemma", "gemma2", "gemma3", "gemma3n"],
            Self::Lfm => &["lfm2", "lfm", "lfm25", "lfm2_5"],
        };
        accepted.contains(&arch)
    }
}
pub fn gguf_architecture_str(file: &GgufFile) -> Option<&str> {
file.metadata
.get("general.architecture")
.and_then(MetaValue::as_str)
}
/// Estimates the byte size of all tensors in `file` if every element were
/// stored in 4 bytes: the sum over tensors of `n_elements() * 4`.
pub fn gguf_f32_bytes_estimate(file: &GgufFile) -> u64 {
    let mut total: u64 = 0;
    for tensor in file.tensors.values() {
        total += (tensor.n_elements() as u64) * 4;
    }
    total
}
/// Checks that the GGUF file at `path` declares an architecture listed in
/// `allowed`.
///
/// # Errors
/// Fails when the file cannot be opened, or when its architecture tag
/// (`"unknown"` if the metadata key is missing) is not in `allowed`.
pub fn gguf_validate_arch(path: &Path, allowed: &[&str]) -> Result<()> {
    let arch = gguf_architecture_from_path(path)?;
    let permitted = allowed.iter().any(|&tag| tag == arch.as_str());
    if !permitted {
        bail!(
            "{path:?}: GGUF architecture `{arch}` not in [{}]",
            allowed.join(", ")
        );
    }
    Ok(())
}
/// Opens the GGUF file at `path` and returns its `general.architecture`
/// tag as an owned string, substituting `"unknown"` when the tag is absent.
///
/// # Errors
/// Fails when the file cannot be opened as GGUF.
pub fn gguf_architecture_from_path(path: &Path) -> Result<String> {
    let file = GgufFile::from_path(path).with_context(|| format!("opening GGUF {path:?}"))?;
    let arch = match gguf_architecture_str(&file) {
        Some(tag) => tag,
        None => "unknown",
    };
    Ok(arch.to_owned())
}
/// Builds the message reported when `runner`, which expects safetensors, is
/// given the GGUF file at `path` with architecture tag `arch`.
///
/// The message always starts with the same sentence and then appends one
/// piece of advice. Architecture predicates are tried in a fixed order and
/// the first match wins; after those, a known GGUF model family is
/// suggested; otherwise the reader is pointed at the README.
pub fn gguf_safetensors_only_hint(runner: &str, path: &Path, arch: &str) -> String {
    let mut msg = format!(
        "{runner}: {path:?} is GGUF (architecture `{arch}`); this runner expects safetensors"
    );
    // The order of these checks is part of the behaviour: SAM3 is tested
    // before SAM2, and SAM2 before plain SAM.
    let advice: String = if crate::gguf_config::is_embed_gguf_arch(arch) {
        String::from(". Load embedding GGUF with `rlx-embed` or `RlxEmbed::from_weights`")
    } else if crate::gguf_config::is_flux_gguf_arch(arch) {
        String::from(
            ". Load FLUX denoiser GGUF with `rlx-flux2` (`Flux2Runner::builder().weights(path)`) \
             — e.g. unsloth/FLUX.2-klein-9B-GGUF; VAE/text encoder stay separate safetensors",
        )
    } else if crate::gguf_config::is_dinov2_gguf_arch(arch) {
        String::from(
            ". Load DINOv2 GGUF with `rlx-dinov2` (`DinoV2Runner::builder().weights(path)`)",
        )
    } else if crate::gguf_config::is_sam3_gguf_arch(arch) {
        String::from(". Load SAM3 GGUF with `rlx-sam3` (`Sam3::from_checkpoint_on`)")
    } else if crate::gguf_config::is_sam2_gguf_arch(arch) {
        String::from(". Load SAM2 GGUF with `rlx-sam2`")
    } else if crate::gguf_config::is_sam_gguf_arch(arch) {
        String::from(". Load SAM / MobileSAM GGUF with `rlx-sam`")
    } else if crate::gguf_config::is_vjepa2_gguf_arch(arch) {
        String::from(". Load V-JEPA2 GGUF with `rlx-vjepa2`")
    } else if crate::gguf_config::is_w2v_bert_gguf_arch(arch) {
        String::from(". Load with `rlx-wav2vec2-bert` (sidecar `config.json` still required)")
    } else {
        match gguf_family_for_arch(arch) {
            Some(fam) => format!(
                ". Use `{}` ({}) instead",
                fam.cli_name(),
                fam.runner_hint()
            ),
            None => String::from(". See README Status → Weights for GGUF coverage per family"),
        }
    };
    msg.push_str(&advice);
    msg
}
/// Maps a GGUF `general.architecture` tag to the model family whose runner
/// accepts it, or `None` when no family does.
///
/// The lookup delegates to `GgufModelFamily::accepts_arch` so that the tags a
/// runner accepts and the tags this function recognises cannot drift apart.
/// Previously only `llama` mapped to `Llama32`, although that family also
/// accepts `mistral3`, `mistral4`, `phi3`, the `granite*` tags, `command-r`
/// and `cohere2`; hints for those tags fell through to the generic message.
pub fn gguf_family_for_arch(arch: &str) -> Option<GgufModelFamily> {
    // Every variant of `GgufModelFamily`; extend this list when adding one.
    const FAMILIES: [GgufModelFamily; 5] = [
        GgufModelFamily::Qwen3,
        GgufModelFamily::Qwen35,
        GgufModelFamily::Llama32,
        GgufModelFamily::Gemma,
        GgufModelFamily::Lfm,
    ];
    FAMILIES
        .iter()
        .copied()
        .find(|family| family.accepts_arch(arch))
}
/// Opens the GGUF file at `path` and returns it if its architecture tag is
/// accepted by the `expected` family.
///
/// # Errors
/// Fails when the file cannot be opened, or when the architecture (taken as
/// `"unknown"` if the metadata key is missing) is not accepted. If the tag
/// belongs to another known family the error names that family's CLI and
/// runner; otherwise it reports the raw tag and the expected CLI name.
pub fn assert_gguf_family(path: &Path, expected: GgufModelFamily) -> Result<GgufFile> {
    let gguf = GgufFile::from_path(path).with_context(|| format!("opening GGUF {path:?}"))?;
    let arch = gguf_architecture_str(&gguf).unwrap_or("unknown");
    if !expected.accepts_arch(arch) {
        match gguf_family_for_arch(arch) {
            Some(actual) => bail!(
                "{path:?} is a {arch} GGUF (family {actual:?}). Use `{}` or {} instead.",
                actual.cli_name(),
                actual.runner_hint()
            ),
            None => bail!(
                "{path:?} has general.architecture={arch:?}; this runner expects a {} GGUF",
                expected.cli_name()
            ),
        }
    }
    Ok(gguf)
}
/// File-name substring `"Q4_K_M"`, exported as the default preference when
/// several `.gguf` files are available.
///
/// NOTE(review): presumably meant to be passed to
/// `ResolveWeightsOptions::prefer_substring`; nothing in this part of the
/// file reads it, and `ResolveWeightsOptions::default()` leaves the
/// preference unset — confirm against callers.
pub const DEFAULT_GGUF_PREFER_SUBSTR: &str = "Q4_K_M";
/// Options that steer how a weights directory is resolved to a single file.
///
/// The default value sets neither option.
#[derive(Debug, Clone, Default)]
pub struct ResolveWeightsOptions<'a> {
    /// When set, `.gguf` files whose name contains this substring are
    /// preferred; if none match, all `.gguf` files remain candidates.
    pub prefer_gguf_substring: Option<&'a str>,
    /// When more than one candidate remains, selects one by position in the
    /// sorted candidate list.
    pub gguf_index: Option<usize>,
}

impl<'a> ResolveWeightsOptions<'a> {
    /// Returns these options with the preferred file-name substring set to `sub`.
    pub fn prefer_substring(self, sub: &'a str) -> Self {
        Self {
            prefer_gguf_substring: Some(sub),
            ..self
        }
    }

    /// Returns these options with the candidate index set to `idx`.
    pub fn index(self, idx: usize) -> Self {
        Self {
            gguf_index: Some(idx),
            ..self
        }
    }
}
/// Resolves `path` to a single weights file using default options
/// (no preferred substring, no index).
///
/// See `resolve_weights_file_with_options` for the resolution rules and errors.
pub fn resolve_weights_file(path: &Path) -> Result<PathBuf> {
    let defaults = ResolveWeightsOptions::default();
    resolve_weights_file_with_options(path, &defaults)
}
/// Resolves `path` to a single weights file.
///
/// * A regular file is returned unchanged.
/// * For a directory, its `.gguf` files are listed in sorted order. If
///   `opts.prefer_gguf_substring` is set and at least one file name contains
///   it, only those files stay candidates. A single candidate is returned
///   directly; with several, `opts.gguf_index` picks one.
/// * With no `.gguf` file at all, `model.safetensors` inside the directory is
///   used when it exists.
///
/// # Errors
/// Fails when `path` is neither a file nor a directory, when the directory
/// cannot be read, when several candidates remain and no index was given,
/// when the index is out of range, or when the directory contains neither a
/// `.gguf` file nor `model.safetensors`.
pub fn resolve_weights_file_with_options(
    path: &Path,
    opts: &ResolveWeightsOptions<'_>,
) -> Result<PathBuf> {
    if path.is_file() {
        return Ok(path.to_path_buf());
    }
    if !path.is_dir() {
        bail!("weights path not found: {path:?}");
    }

    let mut ggufs = list_gguf_files_in_dir(path)?;
    if let Some(sub) = opts.prefer_gguf_substring {
        let name_matches = |p: &PathBuf| {
            p.file_name()
                .and_then(|s| s.to_str())
                .is_some_and(|n| n.contains(sub))
        };
        // Narrow the list only when the preference matches something.
        if ggufs.iter().any(|p| name_matches(p)) {
            ggufs.retain(|p| name_matches(p));
        }
    }

    match ggufs.as_slice() {
        [] => {}
        [only] => return Ok(only.clone()),
        many => {
            let n = many.len();
            if let Some(idx) = opts.gguf_index {
                return match many.get(idx) {
                    Some(chosen) => Ok(chosen.clone()),
                    None => Err(anyhow::anyhow!(
                        "gguf_index={idx} out of range; directory {path:?} has {n} .gguf files"
                    )),
                };
            }
            let listing = many
                .iter()
                .map(|p| format!(" - {}", p.display()))
                .collect::<Vec<String>>()
                .join("\n");
            bail!(
                "directory {path:?} contains {n} .gguf files; pass the exact file path, \
                 use LoadOpts::map().prefer_q4_k_m() / prefer_substring(\"Q4_K_M\"), \
                 gguf_index(0), or run `rlx-inspect {path:?} --prefer Q4_K_M`:\n{}",
                listing
            );
        }
    }

    // No GGUF candidates: fall back to the conventional safetensors file name.
    let st = path.join("model.safetensors");
    if !st.is_file() {
        bail!(
            "directory {path:?} has no .gguf file and no model.safetensors; \
             pass a .gguf or .safetensors path"
        );
    }
    Ok(st)
}
/// Lists the regular files directly inside `dir` whose extension is exactly
/// `gguf`, sorted by path.
///
/// # Errors
/// Fails when `dir` cannot be read or when iterating its entries fails.
pub fn list_gguf_files_in_dir(dir: &Path) -> Result<Vec<PathBuf>> {
    let entries = std::fs::read_dir(dir).with_context(|| format!("reading dir {dir:?}"))?;
    let mut found = Vec::new();
    for item in entries {
        let candidate = item?.path();
        let has_gguf_ext = candidate.extension().and_then(|ext| ext.to_str()) == Some("gguf");
        // Check the extension first so the file-type lookup only happens for matches.
        if has_gguf_ext && candidate.is_file() {
            found.push(candidate);
        }
    }
    found.sort();
    Ok(found)
}
/// Finds the parts of the split GGUF that `path` belongs to.
///
/// Returns `Ok(None)` when `path` is not part of a split (`split.count`
/// missing or `<= 1`). Otherwise every `.gguf` file in the same directory is
/// opened, those declaring the same `split.count` are kept, and their paths
/// are returned ordered by `split.no` (missing `split.no` counts as 0) with
/// at most one file per part number.
///
/// NOTE(review): candidates are matched on `split.count` only, so two
/// different splits with the same part count in one directory would be
/// mixed; the first path in sorted order wins for each part number.
///
/// # Errors
/// Fails when `path`, the directory, or any `.gguf` file in it cannot be read.
pub fn gguf_split_siblings(path: &Path) -> Result<Option<Vec<PathBuf>>> {
    let raw = GgufFile::from_path(path).with_context(|| format!("opening GGUF {path:?}"))?;
    let count = raw
        .metadata
        .get("split.count")
        .and_then(MetaValue::as_u32)
        .unwrap_or(1);
    if count <= 1 {
        return Ok(None);
    }
    // `Path::parent` returns `Some("")` — not `None` — for a bare relative file
    // name such as `model-00001-of-00002.gguf`, and reading the empty path as a
    // directory fails. Treat an empty parent as the current directory.
    let dir = match path.parent() {
        Some(parent) if !parent.as_os_str().is_empty() => parent,
        _ => Path::new("."),
    };
    let mut parts: Vec<(u32, PathBuf)> = Vec::new();
    for candidate in list_gguf_files_in_dir(dir)? {
        let other = GgufFile::from_path(&candidate)
            .with_context(|| format!("opening split candidate {candidate:?}"))?;
        let other_count = other
            .metadata
            .get("split.count")
            .and_then(MetaValue::as_u32)
            .unwrap_or(1);
        if other_count != count {
            continue;
        }
        let no = other
            .metadata
            .get("split.no")
            .and_then(MetaValue::as_u32)
            .unwrap_or(0);
        parts.push((no, candidate));
    }
    // The sort is stable, so for duplicate part numbers the first path in
    // directory-sorted order is the one kept by `dedup_by_key`.
    parts.sort_by_key(|(no, _)| *no);
    parts.dedup_by_key(|(no, _)| *no);
    Ok(Some(parts.into_iter().map(|(_, p)| p).collect()))
}
/// Loads the GGUF file at `path`, merging split parts when needed.
///
/// A file whose `split.count` is missing or `<= 1` is returned as opened.
/// For a split file, the sibling parts are collected from the same directory;
/// if exactly `split.count` parts are found, a notice is written to stderr
/// and the parts are loaded together via `GgufFile::from_split_paths`.
///
/// # Errors
/// Fails when the file cannot be opened, when sibling discovery fails, or
/// when the number of parts found differs from `split.count`; in the last
/// case the error text comes from `gguf_split_hint`.
pub fn load_gguf_file(path: &Path) -> Result<GgufFile> {
    let raw = GgufFile::from_path(path).with_context(|| format!("opening GGUF {path:?}"))?;
    let count = match raw.metadata.get("split.count").and_then(MetaValue::as_u32) {
        Some(declared) => declared,
        None => 1,
    };
    if count <= 1 {
        return Ok(raw);
    }

    if let Some(parts) = gguf_split_siblings(path)? {
        if parts.len() as u32 == count {
            eprintln!(
                "[rlx-core] merging {count} GGUF split parts from {:?}",
                path.parent().unwrap_or(path)
            );
            return GgufFile::from_split_paths(&parts);
        }
    }

    // Incomplete split: report what was found next to the file.
    let hint = match gguf_split_hint(path)? {
        Some(text) => text,
        None => format!("{path:?} is a split GGUF but sibling parts are missing"),
    };
    bail!("{hint}");
}
/// Produces a human-readable explanation for a split GGUF file.
///
/// Returns `Ok(None)` when `path` is not part of a split (`split.count`
/// missing or `<= 1`). Otherwise the message states which part this is
/// (`split.no`, 0 if missing) and lists the sibling parts found in the same
/// directory.
///
/// # Errors
/// Fails when the file cannot be opened or sibling discovery fails.
pub fn gguf_split_hint(path: &Path) -> Result<Option<String>> {
    let header = GgufFile::from_path(path).with_context(|| format!("opening GGUF {path:?}"))?;
    let read_u32 = |key: &str| header.metadata.get(key).and_then(MetaValue::as_u32);

    let count = read_u32("split.count").unwrap_or(1);
    if count <= 1 {
        return Ok(None);
    }
    let no = read_u32("split.no").unwrap_or(0);

    let mut msg = format!(
        "{path:?} is part {no} of a {count}-file GGUF split; \
         place all parts in the same directory (auto-merge) or use a single-file quant"
    );
    match gguf_split_siblings(path)? {
        Some(siblings) => {
            msg.push_str("\nSplit parts in this directory:");
            for (i, p) in siblings.iter().enumerate() {
                msg.push_str(&format!("\n  [{i}] {}", p.display()));
            }
        }
        None => {}
    }
    Ok(Some(msg))
}
// Unit tests for the architecture predicates, `gguf_validate_arch` and
// `gguf_family_for_arch`.
#[cfg(test)]
mod tests {
use super::*;
// The embedding predicate accepts `bert` and `nomic-bert` and rejects `sam3`.
#[test]
fn embed_arch_list_includes_bert_and_nomic() {
assert!(crate::gguf_config::is_embed_gguf_arch("bert"));
assert!(crate::gguf_config::is_embed_gguf_arch("nomic-bert"));
assert!(!crate::gguf_config::is_embed_gguf_arch("sam3"));
}
// Each vision/audio predicate accepts its own tag; the SAM3 predicate must
// not accept `dinov2`.
#[test]
fn vision_arch_tags() {
assert!(crate::gguf_config::is_sam3_gguf_arch("sam3"));
assert!(crate::gguf_config::is_dinov2_gguf_arch("dinov2"));
assert!(crate::gguf_config::is_sam_gguf_arch("mobile-sam"));
assert!(crate::gguf_config::is_w2v_bert_gguf_arch("wav2vec2"));
assert!(!crate::gguf_config::is_sam3_gguf_arch("dinov2"));
}
// Hand-builds a minimal GGUF file declaring architecture `dinov2`, writes it
// to the temp directory, and checks that `gguf_validate_arch` accepts it
// against the DINOv2 list and rejects it against the SAM3 list.
#[test]
fn gguf_validate_arch_accepts_and_rejects() {
let mut buf: Vec<u8> = Vec::new();
// Header: magic, then a u32 `3` and two u64 `1`s, all little-endian.
// Presumably version, tensor count and metadata key/value count — this
// matches the single KV pair and single tensor written below; confirm
// against the GGUF specification.
buf.extend_from_slice(&rlx_gguf::GGUF_MAGIC.to_le_bytes());
buf.extend_from_slice(&3u32.to_le_bytes());
buf.extend_from_slice(&1u64.to_le_bytes());
buf.extend_from_slice(&1u64.to_le_bytes());
// Writes one key/value pair: length-prefixed key, a u32 type tag of 8
// (presumably the GGUF string type — confirm), then a length-prefixed value.
let write_string = |buf: &mut Vec<u8>, k: &str, v: &str| {
buf.extend_from_slice(&(k.len() as u64).to_le_bytes());
buf.extend_from_slice(k.as_bytes());
buf.extend_from_slice(&8u32.to_le_bytes());
buf.extend_from_slice(&(v.len() as u64).to_le_bytes());
buf.extend_from_slice(v.as_bytes());
};
write_string(&mut buf, "general.architecture", "dinov2");
// Writes one tensor descriptor: length-prefixed name, dimension count (u32),
// each dimension (u64), the element type as u32, and a u64 offset.
let write_tensor = |buf: &mut Vec<u8>, name: &str, shape: &[usize], off: u64| {
buf.extend_from_slice(&(name.len() as u64).to_le_bytes());
buf.extend_from_slice(name.as_bytes());
buf.extend_from_slice(&(shape.len() as u32).to_le_bytes());
for &d in shape {
buf.extend_from_slice(&(d as u64).to_le_bytes());
}
buf.extend_from_slice(&(rlx_gguf::GgmlType::F32 as u32).to_le_bytes());
buf.extend_from_slice(&off.to_le_bytes());
};
write_tensor(&mut buf, "w", &[4], 0);
// Zero-pad up to the default alignment before appending the tensor data.
while !buf
.len()
.is_multiple_of(rlx_gguf::DEFAULT_ALIGNMENT as usize)
{
buf.push(0);
}
// Data for tensor `w`: four f32 values of 1.0.
for _ in 0..4 {
buf.extend_from_slice(&1.0f32.to_le_bytes());
}
// NOTE(review): the temp file name is fixed, so concurrent runs of this test
// in separate processes could collide, and the cleanup at the end is skipped
// if an assertion above it panics.
let path = std::env::temp_dir().join("rlx_gguf_validate_arch_test.gguf");
std::fs::write(&path, &buf).unwrap();
gguf_validate_arch(&path, crate::gguf_config::DINOV2_GGUF_ARCHES).expect("dinov2 ok");
let err = gguf_validate_arch(&path, crate::gguf_config::SAM3_GGUF_ARCHES)
.expect_err("wrong family");
// The rejection message must name the architecture that was found.
assert!(format!("{err:#}").contains("dinov2"));
// Best-effort cleanup; a failure to delete is ignored.
std::fs::remove_file(&path).ok();
}
// Known tags map to their family; an unrelated tag (`clip`) maps to `None`.
#[test]
fn family_for_arch_maps_known_tags() {
assert_eq!(
gguf_family_for_arch("qwen35"),
Some(GgufModelFamily::Qwen35)
);
assert_eq!(
gguf_family_for_arch("qwen35moe"),
Some(GgufModelFamily::Qwen35)
);
assert_eq!(
gguf_family_for_arch("qwen36"),
Some(GgufModelFamily::Qwen35)
);
assert_eq!(
gguf_family_for_arch("qwen36moe"),
Some(GgufModelFamily::Qwen35)
);
assert_eq!(
gguf_family_for_arch("llama"),
Some(GgufModelFamily::Llama32)
);
assert_eq!(gguf_family_for_arch("gemma"), Some(GgufModelFamily::Gemma));
assert_eq!(gguf_family_for_arch("gemma2"), Some(GgufModelFamily::Gemma));
assert!(gguf_family_for_arch("clip").is_none());
}
}