use anyhow::{Context, Result, anyhow, bail};
use rlx_core::gguf_config::{
DINOV2_GGUF_ARCHES, FLUX_GGUF_ARCHES, SAM_GGUF_ARCHES, SAM2_GGUF_ARCHES, SAM3_GGUF_ARCHES,
VJEPA2_GGUF_ARCHES, W2V_BERT_GGUF_ARCHES,
};
use rlx_core::gguf_support::{
gguf_architecture_from_path, gguf_family_for_arch, resolve_weights_file,
};
use std::path::{Path, PathBuf};
use crate::registry::run_registered;
pub fn run_auto(args: &[String]) -> Result<()> {
let Some(first) = args.first() else {
bail!(
"auto: expected WEIGHTS path as the first argument\n\
usage: rlx-run auto <weights-path> [runner-args...]"
);
};
if matches!(first.as_str(), "-h" | "--help" | "help") {
println!(
"rlx-run auto — sniff a GGUF / safetensors file and dispatch to the right runner\n\
\n\
USAGE:\n rlx-run auto <weights-path> [runner-args...]\n\
\n\
The first argument is forwarded as the runner's --weights value;\n\
remaining arguments are passed through unchanged."
);
return Ok(());
}
let path = Path::new(first);
let sniff = auto_sniff(path)?;
eprintln!(
"[rlx-run auto] {} → runner `{}` (from {:?})",
sniff.path.display(),
sniff.runner_name,
sniff.from
);
let rest: Vec<String> = args[1..].to_vec();
let has_weights_flag = rest
.iter()
.any(|a| a == "--weights" || a.starts_with("--weights="));
let mut forwarded: Vec<String> = Vec::with_capacity(rest.len() + 2);
if !has_weights_flag {
forwarded.push("--weights".into());
forwarded.push(sniff.path.display().to_string());
}
forwarded.extend(rest);
match run_registered(sniff.runner_name, &forwarded)? {
Some(()) => Ok(()),
None => bail!(
"auto: runner `{}` not registered (sniffed from {:?}); register it via \
`register_cli` in your binary's main",
sniff.runner_name,
sniff.from
),
}
}
/// Records which piece of metadata the runner choice was derived from.
#[derive(Debug, Clone)]
pub enum SniffedFrom {
/// The architecture string read from a `.gguf` file by `auto_sniff`.
GgufArch(String),
/// The `model_type` string read from the `config.json` sitting next to a
/// `.safetensors` file.
SafetensorsConfig(String),
}
/// Result of sniffing a weights path: the resolved file, the runner chosen
/// for it, and the metadata the choice was based on.
#[derive(Debug, Clone)]
pub struct SniffedRunner {
/// Weights file as returned by `resolve_weights_file`.
pub path: PathBuf,
/// Name under which the matching runner is looked up in the registry.
pub runner_name: &'static str,
/// Where the runner decision came from (GGUF arch or safetensors config).
pub from: SniffedFrom,
}
/// Description of a model family that is recognized by name but has no
/// runner yet; its fields are interpolated into `auto_sniff` error messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnimplementedArch {
/// Human-readable family name, e.g. "Phi 3 / Phi 4".
pub family: &'static str,
/// Milestone tag the family is filed under, e.g. "M4" or "M4 + M5".
pub milestone: &'static str,
/// Short free-form note shown after the main error sentence.
pub note: &'static str,
}
// Static descriptors for model families that are recognized but not yet
// runnable. Each entry carries a display name, a milestone tag and a short
// note; `KNOWN_UNIMPLEMENTED` maps architecture / model_type strings onto
// these entries (several keys may share one entry).
mod families {
use super::UnimplementedArch;
pub static MISTRAL: UnimplementedArch = UnimplementedArch {
family: "Mistral 3+ / Ministral",
milestone: "M4",
note: "Llama-shaped with newer RoPE; share `rlx-llama-base` per PLAN.md M4",
};
pub static PHI: UnimplementedArch = UnimplementedArch {
family: "Phi 3 / Phi 4",
milestone: "M4",
note: "Phi3/4 share llama.cpp arch tag — PLAN.md M4",
};
pub static PHIMOE: UnimplementedArch = UnimplementedArch {
family: "Phi MoE",
milestone: "M4 + M5",
note: "Phi + MoE routing; depends on shared MoE block — PLAN.md M4/M5",
};
pub static BONSAI: UnimplementedArch = UnimplementedArch {
family: "Bonsai",
milestone: "M4",
note: "Llama-shaped; HF model_type only — usually ships as llama GGUF — PLAN.md M4",
};
pub static OMNICODER: UnimplementedArch = UnimplementedArch {
family: "OmniCoder",
milestone: "M4",
note: "Qwen3-coder shaped — PLAN.md M4 (often tagged `qwen3` in GGUF)",
};
pub static MINIMAX: UnimplementedArch = UnimplementedArch {
family: "MiniMax M2",
milestone: "M5",
note: "Lightning Attention; depends on `rlx-ssm` upstream — PLAN.md M5",
};
pub static GLM: UnimplementedArch = UnimplementedArch {
family: "GLM 4 / 5",
milestone: "M5",
note: "GLM RoPE + RMSNorm placement — PLAN.md M5",
};
pub static GLM_MOE: UnimplementedArch = UnimplementedArch {
family: "GLM 4 MoE",
milestone: "M5",
note: "GLM + MoE routing — PLAN.md M5",
};
pub static GPT_OSS: UnimplementedArch = UnimplementedArch {
family: "gpt-oss",
milestone: "M5",
note: "OpenAI gpt-oss — confirm arch shape — PLAN.md M5",
};
pub static NEMOTRON: UnimplementedArch = UnimplementedArch {
family: "Nemotron",
milestone: "M5",
note: "Dense Nemotron arch — PLAN.md M5",
};
pub static NEMOTRON_H: UnimplementedArch = UnimplementedArch {
family: "Nemotron-H",
milestone: "M5",
note: "Mamba+attention hybrid; depends on `rlx-ssm` upstream — PLAN.md M5/M7",
};
// NOTE(review): no key in `KNOWN_UNIMPLEMENTED` points at `LFM` in this file
// (only `LFM_MOE` is referenced), which is why the lint is silenced.
// Presumably kept for a future `lfm2`-style key — confirm or remove.
#[allow(dead_code)]
pub static LFM: UnimplementedArch = UnimplementedArch {
family: "LFM 2 / 2.5",
milestone: "M5",
note: "Liquid Foundation Models with custom SSM layers — PLAN.md M5",
};
pub static LFM_MOE: UnimplementedArch = UnimplementedArch {
family: "LFM 2 MoE",
milestone: "M5",
note: "LFM + MoE — PLAN.md M5",
};
pub static QWEN3_MOE: UnimplementedArch = UnimplementedArch {
family: "Qwen3 MoE",
milestone: "M5",
note: "Qwen3 + MoE routing block — PLAN.md M5 (often loadable via qwen3 runner once MoE lands)",
};
pub static QWEN3_NEXT: UnimplementedArch = UnimplementedArch {
family: "Qwen3-Next",
milestone: "M5",
note: "Qwen3-Next variant — confirm arch deltas vs qwen3 — PLAN.md M5",
};
pub static GEMMA3: UnimplementedArch = UnimplementedArch {
family: "Gemma 3",
milestone: "M2",
note: "Gemma 3 (270m / 4b / 12b / 27b) adds per-layer sliding window + new RoPE — \
needs rlx-gemma config branch — PLAN.md M2",
};
pub static GEMMA3N: UnimplementedArch = UnimplementedArch {
family: "Gemma 3n",
milestone: "M2",
note: "Gemma 3n (mobile/edge Matformer variant) — PLAN.md M2",
};
pub static GEMMA4: UnimplementedArch = UnimplementedArch {
family: "Gemma 4",
milestone: "M2",
note: "Gemma 4 (flagship + edge E2B/E4B + MoE A4B) — PLAN.md M2 flagship",
};
pub static QWEN3_VL: UnimplementedArch = UnimplementedArch {
family: "Qwen3-VL",
milestone: "M7",
note: "vision tower + projector + LM (dense or MoE) — PLAN.md M7",
};
pub static QWEN3_MTP: UnimplementedArch = UnimplementedArch {
family: "Qwen3 / Qwen3.6 + MTP",
milestone: "M6",
note: "multi-token-prediction draft heads — PLAN.md M6",
};
pub static LLADA: UnimplementedArch = UnimplementedArch {
family: "LLaDA / LLaDA MoE (text-only)",
milestone: "M5",
note: "dense LLaDA arch in llama.cpp; rlx-llada2 currently targets the diffusion runner — PLAN.md M5",
};
pub static GRANITE: UnimplementedArch = UnimplementedArch {
family: "Granite (IBM)",
milestone: "M4",
note: "Llama-shaped — PLAN.md M4",
};
pub static DEEPSEEK: UnimplementedArch = UnimplementedArch {
family: "DeepSeek 2",
milestone: "M5",
note: "MoE + MLA attention — needs MoE block + MLA primitive — PLAN.md M5",
};
pub static COHERE: UnimplementedArch = UnimplementedArch {
family: "Command-R / Cohere",
milestone: "M4",
note: "Llama-shaped — PLAN.md M4",
};
}
// Lookup table from an architecture / model_type string to the descriptor of
// the (not yet runnable) family it belongs to. Keys are matched exactly, so
// spelling variants of the same name (`-` vs `_`, with/without suffix) are
// listed individually and point at the same `families` entry.
static KNOWN_UNIMPLEMENTED: phf::Map<&'static str, &'static UnimplementedArch> = phf::phf_map! {
"mistral3" => &families::MISTRAL,
"mistral4" => &families::MISTRAL,
"phi3" => &families::PHI,
"phi4" => &families::PHI,
"phimoe" => &families::PHIMOE,
"bonsai" => &families::BONSAI,
"omnicoder" => &families::OMNICODER,
"minimax-m2" => &families::MINIMAX,
"minimax_m2" => &families::MINIMAX,
"minimax" => &families::MINIMAX,
"glm4" => &families::GLM,
"glm5" => &families::GLM,
"chatglm" => &families::GLM,
"glm4moe" => &families::GLM_MOE,
"gpt-oss" => &families::GPT_OSS,
"gpt_oss" => &families::GPT_OSS,
"nemotron" => &families::NEMOTRON,
"nemotron_h" => &families::NEMOTRON_H,
"nemotron_h_moe" => &families::NEMOTRON_H,
"lfm2moe" => &families::LFM_MOE,
"qwen3moe" => &families::QWEN3_MOE,
"qwen3next" => &families::QWEN3_NEXT,
"gemma3" => &families::GEMMA3,
"gemma3n" => &families::GEMMA3N,
"gemma4" => &families::GEMMA4,
"gemma4moe" => &families::GEMMA4,
"qwen3vl" => &families::QWEN3_VL,
"qwen3vlmoe" => &families::QWEN3_VL,
"qwen3_vl" => &families::QWEN3_VL,
"qwen3-vl" => &families::QWEN3_VL,
"qwen3_mtp" => &families::QWEN3_MTP,
"qwen3-mtp" => &families::QWEN3_MTP,
"qwen36_mtp" => &families::QWEN3_MTP,
"llada" => &families::LLADA,
"llada-moe" => &families::LLADA,
"granite" => &families::GRANITE,
"granitemoe" => &families::GRANITE,
"granitehybrid" => &families::GRANITE,
"deepseek2" => &families::DEEPSEEK,
"deepseek2-ocr" => &families::DEEPSEEK,
"command-r" => &families::COHERE,
"cohere2" => &families::COHERE,
};
/// Looks up `arch_or_model_type` (exact match) among the families that are
/// recognized but have no runner yet.
///
/// Returns a copy of the family descriptor, or `None` when the string is not
/// a known-unimplemented key.
pub fn known_unimplemented_arch(arch_or_model_type: &str) -> Option<UnimplementedArch> {
    // The map stores `&'static UnimplementedArch`; peel the lookup reference
    // first, then copy the descriptor itself out.
    let descriptor: &'static UnimplementedArch = *KNOWN_UNIMPLEMENTED.get(arch_or_model_type)?;
    Some(*descriptor)
}
/// Iterates over every `(key, descriptor)` pair in the known-unimplemented
/// table, yielding the key string and a reference to its family descriptor.
pub fn known_unimplemented_keys(
) -> impl Iterator<Item = (&'static str, &'static UnimplementedArch)> {
    // `entries()` hands out references to the stored key/value; both are
    // themselves `'static` references, so destructure one level to copy them.
    KNOWN_UNIMPLEMENTED
        .entries()
        .map(|(&key, &descriptor)| (key, descriptor))
}
/// Maps a GGUF architecture string to the name of the runner that handles it.
///
/// `gguf_family_for_arch` is consulted first; if it does not recognize the
/// architecture, the per-family architecture lists are checked in this order:
/// flux, dinov2, vjepa2, sam3, sam2, sam, w2v-bert. Returns `None` when
/// nothing matches.
pub fn arch_runner_name(arch: &str) -> Option<&'static str> {
    gguf_family_for_arch(arch)
        .map(|family| family.runner_name())
        .or_else(|| {
            // First matching list wins; the order mirrors the lookup contract above.
            let runner = if FLUX_GGUF_ARCHES.contains(&arch) {
                "flux2"
            } else if DINOV2_GGUF_ARCHES.contains(&arch) {
                "dinov2"
            } else if VJEPA2_GGUF_ARCHES.contains(&arch) {
                "vjepa2"
            } else if SAM3_GGUF_ARCHES.contains(&arch) {
                "sam3"
            } else if SAM2_GGUF_ARCHES.contains(&arch) {
                "sam2"
            } else if SAM_GGUF_ARCHES.contains(&arch) {
                "sam1"
            } else if W2V_BERT_GGUF_ARCHES.contains(&arch) {
                "wav2vec2-bert"
            } else {
                return None;
            };
            Some(runner)
        })
}
/// Maps a `model_type` string (as found in a safetensors sidecar
/// `config.json`) to the name of the runner that handles it.
///
/// Matching is exact and case-sensitive; several spelling variants are
/// accepted per family. Returns `None` for anything not listed.
pub fn model_type_runner_name(model_type: &str) -> Option<&'static str> {
    let runner = match model_type {
        "qwen3" | "qwen3_moe" | "qwen3moe" | "qwen25" | "qwen2_5" | "qwen2.5" | "qwen251"
        | "qwen2_5_1" => "qwen3",
        // The 3.5 and 3.6 spellings both go to the `qwen35` runner.
        "qwen35" | "qwen3_5" | "qwen35_moe" | "qwen35moe" | "qwen36" | "qwen3_6"
        | "qwen36_moe" | "qwen36moe" => "qwen35",
        "llama" | "llama2" | "llama3" => "llama32",
        "gemma" | "gemma2" | "gemma3" | "gemma3n" => "gemma",
        "dinov2" | "dinov2_with_registers" => "dinov2",
        "vjepa2" | "vjepa" => "vjepa2",
        "sam" | "sam_vit" | "mobile-sam" | "mobile_sam" => "sam1",
        "sam2" => "sam2",
        "sam3" => "sam3",
        "whisper" => "whisper",
        "wav2vec2-bert" | "wav2vec2_bert" | "w2v-bert" | "w2v_bert" => "wav2vec2-bert",
        "flux" | "flux2" => "flux2",
        _ => return None,
    };
    Some(runner)
}
fn read_model_type_from_sidecar(path: &Path) -> Result<Option<String>> {
let dir = path
.parent()
.ok_or_else(|| anyhow!("safetensors path {path:?} has no parent dir"))?;
let cfg = dir.join("config.json");
if !cfg.is_file() {
return Ok(None);
}
let bytes = std::fs::read(&cfg).with_context(|| format!("reading {cfg:?}"))?;
let v: serde_json::Value =
serde_json::from_slice(&bytes).with_context(|| format!("parsing {cfg:?}"))?;
Ok(v.get("model_type")
.and_then(serde_json::Value::as_str)
.map(str::to_owned))
}
pub fn auto_sniff(path: &Path) -> Result<SniffedRunner> {
let file = resolve_weights_file(path)?;
let ext = file.extension().and_then(|s| s.to_str()).unwrap_or("");
match ext {
"gguf" => {
let arch = gguf_architecture_from_path(&file)?;
let runner = arch_runner_name(&arch).ok_or_else(|| {
if let Some(u) = known_unimplemented_arch(&arch) {
anyhow!(
"{file:?}: GGUF architecture `{arch}` is {} ({}) — not yet implemented in rlx-models. {}",
u.family, u.milestone, u.note
)
} else {
anyhow!(
"{file:?}: GGUF architecture `{arch}` has no registered rlx runner; \
see `rlx-run` for supported families"
)
}
})?;
Ok(SniffedRunner {
path: file,
runner_name: runner,
from: SniffedFrom::GgufArch(arch),
})
}
"safetensors" => {
let model_type = read_model_type_from_sidecar(&file)?.ok_or_else(|| {
anyhow!("{file:?}: no `model_type` in sidecar config.json (auto-dispatch needs it)")
})?;
let runner = model_type_runner_name(&model_type).ok_or_else(|| {
if let Some(u) = known_unimplemented_arch(&model_type) {
anyhow!(
"{file:?}: safetensors model_type `{model_type}` is {} ({}) — not yet implemented in rlx-models. {}",
u.family, u.milestone, u.note
)
} else {
anyhow!(
"{file:?}: safetensors model_type `{model_type}` has no registered rlx runner"
)
}
})?;
Ok(SniffedRunner {
path: file,
runner_name: runner,
from: SniffedFrom::SafetensorsConfig(model_type),
})
}
other => {
bail!("{file:?}: unsupported extension `.{other}` (expected .gguf or .safetensors)")
}
}
}
/// Convenience wrapper around [`auto_sniff`] that returns only the name of
/// the runner selected for `path`.
///
/// # Errors
///
/// Propagates any error from [`auto_sniff`].
pub fn auto_runner_name(path: &Path) -> Result<&'static str> {
    auto_sniff(path).map(|sniffed| sniffed.runner_name)
}
pub fn auto_dispatch(path: &Path, args: &[String]) -> Result<&'static str> {
let sniff = auto_sniff(path)?;
match run_registered(sniff.runner_name, args)? {
Some(()) => Ok(sniff.runner_name),
None => bail!(
"runner `{}` not registered (sniffed from {:?}); register it via \
`register_cli` before calling auto_dispatch",
sniff.runner_name,
sniff.from
),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes the smallest GGUF v3 image these tests need: one metadata
    /// entry (`general.architecture = arch`) and one 4-element F32 tensor
    /// named `w`, followed by its padded data section.
    ///
    /// Shared by every test that needs an on-disk GGUF so the byte layout is
    /// spelled out exactly once.
    fn minimal_gguf(arch: &str) -> Vec<u8> {
        // GGUF strings are a little-endian u64 byte length followed by the bytes.
        fn push_str(buf: &mut Vec<u8>, s: &str) {
            buf.extend_from_slice(&(s.len() as u64).to_le_bytes());
            buf.extend_from_slice(s.as_bytes());
        }

        let mut buf: Vec<u8> = Vec::new();

        // Header: magic, version, tensor count, metadata key/value count.
        buf.extend_from_slice(&rlx_gguf::GGUF_MAGIC.to_le_bytes());
        buf.extend_from_slice(&3u32.to_le_bytes());
        buf.extend_from_slice(&1u64.to_le_bytes());
        buf.extend_from_slice(&1u64.to_le_bytes());

        // Metadata: key, value type 8 (string), value.
        push_str(&mut buf, "general.architecture");
        buf.extend_from_slice(&8u32.to_le_bytes());
        push_str(&mut buf, arch);

        // Tensor info: name, rank 1, dim 4, element type, data offset 0.
        push_str(&mut buf, "w");
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.extend_from_slice(&4u64.to_le_bytes());
        buf.extend_from_slice(&(rlx_gguf::GgmlType::F32 as u32).to_le_bytes());
        buf.extend_from_slice(&0u64.to_le_bytes());

        // The data section starts on an alignment boundary.
        while !buf
            .len()
            .is_multiple_of(rlx_gguf::DEFAULT_ALIGNMENT as usize)
        {
            buf.push(0);
        }
        for _ in 0..4 {
            buf.extend_from_slice(&1.0f32.to_le_bytes());
        }
        buf
    }

    /// Scratch location under the OS temp dir, namespaced by process id so
    /// concurrently running test processes do not overwrite or delete each
    /// other's fixtures.
    fn scratch_path(name: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "rlx_auto_dispatch_{}_{name}",
            std::process::id()
        ))
    }

    #[test]
    fn arch_runner_maps_lm_families() {
        assert_eq!(arch_runner_name("qwen3"), Some("qwen3"));
        assert_eq!(arch_runner_name("qwen2"), Some("qwen3"));
        assert_eq!(arch_runner_name("qwen35"), Some("qwen35"));
        assert_eq!(arch_runner_name("qwen35moe"), Some("qwen35"));
        assert_eq!(arch_runner_name("qwen36"), Some("qwen35"));
        assert_eq!(arch_runner_name("qwen36moe"), Some("qwen35"));
        assert_eq!(arch_runner_name("qwen25"), Some("qwen3"));
        assert_eq!(arch_runner_name("qwen2_5"), Some("qwen3"));
        assert_eq!(arch_runner_name("llama"), Some("llama32"));
        assert_eq!(arch_runner_name("gemma"), Some("gemma"));
        assert_eq!(arch_runner_name("gemma2"), Some("gemma"));
    }

    #[test]
    fn arch_runner_maps_vision_and_diffusion() {
        assert_eq!(arch_runner_name("dinov2"), Some("dinov2"));
        assert_eq!(arch_runner_name("sam"), Some("sam1"));
        assert_eq!(arch_runner_name("mobile-sam"), Some("sam1"));
        assert_eq!(arch_runner_name("sam2"), Some("sam2"));
        assert_eq!(arch_runner_name("sam3"), Some("sam3"));
        assert_eq!(arch_runner_name("flux"), Some("flux2"));
        assert_eq!(arch_runner_name("vjepa2"), Some("vjepa2"));
        assert_eq!(arch_runner_name("w2v-bert"), Some("wav2vec2-bert"));
    }

    #[test]
    fn arch_runner_returns_none_for_embed_and_unknown() {
        assert_eq!(arch_runner_name("bert"), None);
        assert_eq!(arch_runner_name("nomic-bert"), None);
        assert_eq!(arch_runner_name("totally-fake-arch"), None);
    }

    #[test]
    fn known_unimplemented_covers_plan_families() {
        assert_eq!(
            known_unimplemented_arch("mistral3").map(|u| u.milestone),
            Some("M4")
        );
        assert_eq!(
            known_unimplemented_arch("phi3").map(|u| u.milestone),
            Some("M4")
        );
        assert_eq!(
            known_unimplemented_arch("phi4").map(|u| u.milestone),
            Some("M4")
        );
        assert_eq!(
            known_unimplemented_arch("bonsai").map(|u| u.milestone),
            Some("M4")
        );
        assert_eq!(
            known_unimplemented_arch("minimax-m2").map(|u| u.milestone),
            Some("M5")
        );
        assert_eq!(
            known_unimplemented_arch("glm4").map(|u| u.milestone),
            Some("M5")
        );
        assert_eq!(
            known_unimplemented_arch("nemotron_h").map(|u| u.milestone),
            Some("M5")
        );
        assert_eq!(
            known_unimplemented_arch("qwen3_mtp").map(|u| u.milestone),
            Some("M6")
        );
        assert_eq!(
            known_unimplemented_arch("qwen3vl").map(|u| u.milestone),
            Some("M7")
        );
        // Implemented or simply unknown names must not be reported as planned.
        assert_eq!(known_unimplemented_arch("qwen3"), None);
        assert_eq!(known_unimplemented_arch("mistral"), None);
        assert_eq!(known_unimplemented_arch("totally-fake"), None);
    }

    #[test]
    fn auto_sniff_error_points_at_milestone_for_known_unimplemented() {
        let path = scratch_path("mistral3_hint.gguf");
        std::fs::write(&path, minimal_gguf("mistral3")).unwrap();
        let err = auto_sniff(&path).expect_err("should error");
        let s = format!("{err:#}");
        assert!(s.contains("Mistral"), "expected family name in error: {s}");
        assert!(s.contains("M4"), "expected milestone tag in error: {s}");
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn model_type_runner_maps_known() {
        assert_eq!(model_type_runner_name("qwen3"), Some("qwen3"));
        assert_eq!(model_type_runner_name("qwen3_moe"), Some("qwen3"));
        assert_eq!(model_type_runner_name("llama"), Some("llama32"));
        assert_eq!(model_type_runner_name("gemma3"), Some("gemma"));
        assert_eq!(
            model_type_runner_name("dinov2_with_registers"),
            Some("dinov2")
        );
        assert_eq!(model_type_runner_name("whisper"), Some("whisper"));
        assert_eq!(model_type_runner_name("unknown"), None);
    }

    #[test]
    fn auto_sniff_reads_gguf_arch() {
        let path = scratch_path("sniff.gguf");
        std::fs::write(&path, minimal_gguf("qwen3")).unwrap();
        let sniff = auto_sniff(&path).expect("sniff");
        assert_eq!(sniff.runner_name, "qwen3");
        match sniff.from {
            SniffedFrom::GgufArch(a) => assert_eq!(a, "qwen3"),
            other => panic!("wrong sniff source: {other:?}"),
        }
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn run_auto_injects_weights_flag_when_missing() {
        use crate::registry::{ModelRunner, register_runner};
        use std::sync::{Mutex, OnceLock};

        // The runner below records the arguments it was invoked with here.
        static CAPTURED: OnceLock<Mutex<Vec<String>>> = OnceLock::new();
        fn captured() -> &'static Mutex<Vec<String>> {
            CAPTURED.get_or_init(|| Mutex::new(Vec::new()))
        }

        struct Capture;
        impl ModelRunner for Capture {
            fn name(&self) -> &'static str {
                "qwen3"
            }
            fn description(&self) -> &'static str {
                "test capture"
            }
            fn run(&self, args: &[String]) -> Result<()> {
                *captured().lock().unwrap() = args.to_vec();
                Ok(())
            }
        }
        register_runner(Box::new(Capture));

        let dir = scratch_path("run_auto");
        std::fs::create_dir_all(&dir).unwrap();
        let path = dir.join("model.gguf");
        std::fs::write(&path, minimal_gguf("qwen3")).unwrap();

        // No --weights supplied: the sniffed path is injected up front.
        run_auto(&[path.display().to_string(), "--prompt".into(), "hi".into()]).unwrap();
        let got = captured().lock().unwrap().clone();
        assert_eq!(
            got,
            vec![
                "--weights".to_string(),
                path.display().to_string(),
                "--prompt".into(),
                "hi".into()
            ]
        );

        // Explicit --weights supplied: arguments are forwarded untouched.
        run_auto(&[
            path.display().to_string(),
            "--weights".into(),
            "/other/path".into(),
            "--prompt".into(),
            "hi".into(),
        ])
        .unwrap();
        let got = captured().lock().unwrap().clone();
        assert_eq!(
            got,
            vec![
                "--weights".to_string(),
                "/other/path".into(),
                "--prompt".into(),
                "hi".into(),
            ]
        );
        std::fs::remove_dir_all(&dir).ok();
    }

    #[test]
    fn auto_sniff_reads_safetensors_sidecar() {
        let dir = scratch_path("sidecar");
        std::fs::create_dir_all(&dir).unwrap();
        let cfg = dir.join("config.json");
        std::fs::write(&cfg, br#"{"model_type":"llama"}"#).unwrap();
        // Only the sidecar is inspected, so an empty weights file suffices.
        let st = dir.join("model.safetensors");
        std::fs::write(&st, b"").unwrap();
        let sniff = auto_sniff(&st).expect("sniff");
        assert_eq!(sniff.runner_name, "llama32");
        std::fs::remove_dir_all(&dir).ok();
    }
}