rlx_cli/
auto_dispatch.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! Auto-dispatch: pick a registered model runner from a weights path.
17//!
18//! `auto_runner_name(path)` resolves the path (file or directory), sniffs
19//! the model family (GGUF `general.architecture` for `.gguf`, sidecar
20//! `config.json` `model_type` for safetensors), and maps it to the short
21//! runner name a callsite registered with [`register_cli`](crate::register_cli)
22//! (e.g. `"qwen3"`, `"gemma"`).
23//!
24//! `auto_dispatch(path, args)` is a one-shot: sniff, look up, run.
25//!
26//! Used by `skill` so callers don't need to hardcode `Qwen3Runner` vs
27//! `GemmaRunner` per family.
28
29use anyhow::{Context, Result, anyhow, bail};
30use rlx_core::gguf_config::{
31    DINOV2_GGUF_ARCHES, FLUX_GGUF_ARCHES, SAM_GGUF_ARCHES, SAM2_GGUF_ARCHES, SAM3_GGUF_ARCHES,
32    VJEPA2_GGUF_ARCHES, W2V_BERT_GGUF_ARCHES,
33};
34use rlx_core::gguf_support::{
35    gguf_architecture_from_path, gguf_family_for_arch, resolve_weights_file,
36};
37use std::path::{Path, PathBuf};
38
39use crate::registry::run_registered;
40
41/// Entry point for an `rlx-run auto WEIGHTS [args...]` subcommand.
42///
43/// Treats the first positional as the weights path (file or directory),
44/// sniffs the runner, and forwards the remaining args to it. The
45/// canonical wiring is `register_cli("auto", "...", rlx_cli::run_auto)`
46/// in the multiplexer.
47pub fn run_auto(args: &[String]) -> Result<()> {
48    let Some(first) = args.first() else {
49        bail!(
50            "auto: expected WEIGHTS path as the first argument\n\
51             usage: rlx-run auto <weights-path> [runner-args...]"
52        );
53    };
54    if matches!(first.as_str(), "-h" | "--help" | "help") {
55        println!(
56            "rlx-run auto — sniff a GGUF / safetensors file and dispatch to the right runner\n\
57             \n\
58             USAGE:\n  rlx-run auto <weights-path> [runner-args...]\n\
59             \n\
60             The first argument is forwarded as the runner's --weights value;\n\
61             remaining arguments are passed through unchanged."
62        );
63        return Ok(());
64    }
65    let path = Path::new(first);
66    let sniff = auto_sniff(path)?;
67    eprintln!(
68        "[rlx-run auto] {} → runner `{}` (from {:?})",
69        sniff.path.display(),
70        sniff.runner_name,
71        sniff.from
72    );
73    // Re-build argv: most per-family runners take `--weights PATH`. If the
74    // caller already passed --weights, don't double it; otherwise inject.
75    let rest: Vec<String> = args[1..].to_vec();
76    let has_weights_flag = rest
77        .iter()
78        .any(|a| a == "--weights" || a.starts_with("--weights="));
79    let mut forwarded: Vec<String> = Vec::with_capacity(rest.len() + 2);
80    if !has_weights_flag {
81        forwarded.push("--weights".into());
82        forwarded.push(sniff.path.display().to_string());
83    }
84    forwarded.extend(rest);
85    match run_registered(sniff.runner_name, &forwarded)? {
86        Some(()) => Ok(()),
87        None => bail!(
88            "auto: runner `{}` not registered (sniffed from {:?}); register it via \
89             `register_cli` in your binary's main",
90            sniff.runner_name,
91            sniff.from
92        ),
93    }
94}
95
/// Which piece of metadata identified the model family.
///
/// Each variant carries the raw string that was read, so diagnostics can
/// show exactly what the sniffer saw.
#[derive(Clone, Debug)]
pub enum SniffedFrom {
    /// Value of `general.architecture` read from a `.gguf` file.
    GgufArch(String),
    /// Value of `model_type` read from a sidecar `config.json`.
    SafetensorsConfig(String),
}
104
105/// Result of sniffing a weights path.
106#[derive(Debug, Clone)]
107pub struct SniffedRunner {
108    /// Concrete file we sniffed (after resolving a directory).
109    pub path: PathBuf,
110    /// Short runner name as registered with `register_cli`.
111    pub runner_name: &'static str,
112    /// Where the sniff came from — useful for diagnostics.
113    pub from: SniffedFrom,
114}
115
/// Description of a catalog architecture that RLX recognizes but does not
/// have a runner for yet.
///
/// [`known_unimplemented_arch`] hands these out so that error messages can
/// name the PLAN.md milestone that unblocks the family.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct UnimplementedArch {
    /// Human-readable family name (e.g. `"Mistral 3.5"`).
    pub family: &'static str,
    /// PLAN.md milestone tag (e.g. `"M4"`).
    pub milestone: &'static str,
    /// Single-line hint shown to the user.
    pub note: &'static str,
}
128
129/// Family-level metadata referenced by [`KNOWN_UNIMPLEMENTED`]. Static so
130/// the phf map can hold `&'static UnimplementedArch`.
131mod families {
132    use super::UnimplementedArch;
133    pub static MISTRAL: UnimplementedArch = UnimplementedArch {
134        family: "Mistral 3+ / Ministral",
135        milestone: "M4",
136        note: "Llama-shaped with newer RoPE; share `rlx-llama-base` per PLAN.md M4",
137    };
138    pub static PHIMOE: UnimplementedArch = UnimplementedArch {
139        family: "Phi MoE",
140        milestone: "M4 + M5",
141        note: "Phi + MoE routing; depends on shared MoE block — PLAN.md M4/M5",
142    };
143    pub static BONSAI: UnimplementedArch = UnimplementedArch {
144        family: "Bonsai",
145        milestone: "M4",
146        note: "Llama-shaped; HF model_type only — usually ships as llama GGUF — PLAN.md M4",
147    };
148    pub static OMNICODER: UnimplementedArch = UnimplementedArch {
149        family: "OmniCoder",
150        milestone: "M4",
151        note: "Qwen3-coder shaped — PLAN.md M4 (often tagged `qwen3` in GGUF)",
152    };
153    pub static MINIMAX: UnimplementedArch = UnimplementedArch {
154        family: "MiniMax M2",
155        milestone: "M5",
156        note: "Lightning Attention; depends on `rlx-ssm` upstream — PLAN.md M5",
157    };
158    pub static GLM: UnimplementedArch = UnimplementedArch {
159        family: "GLM 4 / 5",
160        milestone: "M5",
161        note: "GLM RoPE + RMSNorm placement — PLAN.md M5",
162    };
163    pub static GLM_MOE: UnimplementedArch = UnimplementedArch {
164        family: "GLM 4 MoE",
165        milestone: "M5",
166        note: "GLM + MoE routing — PLAN.md M5",
167    };
168    pub static GPT_OSS: UnimplementedArch = UnimplementedArch {
169        family: "gpt-oss",
170        milestone: "M5",
171        note: "OpenAI gpt-oss — confirm arch shape — PLAN.md M5",
172    };
173    pub static NEMOTRON: UnimplementedArch = UnimplementedArch {
174        family: "Nemotron",
175        milestone: "M5",
176        note: "Dense Nemotron arch — PLAN.md M5",
177    };
178    pub static NEMOTRON_H: UnimplementedArch = UnimplementedArch {
179        family: "Nemotron-H",
180        milestone: "M5",
181        note: "Mamba+attention hybrid; depends on `rlx-ssm` upstream — PLAN.md M5/M7",
182    };
183    #[allow(dead_code)]
184    pub static LFM: UnimplementedArch = UnimplementedArch {
185        family: "LFM 2 / 2.5",
186        milestone: "M5",
187        note: "Liquid Foundation Models with custom SSM layers — PLAN.md M5",
188    };
189    pub static LFM_MOE: UnimplementedArch = UnimplementedArch {
190        family: "LFM 2 MoE",
191        milestone: "M5",
192        note: "LFM + MoE — PLAN.md M5",
193    };
194    pub static QWEN3_MOE: UnimplementedArch = UnimplementedArch {
195        family: "Qwen3 MoE",
196        milestone: "M5",
197        note: "Qwen3 + MoE routing block — PLAN.md M5 (often loadable via qwen3 runner once MoE lands)",
198    };
199    pub static QWEN3_NEXT: UnimplementedArch = UnimplementedArch {
200        family: "Qwen3-Next",
201        milestone: "M5",
202        note: "Qwen3-Next variant — confirm arch deltas vs qwen3 — PLAN.md M5",
203    };
204    pub static GEMMA4: UnimplementedArch = UnimplementedArch {
205        family: "Gemma 4 MoE (A4B)",
206        milestone: "M2",
207        note: "Gemma 4 MoE A4B routing block — dense + E2B/E4B run via the `gemma` runner — PLAN.md M2",
208    };
209    pub static QWEN3_VL: UnimplementedArch = UnimplementedArch {
210        family: "Qwen3-VL",
211        milestone: "M7",
212        note: "vision tower + projector + LM (dense or MoE) — PLAN.md M7",
213    };
214    pub static QWEN3_MTP: UnimplementedArch = UnimplementedArch {
215        family: "Qwen3 / Qwen3.6 + MTP",
216        milestone: "M6",
217        note: "multi-token-prediction draft heads — PLAN.md M6",
218    };
219    pub static LLADA: UnimplementedArch = UnimplementedArch {
220        family: "LLaDA / LLaDA MoE (text-only)",
221        milestone: "M5",
222        note: "dense LLaDA arch in llama.cpp; rlx-llada2 currently targets the diffusion runner — PLAN.md M5",
223    };
224    pub static GRANITE: UnimplementedArch = UnimplementedArch {
225        family: "Granite (IBM)",
226        milestone: "M4",
227        note: "Llama-shaped — PLAN.md M4",
228    };
229    pub static DEEPSEEK: UnimplementedArch = UnimplementedArch {
230        family: "DeepSeek 2",
231        milestone: "M5",
232        note: "MoE + MLA attention — needs MoE block + MLA primitive — PLAN.md M5",
233    };
234    pub static COHERE: UnimplementedArch = UnimplementedArch {
235        family: "Command-R / Cohere",
236        milestone: "M4",
237        note: "Llama-shaped — PLAN.md M4",
238    };
239}
240
241/// Catalog families we know about but haven't implemented yet.
242///
243/// The keys are the **actual** GGUF `general.architecture` strings llama.cpp
244/// uses (`src/llama-arch.cpp::LLM_ARCH_NAMES`) plus their HF `model_type`
245/// aliases when those differ. Notably:
246///
247/// * Mistral 1/2 and Qwen 2.5 ship as `general.architecture = llama` /
248///   `qwen2` respectively — they don't have their own llama.cpp arch tag.
249///   Those tags route to the existing `llama32` / `qwen3` runners and are
250///   *not* listed here.
251/// * Mistral 3+ ships as `mistral3` / `mistral4` (real tags).
252/// * Phi-4 ships as `phi3` (Phi-4 reuses the Phi-3 arch in llama.cpp).
253///
254/// Both GGUF arch tags and HF `model_type` values are accepted so
255/// downstream callers don't keep two parallel lists.
256static KNOWN_UNIMPLEMENTED: phf::Map<&'static str, &'static UnimplementedArch> = phf::phf_map! {
257    // Mistral / Ministral (real llama.cpp tags)
258    "mistral3" => &families::MISTRAL,
259    "mistral4" => &families::MISTRAL,
260    // Phi MoE — still pending.
261    "phimoe" => &families::PHIMOE,
262    // Catalog HF model_type aliases — same remap gap as phi3.
263    "bonsai" => &families::BONSAI,
264    "omnicoder" => &families::OMNICODER,
265    // Hybrid / SSM families
266    "minimax-m2" => &families::MINIMAX,
267    "minimax_m2" => &families::MINIMAX,
268    "minimax" => &families::MINIMAX,
269    "glm4" => &families::GLM,
270    "glm5" => &families::GLM,
271    "chatglm" => &families::GLM,
272    "glm4moe" => &families::GLM_MOE,
273    "gpt-oss" => &families::GPT_OSS,
274    "gpt_oss" => &families::GPT_OSS,
275    "nemotron" => &families::NEMOTRON,
276    "nemotron_h" => &families::NEMOTRON_H,
277    "nemotron_h_moe" => &families::NEMOTRON_H,
278    // lfm2 / lfm / lfm25 / lfm2_5 are now routed through `rlx-lfm`'s
279    // `LfmRunner` via `gguf_family_for_arch` → `GgufModelFamily::Lfm`.
280    // Only the MoE variant remains unimplemented.
281    "lfm2moe" => &families::LFM_MOE,
282    // Qwen variants we don't run yet
283    "qwen3moe" => &families::QWEN3_MOE,
284    "qwen3next" => &families::QWEN3_NEXT,
285    // Gemma 3 / 3n route through `gemma` (see `model_type_runner_name`).
286    // Only the MoE A4B variant remains unimplemented.
287    "gemma4moe" => &families::GEMMA4,
288    "qwen3vl" => &families::QWEN3_VL,
289    "qwen3vlmoe" => &families::QWEN3_VL,
290    "qwen3_vl" => &families::QWEN3_VL,
291    "qwen3-vl" => &families::QWEN3_VL,
292    "qwen3_mtp" => &families::QWEN3_MTP,
293    "qwen3-mtp" => &families::QWEN3_MTP,
294    "qwen36_mtp" => &families::QWEN3_MTP,
295    // Other catalog-adjacent families
296    "llada" => &families::LLADA,
297    "llada-moe" => &families::LLADA,
298    "granite" => &families::GRANITE,
299    "granitemoe" => &families::GRANITE,
300    "granitehybrid" => &families::GRANITE,
301    "deepseek2" => &families::DEEPSEEK,
302    "deepseek2-ocr" => &families::DEEPSEEK,
303    "command-r" => &families::COHERE,
304    "cohere2" => &families::COHERE,
305};
306
307/// Look up an arch / model_type in the unimplemented-families table.
308pub fn known_unimplemented_arch(arch_or_model_type: &str) -> Option<UnimplementedArch> {
309    KNOWN_UNIMPLEMENTED.get(arch_or_model_type).map(|p| **p)
310}
311
312/// Snapshot of every (key, family) pair currently in the unimplemented
313/// table — useful for `rlx-run check --list-unimplemented` style tooling.
314pub fn known_unimplemented_keys() -> impl Iterator<Item = (&'static str, &'static UnimplementedArch)>
315{
316    KNOWN_UNIMPLEMENTED.entries().map(|(k, v)| (*k, *v))
317}
318
319/// Map a GGUF `general.architecture` tag to the short runner name.
320///
321/// Returns `None` for embed-only families (`bert`, `nomic-bert`, …) which
322/// aren't currently exposed through the `rlx-run` dispatch table, and for
323/// catalog families that aren't implemented yet — those get a richer error
324/// via [`known_unimplemented_arch`] when sniffed.
325pub fn arch_runner_name(arch: &str) -> Option<&'static str> {
326    match arch {
327        "phi3" | "phi4" => return Some("phi"),
328        _ => {}
329    }
330    if let Some(fam) = gguf_family_for_arch(arch) {
331        return Some(fam.runner_name());
332    }
333    if FLUX_GGUF_ARCHES.contains(&arch) {
334        return Some("flux2");
335    }
336    if DINOV2_GGUF_ARCHES.contains(&arch) {
337        return Some("dinov2");
338    }
339    if VJEPA2_GGUF_ARCHES.contains(&arch) {
340        return Some("vjepa2");
341    }
342    if SAM3_GGUF_ARCHES.contains(&arch) {
343        return Some("sam3");
344    }
345    if SAM2_GGUF_ARCHES.contains(&arch) {
346        return Some("sam2");
347    }
348    if SAM_GGUF_ARCHES.contains(&arch) {
349        return Some("sam1");
350    }
351    if W2V_BERT_GGUF_ARCHES.contains(&arch) {
352        return Some("wav2vec2-bert");
353    }
354    None
355}
356
/// Translate an HF `config.json` `model_type` into a short runner name.
///
/// HF spellings differ from GGUF tags: `model_type: "llama"` stands for
/// Llama 2 / 3 / 3.x, `qwen3` stands for both Qwen3 and Qwen3 MoE, and so
/// on. The match is exact and case-sensitive; anything not listed yields
/// `None`.
pub fn model_type_runner_name(model_type: &str) -> Option<&'static str> {
    let runner = match model_type {
        // `qwen2` is intentionally not matched here.
        // NOTE(review): the stated reason was that rlx-qwen3 lacks the
        // Qwen 2 safetensors tensor layout (q/k/v bias, no QK-norm) —
        // confirm this still holds before adding it.
        "qwen3" | "qwen3_moe" | "qwen3moe" | "qwen25" | "qwen2_5" | "qwen2.5" | "qwen251"
        | "qwen2_5_1" => "qwen3",
        // Qwen3.5, plus Qwen3.6 which runs through the same qwen35 trunk
        // (PLAN.md M1).
        "qwen35" | "qwen3_5" | "qwen35_moe" | "qwen35moe" | "qwen36" | "qwen3_6"
        | "qwen36_moe" | "qwen36moe" => "qwen35",
        "llama" | "llama2" | "llama3" => "llama32",
        // Gemma 4 dense + E2B/E4B mobile (PLE + KV-shared). `gemma4` is the
        // unified top-level model_type and `gemma4_text` the text
        // sub-config. The MoE A4B variant (`gemma4moe`) is left to the
        // unimplemented table until its routing block is validated.
        "gemma" | "gemma2" | "gemma3" | "gemma3n" | "gemma4" | "gemma4_text"
        | "gemma4_unified" | "gemma4_unified_text" => "gemma",
        "dinov2" | "dinov2_with_registers" => "dinov2",
        "vjepa2" | "vjepa" => "vjepa2",
        "sam" | "sam_vit" | "mobile-sam" | "mobile_sam" => "sam1",
        "sam2" => "sam2",
        "sam3" => "sam3",
        "whisper" => "whisper",
        "wav2vec2-bert" | "wav2vec2_bert" | "w2v-bert" | "w2v_bert" => "wav2vec2-bert",
        "flux" | "flux2" => "flux2",
        _ => return None,
    };
    Some(runner)
}
395
396/// Sniff `model_type` from the `config.json` next to a safetensors file.
397fn read_model_type_from_sidecar(path: &Path) -> Result<Option<String>> {
398    let dir = path
399        .parent()
400        .ok_or_else(|| anyhow!("safetensors path {path:?} has no parent dir"))?;
401    let cfg = dir.join("config.json");
402    if !cfg.is_file() {
403        return Ok(None);
404    }
405    let bytes = std::fs::read(&cfg).with_context(|| format!("reading {cfg:?}"))?;
406    let v: serde_json::Value =
407        serde_json::from_slice(&bytes).with_context(|| format!("parsing {cfg:?}"))?;
408    Ok(v.get("model_type")
409        .and_then(serde_json::Value::as_str)
410        .map(str::to_owned))
411}
412
413/// Resolve `path` to a single weight file, then sniff the runner.
414pub fn auto_sniff(path: &Path) -> Result<SniffedRunner> {
415    let file = resolve_weights_file(path)?;
416    let ext = file.extension().and_then(|s| s.to_str()).unwrap_or("");
417    match ext {
418        "gguf" => {
419            let arch = gguf_architecture_from_path(&file)?;
420            let runner = arch_runner_name(&arch).ok_or_else(|| {
421                if let Some(u) = known_unimplemented_arch(&arch) {
422                    anyhow!(
423                        "{file:?}: GGUF architecture `{arch}` is {} ({}) — not yet implemented in rlx-models. {}",
424                        u.family, u.milestone, u.note
425                    )
426                } else {
427                    anyhow!(
428                        "{file:?}: GGUF architecture `{arch}` has no registered rlx runner; \
429                         see `rlx-run` for supported families"
430                    )
431                }
432            })?;
433            Ok(SniffedRunner {
434                path: file,
435                runner_name: runner,
436                from: SniffedFrom::GgufArch(arch),
437            })
438        }
439        "safetensors" => {
440            let model_type = read_model_type_from_sidecar(&file)?.ok_or_else(|| {
441                anyhow!("{file:?}: no `model_type` in sidecar config.json (auto-dispatch needs it)")
442            })?;
443            let runner = model_type_runner_name(&model_type).ok_or_else(|| {
444                if let Some(u) = known_unimplemented_arch(&model_type) {
445                    anyhow!(
446                        "{file:?}: safetensors model_type `{model_type}` is {} ({}) — not yet implemented in rlx-models. {}",
447                        u.family, u.milestone, u.note
448                    )
449                } else {
450                    anyhow!(
451                        "{file:?}: safetensors model_type `{model_type}` has no registered rlx runner"
452                    )
453                }
454            })?;
455            Ok(SniffedRunner {
456                path: file,
457                runner_name: runner,
458                from: SniffedFrom::SafetensorsConfig(model_type),
459            })
460        }
461        other => {
462            bail!("{file:?}: unsupported extension `.{other}` (expected .gguf or .safetensors)")
463        }
464    }
465}
466
467/// Sniff `path` and return only the runner short name.
468pub fn auto_runner_name(path: &Path) -> Result<&'static str> {
469    Ok(auto_sniff(path)?.runner_name)
470}
471
472/// Sniff `path`, look up its runner in the registry, and run it with `args`.
473///
474/// `args` should be the per-runner argv *without* the leading subcommand.
475/// Returns the runner name that was dispatched to.
476pub fn auto_dispatch(path: &Path, args: &[String]) -> Result<&'static str> {
477    let sniff = auto_sniff(path)?;
478    match run_registered(sniff.runner_name, args)? {
479        Some(()) => Ok(sniff.runner_name),
480        None => bail!(
481            "runner `{}` not registered (sniffed from {:?}); register it via \
482             `register_cli` before calling auto_dispatch",
483            sniff.runner_name,
484            sniff.from
485        ),
486    }
487}
488
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the bytes of a minimal GGUF file whose only metadata entry is
    /// `general.architecture = arch` and whose only tensor is `w`, an f32
    /// tensor with 4 elements (all `1.0`).
    ///
    /// Shared by every test that needs an on-disk GGUF so the byte layout
    /// lives in exactly one place.
    fn minimal_gguf(arch: &str) -> Vec<u8> {
        // Length-prefixed string: u64 little-endian byte length, then bytes.
        fn put_str(buf: &mut Vec<u8>, s: &str) {
            buf.extend_from_slice(&(s.len() as u64).to_le_bytes());
            buf.extend_from_slice(s.as_bytes());
        }

        let mut buf: Vec<u8> = Vec::new();
        buf.extend_from_slice(&rlx_gguf::GGUF_MAGIC.to_le_bytes());
        buf.extend_from_slice(&3u32.to_le_bytes());
        buf.extend_from_slice(&1u64.to_le_bytes()); // tensor count
        buf.extend_from_slice(&1u64.to_le_bytes()); // kv count

        // The single key/value pair; `8` is the value-type tag written
        // ahead of the string payload.
        put_str(&mut buf, "general.architecture");
        buf.extend_from_slice(&8u32.to_le_bytes());
        put_str(&mut buf, arch);

        // one f32 tensor with 4 elements
        put_str(&mut buf, "w");
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.extend_from_slice(&4u64.to_le_bytes());
        buf.extend_from_slice(&(rlx_gguf::GgmlType::F32 as u32).to_le_bytes());
        buf.extend_from_slice(&0u64.to_le_bytes());

        // Pad up to the default alignment before the tensor payload.
        while !buf
            .len()
            .is_multiple_of(rlx_gguf::DEFAULT_ALIGNMENT as usize)
        {
            buf.push(0);
        }
        for _ in 0..4 {
            buf.extend_from_slice(&1.0f32.to_le_bytes());
        }
        buf
    }

    #[test]
    fn arch_runner_maps_lm_families() {
        assert_eq!(arch_runner_name("qwen3"), Some("qwen3"));
        // qwen2 now routes to the qwen3 runner — the runner reads
        // attention_bias + qk_norm from the GGUF arch tag and emits
        // the right per-layer math.
        assert_eq!(arch_runner_name("qwen2"), Some("qwen3"));
        assert_eq!(arch_runner_name("qwen35"), Some("qwen35"));
        assert_eq!(arch_runner_name("qwen35moe"), Some("qwen35"));
        // Qwen3.6 reuses the qwen35 trunk (PLAN.md M1). qwen36_mtp still
        // routes through known_unimplemented_arch — base qwen36 routes
        // here so unsloth/Qwen3.6-27B-GGUF (no MTP) just works.
        assert_eq!(arch_runner_name("qwen36"), Some("qwen35"));
        assert_eq!(arch_runner_name("qwen36moe"), Some("qwen35"));
        // Qwen 2.5 / 2.5.1 ship as `qwen2` arch tag; explicit short
        // tags also route to the qwen3 runner (PLAN.md M4).
        assert_eq!(arch_runner_name("qwen25"), Some("qwen3"));
        assert_eq!(arch_runner_name("qwen2_5"), Some("qwen3"));
        assert_eq!(arch_runner_name("llama"), Some("llama32"));
        assert_eq!(arch_runner_name("phi3"), Some("phi"));
        assert_eq!(arch_runner_name("gemma"), Some("gemma"));
        assert_eq!(arch_runner_name("gemma2"), Some("gemma"));
        assert_eq!(arch_runner_name("gemma3"), Some("gemma"));
    }

    #[test]
    fn arch_runner_maps_vision_and_diffusion() {
        assert_eq!(arch_runner_name("dinov2"), Some("dinov2"));
        assert_eq!(arch_runner_name("sam"), Some("sam1"));
        assert_eq!(arch_runner_name("mobile-sam"), Some("sam1"));
        assert_eq!(arch_runner_name("sam2"), Some("sam2"));
        assert_eq!(arch_runner_name("sam3"), Some("sam3"));
        assert_eq!(arch_runner_name("flux"), Some("flux2"));
        assert_eq!(arch_runner_name("vjepa2"), Some("vjepa2"));
        assert_eq!(arch_runner_name("w2v-bert"), Some("wav2vec2-bert"));
    }

    #[test]
    fn arch_runner_returns_none_for_embed_and_unknown() {
        // Embed families aren't in the rlx-run dispatch table today.
        assert_eq!(arch_runner_name("bert"), None);
        assert_eq!(arch_runner_name("nomic-bert"), None);
        assert_eq!(arch_runner_name("totally-fake-arch"), None);
    }

    #[test]
    fn known_unimplemented_covers_plan_families() {
        // M4 — Llama-shaped (real llama.cpp tags)
        assert_eq!(
            known_unimplemented_arch("mistral3").map(|u| u.milestone),
            Some("M4")
        );
        assert_eq!(known_unimplemented_arch("phi3"), None);
        assert_eq!(known_unimplemented_arch("phi4"), None);
        assert_eq!(known_unimplemented_arch("gemma3"), None);
        assert_eq!(known_unimplemented_arch("gemma3n"), None);
        assert_eq!(
            known_unimplemented_arch("bonsai").map(|u| u.milestone),
            Some("M4")
        );
        // M5 — MoE / SSM
        assert_eq!(
            known_unimplemented_arch("minimax-m2").map(|u| u.milestone),
            Some("M5")
        );
        assert_eq!(
            known_unimplemented_arch("glm4").map(|u| u.milestone),
            Some("M5")
        );
        assert_eq!(
            known_unimplemented_arch("nemotron_h").map(|u| u.milestone),
            Some("M5")
        );
        // M6 — MTP
        assert_eq!(
            known_unimplemented_arch("qwen3_mtp").map(|u| u.milestone),
            Some("M6")
        );
        // M7 — VL
        assert_eq!(
            known_unimplemented_arch("qwen3vl").map(|u| u.milestone),
            Some("M7")
        );
        // Implemented or unknown — plain `mistral` is NOT a llama.cpp arch
        // tag (Mistral 1/2 use `llama`), so it should not be flagged.
        assert_eq!(known_unimplemented_arch("qwen3"), None);
        assert_eq!(known_unimplemented_arch("mistral"), None);
        assert_eq!(known_unimplemented_arch("totally-fake"), None);
    }

    /// A GGUF tagged with a known-unimplemented arch must produce an error
    /// that names the family and its milestone.
    #[test]
    fn auto_sniff_error_points_at_milestone_for_known_unimplemented() {
        let path = std::env::temp_dir().join("rlx_auto_dispatch_mistral3_hint.gguf");
        std::fs::write(&path, minimal_gguf("mistral3")).unwrap();
        let err = auto_sniff(&path).expect_err("should error");
        let s = format!("{err:#}");
        assert!(s.contains("Mistral"), "expected family name in error: {s}");
        assert!(s.contains("M4"), "expected milestone tag in error: {s}");
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn model_type_runner_maps_known() {
        assert_eq!(model_type_runner_name("qwen3"), Some("qwen3"));
        assert_eq!(model_type_runner_name("qwen3_moe"), Some("qwen3"));
        assert_eq!(model_type_runner_name("llama"), Some("llama32"));
        assert_eq!(model_type_runner_name("gemma3"), Some("gemma"));
        // Gemma 4 dense + E2B mobile route to the gemma runner.
        assert_eq!(model_type_runner_name("gemma4"), Some("gemma"));
        assert_eq!(model_type_runner_name("gemma4_text"), Some("gemma"));
        assert_eq!(model_type_runner_name("gemma4_unified"), Some("gemma"));
        // Gemma 4 dense is no longer flagged unimplemented; MoE A4B still is.
        assert!(known_unimplemented_arch("gemma4").is_none());
        assert!(known_unimplemented_arch("gemma4moe").is_some());
        assert_eq!(
            model_type_runner_name("dinov2_with_registers"),
            Some("dinov2")
        );
        assert_eq!(model_type_runner_name("whisper"), Some("whisper"));
        assert_eq!(model_type_runner_name("unknown"), None);
    }

    /// Writes a minimal GGUF file to the temp dir, then verifies auto_sniff
    /// picks the right runner name from `general.architecture`.
    #[test]
    fn auto_sniff_reads_gguf_arch() {
        let path = std::env::temp_dir().join("rlx_auto_dispatch_sniff.gguf");
        std::fs::write(&path, minimal_gguf("qwen3")).unwrap();
        let sniff = auto_sniff(&path).expect("sniff");
        assert_eq!(sniff.runner_name, "qwen3");
        match sniff.from {
            SniffedFrom::GgufArch(a) => assert_eq!(a, "qwen3"),
            other => panic!("wrong sniff source: {other:?}"),
        }
        std::fs::remove_file(&path).ok();
    }

    /// Register a fake runner under a known name, ask `run_auto` to dispatch
    /// to it, and capture what argv it received.
    #[test]
    fn run_auto_injects_weights_flag_when_missing() {
        use crate::registry::{ModelRunner, register_runner};
        use std::sync::{Mutex, OnceLock};

        static CAPTURED: OnceLock<Mutex<Vec<String>>> = OnceLock::new();
        fn captured() -> &'static Mutex<Vec<String>> {
            CAPTURED.get_or_init(|| Mutex::new(Vec::new()))
        }

        struct Capture;
        impl ModelRunner for Capture {
            fn name(&self) -> &'static str {
                "qwen3"
            }
            fn description(&self) -> &'static str {
                "test capture"
            }
            fn run(&self, args: &[String]) -> Result<()> {
                *captured().lock().unwrap() = args.to_vec();
                Ok(())
            }
        }
        register_runner(Box::new(Capture));

        // Write a minimal qwen3 GGUF into a temp dir.
        let dir = std::env::temp_dir().join("rlx_auto_dispatch_run_auto");
        std::fs::create_dir_all(&dir).unwrap();
        let path = dir.join("model.gguf");
        std::fs::write(&path, minimal_gguf("qwen3")).unwrap();

        // Caller passed no --weights → run_auto must inject it.
        run_auto(&[path.display().to_string(), "--prompt".into(), "hi".into()]).unwrap();
        let got = captured().lock().unwrap().clone();
        assert_eq!(
            got,
            vec![
                "--weights".to_string(),
                path.display().to_string(),
                "--prompt".into(),
                "hi".into()
            ]
        );

        // Caller already passed --weights → don't inject again.
        run_auto(&[
            path.display().to_string(),
            "--weights".into(),
            "/other/path".into(),
            "--prompt".into(),
            "hi".into(),
        ])
        .unwrap();
        let got = captured().lock().unwrap().clone();
        assert_eq!(
            got,
            vec![
                "--weights".to_string(),
                "/other/path".into(),
                "--prompt".into(),
                "hi".into(),
            ]
        );

        std::fs::remove_dir_all(&dir).ok();
    }

    #[test]
    fn auto_sniff_reads_safetensors_sidecar() {
        let dir = std::env::temp_dir().join("rlx_auto_dispatch_sidecar");
        std::fs::create_dir_all(&dir).unwrap();
        let cfg = dir.join("config.json");
        std::fs::write(&cfg, br#"{"model_type":"llama"}"#).unwrap();
        let st = dir.join("model.safetensors");
        // Empty file is fine — sniffer never opens the safetensors payload.
        std::fs::write(&st, b"").unwrap();
        let sniff = auto_sniff(&st).expect("sniff");
        assert_eq!(sniff.runner_name, "llama32");
        std::fs::remove_dir_all(&dir).ok();
    }
}
rlx_cli/auto_dispatch.rs

rlx_cli/
auto_dispatch.rs