use clap::{Args, Subcommand};
use super::util::llm_model_completion_parser;

/// `harn local` — manage local LLM runtimes (Ollama, llama.cpp,
/// MLX, and generic OpenAI-compatible servers) through one stable
/// abstraction while the underlying CLIs keep changing.
#[derive(Debug, Args)]
pub(crate) struct LocalArgs {
    #[command(subcommand)]
    pub command: LocalCommand,
}

#[derive(Debug, Subcommand)]
pub(crate) enum LocalCommand {
    /// Survey every local provider Harn knows about: base URL, reachability,
    /// served models, loaded models, memory footprint, context, keep-alive.
    List(LocalListArgs),

    /// Show the currently selected local provider/model and a brief summary
    /// of every other local runtime.
    Status(LocalStatusArgs),

    /// Make `<alias>` the active local model: warm it on its provider,
    /// unload conflicting models, and persist the selection.
    Switch(LocalSwitchArgs),

    /// Unload loaded local models. By default this targets the active
    /// provider; pass `--all` to unload every reachable local provider.
    Stop(LocalStopArgs),
}
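
/// Example invocations (illustrative; they use only the flags defined on
/// this struct):
///
/// ```text
/// harn local list
/// harn local list --provider ollama --json
/// ```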
#[derive(Debug, Args)]
pub(crate) struct LocalListArgs {
    /// Emit a structured JSON snapshot instead of a human-readable table.
    #[arg(long)]
    pub json: bool,

    /// Restrict output to one provider id (e.g. `ollama`, `llamacpp`, `mlx`).
    #[arg(long)]
    pub provider: Option<String>,
}
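
/// Example invocation (illustrative):
///
/// ```text
/// harn local status --json
/// ```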
#[derive(Debug, Args)]
pub(crate) struct LocalStatusArgs {
    /// Emit a structured JSON snapshot instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
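
/// Example invocations (illustrative; the model ids are the same examples
/// used in the field docs, and the `--ctx` value is arbitrary):
///
/// ```text
/// harn local switch qwen36-coder
/// harn local switch ollama:llama3.2 --ctx 8192 --keep-alive 30m
/// ```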
#[derive(Debug, Args)]
pub(crate) struct LocalSwitchArgs {
    /// Model alias or provider-native model id (e.g. `qwen36-coder`,
    /// `ollama:llama3.2`, `mlx-qwen36-27b`).
    #[arg(
        value_parser = llm_model_completion_parser(),
        hide_possible_values = true
    )]
    pub model: String,

    /// Override the inferred provider (e.g. force `--provider llamacpp` for
    /// a GGUF id that would otherwise route to `ollama`).
    #[arg(long)]
    pub provider: Option<String>,

    /// Context window override (Ollama: `num_ctx`). Defaults come from the
    /// machine profile derived from `harn models recommend`.
    #[arg(long)]
    pub ctx: Option<u64>,

    /// Keep-alive value to apply to the target provider (Ollama only for
    /// now; e.g. `30m`, `forever`, `-1`).
    #[arg(long = "keep-alive")]
    pub keep_alive: Option<String>,

    /// Skip pulling the model when it is missing (Ollama only).
    #[arg(long = "no-pull")]
    pub no_pull: bool,

    /// Skip unloading other local providers and sibling models.
    #[arg(long = "no-evict")]
    pub no_evict: bool,

    /// Emit a structured JSON result.
    #[arg(long)]
    pub json: bool,
}
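
/// Example invocations (illustrative):
///
/// ```text
/// harn local stop
/// harn local stop --provider llamacpp --json
/// ```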
#[derive(Debug, Args)]
pub(crate) struct LocalStopArgs {
    /// Unload every reachable local provider, not just the active one.
    #[arg(long)]
    pub all: bool,

    /// Target one provider id (overrides `--all`).
    #[arg(long)]
    pub provider: Option<String>,

    /// Emit a structured JSON result.
    #[arg(long)]
    pub json: bool,
}
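
// A minimal dispatch sketch for these subcommands. The handler paths
// (`list::run` and friends) and the `anyhow::Result` return type are
// assumptions for illustration; only the `LocalCommand` variants come
// from this module.
//
// pub(crate) fn run(args: LocalArgs) -> anyhow::Result<()> {
//     match args.command {
//         LocalCommand::List(a) => list::run(a),
//         LocalCommand::Status(a) => status::run(a),
//         LocalCommand::Switch(a) => switch::run(a),
//         LocalCommand::Stop(a) => stop::run(a),
//     }
// }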