1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
//! `merlion fallback` — manage the provider fallback chain.
//!
//! The chain is stored at `~/.merlion/fallback.yaml` (see
//! [`merlion_config::FallbackChain`]). When non-empty, runtime LLM
//! construction sites in `main.rs` wrap their primary client with a
//! [`merlion_llm::FallbackLlmClient`] that transparently falls through to
//! the next provider on a retriable 429 / 5xx error.
use ;
use Subcommand;
use ;
pub async
/// Confirm the id parses as a known provider preset by feeding it through
/// `Config::resolve_provider`. This catches typos like `openroute:...` at
/// `merlion fallback add` time rather than at runtime fallthrough.
// -----------------------------------------------------------------------------
// WIRING SPEC — apply to `crates/merlion-cli/src/main.rs`.
//
// 1. Add a module declaration near the other `mod` lines at the top of main.rs:
//
// mod fallback_cmd;
//
// 2. Add a new variant to the `Command` enum:
//
// /// Manage the provider fallback chain.
// ///
// /// When the primary LLM (from config.yaml) returns a retriable
// /// 429 / 5xx error after exhausting its own retries, merlion will
// /// transparently fall through to the next provider in this chain.
// /// Stored at `~/.merlion/fallback.yaml`.
// Fallback {
// #[command(subcommand)]
// action: fallback_cmd::FallbackAction,
// },
//
// 3. Add a dispatch arm in the `match cli.command.unwrap_or(...)` block in
// `main()`:
//
// Command::Fallback { action } => fallback_cmd::run(action).await,
//
// 4. Add the helper below at module scope in main.rs. It wraps a primary
// `Arc<dyn LlmClient>` with `FallbackLlmClient` when the user's chain is
// non-empty; otherwise returns the primary unchanged. Constructing each
// fallback entry mirrors the existing per-wire match used by `chat()`,
// `oneshot_cmd()`, `gateway_cmd()`, and `build_cli_runner()`:
//
// ```rust
// use merlion_config::{FallbackChain, ModelConfig};
// use merlion_llm::FallbackLlmClient;
//
// /// Wrap `primary` with a fallback chain loaded from
// /// `~/.merlion/fallback.yaml`. The primary is `cfg.model.id`; each
// /// entry in the chain is a `"provider:model"` string resolved through
// /// the same `ModelConfig` -> `resolve_provider` -> wire-typed client
// /// path that the four LLM construction sites use today.
// ///
// /// Returns `primary` unchanged when the chain is empty, fails to
// /// load, or yields no usable fallback client (we log the error but
// /// don't fail the whole command — a broken fallback file should not
// /// prevent chat from starting).
// fn wrap_with_fallback(primary: Arc<dyn LlmClient>, cfg: &Config) -> Arc<dyn LlmClient> {
// let chain_cfg = match FallbackChain::load() {
// Ok(c) => c,
// Err(e) => {
// tracing::warn!(error = %e, "failed to load fallback chain; using primary only");
// return primary;
// }
// };
// if chain_cfg.chain.is_empty() {
// return primary;
// }
//
// let mut clients: Vec<Arc<dyn LlmClient>> = Vec::new();
// let mut names: Vec<String> = vec![cfg.model.id.clone()];
// for entry in &chain_cfg.chain {
// let mut entry_cfg = cfg.clone();
// entry_cfg.model = ModelConfig {
// id: entry.clone(),
// base_url: None,
// api_key_env: None,
// temperature: cfg.model.temperature,
// max_tokens: cfg.model.max_tokens,
// };
// let provider = match entry_cfg.resolve_provider() {
// Ok(p) => p,
// Err(e) => {
// tracing::warn!(entry = %entry, error = %e, "skipping invalid fallback entry");
// continue;
// }
// };
// let api_key = std::env::var(&provider.api_key_env).ok();
// let client: Result<Arc<dyn LlmClient>> = (|| -> Result<Arc<dyn LlmClient>> {
// Ok(match provider.wire {
// Wire::OpenAi => Arc::new(OpenAiClient::new(provider.base_url.clone(), api_key)?),
// Wire::Anthropic => Arc::new(AnthropicClient::new(provider.base_url.clone(), api_key)?),
// Wire::Gemini => Arc::new(GeminiClient::new(provider.base_url.clone(), api_key)?),
// Wire::Bedrock => Arc::new(BedrockClient::from_env()?),
// Wire::Vertex => Arc::new(VertexClient::from_env()?),
// })
// })();
// match client {
// Ok(c) => {
// clients.push(c);
// names.push(entry.clone());
// }
// Err(e) => {
// tracing::warn!(entry = %entry, error = %e, "failed to build fallback client; skipping");
// }
// }
// }
//
// if clients.is_empty() {
// return primary;
// }
// Arc::new(FallbackLlmClient::new(primary, clients, names))
// }
// ```
//
// 5. Wrap each of the four LLM construction sites with `wrap_with_fallback`.
// Search for the existing `let client: Arc<dyn LlmClient> = match provider.wire {`
// and `let llm: Arc<dyn LlmClient> = match provider.wire {` blocks — there
// are four of them in `chat()`, `oneshot_cmd()`, `gateway_cmd()`, and
// `build_cli_runner()`. After the match, add one line:
//
// let client = wrap_with_fallback(client, &cfg);
// // (or `let llm = wrap_with_fallback(llm, &cfg);` for the sites that
// // bind to `llm`)
//
// Note: this assumes the config binding in `build_cli_runner` is named
// `cfg` — confirm before adapting. The CLI runner constructs its `llm`
// from `cfg.resolve_provider()`, so `cfg` should be in scope there.
//
// 6. No new clap derives are required in main.rs — `FallbackAction` already
// derives `clap::Subcommand` in this file.
// -----------------------------------------------------------------------------