harn_vm/llm/api/
ollama.rs

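//! Ollama-specific runtime settings consumed by chat, completion, and
//! model warmup paths, plus the readiness probe that checks the daemon,
//! the installed models, and any already-loaded runner.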
3
4use std::time::Duration;
5
6use serde::Serialize;
7use serde_json::Value;
8
/// Outcome of a single warmup request sent to Ollama's `/api/generate`
/// endpoint by the readiness probe.
#[derive(Debug, Clone, Serialize)]
pub struct OllamaWarmupResult {
    /// `true` only when the warmup request completed successfully.
    pub valid: bool,
    /// Machine-readable outcome: `"ok"`, `"invalid_url"`, or `"warmup_failed"`.
    pub status: String,
    /// Human-readable description of the outcome.
    pub message: String,
    /// Endpoint URL the warmup was sent to; empty when the URL could not be built.
    pub url: String,
    /// Model name the warmup was issued for.
    pub model: String,
    /// HTTP status of the warmup response; omitted from the serialized
    /// output when no response was received.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http_status: Option<u16>,
}
19
/// Report produced by [`ollama_readiness`]: whether the daemon answered,
/// whether the requested model is installed, and (optionally) the warmup
/// and loaded-runner observations. `None` fields are omitted when serialized.
#[derive(Debug, Clone, Serialize)]
pub struct OllamaReadinessResult {
    /// `false` as soon as any required probe step (or a requested warmup) fails.
    pub valid: bool,
    /// Machine-readable outcome: `"ok"`, `"invalid_url"`, `"daemon_down"`,
    /// `"bad_status"`, `"invalid_response"`, `"model_missing"`, or
    /// `"warmup_failed"`.
    pub status: String,
    /// Human-readable summary of the outcome.
    pub message: String,
    /// Base URL that was probed.
    pub base_url: String,
    /// Full `/api/tags` URL that was queried; empty when it could not be built.
    pub tags_url: String,
    /// Model name the caller asked about.
    pub model: String,
    /// Installed model name that matched the requested one, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_model: Option<String>,
    /// Model names listed by `/api/tags`.
    pub available_models: Vec<String>,
    /// HTTP status of the `/api/tags` response, when one was received.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http_status: Option<u16>,
    /// `keep_alive` value in effect for this probe: the caller-supplied
    /// option when present, otherwise the resolved runtime setting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub keep_alive: Option<serde_json::Value>,
    /// Result of the warmup request; present only when a warmup was requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub warmup: Option<OllamaWarmupResult>,
    /// Runtime settings Harn would inject into a request body for this
    /// model, computed from env, provider overrides, and the catalog.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expected: Option<OllamaExpectedRequest>,
    /// The matched runner reported by `/api/ps`, if the model is currently
    /// loaded. The `context_length` field is the effective context the
    /// loaded runner was started with — this is what `ollama ps` prints.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub loaded_runner: Option<OllamaLoadedRunner>,
    /// Set when the loaded runner's `context_length` differs from the
    /// `expected.num_ctx` Harn would request. Explains how to reload.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_drift: Option<String>,
}
51
/// The runtime knobs Harn would attach to chat/completion/warmup
/// requests for a given model. Surfaced by readiness so callers can
/// compare it against what `/api/ps` says is actually loaded.
#[derive(Debug, Clone, Serialize)]
pub struct OllamaExpectedRequest {
    /// Context size Harn would send as `options.num_ctx`.
    pub num_ctx: u64,
    /// Value Harn would send as the request's `keep_alive` field.
    pub keep_alive: Value,
}
60
/// One entry from `GET /api/ps` — a model the Ollama daemon currently
/// has loaded into memory. `context_length` is the effective context
/// the runner was started with and is fixed for the lifetime of the
/// loaded process; reloading is required to change it.
#[derive(Debug, Clone, Serialize)]
pub struct OllamaLoadedRunner {
    /// The entry's `name` field.
    pub name: String,
    /// The entry's `model` field; falls back to `name` when the entry has none.
    pub model: String,
    /// The entry's `context_length` field, when present as an unsigned integer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_length: Option<u64>,
    /// The entry's `size_vram` field, when present as an unsigned integer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size_vram: Option<u64>,
    /// The entry's `size` field, when present as an unsigned integer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    /// The entry's `expires_at` field, when present as a string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expires_at: Option<String>,
}
78
/// Inputs for [`ollama_readiness`].
#[derive(Debug, Clone)]
pub struct OllamaReadinessOptions {
    /// Model name to look for in the daemon's installed-model list.
    pub model: String,
    /// Base URL of the Ollama daemon; `None` falls back to the provider
    /// configuration (or the built-in localhost default).
    pub base_url: Option<String>,
    /// When `true`, send a warmup request once the model has been found.
    pub warm: bool,
    /// `keep_alive` to report and to send with the warmup; `None` uses the
    /// resolved runtime setting.
    pub keep_alive: Option<serde_json::Value>,
    /// Timeout for the `/api/tags` request (also used for the `/api/ps` probe).
    pub tags_timeout: Duration,
    /// Timeout for the warmup request.
    pub warmup_timeout: Duration,
    /// Hit `/api/ps` and report any drift between the loaded runner's
    /// `context_length` and the `num_ctx` Harn would request.
    pub observe_loaded: bool,
}
91
92impl OllamaReadinessOptions {
93    pub fn new(model: impl Into<String>) -> Self {
94        Self {
95            model: model.into(),
96            base_url: None,
97            warm: false,
98            keep_alive: None,
99            tags_timeout: Duration::from_secs(15),
100            warmup_timeout: Duration::from_secs(135),
101            observe_loaded: false,
102        }
103    }
104}
105
/// Public wrapper around the internal keep-alive parser, used by callers
/// (CLI flags, host bridges) that want the same normalization Harn applies
/// to environment overrides.
///
/// Returns `None` for blank input. Otherwise `"default"` maps to Harn's
/// default keep-alive, `"forever"`/`"infinite"`/`"-1"` map to the number
/// `-1`, other integers become JSON numbers, and anything else is passed
/// through as a trimmed string (keywords are matched case-insensitively).
pub fn normalize_ollama_keep_alive(raw: &str) -> Option<serde_json::Value> {
    parse_keep_alive_str(raw)
}
112
/// Context size used when no override, environment variable, or catalog
/// entry supplies one.
pub const OLLAMA_DEFAULT_NUM_CTX: u64 = 32_768;
/// `keep_alive` used when neither overrides nor the environment supply one.
pub const OLLAMA_DEFAULT_KEEP_ALIVE: &str = "30m";
/// Unload grace period, in milliseconds, used when no environment variable
/// supplies a parsable value.
pub const OLLAMA_DEFAULT_UNLOAD_GRACE_MS: u64 = 10_000;
/// Harn-specific environment variable for the context size.
pub const HARN_OLLAMA_NUM_CTX_ENV: &str = "HARN_OLLAMA_NUM_CTX";
/// Harn-specific environment variable for `keep_alive`.
pub const HARN_OLLAMA_KEEP_ALIVE_ENV: &str = "HARN_OLLAMA_KEEP_ALIVE";
/// Harn-specific environment variable for the unload grace period (ms).
pub const HARN_OLLAMA_UNLOAD_GRACE_MS_ENV: &str = "HARN_OLLAMA_UNLOAD_GRACE_MS";
/// Fallback environment variable for the unload grace period (ms).
pub const OLLAMA_UNLOAD_GRACE_MS_ENV: &str = "OLLAMA_UNLOAD_GRACE_MS";
/// Environment variable consulted for the daemon address when no explicit
/// base URL is given to the warmup helpers.
pub const OLLAMA_HOST_ENV: &str = "OLLAMA_HOST";

// Each key list is scanned in order; the first variable that is set AND
// parses wins, so the Harn-specific name takes precedence.
const OLLAMA_NUM_CTX_ENV_KEYS: [&str; 3] = [
    HARN_OLLAMA_NUM_CTX_ENV,
    "OLLAMA_CONTEXT_LENGTH",
    "OLLAMA_NUM_CTX",
];
const OLLAMA_KEEP_ALIVE_ENV_KEYS: [&str; 2] = [HARN_OLLAMA_KEEP_ALIVE_ENV, "OLLAMA_KEEP_ALIVE"];
const OLLAMA_UNLOAD_GRACE_MS_ENV_KEYS: [&str; 2] =
    [HARN_OLLAMA_UNLOAD_GRACE_MS_ENV, OLLAMA_UNLOAD_GRACE_MS_ENV];
// Base URL used by the warmup helpers when neither an argument nor
// `OLLAMA_HOST` provides one.
const OLLAMA_DEFAULT_BASE_URL: &str = "http://localhost:11434";
131
/// Resolved per-request runtime knobs for Ollama: the context size and
/// the keep-alive value that get attached to request bodies.
#[derive(Clone, Debug, PartialEq)]
pub struct OllamaRuntimeSettings {
    /// Context size sent as `options.num_ctx`; always greater than zero
    /// when produced by the constructors in this file.
    pub num_ctx: u64,
    /// Value sent as the request's `keep_alive` field (a JSON number or string).
    pub keep_alive: Value,
}
137
138impl OllamaRuntimeSettings {
139    pub fn from_env() -> Self {
140        Self::from_env_and_overrides(None)
141    }
142
143    pub fn from_env_and_overrides(overrides: Option<&Value>) -> Self {
144        Self::from_env_overrides_and_model(overrides, None)
145    }
146
147    pub fn from_env_overrides_and_model(overrides: Option<&Value>, model: Option<&str>) -> Self {
148        Self {
149            num_ctx: num_ctx_from_overrides(overrides)
150                .or_else(num_ctx_from_env)
151                .or_else(|| num_ctx_from_model_catalog(model))
152                .unwrap_or(OLLAMA_DEFAULT_NUM_CTX),
153            keep_alive: keep_alive_from_overrides(overrides)
154                .or_else(keep_alive_from_env)
155                .unwrap_or_else(default_keep_alive_value),
156        }
157    }
158
159    pub fn warmup_body(&self, model: &str) -> Value {
160        serde_json::json!({
161            "model": model,
162            "prompt": "",
163            "stream": false,
164            "keep_alive": self.keep_alive,
165            "options": {
166                "num_ctx": self.num_ctx,
167            },
168        })
169    }
170}
171
/// Free-function form of [`OllamaRuntimeSettings::from_env`]: resolve the
/// runtime settings from environment variables and built-in defaults.
pub fn ollama_runtime_settings_from_env() -> OllamaRuntimeSettings {
    OllamaRuntimeSettings::from_env()
}
175
176pub(crate) fn ollama_unload_grace_duration_from_env() -> Duration {
177    Duration::from_millis(
178        OLLAMA_UNLOAD_GRACE_MS_ENV_KEYS
179            .iter()
180            .find_map(|key| std::env::var(key).ok().and_then(|raw| parse_grace_ms(&raw)))
181            .unwrap_or(OLLAMA_DEFAULT_UNLOAD_GRACE_MS),
182    )
183}
184
/// Warm `model` using runtime settings resolved from the environment and
/// the model catalog (no provider overrides).
///
/// `base_url` of `None` (or blank) falls back to `OLLAMA_HOST`, then to
/// the built-in localhost default. Returns `Err` with a human-readable
/// message when the request cannot be sent or the daemon answers with a
/// non-success status.
pub async fn warm_ollama_model(model: &str, base_url: Option<&str>) -> Result<(), String> {
    let settings = OllamaRuntimeSettings::from_env_overrides_and_model(None, Some(model));
    warm_ollama_model_with_settings(model, base_url, &settings).await
}
189
190pub async fn warm_ollama_model_with_settings(
191    model: &str,
192    base_url: Option<&str>,
193    settings: &OllamaRuntimeSettings,
194) -> Result<(), String> {
195    let base_url = resolve_ollama_base_url(base_url);
196    let url = format!("{}/api/generate", base_url.trim_end_matches('/'));
197    let response = crate::llm::shared_utility_client()
198        .post(url)
199        .header("Content-Type", "application/json")
200        .json(&settings.warmup_body(model))
201        .send()
202        .await
203        .map_err(|error| format!("Ollama warmup failed: {error}"))?;
204    if response.status().is_success() {
205        Ok(())
206    } else {
207        let status = response.status();
208        let body = response.text().await.unwrap_or_default();
209        Err(format!("Ollama warmup returned HTTP {status}: {body}"))
210    }
211}
212
213pub(crate) fn apply_ollama_runtime_settings(body: &mut Value, overrides: Option<&Value>) {
214    apply_non_runtime_ollama_overrides(body, overrides);
215
216    let explicit_num_ctx = num_ctx_from_overrides(overrides);
217    if explicit_num_ctx.is_some() || body.pointer("/options/num_ctx").is_none() {
218        let num_ctx = explicit_num_ctx
219            .or_else(num_ctx_from_env)
220            .or_else(|| num_ctx_from_model_catalog(body.get("model").and_then(Value::as_str)))
221            .unwrap_or(OLLAMA_DEFAULT_NUM_CTX);
222        ensure_options_object(body).insert("num_ctx".to_string(), serde_json::json!(num_ctx));
223    }
224
225    let explicit_keep_alive = keep_alive_from_overrides(overrides);
226    if let Some(keep_alive) = explicit_keep_alive
227        .or_else(|| body.get("keep_alive").cloned())
228        .or_else(keep_alive_from_env)
229        .or_else(|| Some(default_keep_alive_value()))
230    {
231        body["keep_alive"] = keep_alive;
232    }
233}
234
235fn resolve_ollama_base_url(base_url: Option<&str>) -> String {
236    base_url
237        .map(str::trim)
238        .filter(|value| !value.is_empty())
239        .map(str::to_string)
240        .or_else(|| {
241            std::env::var(OLLAMA_HOST_ENV)
242                .ok()
243                .map(|value| value.trim().to_string())
244                .filter(|value| !value.is_empty())
245        })
246        .unwrap_or_else(|| OLLAMA_DEFAULT_BASE_URL.to_string())
247}
248
249fn num_ctx_from_env() -> Option<u64> {
250    OLLAMA_NUM_CTX_ENV_KEYS
251        .iter()
252        .find_map(|key| std::env::var(key).ok().and_then(|raw| parse_num_ctx(&raw)))
253}
254
255fn num_ctx_from_model_catalog(model: Option<&str>) -> Option<u64> {
256    let model = model?.trim();
257    if model.is_empty() {
258        return None;
259    }
260    let entry = crate::llm_config::model_catalog_entry(model)?;
261    entry
262        .runtime_context_window
263        .filter(|window| *window > 0)
264        .or_else(|| (entry.context_window > 0).then_some(entry.context_window))
265}
266
267fn keep_alive_from_env() -> Option<Value> {
268    OLLAMA_KEEP_ALIVE_ENV_KEYS.iter().find_map(|key| {
269        std::env::var(key)
270            .ok()
271            .and_then(|raw| parse_keep_alive_str(&raw))
272    })
273}
274
275fn num_ctx_from_overrides(overrides: Option<&Value>) -> Option<u64> {
276    let obj = overrides?.as_object()?;
277    obj.get("num_ctx")
278        .and_then(parse_num_ctx_value)
279        .or_else(|| {
280            obj.get("options")
281                .and_then(|options| options.get("num_ctx"))
282                .and_then(parse_num_ctx_value)
283        })
284}
285
286fn keep_alive_from_overrides(overrides: Option<&Value>) -> Option<Value> {
287    overrides?
288        .as_object()?
289        .get("keep_alive")
290        .and_then(parse_keep_alive_value)
291}
292
/// Parse a context size from text: surrounding whitespace is ignored and
/// only strictly positive integers are accepted.
fn parse_num_ctx(raw: &str) -> Option<u64> {
    match raw.trim().parse::<u64>() {
        Ok(0) | Err(_) => None,
        Ok(value) => Some(value),
    }
}
296
/// Parse a grace period in milliseconds from text; whitespace is trimmed
/// and zero is a valid value.
fn parse_grace_ms(raw: &str) -> Option<u64> {
    let trimmed = raw.trim();
    trimmed.parse().ok()
}
300
301fn parse_num_ctx_value(value: &Value) -> Option<u64> {
302    match value {
303        Value::Number(number) => number.as_u64().filter(|parsed| *parsed > 0),
304        Value::String(raw) => parse_num_ctx(raw),
305        _ => None,
306    }
307}
308
309fn parse_keep_alive_value(value: &Value) -> Option<Value> {
310    match value {
311        Value::String(raw) => parse_keep_alive_str(raw),
312        Value::Number(_) => Some(value.clone()),
313        _ => None,
314    }
315}
316
317fn parse_keep_alive_str(raw: &str) -> Option<Value> {
318    let trimmed = raw.trim();
319    if trimmed.is_empty() {
320        return None;
321    }
322    Some(match trimmed.to_ascii_lowercase().as_str() {
323        "default" => default_keep_alive_value(),
324        "forever" | "infinite" | "-1" => serde_json::json!(-1),
325        _ => {
326            if let Ok(n) = trimmed.parse::<i64>() {
327                serde_json::json!(n)
328            } else {
329                serde_json::json!(trimmed)
330            }
331        }
332    })
333}
334
/// The default keep-alive ([`OLLAMA_DEFAULT_KEEP_ALIVE`]) as a JSON string value.
fn default_keep_alive_value() -> Value {
    serde_json::json!(OLLAMA_DEFAULT_KEEP_ALIVE)
}
338
339fn ensure_options_object(body: &mut Value) -> &mut serde_json::Map<String, Value> {
340    if !body.get("options").is_some_and(Value::is_object) {
341        body["options"] = serde_json::json!({});
342    }
343    body["options"]
344        .as_object_mut()
345        .expect("options initialized as object")
346}
347
348fn apply_non_runtime_ollama_overrides(body: &mut Value, overrides: Option<&Value>) {
349    let Some(obj) = overrides.and_then(Value::as_object) else {
350        return;
351    };
352
353    for (key, value) in obj {
354        match key.as_str() {
355            "num_ctx" | "keep_alive" => {}
356            "options" => {
357                if let Some(options) = value.as_object() {
358                    let body_options = ensure_options_object(body);
359                    for (option_key, option_value) in options {
360                        if option_key != "num_ctx" {
361                            body_options.insert(option_key.clone(), option_value.clone());
362                        }
363                    }
364                }
365            }
366            _ => {
367                body[key] = value.clone();
368            }
369        }
370    }
371}
372
/// Probe an Ollama daemon and report whether `options.model` is usable.
///
/// Steps, in order:
/// 1. Build the `/api/tags` URL (`invalid_url` on failure).
/// 2. Fetch the installed-model list (`daemon_down`, `bad_status`, or
///    `invalid_response` on failure).
/// 3. Match the requested model against the list (`model_missing` on failure).
/// 4. Record the runtime settings Harn would send for the matched model.
/// 5. If `options.warm` is set, send a warmup request (`warmup_failed`
///    on failure).
/// 6. If `options.observe_loaded` is set, query `/api/ps` and record the
///    loaded runner and any context drift. This step is best-effort and
///    never changes `valid`.
///
/// Steps 1–3 return immediately on failure; a warmup failure marks the
/// result invalid but still allows step 6 to run. The function never
/// panics on daemon errors — every failure is encoded in the result.
pub async fn ollama_readiness(options: OllamaReadinessOptions) -> OllamaReadinessResult {
    // Fall back to the provider configuration when no base URL was given.
    let base_url = options.base_url.unwrap_or_else(default_ollama_base_url);
    let mut result = OllamaReadinessResult::probing(base_url.clone(), options.model.clone());

    let tags_url = match ollama_endpoint_url(&base_url, "/api/tags") {
        Ok(url) => url,
        Err(message) => return result.fail("invalid_url", message),
    };
    result.tags_url = tags_url.clone();

    let client = crate::llm::shared_utility_client();
    let response = match client
        .get(tags_url.clone())
        .timeout(options.tags_timeout)
        .send()
        .await
    {
        Ok(response) => response,
        Err(error) => {
            return result.fail(
                "daemon_down",
                format!("Ollama not reachable at {tags_url}: {error}"),
            );
        }
    };

    let status = response.status();
    result.http_status = Some(status.as_u16());
    if !status.is_success() {
        // Best effort: an unreadable error body is reported as empty.
        let body = response.text().await.unwrap_or_default();
        return result.fail(
            "bad_status",
            format!(
                "Ollama returned HTTP {} from /api/tags: {body}",
                status.as_u16()
            ),
        );
    }

    let body: Value = match response.json().await {
        Ok(value) => value,
        Err(error) => {
            return result.fail(
                "invalid_response",
                format!("Could not parse Ollama model list: {error}"),
            );
        }
    };

    let Some(models) = parse_ollama_model_names(&body) else {
        return result.fail(
            "invalid_response",
            "Could not parse Ollama model list: missing models[].name".to_string(),
        );
    };
    result.available_models = models.clone();

    let Some(matched) = find_ollama_model_match(&models, &options.model) else {
        let available = if models.is_empty() {
            "(none)".to_string()
        } else {
            models.join(", ")
        };
        return result.fail(
            "model_missing",
            format!(
                "Ollama model '{}' not found. Available: {available}",
                options.model
            ),
        );
    };
    result.matched_model = Some(matched.clone());

    // Settings are resolved for the matched (installed) name, not the
    // name the caller typed.
    let settings = OllamaRuntimeSettings::from_env_overrides_and_model(None, Some(&matched));
    result.expected = Some(OllamaExpectedRequest {
        num_ctx: settings.num_ctx,
        keep_alive: settings.keep_alive.clone(),
    });
    // A caller-supplied keep_alive takes precedence over the resolved one.
    let keep_alive = options
        .keep_alive
        .clone()
        .unwrap_or_else(|| settings.keep_alive.clone());
    result.keep_alive = Some(keep_alive.clone());

    result.message = format!("Ollama is reachable and model '{matched}' is available");
    if options.warm {
        let warm = ollama_warmup(
            &base_url,
            &matched,
            Some(keep_alive),
            options.warmup_timeout,
        )
        .await;
        if !warm.valid {
            result.valid = false;
            result.status = "warmup_failed".to_string();
            result.message = warm.message.clone();
        } else {
            result.message = format!("{}; {}", result.message, warm.message);
        }
        result.warmup = Some(warm);
    }

    if options.observe_loaded {
        // The tag-list timeout is reused for the `/api/ps` request.
        match fetch_ollama_loaded_runners(&base_url, options.tags_timeout).await {
            Ok(runners) => {
                if let Some(runner) = match_loaded_runner(runners, &matched) {
                    if let Some(actual) = runner.context_length {
                        if actual != settings.num_ctx {
                            result.context_drift =
                                Some(describe_context_drift(settings.num_ctx, actual));
                        }
                    }
                    result.loaded_runner = Some(runner);
                }
            }
            Err(error) => {
                // /api/ps is best-effort; surface the failure in the
                // message but don't fail the overall readiness check.
                result.message = format!("{}; /api/ps probe skipped: {error}", result.message);
            }
        }
    }

    result
}
499
500impl OllamaReadinessResult {
501    fn probing(base_url: String, model: String) -> Self {
502        Self {
503            valid: true,
504            status: "ok".to_string(),
505            message: String::new(),
506            base_url,
507            tags_url: String::new(),
508            model,
509            matched_model: None,
510            available_models: Vec::new(),
511            http_status: None,
512            keep_alive: None,
513            warmup: None,
514            expected: None,
515            loaded_runner: None,
516            context_drift: None,
517        }
518    }
519
520    fn fail(mut self, status: &str, message: String) -> Self {
521        self.valid = false;
522        self.status = status.to_string();
523        self.message = message;
524        self
525    }
526}
527
528fn default_ollama_base_url() -> String {
529    crate::llm_config::provider_config("ollama")
530        .as_ref()
531        .map(crate::llm_config::resolve_base_url)
532        .unwrap_or_else(|| "http://localhost:11434".to_string())
533}
534
535fn ollama_endpoint_url(base_url: &str, path: &str) -> Result<String, String> {
536    let mut url = reqwest::Url::parse(base_url)
537        .map_err(|error| format!("Invalid Ollama URL '{base_url}': {error}"))?;
538    if url.host_str() == Some("localhost") {
539        url.set_host(Some("127.0.0.1"))
540            .map_err(|_| format!("Invalid Ollama URL '{base_url}': could not normalize host"))?;
541    }
542    let base_path = url.path().trim_end_matches('/');
543    let suffix = path.trim_start_matches('/');
544    let joined = if base_path.is_empty() {
545        format!("/{suffix}")
546    } else {
547        format!("{base_path}/{suffix}")
548    };
549    url.set_path(&joined);
550    url.set_query(None);
551    Ok(url.to_string())
552}
553
554fn parse_ollama_model_names(value: &serde_json::Value) -> Option<Vec<String>> {
555    let models = value.get("models")?.as_array()?;
556    Some(
557        models
558            .iter()
559            .filter_map(|model| model.get("name").and_then(|name| name.as_str()))
560            .map(str::to_string)
561            .collect(),
562    )
563}
564
/// Find the installed model that `selected` refers to.
///
/// Candidates are tried in order of decreasing strictness:
/// 1. an exact name match;
/// 2. `selected` with an implicit `:latest` tag;
/// 3. `selected` as a bare model name with any tag (`qwen3` → `qwen3:30b`).
///
/// The last step only matches at the `:` tag separator. A plain string
/// prefix would let `llama3` select `llama3.1:8b`, or a partial tag such
/// as `qwen3:3` select `qwen3:30b`, and the probe would then report (and
/// warm) a model the caller never asked for. An empty `selected` never
/// matches.
fn find_ollama_model_match(models: &[String], selected: &str) -> Option<String> {
    if selected.is_empty() {
        return None;
    }

    let exact = || models.iter().find(|name| name.as_str() == selected);
    let implicit_latest = || {
        models
            .iter()
            .find(|name| name.strip_suffix(":latest") == Some(selected))
    };
    let any_tag = || {
        models.iter().find(|name| {
            name.strip_prefix(selected)
                .is_some_and(|rest| rest.starts_with(':'))
        })
    };

    exact().or_else(implicit_latest).or_else(any_tag).cloned()
}
577
578/// Fetch the list of currently-loaded runners from Ollama's `/api/ps`
579/// endpoint. Returns an empty list when no models are loaded; returns an
580/// error when the daemon is unreachable or the response is malformed.
581pub async fn fetch_ollama_loaded_runners(
582    base_url: &str,
583    timeout: Duration,
584) -> Result<Vec<OllamaLoadedRunner>, String> {
585    let url = ollama_endpoint_url(base_url, "/api/ps")?;
586    let response = crate::llm::shared_utility_client()
587        .get(&url)
588        .timeout(timeout)
589        .send()
590        .await
591        .map_err(|error| format!("Ollama /api/ps not reachable at {url}: {error}"))?;
592    if !response.status().is_success() {
593        return Err(format!(
594            "Ollama returned HTTP {} from /api/ps",
595            response.status().as_u16()
596        ));
597    }
598    let body: Value = response
599        .json()
600        .await
601        .map_err(|error| format!("Could not parse Ollama /api/ps response: {error}"))?;
602    Ok(parse_ollama_loaded_runners(&body))
603}
604
605fn parse_ollama_loaded_runners(value: &Value) -> Vec<OllamaLoadedRunner> {
606    let Some(models) = value.get("models").and_then(Value::as_array) else {
607        return Vec::new();
608    };
609    models
610        .iter()
611        .filter_map(|entry| {
612            let name = entry.get("name").and_then(Value::as_str)?.to_string();
613            let model = entry
614                .get("model")
615                .and_then(Value::as_str)
616                .map(str::to_string)
617                .unwrap_or_else(|| name.clone());
618            Some(OllamaLoadedRunner {
619                name,
620                model,
621                context_length: entry.get("context_length").and_then(Value::as_u64),
622                size_vram: entry.get("size_vram").and_then(Value::as_u64),
623                size: entry.get("size").and_then(Value::as_u64),
624                expires_at: entry
625                    .get("expires_at")
626                    .and_then(Value::as_str)
627                    .map(str::to_string),
628            })
629        })
630        .collect()
631}
632
633fn match_loaded_runner(
634    runners: Vec<OllamaLoadedRunner>,
635    model: &str,
636) -> Option<OllamaLoadedRunner> {
637    runners
638        .into_iter()
639        .find(|runner| runner.name == model || runner.model == model)
640}
641
/// Explain a mismatch between the context a runner is loaded with
/// (`actual`) and the context Harn would request (`expected`), including
/// how to reload the model. Callers only invoke this when the two differ.
fn describe_context_drift(expected: u64, actual: u64) -> String {
    if actual <= expected {
        return format!(
            "Loaded runner context_length={actual} is smaller than expected num_ctx={expected}. \
             The runner was loaded at a smaller context — unload with `ollama stop <model>` \
             and let Harn re-warm to expand."
        );
    }
    format!(
        "Loaded runner context_length={actual} exceeds expected num_ctx={expected}. Ollama \
         keeps a runner at the context it was loaded with; run `ollama stop <model>` (or wait \
         for keep_alive to expire) and let Harn re-warm it to apply the smaller context."
    )
}
658
659async fn ollama_warmup(
660    base_url: &str,
661    model: &str,
662    keep_alive: Option<serde_json::Value>,
663    timeout: Duration,
664) -> OllamaWarmupResult {
665    let url = match ollama_endpoint_url(base_url, "/api/generate") {
666        Ok(url) => url,
667        Err(message) => {
668            return OllamaWarmupResult {
669                valid: false,
670                status: "invalid_url".to_string(),
671                message,
672                url: String::new(),
673                model: model.to_string(),
674                http_status: None,
675            };
676        }
677    };
678
679    // Derive the warmup body from runtime settings so num_ctx is loaded
680    // into the runner the same way chat/completion requests would. Without
681    // it, Ollama loads the model at its Modelfile-declared maximum
682    // context, and a subsequent chat request asking for a smaller
683    // num_ctx cannot shrink an already-loaded runner — see the
684    // "Effective vs. loaded context" section of docs/src/llm/providers.md.
685    let settings = OllamaRuntimeSettings::from_env_overrides_and_model(None, Some(model));
686    let mut body = settings.warmup_body(model);
687    if let Some(value) = keep_alive {
688        body["keep_alive"] = value;
689    }
690
691    let client = crate::llm::shared_blocking_client();
692    let response = match client
693        .post(url.clone())
694        .header("Content-Type", "application/json")
695        .timeout(timeout)
696        .json(&body)
697        .send()
698        .await
699    {
700        Ok(response) => response,
701        Err(error) => {
702            return OllamaWarmupResult {
703                valid: false,
704                status: "warmup_failed".to_string(),
705                message: format!("Ollama warmup failed for model '{model}' at {url}: {error}"),
706                url,
707                model: model.to_string(),
708                http_status: None,
709            };
710        }
711    };
712
713    let status = response.status();
714    if !status.is_success() {
715        let body = response.text().await.unwrap_or_default();
716        return OllamaWarmupResult {
717            valid: false,
718            status: "warmup_failed".to_string(),
719            message: format!(
720                "Ollama warmup returned HTTP {} for model '{model}': {body}",
721                status.as_u16()
722            ),
723            url,
724            model: model.to_string(),
725            http_status: Some(status.as_u16()),
726        };
727    }
728
729    let body: serde_json::Value = response.json().await.unwrap_or_default();
730    if let Some(error) = body.get("error").and_then(|error| error.as_str()) {
731        return OllamaWarmupResult {
732            valid: false,
733            status: "warmup_failed".to_string(),
734            message: format!("Ollama warmup failed for model '{model}': {error}"),
735            url,
736            model: model.to_string(),
737            http_status: Some(status.as_u16()),
738        };
739    }
740
741    OllamaWarmupResult {
742        valid: true,
743        status: "ok".to_string(),
744        message: format!("Ollama model '{model}' warmed"),
745        url,
746        model: model.to_string(),
747        http_status: Some(status.as_u16()),
748    }
749}
750
751#[cfg(test)]
752mod tests {
753    use super::*;
754    use crate::llm::env_lock;
755    use std::io::{Read, Write};
756    use std::net::TcpListener;
757    use std::sync::{Arc, Mutex};
758    use std::time::Duration;
759
760    struct ScopedEnvVar {
761        key: &'static str,
762        previous: Option<String>,
763    }
764
765    impl ScopedEnvVar {
766        fn set(key: &'static str, value: &str) -> Self {
767            let previous = std::env::var(key).ok();
768            unsafe {
769                std::env::set_var(key, value);
770            }
771            Self { key, previous }
772        }
773
774        fn remove(key: &'static str) -> Self {
775            let previous = std::env::var(key).ok();
776            unsafe {
777                std::env::remove_var(key);
778            }
779            Self { key, previous }
780        }
781    }
782
783    impl Drop for ScopedEnvVar {
784        fn drop(&mut self) {
785            match &self.previous {
786                Some(value) => unsafe { std::env::set_var(self.key, value) },
787                None => unsafe { std::env::remove_var(self.key) },
788            }
789        }
790    }
791
792    #[test]
793    fn runtime_settings_use_harn_env_before_ollama_env() {
794        let _guard = env_lock().lock().expect("env lock");
795        let _env = [
796            ScopedEnvVar::set("HARN_OLLAMA_NUM_CTX", "131072"),
797            ScopedEnvVar::set("OLLAMA_CONTEXT_LENGTH", "32768"),
798            ScopedEnvVar::set("HARN_OLLAMA_KEEP_ALIVE", "forever"),
799            ScopedEnvVar::set("OLLAMA_KEEP_ALIVE", "5m"),
800        ];
801        let settings = OllamaRuntimeSettings::from_env();
802        assert_eq!(settings.num_ctx, 131072);
803        assert_eq!(settings.keep_alive, serde_json::json!(-1));
804    }
805
806    #[test]
807    fn runtime_settings_apply_harn_defaults() {
808        let _guard = env_lock().lock().expect("env lock");
809        let _env = [
810            ScopedEnvVar::remove("HARN_OLLAMA_NUM_CTX"),
811            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
812            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
813            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
814            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
815        ];
816        let settings = OllamaRuntimeSettings::from_env();
817        assert_eq!(settings.num_ctx, OLLAMA_DEFAULT_NUM_CTX);
818        assert_eq!(settings.keep_alive, serde_json::json!("30m"));
819    }
820
821    #[test]
822    fn runtime_settings_use_catalog_context_after_env_and_overrides() {
823        let _guard = env_lock().lock().expect("env lock");
824        let _env = [
825            ScopedEnvVar::remove("HARN_OLLAMA_NUM_CTX"),
826            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
827            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
828            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
829            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
830        ];
831        crate::llm_config::clear_user_overrides();
832        let mut overlay = crate::llm_config::ProvidersConfig::default();
833        overlay.models.insert(
834            "qwen-test".to_string(),
835            crate::llm_config::ModelDef {
836                name: "Qwen Test".to_string(),
837                provider: "ollama".to_string(),
838                context_window: 100_000,
839                runtime_context_window: None,
840                stream_timeout: None,
841                capabilities: vec![],
842                pricing: None,
843                deprecated: false,
844                deprecation_note: None,
845                quality_tags: Vec::new(),
846                prefer_prefill_done: None,
847            },
848        );
849        crate::llm_config::set_user_overrides(Some(overlay));
850
851        let settings = OllamaRuntimeSettings::from_env_overrides_and_model(None, Some("qwen-test"));
852        assert_eq!(settings.num_ctx, 100_000);
853
854        let env = ScopedEnvVar::set("HARN_OLLAMA_NUM_CTX", "65536");
855        let settings = OllamaRuntimeSettings::from_env_overrides_and_model(None, Some("qwen-test"));
856        assert_eq!(settings.num_ctx, 65_536);
857        drop(env);
858
859        let overrides = serde_json::json!({"num_ctx": 8192});
860        let settings = OllamaRuntimeSettings::from_env_overrides_and_model(
861            Some(&overrides),
862            Some("qwen-test"),
863        );
864        assert_eq!(settings.num_ctx, 8_192);
865
866        crate::llm_config::clear_user_overrides();
867    }
868
869    #[test]
870    fn provider_overrides_beat_env_and_normalize_keep_alive() {
871        let _guard = env_lock().lock().expect("env lock");
872        let _env = [
873            ScopedEnvVar::set("HARN_OLLAMA_NUM_CTX", "131072"),
874            ScopedEnvVar::set("HARN_OLLAMA_KEEP_ALIVE", "5m"),
875        ];
876        let overrides = serde_json::json!({
877            "num_ctx": "65536",
878            "keep_alive": "infinite",
879        });
880        let settings = OllamaRuntimeSettings::from_env_and_overrides(Some(&overrides));
881        assert_eq!(settings.num_ctx, 65536);
882        assert_eq!(settings.keep_alive, serde_json::json!(-1));
883    }
884
885    #[test]
886    fn apply_runtime_settings_maps_ollama_overrides_to_native_shape() {
887        let _guard = env_lock().lock().expect("env lock");
888        let _env = [
889            ScopedEnvVar::remove("HARN_OLLAMA_NUM_CTX"),
890            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
891            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
892            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
893            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
894        ];
895        let mut body = serde_json::json!({
896            "model": "qwen",
897            "options": {"temperature": 0.1}
898        });
899        let overrides = serde_json::json!({
900            "num_ctx": 65536,
901            "keep_alive": "default",
902            "options": {"top_k": 20, "num_ctx": 999},
903            "think": true,
904        });
905        apply_ollama_runtime_settings(&mut body, Some(&overrides));
906        assert_eq!(body["options"]["num_ctx"], serde_json::json!(65536));
907        assert_eq!(body["options"]["top_k"], serde_json::json!(20));
908        assert_eq!(body["options"]["temperature"], serde_json::json!(0.1));
909        assert_eq!(body["keep_alive"], serde_json::json!("30m"));
910        assert_eq!(body["think"], serde_json::json!(true));
911        assert!(body.get("num_ctx").is_none());
912    }
913
914    #[test]
915    fn apply_runtime_settings_uses_catalog_context_when_body_has_model() {
916        let _guard = env_lock().lock().expect("env lock");
917        let _env = [
918            ScopedEnvVar::remove("HARN_OLLAMA_NUM_CTX"),
919            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
920            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
921            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
922            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
923        ];
924        crate::llm_config::clear_user_overrides();
925        let mut overlay = crate::llm_config::ProvidersConfig::default();
926        overlay.models.insert(
927            "qwen-test".to_string(),
928            crate::llm_config::ModelDef {
929                name: "Qwen Test".to_string(),
930                provider: "ollama".to_string(),
931                context_window: 100_000,
932                runtime_context_window: Some(32_768),
933                stream_timeout: None,
934                capabilities: vec![],
935                pricing: None,
936                deprecated: false,
937                deprecation_note: None,
938                quality_tags: Vec::new(),
939                prefer_prefill_done: None,
940            },
941        );
942        crate::llm_config::set_user_overrides(Some(overlay));
943
944        let mut body = serde_json::json!({
945            "model": "qwen-test",
946            "options": {"temperature": 0.1}
947        });
948        apply_ollama_runtime_settings(&mut body, None);
949        assert_eq!(body["options"]["num_ctx"], serde_json::json!(32768));
950        assert_eq!(body["options"]["temperature"], serde_json::json!(0.1));
951
952        crate::llm_config::clear_user_overrides();
953    }
954
955    #[test]
956    fn ollama_keep_alive_normalization_handles_default_and_numbers() {
957        assert_eq!(
958            normalize_ollama_keep_alive("default"),
959            Some(serde_json::json!("30m"))
960        );
961        assert_eq!(
962            normalize_ollama_keep_alive("forever"),
963            Some(serde_json::json!(-1))
964        );
965        assert_eq!(
966            normalize_ollama_keep_alive("120"),
967            Some(serde_json::json!(120))
968        );
969        assert_eq!(
970            normalize_ollama_keep_alive("10m"),
971            Some(serde_json::json!("10m"))
972        );
973        assert_eq!(normalize_ollama_keep_alive("   "), None);
974    }
975
976    fn readiness_options(model: &str, base_url: String) -> OllamaReadinessOptions {
977        OllamaReadinessOptions {
978            model: model.to_string(),
979            base_url: Some(base_url),
980            warm: false,
981            keep_alive: None,
982            tags_timeout: Duration::from_secs(2),
983            warmup_timeout: Duration::from_secs(2),
984            observe_loaded: false,
985        }
986    }
987
988    #[test]
989    fn ollama_readiness_verifies_model_and_warms_matched_tag() {
990        let _guard = env_lock().lock().expect("env lock");
991        let _env = [
992            ScopedEnvVar::set("HARN_OLLAMA_NUM_CTX", "65536"),
993            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
994            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
995            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
996            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
997        ];
998        let captured = Arc::new(Mutex::new(Vec::new()));
999        let (addr, server) = spawn_stub(
1000            vec![
1001                (
1002                    200,
1003                    r#"{"models":[{"name":"qwen3:latest"},{"name":"llama3.2:latest"}]}"#,
1004                ),
1005                (200, r#"{"response":"","done":true}"#),
1006            ],
1007            captured.clone(),
1008        );
1009
1010        let result = tokio::runtime::Runtime::new()
1011            .expect("runtime")
1012            .block_on(ollama_readiness(OllamaReadinessOptions {
1013                warm: true,
1014                keep_alive: Some(serde_json::json!(-1)),
1015                ..readiness_options("qwen3", format!("http://{addr}"))
1016            }));
1017
1018        server.join().expect("stub server");
1019        assert!(result.valid, "result was: {result:?}");
1020        assert_eq!(result.status, "ok");
1021        assert_eq!(result.matched_model.as_deref(), Some("qwen3:latest"));
1022        assert!(result.warmup.as_ref().is_some_and(|warm| warm.valid));
1023        let expected = result.expected.as_ref().expect("expected request");
1024        assert_eq!(expected.num_ctx, 65_536);
1025
1026        let requests = captured.lock().expect("captured requests");
1027        assert!(requests[0].starts_with("GET /api/tags "));
1028        assert!(requests[1].starts_with("POST /api/generate "));
1029        let body = requests[1].split("\r\n\r\n").nth(1).unwrap_or("");
1030        let json: serde_json::Value = serde_json::from_str(body).expect("warmup body");
1031        assert_eq!(json["model"], "qwen3:latest");
1032        assert_eq!(json["prompt"], "");
1033        assert_eq!(json["stream"], false);
1034        assert_eq!(json["keep_alive"], -1);
1035        assert_eq!(
1036            json["options"]["num_ctx"], 65_536,
1037            "warmup must inject num_ctx so Ollama loads the runner at the requested context — see issue #1600"
1038        );
1039    }
1040
1041    #[test]
1042    fn ollama_readiness_reports_missing_model_with_available_tags() {
1043        let captured = Arc::new(Mutex::new(Vec::new()));
1044        let (addr, server) = spawn_stub(
1045            vec![(200, r#"{"models":[{"name":"llama3.2:latest"}]}"#)],
1046            captured,
1047        );
1048
1049        let result = tokio::runtime::Runtime::new()
1050            .expect("runtime")
1051            .block_on(ollama_readiness(readiness_options(
1052                "qwen3",
1053                format!("http://{addr}"),
1054            )));
1055
1056        server.join().expect("stub server");
1057        assert!(!result.valid);
1058        assert_eq!(result.status, "model_missing");
1059        assert_eq!(result.available_models, vec!["llama3.2:latest"]);
1060        assert!(result.message.contains("qwen3"));
1061    }
1062
1063    #[test]
1064    fn ollama_readiness_observes_loaded_runner_and_reports_no_drift() {
1065        let _guard = env_lock().lock().expect("env lock");
1066        let _env = [
1067            ScopedEnvVar::set("HARN_OLLAMA_NUM_CTX", "32768"),
1068            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
1069            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
1070            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
1071            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
1072        ];
1073        let captured = Arc::new(Mutex::new(Vec::new()));
1074        let (addr, server) = spawn_stub(
1075            vec![
1076                (200, r#"{"models":[{"name":"qwen3:latest"}]}"#),
1077                (
1078                    200,
1079                    r#"{"models":[{"name":"qwen3:latest","model":"qwen3:latest","context_length":32768,"size_vram":1234,"size":4321,"expires_at":"2026-05-13T12:00:00Z"}]}"#,
1080                ),
1081            ],
1082            captured.clone(),
1083        );
1084
1085        let result = tokio::runtime::Runtime::new()
1086            .expect("runtime")
1087            .block_on(ollama_readiness(OllamaReadinessOptions {
1088                observe_loaded: true,
1089                ..readiness_options("qwen3", format!("http://{addr}"))
1090            }));
1091
1092        server.join().expect("stub server");
1093        assert!(result.valid, "result was: {result:?}");
1094        let runner = result.loaded_runner.expect("loaded runner present");
1095        assert_eq!(runner.context_length, Some(32_768));
1096        assert_eq!(runner.size_vram, Some(1234));
1097        assert!(
1098            result.context_drift.is_none(),
1099            "no drift expected; got {:?}",
1100            result.context_drift
1101        );
1102
1103        let requests = captured.lock().expect("captured requests");
1104        assert!(requests[0].starts_with("GET /api/tags "));
1105        assert!(requests[1].starts_with("GET /api/ps "));
1106    }
1107
1108    #[test]
1109    fn ollama_readiness_flags_context_drift_when_loaded_exceeds_expected() {
1110        let _guard = env_lock().lock().expect("env lock");
1111        let _env = [
1112            ScopedEnvVar::set("HARN_OLLAMA_NUM_CTX", "32768"),
1113            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
1114            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
1115            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
1116            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
1117        ];
1118        let (addr, server) = spawn_stub(
1119            vec![
1120                (
1121                    200,
1122                    r#"{"models":[{"name":"qwen3.6:35b-a3b-coding-nvfp4"}]}"#,
1123                ),
1124                (
1125                    200,
1126                    r#"{"models":[{"name":"qwen3.6:35b-a3b-coding-nvfp4","model":"qwen3.6:35b-a3b-coding-nvfp4","context_length":262144}]}"#,
1127                ),
1128            ],
1129            Arc::new(Mutex::new(Vec::new())),
1130        );
1131
1132        let result = tokio::runtime::Runtime::new()
1133            .expect("runtime")
1134            .block_on(ollama_readiness(OllamaReadinessOptions {
1135                observe_loaded: true,
1136                ..readiness_options("qwen3.6:35b-a3b-coding-nvfp4", format!("http://{addr}"))
1137            }));
1138
1139        server.join().expect("stub server");
1140        assert!(result.valid, "result was: {result:?}");
1141        let drift = result.context_drift.expect("drift expected");
1142        assert!(drift.contains("262144"), "drift message: {drift}");
1143        assert!(drift.contains("32768"), "drift message: {drift}");
1144        assert!(
1145            drift.contains("ollama stop"),
1146            "drift message must teach the user how to recover: {drift}"
1147        );
1148    }
1149
1150    #[test]
1151    fn ollama_readiness_uses_alias_resolved_runtime_settings() {
1152        let _guard = env_lock().lock().expect("env lock");
1153        let _env = [
1154            ScopedEnvVar::remove("HARN_OLLAMA_NUM_CTX"),
1155            ScopedEnvVar::remove("OLLAMA_CONTEXT_LENGTH"),
1156            ScopedEnvVar::remove("OLLAMA_NUM_CTX"),
1157            ScopedEnvVar::remove("HARN_OLLAMA_KEEP_ALIVE"),
1158            ScopedEnvVar::remove("OLLAMA_KEEP_ALIVE"),
1159        ];
1160        crate::llm_config::clear_user_overrides();
1161        let mut overlay = crate::llm_config::ProvidersConfig::default();
1162        overlay.aliases.insert(
1163            "qwen3.6-coding".to_string(),
1164            crate::llm_config::AliasDef {
1165                id: "qwen3.6:35b-a3b-coding-nvfp4".to_string(),
1166                provider: "ollama".to_string(),
1167                tool_format: None,
1168            },
1169        );
1170        overlay.models.insert(
1171            "qwen3.6:35b-a3b-coding-nvfp4".to_string(),
1172            crate::llm_config::ModelDef {
1173                name: "Qwen 3.6 Coding".to_string(),
1174                provider: "ollama".to_string(),
1175                context_window: 262_144,
1176                runtime_context_window: Some(98_304),
1177                stream_timeout: None,
1178                capabilities: vec![],
1179                pricing: None,
1180                deprecated: false,
1181                deprecation_note: None,
1182                quality_tags: Vec::new(),
1183                prefer_prefill_done: None,
1184            },
1185        );
1186        crate::llm_config::set_user_overrides(Some(overlay));
1187
1188        let (resolved, _) = crate::llm_config::resolve_model("qwen3.6-coding");
1189        assert_eq!(resolved, "qwen3.6:35b-a3b-coding-nvfp4");
1190
1191        let captured = Arc::new(Mutex::new(Vec::new()));
1192        let (addr, server) = spawn_stub(
1193            vec![
1194                (
1195                    200,
1196                    r#"{"models":[{"name":"qwen3.6:35b-a3b-coding-nvfp4"}]}"#,
1197                ),
1198                (200, r#"{"response":"","done":true}"#),
1199            ],
1200            captured.clone(),
1201        );
1202
1203        let result = tokio::runtime::Runtime::new()
1204            .expect("runtime")
1205            .block_on(ollama_readiness(OllamaReadinessOptions {
1206                warm: true,
1207                ..readiness_options(&resolved, format!("http://{addr}"))
1208            }));
1209
1210        server.join().expect("stub server");
1211        crate::llm_config::clear_user_overrides();
1212
1213        assert!(result.valid, "result was: {result:?}");
1214        let expected = result.expected.expect("expected request populated");
1215        assert_eq!(
1216            expected.num_ctx, 98_304,
1217            "alias-resolved model must pull runtime_context_window from the catalog"
1218        );
1219
1220        let requests = captured.lock().expect("captured requests");
1221        let warmup_body = requests[1].split("\r\n\r\n").nth(1).unwrap_or("");
1222        let json: serde_json::Value = serde_json::from_str(warmup_body).expect("warmup body");
1223        assert_eq!(json["options"]["num_ctx"], 98_304);
1224    }
1225
1226    #[test]
1227    fn fetch_ollama_loaded_runners_parses_optional_fields() {
1228        let captured = Arc::new(Mutex::new(Vec::new()));
1229        let (addr, server) = spawn_stub(
1230            vec![(
1231                200,
1232                r#"{"models":[{"name":"a:latest","model":"a:latest"},{"name":"b:latest","model":"b:latest","context_length":8192,"size_vram":42,"expires_at":"now"}]}"#,
1233            )],
1234            captured,
1235        );
1236
1237        let runners = tokio::runtime::Runtime::new()
1238            .expect("runtime")
1239            .block_on(fetch_ollama_loaded_runners(
1240                &format!("http://{addr}"),
1241                Duration::from_secs(2),
1242            ))
1243            .expect("ps response parses");
1244        server.join().expect("stub server");
1245
1246        assert_eq!(runners.len(), 2);
1247        assert_eq!(runners[0].name, "a:latest");
1248        assert!(runners[0].context_length.is_none());
1249        assert_eq!(runners[1].context_length, Some(8192));
1250        assert_eq!(runners[1].size_vram, Some(42));
1251        assert_eq!(runners[1].expires_at.as_deref(), Some("now"));
1252    }
1253
1254    fn spawn_stub(
1255        responses: Vec<(u16, &'static str)>,
1256        captured: Arc<Mutex<Vec<String>>>,
1257    ) -> (std::net::SocketAddr, std::thread::JoinHandle<()>) {
1258        let listener = TcpListener::bind("127.0.0.1:0").expect("bind ollama stub");
1259        let addr = listener.local_addr().expect("stub addr");
1260        let handle = std::thread::spawn(move || {
1261            for (status, body) in responses {
1262                let (mut stream, _) = listener.accept().expect("accept request");
1263                stream
1264                    .set_read_timeout(Some(Duration::from_secs(2)))
1265                    .expect("read timeout");
1266                let request = read_http_request(&mut stream);
1267                captured.lock().expect("captured").push(request);
1268                let reason = if status == 200 { "OK" } else { "ERROR" };
1269                let response = format!(
1270                    "HTTP/1.1 {status} {reason}\r\ncontent-type: application/json\r\ncontent-length: {}\r\nconnection: close\r\n\r\n{body}",
1271                    body.len()
1272                );
1273                stream
1274                    .write_all(response.as_bytes())
1275                    .expect("write response");
1276            }
1277        });
1278        (addr, handle)
1279    }
1280
1281    fn read_http_request(stream: &mut std::net::TcpStream) -> String {
1282        let mut data = Vec::new();
1283        let mut buf = [0_u8; 512];
1284        loop {
1285            let n = stream.read(&mut buf).expect("read request");
1286            if n == 0 {
1287                break;
1288            }
1289            data.extend_from_slice(&buf[..n]);
1290            let text = String::from_utf8_lossy(&data);
1291            if let Some(header_end) = text.find("\r\n\r\n") {
1292                let headers = &text[..header_end];
1293                let content_length = headers
1294                    .lines()
1295                    .find_map(|line| {
1296                        let (name, value) = line.split_once(':')?;
1297                        name.eq_ignore_ascii_case("content-length")
1298                            .then(|| value.trim().parse::<usize>().ok())
1299                            .flatten()
1300                    })
1301                    .unwrap_or(0);
1302                if data.len() >= header_end + 4 + content_length {
1303                    break;
1304                }
1305            }
1306        }
1307        String::from_utf8(data).expect("utf8 request")
1308    }
1309}
harn_vm/llm/api/ollama.rs

harn_vm/llm/api/
ollama.rs