Skip to main content

ai_memory/atomisation/
curator.rs

// Copyright 2026 AlphaOne LLC
// SPDX-License-Identifier: Apache-2.0

//! v0.7.0 WT-1-B — atomisation curator.
//!
//! The curator is the LLM-facing half of the atomisation engine: it
//! consumes one long memory body, asks Gemma 4 (E2B at the `smart`
//! tier, E4B at `autonomous`) to decompose it into atomic propositions,
//! parses the structured JSON response, validates per-atom token
//! budgets via `tiktoken-rs::cl100k_base`, and returns a `Vec<Atom>`
//! ready for the substrate writer in [`super::Atomiser::atomise`].
//!
//! The curator is intentionally factored as a trait
//! ([`Curator`]) so the substrate test suite can inject a deterministic
//! mock (see `tests/atomisation/core`). The production implementation
//! ([`LlmCurator`]) wraps an `OllamaClient` and is hot-path only when
//! the daemon's tier resolves to `smart` or higher.
//!
//! # Retry contract
//!
//! Malformed JSON responses retry up to `curator_max_retries` times
//! (default 3) with exponential backoff (100 ms → 500 ms → 2500 ms).
//! Each retry re-sends the original prompt verbatim — the LLM call is
//! stateless on our side. After the final attempt fails, the curator
//! surfaces [`CuratorError::MalformedResponse`] carrying the parser
//! diagnostic from the last attempt; [`super::Atomiser::atomise`] maps
//! that to [`super::AtomiseError::CuratorFailed`].
//!
//! # Token-budget contract
//!
//! Atoms up to 25% over budget are accepted as-is — the curator emits
//! a warn-level log line and proceeds. Atoms more than 25% over budget
//! are dropped (also warn-logged) rather than triggering a retry. The
//! rationale is documented in the WT-1-B brief ("fail-soft: accept
//! atoms slightly over budget rather than retry-loop"). The substrate
//! writer is the authoritative gate on memory size (governed by
//! `validate::validate_content`), not the curator.
37
38use std::sync::Mutex;
39use std::time::Duration;
40
41use serde::Deserialize;
42
43/// One proposed atom returned by the curator.
44///
45/// The wire shape mirrors the JSON the LLM emits — `{"text": "..."}` —
46/// so the parser is `serde_json::from_str::<CuratorResponse>` with no
47/// further fixup.
48#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
49pub struct Atom {
50    /// Self-contained atomic proposition. Must be ≤ `max_atom_tokens`
51    /// when measured with `cl100k_base`; the curator accepts a small
52    /// over-budget overshoot rather than retrying.
53    pub text: String,
54}
55
56/// Top-level wire shape returned by the LLM.
57///
58/// `atoms` is the list of decomposed propositions. An empty array
59/// signals "this input cannot be decomposed" — see the prompt
60/// contract; the substrate handler maps that to
61/// [`super::AtomiseError::SourceTooSmall`].
62#[derive(Debug, Clone, Deserialize)]
63pub struct CuratorResponse {
64    pub atoms: Vec<Atom>,
65}
66
/// Errors the curator can report.
///
/// Every variant wraps a human-readable diagnostic string; the
/// substrate `atomise` flow folds them into the typed
/// [`super::AtomiseError::CuratorFailed`] variant.
#[derive(Debug)]
pub enum CuratorError {
    /// The LLM could not be reached, answered with an HTTP error, or
    /// otherwise produced no body. This layer does NOT retry such
    /// failures (the underlying `OllamaClient` already retries
    /// transient ones); the substrate caller chooses between
    /// surfacing the error and falling back.
    LlmUnavailable(String),
    /// A body came back but could not be parsed as a
    /// [`CuratorResponse`] (missing `atoms`, wrong types, trailing
    /// garbage after the JSON, ...). Holds the parse diagnostic from
    /// the last attempt, reported only once every retry is spent.
    MalformedResponse(String),
}

impl std::fmt::Display for CuratorError {
    /// Renders a fixed per-variant prefix followed by the diagnostic.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CuratorError::LlmUnavailable(m) => write!(out, "curator LLM unavailable: {m}"),
            CuratorError::MalformedResponse(m) => write!(out, "curator response malformed: {m}"),
        }
    }
}

impl std::error::Error for CuratorError {}
97
98/// Trait surface the [`super::Atomiser`] consumes.
99///
100/// The trait abstracts over the LLM round-trip so unit tests can
101/// inject a deterministic stub (canned JSON, programmable
102/// failure-then-success sequences) without standing up an Ollama
103/// process. The production implementation [`LlmCurator`] performs
104/// the real network call.
105///
106/// The trait method is sync (matching the rest of the curator surface
107/// in this crate). The Ollama `generate` call is itself blocking-on-
108/// HTTP-thread; the substrate `atomise` orchestrator runs on a thread
109/// the caller manages.
110pub trait Curator: Send + Sync {
111    /// Decompose `body` into atomic propositions, each ≤ `max_atom_tokens`.
112    ///
113    /// Implementations MUST:
114    /// 1. Send the canonical system prompt (see [`CURATOR_SYSTEM_PROMPT`]) — the
115    ///    `{max_atom_tokens}` placeholder is substituted with the
116    ///    caller-supplied value.
117    /// 2. Parse the response body as a [`CuratorResponse`]. Retry up
118    ///    to `max_retries` times on malformed JSON with exponential
119    ///    backoff (100 ms / 500 ms / 2500 ms).
120    /// 3. Validate per-atom token counts via
121    ///    [`crate::storage::count_tokens_cl100k`]. Atoms slightly
122    ///    over budget (≤ 25% overshoot) are accepted and
123    ///    `tracing::warn!`-logged; gross over-budget atoms (> 25%)
124    ///    are clamped at the prompt level by retry.
125    /// 4. Bound the returned vec to `[2..=10]` atoms per the prompt
126    ///    contract. An empty vec is a legitimate "cannot decompose"
127    ///    signal — the caller maps that to `SourceTooSmall`.
128    fn decompose(
129        &self,
130        body: &str,
131        max_atom_tokens: u32,
132        max_retries: u32,
133    ) -> Result<Vec<Atom>, CuratorError>;
134}
135
/// System prompt sent to the LLM, verbatim apart from the
/// `{max_atom_tokens}` placeholder, which is filled in at call time.
/// It also pins the response shape: the parser relies on exactly this
/// `{ atoms: [...] }` envelope.
///
/// Copied unmodified from the WT-1-B brief so a later audit can grep
/// this constant and compare it against the spec document.
pub const CURATOR_SYSTEM_PROMPT: &str =
    "You are decomposing a long memory into atomic propositions.
Each atom must:
(1) Be self-contained — readable without the original context
(2) Be at most {max_atom_tokens} tokens
(3) Contain exactly one fact, decision, observation, or relation
(4) Preserve original meaning — no editorial additions
Return JSON: { atoms: [{ text: string }] } with 2 to 10 atoms.
If the input cannot be decomposed (already atomic, all-or-nothing),
return { atoms: [] }.";

/// Produce the system prompt with `max_atom_tokens` written in place
/// of the `{max_atom_tokens}` placeholder.
#[must_use]
pub fn render_system_prompt(max_atom_tokens: u32) -> String {
    let budget = max_atom_tokens.to_string();
    CURATOR_SYSTEM_PROMPT.replace("{max_atom_tokens}", budget.as_str())
}
159
160/// Try to parse one candidate response body into a [`CuratorResponse`].
161///
162/// Returns `Ok(response)` on a clean parse, `Err(diagnostic)` on any
163/// failure — the diagnostic is the underlying `serde_json` error
164/// message verbatim so the retry loop can surface it in
165/// [`CuratorError::MalformedResponse`].
166///
167/// LLM responses often arrive wrapped in markdown code fences (```json
168/// … ```) or with leading/trailing prose; we strip the fences and
169/// re-attempt once before giving up. This is the same defensive
170/// shape used by `crate::llm::OllamaClient::auto_tag` and the
171/// reflection curator's summariser.
172pub fn parse_response(body: &str) -> Result<CuratorResponse, String> {
173    // First attempt — direct parse.
174    if let Ok(resp) = serde_json::from_str::<CuratorResponse>(body) {
175        return Ok(resp);
176    }
177    // Second attempt — strip markdown fences. The LLM frequently
178    // emits ```json\n...\n``` even when the prompt asks for raw
179    // JSON; production curators have to tolerate this.
180    let stripped = strip_code_fence(body);
181    if let Ok(resp) = serde_json::from_str::<CuratorResponse>(&stripped) {
182        return Ok(resp);
183    }
184    // Third attempt — extract the first balanced JSON object from
185    // the body. Tolerates "Here are the atoms:\n{ ... }" preambles.
186    if let Some(extracted) = extract_first_json_object(&stripped) {
187        if let Ok(resp) = serde_json::from_str::<CuratorResponse>(&extracted) {
188            return Ok(resp);
189        }
190    }
191    // All three strategies failed; return the diagnostic from the
192    // most informative (first) attempt.
193    let err = serde_json::from_str::<CuratorResponse>(body)
194        .err()
195        .map_or_else(|| "unknown parse failure".to_string(), |e| e.to_string());
196    Err(err)
197}
198
/// Remove a leading ```` ```json ````, ```` ```JSON ```` or bare
/// ```` ``` ```` fence and a trailing ```` ``` ```` fence from a
/// candidate response body, trimming surrounding whitespace.
///
/// Input without fences comes back merely trimmed.
fn strip_code_fence(s: &str) -> String {
    // Most specific opener first, so the language tag goes with the fence.
    const OPENERS: [&str; 3] = ["```json", "```JSON", "```"];

    let body = s.trim();
    let after_open = OPENERS
        .iter()
        .find_map(|&fence| body.strip_prefix(fence))
        .unwrap_or(body)
        .trim_start_matches('\n');
    let inner = match after_open.strip_suffix("```") {
        Some(rest) => rest,
        None => after_open,
    };
    inner.trim().to_owned()
}
214
/// Extract the first balanced `{ ... }` substring of `s`, if any.
///
/// The scan walks the UTF-8 bytes and tracks whether it is inside a
/// double-quoted string (honouring backslash escapes), so braces that
/// appear inside JSON string values do not affect the nesting count.
///
/// A `}` seen while no object is open — for example in prose ahead of
/// the JSON — is ignored instead of being counted, so it cannot
/// prevent a later, well-formed object from being found.
///
/// Returns `None` when no opening brace is ever matched by its closer.
fn extract_first_json_object(s: &str) -> Option<String> {
    let mut depth = 0usize;
    let mut start = 0usize;
    let mut in_string = false;
    let mut escaped = false;
    for (i, &b) in s.as_bytes().iter().enumerate() {
        if in_string {
            if escaped {
                // The byte after a backslash is literal, whatever it is.
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == b'"' {
                in_string = false;
            }
            continue;
        }
        match b {
            b'"' => in_string = true,
            b'{' => {
                if depth == 0 {
                    start = i;
                }
                depth += 1;
            }
            // Only count a closer that has a matching opener.
            b'}' if depth > 0 => {
                depth -= 1;
                if depth == 0 {
                    // `start` and `i` both index ASCII bytes, so the
                    // slice bounds fall on char boundaries.
                    return Some(s[start..=i].to_string());
                }
            }
            _ => {}
        }
    }
    None
}
253
254/// Token-budget guardrail — accept atoms within 25% of the budget,
255/// warn-log overshoots, drop atoms more than 25% over budget so a
256/// pathological response cannot pollute the memory store.
257///
258/// Returns the (atoms_kept, atoms_dropped) pair so the caller can
259/// telemetry-log how often the soft cap fires.
260#[must_use]
261pub fn enforce_token_budget(atoms: Vec<Atom>, max_atom_tokens: u32) -> (Vec<Atom>, usize) {
262    let hard_cap = max_atom_tokens.saturating_add(max_atom_tokens / 4);
263    let mut kept = Vec::with_capacity(atoms.len());
264    let mut dropped = 0usize;
265    for atom in atoms {
266        let count = crate::storage::count_tokens_cl100k(&atom.text);
267        let count_u32 = u32::try_from(count).unwrap_or(u32::MAX);
268        if count_u32 <= max_atom_tokens {
269            kept.push(atom);
270        } else if count_u32 <= hard_cap {
271            tracing::warn!(
272                target: "atomisation::curator",
273                atom_tokens = count_u32,
274                budget = max_atom_tokens,
275                "atom slightly over token budget — accepting (fail-soft)"
276            );
277            kept.push(atom);
278        } else {
279            tracing::warn!(
280                target: "atomisation::curator",
281                atom_tokens = count_u32,
282                hard_cap,
283                "atom grossly over token budget — dropping"
284            );
285            dropped += 1;
286        }
287    }
288    (kept, dropped)
289}
290
/// Backoff delay for the curator retry loop, keyed by the zero-based
/// retry attempt: 100 ms, then 500 ms, then 2500 ms.
///
/// Any attempt index past the end of the schedule reuses the final
/// 2500 ms delay, so an oversized retry cap cannot cause a panic.
#[must_use]
pub fn backoff_for_attempt(attempt: u32) -> Duration {
    let millis: u64 = match attempt {
        0 => 100,
        1 => 500,
        _ => 2500,
    };
    Duration::from_millis(millis)
}
301
302// ---------------------------------------------------------------------------
303// LlmCurator — production impl backed by `crate::llm::OllamaClient`
304// ---------------------------------------------------------------------------
305
306/// Production curator. Wraps an `OllamaClient` (or any
307/// `crate::autonomy::AutonomyLlm`-like surface — we re-use the
308/// existing `generate` shape via a free function rather than coupling
309/// to the autonomy trait, because the autonomy trait does not expose
310/// `generate(prompt, system)`).
311pub struct LlmCurator<L: LlmGenerate + Send + Sync> {
312    llm: L,
313    /// Sleep function. Production passes `std::thread::sleep`; tests
314    /// pass a no-op to keep the suite fast.
315    sleep: Mutex<Box<dyn FnMut(Duration) + Send + Sync>>,
316}
317
318/// Minimal generate surface the curator needs. Implemented for
319/// `crate::llm::OllamaClient` in the same module; the trait stays
320/// here (not in `src/llm.rs`) so external callers don't accidentally
321/// pull it into their wire path.
322pub trait LlmGenerate {
323    /// Run a single generate cycle. Returns the response body verbatim
324    /// (no trimming, no fence-stripping — `parse_response` handles
325    /// that).
326    fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String, CuratorError>;
327}
328
329impl LlmGenerate for crate::llm::OllamaClient {
330    fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String, CuratorError> {
331        Self::generate(self, prompt, system)
332            .map_err(|e| CuratorError::LlmUnavailable(e.to_string()))
333    }
334}
335
336/// Pass-through impl for `Arc<OllamaClient>` — lets the MCP wiring at
337/// `mcp::run_mcp_server` share the daemon's existing `Arc<OllamaClient>`
338/// across the auto-tag / expand-query / detect-contradiction surface
339/// and the WT-1-C atomiser without cloning the underlying connection
340/// pool.
341impl LlmGenerate for std::sync::Arc<crate::llm::OllamaClient> {
342    fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String, CuratorError> {
343        crate::llm::OllamaClient::generate(self.as_ref(), prompt, system)
344            .map_err(|e| CuratorError::LlmUnavailable(e.to_string()))
345    }
346}
347
348impl<L: LlmGenerate + Send + Sync> LlmCurator<L> {
349    /// Construct a curator with the supplied LLM and the real
350    /// `std::thread::sleep` for retry backoff.
351    pub fn new(llm: L) -> Self {
352        Self {
353            llm,
354            sleep: Mutex::new(Box::new(std::thread::sleep)),
355        }
356    }
357
358    /// Construct a curator with an injected sleep — used by the
359    /// unit test below to keep the suite under one second.
360    #[cfg(test)]
361    pub fn with_sleep<F>(llm: L, sleep: F) -> Self
362    where
363        F: FnMut(Duration) + Send + Sync + 'static,
364    {
365        Self {
366            llm,
367            sleep: Mutex::new(Box::new(sleep)),
368        }
369    }
370}
371
372impl<L: LlmGenerate + Send + Sync> Curator for LlmCurator<L> {
373    fn decompose(
374        &self,
375        body: &str,
376        max_atom_tokens: u32,
377        max_retries: u32,
378    ) -> Result<Vec<Atom>, CuratorError> {
379        let system = render_system_prompt(max_atom_tokens);
380        let mut last_err = String::from("no attempts made");
381        for attempt in 0..=max_retries {
382            let resp = self.llm.generate(body, Some(&system))?;
383            match parse_response(&resp) {
384                Ok(parsed) => {
385                    let (kept, _dropped) = enforce_token_budget(parsed.atoms, max_atom_tokens);
386                    return Ok(kept);
387                }
388                Err(e) => {
389                    last_err = e;
390                    if attempt < max_retries {
391                        let backoff = backoff_for_attempt(attempt);
392                        if let Ok(mut s) = self.sleep.lock() {
393                            (s)(backoff);
394                        }
395                    }
396                }
397            }
398        }
399        Err(CuratorError::MalformedResponse(last_err))
400    }
401}
402
// ---------------------------------------------------------------------------
// Unit tests — pure logic. Mocked LLM. No DB, no network.
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::VecDeque;
    use std::sync::Arc;

    /// Mock that returns a programmable sequence of responses, handed
    /// out front-to-back, one per `generate` call.
    pub(crate) struct MockLlm {
        responses: Mutex<VecDeque<Result<String, CuratorError>>>,
        calls: Mutex<usize>,
    }

    impl MockLlm {
        pub fn new(responses: Vec<Result<String, CuratorError>>) -> Self {
            Self {
                responses: Mutex::new(responses.into()),
                calls: Mutex::new(0),
            }
        }

        /// Number of `generate` calls made so far.
        pub fn call_count(&self) -> usize {
            *self.calls.lock().unwrap()
        }
    }

    impl LlmGenerate for Arc<MockLlm> {
        fn generate(&self, _prompt: &str, _system: Option<&str>) -> Result<String, CuratorError> {
            let mut calls = self.calls.lock().unwrap();
            *calls += 1;
            let mut rs = self.responses.lock().unwrap();
            // Once the script runs out, behave like an unreachable LLM.
            rs.pop_front().unwrap_or_else(|| {
                Err(CuratorError::LlmUnavailable(
                    "mock: no responses left".into(),
                ))
            })
        }
    }

    #[test]
    fn render_prompt_substitutes_max_atom_tokens() {
        let p = render_system_prompt(200);
        assert!(p.contains("at most 200 tokens"));
        assert!(!p.contains("{max_atom_tokens}"));
    }

    #[test]
    fn parse_response_accepts_direct_json() {
        let body = r#"{"atoms":[{"text":"alpha"},{"text":"beta"}]}"#;
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 2);
        assert_eq!(r.atoms[0].text, "alpha");
    }

    #[test]
    fn parse_response_strips_markdown_fence() {
        let body = "```json\n{\"atoms\":[{\"text\":\"alpha\"}]}\n```";
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 1);
    }

    #[test]
    fn parse_response_extracts_object_with_preamble() {
        let body = "Sure, here's the JSON:\n{\"atoms\":[{\"text\":\"alpha\"}]}\nThanks!";
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 1);
    }

    #[test]
    fn parse_response_empty_atoms_is_valid() {
        // "Cannot decompose" signal — substrate maps to SourceTooSmall.
        let body = r#"{"atoms":[]}"#;
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 0);
    }

    #[test]
    fn parse_response_rejects_garbage() {
        assert!(parse_response("nope nope nope").is_err());
        assert!(parse_response("").is_err());
        assert!(parse_response(r#"{"wrong":"shape"}"#).is_err());
    }

    #[test]
    fn enforce_token_budget_keeps_in_budget() {
        let atoms = vec![
            Atom {
                text: "small atom".to_string(),
            },
            Atom {
                text: "another small atom".to_string(),
            },
        ];
        let (kept, dropped) = enforce_token_budget(atoms, 200);
        assert_eq!(kept.len(), 2);
        assert_eq!(dropped, 0);
    }

    #[test]
    fn enforce_token_budget_drops_grossly_over() {
        // Build a string that is firmly over the 25% overshoot window.
        let huge: String = "word ".repeat(500);
        let atoms = vec![
            Atom {
                text: "fine".to_string(),
            },
            Atom { text: huge },
        ];
        let (kept, dropped) = enforce_token_budget(atoms, 10);
        assert_eq!(kept.len(), 1);
        assert_eq!(dropped, 1);
    }

    #[test]
    fn backoff_schedule_is_monotonic_and_bounded() {
        assert_eq!(backoff_for_attempt(0), Duration::from_millis(100));
        assert_eq!(backoff_for_attempt(1), Duration::from_millis(500));
        assert_eq!(backoff_for_attempt(2), Duration::from_millis(2500));
        assert_eq!(backoff_for_attempt(99), Duration::from_millis(2500));
    }

    #[test]
    fn curator_succeeds_on_first_attempt() {
        let mock = Arc::new(MockLlm::new(vec![Ok(
            r#"{"atoms":[{"text":"alpha"},{"text":"beta"}]}"#.to_string(),
        )]));
        let curator = LlmCurator::with_sleep(mock.clone(), |_| {});
        let atoms = curator.decompose("input", 200, 3).unwrap();
        assert_eq!(atoms.len(), 2);
        assert_eq!(mock.call_count(), 1);
    }

    #[test]
    fn curator_retries_on_malformed_then_succeeds() {
        let mock = Arc::new(MockLlm::new(vec![
            Ok("garbage".to_string()),
            Ok("still garbage".to_string()),
            Ok(r#"{"atoms":[{"text":"alpha"}]}"#.to_string()),
        ]));
        let curator = LlmCurator::with_sleep(mock.clone(), |_| {});
        let atoms = curator.decompose("input", 200, 3).unwrap();
        assert_eq!(atoms.len(), 1);
        assert_eq!(mock.call_count(), 3);
    }

    #[test]
    fn curator_fails_after_max_retries() {
        let mock = Arc::new(MockLlm::new(vec![
            Ok("garbage 1".to_string()),
            Ok("garbage 2".to_string()),
            Ok("garbage 3".to_string()),
            Ok("garbage 4".to_string()),
        ]));
        let curator = LlmCurator::with_sleep(mock.clone(), |_| {});
        // max_retries=3 means 1 initial + 3 retries = 4 total attempts.
        let err = curator.decompose("input", 200, 3).unwrap_err();
        assert!(matches!(err, CuratorError::MalformedResponse(_)));
        assert_eq!(mock.call_count(), 4);
    }

    #[test]
    fn curator_propagates_llm_unavailable() {
        let mock = Arc::new(MockLlm::new(vec![Err(CuratorError::LlmUnavailable(
            "connection refused".into(),
        ))]));
        let curator = LlmCurator::with_sleep(mock, |_| {});
        let err = curator.decompose("input", 200, 3).unwrap_err();
        assert!(matches!(err, CuratorError::LlmUnavailable(_)));
    }

    /// Drives the production `LlmGenerate for OllamaClient` impl (the
    /// error-mapping arm) AND the real-sleep `LlmCurator::new`
    /// constructor. Points the client at a loopback port that is
    /// expected to have no listener, so `generate` fails fast;
    /// `max_retries=0` means the real `std::thread::sleep` is never
    /// actually called (no backoff on the final attempt), keeping the
    /// test sub-second while still exercising `LlmCurator::new`'s body.
    #[test]
    fn llm_curator_new_with_real_ollama_client_maps_unavailable() {
        // Port 1 on loopback: nothing is expected to be listening
        // there, so the connection attempt should be refused.
        let client = crate::llm::OllamaClient::new_with_url_no_health_check(
            "http://127.0.0.1:1",
            "test-model",
        )
        .expect("build no-health-check client");
        // Exercise the production `LlmCurator::new` (real sleep) path.
        let curator = LlmCurator::new(client);
        let err = curator.decompose("body", 200, 0).unwrap_err();
        // The OllamaClient generate failed (connection refused) and the
        // production LlmGenerate impl mapped it to LlmUnavailable.
        assert!(
            matches!(err, CuratorError::LlmUnavailable(_)),
            "expected LlmUnavailable, got {err:?}"
        );
    }

    /// Drives the `LlmGenerate for Arc<OllamaClient>` pass-through impl
    /// used by the MCP daemon wiring.
    #[test]
    fn llm_curator_arc_ollama_passthrough_maps_unavailable() {
        let client = Arc::new(
            crate::llm::OllamaClient::new_with_url_no_health_check(
                "http://127.0.0.1:1",
                "test-model",
            )
            .expect("build no-health-check client"),
        );
        let curator = LlmCurator::with_sleep(client, |_| {});
        let err = curator.decompose("body", 200, 0).unwrap_err();
        assert!(matches!(err, CuratorError::LlmUnavailable(_)));
    }

    /// CuratorError Display arms — both variants render their prefix.
    #[test]
    fn curator_error_display_arms() {
        assert_eq!(
            CuratorError::LlmUnavailable("x".into()).to_string(),
            "curator LLM unavailable: x"
        );
        assert_eq!(
            CuratorError::MalformedResponse("y".into()).to_string(),
            "curator response malformed: y"
        );
        let e = CuratorError::MalformedResponse("z".into());
        let _: &dyn std::error::Error = &e;
    }

    #[test]
    fn extract_first_json_object_handles_braces_in_strings() {
        // Brace-counting must NOT be fooled by braces inside JSON strings.
        let s = r#"prefix {"atoms":[{"text":"contains } brace"}]} suffix"#;
        let extracted = extract_first_json_object(s).unwrap();
        let parsed: CuratorResponse = serde_json::from_str(&extracted).unwrap();
        assert_eq!(parsed.atoms.len(), 1);
        assert_eq!(parsed.atoms[0].text, "contains } brace");
    }
}