ai_memory/atomisation/curator.rs
1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! v0.7.0 WT-1-B — atomisation curator.
5//!
6//! The curator is the LLM-facing half of the atomisation engine: it
7//! consumes one long memory body, asks Gemma 4 (E2B at the `smart`
8//! tier, E4B at `autonomous`) to decompose it into atomic propositions,
9//! parses the structured JSON response, validates per-atom token
10//! budgets via `tiktoken-rs::cl100k_base`, and returns a `Vec<Atom>`
11//! ready for the substrate writer in [`super::Atomiser::atomise`].
12//!
13//! The curator is intentionally factored as a trait
14//! ([`Curator`]) so the substrate test suite can inject a deterministic
15//! mock (see `tests/atomisation/core`). The production implementation
16//! ([`LlmCurator`]) wraps an `OllamaClient` and is hot-path only when
17//! the daemon's tier resolves to `smart` or higher.
18//!
19//! # Retry contract
20//!
21//! Malformed JSON responses retry up to `curator_max_retries` times
22//! (default 3) with exponential backoff (100 ms → 500 ms → 2500 ms).
23//! Each retry re-sends the original prompt verbatim — the LLM call is
24//! stateless on our side. After the final attempt fails, the curator
25//! surfaces [`CuratorError::MalformedResponse`] carrying the last
26//! parser diagnostic; [`super::Atomiser::atomise`] maps that to
27//! [`super::AtomiseError::CuratorFailed`].
28//!
//! # Token-budget contract
//!
//! Atoms slightly over budget (up to 25% above `max_atom_tokens`) are
//! accepted as-is — the curator emits a warn-level log line and
//! proceeds. The rationale is documented in the WT-1-B brief
//! ("fail-soft: accept atoms slightly over budget rather than
//! retry-loop"). Atoms more than 25% over budget are dropped by
//! [`enforce_token_budget`], also with a warn-level log line. The
//! substrate writer is the authoritative gate on memory size
//! (governed by `validate::validate_content`), not the curator.
37
38use std::sync::Mutex;
39use std::time::Duration;
40
41use serde::Deserialize;
42
/// One proposed atom returned by the curator.
///
/// The wire shape mirrors the JSON the LLM emits — `{"text": "..."}` —
/// so the parser is `serde_json::from_str::<CuratorResponse>` with no
/// further fixup. The struct is deserialise-only on this side: it
/// derives `Deserialize` but not `Serialize`.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct Atom {
    /// Self-contained atomic proposition. Must be ≤ `max_atom_tokens`
    /// when measured with `cl100k_base`; the curator accepts a small
    /// over-budget overshoot rather than retrying (see
    /// [`enforce_token_budget`] for the exact thresholds).
    pub text: String,
}
55
/// Top-level wire shape returned by the LLM.
///
/// `atoms` is the list of decomposed propositions. An empty array
/// signals "this input cannot be decomposed" — see the prompt
/// contract; the substrate handler maps that to
/// [`super::AtomiseError::SourceTooSmall`].
///
/// `atoms` has no serde default, so a body without an `atoms` key
/// fails to deserialise.
#[derive(Debug, Clone, Deserialize)]
pub struct CuratorResponse {
    /// Decomposed propositions in the order the LLM emitted them.
    pub atoms: Vec<Atom>,
}
66
/// Errors the curator can hand back to its caller.
///
/// Every variant wraps a human-readable diagnostic string; the
/// substrate `atomise` flow folds them into the typed
/// [`super::AtomiseError::CuratorFailed`] variant.
#[derive(Debug)]
pub enum CuratorError {
    /// The LLM could not be reached, answered with an HTTP error, or
    /// otherwise produced no body. This layer does NOT retry such
    /// failures (the underlying `OllamaClient` already retries
    /// transient ones); the substrate caller chooses between
    /// surfacing the error and falling back.
    LlmUnavailable(String),
    /// A body came back but could not be parsed as a
    /// [`CuratorResponse`] (missing `atoms`, wrong types, trailing
    /// garbage after the JSON, etc.). Holds the parse diagnostic
    /// reported once every retry has been used up.
    MalformedResponse(String),
}

impl std::fmt::Display for CuratorError {
    /// Renders as `<variant label>: <diagnostic>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label per variant, then format once.
        let (label, detail) = match self {
            Self::LlmUnavailable(detail) => ("curator LLM unavailable", detail),
            Self::MalformedResponse(detail) => ("curator response malformed", detail),
        };
        write!(f, "{label}: {detail}")
    }
}

impl std::error::Error for CuratorError {}
97
/// Trait surface the [`super::Atomiser`] consumes.
///
/// The trait abstracts over the LLM round-trip so unit tests can
/// inject a deterministic stub (canned JSON, programmable
/// failure-then-success sequences) without standing up an Ollama
/// process. The production implementation [`LlmCurator`] performs
/// the real network call.
///
/// The trait method is sync (matching the rest of the curator surface
/// in this crate). The Ollama `generate` call is itself blocking-on-
/// HTTP-thread; the substrate `atomise` orchestrator runs on a thread
/// the caller manages.
pub trait Curator: Send + Sync {
    /// Decompose `body` into atomic propositions, each ≤ `max_atom_tokens`.
    ///
    /// Implementations MUST:
    /// 1. Send the canonical system prompt (see [`CURATOR_SYSTEM_PROMPT`]) — the
    ///    `{max_atom_tokens}` placeholder is substituted with the
    ///    caller-supplied value.
    /// 2. Parse the response body as a [`CuratorResponse`]. Retry up
    ///    to `max_retries` times on malformed JSON with exponential
    ///    backoff (100 ms / 500 ms / 2500 ms).
    /// 3. Validate per-atom token counts via
    ///    [`crate::storage::count_tokens_cl100k`]. Atoms slightly
    ///    over budget (≤ 25% overshoot) are accepted and
    ///    `tracing::warn!`-logged; gross over-budget atoms (> 25%)
    ///    are dropped from the result and `tracing::warn!`-logged —
    ///    this is what [`enforce_token_budget`] does, and a gross
    ///    overshoot does not trigger a retry in [`LlmCurator`].
    /// 4. Bound the returned vec to `[2..=10]` atoms per the prompt
    ///    contract. An empty vec is a legitimate "cannot decompose"
    ///    signal — the caller maps that to `SourceTooSmall`.
    ///    NOTE(review): [`LlmCurator`] returns whatever survives the
    ///    token-budget filter and does not check the `[2..=10]` bound
    ///    itself — confirm whether the caller enforces it.
    ///
    /// # Errors
    ///
    /// [`CuratorError::LlmUnavailable`] when the LLM call itself
    /// fails; [`CuratorError::MalformedResponse`] when no attempt
    /// produced a parseable body.
    fn decompose(
        &self,
        body: &str,
        max_atom_tokens: u32,
        max_retries: u32,
    ) -> Result<Vec<Atom>, CuratorError>;
}
135
/// Verbatim system prompt sent to the LLM. The `{max_atom_tokens}`
/// token is substituted at call time (see [`render_system_prompt`]).
/// The shape of the JSON response is pinned here — the parser depends
/// on exactly this `{ atoms: [...] }` envelope.
///
/// Lifted from the WT-1-B brief without modification so a future
/// audit can grep this constant in source against the spec doc.
/// The literal spans several source lines on purpose: the embedded
/// newlines are part of the prompt text, so do not re-indent the
/// continuation lines.
pub const CURATOR_SYSTEM_PROMPT: &str =
    "You are decomposing a long memory into atomic propositions.
Each atom must:
(1) Be self-contained — readable without the original context
(2) Be at most {max_atom_tokens} tokens
(3) Contain exactly one fact, decision, observation, or relation
(4) Preserve original meaning — no editorial additions
Return JSON: { atoms: [{ text: string }] } with 2 to 10 atoms.
If the input cannot be decomposed (already atomic, all-or-nothing),
return { atoms: [] }.";
153
154/// Render the system prompt with the supplied token budget substituted.
155#[must_use]
156pub fn render_system_prompt(max_atom_tokens: u32) -> String {
157 CURATOR_SYSTEM_PROMPT.replace("{max_atom_tokens}", &max_atom_tokens.to_string())
158}
159
160/// Try to parse one candidate response body into a [`CuratorResponse`].
161///
162/// Returns `Ok(response)` on a clean parse, `Err(diagnostic)` on any
163/// failure — the diagnostic is the underlying `serde_json` error
164/// message verbatim so the retry loop can surface it in
165/// [`CuratorError::MalformedResponse`].
166///
167/// LLM responses often arrive wrapped in markdown code fences (```json
168/// … ```) or with leading/trailing prose; we strip the fences and
169/// re-attempt once before giving up. This is the same defensive
170/// shape used by `crate::llm::OllamaClient::auto_tag` and the
171/// reflection curator's summariser.
172pub fn parse_response(body: &str) -> Result<CuratorResponse, String> {
173 // First attempt — direct parse.
174 if let Ok(resp) = serde_json::from_str::<CuratorResponse>(body) {
175 return Ok(resp);
176 }
177 // Second attempt — strip markdown fences. The LLM frequently
178 // emits ```json\n...\n``` even when the prompt asks for raw
179 // JSON; production curators have to tolerate this.
180 let stripped = strip_code_fence(body);
181 if let Ok(resp) = serde_json::from_str::<CuratorResponse>(&stripped) {
182 return Ok(resp);
183 }
184 // Third attempt — extract the first balanced JSON object from
185 // the body. Tolerates "Here are the atoms:\n{ ... }" preambles.
186 if let Some(extracted) = extract_first_json_object(&stripped) {
187 if let Ok(resp) = serde_json::from_str::<CuratorResponse>(&extracted) {
188 return Ok(resp);
189 }
190 }
191 // All three strategies failed; return the diagnostic from the
192 // most informative (first) attempt.
193 let err = serde_json::from_str::<CuratorResponse>(body)
194 .err()
195 .map_or_else(|| "unknown parse failure".to_string(), |e| e.to_string());
196 Err(err)
197}
198
/// Remove a surrounding markdown code fence from a candidate response.
///
/// After trimming outer whitespace, at most one opening marker is
/// removed (tried in the order "```json", "```JSON", then bare "```"),
/// followed by any newlines directly after it. A closing "```" is
/// removed only when it is the very end of the remaining text. The
/// result is trimmed again and returned as an owned `String`; input
/// without fences comes back merely trimmed.
fn strip_code_fence(s: &str) -> String {
    // Order matters: the tagged openers must be tried before the bare one.
    const OPENERS: [&str; 3] = ["```json", "```JSON", "```"];

    let body = s.trim();
    let after_open = OPENERS
        .iter()
        .find_map(|opener| body.strip_prefix(*opener))
        .unwrap_or(body)
        .trim_start_matches('\n');
    let inner = match after_open.strip_suffix("```") {
        Some(without_close) => without_close,
        None => after_open,
    };
    inner.trim().to_owned()
}
214
/// Extract the first balanced `{ ... }` substring of `s`.
///
/// The scan is byte-wise and tracks double-quoted strings (including
/// backslash escapes) so braces inside JSON string values do not
/// affect the brace counter. A `}` seen while no object is open is
/// ignored: previously such a stray closer pushed the depth negative,
/// which prevented any later, well-formed object from being found.
///
/// Returns `None` when `s` contains no balanced object.
fn extract_first_json_object(s: &str) -> Option<String> {
    // Unsigned on purpose: the depth can never drop below zero now.
    let mut depth = 0usize;
    let mut start: Option<usize> = None;
    let mut in_string = false;
    let mut prev_backslash = false;
    for (i, &b) in s.as_bytes().iter().enumerate() {
        if in_string {
            // An unescaped quote closes the string.
            if b == b'"' && !prev_backslash {
                in_string = false;
            }
            // A backslash escapes the next byte unless it is itself escaped.
            prev_backslash = b == b'\\' && !prev_backslash;
            continue;
        }
        match b {
            b'"' => {
                in_string = true;
                prev_backslash = false;
            }
            b'{' => {
                if depth == 0 {
                    start = Some(i);
                }
                depth += 1;
            }
            // Only count a closer when an object is actually open.
            b'}' if depth > 0 => {
                depth -= 1;
                if depth == 0 {
                    if let Some(s0) = start {
                        // `{` and `}` are single ASCII bytes, so both ends
                        // of the slice fall on UTF-8 character boundaries.
                        return Some(s[s0..=i].to_string());
                    }
                }
            }
            _ => {}
        }
    }
    None
}
253
254/// Token-budget guardrail — accept atoms within 25% of the budget,
255/// warn-log overshoots, drop atoms more than 25% over budget so a
256/// pathological response cannot pollute the memory store.
257///
258/// Returns the (atoms_kept, atoms_dropped) pair so the caller can
259/// telemetry-log how often the soft cap fires.
260#[must_use]
261pub fn enforce_token_budget(atoms: Vec<Atom>, max_atom_tokens: u32) -> (Vec<Atom>, usize) {
262 let hard_cap = max_atom_tokens.saturating_add(max_atom_tokens / 4);
263 let mut kept = Vec::with_capacity(atoms.len());
264 let mut dropped = 0usize;
265 for atom in atoms {
266 let count = crate::storage::count_tokens_cl100k(&atom.text);
267 let count_u32 = u32::try_from(count).unwrap_or(u32::MAX);
268 if count_u32 <= max_atom_tokens {
269 kept.push(atom);
270 } else if count_u32 <= hard_cap {
271 tracing::warn!(
272 target: "atomisation::curator",
273 atom_tokens = count_u32,
274 budget = max_atom_tokens,
275 "atom slightly over token budget — accepting (fail-soft)"
276 );
277 kept.push(atom);
278 } else {
279 tracing::warn!(
280 target: "atomisation::curator",
281 atom_tokens = count_u32,
282 hard_cap,
283 "atom grossly over token budget — dropping"
284 );
285 dropped += 1;
286 }
287 }
288 (kept, dropped)
289}
290
/// Backoff delay to wait after the given zero-based failed attempt:
/// 100 ms after attempt 0, 500 ms after attempt 1, and 2500 ms after
/// attempt 2 or any later attempt. Larger indices saturate at the last
/// step rather than panicking, so a misconfigured retry cap stays
/// harmless.
#[must_use]
pub fn backoff_for_attempt(attempt: u32) -> Duration {
    let millis: u64 = match attempt {
        0 => 100,
        1 => 500,
        _ => 2500,
    };
    Duration::from_millis(millis)
}
301
302// ---------------------------------------------------------------------------
303// LlmCurator — production impl backed by `crate::llm::OllamaClient`
304// ---------------------------------------------------------------------------
305
/// Production curator. Wraps an `OllamaClient` (or any other type
/// implementing [`LlmGenerate`]). The curator goes through the small
/// [`LlmGenerate`] trait declared in this module rather than coupling
/// to the autonomy trait, because the autonomy trait does not expose
/// `generate(prompt, system)`.
pub struct LlmCurator<L: LlmGenerate + Send + Sync> {
    /// Backend used for every generate call in the retry loop.
    llm: L,
    /// Sleep function. Production passes `std::thread::sleep`; tests
    /// pass a no-op to keep the suite fast. Stored behind a `Mutex`
    /// because the callable is `FnMut` while `decompose` only has
    /// `&self`.
    sleep: Mutex<Box<dyn FnMut(Duration) + Send + Sync>>,
}
317
/// Minimal generate surface the curator needs. Implemented for
/// `crate::llm::OllamaClient` (and for `Arc<OllamaClient>`) in the
/// same module; the trait stays here (not in `src/llm.rs`) so external
/// callers don't accidentally pull it into their wire path.
pub trait LlmGenerate {
    /// Run a single generate cycle. Returns the response body verbatim
    /// (no trimming, no fence-stripping — `parse_response` handles
    /// that).
    ///
    /// `prompt` is the user-side text and `system` the optional system
    /// prompt. Failures are reported as a [`CuratorError`]; the impls
    /// in this module map backend errors to
    /// [`CuratorError::LlmUnavailable`].
    fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String, CuratorError>;
}
328
329impl LlmGenerate for crate::llm::OllamaClient {
330 fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String, CuratorError> {
331 Self::generate(self, prompt, system)
332 .map_err(|e| CuratorError::LlmUnavailable(e.to_string()))
333 }
334}
335
336/// Pass-through impl for `Arc<OllamaClient>` — lets the MCP wiring at
337/// `mcp::run_mcp_server` share the daemon's existing `Arc<OllamaClient>`
338/// across the auto-tag / expand-query / detect-contradiction surface
339/// and the WT-1-C atomiser without cloning the underlying connection
340/// pool.
341impl LlmGenerate for std::sync::Arc<crate::llm::OllamaClient> {
342 fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String, CuratorError> {
343 crate::llm::OllamaClient::generate(self.as_ref(), prompt, system)
344 .map_err(|e| CuratorError::LlmUnavailable(e.to_string()))
345 }
346}
347
348impl<L: LlmGenerate + Send + Sync> LlmCurator<L> {
349 /// Construct a curator with the supplied LLM and the real
350 /// `std::thread::sleep` for retry backoff.
351 pub fn new(llm: L) -> Self {
352 Self {
353 llm,
354 sleep: Mutex::new(Box::new(std::thread::sleep)),
355 }
356 }
357
358 /// Construct a curator with an injected sleep — used by the
359 /// unit test below to keep the suite under one second.
360 #[cfg(test)]
361 pub fn with_sleep<F>(llm: L, sleep: F) -> Self
362 where
363 F: FnMut(Duration) + Send + Sync + 'static,
364 {
365 Self {
366 llm,
367 sleep: Mutex::new(Box::new(sleep)),
368 }
369 }
370}
371
372impl<L: LlmGenerate + Send + Sync> Curator for LlmCurator<L> {
373 fn decompose(
374 &self,
375 body: &str,
376 max_atom_tokens: u32,
377 max_retries: u32,
378 ) -> Result<Vec<Atom>, CuratorError> {
379 let system = render_system_prompt(max_atom_tokens);
380 let mut last_err = String::from("no attempts made");
381 for attempt in 0..=max_retries {
382 let resp = self.llm.generate(body, Some(&system))?;
383 match parse_response(&resp) {
384 Ok(parsed) => {
385 let (kept, _dropped) = enforce_token_budget(parsed.atoms, max_atom_tokens);
386 return Ok(kept);
387 }
388 Err(e) => {
389 last_err = e;
390 if attempt < max_retries {
391 let backoff = backoff_for_attempt(attempt);
392 if let Ok(mut s) = self.sleep.lock() {
393 (s)(backoff);
394 }
395 }
396 }
397 }
398 }
399 Err(CuratorError::MalformedResponse(last_err))
400 }
401}
402
403// ---------------------------------------------------------------------------
404// Unit tests — pure logic. Mocked LLM. No DB, no network.
405// ---------------------------------------------------------------------------
406
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// Mock that returns a programmable sequence of responses, one per
    /// `generate` call, and counts how many calls were made.
    ///
    /// NOTE(review): the original comment said this is also used by
    /// the integration suite, but this module is private and
    /// `#[cfg(test)]`-gated, so code outside this file may not be able
    /// to reach it — confirm.
    pub(crate) struct MockLlm {
        responses: Mutex<Vec<Result<String, CuratorError>>>,
        calls: Mutex<usize>,
    }

    impl MockLlm {
        /// Queue `responses`; they are handed out front-to-back.
        pub fn new(responses: Vec<Result<String, CuratorError>>) -> Self {
            Self {
                responses: Mutex::new(responses),
                calls: Mutex::new(0),
            }
        }

        /// Number of `generate` calls observed so far.
        pub fn call_count(&self) -> usize {
            *self.calls.lock().unwrap()
        }
    }

    impl LlmGenerate for Arc<MockLlm> {
        fn generate(&self, _prompt: &str, _system: Option<&str>) -> Result<String, CuratorError> {
            let mut calls = self.calls.lock().unwrap();
            *calls += 1;
            let mut rs = self.responses.lock().unwrap();
            // An exhausted queue is reported as an unavailable LLM.
            if rs.is_empty() {
                return Err(CuratorError::LlmUnavailable(
                    "mock: no responses left".into(),
                ));
            }
            rs.remove(0)
        }
    }

    #[test]
    fn render_prompt_substitutes_max_atom_tokens() {
        let p = render_system_prompt(200);
        assert!(p.contains("at most 200 tokens"));
        assert!(!p.contains("{max_atom_tokens}"));
    }

    #[test]
    fn parse_response_accepts_direct_json() {
        let body = r#"{"atoms":[{"text":"alpha"},{"text":"beta"}]}"#;
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 2);
        assert_eq!(r.atoms[0].text, "alpha");
    }

    #[test]
    fn parse_response_strips_markdown_fence() {
        let body = "```json\n{\"atoms\":[{\"text\":\"alpha\"}]}\n```";
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 1);
    }

    #[test]
    fn parse_response_extracts_object_with_preamble() {
        let body = "Sure, here's the JSON:\n{\"atoms\":[{\"text\":\"alpha\"}]}\nThanks!";
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 1);
    }

    #[test]
    fn parse_response_empty_atoms_is_valid() {
        // "Cannot decompose" signal — substrate maps to SourceTooSmall.
        let body = r#"{"atoms":[]}"#;
        let r = parse_response(body).unwrap();
        assert_eq!(r.atoms.len(), 0);
    }

    #[test]
    fn parse_response_rejects_garbage() {
        // Non-JSON, empty input, and valid JSON of the wrong shape all fail.
        assert!(parse_response("nope nope nope").is_err());
        assert!(parse_response("").is_err());
        assert!(parse_response(r#"{"wrong":"shape"}"#).is_err());
    }

    #[test]
    fn enforce_token_budget_keeps_in_budget() {
        let atoms = vec![
            Atom {
                text: "small atom".to_string(),
            },
            Atom {
                text: "another small atom".to_string(),
            },
        ];
        let (kept, dropped) = enforce_token_budget(atoms, 200);
        assert_eq!(kept.len(), 2);
        assert_eq!(dropped, 0);
    }

    #[test]
    fn enforce_token_budget_drops_grossly_over() {
        // Build a string that is firmly over the 25% overshoot window.
        let huge: String = "word ".repeat(500);
        let atoms = vec![
            Atom {
                text: "fine".to_string(),
            },
            Atom { text: huge },
        ];
        let (kept, dropped) = enforce_token_budget(atoms, 10);
        assert_eq!(kept.len(), 1);
        assert_eq!(dropped, 1);
    }

    #[test]
    fn backoff_schedule_is_monotonic_and_bounded() {
        assert_eq!(backoff_for_attempt(0), Duration::from_millis(100));
        assert_eq!(backoff_for_attempt(1), Duration::from_millis(500));
        assert_eq!(backoff_for_attempt(2), Duration::from_millis(2500));
        // Out-of-range attempts saturate at the last schedule entry.
        assert_eq!(backoff_for_attempt(99), Duration::from_millis(2500));
    }

    #[test]
    fn curator_succeeds_on_first_attempt() {
        let mock = Arc::new(MockLlm::new(vec![Ok(
            r#"{"atoms":[{"text":"alpha"},{"text":"beta"}]}"#.to_string(),
        )]));
        let curator = LlmCurator::with_sleep(mock.clone(), |_| {});
        let atoms = curator.decompose("input", 200, 3).unwrap();
        assert_eq!(atoms.len(), 2);
        assert_eq!(mock.call_count(), 1);
    }

    #[test]
    fn curator_retries_on_malformed_then_succeeds() {
        let mock = Arc::new(MockLlm::new(vec![
            Ok("garbage".to_string()),
            Ok("still garbage".to_string()),
            Ok(r#"{"atoms":[{"text":"alpha"}]}"#.to_string()),
        ]));
        let curator = LlmCurator::with_sleep(mock.clone(), |_| {});
        let atoms = curator.decompose("input", 200, 3).unwrap();
        assert_eq!(atoms.len(), 1);
        // Two malformed bodies plus the successful third one.
        assert_eq!(mock.call_count(), 3);
    }

    #[test]
    fn curator_fails_after_max_retries() {
        let mock = Arc::new(MockLlm::new(vec![
            Ok("garbage 1".to_string()),
            Ok("garbage 2".to_string()),
            Ok("garbage 3".to_string()),
            Ok("garbage 4".to_string()),
        ]));
        let curator = LlmCurator::with_sleep(mock.clone(), |_| {});
        // max_retries=3 means 1 initial + 3 retries = 4 total attempts.
        let err = curator.decompose("input", 200, 3).unwrap_err();
        assert!(matches!(err, CuratorError::MalformedResponse(_)));
        assert_eq!(mock.call_count(), 4);
    }

    #[test]
    fn curator_propagates_llm_unavailable() {
        let mock = Arc::new(MockLlm::new(vec![Err(CuratorError::LlmUnavailable(
            "connection refused".into(),
        ))]));
        let curator = LlmCurator::with_sleep(mock, |_| {});
        let err = curator.decompose("input", 200, 3).unwrap_err();
        assert!(matches!(err, CuratorError::LlmUnavailable(_)));
    }

    /// Drives the production `LlmGenerate for OllamaClient` impl (the
    /// error-mapping arm) AND the real-sleep `LlmCurator::new`
    /// constructor. Points the client at loopback port 1, where the
    /// connection is expected to be refused, so `generate` fails fast.
    /// A `generate` error is returned from `decompose` before any
    /// backoff, and `max_retries=0` additionally rules out a backoff
    /// sleep, so the real `std::thread::sleep` is never called and the
    /// test stays sub-second while still exercising
    /// `LlmCurator::new`'s body.
    #[test]
    fn llm_curator_new_with_real_ollama_client_maps_unavailable() {
        // Loopback port 1 is a low port that is assumed to have no
        // listener on the test host — TODO confirm for CI images.
        let client = crate::llm::OllamaClient::new_with_url_no_health_check(
            "http://127.0.0.1:1",
            "test-model",
        )
        .expect("build no-health-check client");
        // Exercise the production `LlmCurator::new` (real sleep) path.
        let curator = LlmCurator::new(client);
        let err = curator.decompose("body", 200, 0).unwrap_err();
        // The OllamaClient generate failed (connection refused) and the
        // production LlmGenerate impl mapped it to LlmUnavailable.
        assert!(
            matches!(err, CuratorError::LlmUnavailable(_)),
            "expected LlmUnavailable, got {err:?}"
        );
    }

    /// Drives the `LlmGenerate for Arc<OllamaClient>` pass-through impl
    /// used by the MCP daemon wiring.
    #[test]
    fn llm_curator_arc_ollama_passthrough_maps_unavailable() {
        let client = Arc::new(
            crate::llm::OllamaClient::new_with_url_no_health_check(
                "http://127.0.0.1:1",
                "test-model",
            )
            .expect("build no-health-check client"),
        );
        let curator = LlmCurator::with_sleep(client, |_| {});
        let err = curator.decompose("body", 200, 0).unwrap_err();
        assert!(matches!(err, CuratorError::LlmUnavailable(_)));
    }

    /// CuratorError Display arms — both variants render their prefix.
    #[test]
    fn curator_error_display_arms() {
        assert_eq!(
            CuratorError::LlmUnavailable("x".into()).to_string(),
            "curator LLM unavailable: x"
        );
        assert_eq!(
            CuratorError::MalformedResponse("y".into()).to_string(),
            "curator response malformed: y"
        );
        // Compile-time check that the type is usable as `dyn Error`.
        let e = CuratorError::MalformedResponse("z".into());
        let _: &dyn std::error::Error = &e;
    }

    #[test]
    fn extract_first_json_object_handles_braces_in_strings() {
        // Brace-counting must NOT be fooled by braces inside JSON strings.
        let s = r#"prefix {"atoms":[{"text":"contains } brace"}]} suffix"#;
        let extracted = extract_first_json_object(s).unwrap();
        let parsed: CuratorResponse = serde_json::from_str(&extracted).unwrap();
        assert_eq!(parsed.atoms.len(), 1);
        assert_eq!(parsed.atoms[0].text, "contains } brace");
    }
}
643}