mimir-librarian 0.1.0

Mimir librarian for governed draft ingestion, validation, and canonical memory commits.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
//! `LlmInvoker` — the trait over "ask Claude to structure this
//! prose as canonical Mimir Lisp."
//!
//! Wrapped as a trait so tests can mock the LLM without spawning
//! subprocesses or hitting the operator's `claude` CLI auth. The
//! default production impl is [`ClaudeCliInvoker`] which shells out
//! to `claude -p` non-interactively.
//!
//! # Invocation shape
//!
//! [`ClaudeCliInvoker::invoke`] runs:
//!
//! ```text
//! <binary_path> -p --no-session-persistence --model <model>
//!               --system-prompt <system_prompt> <user_message>
//! ```
//!
//! with `stdin` attached to the null device, `stdout` and `stderr` piped, and a
//! [`wait_timeout::ChildExt::wait_timeout`]-bounded wait. Error
//! classification (spawn failure, timeout, non-zero exit, empty
//! stdout) maps to [`LibrarianError::LlmInvocationFailed`] with
//! a specific `message` on every failure path; the caller can
//! match on the enum variant while logs and operator-facing
//! surfaces get the diagnostic message.

use std::io::Read as _;
use std::path::{Path, PathBuf};
use std::process::{Child, Command, Stdio};
use std::thread;
use std::time::{Duration, Instant};

use wait_timeout::ChildExt as _;

use crate::{LibrarianError, DEFAULT_LLM_TIMEOUT_SECS};

/// Default binary name searched on `PATH`.
const DEFAULT_BINARY: &str = "claude";

/// Maximum number of characters of `stderr` retained in error
/// messages on non-zero exit. Bounded, sufficient for debugging,
/// avoids carrying unbounded operator-controlled data in logs.
const STDERR_TAIL_CHARS: usize = 400;
/// Raw OS error code that [`spawn_with_retry`] treats as transient
/// and retries. NOTE(review): 26 is presumably `ETXTBSY` ("text file
/// busy") as numbered on Linux — confirm the value for other targets.
const TEXT_FILE_BUSY_OS_ERROR: i32 = 26;
/// Maximum number of retries [`spawn_with_retry`] performs after a
/// spawn attempt fails with [`TEXT_FILE_BUSY_OS_ERROR`].
const SPAWN_RETRY_COUNT: usize = 3;
/// Pause between consecutive spawn attempts in [`spawn_with_retry`].
const SPAWN_RETRY_DELAY: Duration = Duration::from_millis(10);

/// Ask the LLM to produce a JSON response for a prose draft.
///
/// The `system_prompt` sets the librarian's role and the output
/// schema; the `user_message` carries the wrapped prose draft
/// (typically `<draft>...</draft>` — the envelope is the caller's
/// responsibility). The returned `String` is the LLM's response
/// text (for [`ClaudeCliInvoker`], the whitespace-trimmed stdout of
/// the CLI), expected (but not verified by this trait) to be a JSON
/// object matching the librarian system prompt's output contract.
///
/// Implementations must be `Send + Sync` so callers can use them
/// from multi-threaded runners, and must also implement
/// [`std::fmt::Debug`] (both are supertrait bounds).
pub trait LlmInvoker: Send + Sync + std::fmt::Debug {
    /// Run one LLM invocation and return its output text.
    ///
    /// # Errors
    ///
    /// - [`LibrarianError::LlmInvocationFailed`] if the invocation
    ///   mechanism (typically a subprocess) failed to produce
    ///   usable output. The attached `message` distinguishes spawn
    ///   failure, timeout, non-zero exit, and empty output.
    fn invoke(&self, system_prompt: &str, user_message: &str) -> Result<String, LibrarianError>;
}

/// Production `LlmInvoker` that shells out to the `claude` CLI in
/// non-interactive mode.
///
/// Uses whatever auth the operator's `claude` CLI already has —
/// no `ANTHROPIC_API_KEY` required. See the
/// `feedback_no_api_blocker.md` memory.
///
/// Construction is via [`ClaudeCliInvoker::new`] (takes a model
/// alias; binary defaults to `claude` on `PATH`) with optional
/// [`Self::with_timeout`] and [`Self::with_binary_path`] builders.
#[derive(Debug, Clone)]
pub struct ClaudeCliInvoker {
    // Model alias passed to the CLI after the `--model` flag.
    model: String,
    // Upper bound on how long one invocation may run before the
    // child process is killed and a timeout error is returned.
    timeout: Duration,
    // Program to execute; a bare name is resolved via `PATH`.
    binary_path: PathBuf,
}

impl ClaudeCliInvoker {
    /// Create an invoker for the given Claude model alias (for
    /// example `"claude-sonnet-4-6"` or `"claude-opus-4-7"`).
    ///
    /// The binary starts out as `claude`, resolved via `PATH`
    /// (change it with [`Self::with_binary_path`]); the timeout
    /// starts out as [`DEFAULT_LLM_TIMEOUT_SECS`] seconds (change it
    /// with [`Self::with_timeout`]).
    #[must_use]
    pub fn new(model: impl Into<String>) -> Self {
        let model = model.into();
        let timeout = Duration::from_secs(DEFAULT_LLM_TIMEOUT_SECS);
        let binary_path = PathBuf::from(DEFAULT_BINARY);
        Self {
            model,
            timeout,
            binary_path,
        }
    }

    /// Builder: replace the per-invocation timeout.
    #[must_use]
    pub fn with_timeout(self, timeout: Duration) -> Self {
        Self { timeout, ..self }
    }

    /// Builder: replace the path of the `claude` binary. Anything
    /// convertible into a [`PathBuf`] is accepted, so operators can
    /// pin a specific binary version and tests can point at a shim.
    #[must_use]
    pub fn with_binary_path(self, path: impl Into<PathBuf>) -> Self {
        Self {
            binary_path: path.into(),
            ..self
        }
    }

    /// Model alias passed to the CLI on every invocation.
    #[must_use]
    pub fn model(&self) -> &str {
        self.model.as_str()
    }

    /// Time budget for a single invocation.
    #[must_use]
    pub fn timeout(&self) -> Duration {
        self.timeout
    }

    /// Binary that will be executed for each invocation.
    #[must_use]
    pub fn binary_path(&self) -> &Path {
        self.binary_path.as_path()
    }

    /// Assemble the argument vector for one invocation, not
    /// including argv\[0\] (the binary path itself). Kept free of
    /// side effects so the exact flag layout can be unit-tested
    /// without spawning a subprocess.
    fn build_argv(&self, system_prompt: &str, user_message: &str) -> Vec<String> {
        let layout = [
            "-p",
            "--no-session-persistence",
            "--model",
            self.model.as_str(),
            "--system-prompt",
            system_prompt,
            user_message,
        ];
        layout.iter().copied().map(String::from).collect()
    }
}

impl Default for ClaudeCliInvoker {
    /// Builds an invoker for Sonnet 4.6, the model used by the
    /// librarian-prototype run configuration. Timeout and binary
    /// path take the same defaults as [`ClaudeCliInvoker::new`].
    fn default() -> Self {
        const DEFAULT_MODEL: &str = "claude-sonnet-4-6";
        Self::new(DEFAULT_MODEL)
    }
}

/// Return at most the last `STDERR_TAIL_CHARS` characters of `s`,
/// as a new `String`. UTF-8-safe (operates on `char` boundaries).
fn tail_chars(s: &str) -> String {
    let char_count = s.chars().count();
    if char_count <= STDERR_TAIL_CHARS {
        return s.to_string();
    }
    let skip = char_count - STDERR_TAIL_CHARS;
    s.chars().skip(skip).collect()
}

impl LlmInvoker for ClaudeCliInvoker {
    /// Run the configured binary once and return its stdout with
    /// surrounding whitespace trimmed.
    ///
    /// The child is spawned with stdin attached to the null device
    /// and stdout/stderr piped. Both pipes are drained on background
    /// threads while the child runs, so a response larger than the
    /// OS pipe buffer cannot stall the child. The wait is bounded by
    /// the configured timeout; on expiry the child is killed and
    /// reaped.
    ///
    /// # Errors
    ///
    /// Returns [`LibrarianError::LlmInvocationFailed`] when the
    /// binary cannot be spawned, waiting on it fails, it exceeds
    /// the timeout, it exits unsuccessfully (the message carries the
    /// tail of stderr), or it exits successfully with empty stdout.
    #[tracing::instrument(
        name = "mimir.librarian.llm.invoke",
        skip_all,
        fields(
            model = %self.model,
            prompt_bytes = system_prompt.len() + user_message.len(),
            response_bytes = tracing::field::Empty,
            exit_code = tracing::field::Empty,
            duration_ms = tracing::field::Empty,
        ),
    )]
    fn invoke(&self, system_prompt: &str, user_message: &str) -> Result<String, LibrarianError> {
        let started = Instant::now();
        let argv = self.build_argv(system_prompt, user_message);

        let mut command = Command::new(&self.binary_path);
        command
            .args(&argv)
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped());
        let mut child = spawn_with_retry(&mut command).map_err(|io_err| {
            LibrarianError::LlmInvocationFailed {
                message: format!("failed to spawn {}: {io_err}", self.binary_path.display()),
            }
        })?;

        // Start draining both pipes *before* waiting. If we only read
        // after the child exits, a child that writes more than one
        // pipe buffer blocks on write, never exits, and is reported
        // as a timeout instead of delivering its output.
        let stdout_reader = child.stdout.take().map(drain_in_background);
        let stderr_reader = child.stderr.take().map(drain_in_background);

        let status = match child.wait_timeout(self.timeout) {
            Ok(Some(status)) => status,
            Ok(None) => {
                // Timeout — reap the child so we don't leave a zombie.
                // The reader threads are deliberately not joined: a
                // surviving grandchild may keep the pipes open, and
                // joining would delay the timeout report. They end on
                // their own once the pipes reach EOF.
                let _ = child.kill();
                let _ = child.wait();
                return Err(LibrarianError::LlmInvocationFailed {
                    // `Duration`'s `Debug` form prints whole seconds
                    // as e.g. `120s` and keeps sub-second timeouts
                    // readable (`200ms`) instead of rounding to `0s`.
                    message: format!("invocation timed out after {:?}", self.timeout),
                });
            }
            Err(io_err) => {
                return Err(LibrarianError::LlmInvocationFailed {
                    message: format!("wait error: {io_err}"),
                });
            }
        };

        // The child has exited, so both readers finish as soon as
        // they hit EOF on their pipe.
        let stdout = collect_drained(stdout_reader);
        let stderr = collect_drained(stderr_reader);

        let duration_ms = u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX);
        let span = tracing::Span::current();
        span.record("response_bytes", stdout.len());
        span.record("duration_ms", duration_ms);

        if !status.success() {
            // No exit code means the process was terminated by a signal.
            let exit_label = status
                .code()
                .map_or_else(|| "signalled".to_string(), |c| c.to_string());
            span.record("exit_code", exit_label.as_str());
            tracing::warn!(
                target: "mimir.librarian.llm.nonzero_exit",
                exit = exit_label.as_str(),
            );
            return Err(LibrarianError::LlmInvocationFailed {
                message: format!(
                    "{} exited {exit_label}: {}",
                    self.binary_path.display(),
                    tail_chars(stderr.trim())
                ),
            });
        }
        span.record("exit_code", 0);

        let trimmed = stdout.trim();
        if trimmed.is_empty() {
            return Err(LibrarianError::LlmInvocationFailed {
                message: format!("{} exited 0 with empty stdout", self.binary_path.display()),
            });
        }

        Ok(trimmed.to_string())
    }
}

/// Read `pipe` to EOF on a dedicated thread and hand back the text.
///
/// Reading is best-effort: if the read fails (I/O error or invalid
/// UTF-8) the error is ignored and whatever `read_to_string` left in
/// the buffer is returned.
fn drain_in_background<R>(mut pipe: R) -> thread::JoinHandle<String>
where
    R: std::io::Read + Send + 'static,
{
    thread::spawn(move || {
        let mut text = String::new();
        let _ = pipe.read_to_string(&mut text);
        text
    })
}

/// Join a reader started by [`drain_in_background`], yielding an
/// empty string when there was no pipe or the reader thread panicked.
fn collect_drained(reader: Option<thread::JoinHandle<String>>) -> String {
    reader
        .and_then(|handle| handle.join().ok())
        .unwrap_or_default()
}

/// Spawn `command`, retrying while the OS reports
/// `TEXT_FILE_BUSY_OS_ERROR`.
///
/// Up to `SPAWN_RETRY_COUNT` busy failures are absorbed, each
/// followed by a `SPAWN_RETRY_DELAY` sleep; the attempt after the
/// last retry is returned as-is. Any other outcome — success or a
/// different error — is returned immediately.
fn spawn_with_retry(command: &mut Command) -> Result<Child, std::io::Error> {
    for _ in 0..SPAWN_RETRY_COUNT {
        let outcome = command.spawn();
        let busy = matches!(
            &outcome,
            Err(err) if err.raw_os_error() == Some(TEXT_FILE_BUSY_OS_ERROR)
        );
        if !busy {
            return outcome;
        }
        thread::sleep(SPAWN_RETRY_DELAY);
    }
    // Retry budget exhausted: whatever happens now is final.
    command.spawn()
}

/// Unit tests for the pure helpers plus integration tests that run
/// [`ClaudeCliInvoker`] against a throwaway shell-script shim
/// standing in for the real `claude` binary.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    use std::fs;
    use std::io::Write as _;
    use tempfile::TempDir;

    /// Write an executable shell-script shim to a fresh tempdir.
    /// The shim simulates `claude` for integration tests. Returns
    /// the tempdir (kept alive for the test) and the shim path.
    ///
    /// The script is written under a temporary name, synced, made
    /// executable, and only then renamed to its final name, so the
    /// published path never has an open writer.
    #[cfg(unix)]
    fn make_shim(script_body: &str) -> (TempDir, PathBuf) {
        use std::os::unix::fs::PermissionsExt as _;

        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("claude");
        let tmp_path = dir.path().join(".claude.tmp");
        let mut file = fs::File::create(&tmp_path).expect("create shim");
        file.write_all(script_body.as_bytes()).expect("write shim");
        file.sync_all().expect("sync shim");
        drop(file);
        let mut perms = fs::metadata(&tmp_path).expect("stat shim").permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&tmp_path, perms).expect("chmod shim");
        fs::rename(&tmp_path, &path).expect("publish shim");
        (dir, path)
    }

    /// Write a shell-script shim through a Windows command wrapper.
    /// GitHub's Windows runners provide Git Bash `sh`; the wrapper
    /// keeps the test fixture bodies identical across platforms.
    #[cfg(windows)]
    fn make_shim(script_body: &str) -> (TempDir, PathBuf) {
        let dir = tempfile::tempdir().expect("tempdir");
        let script_path = dir.path().join("claude.sh");
        let path = dir.path().join("claude.cmd");
        let tmp_script_path = dir.path().join(".claude.sh.tmp");
        let tmp_path = dir.path().join(".claude.cmd.tmp");
        let mut script = fs::File::create(&tmp_script_path).expect("create shim script");
        script
            .write_all(script_body.as_bytes())
            .expect("write shim script");
        script.sync_all().expect("sync shim script");
        drop(script);
        let mut command = fs::File::create(&tmp_path).expect("create shim command");
        command
            .write_all(b"@echo off\r\nsh \"%~dp0claude.sh\" %*\r\n")
            .expect("write shim command");
        command.sync_all().expect("sync shim command");
        drop(command);
        fs::rename(&tmp_script_path, &script_path).expect("publish shim script");
        fs::rename(&tmp_path, &path).expect("publish shim command");
        (dir, path)
    }

    #[test]
    fn construction() {
        let invoker = ClaudeCliInvoker::new("claude-opus-4-7");
        assert_eq!(invoker.model(), "claude-opus-4-7");
        assert_eq!(
            invoker.timeout(),
            Duration::from_secs(DEFAULT_LLM_TIMEOUT_SECS)
        );
        assert_eq!(invoker.binary_path(), Path::new("claude"));
    }

    #[test]
    fn default_is_sonnet_4_6() {
        let invoker = ClaudeCliInvoker::default();
        assert_eq!(invoker.model(), "claude-sonnet-4-6");
    }

    #[test]
    fn builders_override_defaults() {
        let invoker = ClaudeCliInvoker::new("m")
            .with_timeout(Duration::from_secs(7))
            .with_binary_path("/tmp/fake-claude");
        assert_eq!(invoker.timeout(), Duration::from_secs(7));
        assert_eq!(invoker.binary_path(), Path::new("/tmp/fake-claude"));
    }

    #[test]
    fn argv_shape_is_exact() {
        let invoker = ClaudeCliInvoker::new("claude-opus-4-7");
        let argv = invoker.build_argv("SYS PROMPT", "USER MSG");
        assert_eq!(
            argv,
            vec![
                "-p",
                "--no-session-persistence",
                "--model",
                "claude-opus-4-7",
                "--system-prompt",
                "SYS PROMPT",
                "USER MSG",
            ]
        );
    }

    #[test]
    fn tail_chars_returns_whole_short_string() {
        assert_eq!(tail_chars("hello"), "hello");
    }

    #[test]
    fn tail_chars_truncates_long_string_to_last_n() {
        let long = "x".repeat(STDERR_TAIL_CHARS + 100);
        let tail = tail_chars(&long);
        assert_eq!(tail.chars().count(), STDERR_TAIL_CHARS);
    }

    #[test]
    fn tail_chars_preserves_utf8() {
        // A four-byte scalar value, spelled as an escape so the
        // literal survives tooling that strips non-ASCII text.
        const MULTIBYTE: char = '\u{1F980}';
        let s = MULTIBYTE.to_string().repeat(STDERR_TAIL_CHARS + 10);
        let tail = tail_chars(&s);
        assert_eq!(tail.chars().count(), STDERR_TAIL_CHARS);
        assert!(tail.chars().all(|c| c == MULTIBYTE));
    }

    // ---- Integration tests via shim binary ----

    #[test]
    fn invoke_success_returns_trimmed_stdout() {
        let (_dir, shim) = make_shim("#!/bin/sh\necho '{\"records\":[],\"notes\":\"ok\"}'\n");
        let invoker = ClaudeCliInvoker::default().with_binary_path(&shim);
        let result = invoker.invoke("sys", "usr").expect("shim always succeeds");
        assert_eq!(result, r#"{"records":[],"notes":"ok"}"#);
    }

    #[test]
    fn invoke_nonzero_exit_is_classified() {
        let (_dir, shim) = make_shim("#!/bin/sh\necho 'something broke' >&2\nexit 7\n");
        let invoker = ClaudeCliInvoker::default().with_binary_path(&shim);
        let err = invoker
            .invoke("sys", "usr")
            .expect_err("shim always exits 7");
        let LibrarianError::LlmInvocationFailed { message } = err else {
            panic!("expected LlmInvocationFailed, got {err:?}");
        };
        assert!(message.contains("exited 7"), "message was: {message}");
        assert!(
            message.contains("something broke"),
            "stderr tail missing: {message}"
        );
    }

    #[test]
    fn invoke_empty_stdout_is_rejected() {
        let (_dir, shim) = make_shim("#!/bin/sh\nexit 0\n");
        let invoker = ClaudeCliInvoker::default().with_binary_path(&shim);
        let err = invoker.invoke("sys", "usr").expect_err("empty stdout");
        let LibrarianError::LlmInvocationFailed { message } = err else {
            panic!("expected LlmInvocationFailed, got {err:?}");
        };
        assert!(message.contains("empty stdout"), "message was: {message}");
    }

    #[test]
    fn invoke_timeout_kills_child_and_reports() {
        let (_dir, shim) = make_shim("#!/bin/sh\nsleep 5\n");
        let invoker = ClaudeCliInvoker::default()
            .with_binary_path(&shim)
            .with_timeout(Duration::from_millis(200));
        let started = Instant::now();
        let err = invoker.invoke("sys", "usr").expect_err("must time out");
        // Timeout path must return promptly; give generous slack for
        // slow CI hosts but bound below the shim's 5 s sleep.
        assert!(
            started.elapsed() < Duration::from_secs(3),
            "timeout path took too long: {:?}",
            started.elapsed()
        );
        let LibrarianError::LlmInvocationFailed { message } = err else {
            panic!("expected LlmInvocationFailed, got {err:?}");
        };
        assert!(message.contains("timed out"), "message was: {message}");
    }

    #[test]
    fn invoke_missing_binary_returns_spawn_error() {
        let invoker = ClaudeCliInvoker::default()
            .with_binary_path("/nonexistent/definitely-not-a-claude-binary");
        let err = invoker.invoke("sys", "usr").expect_err("binary is missing");
        let LibrarianError::LlmInvocationFailed { message } = err else {
            panic!("expected LlmInvocationFailed, got {err:?}");
        };
        assert!(
            message.contains("failed to spawn"),
            "message was: {message}"
        );
    }

    /// Demonstrates the trait is mockable — used by every future test
    /// that exercises the librarian without spawning `claude`.
    #[test]
    fn trait_is_mockable() {
        #[derive(Debug)]
        struct MockInvoker {
            canned_response: String,
        }
        impl LlmInvoker for MockInvoker {
            fn invoke(&self, _sys: &str, _usr: &str) -> Result<String, LibrarianError> {
                Ok(self.canned_response.clone())
            }
        }
        let mock = MockInvoker {
            canned_response: r#"{"records": [], "notes": "mock"}"#.to_string(),
        };
        let out = mock.invoke("sys", "usr").expect("mock never errors");
        assert!(out.contains("mock"));
    }
}