Skip to main content

tj_core/classifier/
agent_sdk.rs

1//! Claude CLI ("agent SDK") classifier backend.
2//!
3//! Runs the locally-installed, already-authenticated `claude` binary in
4//! non-interactive print mode, pinned to Haiku, to classify a chunk *without*
5//! an `ANTHROPIC_API_KEY`. This resurrects the v0.7.x `cli` backend that was
6//! removed in v0.8.0 — but honestly: since **2026-06-15** a headless
7//! `claude -p` run draws from the separate **Agent SDK** monthly credit pool
8//! (~$20 Pro / $100 Max 5x / $200 Max 20x, at API rates), not the interactive
9//! Pro/Max pool. Classification is Haiku-class and tiny (a few hundred tokens
10//! per chunk), so the credit lasts a long time — but it is not strictly free.
11//!
12//! The command execution is abstracted behind [`CommandRunner`] so the parsing
13//! path is unit-testable with a fake; the suite never shells out to `claude`.
14
15use super::{Classifier, ClassifyInput, ClassifyOutput};
16use anyhow::{anyhow, Context};
17use std::process::Command;
18
19/// Default model. `claude --model` accepts the short alias and resolves it to
20/// the current dated id (`claude-haiku-4-5-20251001`). Override with
21/// `TJ_AGENT_SDK_MODEL`.
22pub const DEFAULT_MODEL: &str = "claude-haiku-4-5";
23
24/// Env var stamped onto every spawned classifier `claude -p` subprocess. That
25/// subprocess is a full Claude Code instance, so on startup it re-runs the
26/// user's SessionStart hooks — including `task-journal ingest-hook`, which
27/// would spawn yet another classifier `claude -p`, and so on: an unbounded
28/// fork bomb. `ingest-hook` checks for this marker and no-ops when it is set,
29/// breaking the recursion. The CLI guard and the worker's `env_remove` both
30/// reference this constant so the setter and the checker can never drift
31/// (which is exactly the bug that let the fork bomb through: the guard checked
32/// `TJ_IN_CLASSIFIER` but no spawn site ever set it).
33pub const IN_CLASSIFIER_ENV: &str = "TJ_IN_CLASSIFIER";
34
35/// "Run the classifier command and hand back its raw stdout." The production
36/// impl shells out to `claude`; tests inject a fake returning canned JSON.
37pub trait CommandRunner: Send + Sync {
38    /// Run the classification for `prompt` against `model`, returning the raw
39    /// stdout (the `--output-format json` wrapper) on success.
40    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String>;
41}
42
43/// Build the base `claude` invocation shared by both runners: print mode, the
44/// pinned model, the JSON envelope, an isolated MCP config, and — critically —
45/// the [`IN_CLASSIFIER_ENV`] recursion marker. The argv runner appends the
46/// prompt as a positional arg; the stdin runner feeds it on stdin. Extracted so
47/// a unit test can assert the marker is present without spawning `claude` (the
48/// missing marker is exactly what let the fork bomb through before).
49fn base_claude_command(model: &str) -> Command {
50    let mut cmd = Command::new("claude");
51    cmd.arg("-p")
52        .arg("--model")
53        .arg(model)
54        .arg("--output-format")
55        .arg("json")
56        .arg("--strict-mcp-config")
57        // We never use tools in these one-shot text calls — denying the
58        // built-in toolset keeps their schemas out of the prompt, roughly
59        // halving the harness overhead. (The cache-creation cost floor
60        // remains; for true pennies use a direct API backend.)
61        .arg("--disallowed-tools")
62        .arg(DISABLED_TOOLS)
63        .env(IN_CLASSIFIER_ENV, "1");
64    cmd
65}
66
67/// Built-in tools denied in our one-shot `claude -p` calls (we only want a text
68/// completion, never tool use). Listed explicitly because there is no wildcard.
69const DISABLED_TOOLS: &str = "Bash Read Edit Write Glob Grep Task WebFetch \
70WebSearch NotebookEdit TodoWrite BashOutput KillBash";
71
72/// Production runner: invokes the local `claude` binary in print mode, pinned
73/// to the given model, asking for the JSON envelope and an isolated MCP config
74/// (`--strict-mcp-config` keeps the project's own MCP servers — including this
75/// very journal — out of the classification subprocess).
76pub struct ClaudeBinaryRunner;
77
78/// Build the error for a non-zero `claude -p` exit. With `--output-format
79/// json` claude reports the real cause (invalid model, usage limit, auth) as
80/// JSON on **stdout**, not stderr — so surface both, capped, or the user just
81/// sees a bare "exit status 1".
82fn claude_exit_error(
83    status: std::process::ExitStatus,
84    stdout: &[u8],
85    stderr: &[u8],
86) -> anyhow::Error {
87    let cap = |b: &[u8]| {
88        let s = String::from_utf8_lossy(b);
89        let s = s.trim().to_string();
90        if s.chars().count() > 600 {
91            format!("{}…", s.chars().take(600).collect::<String>())
92        } else {
93            s
94        }
95    };
96    let out = cap(stdout);
97    let err = cap(stderr);
98    let detail = match (out.is_empty(), err.is_empty()) {
99        (true, true) => "(no output)".to_string(),
100        (false, true) => out,
101        (true, false) => err,
102        (false, false) => format!("{err} | stdout: {out}"),
103    };
104    anyhow!("`claude -p` exited with {status}: {detail}")
105}
106
/// Per-call wall-clock ceiling for a `claude -p` invocation.
///
/// A spawned Claude Code instance normally answers in seconds; the ceiling
/// exists so a wedged one gets killed instead of hanging a multi-chunk enrich.
///
/// Reads `TJ_CLAUDE_TIMEOUT_SECS` as a positive whole number of seconds.
/// Surrounding whitespace is ignored, so a padded value such as `" 30"` is
/// honoured rather than silently discarded. An unset, non-numeric, or zero
/// value falls back to 90 seconds.
fn claude_timeout() -> std::time::Duration {
    const DEFAULT_SECS: u64 = 90;
    let secs = std::env::var("TJ_CLAUDE_TIMEOUT_SECS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        // Zero would mean "kill immediately"; treat it as "not configured".
        .filter(|&n| n > 0)
        .unwrap_or(DEFAULT_SECS);
    std::time::Duration::from_secs(secs)
}
119
120/// Wait for `child` up to `timeout`, draining stdout/stderr concurrently so a
121/// full pipe can't deadlock the wait. On timeout the child is killed and an
122/// error returned; otherwise the captured output is handed back.
123fn wait_with_timeout(
124    mut child: std::process::Child,
125    timeout: std::time::Duration,
126) -> anyhow::Result<std::process::Output> {
127    use std::io::Read;
128    let mut out_pipe = child.stdout.take();
129    let mut err_pipe = child.stderr.take();
130    let so = std::thread::spawn(move || {
131        let mut b = Vec::new();
132        if let Some(p) = out_pipe.as_mut() {
133            let _ = p.read_to_end(&mut b);
134        }
135        b
136    });
137    let se = std::thread::spawn(move || {
138        let mut b = Vec::new();
139        if let Some(p) = err_pipe.as_mut() {
140            let _ = p.read_to_end(&mut b);
141        }
142        b
143    });
144    let start = std::time::Instant::now();
145    let status = loop {
146        if let Some(status) = child.try_wait()? {
147            break status;
148        }
149        if start.elapsed() >= timeout {
150            let _ = child.kill();
151            let _ = child.wait();
152            anyhow::bail!("`claude -p` timed out after {}s", timeout.as_secs());
153        }
154        std::thread::sleep(std::time::Duration::from_millis(150));
155    };
156    Ok(std::process::Output {
157        status,
158        stdout: so.join().unwrap_or_default(),
159        stderr: se.join().unwrap_or_default(),
160    })
161}
162
163impl CommandRunner for ClaudeBinaryRunner {
164    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
165        let child = base_claude_command(model)
166            .arg(prompt)
167            .stdout(std::process::Stdio::piped())
168            .stderr(std::process::Stdio::piped())
169            .spawn()
170            .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
171        let output = wait_with_timeout(child, claude_timeout())?;
172        if !output.status.success() {
173            return Err(claude_exit_error(
174                output.status,
175                &output.stdout,
176                &output.stderr,
177            ));
178        }
179        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
180    }
181}
182
183/// Like [`ClaudeBinaryRunner`] but feeds the prompt on **stdin** instead of as
184/// an argv argument. Use for large prompts (e.g. a whole session transcript in
185/// dream backfill) that would otherwise blow the per-argument size limit
186/// (`E2BIG`, ~128 KiB on Linux). `claude -p` with no positional prompt reads
187/// the prompt from stdin.
188pub struct ClaudeBinaryStdinRunner;
189
190impl CommandRunner for ClaudeBinaryStdinRunner {
191    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
192        use std::io::Write;
193        use std::process::Stdio;
194        let mut child = base_claude_command(model)
195            .stdin(Stdio::piped())
196            .stdout(Stdio::piped())
197            .stderr(Stdio::piped())
198            .spawn()
199            .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
200        // Write the prompt, then drop the handle to close stdin so `claude`
201        // sees EOF and starts working.
202        child
203            .stdin
204            .take()
205            .context("claude stdin was not captured")?
206            .write_all(prompt.as_bytes())
207            .context("failed to write prompt to claude stdin")?;
208        let output = wait_with_timeout(child, claude_timeout())?;
209        if !output.status.success() {
210            return Err(claude_exit_error(
211                output.status,
212                &output.stdout,
213                &output.stderr,
214            ));
215        }
216        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
217    }
218}
219
220pub struct ClaudeCliClassifier {
221    model: String,
222    runner: Box<dyn CommandRunner>,
223}
224
225impl ClaudeCliClassifier {
226    /// Build from environment. Returns `None` unless a `claude` binary is on
227    /// PATH (probed with `claude --version`) — the caller then falls through to
228    /// the next backend. Model comes from `TJ_AGENT_SDK_MODEL`, else Haiku.
229    pub fn from_env() -> Option<Self> {
230        if !claude_on_path() {
231            return None;
232        }
233        let model = std::env::var("TJ_AGENT_SDK_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.into());
234        Some(Self {
235            model,
236            runner: Box::new(ClaudeBinaryRunner),
237        })
238    }
239
240    /// Test/dev constructor: inject a fake runner and an explicit model so the
241    /// parse path can be exercised without a live `claude` login.
242    pub fn with_runner(model: impl Into<String>, runner: Box<dyn CommandRunner>) -> Self {
243        Self {
244            model: model.into(),
245            runner,
246        }
247    }
248}
249
250/// The JSON wrapper emitted by `claude --output-format json`. We read the error
251/// flag, the `result` string (the model's verdict text), and the usage/cost so
252/// callers can report what a call actually consumed.
253#[derive(serde::Deserialize)]
254struct CliEnvelope {
255    #[serde(default)]
256    is_error: bool,
257    #[serde(default)]
258    result: Option<String>,
259    #[serde(default)]
260    subtype: Option<String>,
261    #[serde(default)]
262    usage: Option<EnvelopeUsage>,
263    #[serde(default)]
264    total_cost_usd: Option<f64>,
265}
266
267#[derive(serde::Deserialize, Default)]
268struct EnvelopeUsage {
269    #[serde(default)]
270    input_tokens: u64,
271    #[serde(default)]
272    output_tokens: u64,
273}
274
275impl Classifier for ClaudeCliClassifier {
276    fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
277        let prompt = crate::classifier::prompt::build(input);
278        let verdict = run_claude_json(self.runner.as_ref(), &self.model, &prompt)?;
279        super::parse_verdict(&verdict)
280    }
281}
282
283/// Run `prompt` through the claude CLI (via `runner`) and return the model's
284/// reply text — the `result` field of the `--output-format json` envelope.
285/// Shared by the classifier and the dream agent-sdk backend so the envelope
286/// handling lives in one place.
287pub fn run_claude_json(
288    runner: &dyn CommandRunner,
289    model: &str,
290    prompt: &str,
291) -> anyhow::Result<String> {
292    run_claude_json_usage(runner, model, prompt).map(|(text, _)| text)
293}
294
295/// Like [`run_claude_json`] but also returns the envelope's reported token
296/// usage and cost (zeros when the envelope omits them).
297pub fn run_claude_json_usage(
298    runner: &dyn CommandRunner,
299    model: &str,
300    prompt: &str,
301) -> anyhow::Result<(String, crate::llm::LlmUsage)> {
302    let stdout = runner.run(model, prompt)?;
303    let envelope: CliEnvelope = serde_json::from_str(stdout.trim()).with_context(|| {
304        format!(
305            "claude --output-format json wrapper parse failed; got: {}",
306            stdout.trim()
307        )
308    })?;
309    if envelope.is_error {
310        return Err(anyhow!(
311            "claude reported an error (subtype={})",
312            envelope.subtype.as_deref().unwrap_or("unknown")
313        ));
314    }
315    let u = envelope.usage.unwrap_or_default();
316    let usage = crate::llm::LlmUsage {
317        // Only our *fresh* prompt tokens — NOT the cached Claude Code system
318        // prompt + tool schemas (cache_read/creation), which are harness
319        // overhead, not work the user asked for. The dollar `cost` below still
320        // reflects everything (claude computes it with the cache discount), so
321        // a small token count next to a few-cents cost is the honest signal
322        // that claude -p's overhead dominates — switch to a direct API backend
323        // to avoid it.
324        input_tokens: u.input_tokens,
325        output_tokens: u.output_tokens,
326        cost_usd: envelope.total_cost_usd,
327    };
328    let result = envelope
329        .result
330        .ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))?;
331    Ok((result, usage))
332}
333
/// Report whether a runnable `claude` binary resolves on PATH.
///
/// Runs `claude --version` (cheap: no network involved) and returns `true`
/// only for a successful exit. Any failure to spawn or execute the binary
/// counts as "not available" rather than as an error.
pub fn claude_on_path() -> bool {
    match Command::new("claude").arg("--version").output() {
        Ok(probe) => probe.status.success(),
        Err(_) => false,
    }
}
343
#[cfg(test)]
mod tests {
    use super::*;
    use crate::classifier::{decide_status, CONFIDENCE_THRESHOLD};
    use crate::event::{EventStatus, EventType};
    use std::sync::{Arc, Mutex};

    /// Runner double: always answers with the same canned stdout, whatever the
    /// prompt, and remembers the model it was last asked for so a test can
    /// assert the pin.
    struct FakeRunner {
        canned: String,
        seen_model: Mutex<Option<String>>,
    }

    impl FakeRunner {
        fn new(canned: impl Into<String>) -> Self {
            Self {
                canned: canned.into(),
                seen_model: Mutex::default(),
            }
        }
    }

    impl CommandRunner for FakeRunner {
        fn run(&self, model: &str, _prompt: &str) -> anyhow::Result<String> {
            let mut last_model = self.seen_model.lock().unwrap();
            *last_model = Some(model.to_owned());
            Ok(self.canned.clone())
        }
    }

    /// Lets a test box a shared handle into the classifier while keeping its
    /// own `Arc` to inspect the fake afterwards.
    impl CommandRunner for Arc<FakeRunner> {
        fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
            (**self).run(model, prompt)
        }
    }

    /// A representative chunk to classify.
    fn input() -> ClassifyInput {
        ClassifyInput {
            text: "We adopted Rust for the journal core.".into(),
            author_hint: "assistant".into(),
            recent_tasks: vec![],
        }
    }

    /// Wrap `result_json` in a successful `--output-format json` envelope.
    fn envelope(result_json: &str) -> String {
        let wrapper = serde_json::json!({
            "type": "result",
            "subtype": "success",
            "is_error": false,
            "result": result_json,
        });
        wrapper.to_string()
    }

    /// Classifier on the default model whose runner always prints `stdout`.
    fn classifier_returning(stdout: String) -> ClaudeCliClassifier {
        ClaudeCliClassifier::with_runner(DEFAULT_MODEL, Box::new(FakeRunner::new(stdout)))
    }

    #[test]
    fn base_command_carries_recursion_marker() {
        use std::ffi::OsStr;
        // The tj-cli ingest-hook guard short-circuits on this exact var; if the
        // const and the spawn site ever drift, the fork bomb returns.
        assert_eq!(IN_CLASSIFIER_ENV, "TJ_IN_CLASSIFIER");
        let command = base_claude_command("claude-haiku-4-5");
        let marker_value = command
            .get_envs()
            .find(|(key, _)| *key == OsStr::new(IN_CLASSIFIER_ENV))
            .and_then(|(_, value)| value);
        assert!(
            marker_value == Some(OsStr::new("1")),
            "every spawned `claude -p` must set {IN_CLASSIFIER_ENV}=1 to break ingest-hook recursion"
        );
    }

    #[test]
    fn parses_canned_verdict_into_classify_output() {
        let verdict = r#"{"event_type":"decision","task_id_guess":"tj-x","confidence":0.93,"evidence_strength":null,"suggested_text":"Adopt Rust."}"#;
        let classifier = classifier_returning(envelope(verdict));
        let parsed = classifier.classify(&input()).unwrap();
        assert_eq!(parsed.event_type, EventType::Decision);
        assert_eq!(parsed.task_id_guess.as_deref(), Some("tj-x"));
        assert!((parsed.confidence - 0.93).abs() < 1e-6);
        // 0.93 >= 0.85 → confirmed.
        assert_eq!(decide_status(parsed.confidence), EventStatus::Confirmed);
    }

    #[test]
    fn pins_the_configured_model() {
        let verdict = r#"{"event_type":"finding","task_id_guess":null,"confidence":0.9,"evidence_strength":null,"suggested_text":"x"}"#;
        let shared = Arc::new(FakeRunner::new(envelope(verdict)));
        let classifier =
            ClaudeCliClassifier::with_runner("claude-haiku-4-5", Box::new(Arc::clone(&shared)));
        let _ = classifier.classify(&input()).unwrap();
        assert_eq!(
            shared.seen_model.lock().unwrap().as_deref(),
            Some("claude-haiku-4-5"),
            "classifier must pin the model it was constructed with"
        );
    }

    #[test]
    fn decide_status_at_the_0_85_threshold() {
        let cases = [
            (0.85_f64, EventStatus::Confirmed),
            (0.84_f64, EventStatus::Suggested),
        ];
        for (conf, expect) in cases {
            let verdict = format!(
                r#"{{"event_type":"evidence","task_id_guess":null,"confidence":{conf},"evidence_strength":"strong","suggested_text":"t"}}"#
            );
            let classifier = classifier_returning(envelope(&verdict));
            let parsed = classifier.classify(&input()).unwrap();
            assert!((parsed.confidence - conf).abs() < 1e-6);
            assert_eq!(decide_status(parsed.confidence), expect);
            assert_eq!(CONFIDENCE_THRESHOLD, 0.85);
        }
    }

    #[test]
    fn tolerates_code_fence_wrapped_verdict() {
        let verdict = "```json\n{\"event_type\":\"rejection\",\"task_id_guess\":null,\"confidence\":0.88,\"evidence_strength\":null,\"suggested_text\":\"won't work\"}\n```";
        let classifier = classifier_returning(envelope(verdict));
        let parsed = classifier.classify(&input()).unwrap();
        assert_eq!(parsed.event_type, EventType::Rejection);
    }

    #[test]
    fn errors_when_claude_reports_is_error() {
        let failed_envelope = serde_json::json!({
            "type": "result",
            "subtype": "error_during_execution",
            "is_error": true,
            "result": null,
        });
        let classifier = classifier_returning(failed_envelope.to_string());
        let err = classifier.classify(&input()).unwrap_err();
        assert!(format!("{err}").contains("error"), "got: {err}");
    }
}
491}