Skip to main content

tj_core/classifier/
agent_sdk.rs

1//! Claude CLI ("agent SDK") classifier backend.
2//!
3//! Runs the locally-installed, already-authenticated `claude` binary in
4//! non-interactive print mode, pinned to Haiku, to classify a chunk *without*
5//! an `ANTHROPIC_API_KEY`. This resurrects the v0.7.x `cli` backend that was
6//! removed in v0.8.0 — but honestly: since **2026-06-15** a headless
7//! `claude -p` run draws from the separate **Agent SDK** monthly credit pool
8//! (~$20 Pro / $100 Max 5x / $200 Max 20x, at API rates), not the interactive
9//! Pro/Max pool. Classification is Haiku-class and tiny (a few hundred tokens
10//! per chunk), so the credit lasts a long time — but it is not strictly free.
11//!
12//! The command execution is abstracted behind [`CommandRunner`] so the parsing
13//! path is unit-testable with a fake; the suite never shells out to `claude`.
14
15use super::{Classifier, ClassifyInput, ClassifyOutput};
16use anyhow::{anyhow, Context};
17use std::process::Command;
18
/// Model passed to `claude --model` when `TJ_AGENT_SDK_MODEL` does not
/// override it. This is the short alias; the CLI resolves it to the current
/// dated id (`claude-haiku-4-5-20251001`).
pub const DEFAULT_MODEL: &str = "claude-haiku-4-5";
23
/// Recursion marker set in the environment of every classifier `claude -p`
/// subprocess.
///
/// The subprocess is a full Claude Code instance, so at startup it re-runs the
/// user's SessionStart hooks. One of those is `task-journal ingest-hook`, which
/// would itself spawn another classifier `claude -p`, and so on without bound:
/// a fork bomb. `ingest-hook` looks for this marker and no-ops when it is set,
/// which breaks the cycle.
///
/// The CLI guard and the worker's `env_remove` both reference this constant so
/// the side that sets the variable and the side that checks it cannot drift
/// apart. That drift is exactly the bug that let the fork bomb through: the
/// guard checked `TJ_IN_CLASSIFIER`, but no spawn site ever set it.
pub const IN_CLASSIFIER_ENV: &str = "TJ_IN_CLASSIFIER";
34
35/// "Run the classifier command and hand back its raw stdout." The production
36/// impl shells out to `claude`; tests inject a fake returning canned JSON.
37pub trait CommandRunner: Send + Sync {
38    /// Run the classification for `prompt` against `model`, returning the raw
39    /// stdout (the `--output-format json` wrapper) on success.
40    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String>;
41}
42
43/// Build the base `claude` invocation shared by both runners: print mode, the
44/// pinned model, the JSON envelope, an isolated MCP config, and — critically —
45/// the [`IN_CLASSIFIER_ENV`] recursion marker. The argv runner appends the
46/// prompt as a positional arg; the stdin runner feeds it on stdin. Extracted so
47/// a unit test can assert the marker is present without spawning `claude` (the
48/// missing marker is exactly what let the fork bomb through before).
49fn base_claude_command(model: &str) -> Command {
50    let mut cmd = Command::new("claude");
51    cmd.arg("-p")
52        .arg("--model")
53        .arg(model)
54        .arg("--output-format")
55        .arg("json")
56        .arg("--strict-mcp-config")
57        .env(IN_CLASSIFIER_ENV, "1");
58    cmd
59}
60
61/// Production runner: invokes the local `claude` binary in print mode, pinned
62/// to the given model, asking for the JSON envelope and an isolated MCP config
63/// (`--strict-mcp-config` keeps the project's own MCP servers — including this
64/// very journal — out of the classification subprocess).
65pub struct ClaudeBinaryRunner;
66
67impl CommandRunner for ClaudeBinaryRunner {
68    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
69        let output = base_claude_command(model)
70            .arg(prompt)
71            .output()
72            .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
73        if !output.status.success() {
74            let stderr = String::from_utf8_lossy(&output.stderr);
75            return Err(anyhow!(
76                "`claude -p` exited with {}: {}",
77                output.status,
78                stderr.trim()
79            ));
80        }
81        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
82    }
83}
84
85/// Like [`ClaudeBinaryRunner`] but feeds the prompt on **stdin** instead of as
86/// an argv argument. Use for large prompts (e.g. a whole session transcript in
87/// dream backfill) that would otherwise blow the per-argument size limit
88/// (`E2BIG`, ~128 KiB on Linux). `claude -p` with no positional prompt reads
89/// the prompt from stdin.
90pub struct ClaudeBinaryStdinRunner;
91
92impl CommandRunner for ClaudeBinaryStdinRunner {
93    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
94        use std::io::Write;
95        use std::process::Stdio;
96        let mut child = base_claude_command(model)
97            .stdin(Stdio::piped())
98            .stdout(Stdio::piped())
99            .stderr(Stdio::piped())
100            .spawn()
101            .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
102        // Write the prompt, then drop the handle to close stdin so `claude`
103        // sees EOF and starts working.
104        child
105            .stdin
106            .take()
107            .context("claude stdin was not captured")?
108            .write_all(prompt.as_bytes())
109            .context("failed to write prompt to claude stdin")?;
110        let output = child
111            .wait_with_output()
112            .context("failed to wait for `claude`")?;
113        if !output.status.success() {
114            let stderr = String::from_utf8_lossy(&output.stderr);
115            return Err(anyhow!(
116                "`claude -p` exited with {}: {}",
117                output.status,
118                stderr.trim()
119            ));
120        }
121        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
122    }
123}
124
125pub struct ClaudeCliClassifier {
126    model: String,
127    runner: Box<dyn CommandRunner>,
128}
129
130impl ClaudeCliClassifier {
131    /// Build from environment. Returns `None` unless a `claude` binary is on
132    /// PATH (probed with `claude --version`) — the caller then falls through to
133    /// the next backend. Model comes from `TJ_AGENT_SDK_MODEL`, else Haiku.
134    pub fn from_env() -> Option<Self> {
135        if !claude_on_path() {
136            return None;
137        }
138        let model = std::env::var("TJ_AGENT_SDK_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.into());
139        Some(Self {
140            model,
141            runner: Box::new(ClaudeBinaryRunner),
142        })
143    }
144
145    /// Test/dev constructor: inject a fake runner and an explicit model so the
146    /// parse path can be exercised without a live `claude` login.
147    pub fn with_runner(model: impl Into<String>, runner: Box<dyn CommandRunner>) -> Self {
148        Self {
149            model: model.into(),
150            runner,
151        }
152    }
153}
154
155/// The JSON wrapper emitted by `claude --output-format json`. We only need the
156/// error flag and the `result` string (the model's verdict text); the rest of
157/// the envelope (usage, cost, timings) is ignored.
158#[derive(serde::Deserialize)]
159struct CliEnvelope {
160    #[serde(default)]
161    is_error: bool,
162    #[serde(default)]
163    result: Option<String>,
164    #[serde(default)]
165    subtype: Option<String>,
166}
167
168impl Classifier for ClaudeCliClassifier {
169    fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
170        let prompt = crate::classifier::prompt::build(input);
171        let verdict = run_claude_json(self.runner.as_ref(), &self.model, &prompt)?;
172        super::parse_verdict(&verdict)
173    }
174}
175
176/// Run `prompt` through the claude CLI (via `runner`) and return the model's
177/// reply text — the `result` field of the `--output-format json` envelope.
178/// Shared by the classifier and the dream agent-sdk backend so the envelope
179/// handling lives in one place.
180pub fn run_claude_json(
181    runner: &dyn CommandRunner,
182    model: &str,
183    prompt: &str,
184) -> anyhow::Result<String> {
185    let stdout = runner.run(model, prompt)?;
186    let envelope: CliEnvelope = serde_json::from_str(stdout.trim()).with_context(|| {
187        format!(
188            "claude --output-format json wrapper parse failed; got: {}",
189            stdout.trim()
190        )
191    })?;
192    if envelope.is_error {
193        return Err(anyhow!(
194            "claude reported an error (subtype={})",
195            envelope.subtype.as_deref().unwrap_or("unknown")
196        ));
197    }
198    envelope
199        .result
200        .ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))
201}
202
/// Report whether a `claude` binary can be found on PATH and executed.
///
/// Runs `claude --version` and returns `true` only when it exits
/// successfully. A failure to spawn or a non-success exit status both mean
/// "not available"; this function never returns an error.
pub fn claude_on_path() -> bool {
    let probe = Command::new("claude").arg("--version").output();
    match probe {
        Ok(finished) => finished.status.success(),
        Err(_) => false,
    }
}
212
#[cfg(test)]
mod tests {
    use super::*;
    use crate::classifier::{decide_status, CONFIDENCE_THRESHOLD};
    use crate::event::{EventStatus, EventType};

    /// Fake runner: returns canned stdout, ignoring model/prompt. Captures the
    /// model it was asked for so tests can assert the pin.
    struct FakeRunner {
        canned: String,
        seen_model: std::sync::Mutex<Option<String>>,
    }

    impl FakeRunner {
        fn new(canned: impl Into<String>) -> Self {
            Self {
                canned: canned.into(),
                seen_model: std::sync::Mutex::new(None),
            }
        }
    }

    impl CommandRunner for FakeRunner {
        fn run(&self, model: &str, _prompt: &str) -> anyhow::Result<String> {
            *self.seen_model.lock().unwrap() = Some(model.to_string());
            Ok(self.canned.clone())
        }
    }

    /// Minimal classifier input shared by the tests.
    fn input() -> ClassifyInput {
        ClassifyInput {
            text: "We adopted Rust for the journal core.".into(),
            author_hint: "assistant".into(),
            recent_tasks: vec![],
        }
    }

    /// Wrap `result_json` in a successful `--output-format json` envelope.
    fn envelope(result_json: &str) -> String {
        serde_json::json!({
            "type": "result",
            "subtype": "success",
            "is_error": false,
            "result": result_json,
        })
        .to_string()
    }

    #[test]
    fn base_command_carries_recursion_marker() {
        use std::ffi::OsStr;
        // The tj-cli ingest-hook guard short-circuits on this exact var; if the
        // const and the spawn site ever drift, the fork bomb returns.
        assert_eq!(IN_CLASSIFIER_ENV, "TJ_IN_CLASSIFIER");
        let cmd = base_claude_command("claude-haiku-4-5");
        let marker = cmd
            .get_envs()
            .any(|(k, v)| k == OsStr::new(IN_CLASSIFIER_ENV) && v == Some(OsStr::new("1")));
        assert!(
            marker,
            "every spawned `claude -p` must set {IN_CLASSIFIER_ENV}=1 to break ingest-hook recursion"
        );
    }

    #[test]
    fn parses_canned_verdict_into_classify_output() {
        let verdict = r#"{"event_type":"decision","task_id_guess":"tj-x","confidence":0.93,"evidence_strength":null,"suggested_text":"Adopt Rust."}"#;
        let c = ClaudeCliClassifier::with_runner(
            DEFAULT_MODEL,
            Box::new(FakeRunner::new(envelope(verdict))),
        );
        let out = c.classify(&input()).unwrap();
        assert_eq!(out.event_type, EventType::Decision);
        assert_eq!(out.task_id_guess.as_deref(), Some("tj-x"));
        assert!((out.confidence - 0.93).abs() < 1e-6);
        // 0.93 >= 0.85 → confirmed.
        assert_eq!(decide_status(out.confidence), EventStatus::Confirmed);
    }

    /// Adapter so a test can keep an `Arc` handle to inspect the runner after
    /// it is boxed into the classifier.
    struct ArcRunner(std::sync::Arc<FakeRunner>);
    impl CommandRunner for ArcRunner {
        fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
            self.0.run(model, prompt)
        }
    }

    #[test]
    fn pins_the_configured_model() {
        let verdict = r#"{"event_type":"finding","task_id_guess":null,"confidence":0.9,"evidence_strength":null,"suggested_text":"x"}"#;
        let captured = std::sync::Arc::new(FakeRunner::new(envelope(verdict)));
        let c = ClaudeCliClassifier::with_runner(
            "claude-haiku-4-5",
            Box::new(ArcRunner(captured.clone())),
        );
        let _ = c.classify(&input()).unwrap();
        assert_eq!(
            captured.seen_model.lock().unwrap().as_deref(),
            Some("claude-haiku-4-5"),
            "classifier must pin the model it was constructed with"
        );
    }

    #[test]
    fn decide_status_at_the_0_85_threshold() {
        // The table below is only meaningful while the threshold is 0.85;
        // check that once rather than on every iteration.
        assert_eq!(CONFIDENCE_THRESHOLD, 0.85);
        for (conf, expect) in [
            (0.85_f64, EventStatus::Confirmed),
            (0.84_f64, EventStatus::Suggested),
        ] {
            let verdict = format!(
                r#"{{"event_type":"evidence","task_id_guess":null,"confidence":{conf},"evidence_strength":"strong","suggested_text":"t"}}"#
            );
            let c = ClaudeCliClassifier::with_runner(
                DEFAULT_MODEL,
                Box::new(FakeRunner::new(envelope(&verdict))),
            );
            let out = c.classify(&input()).unwrap();
            assert!((out.confidence - conf).abs() < 1e-6);
            assert_eq!(decide_status(out.confidence), expect);
        }
    }

    #[test]
    fn tolerates_code_fence_wrapped_verdict() {
        let verdict = "```json\n{\"event_type\":\"rejection\",\"task_id_guess\":null,\"confidence\":0.88,\"evidence_strength\":null,\"suggested_text\":\"won't work\"}\n```";
        let c = ClaudeCliClassifier::with_runner(
            DEFAULT_MODEL,
            Box::new(FakeRunner::new(envelope(verdict))),
        );
        let out = c.classify(&input()).unwrap();
        assert_eq!(out.event_type, EventType::Rejection);
    }

    #[test]
    fn errors_when_claude_reports_is_error() {
        let canned = serde_json::json!({
            "type": "result",
            "subtype": "error_during_execution",
            "is_error": true,
            "result": null,
        })
        .to_string();
        let c = ClaudeCliClassifier::with_runner(DEFAULT_MODEL, Box::new(FakeRunner::new(canned)));
        let err = c.classify(&input()).unwrap_err();
        assert!(format!("{err}").contains("error"), "got: {err}");
    }

    #[test]
    fn run_claude_json_returns_result_text() {
        let runner = FakeRunner::new(envelope("plain reply"));
        let reply = run_claude_json(&runner, DEFAULT_MODEL, "prompt").unwrap();
        assert_eq!(reply, "plain reply");
    }

    #[test]
    fn errors_when_envelope_has_no_result() {
        let canned = serde_json::json!({
            "type": "result",
            "subtype": "success",
            "is_error": false,
        })
        .to_string();
        let runner = FakeRunner::new(canned);
        let err = run_claude_json(&runner, DEFAULT_MODEL, "prompt").unwrap_err();
        assert!(format!("{err}").contains("no `result` field"), "got: {err}");
    }

    #[test]
    fn errors_when_stdout_is_not_json() {
        let runner = FakeRunner::new("claude: something went wrong");
        let err = run_claude_json(&runner, DEFAULT_MODEL, "prompt").unwrap_err();
        assert!(format!("{err}").contains("wrapper parse failed"), "got: {err}");
    }
}