Skip to main content

tj_core/classifier/
agent_sdk.rs

1//! Claude CLI ("agent SDK") classifier backend.
2//!
3//! Runs the locally-installed, already-authenticated `claude` binary in
4//! non-interactive print mode, pinned to Haiku, to classify a chunk *without*
5//! an `ANTHROPIC_API_KEY`. This resurrects the v0.7.x `cli` backend that was
6//! removed in v0.8.0 — but honestly: since **2026-06-15** a headless
7//! `claude -p` run draws from the separate **Agent SDK** monthly credit pool
8//! (~$20 Pro / $100 Max 5x / $200 Max 20x, at API rates), not the interactive
9//! Pro/Max pool. Classification is Haiku-class and tiny (a few hundred tokens
10//! per chunk), so the credit lasts a long time — but it is not strictly free.
11//!
12//! The command execution is abstracted behind [`CommandRunner`] so the parsing
13//! path is unit-testable with a fake; the suite never shells out to `claude`.
14
15use super::{Classifier, ClassifyInput, ClassifyOutput};
16use anyhow::{anyhow, Context};
17use std::process::Command;
18
/// Model requested when `TJ_AGENT_SDK_MODEL` does not supply one.
///
/// This is the short alias: `claude --model` accepts it and resolves it to the
/// current dated id (`claude-haiku-4-5-20251001`).
pub const DEFAULT_MODEL: &str = "claude-haiku-4-5";
23
24/// "Run the classifier command and hand back its raw stdout." The production
25/// impl shells out to `claude`; tests inject a fake returning canned JSON.
26pub trait CommandRunner: Send + Sync {
27    /// Run the classification for `prompt` against `model`, returning the raw
28    /// stdout (the `--output-format json` wrapper) on success.
29    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String>;
30}
31
32/// Production runner: invokes the local `claude` binary in print mode, pinned
33/// to the given model, asking for the JSON envelope and an isolated MCP config
34/// (`--strict-mcp-config` keeps the project's own MCP servers — including this
35/// very journal — out of the classification subprocess).
36pub struct ClaudeBinaryRunner;
37
38impl CommandRunner for ClaudeBinaryRunner {
39    fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
40        let output = Command::new("claude")
41            .arg("-p")
42            .arg(prompt)
43            .arg("--model")
44            .arg(model)
45            .arg("--output-format")
46            .arg("json")
47            .arg("--strict-mcp-config")
48            .output()
49            .context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
50        if !output.status.success() {
51            let stderr = String::from_utf8_lossy(&output.stderr);
52            return Err(anyhow!(
53                "`claude -p` exited with {}: {}",
54                output.status,
55                stderr.trim()
56            ));
57        }
58        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
59    }
60}
61
62pub struct ClaudeCliClassifier {
63    model: String,
64    runner: Box<dyn CommandRunner>,
65}
66
67impl ClaudeCliClassifier {
68    /// Build from environment. Returns `None` unless a `claude` binary is on
69    /// PATH (probed with `claude --version`) — the caller then falls through to
70    /// the next backend. Model comes from `TJ_AGENT_SDK_MODEL`, else Haiku.
71    pub fn from_env() -> Option<Self> {
72        if !claude_on_path() {
73            return None;
74        }
75        let model = std::env::var("TJ_AGENT_SDK_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.into());
76        Some(Self {
77            model,
78            runner: Box::new(ClaudeBinaryRunner),
79        })
80    }
81
82    /// Test/dev constructor: inject a fake runner and an explicit model so the
83    /// parse path can be exercised without a live `claude` login.
84    pub fn with_runner(model: impl Into<String>, runner: Box<dyn CommandRunner>) -> Self {
85        Self {
86            model: model.into(),
87            runner,
88        }
89    }
90}
91
92/// The JSON wrapper emitted by `claude --output-format json`. We only need the
93/// error flag and the `result` string (the model's verdict text); the rest of
94/// the envelope (usage, cost, timings) is ignored.
95#[derive(serde::Deserialize)]
96struct CliEnvelope {
97    #[serde(default)]
98    is_error: bool,
99    #[serde(default)]
100    result: Option<String>,
101    #[serde(default)]
102    subtype: Option<String>,
103}
104
105impl Classifier for ClaudeCliClassifier {
106    fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
107        let prompt = crate::classifier::prompt::build(input);
108        let stdout = self.runner.run(&self.model, &prompt)?;
109        let envelope: CliEnvelope = serde_json::from_str(stdout.trim()).with_context(|| {
110            format!(
111                "claude --output-format json wrapper parse failed; got: {}",
112                stdout.trim()
113            )
114        })?;
115        if envelope.is_error {
116            return Err(anyhow!(
117                "claude reported an error (subtype={})",
118                envelope.subtype.as_deref().unwrap_or("unknown")
119            ));
120        }
121        let verdict = envelope
122            .result
123            .ok_or_else(|| anyhow!("claude json wrapper had no `result` field"))?;
124        super::parse_verdict(&verdict)
125    }
126}
127
/// Reports whether a `claude` executable can be found on PATH and run.
///
/// The probe is `claude --version`, chosen because it is expected to be cheap
/// and offline. It is deliberately tolerant: a spawn failure or a non-success
/// exit status both count as "not available".
fn claude_on_path() -> bool {
    let probe = Command::new("claude").arg("--version").output();
    matches!(probe, Ok(finished) if finished.status.success())
}
137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::classifier::{decide_status, CONFIDENCE_THRESHOLD};
    use crate::event::{EventStatus, EventType};
    use std::sync::{Arc, Mutex};

    /// Fake runner: replies with canned stdout and never inspects the prompt.
    /// It records the model it was asked for so tests can assert the pin.
    struct FakeRunner {
        canned: String,
        seen_model: Mutex<Option<String>>,
    }

    impl FakeRunner {
        fn new(canned: impl Into<String>) -> Self {
            Self {
                canned: canned.into(),
                seen_model: Mutex::new(None),
            }
        }
    }

    impl CommandRunner for FakeRunner {
        fn run(&self, model: &str, _prompt: &str) -> anyhow::Result<String> {
            *self.seen_model.lock().unwrap() = Some(model.to_string());
            Ok(self.canned.clone())
        }
    }

    /// Adapter so a test can keep an `Arc` handle to inspect the runner after
    /// it is boxed into the classifier.
    struct ArcRunner(Arc<FakeRunner>);

    impl CommandRunner for ArcRunner {
        fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
            self.0.run(model, prompt)
        }
    }

    /// The chunk every test classifies.
    fn input() -> ClassifyInput {
        ClassifyInput {
            text: "We adopted Rust for the journal core.".into(),
            author_hint: "assistant".into(),
            recent_tasks: vec![],
        }
    }

    /// Wraps `result_json` in a successful CLI envelope.
    fn envelope(result_json: &str) -> String {
        serde_json::json!({
            "type": "result",
            "subtype": "success",
            "is_error": false,
            "result": result_json,
        })
        .to_string()
    }

    /// Classifier pinned to [`DEFAULT_MODEL`] whose runner prints `stdout`.
    fn classifier_returning(stdout: impl Into<String>) -> ClaudeCliClassifier {
        ClaudeCliClassifier::with_runner(DEFAULT_MODEL, Box::new(FakeRunner::new(stdout)))
    }

    #[test]
    fn parses_canned_verdict_into_classify_output() {
        let verdict = r#"{"event_type":"decision","task_id_guess":"tj-x","confidence":0.93,"evidence_strength":null,"suggested_text":"Adopt Rust."}"#;
        let out = classifier_returning(envelope(verdict))
            .classify(&input())
            .unwrap();
        assert_eq!(out.event_type, EventType::Decision);
        assert_eq!(out.task_id_guess.as_deref(), Some("tj-x"));
        assert!((out.confidence - 0.93).abs() < 1e-6);
        // 0.93 >= 0.85 → confirmed.
        assert_eq!(decide_status(out.confidence), EventStatus::Confirmed);
    }

    #[test]
    fn pins_the_configured_model() {
        let verdict = r#"{"event_type":"finding","task_id_guess":null,"confidence":0.9,"evidence_strength":null,"suggested_text":"x"}"#;
        let captured = Arc::new(FakeRunner::new(envelope(verdict)));
        let c = ClaudeCliClassifier::with_runner(
            "claude-haiku-4-5",
            Box::new(ArcRunner(Arc::clone(&captured))),
        );
        let _ = c.classify(&input()).unwrap();
        assert_eq!(
            captured.seen_model.lock().unwrap().as_deref(),
            Some("claude-haiku-4-5"),
            "classifier must pin the model it was constructed with"
        );
    }

    #[test]
    fn decide_status_at_the_0_85_threshold() {
        // The cases below straddle the threshold; pin its value once rather
        // than on every loop iteration.
        assert_eq!(CONFIDENCE_THRESHOLD, 0.85);

        for (conf, expect) in [
            (0.85_f64, EventStatus::Confirmed),
            (0.84_f64, EventStatus::Suggested),
        ] {
            let verdict = format!(
                r#"{{"event_type":"evidence","task_id_guess":null,"confidence":{conf},"evidence_strength":"strong","suggested_text":"t"}}"#
            );
            let out = classifier_returning(envelope(&verdict))
                .classify(&input())
                .unwrap();
            assert!((out.confidence - conf).abs() < 1e-6);
            assert_eq!(decide_status(out.confidence), expect);
        }
    }

    #[test]
    fn tolerates_code_fence_wrapped_verdict() {
        let verdict = "```json\n{\"event_type\":\"rejection\",\"task_id_guess\":null,\"confidence\":0.88,\"evidence_strength\":null,\"suggested_text\":\"won't work\"}\n```";
        let out = classifier_returning(envelope(verdict))
            .classify(&input())
            .unwrap();
        assert_eq!(out.event_type, EventType::Rejection);
    }

    #[test]
    fn errors_when_claude_reports_is_error() {
        let canned = serde_json::json!({
            "type": "result",
            "subtype": "error_during_execution",
            "is_error": true,
            "result": null,
        })
        .to_string();
        let err = classifier_returning(canned).classify(&input()).unwrap_err();
        // Checking for the subtype (not just the word "error") proves the
        // failure came from the `is_error` branch and not from a later stage.
        let msg = err.to_string();
        assert!(msg.contains("error_during_execution"), "got: {msg}");
    }

    #[test]
    fn errors_when_wrapper_is_not_json() {
        let err = classifier_returning("definitely not json")
            .classify(&input())
            .unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("wrapper parse failed"), "got: {msg}");
    }

    #[test]
    fn errors_when_result_field_is_missing() {
        let canned = serde_json::json!({
            "type": "result",
            "subtype": "success",
            "is_error": false,
        })
        .to_string();
        let err = classifier_returning(canned).classify(&input()).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("no `result` field"), "got: {msg}");
    }
}