Skip to main content

tj_core/classifier/
hybrid.rs

1//! Hybrid classifier — heuristic-first, LLM fallback.
2//!
3//! Tries the cheap, zero-network heuristic first. If a rule fires with
4//! confidence >= `min_heuristic_confidence`, returns the heuristic verdict.
5//! Otherwise escalates to the HTTP (Anthropic API) backend — which
6//! requires `ANTHROPIC_API_KEY`. When no key is set and the heuristic
7//! is uncertain, the classifier errors out (caller should drop the
8//! chunk into the pending queue for later retry rather than guess).
9//!
10//! This replaces the v0.7.x `cli` backend that relied on `claude -p`.
11//! Anthropic changed `claude -p` to bill against tokens separately
12//! from the Pro/Max subscription, breaking the "free fallback" promise
13//! the cli backend was built on.
14
15use super::heuristic::try_heuristic;
16use super::http::AnthropicClassifier;
17use super::{Classifier, ClassifyInput, ClassifyOutput};
18
/// Confidence the heuristic must reach to skip the LLM fallback. Below
/// this, the chunk is ambiguous enough that the API call is worth the
/// cost.
///
/// The bound is inclusive: `classify` accepts a heuristic verdict whose
/// confidence is `>=` this value. `HybridClassifier::from_env` installs
/// it as the threshold.
const DEFAULT_MIN_HEURISTIC_CONFIDENCE: f64 = 0.7;
23
/// Two-stage classifier: tries the local heuristic first and only falls
/// back to the HTTP backend when the heuristic is not confident enough.
pub struct HybridClassifier {
    /// Optional LLM fallback. `None` means uncertain chunks cannot be
    /// classified and `classify` returns an error for them.
    http: Option<AnthropicClassifier>,
    /// Minimum heuristic confidence (inclusive) accepted without
    /// consulting `http`.
    min_heuristic_confidence: f64,
}
28
29impl HybridClassifier {
30    /// Build from environment. Picks up `ANTHROPIC_API_KEY` if present;
31    /// without it, the hybrid still works for chunks the heuristic
32    /// handles confidently, but uncertain chunks will fail (caller
33    /// queues them in pending/).
34    pub fn from_env() -> Self {
35        Self {
36            http: AnthropicClassifier::from_env().ok(),
37            min_heuristic_confidence: DEFAULT_MIN_HEURISTIC_CONFIDENCE,
38        }
39    }
40
41    /// Test-only constructor — accepts an explicit HTTP backend
42    /// (e.g. one pointed at a mock server) without touching env vars.
43    #[cfg(test)]
44    pub fn with_http(http: Option<AnthropicClassifier>, min_conf: f64) -> Self {
45        Self {
46            http,
47            min_heuristic_confidence: min_conf,
48        }
49    }
50
51    pub fn has_llm_fallback(&self) -> bool {
52        self.http.is_some()
53    }
54}
55
56impl Classifier for HybridClassifier {
57    fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
58        if let Some(out) = try_heuristic(input) {
59            if out.confidence >= self.min_heuristic_confidence {
60                return Ok(out);
61            }
62        }
63        match &self.http {
64            Some(h) => h.classify(input),
65            None => anyhow::bail!(
66                "hybrid: heuristic uncertain and ANTHROPIC_API_KEY not set — \
67                 chunk left in pending queue for later retry"
68            ),
69        }
70    }
71}
72
#[cfg(test)]
mod tests {
    use super::*;
    use crate::classifier::TaskContext;
    use crate::event::EventType;
    use std::ffi::OsString;

    /// Environment variable the module docs name as required by the HTTP backend.
    const API_KEY_VAR: &str = "ANTHROPIC_API_KEY";

    /// Removes `ANTHROPIC_API_KEY` on construction and puts the previous
    /// value back on drop, so the environment is restored even when the
    /// test body panics on a failed assertion.
    struct ApiKeyUnset {
        previous: Option<OsString>,
    }

    impl ApiKeyUnset {
        fn new() -> Self {
            // `var_os` (rather than `var`) keeps a non-UTF-8 value intact so
            // it can be restored verbatim.
            let previous = std::env::var_os(API_KEY_VAR);
            // SAFETY: sound only while no other thread reads or writes the
            // process environment during this test.
            // NOTE(review): cargo runs tests in parallel by default — confirm
            // no other test in this binary touches the environment, or
            // serialise the ones that do.
            unsafe { std::env::remove_var(API_KEY_VAR) };
            Self { previous }
        }
    }

    impl Drop for ApiKeyUnset {
        fn drop(&mut self) {
            // SAFETY: same precondition as in `ApiKeyUnset::new`.
            unsafe {
                match self.previous.take() {
                    Some(value) => std::env::set_var(API_KEY_VAR, value),
                    None => std::env::remove_var(API_KEY_VAR),
                }
            }
        }
    }

    /// Builds a classifier input around `text` with one placeholder task.
    fn ctx(text: &str) -> ClassifyInput {
        ClassifyInput {
            text: text.into(),
            author_hint: "assistant".into(),
            recent_tasks: vec![TaskContext {
                task_id: "tj-abc".into(),
                title: "test".into(),
                last_events: vec![],
            }],
        }
    }

    #[test]
    fn heuristic_hit_returns_without_http_backend() {
        // No HTTP backend is configured here. If the heuristic failed to fire
        // confidently on the decision phrase, `classify` would bail and the
        // `unwrap` below would panic — so an `Ok` proves the verdict came
        // from the heuristic alone.
        let hybrid = HybridClassifier::with_http(None, 0.7);
        let out = hybrid
            .classify(&ctx(
                "After review we'll use TOML for the config format going forward",
            ))
            .unwrap();
        assert_eq!(out.event_type, EventType::Decision);
        assert!(
            out.confidence >= 0.7,
            "accepted heuristic verdict must meet the configured threshold"
        );
    }

    #[test]
    fn uncertain_heuristic_without_api_key_bails() {
        let hybrid = HybridClassifier::with_http(None, 0.7);
        let err = hybrid
            .classify(&ctx(
                "Browsing the call site of refundProcessor to understand the dispatch.",
            ))
            .unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains(API_KEY_VAR),
            "error must mention env var: {msg}"
        );
    }

    #[test]
    fn from_env_constructs_without_key() {
        // The guard restores the variable when it goes out of scope, including
        // during unwinding if the assertion below fails.
        let _restore = ApiKeyUnset::new();
        let hybrid = HybridClassifier::from_env();
        assert!(!hybrid.has_llm_fallback());
    }
}
133}