tj_core/classifier/
hybrid.rs1use super::agent_sdk::ClaudeCliClassifier;
22use super::heuristic::try_heuristic;
23#[cfg(test)]
24use super::http::AnthropicClassifier;
25use super::{Classifier, ClassifyInput, ClassifyOutput};
26
/// Heuristic confidence threshold installed by `HybridClassifier::from_env`.
///
/// `HybridClassifier::classify` returns a heuristic result directly only when
/// its confidence is at least this value; otherwise the LLM chain is tried.
const DEFAULT_MIN_HEURISTIC_CONFIDENCE: f64 = 0.7;
30
/// Backend order `HybridClassifier::from_env` falls back to when the
/// `TJ_HYBRID_LLM_ORDER` environment variable cannot be read.
///
/// The value is a comma-separated list of backend names; backends are added to
/// the chain (and later tried) in the order they are listed.
const DEFAULT_LLM_ORDER: &str = "agent-sdk,api";
34
/// Classifier that tries the heuristic first and only falls back to LLM
/// backends when the heuristic gives no answer or one below the confidence
/// threshold.
pub struct HybridClassifier {
    /// LLM backends, tried in order until one returns `Ok`. May be empty.
    llm_chain: Vec<Box<dyn Classifier>>,
    /// Minimum heuristic confidence (inclusive) for the heuristic result to be
    /// returned without consulting `llm_chain`.
    min_heuristic_confidence: f64,
}
41
42impl HybridClassifier {
43 pub fn from_env() -> Self {
47 let order =
48 std::env::var("TJ_HYBRID_LLM_ORDER").unwrap_or_else(|_| DEFAULT_LLM_ORDER.into());
49 let mut llm_chain: Vec<Box<dyn Classifier>> = Vec::new();
50 for kind in order.split(',').map(str::trim) {
51 match kind {
52 "agent-sdk" => {
53 if let Some(c) = ClaudeCliClassifier::from_env() {
54 llm_chain.push(Box::new(c));
55 }
56 }
57 "api" => {
58 if let Ok(c) = super::http::AnthropicClassifier::from_env() {
59 llm_chain.push(Box::new(c));
60 }
61 }
62 _ => {} }
64 }
65 Self {
66 llm_chain,
67 min_heuristic_confidence: DEFAULT_MIN_HEURISTIC_CONFIDENCE,
68 }
69 }
70
71 #[cfg(test)]
74 pub fn with_http(http: Option<AnthropicClassifier>, min_conf: f64) -> Self {
75 let llm_chain: Vec<Box<dyn Classifier>> = match http {
76 Some(h) => vec![Box::new(h)],
77 None => vec![],
78 };
79 Self {
80 llm_chain,
81 min_heuristic_confidence: min_conf,
82 }
83 }
84
85 #[cfg(test)]
89 pub fn with_llm_chain(llm_chain: Vec<Box<dyn Classifier>>, min_conf: f64) -> Self {
90 Self {
91 llm_chain,
92 min_heuristic_confidence: min_conf,
93 }
94 }
95
96 pub fn has_llm_fallback(&self) -> bool {
97 !self.llm_chain.is_empty()
98 }
99}
100
101impl Classifier for HybridClassifier {
102 fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
103 if let Some(out) = try_heuristic(input) {
104 if out.confidence >= self.min_heuristic_confidence {
105 return Ok(out);
106 }
107 }
108 if self.llm_chain.is_empty() {
109 anyhow::bail!(
110 "hybrid: heuristic uncertain and no LLM backend available \
111 (no `claude` on PATH for agent-sdk, no ANTHROPIC_API_KEY for api) — \
112 chunk left in pending queue for later retry"
113 );
114 }
115 let mut last_err = None;
116 for backend in &self.llm_chain {
117 match backend.classify(input) {
118 Ok(out) => return Ok(out),
119 Err(e) => last_err = Some(e),
120 }
121 }
122 Err(last_err.expect("non-empty chain must produce an error on full failure"))
124 }
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use crate::classifier::agent_sdk::{ClaudeCliClassifier, CommandRunner};
    use crate::classifier::TaskContext;
    use crate::event::EventType;

    /// Input the tests below expect the heuristic NOT to classify confidently,
    /// so that classification has to go to the LLM chain.
    const UNCERTAIN_TEXT: &str =
        "Browsing the call site of refundProcessor to understand the dispatch.";

    /// Builds a `ClassifyInput` for `text` with a single dummy recent task.
    fn ctx(text: &str) -> ClassifyInput {
        ClassifyInput {
            text: text.into(),
            author_hint: "assistant".into(),
            recent_tasks: vec![TaskContext {
                task_id: "tj-abc".into(),
                title: "test".into(),
                last_events: vec![],
                constraints: vec![],
            }],
        }
    }

    /// Backend that fails the test with the given message if it is consulted.
    struct PanicBackend(&'static str);

    impl Classifier for PanicBackend {
        fn classify(&self, _input: &ClassifyInput) -> anyhow::Result<ClassifyOutput> {
            panic!("{}", self.0);
        }
    }

    /// Overrides one environment variable and restores its previous value on
    /// drop, so the override is undone even when the test panics.
    ///
    /// The previous value is captured with `var_os` so a non-UTF-8 value is
    /// restored unchanged instead of being dropped.
    struct EnvVarGuard {
        key: &'static str,
        previous: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        /// Sets `key` to `value`, remembering what was there before.
        fn set(key: &'static str, value: &str) -> Self {
            let previous = std::env::var_os(key);
            // SAFETY: sound only while no other thread reads or writes the
            // process environment concurrently.
            // NOTE(review): the test harness runs tests in parallel by default;
            // confirm no other test touches the environment at the same time.
            unsafe { std::env::set_var(key, value) };
            Self { key, previous }
        }

        /// Removes `key`, remembering what was there before.
        fn unset(key: &'static str) -> Self {
            let previous = std::env::var_os(key);
            // SAFETY: same single-writer assumption as in `set`.
            unsafe { std::env::remove_var(key) };
            Self { key, previous }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: same single-writer assumption as in `set`.
            unsafe {
                match self.previous.take() {
                    Some(value) => std::env::set_var(self.key, value),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    #[test]
    fn heuristic_hit_skips_http_even_when_available() {
        // A backend IS configured, but it panics when called: the test passes
        // only if the confident heuristic hit returns before the chain is used.
        let chain: Vec<Box<dyn Classifier>> = vec![Box::new(PanicBackend(
            "LLM backend must not be reached on a confident heuristic hit",
        ))];
        let hybrid = HybridClassifier::with_llm_chain(chain, 0.7);
        assert!(hybrid.has_llm_fallback());

        let out = hybrid
            .classify(&ctx(
                "After review we'll use TOML for the config format going forward",
            ))
            .unwrap();
        assert_eq!(out.event_type, EventType::Decision);
    }

    #[test]
    fn uncertain_heuristic_without_api_key_bails() {
        let hybrid = HybridClassifier::with_http(None, 0.7);
        let err = hybrid.classify(&ctx(UNCERTAIN_TEXT)).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("ANTHROPIC_API_KEY"),
            "error must mention env var: {msg}"
        );
    }

    #[test]
    fn from_env_constructs_without_key() {
        // "none" is not a recognised backend name, so the chain ends up empty
        // whatever is installed on the machine running the test.
        let _order = EnvVarGuard::set("TJ_HYBRID_LLM_ORDER", "none");
        let _key = EnvVarGuard::unset("ANTHROPIC_API_KEY");

        let hybrid = HybridClassifier::from_env();
        assert!(!hybrid.has_llm_fallback());
        // Both guards drop here (or during unwinding) and restore the env.
    }

    #[test]
    fn uncertain_heuristic_prefers_agent_sdk_and_never_touches_http() {
        /// Runner that always returns a canned successful CLI response.
        struct OkRunner;
        impl CommandRunner for OkRunner {
            fn run(&self, _model: &str, _prompt: &str) -> anyhow::Result<String> {
                Ok(serde_json::json!({
                    "type": "result",
                    "is_error": false,
                    "result": r#"{"event_type":"decision","task_id_guess":null,"confidence":0.9,"evidence_strength":null,"suggested_text":"Adopt X."}"#,
                })
                .to_string())
            }
        }

        let agent = ClaudeCliClassifier::with_runner("claude-haiku-4-5", Box::new(OkRunner));
        let chain: Vec<Box<dyn Classifier>> = vec![
            Box::new(agent),
            Box::new(PanicBackend(
                "http backend must not be reached when agent-sdk succeeds",
            )),
        ];
        let hybrid = HybridClassifier::with_llm_chain(chain, 0.7);

        let out = hybrid.classify(&ctx(UNCERTAIN_TEXT)).unwrap();
        assert_eq!(out.event_type, EventType::Decision);
    }
}