Skip to main content

tj_core/classifier/
mod.rs

1//! Event classifier: takes a chat chunk + recent task context,
2//! returns suggested event_type + task_id + confidence.
3
4use crate::event::{EventType, EvidenceStrength};
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Serialize)]
8pub struct ClassifyInput {
9    pub text: String,
10    pub author_hint: String,
11    pub recent_tasks: Vec<TaskContext>,
12}
13
14#[derive(Debug, Clone, Serialize)]
15pub struct TaskContext {
16    pub task_id: String,
17    pub title: String,
18    pub last_events: Vec<String>,
19    /// The task's most-recent `constraint` events (≤ N). Empty when the
20    /// task has no constraints — the prompt is then unchanged.
21    pub constraints: Vec<String>,
22}
23
24#[derive(Debug, Clone, Deserialize, Serialize)]
25pub struct ClassifyOutput {
26    pub event_type: EventType,
27    pub task_id_guess: Option<String>,
28    pub confidence: f64,
29    pub evidence_strength: Option<EvidenceStrength>,
30    pub suggested_text: String,
31    /// v0.6.0: optional structured artifacts the classifier extracted
32    /// directly. When absent (old protocol or model didn't bother),
33    /// the journal falls back to regex extraction in
34    /// `db::ingest_new_events`. When present, the two sets are merged
35    /// at ingest time so the model can surface artifacts the regex
36    /// would miss (e.g. ticket ids in non-ASCII brackets).
37    #[serde(default)]
38    pub artifacts: Option<crate::artifacts::Artifacts>,
39}
40
41pub trait Classifier: Send + Sync {
42    fn classify(&self, input: &ClassifyInput) -> anyhow::Result<ClassifyOutput>;
43}
44
45use crate::event::EventStatus;
46
47pub const CONFIDENCE_THRESHOLD: f64 = 0.85;
48
49pub fn decide_status(confidence: f64) -> EventStatus {
50    if confidence >= CONFIDENCE_THRESHOLD {
51        EventStatus::Confirmed
52    } else {
53        EventStatus::Suggested
54    }
55}
56
57/// Parse a model's raw text reply into a strict-JSON `ClassifyOutput`,
58/// tolerating ```json code-fence wrapping. Shared by the HTTP and agent-sdk
59/// backends so the two never diverge on how they read the verdict.
60pub(crate) fn parse_verdict(text: &str) -> anyhow::Result<ClassifyOutput> {
61    use anyhow::Context;
62    let json_str = text
63        .trim()
64        .trim_start_matches("```json")
65        .trim_start_matches("```")
66        .trim_end_matches("```")
67        .trim();
68    serde_json::from_str(json_str)
69        .with_context(|| format!("classifier JSON parse failed; got: {json_str}"))
70}
71
72pub mod agent_sdk;
73pub mod heuristic;
74pub mod http;
75pub mod hybrid;
76pub mod mock;
77pub mod prompt;
78pub mod telemetry;
79
#[cfg(test)]
mod tests {
    use super::*;

    /// Captures an environment variable's current value and puts it back
    /// (or removes the variable again) when dropped.
    ///
    /// Restoring in `Drop` means a failing assertion or `unwrap()` inside a
    /// test cannot leak a modified environment into the tests that run after.
    struct EnvVarGuard {
        key: &'static str,
        saved: Option<String>,
    }

    impl EnvVarGuard {
        /// Remembers the value `key` has right now (`None` when unset or not
        /// valid Unicode).
        fn capture(key: &'static str) -> Self {
            Self {
                key,
                saved: std::env::var(key).ok(),
            }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: tests in this crate do not concurrently read these env vars.
            unsafe {
                match self.saved.take() {
                    Some(v) => std::env::set_var(self.key, v),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    /// The HTTP backend must honour `TJ_CLASSIFIER_MODEL`. Wraps the
    /// read-set-restore steps in one test to avoid env-var races with
    /// other tests in this crate.
    #[test]
    fn tj_classifier_model_env_var_overrides_http_default() {
        // Guards restore both variables on every exit path, including panics.
        let _restore_model = EnvVarGuard::capture("TJ_CLASSIFIER_MODEL");
        let _restore_key = EnvVarGuard::capture("ANTHROPIC_API_KEY");

        // SAFETY: tests in this crate do not concurrently read these env vars.
        unsafe {
            std::env::remove_var("TJ_CLASSIFIER_MODEL");
            std::env::set_var("ANTHROPIC_API_KEY", "test-key-do-not-use");
        }
        let http_default = http::AnthropicClassifier::from_env().unwrap();
        assert_eq!(http_default.model, http::DEFAULT_MODEL);

        // SAFETY: tests in this crate do not concurrently read these env vars.
        unsafe {
            std::env::set_var("TJ_CLASSIFIER_MODEL", "sonnet-override");
        }
        let http_override = http::AnthropicClassifier::from_env().unwrap();
        assert_eq!(http_override.model, "sonnet-override");
    }

    /// `TaskContext` carries the `constraints` field and keeps its contents.
    #[test]
    fn task_context_has_constraints_field() {
        let c = TaskContext {
            task_id: "tj-1".into(),
            title: "t".into(),
            last_events: vec![],
            constraints: vec!["must support PHP 7.4".into()],
        };
        assert_eq!(c.constraints, vec!["must support PHP 7.4".to_string()]);
    }

    /// `ClassifyInput` serializes to JSON and the text survives the trip.
    #[test]
    fn classify_input_serializes() {
        let i = ClassifyInput {
            text: "Adopted Rust for the journal".into(),
            author_hint: "assistant".into(),
            recent_tasks: vec![],
        };
        let s = serde_json::to_string(&i).unwrap();
        assert!(s.contains("Adopted Rust"));
    }

    /// Confidence at or above the threshold is confirmed (boundary included).
    #[test]
    fn decide_status_high_confidence_is_confirmed() {
        assert_eq!(decide_status(0.95), EventStatus::Confirmed);
        assert_eq!(decide_status(0.85), EventStatus::Confirmed);
    }

    /// Confidence below the threshold is only suggested.
    #[test]
    fn decide_status_low_confidence_is_suggested() {
        assert_eq!(decide_status(0.84), EventStatus::Suggested);
        assert_eq!(decide_status(0.0), EventStatus::Suggested);
    }
}