Skip to main content

oris_evolution/
task_class.rs

1//! Task-class abstraction for semantic-equivalent task grouping.
2//!
3//! A `TaskClass` represents a category of semantically equivalent tasks that
4//! can reuse the same learned `Gene` even when the exact signal strings differ.
5//!
6//! # Example classes
7//!
//! | ID | Name | Signal keywords |
//! |----|------|-----------------|
//! | `missing-import` | Missing import / undefined symbol | `E0425`, `E0433`, `unresolved`, `undefined`, `import`, `missing`, `cannot`, `find`, `symbol` |
//! | `type-mismatch` | Type mismatch | `E0308`, `mismatched`, `expected`, `found`, `type`, `mismatch` |
//! | `borrow-conflict` | Borrow checker conflict | `E0502`, `E0505`, `borrow`, `lifetime`, `moved`, `cannot`, `conflict` |
13//!
14//! # How matching works
15//!
16//! 1. Each signal string is tokenised into lowercase words.
17//! 2. A signal **matches** a `TaskClass` if the intersection of its word-set
18//!    with the class's `signal_keywords` is non-empty.
19//! 3. The `TaskClassMatcher::classify` method returns the class whose keywords
20//!    produce the highest overlap score with the combined signal list.
21//!
//! Cross-class false positives are limited by overlap scoring: a signal that
//! partially matches two classes maps to the one with more matching keywords.
//! Keyword sets are mostly, but not strictly, disjoint (the built-in
//! `missing-import` and `borrow-conflict` classes both list `cannot`). When two
//! classes reach the same score, the one registered first wins.
26
27use serde::{Deserialize, Serialize};
28
29// ─── TaskClass ────────────────────────────────────────────────────────────────
30
31/// A named category of semantically equivalent tasks.
32///
33/// Genes are tagged with a `task_class_id` during their Solidify phase.
34/// When the Select stage cannot find an exact signal match, it falls back to
35/// `TaskClassMatcher` to surface candidates that share the same class.
36#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
37pub struct TaskClass {
38    /// Opaque, stable identifier. Genes reference this via `Gene::task_class_id`.
39    pub id: String,
40    /// Human-readable label.
41    pub name: String,
42    /// Lowercase keywords used for signal classification.
43    ///
44    /// A signal string matches this class when any of these keywords appears as
45    /// a word token (after lowercasing and splitting on non-alphanumeric chars).
46    pub signal_keywords: Vec<String>,
47}
48
49impl TaskClass {
50    /// Create a new `TaskClass`.
51    pub fn new(
52        id: impl Into<String>,
53        name: impl Into<String>,
54        signal_keywords: impl IntoIterator<Item = impl Into<String>>,
55    ) -> Self {
56        Self {
57            id: id.into(),
58            name: name.into(),
59            signal_keywords: signal_keywords
60                .into_iter()
61                .map(|k| k.into().to_lowercase())
62                .collect(),
63        }
64    }
65
66    /// Count how many keyword tokens overlap with `signal`.
67    ///
68    /// The signal is tokenised (split on non-alphanumeric characters) and each
69    /// token is compared against `signal_keywords`. Returns the overlap count.
70    pub(crate) fn overlap_score(&self, signal: &str) -> usize {
71        let tokens = tokenise(signal);
72        self.signal_keywords
73            .iter()
74            .filter(|kw| tokens.contains(*kw))
75            .count()
76    }
77}
78
79// ─── Built-in task classes ────────────────────────────────────────────────────
80
81/// Return the canonical built-in set of task classes.
82///
83/// Callers may extend this list with domain-specific classes before passing it
84/// to `TaskClassMatcher::new`.
85pub fn builtin_task_classes() -> Vec<TaskClass> {
86    vec![
87        TaskClass::new(
88            "missing-import",
89            "Missing import / undefined symbol",
90            [
91                "e0425",
92                "e0433",
93                "unresolved",
94                "undefined",
95                "import",
96                "missing",
97                "cannot",
98                "find",
99                "symbol",
100            ],
101        ),
102        TaskClass::new(
103            "type-mismatch",
104            "Type mismatch",
105            [
106                "e0308",
107                "mismatched",
108                "expected",
109                "found",
110                "type",
111                "mismatch",
112            ],
113        ),
114        TaskClass::new(
115            "borrow-conflict",
116            "Borrow checker conflict",
117            [
118                "e0502", "e0505", "borrow", "lifetime", "moved", "cannot", "conflict",
119            ],
120        ),
121        TaskClass::new(
122            "test-failure",
123            "Test failure",
124            ["test", "failed", "panic", "assert", "assertion", "failure"],
125        ),
126        TaskClass::new(
127            "performance",
128            "Performance issue",
129            ["slow", "latency", "timeout", "perf", "performance", "hot"],
130        ),
131    ]
132}
133
134// ─── TaskClassMatcher ─────────────────────────────────────────────────────────
135
136/// Classifies a list of signal strings to the best-matching `TaskClass`.
137pub struct TaskClassMatcher {
138    classes: Vec<TaskClass>,
139}
140
141impl TaskClassMatcher {
142    /// Create a matcher with the provided task-class registry.
143    pub fn new(classes: Vec<TaskClass>) -> Self {
144        Self { classes }
145    }
146
147    /// Create a matcher pre-loaded with `builtin_task_classes()`.
148    pub fn with_builtins() -> Self {
149        Self::new(builtin_task_classes())
150    }
151
152    /// Classify `signals` to the best-matching task class.
153    ///
154    /// Returns `None` when no class achieves a positive overlap score.
155    pub fn classify<'a>(&'a self, signals: &[String]) -> Option<&'a TaskClass> {
156        let mut best: Option<(&TaskClass, usize)> = None;
157
158        for class in &self.classes {
159            let total_score: usize = signals.iter().map(|s| class.overlap_score(s)).sum();
160            if total_score > 0 {
161                match best {
162                    None => best = Some((class, total_score)),
163                    Some((_, prev_score)) if total_score > prev_score => {
164                        best = Some((class, total_score));
165                    }
166                    _ => {}
167                }
168            }
169        }
170
171        best.map(|(c, _)| c)
172    }
173
174    /// Return a reference to the underlying class registry.
175    pub fn classes(&self) -> &[TaskClass] {
176        &self.classes
177    }
178}
179
180// ─── Helpers ──────────────────────────────────────────────────────────────────
181
/// Break `s` into lowercase word tokens.
///
/// Every character that is not alphanumeric acts as a separator. Empty pieces
/// (from leading, trailing or consecutive separators) are discarded, and each
/// remaining piece is lowercased on its own.
fn tokenise(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    for piece in s.split(|ch: char| !ch.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        words.push(piece.to_lowercase());
    }
    words
}
189
190/// Check whether `signals` match the given task-class ID in `registry`.
191///
192/// A convenience wrapper around `TaskClassMatcher::classify`.
193pub fn signals_match_class(signals: &[String], class_id: &str, registry: &[TaskClass]) -> bool {
194    let matcher = TaskClassMatcher::new(registry.to_vec());
195    matcher
196        .classify(signals)
197        .map_or(false, |c| c.id == class_id)
198}
199
200// ─── Tests ────────────────────────────────────────────────────────────────────
201
#[cfg(test)]
mod tests {
    //! Unit tests for task-class matching against the built-in registry and
    //! against small custom registries.

    use super::*;

    /// Matcher loaded with the built-in classes only.
    fn matcher() -> TaskClassMatcher {
        TaskClassMatcher::with_builtins()
    }

    // ── Positive: same task-class, different signal variants ─────────────────

    #[test]
    fn test_missing_import_via_error_code() {
        let m = matcher();
        let signals = vec!["error[E0425]: cannot find value `foo` in scope".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "missing-import");
    }

    #[test]
    fn test_missing_import_via_natural_language() {
        let m = matcher();
        // Different phrasing — no Rust error code, but "undefined symbol" keywords
        let signals = vec!["undefined symbol: use_missing_fn".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "missing-import");
    }

    #[test]
    fn test_missing_import_via_unresolved_import() {
        let m = matcher();
        let signals = vec!["unresolved import `std::collections::Missing`".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "missing-import");
    }

    #[test]
    fn test_type_mismatch_classification() {
        let m = matcher();
        let signals =
            vec!["error[E0308]: mismatched types: expected `u32` found `String`".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "type-mismatch");
    }

    #[test]
    fn test_borrow_conflict_classification() {
        let m = matcher();
        // "cannot" is also a missing-import keyword, but borrow-conflict
        // collects more hits (e0502, cannot, borrow) and therefore wins.
        let signals = vec![
            "error[E0502]: cannot borrow `x` as mutable because it is also borrowed as immutable"
                .to_string(),
        ];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "borrow-conflict");
    }

    #[test]
    fn test_test_failure_classification() {
        let m = matcher();
        let signals = vec!["test panicked: assertion failed: x == y".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "test-failure");
    }

    #[test]
    fn test_multiple_signals_accumulate_score() {
        let m = matcher();
        // Two signals both pointing at type-mismatch → still resolves correctly
        let signals = vec![
            "expected type `u32`".to_string(),
            "found type `String` — type mismatch".to_string(),
        ];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "type-mismatch");
    }

    // ── Negative: cross-class no false positives ──────────────────────────────

    #[test]
    fn test_no_false_positive_type_vs_borrow() {
        let m = matcher();
        // "E0308" → type-mismatch only, not borrow-conflict
        let signals = vec!["error[E0308]: mismatched type".to_string()];
        let cls = m.classify(&signals).unwrap();
        assert_ne!(
            cls.id, "borrow-conflict",
            "must not cross-match borrow-conflict"
        );
    }

    #[test]
    fn test_no_false_positive_borrow_vs_import() {
        let m = matcher();
        let signals = vec!["error[E0502]: cannot borrow as mutable".to_string()];
        let cls = m.classify(&signals).unwrap();
        assert_ne!(cls.id, "missing-import");
    }

    #[test]
    fn test_no_match_returns_none() {
        let m = matcher();
        // None of these words is a keyword of any built-in class.
        let signals = vec!["the quick brown fox jumps over the lazy dog".to_string()];
        assert!(m.classify(&signals).is_none());
    }

    #[test]
    fn test_timeout_signal_maps_to_performance() {
        let m = matcher();
        // "timeout" is a performance keyword; no other built-in class shares
        // any token with this signal, so it must not leak into the
        // compiler-error classes.
        let signals = vec!["network timeout connecting to database server".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "performance");
    }

    #[test]
    fn test_empty_signals_returns_none() {
        let m = matcher();
        assert!(m.classify(&[]).is_none());
    }

    // ── Boundary: custom classes ──────────────────────────────────────────────

    #[test]
    fn test_custom_class_wins_over_builtin() {
        // A domain-specific class with high keyword density should beat builtins
        let mut classes = builtin_task_classes();
        classes.push(TaskClass::new(
            "db-timeout",
            "Database timeout",
            ["database", "timeout", "connection", "pool", "exhausted"],
        ));
        let m = TaskClassMatcher::new(classes);
        let signals = vec!["database connection pool exhausted — timeout".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "db-timeout");
    }

    #[test]
    fn test_tie_prefers_first_registered_class() {
        // Both classes score exactly one hit; the earlier registration wins.
        let m = TaskClassMatcher::new(vec![
            TaskClass::new("first", "First", ["shared"]),
            TaskClass::new("second", "Second", ["shared"]),
        ]);
        let signals = vec!["shared keyword".to_string()];
        let cls = m.classify(&signals).expect("should classify");
        assert_eq!(cls.id, "first");
    }

    #[test]
    fn test_signals_match_class_helper() {
        let registry = builtin_task_classes();
        let signals = vec!["error[E0425]: cannot find value".to_string()];
        assert!(signals_match_class(&signals, "missing-import", &registry));
        assert!(!signals_match_class(&signals, "type-mismatch", &registry));
    }

    #[test]
    fn test_overlap_score_case_insensitive() {
        let class = TaskClass::new("tc", "Test", ["e0425", "unresolved"]);
        let m = TaskClassMatcher::new(vec![class]);
        // Signal contains uppercase E0425 — tokenise lowercases all tokens
        // so the match is case-insensitive.
        let signals = vec!["E0425 unresolved import".to_string()];
        let cls = m
            .classify(&signals)
            .expect("case-insensitive classify should work");
        assert_eq!(cls.id, "tc");
    }
}
357}