// innate_core/kb/situation.rs

//! Situation — the signal bundle that triggers intuition.
//!
//! Replaces the bare `query` as the unit of resonance. The key design constraint
//! (PRD §3.2 / Spec §2) is the **double-path split**:
//!
//! - **Resonance path** ([`Situation::embed_text`]): the *rich* situation joined into one
//!   string and embedded. Fine-grained is fine — it is a continuous cosine similarity that
//!   naturally tolerates fragments.
//! - **Calibration path** ([`Situation::context_key`]): the situation *coarsened* into a stable
//!   signature before hashing into a `context_key`. This MUST be coarse (stage + error_class +
//!   file_type, never the raw error text), otherwise every slightly-different situation becomes a
//!   new bucket, `chunk_context_stats` never accumulates ≥5 evidence, and calibration collapses
//!   to ~0 (`evidence_weight = min(evidence/5, 1)`).
//!
//! A pure-`query` situation degrades exactly to the legacy
//! `content_hash(normalize_query(query))` behaviour, so `recall()` stays zero-regression.

use crate::utils::content_hash;

use super::normalize_query;

/// The signal bundle that drives intuition.
///
/// Every field is a borrow, so building a `Situation` copies no text. The derived `Default`
/// is the empty situation: no query, no error, no actions, no stage and no file context.
#[derive(Debug, Clone, Default)]
pub struct Situation<'a> {
    /// Explicit question. May be empty/absent for ambient appraisal.
    pub query: Option<&'a str>,
    /// Current or most-recent error text (raw; only its coarse class enters the context key).
    pub last_error: Option<&'a str>,
    /// The last few actions taken (commands, edits, steps).
    pub recent_actions: &'a [String],
    /// Task stage (e.g. "merge", "implement", "review").
    pub stage: Option<&'a str>,
    /// File type / path summary in scope (e.g. "src/foo.tsx").
    pub file_context: Option<&'a str>,
}

37impl<'a> Situation<'a> {
38    /// Construct from a bare query — the legacy code path. Used by `recall` to keep
39    /// existing callers' behaviour identical (degrades to `normalize_query`).
40    pub fn from_query(query: &'a str) -> Self {
41        Situation {
42            query: Some(query),
43            ..Default::default()
44        }
45    }
46
47    /// True when only `query` carries signal — every other field empty. In this case both
48    /// `embed_text` and `context_key` degrade to the legacy query-only behaviour.
49    fn is_query_only(&self) -> bool {
50        self.last_error.map(str::trim).unwrap_or("").is_empty()
51            && self.recent_actions.iter().all(|a| a.trim().is_empty())
52            && self.stage.map(str::trim).unwrap_or("").is_empty()
53            && self.file_context.map(str::trim).unwrap_or("").is_empty()
54    }
55
56    /// **Resonance path.** Join the rich situation into one embed string. Labelled segments
57    /// keep the embedder from blurring distinct signals together; empty fields are dropped.
58    ///
59    /// A query-only situation returns the query verbatim, so the embedding is byte-identical
60    /// to the legacy `embed_both(query)` path (zero regression for `recall`).
61    pub fn embed_text(&self) -> String {
62        let query = self.query.map(str::trim).unwrap_or("");
63        if self.is_query_only() {
64            return query.to_string();
65        }
66        let mut parts: Vec<String> = Vec::new();
67        if !query.is_empty() {
68            parts.push(format!("[query] {query}"));
69        }
70        if let Some(err) = self.last_error.map(str::trim).filter(|s| !s.is_empty()) {
71            parts.push(format!("[error] {err}"));
72        }
73        let actions: Vec<&str> = self
74            .recent_actions
75            .iter()
76            .map(|a| a.trim())
77            .filter(|a| !a.is_empty())
78            .collect();
79        if !actions.is_empty() {
80            parts.push(format!("[actions] {}", actions.join(" ; ")));
81        }
82        if let Some(stage) = self.stage.map(str::trim).filter(|s| !s.is_empty()) {
83            parts.push(format!("[stage] {stage}"));
84        }
85        if let Some(files) = self.file_context.map(str::trim).filter(|s| !s.is_empty()) {
86            parts.push(format!("[files] {files}"));
87        }
88        parts.join("\n")
89    }
90
91    /// **Calibration path.** Hash the coarse signature into a stable `context_key`.
92    ///
93    /// `coarse_keys` selects which dimensions enter the signature (default
94    /// `stage,error_class,file_type`). A query-only situation degrades to the legacy
95    /// `content_hash(normalize_query(query))` so read/write buckets match historical data.
96    pub fn context_key(&self, coarse_keys: &str) -> String {
97        if self.is_query_only() {
98            return content_hash(&normalize_query(self.query.unwrap_or("")));
99        }
100        content_hash(&self.coarse_signature(coarse_keys))
101    }
102
103    /// Build the coarse signature string, e.g. `stage=merge|err=TypeError|file=tsx`.
104    /// Only the dimensions named in `coarse_keys` are included, in a fixed order, so the
105    /// key is stable across runs. Never includes raw error text — only the error *class*.
106    pub fn coarse_signature(&self, coarse_keys: &str) -> String {
107        let keys: Vec<&str> = coarse_keys
108            .split(',')
109            .map(str::trim)
110            .filter(|k| !k.is_empty())
111            .collect();
112        let mut parts: Vec<String> = Vec::new();
113        for key in keys {
114            match key {
115                "stage" => parts.push(format!(
116                    "stage={}",
117                    self.stage.map(str::trim).unwrap_or("").to_lowercase()
118                )),
119                "error_class" => parts.push(format!("err={}", self.error_class())),
120                "file_type" => parts.push(format!("file={}", self.file_type())),
121                // Unknown dimension: ignore rather than poison the signature.
122                _ => {}
123            }
124        }
125        parts.join("|")
126    }
127
128    /// Normalise `last_error` to a stable category — the command of the no-blow-up rule.
129    /// Strategy (no original text leaks through):
130    /// 1. A typed error name (`TypeError`, `NullPointerException`, …) → that name.
131    /// 2. A Rust diagnostic code (`error[E0599]` / `E0277`) → the code.
132    /// 3. A panic → `panic`.
133    /// 4. Otherwise the first alphabetic token, lowercased.
134    fn error_class(&self) -> String {
135        let err = self.last_error.map(str::trim).unwrap_or("");
136        if err.is_empty() {
137            return String::new();
138        }
139        // Rust diagnostic code: E followed by digits (optionally inside error[...]).
140        if let Some(code) = find_rust_error_code(err) {
141            return code;
142        }
143        // Typed error name: an identifier ending in Error / Exception.
144        if let Some(name) = err
145            .split(|c: char| !(c.is_alphanumeric() || c == '_'))
146            .find(|tok| {
147                tok.len() > 3 && (tok.ends_with("Error") || tok.ends_with("Exception"))
148            })
149        {
150            return name.to_string();
151        }
152        let low = err.to_lowercase();
153        if low.contains("panic") {
154            return "panic".to_string();
155        }
156        // Fallback: first alphabetic token, lowercased, truncated.
157        low.split(|c: char| !c.is_alphabetic())
158            .find(|t| !t.is_empty())
159            .map(|t| t.chars().take(24).collect())
160            .unwrap_or_default()
161    }
162
163    /// Extract a coarse file type from `file_context` — the extension of the last path token,
164    /// or the bare token if no extension. Never the full path.
165    fn file_type(&self) -> String {
166        let ctx = self.file_context.map(str::trim).unwrap_or("");
167        if ctx.is_empty() {
168            return String::new();
169        }
170        // Take the first whitespace/comma-separated path token.
171        let token = ctx
172            .split(|c: char| c.is_whitespace() || c == ',')
173            .find(|t| !t.is_empty())
174            .unwrap_or("");
175        match token.rsplit_once('.') {
176            Some((_, ext)) if !ext.is_empty() && ext.len() <= 8 => ext.to_lowercase(),
177            _ => token
178                .rsplit(['/', '\\'])
179                .next()
180                .unwrap_or(token)
181                .to_lowercase(),
182        }
183    }
184}
185
/// Find a Rust-style diagnostic code (`E` followed by digits) in `err`.
///
/// Returns the first match normalised to an upper-case `E` plus its digits, e.g. `E0599`
/// for `error[E0599]: …`. A candidate only counts when:
/// - the `E`/`e` starts a token (it is not preceded by a letter, digit or `_`), so hex
///   addresses (`0x7ffe1234`), floats (`1e100`) and names like `file1234` are not mistaken
///   for codes;
/// - it is followed by at least three ASCII digits (rules out `e5`);
/// - the digit run ends the token (it is not followed by a letter, digit or `_`).
///
/// Returns `None` when no such code is present.
fn find_rust_error_code(err: &str) -> Option<String> {
    const MIN_DIGITS: usize = 3;
    let is_word = |c: char| c.is_alphanumeric() || c == '_';

    let mut prev: Option<char> = None;
    for (idx, ch) in err.char_indices() {
        let starts_token = prev.map_or(true, |p| !is_word(p));
        prev = Some(ch);
        if !starts_token || !matches!(ch, 'E' | 'e') {
            continue;
        }
        // 'E'/'e' is one byte, so `idx + 1` is always a valid char boundary.
        let rest = &err[idx + 1..];
        let digit_count = rest.bytes().take_while(u8::is_ascii_digit).count();
        if digit_count < MIN_DIGITS {
            continue;
        }
        // The digits are ASCII, so slicing at `digit_count` stays on a char boundary.
        let ends_token = rest[digit_count..]
            .chars()
            .next()
            .map_or(true, |c| !is_word(c));
        if ends_token {
            return Some(format!("E{}", &rest[..digit_count]));
        }
    }
    None
}

/// Unit tests for the resonance (`embed_text`) and calibration (`context_key` /
/// `coarse_signature`) paths.
#[cfg(test)]
mod tests {
    use super::*;

    /// Dimensions used by the tests that exercise the full signature.
    const KEYS: &str = "stage,error_class,file_type";

    /// A query-only situation must hash exactly like the legacy query path.
    #[test]
    fn query_only_degrades_to_legacy_key() {
        let s = Situation::from_query("How to fix the Merge?");
        let legacy = content_hash(&normalize_query("How to fix the Merge?"));
        assert_eq!(s.context_key(KEYS), legacy);
        // embed_text is byte-identical to the bare query (after trim).
        assert_eq!(s.embed_text(), "How to fix the Merge?");
    }

    /// Same stage + error class + file type, different raw error message → same key.
    #[test]
    fn context_key_stable_across_differing_error_text() {
        let a = Situation {
            stage: Some("merge"),
            last_error: Some("TypeError: cannot read property 'x' of undefined at line 42"),
            file_context: Some("src/components/Foo.tsx"),
            ..Default::default()
        };
        let b = Situation {
            stage: Some("merge"),
            last_error: Some("TypeError: undefined is not a function in handler"),
            file_context: Some("src/pages/Bar.tsx"),
            ..Default::default()
        };
        assert_eq!(a.context_key(KEYS), b.context_key(KEYS));
        assert_eq!(a.coarse_signature(KEYS), "stage=merge|err=TypeError|file=tsx");
    }

    /// A different error class must land in a different bucket.
    #[test]
    fn differing_class_yields_different_key() {
        let a = Situation {
            stage: Some("merge"),
            last_error: Some("TypeError: boom"),
            file_context: Some("a.tsx"),
            ..Default::default()
        };
        let b = Situation {
            stage: Some("merge"),
            last_error: Some("RangeError: boom"),
            file_context: Some("a.tsx"),
            ..Default::default()
        };
        assert_ne!(a.context_key(KEYS), b.context_key(KEYS));
    }

    /// Rust diagnostics are classified by their `E…` code.
    #[test]
    fn rust_error_code_classified() {
        let s = Situation {
            stage: Some("build"),
            last_error: Some("error[E0599]: no method named `foo` found"),
            file_context: Some("src/lib.rs"),
            ..Default::default()
        };
        assert_eq!(s.coarse_signature("error_class"), "err=E0599");
    }

    /// Every non-empty field shows up, labelled, in the embed text.
    #[test]
    fn embed_text_includes_all_nonempty_fields() {
        let actions = vec!["git merge".to_string(), "cargo test".to_string()];
        let s = Situation {
            query: Some("why did merge fail"),
            last_error: Some("conflict"),
            recent_actions: &actions,
            stage: Some("merge"),
            file_context: Some("Cargo.toml"),
        };
        let text = s.embed_text();
        assert!(text.contains("[query] why did merge fail"));
        assert!(text.contains("[error] conflict"));
        assert!(text.contains("git merge"));
        assert!(text.contains("[stage] merge"));
        assert!(text.contains("[files] Cargo.toml"));
    }
}