Skip to main content

innate_core/kb/
situation.rs

//! Situation — the signal bundle that triggers intuition.
//!
//! Replaces the bare `query` as the unit of resonance. The key design constraint
//! (PRD §3.2 / Spec §2) is the **double-path split**:
//!
//! - **Resonance path** ([`Situation::embed_text`]): the *rich* situation joined into one
//!   string and embedded. Fine-grained detail is acceptable here — matching is a continuous
//!   cosine similarity that naturally tolerates fragments.
//! - **Calibration path** ([`Situation::context_key`]): the situation *coarsened* into a stable
//!   signature before hashing into a `context_key`. This MUST be coarse (stage + error_class +
//!   file_type, never the raw error text); otherwise every slightly-different situation becomes a
//!   new bucket, `chunk_context_stats` never accumulates ≥5 evidence, and calibration collapses
//!   to ~0 (`evidence_weight = min(evidence/5, 1)`).
//!
//! A pure-`query` situation degrades exactly to the legacy
//! `content_hash(normalize_query(query))` behaviour, so `recall()` stays zero-regression.
17
18use crate::utils::content_hash;
19
20use super::normalize_query;
21
/// Borrowed bundle of signals describing what the caller is currently doing.
///
/// Every field is optional: `Situation::default()` carries no signal at all, and individual
/// fields can be filled in with struct-update syntax (`..Default::default()`).
#[derive(Debug, Clone, Default)]
pub struct Situation<'a> {
    /// The explicit question, if any. Absent or empty for ambient appraisal.
    pub query: Option<&'a str>,
    /// Text of the current or most recent error.
    pub last_error: Option<&'a str>,
    /// The most recent actions taken (commands, edits, steps).
    pub recent_actions: &'a [String],
    /// Stage of the task, e.g. "merge", "implement", "review".
    pub stage: Option<&'a str>,
    /// Summary of the file type / path in scope, e.g. "src/foo.tsx".
    pub file_context: Option<&'a str>,
}
36
37impl<'a> Situation<'a> {
38    /// Construct from a bare query — the legacy code path. Used by `recall` to keep
39    /// existing callers' behaviour identical (degrades to `normalize_query`).
40    pub fn from_query(query: &'a str) -> Self {
41        Situation {
42            query: Some(query),
43            ..Default::default()
44        }
45    }
46
47    /// True when only `query` carries signal — every other field empty. In this case both
48    /// `embed_text` and `context_key` degrade to the legacy query-only behaviour.
49    fn is_query_only(&self) -> bool {
50        self.last_error.map(str::trim).unwrap_or("").is_empty()
51            && self.recent_actions.iter().all(|a| a.trim().is_empty())
52            && self.stage.map(str::trim).unwrap_or("").is_empty()
53            && self.file_context.map(str::trim).unwrap_or("").is_empty()
54    }
55
56    /// **Resonance path.** Join the rich situation into one embed string. Labelled segments
57    /// keep the embedder from blurring distinct signals together; empty fields are dropped.
58    ///
59    /// A query-only situation returns the query verbatim, so the embedding is byte-identical
60    /// to the legacy `embed_both(query)` path (zero regression for `recall`).
61    pub fn embed_text(&self) -> String {
62        let query = self.query.map(str::trim).unwrap_or("");
63        if self.is_query_only() {
64            return query.to_string();
65        }
66        let mut parts: Vec<String> = Vec::new();
67        if !query.is_empty() {
68            parts.push(format!("[query] {query}"));
69        }
70        if let Some(err) = self.last_error.map(str::trim).filter(|s| !s.is_empty()) {
71            parts.push(format!("[error] {err}"));
72        }
73        let actions: Vec<&str> = self
74            .recent_actions
75            .iter()
76            .map(|a| a.trim())
77            .filter(|a| !a.is_empty())
78            .collect();
79        if !actions.is_empty() {
80            parts.push(format!("[actions] {}", actions.join(" ; ")));
81        }
82        if let Some(stage) = self.stage.map(str::trim).filter(|s| !s.is_empty()) {
83            parts.push(format!("[stage] {stage}"));
84        }
85        if let Some(files) = self.file_context.map(str::trim).filter(|s| !s.is_empty()) {
86            parts.push(format!("[files] {files}"));
87        }
88        parts.join("\n")
89    }
90
91    /// **Calibration path.** Hash the coarse signature into a stable `context_key`.
92    ///
93    /// `coarse_keys` selects which dimensions enter the signature (default
94    /// `stage,error_class,file_type`). A query-only situation degrades to the legacy
95    /// `content_hash(normalize_query(query))` so read/write buckets match historical data.
96    pub fn context_key(&self, coarse_keys: &str) -> String {
97        if self.is_query_only() {
98            return content_hash(&normalize_query(self.query.unwrap_or("")));
99        }
100        content_hash(&self.coarse_signature(coarse_keys))
101    }
102
103    /// Build the coarse signature string, e.g. `stage=merge|err=TypeError|file=tsx`.
104    /// Only the dimensions named in `coarse_keys` are included, in a fixed order, so the
105    /// key is stable across runs. Never includes raw error text — only the error *class*.
106    pub fn coarse_signature(&self, coarse_keys: &str) -> String {
107        let keys: Vec<&str> = coarse_keys
108            .split(',')
109            .map(str::trim)
110            .filter(|k| !k.is_empty())
111            .collect();
112        let mut parts: Vec<String> = Vec::new();
113        for key in keys {
114            match key {
115                "stage" => parts.push(format!(
116                    "stage={}",
117                    self.stage.map(str::trim).unwrap_or("").to_lowercase()
118                )),
119                "error_class" => parts.push(format!("err={}", self.error_class())),
120                "file_type" => parts.push(format!("file={}", self.file_type())),
121                // Unknown dimension: ignore rather than poison the signature.
122                _ => {}
123            }
124        }
125        parts.join("|")
126    }
127
128    /// Normalise `last_error` to a stable category — the command of the no-blow-up rule.
129    /// Strategy (no original text leaks through):
130    /// 1. A typed error name (`TypeError`, `NullPointerException`, …) → that name.
131    /// 2. A Rust diagnostic code (`error[E0599]` / `E0277`) → the code.
132    /// 3. A panic → `panic`.
133    /// 4. Otherwise the first alphabetic token, lowercased.
134    fn error_class(&self) -> String {
135        let err = self.last_error.map(str::trim).unwrap_or("");
136        if err.is_empty() {
137            return String::new();
138        }
139        // Rust diagnostic code: E followed by digits (optionally inside error[...]).
140        if let Some(code) = find_rust_error_code(err) {
141            return code;
142        }
143        // Typed error name: an identifier ending in Error / Exception.
144        if let Some(name) = err
145            .split(|c: char| !(c.is_alphanumeric() || c == '_'))
146            .find(|tok| tok.len() > 3 && (tok.ends_with("Error") || tok.ends_with("Exception")))
147        {
148            return name.to_string();
149        }
150        let low = err.to_lowercase();
151        if low.contains("panic") {
152            return "panic".to_string();
153        }
154        // Fallback: first alphabetic token, lowercased, truncated.
155        low.split(|c: char| !c.is_alphabetic())
156            .find(|t| !t.is_empty())
157            .map(|t| t.chars().take(24).collect())
158            .unwrap_or_default()
159    }
160
161    /// Extract a coarse file type from `file_context` — the extension of the last path token,
162    /// or the bare token if no extension. Never the full path.
163    fn file_type(&self) -> String {
164        let ctx = self.file_context.map(str::trim).unwrap_or("");
165        if ctx.is_empty() {
166            return String::new();
167        }
168        // Take the first whitespace/comma-separated path token.
169        let token = ctx
170            .split(|c: char| c.is_whitespace() || c == ',')
171            .find(|t| !t.is_empty())
172            .unwrap_or("");
173        match token.rsplit_once('.') {
174            Some((_, ext)) if !ext.is_empty() && ext.len() <= 8 => ext.to_lowercase(),
175            _ => token
176                .rsplit(['/', '\\'])
177                .next()
178                .unwrap_or(token)
179                .to_lowercase(),
180        }
181    }
182}
183
/// Find a Rust-style diagnostic code — `E` followed by digits — in `err`.
///
/// The text is split into identifier-like tokens (runs of alphanumerics and `_`), and the
/// first token that consists of exactly `E`/`e` followed by at least three ASCII digits is
/// returned, normalised to an upper-case `E` prefix (e.g. `E0599`). This matches both the
/// bracketed form `error[E0599]` and a bare `E0277`.
///
/// Requiring the code to be a whole token keeps incidental text such as `line123`, `1e100`
/// or a hex id like `3fe4567` from being mistaken for a diagnostic code. The three-digit
/// minimum rejects short tokens such as `e5`.
///
/// Returns `None` when no such token exists.
fn find_rust_error_code(err: &str) -> Option<String> {
    err.split(|c: char| !(c.is_alphanumeric() || c == '_'))
        .find_map(|token| {
            let digits = token.strip_prefix(['E', 'e'])?;
            let looks_like_code =
                digits.len() >= 3 && digits.bytes().all(|b| b.is_ascii_digit());
            looks_like_code.then(|| format!("E{digits}"))
        })
}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// The default coarse dimensions used by the calibration path.
    const KEYS: &str = "stage,error_class,file_type";

    /// Build a situation with only the three coarse-signature inputs populated.
    fn coarse<'a>(stage: &'a str, error: &'a str, file: &'a str) -> Situation<'a> {
        Situation {
            stage: Some(stage),
            last_error: Some(error),
            file_context: Some(file),
            ..Default::default()
        }
    }

    #[test]
    fn query_only_degrades_to_legacy_key() {
        let raw = "How to fix the Merge?";
        let situation = Situation::from_query(raw);
        assert_eq!(
            situation.context_key(KEYS),
            content_hash(&normalize_query(raw))
        );
        // The embed text is the bare query (after trimming), with no label added.
        assert_eq!(situation.embed_text(), raw);
    }

    #[test]
    fn context_key_stable_across_differing_error_text() {
        // Same stage, error class and file type; only the raw message and path differ.
        let first = coarse(
            "merge",
            "TypeError: cannot read property 'x' of undefined at line 42",
            "src/components/Foo.tsx",
        );
        let second = coarse(
            "merge",
            "TypeError: undefined is not a function in handler",
            "src/pages/Bar.tsx",
        );
        assert_eq!(first.context_key(KEYS), second.context_key(KEYS));
        assert_eq!(
            first.coarse_signature(KEYS),
            "stage=merge|err=TypeError|file=tsx"
        );
    }

    #[test]
    fn differing_class_yields_different_key() {
        let type_error = coarse("merge", "TypeError: boom", "a.tsx");
        let range_error = coarse("merge", "RangeError: boom", "a.tsx");
        assert_ne!(type_error.context_key(KEYS), range_error.context_key(KEYS));
    }

    #[test]
    fn rust_error_code_classified() {
        let situation = coarse(
            "build",
            "error[E0599]: no method named `foo` found",
            "src/lib.rs",
        );
        assert_eq!(situation.coarse_signature("error_class"), "err=E0599");
    }

    #[test]
    fn embed_text_includes_all_nonempty_fields() {
        let actions = vec!["git merge".to_string(), "cargo test".to_string()];
        let situation = Situation {
            query: Some("why did merge fail"),
            last_error: Some("conflict"),
            recent_actions: &actions,
            stage: Some("merge"),
            file_context: Some("Cargo.toml"),
        };
        let text = situation.embed_text();
        for expected in [
            "[query] why did merge fail",
            "[error] conflict",
            "git merge",
            "[stage] merge",
            "[files] Cargo.toml",
        ] {
            assert!(text.contains(expected));
        }
    }
}