spool/distill/heuristic/
extraction.rs1use serde::{Deserialize, Serialize};
38
/// Width, in user-message positions, of the window that [`detect`] opens at a
/// frustration hit: later hits join the same incident only while their index
/// is below `start + INCIDENT_WINDOW`.
pub const INCIDENT_WINDOW: usize = 6;

/// Maximum number of `char`s [`detect`] keeps from a message when building a
/// summary. Longer text is cut at this many characters and a single `…` is
/// appended, so a capped summary holds `MAX_SUMMARY_CHARS + 1` characters.
pub const MAX_SUMMARY_CHARS: usize = 1000;
45
46#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
48pub struct ExtractionSignal {
49 pub kind: ExtractionKind,
50 pub summary: String,
51 pub evidence_indices: Vec<usize>,
54}
55
56#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
57#[serde(rename_all = "snake_case")]
58pub enum ExtractionKind {
59 Incident,
61 BehaviorPattern,
65 Decision,
69}
70
71impl ExtractionKind {
72 pub fn memory_type(self) -> &'static str {
73 match self {
74 ExtractionKind::Incident => "incident",
75 ExtractionKind::BehaviorPattern => "behavior_pattern",
76 ExtractionKind::Decision => "decision",
77 }
78 }
79}
80
/// Substrings whose presence marks a user message as a frustration hit.
///
/// ASCII entries are written in lowercase because `contains_frustration`
/// compares them against a lowercased copy of the message; non-ASCII entries
/// are compared against the message unchanged.
const FRUSTRATION_PHRASES: &[&str] = &[
    // Chinese phrases.
    "错了",
    "不对",
    "不行",
    "失败了",
    "又失败",
    "不工作",
    "崩了",
    "出错了",
    // English phrases (lowercase).
    "wrong",
    "not working",
    "doesn't work",
    "didn't work",
    "still broken",
    "broke again",
    "still not",
    "not fixed",
];
103
104pub fn detect(user_messages: &[&str]) -> Vec<ExtractionSignal> {
107 if user_messages.len() < 2 {
108 return Vec::new();
109 }
110 let hits: Vec<usize> = user_messages
111 .iter()
112 .enumerate()
113 .filter(|(_, msg)| contains_frustration(msg))
114 .map(|(idx, _)| idx)
115 .collect();
116 if hits.len() < 2 {
117 return Vec::new();
118 }
119
120 let mut signals = Vec::new();
126 let mut i = 0;
127 while i < hits.len() {
128 let start = hits[i];
129 let window_end_exclusive = start + INCIDENT_WINDOW;
130 let cluster: Vec<usize> = hits[i..]
131 .iter()
132 .copied()
133 .take_while(|h| *h < window_end_exclusive)
134 .collect();
135 if cluster.len() < 2 {
136 i += 1;
137 continue;
138 }
139 let last = *cluster.last().expect("non-empty cluster");
140 let summary = cap_chars(user_messages[last].trim(), MAX_SUMMARY_CHARS);
141 if !summary.is_empty() {
142 signals.push(ExtractionSignal {
143 kind: ExtractionKind::Incident,
144 summary,
145 evidence_indices: cluster.clone(),
146 });
147 }
148 i += cluster.len();
149 }
150 signals
151}
152
153fn contains_frustration(msg: &str) -> bool {
154 if msg.is_empty() {
155 return false;
156 }
157 let lower = msg.to_lowercase();
158 for phrase in FRUSTRATION_PHRASES {
159 if phrase.is_ascii() {
160 if lower.contains(*phrase) {
162 return true;
163 }
164 } else {
165 if msg.contains(*phrase) {
169 return true;
170 }
171 }
172 }
173 false
174}
175
/// Returns `s` limited to `max_chars` Unicode scalar values.
///
/// When `s` has at most `max_chars` characters it is returned unchanged.
/// Otherwise the first `max_chars` characters are kept and a single `…` is
/// appended, so a truncated result holds `max_chars + 1` characters. The cut
/// always falls on a `char` boundary, so multi-byte text is never split.
fn cap_chars(s: &str, max_chars: usize) -> String {
    // The character at position `max_chars` (if any) is the first one to drop;
    // its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        None => s.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
    }
}
190
/// Unit tests for the frustration-cluster incident detector.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detect_returns_empty_for_short_session() {
        assert!(detect(&[]).is_empty());
        assert!(detect(&["just one message"]).is_empty());
    }

    #[test]
    fn detect_returns_empty_when_no_frustration() {
        let msgs = ["normal", "everything fine", "all good"];
        assert!(detect(&msgs).is_empty());
    }

    #[test]
    fn detect_returns_empty_for_single_frustration_hit() {
        let msgs = ["normal", "this still not working"];
        assert!(detect(&msgs).is_empty());
    }

    #[test]
    fn detect_emits_signal_for_two_chinese_frustration_hits() {
        let msgs = ["试一下", "还是错了", "看看日志", "又失败了"];
        let signals = detect(&msgs);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, ExtractionKind::Incident);
        assert_eq!(signals[0].summary, "又失败了");
        assert_eq!(signals[0].evidence_indices, vec![1, 3]);
    }

    #[test]
    fn detect_emits_signal_for_two_english_frustration_hits() {
        let msgs = [
            "let me try this",
            "ugh, that's wrong",
            "let me check logs",
            "still not working",
        ];
        let signals = detect(&msgs);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].summary, "still not working");
    }

    #[test]
    fn detect_skips_when_hits_outside_sliding_window() {
        // Hits sit at index 0 and index INCIDENT_WINDOW + 1, so the second one
        // falls outside the window opened by the first.
        let mut msgs = vec!["wrong"];
        msgs.extend(std::iter::repeat_n("filler", INCIDENT_WINDOW));
        msgs.push("still not");
        let signals = detect(&msgs);
        assert!(
            signals.is_empty(),
            "hits across {} msgs should not emit",
            INCIDENT_WINDOW + 2
        );
    }

    #[test]
    fn detect_collapses_one_incident_per_overlapping_window() {
        let msgs = ["wrong", "still wrong", "broke again"];
        let signals = detect(&msgs);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].evidence_indices.len(), 3);
        assert_eq!(signals[0].summary, "broke again");
    }

    #[test]
    fn detect_caps_summary_when_user_message_is_huge() {
        let mut huge = String::from("wrong: ");
        huge.push_str(&"x".repeat(MAX_SUMMARY_CHARS * 2));
        let msgs = ["first wrong attempt", huge.as_str()];
        let signals = detect(&msgs);
        assert_eq!(signals.len(), 1);
        // The cap keeps MAX_SUMMARY_CHARS characters and appends one ellipsis.
        let chars = signals[0].summary.chars().count();
        assert!(chars <= MAX_SUMMARY_CHARS + 1);
        assert!(signals[0].summary.ends_with('…'));
    }

    #[test]
    fn detect_case_insensitive_for_english_phrases() {
        let msgs = ["WRONG.", "Still NOT working"];
        let signals = detect(&msgs);
        assert_eq!(signals.len(), 1);
    }

    #[test]
    fn detect_handles_mixed_chinese_english_hits() {
        let msgs = ["错了", "filler", "wrong"];
        let signals = detect(&msgs);
        assert_eq!(signals.len(), 1);
    }

    #[test]
    fn extraction_kind_memory_type_is_incident() {
        assert_eq!(ExtractionKind::Incident.memory_type(), "incident");
    }

    #[test]
    fn extraction_kind_memory_type_covers_other_kinds() {
        assert_eq!(
            ExtractionKind::BehaviorPattern.memory_type(),
            "behavior_pattern"
        );
        assert_eq!(ExtractionKind::Decision.memory_type(), "decision");
    }
}