use serde::{Deserialize, Serialize};
/// Width, in user-message indices, of the window used by `detect` to group
/// frustration hits: hits whose index falls in `[first, first + INCIDENT_WINDOW)`
/// belong to the same cluster as the hit at `first`.
pub const INCIDENT_WINDOW: usize = 6;
/// Maximum number of characters `detect` keeps from a message when building a
/// summary. A truncated summary additionally ends with a single `…`, so it may
/// be `MAX_SUMMARY_CHARS + 1` characters long.
pub const MAX_SUMMARY_CHARS: usize = 1000;
/// One extraction signal derived from a sequence of user messages.
///
/// The field descriptions below reflect how `detect` in this module fills the
/// struct; other producers (if any) are not visible here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExtractionSignal {
/// Category of the signal.
pub kind: ExtractionKind,
/// Short text describing the signal. `detect` uses the trimmed text of the
/// last message in the cluster, capped at `MAX_SUMMARY_CHARS` characters.
pub summary: String,
/// Indices into the message slice passed to `detect`, in ascending order,
/// of the messages that triggered this signal.
pub evidence_indices: Vec<usize>,
}
/// Category of an [`ExtractionSignal`].
///
/// Serialized in snake_case (`"incident"`, `"behavior_pattern"`, `"decision"`)
/// because of the `rename_all` attribute below.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ExtractionKind {
/// Emitted by `detect` for a cluster of repeated frustration messages.
Incident,
// NOTE(review): nothing in this file produces the two variants below;
// presumably they are emitted by other detectors — confirm.
BehaviorPattern,
Decision,
}
impl ExtractionKind {
pub fn memory_type(self) -> &'static str {
match self {
ExtractionKind::Incident => "incident",
ExtractionKind::BehaviorPattern => "behavior_pattern",
ExtractionKind::Decision => "decision",
}
}
}
/// Substrings that mark a user message as a frustration hit.
///
/// Matching is plain substring search (see `contains_frustration`):
/// ASCII entries are compared against the lowercased message, so they must be
/// written in lowercase here; non-ASCII entries are compared against the
/// message verbatim.
const FRUSTRATION_PHRASES: &[&str] = &[
// Chinese phrases (roughly: "got it wrong", "not right", "no good",
// "failed", "failed again", "not working", "crashed", "an error occurred").
"错了",
"不对",
"不行",
"失败了",
"又失败",
"不工作",
"崩了",
"出错了",
// English phrases, lowercase. The apostrophes are ASCII `'`.
"wrong",
"not working",
"doesn't work",
"didn't work",
"still broken",
"broke again",
"still not",
"not fixed",
];
/// Scans `user_messages` for clusters of frustration messages and returns one
/// `Incident` signal per cluster.
///
/// A cluster starts at a frustration hit and contains every following hit whose
/// index is less than `start + INCIDENT_WINDOW`. Clusters need at least two
/// hits; a lone hit is skipped and the scan resumes at the next hit. Hits that
/// were placed in a cluster are never reused for a later one.
///
/// For each cluster the signal's summary is the trimmed text of the cluster's
/// last message, capped at `MAX_SUMMARY_CHARS` characters, and its evidence is
/// the list of hit indices in the cluster.
///
/// Returns an empty vector when there are fewer than two messages or fewer
/// than two hits.
pub fn detect(user_messages: &[&str]) -> Vec<ExtractionSignal> {
    if user_messages.len() < 2 {
        return Vec::new();
    }

    // Positions of every message that contains a frustration phrase, ascending.
    let mut frustrated: Vec<usize> = Vec::new();
    for (position, text) in user_messages.iter().enumerate() {
        if contains_frustration(text) {
            frustrated.push(position);
        }
    }
    if frustrated.len() < 2 {
        return Vec::new();
    }

    let mut found = Vec::new();
    let mut cursor = 0;
    while cursor < frustrated.len() {
        let anchor = frustrated[cursor];
        let limit = anchor + INCIDENT_WINDOW;

        // Number of consecutive hits (starting with the anchor itself) that
        // fall inside the window opened by the anchor.
        let span = frustrated[cursor..]
            .iter()
            .take_while(|&&position| position < limit)
            .count();
        if span < 2 {
            cursor += 1;
            continue;
        }

        let members = &frustrated[cursor..cursor + span];
        let latest = members[span - 1];
        let summary = cap_chars(user_messages[latest].trim(), MAX_SUMMARY_CHARS);
        if !summary.is_empty() {
            found.push(ExtractionSignal {
                kind: ExtractionKind::Incident,
                summary,
                evidence_indices: members.to_vec(),
            });
        }

        // Skip past every hit consumed by this cluster.
        cursor += span;
    }
    found
}
/// Reports whether `msg` contains any entry of `FRUSTRATION_PHRASES`.
///
/// ASCII phrases are searched in a lowercased copy of `msg`, which makes them
/// case-insensitive; non-ASCII phrases are searched in `msg` as given. An
/// empty message never matches.
///
/// NOTE(review): the ASCII phrases use a plain `'`, so text typed with a
/// typographic apostrophe (e.g. "doesn’t work") does not match those entries.
fn contains_frustration(msg: &str) -> bool {
    if msg.is_empty() {
        return false;
    }
    let folded = msg.to_lowercase();
    FRUSTRATION_PHRASES.iter().any(|needle| {
        let haystack = if needle.is_ascii() {
            folded.as_str()
        } else {
            msg
        };
        haystack.contains(*needle)
    })
}
/// Truncates `s` to at most `max_chars` characters (Unicode scalar values).
///
/// Returns an owned copy of `s` when it already fits. Otherwise returns the
/// first `max_chars` characters followed by a single `…`, so a truncated
/// result is `max_chars + 1` characters long.
///
/// The output buffer is sized for the truncated text only, so capping a very
/// large input does not leave a correspondingly large allocation attached to
/// the short result.
fn cap_chars(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the first character that does NOT fit. Its byte
    // offset is a char boundary, so slicing there is safe for multi-byte text.
    match s.char_indices().nth(max_chars) {
        None => s.to_string(),
        Some((cut, _)) => {
            const ELLIPSIS: char = '…';
            let mut out = String::with_capacity(cut + ELLIPSIS.len_utf8());
            out.push_str(&s[..cut]);
            out.push(ELLIPSIS);
            out
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Zero or one message can never produce a signal.
    #[test]
    fn detect_returns_empty_for_short_session() {
        assert!(detect(&[]).is_empty());
        assert!(detect(&["just one message"]).is_empty());
    }

    /// Messages without any frustration phrase produce nothing.
    #[test]
    fn detect_returns_empty_when_no_frustration() {
        let session: &[&str] = &["normal", "everything fine", "all good"];
        assert!(detect(session).is_empty());
    }

    /// A single hit is not enough to form a cluster.
    #[test]
    fn detect_returns_empty_for_single_frustration_hit() {
        let session: &[&str] = &["normal", "this still not working"];
        assert!(detect(session).is_empty());
    }

    /// Two Chinese hits inside the window yield one incident whose summary is
    /// the last hit and whose evidence lists both hit indices.
    #[test]
    fn detect_emits_signal_for_two_chinese_frustration_hits() {
        let found = detect(&["试一下", "还是错了", "看看日志", "又失败了"]);
        assert_eq!(found.len(), 1);
        let incident = &found[0];
        assert_eq!(incident.kind, ExtractionKind::Incident);
        assert_eq!(incident.summary, "又失败了");
        assert_eq!(incident.evidence_indices, vec![1, 3]);
    }

    /// Same as above with English phrases.
    #[test]
    fn detect_emits_signal_for_two_english_frustration_hits() {
        let session: &[&str] = &[
            "let me try this",
            "ugh, that's wrong",
            "let me check logs",
            "still not working",
        ];
        let found = detect(session);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].summary, "still not working");
    }

    /// Hits separated by more than the window width do not cluster.
    #[test]
    fn detect_skips_when_hits_outside_sliding_window() {
        let mut session: Vec<&str> = Vec::with_capacity(INCIDENT_WINDOW + 2);
        session.push("wrong");
        session.extend(std::iter::repeat_n("filler", INCIDENT_WINDOW));
        session.push("still not");
        let found = detect(&session);
        assert!(
            found.is_empty(),
            "hits across {} msgs should not emit",
            INCIDENT_WINDOW + 2
        );
    }

    /// Three hits in one window collapse into a single incident.
    #[test]
    fn detect_collapses_one_incident_per_overlapping_window() {
        let found = detect(&["wrong", "still wrong", "broke again"]);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].evidence_indices.len(), 3);
        assert_eq!(found[0].summary, "broke again");
    }

    /// Oversized messages are truncated and marked with an ellipsis.
    #[test]
    fn detect_caps_summary_when_user_message_is_huge() {
        let huge = format!("wrong: {}", "x".repeat(MAX_SUMMARY_CHARS * 2));
        let found = detect(&["first wrong attempt", huge.as_str()]);
        assert_eq!(found.len(), 1);
        let summary = &found[0].summary;
        assert!(summary.chars().count() <= MAX_SUMMARY_CHARS + 1);
        assert!(summary.ends_with('…'));
    }

    /// English phrases match regardless of letter case.
    #[test]
    fn detect_case_insensitive_for_english_phrases() {
        let found = detect(&["WRONG.", "Still NOT working"]);
        assert_eq!(found.len(), 1);
    }

    /// Chinese and English hits can share a cluster.
    #[test]
    fn detect_handles_mixed_chinese_english_hits() {
        let found = detect(&["错了", "filler", "wrong"]);
        assert_eq!(found.len(), 1);
    }

    /// `Incident` maps to the `"incident"` label.
    #[test]
    fn extraction_kind_memory_type_is_incident() {
        assert_eq!(ExtractionKind::Incident.memory_type(), "incident");
    }
}