Skip to main content

chump_perception/
lib.rs

1//! Structured perception layer: rule-based extraction of task structure from user input.
2//! Runs before the main agent loop iteration. No LLM calls — fast pattern matching only.
3//! Reference architecture gap remediation: pre-reasoning structured perception.
4
5use serde::Serialize;
6
7#[derive(Debug, Clone, Serialize)]
8pub struct PerceivedInput {
9    pub raw_text: String,
10    pub likely_needs_tools: bool,
11    pub detected_entities: Vec<String>,
12    pub detected_constraints: Vec<String>,
13    pub ambiguity_level: f32,
14    pub risk_indicators: Vec<String>,
15    pub question_count: usize,
16    pub task_type: TaskType,
17}
18
19#[derive(Debug, Clone, PartialEq, Serialize)]
20pub enum TaskType {
21    Question,
22    Action,
23    Planning,
24    Research,
25    Meta,
26    Unclear,
27}
28
29impl std::fmt::Display for TaskType {
30    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31        match self {
32            Self::Question => write!(f, "Question"),
33            Self::Action => write!(f, "Action"),
34            Self::Planning => write!(f, "Planning"),
35            Self::Research => write!(f, "Research"),
36            Self::Meta => write!(f, "Meta"),
37            Self::Unclear => write!(f, "Unclear"),
38        }
39    }
40}
41
42/// Run structured perception on user input. Pure rule-based, no LLM calls.
43pub fn perceive(text: &str, needs_tools_hint: bool) -> PerceivedInput {
44    let trimmed = text.trim();
45    let lower = trimmed.to_lowercase();
46
47    let detected_entities = extract_entities(trimmed);
48    let detected_constraints = extract_constraints(&lower);
49    let risk_indicators = extract_risk_indicators(&lower);
50    let question_count = trimmed.matches('?').count();
51    let task_type = classify_task_type(&lower, question_count);
52    let ambiguity_level = score_ambiguity(trimmed, &lower, question_count, &detected_entities);
53
54    PerceivedInput {
55        raw_text: trimmed.to_string(),
56        likely_needs_tools: needs_tools_hint,
57        detected_entities,
58        detected_constraints,
59        ambiguity_level,
60        risk_indicators,
61        question_count,
62        task_type,
63    }
64}
65
66/// Build a compact context summary for system prompt injection.
67/// Returns empty string for trivial inputs.
68pub fn context_summary(p: &PerceivedInput) -> String {
69    if p.task_type == TaskType::Unclear
70        && p.detected_entities.is_empty()
71        && p.risk_indicators.is_empty()
72    {
73        return String::new();
74    }
75    let mut parts = Vec::new();
76    parts.push(format!("Task: {}", p.task_type));
77    if !p.detected_entities.is_empty() {
78        let entities: Vec<&str> = p
79            .detected_entities
80            .iter()
81            .take(5)
82            .map(|s| s.as_str())
83            .collect();
84        parts.push(format!("Entities: {}", entities.join(", ")));
85    }
86    if !p.detected_constraints.is_empty() {
87        let constraints: Vec<&str> = p
88            .detected_constraints
89            .iter()
90            .take(3)
91            .map(|s| s.as_str())
92            .collect();
93        parts.push(format!("Constraints: {}", constraints.join(", ")));
94    }
95    if p.ambiguity_level > 0.6 {
96        parts.push(format!(
97            "Ambiguity: {:.1} (consider clarifying)",
98            p.ambiguity_level
99        ));
100    }
101    if !p.risk_indicators.is_empty() {
102        parts.push(format!("Risk: {}", p.risk_indicators.join(", ")));
103    }
104    parts.join(" | ")
105}
106
107// ── Entity extraction ──────────────────────────────────────────────────
108
/// Pull likely entity names out of `text` (original casing preserved).
///
/// Three passes, in this order, all feeding one de-duplicated list:
/// 1. **Quoted spans** delimited by a matching pair of `"`, `'` or `` ` ``.
///    A `'` directly after a letter or digit is treated as an apostrophe
///    ("don't", "users'"), not as an opening quote, and a `'` between two
///    letters inside a single-quoted span does not close it.
/// 2. **Capitalized words** that are not the first word of a sentence and are
///    not in the common-word ignore list (checked after punctuation is
///    stripped, so `The,` is ignored just like `The`).
/// 3. **Path-like tokens** containing `/` or `\`, excluding URLs, with
///    surrounding quotes/brackets and trailing sentence punctuation removed.
///
/// Returns at most 10 entities. Unterminated quotes contribute nothing.
fn extract_entities(text: &str) -> Vec<String> {
    const MAX_ENTITIES: usize = 10;
    const IGNORE: &[&str] = &[
        "I", "I'm", "I'll", "I've", "I'd", "OK", "The", "A", "An", "It", "Is", "Are", "Was",
        "Were", "Do", "Does", "Did", "Have", "Has", "Had", "Can", "Could", "Will", "Would",
        "Should", "May", "Might", "But", "And", "Or", "So", "If", "When", "Where", "What", "How",
        "Why", "Who", "Which", "That", "This", "Not", "No", "Yes", "For", "From", "With", "About",
        "Also", "Just", "Then",
    ];

    /// Append `candidate` unless it is empty or already present.
    fn push_unique(entities: &mut Vec<String>, candidate: &str) {
        if !candidate.is_empty() && !entities.iter().any(|e| e.as_str() == candidate) {
            entities.push(candidate.to_string());
        }
    }

    let mut entities: Vec<String> = Vec::new();

    // Pass 1: quoted spans. Track WHICH quote character opened the span so that
    // only the same character can close it.
    let mut open_quote: Option<char> = None;
    let mut current = String::new();
    let mut prev_is_alnum = false;
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        let next_is_alnum = chars.peek().map_or(false, |c| c.is_alphanumeric());
        match open_quote {
            Some(quote) if ch == quote => {
                if ch == '\'' && prev_is_alnum && next_is_alnum {
                    // Apostrophe inside a single-quoted span, e.g. 'don't panic'.
                    current.push(ch);
                } else {
                    push_unique(&mut entities, current.trim());
                    current.clear();
                    open_quote = None;
                }
            }
            Some(_) => current.push(ch),
            None => {
                let is_quote = matches!(ch, '"' | '\'' | '`');
                // "don't", "users'": a `'` glued to the end of a word never opens a quote.
                let is_apostrophe = ch == '\'' && prev_is_alnum;
                if is_quote && !is_apostrophe {
                    open_quote = Some(ch);
                }
            }
        }
        prev_is_alnum = ch.is_alphanumeric();
    }

    // Pass 2: capitalized words that are not at the start of a sentence.
    let mut at_sentence_start = true;
    for word in text.split_whitespace() {
        let starts_sentence = at_sentence_start;
        // The next word starts a sentence if this one ends in `.`, `!` or `?`
        // (ignoring closing quotes/brackets after the punctuation).
        at_sentence_start = word
            .trim_end_matches(|c: char| matches!(c, '"' | '\'' | '`' | ')' | ']'))
            .ends_with(|c: char| matches!(c, '.' | '!' | '?'));

        if starts_sentence || !word.chars().next().map_or(false, char::is_uppercase) {
            continue;
        }
        let clean = word.trim_matches(|c: char| !c.is_alphanumeric());
        if clean.chars().count() > 1 && !IGNORE.contains(&clean) {
            push_unique(&mut entities, clean);
        }
    }

    // Pass 3: path-like tokens.
    for word in text.split_whitespace() {
        let mut path = word
            .trim_start_matches(|c: char| matches!(c, '\'' | '"' | '`' | '(' | '['))
            .trim_end_matches(|c: char| {
                matches!(c, '\'' | '"' | '`' | ')' | ']' | ',' | ';' | ':' | '!' | '?')
            });
        // Drop a sentence-ending period ("src/lib.rs.") but keep paths like "../..".
        if let Some(stem) = path.strip_suffix('.') {
            if stem.ends_with(char::is_alphanumeric) {
                path = stem;
            }
        }
        let looks_like_path = path.contains('/') || path.contains('\\');
        if looks_like_path && path.len() > 2 && !path.starts_with("http") {
            push_unique(&mut entities, path);
        }
    }

    entities.truncate(MAX_ENTITIES);
    entities
}
173
174// ── Constraint extraction ──────────────────────────────────────────────
175
/// Detect constraint phrasing in already-lowercased text.
///
/// Each marker (e.g. `"must "`, `"before "`) maps to a constraint kind such as
/// `"requirement"` or `"temporal:before"`. A marker only counts when it starts
/// on a word boundary, so "nearby " does not trigger the `"by "` deadline
/// marker and "commonly " does not trigger `"only "`.
///
/// Returns each matched kind once, in marker-table order.
fn extract_constraints(lower: &str) -> Vec<String> {
    const MARKERS: &[(&str, &str)] = &[
        ("before ", "temporal:before"),
        ("by ", "temporal:deadline"),
        ("after ", "temporal:after"),
        ("must ", "requirement"),
        ("cannot ", "prohibition"),
        ("don't ", "prohibition"),
        ("do not ", "prohibition"),
        ("never ", "prohibition"),
        ("always ", "requirement"),
        ("only ", "restriction"),
        ("at most ", "limit"),
        ("at least ", "minimum"),
        ("no more than ", "limit"),
        ("without ", "exclusion"),
    ];

    // True when `marker` occurs somewhere with no letter/digit directly before it.
    let starts_on_word_boundary = |marker: &str| {
        lower.match_indices(marker).any(|(idx, _)| {
            lower[..idx]
                .chars()
                .next_back()
                .map_or(true, |c| !c.is_alphanumeric())
        })
    };

    let mut constraints: Vec<String> = Vec::new();
    for &(marker, kind) in MARKERS {
        // Several markers share a kind; `Vec::dedup` would only drop adjacent
        // repeats, so check membership explicitly.
        if starts_on_word_boundary(marker) && !constraints.iter().any(|c| c.as_str() == kind) {
            constraints.push(kind.to_string());
        }
    }
    constraints
}
202
203// ── Risk indicator extraction ──────────────────────────────────────────
204
/// Detect risky keywords in already-lowercased text.
///
/// Every term must start on a word boundary, so "enforce" no longer counts as
/// `force`. Short branch/environment names (`prod`, `master`, `main`) must
/// also END on a word boundary: this lets them match at the end of the input
/// or before punctuation ("push to main", "deploy to prod.") while not
/// matching inside longer words ("domain", "production"). The remaining terms
/// match as prefixes so inflections such as "deleted" or "forced" still count.
///
/// Returns the matched terms in table order, each at most once.
fn extract_risk_indicators(lower: &str) -> Vec<String> {
    // (term, must_be_whole_word)
    const RISK_TERMS: &[(&str, bool)] = &[
        ("delete", false),
        ("drop", false),
        ("force", false),
        ("production", false),
        ("prod", true),
        ("master", true),
        ("main", true),
        ("rm -rf", false),
        ("sudo", false),
        ("reboot", false),
        ("shutdown", false),
        ("destroy", false),
        ("overwrite", false),
        ("reset", false),
        ("wipe", false),
        ("truncate", false),
        ("everything", false),
    ];

    /// Characters that continue an identifier-like word.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let mentions = |term: &str, whole_word: bool| {
        lower.match_indices(term).any(|(start, hit)| {
            let boundary_before = lower[..start]
                .chars()
                .next_back()
                .map_or(true, |c| !is_word_char(c));
            let boundary_after = lower[start + hit.len()..]
                .chars()
                .next()
                .map_or(true, |c| !is_word_char(c));
            boundary_before && (!whole_word || boundary_after)
        })
    };

    RISK_TERMS
        .iter()
        .filter(|&&(term, whole_word)| mentions(term, whole_word))
        .map(|&(term, _)| term.to_string())
        .collect()
}
231
232// ── Task type classification ───────────────────────────────────────────
233
234fn classify_task_type(lower: &str, question_count: usize) -> TaskType {
235    // Meta: about chump itself
236    if lower.contains("yourself")
237        || lower.contains("your memory")
238        || lower.contains("your brain")
239        || lower.contains("introspect")
240        || lower.contains("your status")
241        || lower.contains("your state")
242    {
243        return TaskType::Meta;
244    }
245    // Planning: multi-step indicators
246    if lower.contains("plan")
247        || lower.contains("steps to")
248        || lower.contains("strategy")
249        || lower.contains("roadmap")
250        || lower.contains("how should we")
251        || (lower.contains("first") && lower.contains("then"))
252    {
253        return TaskType::Planning;
254    }
255    // Research: investigation indicators
256    if lower.contains("research")
257        || lower.contains("investigate")
258        || lower.contains("explore")
259        || lower.contains("find out")
260        || lower.contains("look into")
261        || lower.contains("analyze")
262    {
263        return TaskType::Research;
264    }
265    // Question: ends with ? or starts with question words
266    if question_count > 0
267        || lower.starts_with("what ")
268        || lower.starts_with("why ")
269        || lower.starts_with("how ")
270        || lower.starts_with("when ")
271        || lower.starts_with("where ")
272        || lower.starts_with("who ")
273        || lower.starts_with("is ")
274        || lower.starts_with("are ")
275        || lower.starts_with("does ")
276        || lower.starts_with("do ")
277    {
278        return TaskType::Question;
279    }
280    // Action: imperative verbs
281    let action_starters = [
282        "run ", "create ", "make ", "build ", "deploy ", "fix ", "update ", "delete ", "write ",
283        "read ", "open ", "close ", "set ", "add ", "remove ", "install ", "push ", "commit ",
284        "merge ", "test ", "check ", "list ", "show ", "start ", "stop ",
285    ];
286    if action_starters.iter().any(|a| lower.starts_with(a)) {
287        return TaskType::Action;
288    }
289    TaskType::Unclear
290}
291
292// ── Ambiguity scoring ──────────────────────────────────────────────────
293
/// Heuristic ambiguity score in `0.0..=1.0`; higher means the request is vaguer.
///
/// Starts at 0.5 and adjusts:
/// * `+0.1` for each distinct vague word present ("something", "it", ...);
///   words are compared with surrounding punctuation stripped, so "it." and
///   "something?" count;
/// * `-0.1` per detected entity, for at most three entities;
/// * `+0.2` when the text is shorter than 20 characters;
/// * `+0.15` when there is more than one question mark;
/// * `-0.2` when the text is longer than 200 characters.
///
/// `text` is the trimmed original, `lower` its lowercase form. Lengths are
/// measured in characters rather than bytes so non-ASCII input is not treated
/// as longer than it is.
fn score_ambiguity(text: &str, lower: &str, question_count: usize, entities: &[String]) -> f32 {
    const VAGUE_WORDS: &[&str] = &[
        "something",
        "somehow",
        "maybe",
        "perhaps",
        "whatever",
        "stuff",
        "things",
        "it",
    ];

    // Tokenize once; strip punctuation so "stuff," still equals "stuff".
    let words: Vec<&str> = lower
        .split_whitespace()
        .map(|token| token.trim_matches(|c: char| !c.is_alphanumeric()))
        .collect();
    let vague_count = VAGUE_WORDS
        .iter()
        .filter(|vague| words.contains(*vague))
        .count();
    let char_len = text.chars().count();

    let mut score: f32 = 0.5;
    score += vague_count as f32 * 0.1;
    // Entities reduce ambiguity.
    score -= entities.len().min(3) as f32 * 0.1;
    // Short messages are more ambiguous.
    if char_len < 20 {
        score += 0.2;
    }
    // Several questions at once.
    if question_count > 1 {
        score += 0.15;
    }
    // Long, detailed messages reduce ambiguity.
    if char_len > 200 {
        score -= 0.2;
    }
    score.clamp(0.0, 1.0)
}
331
332// ── Tests ──────────────────────────────────────────────────────────────
333
/// End-to-end checks of the perception rules through the public entry points.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn classify_action() {
        let p = perceive("create a new task for the website redesign", true);
        assert_eq!(p.task_type, TaskType::Action);
        assert!(p.likely_needs_tools);
        // All-lowercase text with no quotes or paths yields no entities.
        assert!(p.detected_entities.is_empty());
    }

    #[test]
    fn classify_question() {
        let p = perceive("what tasks do we have?", false);
        assert_eq!(p.task_type, TaskType::Question);
        assert_eq!(p.question_count, 1);
    }

    #[test]
    fn classify_planning() {
        let p = perceive("plan the steps to migrate our database", true);
        assert_eq!(p.task_type, TaskType::Planning);
    }

    #[test]
    fn classify_research() {
        let p = perceive("investigate why the tests are failing", true);
        assert_eq!(p.task_type, TaskType::Research);
    }

    #[test]
    fn classify_meta() {
        let p = perceive("tell me about your memory", false);
        assert_eq!(p.task_type, TaskType::Meta);
    }

    #[test]
    fn risk_detection() {
        let p = perceive("delete everything in production", true);
        assert!(p.risk_indicators.contains(&"delete".to_string()));
        assert!(p.risk_indicators.contains(&"everything".to_string()));
        assert!(p.risk_indicators.contains(&"production".to_string()));
    }

    #[test]
    fn entity_extraction_quoted() {
        let p = perceive("look at the \"CustomerService\" module in src/lib.rs", true);
        assert!(p.detected_entities.contains(&"CustomerService".to_string()));
        assert!(p.detected_entities.iter().any(|e| e.contains("src/lib.rs")));
    }

    #[test]
    fn constraint_detection() {
        let p = perceive(
            "we must finish before Friday and cannot use the old API",
            true,
        );
        assert!(p.detected_constraints.contains(&"requirement".to_string()));
        assert!(p
            .detected_constraints
            .contains(&"temporal:before".to_string()));
        assert!(p.detected_constraints.contains(&"prohibition".to_string()));
    }

    #[test]
    fn ambiguity_high_for_vague() {
        let p = perceive("do something", true);
        assert!(p.ambiguity_level > 0.5);
    }

    #[test]
    fn ambiguity_low_for_detailed() {
        let long = "Create a new task titled 'Migrate database schema' with priority high, assigned to Jeff, due by April 20th. The task should include steps for backup, migration, and verification.";
        let p = perceive(long, true);
        assert!(p.ambiguity_level < 0.5);
    }

    #[test]
    fn context_summary_empty_for_trivial() {
        let p = perceive("hi", false);
        // Both conditions must hold: a greeting is unclassified AND produces no summary.
        assert_eq!(p.task_type, TaskType::Unclear);
        assert!(context_summary(&p).is_empty());
    }

    #[test]
    fn context_summary_nonempty_for_risk() {
        let p = perceive("delete the production database", true);
        let s = context_summary(&p);
        assert!(s.contains("Risk"));
    }
}