Skip to main content

batty_cli/shim/
classifier.rs

1//! State classifiers: determine agent state from virtual screen content.
2//!
3//! Each agent type (Claude, Codex, Kiro, Generic) has different prompt
4//! patterns, spinner indicators, and context exhaustion messages.
5
6use serde::{Deserialize, Serialize};
7
/// What the classifier thinks the agent is doing.
///
/// This is the result type of [`classify`] and of every per-agent
/// classifier in this module; each call judges a single snapshot of the
/// screen text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScreenVerdict {
    /// Agent is at its input prompt, waiting for a message.
    AgentIdle,
    /// Agent is actively processing (producing output).
    AgentWorking,
    /// Agent reported conversation/context too large.
    ///
    /// Produced by `classify` when the screen text contains one of the
    /// shared exhaustion phrases; that check runs before any agent-specific
    /// classification.
    ContextExhausted,
    /// Can't determine — keep previous state.
    ///
    /// Also what `classify` returns for a blank screen.
    Unknown,
}
20
/// Agent type selector for the shim classifier.
///
/// This operates on vt100::Screen content, independent of the AgentType
/// in src/agent/ which works with tmux capture.
///
/// Serialized with serde using snake_case variant names (`rename_all`
/// below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AgentType {
    /// Claude Code; handled by `classify_claude`.
    Claude,
    /// Codex; handled by `classify_codex`.
    Codex,
    /// Kiro; handled by `classify_kiro`.
    Kiro,
    /// Plain shell / REPL; handled by `classify_generic`.
    Generic,
}
33
34impl std::str::FromStr for AgentType {
35    type Err = String;
36    fn from_str(s: &str) -> Result<Self, Self::Err> {
37        match s.to_lowercase().as_str() {
38            "claude" => Ok(Self::Claude),
39            "codex" => Ok(Self::Codex),
40            "kiro" => Ok(Self::Kiro),
41            "generic" | "bash" | "shell" => Ok(Self::Generic),
42            _ => Err(format!("unknown agent type: {s}")),
43        }
44    }
45}
46
47impl std::fmt::Display for AgentType {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            Self::Claude => write!(f, "claude"),
51            Self::Codex => write!(f, "codex"),
52            Self::Kiro => write!(f, "kiro"),
53            Self::Generic => write!(f, "generic"),
54        }
55    }
56}
57
58// ---------------------------------------------------------------------------
59// Classifier dispatch
60// ---------------------------------------------------------------------------
61
62/// Classify screen content based on agent type.
63pub fn classify(agent_type: AgentType, screen: &vt100::Screen) -> ScreenVerdict {
64    let content = screen.contents();
65    if content.trim().is_empty() {
66        return ScreenVerdict::Unknown;
67    }
68
69    // Context exhaustion check (common across all types)
70    if detect_context_exhausted(&content) {
71        return ScreenVerdict::ContextExhausted;
72    }
73
74    match agent_type {
75        AgentType::Claude => classify_claude(&content),
76        AgentType::Codex => classify_codex(&content),
77        AgentType::Kiro => classify_kiro(&content),
78        AgentType::Generic => classify_generic(&content),
79    }
80}
81
82// ---------------------------------------------------------------------------
83// Context exhaustion (shared)
84// ---------------------------------------------------------------------------
85
/// Lowercase phrases that signal the agent ran out of context.
///
/// Matched as substrings of the lower-cased screen text, so every entry
/// must itself be lowercase.
const EXHAUSTION_PATTERNS: &[&str] = &[
    "context window exceeded",
    "context window is full",
    "conversation is too long",
    "maximum context length",
    "context limit reached",
    "truncated due to context limit",
    "input exceeds the model",
    "prompt is too long",
];

/// Returns `true` when `content` contains any exhaustion phrase,
/// compared case-insensitively.
fn detect_context_exhausted(content: &str) -> bool {
    let haystack = content.to_lowercase();
    for phrase in EXHAUSTION_PATTERNS {
        if haystack.contains(*phrase) {
            return true;
        }
    }
    false
}
101
102// ---------------------------------------------------------------------------
103// Claude Code classifier
104// ---------------------------------------------------------------------------
105
/// Claude Code prompt characters.
// Kept as reference data for the Claude prompt glyph.
// NOTE(review): nothing in the visible part of this file reads this
// constant (the tests spell the glyph as a literal), hence the lint
// allowance below — confirm whether other code still needs it.
#[allow(dead_code)]
const CLAUDE_PROMPT_CHARS: &[char] = &['\u{276F}']; // ❯
110
/// Claude spinner prefixes.
///
/// Glyphs that `looks_like_claude_spinner` accepts as the first character of
/// a spinner line.
// Only read by `looks_like_claude_spinner`, which is itself marked
// dead_code, so the lint allowance is needed here as well.
#[allow(dead_code)]
const CLAUDE_SPINNER_CHARS: &[char] = &[
    '\u{00B7}', // ·
    '\u{2722}', // ✢
    '\u{2733}', // ✳
    '\u{2736}', // ✶
    '\u{273B}', // ✻
    '\u{273D}', // ✽
];
121
122fn classify_claude(content: &str) -> ScreenVerdict {
123    // Claude Code classification based on the status bar and tool execution indicators.
124    //
125    // Working signals (any of these = Working):
126    //   - "esc to interrupt" in status bar (agent thinking/generating)
127    //   - "ctrl+b to run in background" (tool/bash command executing)
128    //   - "Waiting…" or "Running…" (tool execution in progress)
129    // Idle signal:
130    //   - Status bar present with none of the above
131    //
132    // The status bar is always in the last few lines of the terminal.
133    let lines: Vec<&str> = content.lines().collect();
134    let bottom: Vec<&str> = lines.iter().rev().take(6).copied().collect();
135
136    let is_working = bottom.iter().any(|line| {
137        let lower = line.trim().to_lowercase();
138        // "esc to interrupt" — agent thinking/generating (+ truncated variants)
139        lower.contains("esc to interrupt")
140            || lower.contains("esc to inter")
141            || lower.contains("esc to in\u{2026}")
142            || lower.contains("esc to in...")
143            || lower.contains("esc t\u{2026}")
144            || (lower.contains("esc t") && lower.contains("bypass"))
145            // "ctrl+b to run in background" — tool/bash executing
146            || lower.contains("ctrl+b to run")
147            || lower.contains("ctrl+b to r")
148    });
149
150    if is_working {
151        return ScreenVerdict::AgentWorking;
152    }
153
154    let has_status_bar = bottom.iter().any(|line| {
155        let lower = line.trim().to_lowercase();
156        lower.contains("bypass permissions")
157            || lower.contains("shift+tab")
158            || lower.contains("ctrl+g to edit")
159    });
160
161    if has_status_bar {
162        return ScreenVerdict::AgentIdle;
163    }
164
165    ScreenVerdict::Unknown
166}
167
168// ---------------------------------------------------------------------------
169// Startup dialog detection (for auto-dismiss during startup)
170// ---------------------------------------------------------------------------
171
/// Lowercase phrases taken from agent trust/consent dialogs that the shim
/// should auto-dismiss. One table serves every agent type, since both
/// Claude and Codex show such dialogs.
const STARTUP_DIALOG_PATTERNS: &[&str] = &[
    // Claude
    "is this a project you created",
    "quick safety check",
    "enter to confirm",
    "yes, i trust this folder",
    // Codex
    "do you trust the contents",
    "press enter to continue",
    "yes, continue",
    "working with untrusted contents",
];

/// Reports whether `content` shows a known startup dialog.
///
/// The text is lower-cased and searched for each phrase in
/// `STARTUP_DIALOG_PATTERNS`; a single hit is enough.
pub fn detect_startup_dialog(content: &str) -> bool {
    let haystack = content.to_lowercase();
    for phrase in STARTUP_DIALOG_PATTERNS {
        if haystack.contains(*phrase) {
            return true;
        }
    }
    false
}
193
/// Legacy alias for backward compatibility in tests.
///
/// Forwards `content` unchanged to [`detect_startup_dialog`] and returns its
/// result, so despite the name it also matches the Codex dialog phrases.
pub fn detect_claude_dialog(content: &str) -> bool {
    detect_startup_dialog(content)
}
198
199#[allow(dead_code)]
200fn looks_like_claude_spinner(line: &str) -> bool {
201    let trimmed = line.trim();
202    if trimmed.is_empty() {
203        return false;
204    }
205    let first = trimmed.chars().next().unwrap();
206    CLAUDE_SPINNER_CHARS.contains(&first)
207        && (trimmed.contains('\u{2026}') || trimmed.contains("(thinking"))
208}
209
210// ---------------------------------------------------------------------------
211// Codex classifier
212// ---------------------------------------------------------------------------
213
214fn classify_codex(content: &str) -> ScreenVerdict {
215    let lines: Vec<&str> = content.lines().collect();
216    let recent_nonempty: Vec<&str> = lines
217        .iter()
218        .rev()
219        .filter(|l| !l.trim().is_empty())
220        .take(12)
221        .copied()
222        .collect();
223
224    // Check for Codex working/loading indicators before idle check.
225    // "esc to interrupt" means Codex is actively working or loading.
226    for line in &recent_nonempty {
227        let lower = line.trim().to_lowercase();
228        if lower.contains("esc to interrupt")
229            || lower.contains("starting mcp")
230            || lower.contains("executing")
231        {
232            return ScreenVerdict::AgentWorking;
233        }
234    }
235
236    // Codex prompt: › at the start of a recent line.
237    // Codex shows placeholder text after › (e.g., "› Explain this codebase")
238    // which is greyed-out suggestion text — still idle.
239    // Only idle when no working indicators are present.
240    for line in &recent_nonempty {
241        let trimmed = line.trim();
242        if trimmed.starts_with('\u{203A}') {
243            return ScreenVerdict::AgentIdle;
244        }
245    }
246
247    ScreenVerdict::Unknown
248}
249
250// ---------------------------------------------------------------------------
251// Kiro classifier
252// ---------------------------------------------------------------------------
253
254fn classify_kiro(content: &str) -> ScreenVerdict {
255    let lines: Vec<&str> = content.lines().collect();
256    let recent_nonempty: Vec<&str> = lines
257        .iter()
258        .rev()
259        .filter(|l| !l.trim().is_empty())
260        .take(12)
261        .copied()
262        .collect();
263
264    // Check for working/loading indicators first
265    for line in &recent_nonempty {
266        let lower = line.trim().to_lowercase();
267        // Kiro-cli uses ● spinner during initialization and ⠉/⠋ braille
268        // spinners during processing
269        if lower.contains("initializing")
270            || lower.contains("esc to interrupt")
271            || lower.contains("thinking")
272            || lower.contains("planning")
273            || lower.contains("applying")
274        {
275            return ScreenVerdict::AgentWorking;
276        }
277    }
278
279    // Kiro-cli prompt: "ask a question, or describe a task"
280    // This is the placeholder text shown when kiro-cli is idle.
281    let lower_content = content.to_lowercase();
282    if lower_content.contains("ask a question") || lower_content.contains("describe a task") {
283        return ScreenVerdict::AgentIdle;
284    }
285
286    // Kiro prompts: Kiro>, kiro>, Kiro >, kiro >, or bare >
287    // Only match when the prompt has no typed content after it.
288    for line in &recent_nonempty {
289        let trimmed = line.trim();
290        let lower = trimmed.to_lowercase();
291        if trimmed == ">" || trimmed == "> " {
292            return ScreenVerdict::AgentIdle;
293        }
294        if lower.starts_with("kiro>") {
295            let after = &trimmed["kiro>".len()..];
296            if after.trim().is_empty() {
297                return ScreenVerdict::AgentIdle;
298            }
299        } else if lower.starts_with("kiro >") {
300            let after = &trimmed["kiro >".len()..];
301            if after.trim().is_empty() {
302                return ScreenVerdict::AgentIdle;
303            }
304        }
305        if trimmed.ends_with("> ") || trimmed.ends_with('>') {
306            let before_gt = trimmed.trim_end_matches(['>', ' ']);
307            if before_gt.len() < trimmed.len() {
308                return ScreenVerdict::AgentIdle;
309            }
310        }
311    }
312
313    ScreenVerdict::Unknown
314}
315
316// ---------------------------------------------------------------------------
317// Generic classifier (bash / shell / REPL)
318// ---------------------------------------------------------------------------
319
320fn classify_generic(content: &str) -> ScreenVerdict {
321    let lines: Vec<&str> = content.lines().collect();
322    let recent_nonempty: Vec<&str> = lines
323        .iter()
324        .rev()
325        .filter(|l| !l.trim().is_empty())
326        .take(6)
327        .copied()
328        .collect();
329
330    for line in &recent_nonempty {
331        let trimmed = line.trim();
332        // Shell prompts: ends with "$ " or "$", or "% " or "%", or "> " or ">"
333        if trimmed.ends_with("$ ")
334            || trimmed.ends_with('$')
335            || trimmed.ends_with("% ")
336            || trimmed.ends_with('%')
337            || trimmed.ends_with("> ")
338            || trimmed.ends_with('>')
339        {
340            return ScreenVerdict::AgentIdle;
341        }
342    }
343
344    ScreenVerdict::Unknown
345}
346
347// ---------------------------------------------------------------------------
348// Tests
349// ---------------------------------------------------------------------------
350
// Unit tests for the classifiers. Most tests feed raw text through a vt100
// parser and classify the resulting screen, so they exercise `classify`
// end to end; the dialog tests call the detection helpers on plain strings.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parser constructed with `(24, 80, 0)` and feeds it `content`.
    /// The parser (not the screen) is returned so callers can borrow
    /// `parser.screen()` for as long as they need it.
    // NOTE(review): the arguments presumably mean 24 rows, 80 columns and no
    // scrollback — confirm against the vt100 `Parser::new` documentation.
    fn make_screen(content: &str) -> vt100::Parser {
        let mut parser = vt100::Parser::new(24, 80, 0);
        parser.process(content.as_bytes());
        parser
    }

    // -- Claude --

    #[test]
    fn claude_idle_prompt() {
        // Status bar without "esc to interrupt" = idle
        let parser =
            make_screen("Some output\n\u{276F}\n  bypass permissions on (shift+tab to cycle)");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn claude_idle_bare_prompt() {
        // Status bar with "ctrl+g to edit" but no interrupt = idle
        let parser = make_screen("Some output\n\u{276F}\n  ctrl+g to edit in Vim");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn claude_working_spinner() {
        // Status bar with "esc to interrupt" = working
        let parser =
            make_screen("\u{00B7} Thinking\u{2026}\n  bypass permissions on · esc to interrupt");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    #[test]
    fn claude_working_interrupt_footer() {
        // The interrupt hint alone, without any status-bar text, is enough.
        let parser = make_screen("Some output\nesc to interrupt\n");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    #[test]
    fn claude_working_interrupt_truncated() {
        // Hint cut off after "inter" still counts as working.
        let parser = make_screen("Some output\nesc to inter\n");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    #[test]
    fn claude_working_interrupt_narrow_pane_ellipsis() {
        // Narrow pane truncates to "esc t…" with ellipsis
        let parser =
            make_screen("output\n  bypass permissions on (shift+tab) \u{00B7} esc t\u{2026}");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    #[test]
    fn claude_working_interrupt_narrow_pane_cutoff() {
        // Narrow pane cuts mid-word with bypass context
        let parser = make_screen("output\n  bypass permissions on · esc t");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    #[test]
    fn claude_context_exhausted() {
        // Exhaustion text wins even though a prompt glyph is also visible.
        let parser = make_screen("Error: context window is full\n\u{276F} ");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::ContextExhausted
        );
    }

    // -- Codex --

    #[test]
    fn codex_idle_prompt() {
        let parser = make_screen("Done.\n\n\u{203A} ");
        assert_eq!(
            classify(AgentType::Codex, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn codex_idle_bare_prompt() {
        let parser = make_screen("Done.\n\n\u{203A}");
        assert_eq!(
            classify(AgentType::Codex, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn codex_unknown_no_prompt() {
        // No prompt glyph and no working marker ("running" is not one).
        let parser = make_screen("Running something...\n");
        assert_eq!(
            classify(AgentType::Codex, parser.screen()),
            ScreenVerdict::Unknown
        );
    }

    #[test]
    fn codex_idle_with_placeholder() {
        // Codex shows placeholder text after › — still idle
        let parser = make_screen("Output\n\u{203A} Explain this codebase\n");
        assert_eq!(
            classify(AgentType::Codex, parser.screen()),
            ScreenVerdict::AgentIdle,
            "placeholder text after › should be Idle"
        );
    }

    // -- Kiro --

    #[test]
    fn kiro_idle_prompt() {
        let parser = make_screen("Result\nKiro> ");
        assert_eq!(
            classify(AgentType::Kiro, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn kiro_idle_bare_gt() {
        let parser = make_screen("Result\n>");
        assert_eq!(
            classify(AgentType::Kiro, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn kiro_working() {
        // "thinking" is one of the Kiro working markers.
        let parser = make_screen("Kiro is thinking...\n");
        assert_eq!(
            classify(AgentType::Kiro, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    #[test]
    fn kiro_working_agent_planning() {
        // "planning" is one of the Kiro working markers.
        let parser = make_screen("Agent is planning...\n");
        assert_eq!(
            classify(AgentType::Kiro, parser.screen()),
            ScreenVerdict::AgentWorking
        );
    }

    // -- Generic --

    #[test]
    fn generic_shell_prompt_dollar() {
        let parser = make_screen("user@host:~$ ");
        assert_eq!(
            classify(AgentType::Generic, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn generic_shell_prompt_percent() {
        let parser = make_screen("user@host:~% ");
        assert_eq!(
            classify(AgentType::Generic, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn generic_shell_prompt_gt() {
        let parser = make_screen("prompt> ");
        assert_eq!(
            classify(AgentType::Generic, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn generic_empty_unknown() {
        // A blank screen short-circuits to Unknown inside `classify`.
        let parser = make_screen("");
        assert_eq!(
            classify(AgentType::Generic, parser.screen()),
            ScreenVerdict::Unknown
        );
    }

    // -- Shared --

    #[test]
    fn exhaustion_all_types() {
        // The exhaustion check is shared, so every agent type must report it
        // even with a shell prompt on the last line.
        for agent_type in [
            AgentType::Claude,
            AgentType::Codex,
            AgentType::Kiro,
            AgentType::Generic,
        ] {
            let parser = make_screen("Error: conversation is too long to continue\n$ ");
            assert_eq!(
                classify(agent_type, parser.screen()),
                ScreenVerdict::ContextExhausted,
                "failed for {agent_type}",
            );
        }
    }

    #[test]
    fn exhaustion_maximum_context_length() {
        let parser = make_screen("Error: maximum context length exceeded\n$ ");
        assert_eq!(
            classify(AgentType::Generic, parser.screen()),
            ScreenVerdict::ContextExhausted
        );
    }

    #[test]
    fn agent_type_from_str() {
        // Parsing is case-insensitive; "bash" and "shell" alias Generic.
        assert_eq!("claude".parse::<AgentType>().unwrap(), AgentType::Claude);
        assert_eq!("CODEX".parse::<AgentType>().unwrap(), AgentType::Codex);
        assert_eq!("Kiro".parse::<AgentType>().unwrap(), AgentType::Kiro);
        assert_eq!("generic".parse::<AgentType>().unwrap(), AgentType::Generic);
        assert_eq!("bash".parse::<AgentType>().unwrap(), AgentType::Generic);
        assert_eq!("shell".parse::<AgentType>().unwrap(), AgentType::Generic);
        assert!("unknown".parse::<AgentType>().is_err());
    }

    #[test]
    fn agent_type_display() {
        assert_eq!(AgentType::Claude.to_string(), "claude");
        assert_eq!(AgentType::Codex.to_string(), "codex");
        assert_eq!(AgentType::Kiro.to_string(), "kiro");
        assert_eq!(AgentType::Generic.to_string(), "generic");
    }

    #[test]
    fn claude_dialog_not_idle() {
        // Trust dialog with ❯ as selection indicator — NOT an idle prompt
        let parser = make_screen(
            "Quick safety check: Is this a project you created or one you trust?\n\n\
             \u{276F} 1. Yes, I trust this folder\n\
             2. No, exit\n\n\
             Enter to confirm \u{00B7} Esc to cancel\n",
        );
        assert_ne!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentIdle,
            "trust dialog should NOT be classified as Idle"
        );
    }

    #[test]
    fn claude_dialog_detected() {
        let content = "Quick safety check: Is this a project you created or one you trust?\n\
                       \u{276F} 1. Yes, I trust this folder\n\
                       Enter to confirm";
        assert!(
            detect_claude_dialog(content),
            "should detect Claude trust dialog"
        );
    }

    #[test]
    fn claude_dialog_not_detected_normal() {
        let content = "Some response\n\u{276F} ";
        assert!(
            !detect_claude_dialog(content),
            "normal prompt should not trigger dialog detection"
        );
    }

    #[test]
    fn codex_dialog_detected() {
        let content = "Do you trust the contents of this directory?\n\
                       \u{203A} 1. Yes, continue\n\
                       Press enter to continue";
        assert!(
            detect_startup_dialog(content),
            "should detect Codex trust dialog"
        );
    }

    #[test]
    fn claude_idle_with_trailing_spaces() {
        // Status bar present, no interrupt = idle
        let parser =
            make_screen("Output\n\u{276F}    \n  bypass permissions on (shift+tab to cycle)    ");
        assert_eq!(
            classify(AgentType::Claude, parser.screen()),
            ScreenVerdict::AgentIdle
        );
    }

    #[test]
    fn all_exhaustion_patterns_trigger() {
        // Guards the pattern table: each entry must trigger on its own.
        for pattern in EXHAUSTION_PATTERNS {
            let parser = make_screen(&format!("Error: {pattern}\n$ "));
            assert_eq!(
                classify(AgentType::Generic, parser.screen()),
                ScreenVerdict::ContextExhausted,
                "pattern '{pattern}' did not trigger exhaustion",
            );
        }
    }
}