batty_cli/shim/
classifier.rs

1//! State classifiers: determine agent state from virtual screen content.
2//!
3//! Each agent type (Claude, Codex, Kiro, Generic) has different prompt
4//! patterns, spinner indicators, and context exhaustion messages.
5
6use serde::{Deserialize, Serialize};
7
/// What the classifier thinks the agent is doing.
///
/// This is the coarse state derived from a snapshot of the virtual screen.
/// [`classify`] reports [`ScreenVerdict::Unknown`] whenever the underlying
/// match is weaker than [`MIN_CLASSIFIER_CONFIDENCE`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScreenVerdict {
    /// Agent is at its input prompt, waiting for a message.
    AgentIdle,
    /// Agent is actively processing (producing output).
    AgentWorking,
    /// Agent reported conversation/context too large.
    ContextExhausted,
    /// Can't determine — keep previous state.
    Unknown,
}
20
/// A [`ScreenVerdict`] paired with how strongly the screen matched it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Classification {
    /// The state the screen content was matched to.
    pub verdict: ScreenVerdict,
    /// Match strength. The classifiers in this file emit values between
    /// `0.0` (nothing recognised) and `1.0` (exact match).
    pub confidence: f32,
}
26
27impl Classification {
28    const fn exact(verdict: ScreenVerdict) -> Self {
29        Self {
30            verdict,
31            confidence: 1.0,
32        }
33    }
34
35    const fn ambiguous(verdict: ScreenVerdict) -> Self {
36        Self {
37            verdict,
38            confidence: 0.45,
39        }
40    }
41
42    const fn unknown() -> Self {
43        Self {
44            verdict: ScreenVerdict::Unknown,
45            confidence: 0.0,
46        }
47    }
48}
49
/// Minimum confidence a [`Classification`] needs before [`classify`] reports
/// its verdict; anything lower is reported as [`ScreenVerdict::Unknown`].
pub const MIN_CLASSIFIER_CONFIDENCE: f32 = 0.75;
51
/// How a single output line should be treated by narration enforcement.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NarrationLineKind {
    /// The line contains a planning/explanation phrase or ends with `?`.
    Explanation,
    /// The line carries a command or tool-invocation marker.
    ToolOrCommand,
    /// Blank line, or a line matching neither of the above.
    Other,
}
59
/// Agent type selector for the shim classifier.
///
/// This operates on vt100::Screen content, independent of the AgentType
/// in src/agent/ which works with tmux capture.
///
/// Serialized in `snake_case` (e.g. `"claude"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AgentType {
    /// Claude Code.
    Claude,
    /// Codex.
    Codex,
    /// Kiro (kiro-cli).
    Kiro,
    /// Plain shell / REPL; also what `"bash"` and `"shell"` parse to.
    Generic,
}
72
73impl std::str::FromStr for AgentType {
74    type Err = String;
75    fn from_str(s: &str) -> Result<Self, Self::Err> {
76        match s.to_lowercase().as_str() {
77            "claude" => Ok(Self::Claude),
78            "codex" => Ok(Self::Codex),
79            "kiro" => Ok(Self::Kiro),
80            "generic" | "bash" | "shell" => Ok(Self::Generic),
81            _ => Err(format!("unknown agent type: {s}")),
82        }
83    }
84}
85
86impl std::fmt::Display for AgentType {
87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88        match self {
89            Self::Claude => write!(f, "claude"),
90            Self::Codex => write!(f, "codex"),
91            Self::Kiro => write!(f, "kiro"),
92            Self::Generic => write!(f, "generic"),
93        }
94    }
95}
96
97// ---------------------------------------------------------------------------
98// Classifier dispatch
99// ---------------------------------------------------------------------------
100
101/// Classify screen content based on agent type.
102pub fn classify(agent_type: AgentType, screen: &vt100::Screen) -> ScreenVerdict {
103    let result = classify_with_confidence(agent_type, screen);
104    if result.confidence >= MIN_CLASSIFIER_CONFIDENCE {
105        result.verdict
106    } else {
107        ScreenVerdict::Unknown
108    }
109}
110
111/// Classify screen content and return a confidence score for the match.
112pub fn classify_with_confidence(agent_type: AgentType, screen: &vt100::Screen) -> Classification {
113    let content = screen.contents();
114    if content.trim().is_empty() {
115        return Classification::unknown();
116    }
117
118    // Context exhaustion check (common across all types)
119    if detect_context_exhausted(&content) {
120        return Classification::exact(ScreenVerdict::ContextExhausted);
121    }
122
123    match agent_type {
124        AgentType::Claude => classify_claude(&content),
125        AgentType::Codex => classify_codex(&content),
126        AgentType::Kiro => classify_kiro(&content),
127        AgentType::Generic => classify_generic(&content),
128    }
129}
130
131/// Detect meta-conversation patterns where the agent keeps planning or
132/// narrating without moving to concrete execution.
133pub fn detect_meta_conversation(content: &str, agent_type: AgentType) -> bool {
134    let lower = content.to_lowercase();
135    let trimmed = lower.trim();
136    if trimmed.is_empty() {
137        return false;
138    }
139
140    let tool_markers: &[&str] = match agent_type {
141        AgentType::Claude => &[
142            "read(",
143            "edit(",
144            "bash(",
145            "write(",
146            "grep(",
147            "glob(",
148            "multiedit(",
149            "⎿",
150        ],
151        AgentType::Codex => &[
152            "apply_patch",
153            "*** begin patch",
154            "$ ",
155            "\n$ ",
156            "exit code:",
157            "target/",
158        ],
159        AgentType::Kiro => &["applying", "$ ", "\n$ ", "running…", "running..."],
160        AgentType::Generic => &["$ ", "\n$ ", "exit code:"],
161    };
162    if tool_markers.iter().any(|marker| trimmed.contains(marker)) {
163        return false;
164    }
165
166    let meta_patterns = [
167        "i should",
168        "i will",
169        "i'll",
170        "let me",
171        "next step",
172        "need to",
173        "we need to",
174        "should i",
175        "maybe i should",
176        "perhaps i should",
177        "i can",
178        "plan:",
179        "thinking through",
180        "first, i'll",
181        "then i'll",
182        "instead of",
183    ];
184    let question_patterns = [
185        "should i",
186        "what should i",
187        "do i need to",
188        "am i supposed to",
189        "would it make sense",
190    ];
191
192    let meta_hits = meta_patterns
193        .iter()
194        .filter(|pattern| trimmed.contains(**pattern))
195        .count();
196    let question_hits = question_patterns
197        .iter()
198        .filter(|pattern| trimmed.contains(**pattern))
199        .count();
200    let line_hits = trimmed
201        .lines()
202        .filter(|line| {
203            let line = line.trim();
204            !line.is_empty()
205                && (meta_patterns.iter().any(|pattern| line.contains(pattern))
206                    || question_patterns
207                        .iter()
208                        .any(|pattern| line.contains(pattern))
209                    || line.ends_with('?'))
210        })
211        .count();
212
213    (meta_hits + question_hits) >= 2 || line_hits >= 2
214}
215
216/// Classify a single output line for narration-loop detection.
217pub fn classify_narration_line(line: &str, agent_type: AgentType) -> NarrationLineKind {
218    let trimmed = line.trim();
219    if trimmed.is_empty() {
220        return NarrationLineKind::Other;
221    }
222
223    if has_command_or_tool_signal(trimmed, agent_type) {
224        return NarrationLineKind::ToolOrCommand;
225    }
226
227    let lower = trimmed.to_ascii_lowercase();
228    let explanation_patterns = [
229        "i should",
230        "i will",
231        "i'll",
232        "let me",
233        "next step",
234        "need to",
235        "we need to",
236        "should i",
237        "maybe i should",
238        "perhaps i should",
239        "i can",
240        "plan:",
241        "thinking through",
242        "first, i'll",
243        "then i'll",
244        "instead of",
245        "i'm going to",
246        "before i",
247    ];
248    if explanation_patterns
249        .iter()
250        .any(|pattern| lower.contains(pattern))
251        || lower.ends_with('?')
252    {
253        return NarrationLineKind::Explanation;
254    }
255
256    NarrationLineKind::Other
257}
258
259/// Detect whether the current screen content looks like narration instead of
260/// concrete execution. This is intentionally screen-level so daemon health
261/// checks can count consecutive narration polls.
262pub fn detect_narration_pattern(content: &str, agent_type: AgentType) -> bool {
263    let mut explanation_lines = 0usize;
264
265    for line in content.lines() {
266        match classify_narration_line(line, agent_type) {
267            NarrationLineKind::ToolOrCommand => return false,
268            NarrationLineKind::Explanation => explanation_lines += 1,
269            NarrationLineKind::Other => {}
270        }
271    }
272
273    explanation_lines > 0
274}
275
276fn has_command_or_tool_signal(line: &str, agent_type: AgentType) -> bool {
277    let common_markers = [
278        "*** Begin Patch",
279        "*** Update File:",
280        "*** Add File:",
281        "*** Delete File:",
282        "$ ",
283        "> ",
284        "Exit code:",
285        "apply_patch",
286    ];
287    if common_markers.iter().any(|marker| line.contains(marker)) {
288        return true;
289    }
290
291    let trimmed = line.trim_start();
292    let shell_prefixes = [
293        "git ",
294        "cargo ",
295        "rg ",
296        "sed ",
297        "ls ",
298        "cat ",
299        "grep ",
300        "find ",
301        "npm ",
302        "pnpm ",
303        "yarn ",
304        "pytest",
305        "go test",
306        "make ",
307        "batty ",
308        "kanban-md ",
309    ];
310    if shell_prefixes
311        .iter()
312        .any(|prefix| trimmed.starts_with(prefix))
313    {
314        return true;
315    }
316
317    match agent_type {
318        AgentType::Claude => [
319            "Read(",
320            "Edit(",
321            "Bash(",
322            "Write(",
323            "Grep(",
324            "Glob(",
325            "MultiEdit(",
326            "⎿",
327        ]
328        .iter()
329        .any(|marker| line.contains(marker)),
330        AgentType::Codex => ["target/", "apply_patch"]
331            .iter()
332            .any(|marker| line.contains(marker)),
333        AgentType::Kiro => ["applying", "running…", "running..."]
334            .iter()
335            .any(|marker| line.to_ascii_lowercase().contains(marker)),
336        AgentType::Generic => false,
337    }
338}
339
340// ---------------------------------------------------------------------------
341// Context exhaustion (shared)
342// ---------------------------------------------------------------------------
343
/// Lowercase phrases that indicate the conversation/context is too large.
const EXHAUSTION_PATTERNS: &[&str] = &[
    "context window exceeded",
    "context window is full",
    "conversation is too long",
    "maximum context length",
    "context limit reached",
    "truncated due to context limit",
    "input exceeds the model",
    "prompt is too long",
];

/// Returns `true` when `content` contains any of [`EXHAUSTION_PATTERNS`],
/// compared case-insensitively.
fn detect_context_exhausted(content: &str) -> bool {
    let haystack = content.to_lowercase();
    for pattern in EXHAUSTION_PATTERNS {
        if haystack.contains(*pattern) {
            return true;
        }
    }
    false
}
359
360// ---------------------------------------------------------------------------
361// Claude Code classifier
362// ---------------------------------------------------------------------------
363
/// Claude Code prompt characters.
///
/// Currently only `❯` (U+276F).
// Claude prompt and spinner chars retained for use in tests and other classifiers.
#[allow(dead_code)]
const CLAUDE_PROMPT_CHARS: &[char] = &['\u{276F}']; // ❯
368
/// Claude spinner prefixes.
///
/// `looks_like_claude_spinner` checks the first character of a trimmed line
/// against this set.
#[allow(dead_code)]
const CLAUDE_SPINNER_CHARS: &[char] = &[
    '\u{00B7}', // ·
    '\u{2722}', // ✢
    '\u{2733}', // ✳
    '\u{2736}', // ✶
    '\u{273B}', // ✻
    '\u{273D}', // ✽
];
379
380fn classify_claude(content: &str) -> Classification {
381    // Claude Code classification based on the status bar and tool execution indicators.
382    //
383    // Working signals (any of these = Working):
384    //   - "esc to interrupt" in status bar (agent thinking/generating)
385    //   - "ctrl+b to run in background" (tool/bash command executing)
386    //   - "Waiting…" or "Running…" (tool execution in progress)
387    // Idle signal:
388    //   - Status bar present with none of the above
389    //
390    // The status bar is always in the last few lines of the terminal.
391    let lines: Vec<&str> = content.lines().collect();
392    let bottom: Vec<&str> = lines.iter().rev().take(6).copied().collect();
393
394    let working_confidence = bottom
395        .iter()
396        .filter_map(|line| claude_working_confidence(line))
397        .max_by(f32::total_cmp);
398
399    if let Some(confidence) = working_confidence {
400        return if confidence >= MIN_CLASSIFIER_CONFIDENCE {
401            Classification {
402                verdict: ScreenVerdict::AgentWorking,
403                confidence,
404            }
405        } else {
406            Classification::ambiguous(ScreenVerdict::AgentWorking)
407        };
408    }
409
410    let idle_confidence = bottom
411        .iter()
412        .filter_map(|line| {
413            best_phrase_confidence(line, &["bypass permissions", "shift+tab", "ctrl+g to edit"])
414        })
415        .max_by(f32::total_cmp);
416
417    if let Some(confidence) = idle_confidence {
418        return Classification {
419            verdict: ScreenVerdict::AgentIdle,
420            confidence,
421        };
422    }
423
424    Classification::unknown()
425}
426
427// ---------------------------------------------------------------------------
428// Startup dialog detection (for auto-dismiss during startup)
429// ---------------------------------------------------------------------------
430
/// Patterns that indicate an agent trust/consent dialog requiring auto-dismiss.
/// Shared across agent types — both Claude and Codex show trust dialogs.
const STARTUP_DIALOG_PATTERNS: &[&str] = &[
    // Claude
    "is this a project you created",
    "quick safety check",
    "enter to confirm",
    "yes, i trust this folder",
    // Codex
    "do you trust the contents",
    "press enter to continue",
    "yes, continue",
    "working with untrusted contents",
];

/// Detect known startup dialogs that should be auto-dismissed by the shim.
/// Works for Claude, Codex, and other agents that show trust prompts.
///
/// Matching is case-insensitive and succeeds on the first known phrase found
/// anywhere in `content`.
pub fn detect_startup_dialog(content: &str) -> bool {
    let haystack = content.to_lowercase();
    for phrase in STARTUP_DIALOG_PATTERNS {
        if haystack.contains(*phrase) {
            return true;
        }
    }
    false
}
452
/// Legacy alias for backward compatibility in tests.
///
/// Forwards to [`detect_startup_dialog`] unchanged, so it matches the same
/// dialog phrases.
pub fn detect_claude_dialog(content: &str) -> bool {
    detect_startup_dialog(content)
}
457
458#[allow(dead_code)]
459fn looks_like_claude_spinner(line: &str) -> bool {
460    let trimmed = line.trim();
461    if trimmed.is_empty() {
462        return false;
463    }
464    let first = trimmed.chars().next().unwrap();
465    CLAUDE_SPINNER_CHARS.contains(&first)
466        && (trimmed.contains('\u{2026}') || trimmed.contains("(thinking"))
467}
468
469// ---------------------------------------------------------------------------
470// Codex classifier
471// ---------------------------------------------------------------------------
472
473fn classify_codex(content: &str) -> Classification {
474    let lines: Vec<&str> = content.lines().collect();
475    let recent_nonempty: Vec<&str> = lines
476        .iter()
477        .rev()
478        .filter(|l| !l.trim().is_empty())
479        .take(12)
480        .copied()
481        .collect();
482
483    // Check for Codex working/loading indicators before idle check.
484    // "esc to interrupt" means Codex is actively working or loading.
485    for line in &recent_nonempty {
486        if let Some(confidence) =
487            best_phrase_confidence(line, &["esc to interrupt", "starting mcp", "executing"])
488        {
489            return if confidence >= MIN_CLASSIFIER_CONFIDENCE {
490                Classification {
491                    verdict: ScreenVerdict::AgentWorking,
492                    confidence,
493                }
494            } else {
495                Classification::ambiguous(ScreenVerdict::AgentWorking)
496            };
497        }
498    }
499
500    // Codex prompt: › at the start of a recent line.
501    // Codex shows placeholder text after › (e.g., "› Explain this codebase")
502    // which is greyed-out suggestion text — still idle.
503    // Only idle when no working indicators are present.
504    for line in &recent_nonempty {
505        let trimmed = line.trim();
506        if trimmed.starts_with('\u{203A}') {
507            let confidence = if trimmed
508                .strip_prefix('\u{203A}')
509                .is_some_and(|r| r.trim().is_empty())
510            {
511                1.0
512            } else {
513                0.92
514            };
515            return Classification {
516                verdict: ScreenVerdict::AgentIdle,
517                confidence,
518            };
519        }
520    }
521
522    Classification::unknown()
523}
524
525// ---------------------------------------------------------------------------
526// Kiro classifier
527// ---------------------------------------------------------------------------
528
529fn classify_kiro(content: &str) -> Classification {
530    let lines: Vec<&str> = content.lines().collect();
531    let recent_nonempty: Vec<&str> = lines
532        .iter()
533        .rev()
534        .filter(|l| !l.trim().is_empty())
535        .take(12)
536        .copied()
537        .collect();
538
539    // Check for working/loading indicators first
540    for line in &recent_nonempty {
541        // Kiro-cli uses ● spinner during initialization and ⠉/⠋ braille
542        // spinners during processing
543        if let Some(confidence) = best_phrase_confidence(
544            line,
545            &[
546                "initializing",
547                "esc to interrupt",
548                "thinking",
549                "planning",
550                "applying",
551            ],
552        ) {
553            return Classification {
554                verdict: ScreenVerdict::AgentWorking,
555                confidence,
556            };
557        }
558    }
559
560    // Kiro-cli prompt: "ask a question, or describe a task"
561    // This is the placeholder text shown when kiro-cli is idle.
562    let lower_content = content.to_lowercase();
563    if lower_content.contains("ask a question") || lower_content.contains("describe a task") {
564        return Classification::exact(ScreenVerdict::AgentIdle);
565    }
566
567    // Kiro prompts: Kiro>, kiro>, Kiro >, kiro >, or bare >
568    // Only match when the prompt has no typed content after it.
569    for line in &recent_nonempty {
570        let trimmed = line.trim();
571        let lower = trimmed.to_lowercase();
572        if trimmed == ">" || trimmed == "> " {
573            return Classification::exact(ScreenVerdict::AgentIdle);
574        }
575        if lower.starts_with("kiro>") {
576            let after = &trimmed["kiro>".len()..];
577            if after.trim().is_empty() {
578                return Classification::exact(ScreenVerdict::AgentIdle);
579            }
580        } else if lower.starts_with("kiro >") {
581            let after = &trimmed["kiro >".len()..];
582            if after.trim().is_empty() {
583                return Classification::exact(ScreenVerdict::AgentIdle);
584            }
585        }
586        if trimmed.ends_with("> ") || trimmed.ends_with('>') {
587            let before_gt = trimmed.trim_end_matches(['>', ' ']);
588            if before_gt.len() < trimmed.len() {
589                return Classification::ambiguous(ScreenVerdict::AgentIdle);
590            }
591        }
592    }
593
594    Classification::unknown()
595}
596
597// ---------------------------------------------------------------------------
598// Generic classifier (bash / shell / REPL)
599// ---------------------------------------------------------------------------
600
601fn classify_generic(content: &str) -> Classification {
602    let lines: Vec<&str> = content.lines().collect();
603    let recent_nonempty: Vec<&str> = lines
604        .iter()
605        .rev()
606        .filter(|l| !l.trim().is_empty())
607        .take(6)
608        .copied()
609        .collect();
610
611    for line in &recent_nonempty {
612        let trimmed = line.trim();
613        // Shell prompts: ends with "$ " or "$", or "% " or "%", or "> " or ">"
614        if trimmed.ends_with("$ ")
615            || trimmed.ends_with('$')
616            || trimmed.ends_with("% ")
617            || trimmed.ends_with('%')
618            || (trimmed.ends_with("> ") && trimmed.len() > 1)
619            || (trimmed.ends_with('>') && trimmed.len() > 1)
620        {
621            return Classification::exact(ScreenVerdict::AgentIdle);
622        }
623        if trimmed == ">" || trimmed == "> " {
624            return Classification::ambiguous(ScreenVerdict::AgentIdle);
625        }
626    }
627
628    Classification::unknown()
629}
630
631fn best_phrase_confidence(line: &str, phrases: &[&str]) -> Option<f32> {
632    phrases
633        .iter()
634        .filter_map(|phrase| phrase_match_confidence(line, phrase))
635        .max_by(f32::total_cmp)
636}
637
638fn phrase_match_confidence(line: &str, phrase: &str) -> Option<f32> {
639    let normalized_line = normalize_match_text(line);
640    let normalized_phrase = normalize_match_text(phrase);
641    if normalized_line.is_empty() || normalized_phrase.is_empty() {
642        return None;
643    }
644
645    if normalized_line.contains(&normalized_phrase) {
646        return Some(1.0);
647    }
648
649    let line_tokens = normalized_line.split_whitespace().collect::<Vec<_>>();
650    let phrase_tokens = normalized_phrase.split_whitespace().collect::<Vec<_>>();
651    if line_tokens.is_empty() || phrase_tokens.is_empty() {
652        return None;
653    }
654
655    let max_start = line_tokens.len().saturating_sub(1);
656    for start in 0..=max_start {
657        let score = token_prefix_score(&line_tokens[start..], &phrase_tokens);
658        if score > 0.0 {
659            return Some(score);
660        }
661    }
662
663    None
664}
665
666fn claude_working_confidence(line: &str) -> Option<f32> {
667    let lower = normalize_match_text(line);
668    if lower.contains("esc to interrupt")
669        || lower.contains("ctrl+b to run in background")
670        || lower.contains("waiting")
671        || lower.contains("running")
672    {
673        return Some(1.0);
674    }
675
676    if lower.contains("esc to inter")
677        || lower.contains("esc to in...")
678        || lower.contains("esc t...")
679        || lower.contains("ctrl+b to run")
680        || lower.contains("ctrl+b to r")
681        || (lower.contains("esc t")
682            && (lower.contains("bypass")
683                || lower.contains("shift+tab")
684                || lower.contains("ctrl+g")))
685    {
686        return Some(0.84);
687    }
688
689    None
690}
691
/// Scores how well the start of `line_tokens` matches `phrase_tokens`.
///
/// Tokens are compared pairwise from the front. Equal tokens count as
/// matched; a line token that is a proper prefix of the phrase token counts
/// as a final, partial match and ends the comparison; any other mismatch
/// gives `0.0`.
///
/// Result:
/// * `1.0`  — every phrase token matched exactly,
/// * `0.84` — at least two tokens matched and at least 45% of the phrase's
///   characters were covered,
/// * `0.45` — exactly one token matched and at least 25% were covered,
/// * `0.0`  — otherwise.
fn token_prefix_score(line_tokens: &[&str], phrase_tokens: &[&str]) -> f32 {
    let mut whole_matches = 0usize;
    let mut covered_chars = 0usize;
    let mut ended_on_prefix = false;

    let pairs = line_tokens
        .iter()
        .copied()
        .zip(phrase_tokens.iter().copied());
    for (seen, wanted) in pairs {
        if seen == wanted {
            whole_matches += 1;
            covered_chars += wanted.len();
        } else if !seen.is_empty() && wanted.starts_with(seen) {
            // Truncated token: count what is there and stop.
            covered_chars += seen.len();
            ended_on_prefix = true;
            break;
        } else {
            return 0.0;
        }
    }

    let matched = whole_matches + usize::from(ended_on_prefix);
    if matched == 0 {
        return 0.0;
    }
    if !ended_on_prefix && matched == phrase_tokens.len() {
        return 1.0;
    }

    let phrase_chars: usize = phrase_tokens.iter().map(|token| token.len()).sum();
    let coverage = covered_chars as f32 / phrase_chars as f32;

    match matched {
        1 if coverage >= 0.25 => 0.45,
        1 => 0.0,
        _ if coverage >= 0.45 => 0.84,
        _ => 0.0,
    }
}
733
/// Normalises text for phrase matching.
///
/// ASCII letters are lowercased, each ellipsis character (`…`, U+2026)
/// becomes three dots, and every run of whitespace collapses to a single
/// space with leading/trailing whitespace removed.
fn normalize_match_text(value: &str) -> String {
    value
        .to_ascii_lowercase()
        // One pass is enough: '\u{2026}' and "…" are the same code point.
        .replace('\u{2026}', "...")
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
}
743
744// ---------------------------------------------------------------------------
745// Tests
746// ---------------------------------------------------------------------------
747
748#[cfg(test)]
749mod tests {
750    use super::*;
751
752    fn make_screen(content: &str) -> vt100::Parser {
753        let mut parser = vt100::Parser::new(24, 80, 0);
754        parser.process(content.as_bytes());
755        parser
756    }
757
758    fn classify_result(agent_type: AgentType, content: &str) -> Classification {
759        let parser = make_screen(content);
760        classify_with_confidence(agent_type, parser.screen())
761    }
762
763    // -- Claude --
764
765    #[test]
766    fn claude_idle_prompt() {
767        // Status bar without "esc to interrupt" = idle
768        let parser =
769            make_screen("Some output\n\u{276F}\n  bypass permissions on (shift+tab to cycle)");
770        assert_eq!(
771            classify(AgentType::Claude, parser.screen()),
772            ScreenVerdict::AgentIdle
773        );
774    }
775
776    #[test]
777    fn claude_idle_bare_prompt() {
778        // Status bar with "ctrl+g to edit" but no interrupt = idle
779        let parser = make_screen("Some output\n\u{276F}\n  ctrl+g to edit in Vim");
780        assert_eq!(
781            classify(AgentType::Claude, parser.screen()),
782            ScreenVerdict::AgentIdle
783        );
784    }
785
786    #[test]
787    fn claude_working_spinner() {
788        // Status bar with "esc to interrupt" = working
789        let parser =
790            make_screen("\u{00B7} Thinking\u{2026}\n  bypass permissions on · esc to interrupt");
791        assert_eq!(
792            classify(AgentType::Claude, parser.screen()),
793            ScreenVerdict::AgentWorking
794        );
795    }
796
797    #[test]
798    fn claude_working_interrupt_footer() {
799        let parser = make_screen("Some output\nesc to interrupt\n");
800        assert_eq!(
801            classify(AgentType::Claude, parser.screen()),
802            ScreenVerdict::AgentWorking
803        );
804    }
805
806    #[test]
807    fn claude_working_interrupt_truncated() {
808        let parser = make_screen("Some output\nesc to inter\n");
809        assert_eq!(
810            classify(AgentType::Claude, parser.screen()),
811            ScreenVerdict::AgentWorking
812        );
813    }
814
815    #[test]
816    fn claude_working_interrupt_narrow_pane_ellipsis() {
817        // Narrow pane truncates to "esc t…" with ellipsis
818        let parser =
819            make_screen("output\n  bypass permissions on (shift+tab) \u{00B7} esc t\u{2026}");
820        assert_eq!(
821            classify(AgentType::Claude, parser.screen()),
822            ScreenVerdict::AgentWorking
823        );
824    }
825
826    #[test]
827    fn claude_working_interrupt_narrow_pane_cutoff() {
828        // Narrow pane cuts mid-word with bypass context
829        let parser = make_screen("output\n  bypass permissions on · esc t");
830        assert_eq!(
831            classify(AgentType::Claude, parser.screen()),
832            ScreenVerdict::AgentWorking
833        );
834    }
835
836    #[test]
837    fn claude_exact_match_has_full_confidence() {
838        let result = classify_result(
839            AgentType::Claude,
840            "Some output\n\u{276F}\n  bypass permissions on (shift+tab to cycle)",
841        );
842        assert_eq!(result.verdict, ScreenVerdict::AgentIdle);
843        assert_eq!(result.confidence, 1.0);
844    }
845
846    #[test]
847    fn claude_fuzzy_match_detects_truncated_footer() {
848        let result = classify_result(
849            AgentType::Claude,
850            "output\n  bypass permissions on · esc to inter",
851        );
852        assert_eq!(result.verdict, ScreenVerdict::AgentWorking);
853        assert!(result.confidence >= MIN_CLASSIFIER_CONFIDENCE);
854        assert!(result.confidence < 1.0);
855    }
856
857    #[test]
858    fn claude_truncated_idle_status_bar_matches_fuzzily() {
859        let result = classify_result(AgentType::Claude, "output\n  bypass permiss");
860        assert_eq!(result.verdict, ScreenVerdict::AgentIdle);
861        assert!(result.confidence >= MIN_CLASSIFIER_CONFIDENCE);
862        assert!(result.confidence < 1.0);
863    }
864
865    #[test]
866    fn claude_ambiguous_status_returns_low_confidence() {
867        let result = classify_result(AgentType::Claude, "output\n  shift");
868        assert_eq!(result.verdict, ScreenVerdict::AgentIdle);
869        assert!(result.confidence < MIN_CLASSIFIER_CONFIDENCE);
870
871        let parser = make_screen("output\n  shift");
872        assert_eq!(
873            classify(AgentType::Claude, parser.screen()),
874            ScreenVerdict::Unknown
875        );
876    }
877
878    #[test]
879    fn claude_context_exhausted() {
880        let parser = make_screen("Error: context window is full\n\u{276F} ");
881        assert_eq!(
882            classify(AgentType::Claude, parser.screen()),
883            ScreenVerdict::ContextExhausted
884        );
885    }
886
887    // -- Codex --
888
889    #[test]
890    fn codex_idle_prompt() {
891        let parser = make_screen("Done.\n\n\u{203A} ");
892        assert_eq!(
893            classify(AgentType::Codex, parser.screen()),
894            ScreenVerdict::AgentIdle
895        );
896    }
897
898    #[test]
899    fn codex_idle_bare_prompt() {
900        let parser = make_screen("Done.\n\n\u{203A}");
901        assert_eq!(
902            classify(AgentType::Codex, parser.screen()),
903            ScreenVerdict::AgentIdle
904        );
905    }
906
907    #[test]
908    fn codex_unknown_no_prompt() {
909        let parser = make_screen("Running something...\n");
910        assert_eq!(
911            classify(AgentType::Codex, parser.screen()),
912            ScreenVerdict::Unknown
913        );
914    }
915
916    #[test]
917    fn codex_idle_with_placeholder() {
918        // Codex shows placeholder text after › — still idle
919        let parser = make_screen("Output\n\u{203A} Explain this codebase\n");
920        assert_eq!(
921            classify(AgentType::Codex, parser.screen()),
922            ScreenVerdict::AgentIdle,
923            "placeholder text after › should be Idle"
924        );
925    }
926
927    #[test]
928    fn codex_truncated_interrupt_footer_matches_fuzzily() {
929        let result = classify_result(AgentType::Codex, "loading\nesc to inter");
930        assert_eq!(result.verdict, ScreenVerdict::AgentWorking);
931        assert!(result.confidence >= MIN_CLASSIFIER_CONFIDENCE);
932        assert!(result.confidence < 1.0);
933    }
934
935    #[test]
936    fn detect_meta_conversation_flags_repeated_planning_without_tools() {
937        let content = "I should inspect the daemon first.\nNext step: I will review the health loop.\nShould I patch narration or the classifier?";
938        assert!(detect_meta_conversation(content, AgentType::Codex));
939    }
940
941    #[test]
942    fn detect_meta_conversation_ignores_tool_execution_output() {
943        let content = "I will inspect the daemon.\n$ rg -n narration src/team\nExit code: 0";
944        assert!(!detect_meta_conversation(content, AgentType::Codex));
945    }
946
947    #[test]
948    fn classify_narration_line_marks_explanations() {
949        assert_eq!(
950            classify_narration_line(
951                "I will inspect the runtime before changing anything.",
952                AgentType::Codex
953            ),
954            NarrationLineKind::Explanation
955        );
956    }
957
958    #[test]
959    fn classify_narration_line_marks_tool_output() {
960        assert_eq!(
961            classify_narration_line("$ cargo test -p batty", AgentType::Codex),
962            NarrationLineKind::ToolOrCommand
963        );
964    }
965
966    #[test]
967    fn classify_narration_line_ignores_plain_output() {
968        assert_eq!(
969            classify_narration_line("src/team/daemon/health/narration.rs", AgentType::Codex),
970            NarrationLineKind::Other
971        );
972    }
973
974    #[test]
975    fn detect_narration_pattern_matches_planning_without_tools() {
976        let content = "I will inspect the daemon.\nLet me review the health loop.\nMy plan is to patch narration handling.";
977        assert!(detect_narration_pattern(content, AgentType::Codex));
978    }
979
980    #[test]
981    fn detect_narration_pattern_rejects_tool_execution() {
982        let content = "I will inspect the daemon.\n$ rg -n narration src/team\nExit code: 0";
983        assert!(!detect_narration_pattern(content, AgentType::Codex));
984    }
985
986    // -- Kiro --
987
988    #[test]
989    fn kiro_idle_prompt() {
990        let parser = make_screen("Result\nKiro> ");
991        assert_eq!(
992            classify(AgentType::Kiro, parser.screen()),
993            ScreenVerdict::AgentIdle
994        );
995    }
996
997    #[test]
998    fn kiro_idle_bare_gt() {
999        let parser = make_screen("Result\n>");
1000        assert_eq!(
1001            classify(AgentType::Kiro, parser.screen()),
1002            ScreenVerdict::AgentIdle
1003        );
1004    }
1005
1006    #[test]
1007    fn kiro_working() {
1008        let parser = make_screen("Kiro is thinking...\n");
1009        assert_eq!(
1010            classify(AgentType::Kiro, parser.screen()),
1011            ScreenVerdict::AgentWorking
1012        );
1013    }
1014
1015    #[test]
1016    fn kiro_working_agent_planning() {
1017        let parser = make_screen("Agent is planning...\n");
1018        assert_eq!(
1019            classify(AgentType::Kiro, parser.screen()),
1020            ScreenVerdict::AgentWorking
1021        );
1022    }
1023
1024    // -- Generic --
1025
1026    #[test]
1027    fn generic_shell_prompt_dollar() {
1028        let parser = make_screen("user@host:~$ ");
1029        assert_eq!(
1030            classify(AgentType::Generic, parser.screen()),
1031            ScreenVerdict::AgentIdle
1032        );
1033    }
1034
1035    #[test]
1036    fn generic_shell_prompt_percent() {
1037        let parser = make_screen("user@host:~% ");
1038        assert_eq!(
1039            classify(AgentType::Generic, parser.screen()),
1040            ScreenVerdict::AgentIdle
1041        );
1042    }
1043
1044    #[test]
1045    fn generic_shell_prompt_gt() {
1046        let parser = make_screen("prompt> ");
1047        assert_eq!(
1048            classify(AgentType::Generic, parser.screen()),
1049            ScreenVerdict::AgentIdle
1050        );
1051    }
1052
1053    #[test]
1054    fn generic_bare_gt_prompt_is_ambiguous_at_low_confidence() {
1055        let result = classify_result(AgentType::Generic, ">");
1056        assert_eq!(result.verdict, ScreenVerdict::AgentIdle);
1057        assert!(result.confidence < MIN_CLASSIFIER_CONFIDENCE);
1058    }
1059
1060    #[test]
1061    fn generic_empty_unknown() {
1062        let parser = make_screen("");
1063        assert_eq!(
1064            classify(AgentType::Generic, parser.screen()),
1065            ScreenVerdict::Unknown
1066        );
1067    }
1068
1069    #[test]
1070    fn unknown_pattern_returns_unknown_with_zero_confidence() {
1071        let result = classify_result(AgentType::Codex, "plain output with no known prompt");
1072        assert_eq!(result.verdict, ScreenVerdict::Unknown);
1073        assert_eq!(result.confidence, 0.0);
1074    }
1075
1076    // -- Shared --
1077
1078    #[test]
1079    fn exhaustion_all_types() {
1080        for agent_type in [
1081            AgentType::Claude,
1082            AgentType::Codex,
1083            AgentType::Kiro,
1084            AgentType::Generic,
1085        ] {
1086            let parser = make_screen("Error: conversation is too long to continue\n$ ");
1087            assert_eq!(
1088                classify(agent_type, parser.screen()),
1089                ScreenVerdict::ContextExhausted,
1090                "failed for {agent_type}",
1091            );
1092        }
1093    }
1094
1095    #[test]
1096    fn exhaustion_maximum_context_length() {
1097        let parser = make_screen("Error: maximum context length exceeded\n$ ");
1098        assert_eq!(
1099            classify(AgentType::Generic, parser.screen()),
1100            ScreenVerdict::ContextExhausted
1101        );
1102    }
1103
1104    #[test]
1105    fn agent_type_from_str() {
1106        assert_eq!("claude".parse::<AgentType>().unwrap(), AgentType::Claude);
1107        assert_eq!("CODEX".parse::<AgentType>().unwrap(), AgentType::Codex);
1108        assert_eq!("Kiro".parse::<AgentType>().unwrap(), AgentType::Kiro);
1109        assert_eq!("generic".parse::<AgentType>().unwrap(), AgentType::Generic);
1110        assert_eq!("bash".parse::<AgentType>().unwrap(), AgentType::Generic);
1111        assert_eq!("shell".parse::<AgentType>().unwrap(), AgentType::Generic);
1112        assert!("unknown".parse::<AgentType>().is_err());
1113    }
1114
1115    #[test]
1116    fn agent_type_display() {
1117        assert_eq!(AgentType::Claude.to_string(), "claude");
1118        assert_eq!(AgentType::Codex.to_string(), "codex");
1119        assert_eq!(AgentType::Kiro.to_string(), "kiro");
1120        assert_eq!(AgentType::Generic.to_string(), "generic");
1121    }
1122
1123    #[test]
1124    fn claude_dialog_not_idle() {
1125        // Trust dialog with ❯ as selection indicator — NOT an idle prompt
1126        let parser = make_screen(
1127            "Quick safety check: Is this a project you created or one you trust?\n\n\
1128             \u{276F} 1. Yes, I trust this folder\n\
1129             2. No, exit\n\n\
1130             Enter to confirm \u{00B7} Esc to cancel\n",
1131        );
1132        assert_ne!(
1133            classify(AgentType::Claude, parser.screen()),
1134            ScreenVerdict::AgentIdle,
1135            "trust dialog should NOT be classified as Idle"
1136        );
1137    }
1138
1139    #[test]
1140    fn claude_dialog_detected() {
1141        let content = "Quick safety check: Is this a project you created or one you trust?\n\
1142                       \u{276F} 1. Yes, I trust this folder\n\
1143                       Enter to confirm";
1144        assert!(
1145            detect_claude_dialog(content),
1146            "should detect Claude trust dialog"
1147        );
1148    }
1149
1150    #[test]
1151    fn claude_dialog_not_detected_normal() {
1152        let content = "Some response\n\u{276F} ";
1153        assert!(
1154            !detect_claude_dialog(content),
1155            "normal prompt should not trigger dialog detection"
1156        );
1157    }
1158
1159    #[test]
1160    fn codex_dialog_detected() {
1161        let content = "Do you trust the contents of this directory?\n\
1162                       \u{203A} 1. Yes, continue\n\
1163                       Press enter to continue";
1164        assert!(
1165            detect_startup_dialog(content),
1166            "should detect Codex trust dialog"
1167        );
1168    }
1169
1170    #[test]
1171    fn claude_idle_with_trailing_spaces() {
1172        // Status bar present, no interrupt = idle
1173        let parser =
1174            make_screen("Output\n\u{276F}    \n  bypass permissions on (shift+tab to cycle)    ");
1175        assert_eq!(
1176            classify(AgentType::Claude, parser.screen()),
1177            ScreenVerdict::AgentIdle
1178        );
1179    }
1180
1181    #[test]
1182    fn all_exhaustion_patterns_trigger() {
1183        for pattern in EXHAUSTION_PATTERNS {
1184            let parser = make_screen(&format!("Error: {pattern}\n$ "));
1185            assert_eq!(
1186                classify(AgentType::Generic, parser.screen()),
1187                ScreenVerdict::ContextExhausted,
1188                "pattern '{pattern}' did not trigger exhaustion",
1189            );
1190        }
1191    }
1192}
batty_cli/shim/classifier.rs

batty_cli/shim/
classifier.rs