Skip to main content

double_o/
classify.rs

1//! Command output classification and intelligent truncation.
2//!
3//! This module is the core of `oo`'s context-efficient output handling. It analyzes
4//! command results and produces one of four [`Classification`] outcomes:
5//!
6//! - **Failure**: Non-zero exit codes → filtered error output
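//! - **Passthrough**: Small successful outputs (≤4KB) → verbatim; also large outputs
//!   from content or unrecognised commands that no pattern summarises
//! - **Success**: Large successful outputs with pattern match → compressed summary
//!   (status commands without a pattern match report an empty summary)
//! - **Large**: Large successful outputs from data commands without pattern → indexed for recall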
10//!
11//! The [`classify`] function combines pattern matching with automatic command category
12//! detection to make intelligent decisions about how to present output.
13
14use crate::exec::CommandOutput;
15use crate::pattern::{self, Pattern};
16
/// Size cutoff in bytes (4 KB): successful output at or below this size is passed
/// through verbatim.
pub const SMALL_THRESHOLD: usize = 4 * 1024;

/// Line count up to which `smart_truncate` returns its input untouched.
const TRUNCATION_THRESHOLD: usize = 80;

/// Upper bound on the number of original lines (head + tail) kept once output is
/// truncated; the truncation marker line is not counted.
const MAX_LINES: usize = 120;
25
/// Coarse kind of command, used to choose a fallback for large output that no
/// pattern summarised.
///
/// [`detect_category`] derives the category from the command string, and
/// [`classify`] maps it to an outcome as follows:
///
/// | Category  | Typical commands             | Fallback                        |
/// |-----------|------------------------------|---------------------------------|
/// | `Status`  | test runners, builds, linters | quiet success (empty summary)  |
/// | `Content` | file viewers, diffs           | passthrough, never indexed     |
/// | `Data`    | listing / querying commands   | indexed for recall             |
/// | `Unknown` | everything else               | passthrough (safe default)     |
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CommandCategory {
    /// Test runners, linters, builds: the agent only needs pass/fail, so success is quiet.
    Status,
    /// `git show`, `git diff`, `cat`: the agent wants the output itself, so it passes through.
    Content,
    /// `git log`, `gh api`, `ls`: structured, queryable data that is indexed for recall.
    Data,
    /// Any command not recognised above; treated as passthrough to stay safe.
    Unknown,
}
47
/// Command output classification result.
///
/// Produced by [`classify`] from a command's exit code and merged output. Each
/// variant tells the caller how the output should be presented to the AI agent.
///
/// # Variants
///
/// - **Failure**: the command exited non-zero; carries filtered error output.
/// - **Passthrough**: the command succeeded and its output is shown verbatim. This
///   covers small output, and large output from content or unrecognised commands
///   that no pattern summarised.
/// - **Success**: the command succeeded with large output that was reduced to a
///   summary. The summary is empty ("quiet success") for status commands whose
///   output no pattern summarised.
/// - **Large**: the command succeeded with large output from a data command that
///   no pattern summarised; the output is to be indexed for recall.
///
/// The common value traits are derived so results can be logged with `{:?}`,
/// duplicated, and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Classification {
    /// Exit ≠ 0. Filtered failure output.
    Failure {
        /// Short label derived from the command (e.g., "cargo", "pytest").
        label: String,
        /// Error output after the matching failure pattern or smart truncation was applied.
        output: String,
    },

    /// Exit 0, output shown verbatim.
    Passthrough {
        /// The full command output (merged stdout and stderr).
        output: String,
    },

    /// Exit 0, output > threshold, reduced to a summary.
    Success {
        /// Short label derived from the command (e.g., "cargo", "pytest").
        label: String,
        /// Summary extracted by the matching success pattern; empty for a quiet success.
        summary: String,
    },

    /// Exit 0, output > threshold, no pattern summary, data command. Content needs indexing.
    Large {
        /// Short label derived from the command (e.g., "git", "gh").
        label: String,
        /// The full command output to be indexed for recall.
        output: String,
        /// Size of the output in bytes.
        size: usize,
    },
}
114
/// Derive a short label from a command string.
///
/// Takes the first whitespace-separated word of the command (typically the binary
/// name) and strips any leading path, keeping only what follows the last `/`:
/// - "cargo test" → "cargo"
/// - "/usr/bin/python script.py" → "python"
/// - "gh issue list" → "gh"
///
/// # Arguments
///
/// * `command` - The command string
///
/// # Returns
///
/// The derived label, or `"command"` when no name can be extracted: the command
/// is empty or all whitespace, or its first word ends in `/` (e.g. `"./"`), which
/// would otherwise produce an empty label.
pub fn label(command: &str) -> String {
    command
        .split_whitespace()
        .next()
        .and_then(|word| word.rsplit('/').next())
        // `rsplit` always yields at least one item, but that item is empty when the
        // word ends with '/'; treat it the same as "no name found".
        .filter(|name| !name.is_empty())
        .unwrap_or("command")
        .to_string()
}
140
141/// Detect command category from command string.
142///
143/// Analyzes the command string to determine its category, which is used as
144/// a fallback when no pattern matches for large outputs.
145///
146/// # Categories
147///
148/// - **Status**: Test runners, builds, linters → quiet success
149/// - **Content**: File viewers and diffs → always passthrough
150/// - **Data**: Listing and querying commands → index for recall
151/// - **Unknown**: Anything else → passthrough (safe default)
152///
153/// # Arguments
154///
155/// * `command` - The command string to analyze
156///
157/// # Returns
158///
159/// A [`CommandCategory`] indicating the command's type.
160pub fn detect_category(command: &str) -> CommandCategory {
161    let parts: Vec<&str> = command.split_whitespace().collect();
162    if parts.is_empty() {
163        return CommandCategory::Unknown;
164    }
165
166    // Extract binary name (strip path prefix)
167    let binary = parts[0].rsplit('/').next().unwrap_or(parts[0]);
168    let subcommand = parts.get(1).copied().unwrap_or("");
169
170    match binary {
171        // Status: test runners, build systems, linters
172        "cargo" => match subcommand {
173            "test" | "clippy" | "build" | "fmt" | "check" => CommandCategory::Status,
174            _ => CommandCategory::Unknown,
175        },
176        "pytest" | "jest" | "vitest" | "go" | "npm" | "yarn" | "pnpm" | "bun" | "eslint"
177        | "ruff" | "mypy" | "tsc" | "make" | "rubocop" => CommandCategory::Status,
178
179        // Content: file viewers and diffs
180        "git" => match subcommand {
181            "show" | "diff" => CommandCategory::Content,
182            "log" | "status" | "branch" | "tag" => CommandCategory::Data,
183            _ => CommandCategory::Unknown,
184        },
185        "cat" | "bat" | "less" => CommandCategory::Content,
186
187        // Data: listing and querying
188        "gh" => CommandCategory::Data,
189        "ls" | "find" | "grep" | "rg" => CommandCategory::Data,
190
191        _ => CommandCategory::Unknown,
192    }
193}
194
195/// Classify command output using patterns and automatic category detection.
196///
197/// This is the main entry point for output classification. It analyzes the command's
198/// exit code, output size, and applies pattern matching to determine the appropriate
199/// presentation strategy.
200///
201/// # Algorithm
202///
203/// 1. **Failure path** (exit_code ≠ 0): Apply failure pattern or smart truncation
204/// 2. **Small success** (output ≤ 4KB): Pass through verbatim
205/// 3. **Pattern match**: Extract summary using success pattern
206/// 4. **Category fallback**: Use command category to determine behavior
207///
208/// # Arguments
209///
210/// * `output` - The command's exit code, stdout, and stderr
211/// * `command` - The command string (used for pattern matching and category detection)
212/// * `patterns` - List of patterns to try (typically [`pattern::builtins`] + user patterns)
213///
214/// # Returns
215///
216/// A [`Classification`] indicating how to present the output.
217///
218/// # Examples
219///
220/// ```
221/// use double_o::{classify, CommandOutput};
222/// use double_o::pattern::builtins;
223///
224/// let output = CommandOutput {
225///     stdout: b"test result: ok. 5 passed; 0 failed; finished in 0.3s".to_vec(),
226///     stderr: Vec::new(),
227///     exit_code: 0,
228/// };
229/// let patterns = builtins();
230/// let result = classify(&output, "cargo test", patterns);
231/// ```
232pub fn classify(output: &CommandOutput, command: &str, patterns: &[Pattern]) -> Classification {
233    let merged = output.merged_lossy();
234    let lbl = label(command);
235
236    // Failure path
237    if output.exit_code != 0 {
238        let filtered = match pattern::find_matching(command, patterns) {
239            Some(pat) => {
240                if let Some(failure) = &pat.failure {
241                    pattern::extract_failure(failure, &merged)
242                } else {
243                    smart_truncate(&merged)
244                }
245            }
246            None => smart_truncate(&merged),
247        };
248        return Classification::Failure {
249            label: lbl,
250            output: filtered,
251        };
252    }
253
254    // Success, small output → passthrough
255    if merged.len() <= SMALL_THRESHOLD {
256        return Classification::Passthrough { output: merged };
257    }
258
259    // Success, large output — try pattern
260    if let Some(pat) = pattern::find_matching(command, patterns) {
261        if let Some(sp) = &pat.success {
262            if let Some(summary) = pattern::extract_summary(sp, &merged) {
263                return Classification::Success {
264                    label: lbl,
265                    summary,
266                };
267            }
268        }
269    }
270
271    // Large, no pattern match — use category to determine behavior
272    let category = detect_category(command);
273    match category {
274        CommandCategory::Status => {
275            // Status commands: quiet success (empty summary)
276            Classification::Success {
277                label: lbl,
278                summary: String::new(),
279            }
280        }
281        CommandCategory::Content | CommandCategory::Unknown => {
282            // Content and Unknown: always passthrough (never index)
283            Classification::Passthrough { output: merged }
284        }
285        CommandCategory::Data => {
286            // Data: index for recall
287            let size = merged.len();
288            Classification::Large {
289                label: lbl,
290                output: merged,
291                size,
292            }
293        }
294    }
295}
296
297/// Smart truncation: first 60% + marker + last 40%, capped at MAX_LINES.
298pub fn smart_truncate(output: &str) -> String {
299    let lines: Vec<&str> = output.lines().collect();
300    let total = lines.len();
301
302    if total <= TRUNCATION_THRESHOLD {
303        return output.to_string();
304    }
305
306    let budget = total.min(MAX_LINES);
307    let head_count = (budget as f64 * 0.6).ceil() as usize;
308    let tail_count = budget - head_count;
309    let truncated = total - head_count - tail_count;
310
311    let mut result = lines[..head_count].join("\n");
312    if truncated > 0 {
313        result.push_str(&format!("\n... [{truncated} lines truncated] ...\n"));
314    }
315    if tail_count > 0 {
316        result.push_str(&lines[total - tail_count..].join("\n"));
317    }
318    result
319}
320
321// ---------------------------------------------------------------------------
322// Tests
323// ---------------------------------------------------------------------------
324
#[cfg(test)]
mod tests {
    use super::*;
    use crate::exec::CommandOutput;

    /// Fabricate a `CommandOutput` with the given exit code and stdout; stderr stays empty.
    fn make_output(exit_code: i32, stdout: &str) -> CommandOutput {
        CommandOutput {
            exit_code,
            stderr: Vec::new(),
            stdout: Vec::from(stdout.as_bytes()),
        }
    }

    /// Successful output consisting of `line` repeated 3000 times (well over 4 KB).
    fn big_success(line: &str) -> CommandOutput {
        make_output(0, &line.repeat(3000))
    }

    /// Assert that every command in `commands` is detected as `expected`.
    fn assert_category(expected: CommandCategory, commands: &[&str]) {
        for cmd in commands {
            assert_eq!(
                detect_category(cmd),
                expected,
                "category mismatch for {cmd:?}"
            );
        }
    }

    // --- classify: basic paths ------------------------------------------------

    #[test]
    fn test_passthrough_small_output() {
        let result = classify(&make_output(0, "hello world\n"), "echo hello", &[]);
        let verbatim =
            matches!(result, Classification::Passthrough { output } if output == "hello world\n");
        assert!(verbatim);
    }

    #[test]
    fn test_failure_output() {
        let result = classify(&make_output(1, "error: something broke\n"), "some_cmd", &[]);
        if let Classification::Failure { label, output } = result {
            assert_eq!(label, "some_cmd");
            assert!(output.contains("something broke"));
        } else {
            panic!("expected Failure");
        }
    }

    #[test]
    fn test_large_output_no_pattern() {
        // Unknown category → passthrough
        let result = classify(&big_success("x\n"), "unknown_cmd", &[]);
        if !matches!(result, Classification::Passthrough { .. }) {
            panic!("expected Passthrough for unknown command");
        }
    }

    #[test]
    fn test_large_output_with_pattern() {
        let patterns = pattern::builtins();
        let mut big = ".\n".repeat(3000);
        big.push_str("\n47 passed in 3.2s\n");
        let out = make_output(0, &big);
        let result = classify(&out, "pytest tests/", patterns);
        if let Classification::Success { label, summary } = result {
            assert_eq!(label, "pytest");
            assert_eq!(summary, "47 passed, 3.2s");
        } else {
            panic!("expected Success");
        }
    }

    // --- smart_truncate ---------------------------------------------------------

    #[test]
    fn test_smart_truncation_short() {
        let mut lines = String::new();
        for i in 0..50 {
            lines.push_str(&format!("line {i}\n"));
        }
        let result = smart_truncate(&lines);
        assert_eq!(result, lines);
        assert!(!result.contains("truncated"));
    }

    #[test]
    fn test_smart_truncation_long() {
        let numbered: Vec<String> = (0..200).map(|i| format!("line {i}")).collect();
        let result = smart_truncate(&numbered.join("\n"));
        for needle in ["line 0", "line 199", "truncated"] {
            assert!(result.contains(needle));
        }
        // Should not exceed MAX_LINES + marker
        assert!(result.lines().count() <= MAX_LINES + 1); // +1 for truncation marker
    }

    // --- label ------------------------------------------------------------------

    #[test]
    fn test_label_derivation() {
        let cases = [
            ("pytest -x", "pytest"),
            ("cargo test", "cargo"),
            ("gh issue list", "gh"),
            ("/usr/bin/python test.py", "python"),
        ];
        for (command, expected) in cases {
            assert_eq!(label(command), expected);
        }
    }

    // --- classify: pattern interaction -------------------------------------------

    #[test]
    fn test_failure_with_pattern() {
        let patterns = pattern::builtins();
        let mut big_fail = String::new();
        for i in 0..100 {
            big_fail.push_str(&format!("error line {i}\n"));
        }
        let out = make_output(1, &big_fail);
        let result = classify(&out, "pytest -x", &patterns);
        if let Classification::Failure { label, output } = result {
            assert_eq!(label, "pytest");
            // pytest failure uses tail 30
            assert!(output.contains("error line 70"));
            assert!(output.contains("error line 99"));
        } else {
            panic!("expected Failure");
        }
    }

    #[test]
    fn test_empty_output_passthrough() {
        let result = classify(&make_output(0, ""), "true", &[]);
        let empty = matches!(result, Classification::Passthrough { output } if output.is_empty());
        assert!(empty);
    }

    #[test]
    fn test_success_with_empty_summary_is_quiet() {
        let patterns = pattern::builtins();
        let out = make_output(0, &"Compiling foo\n".repeat(500));
        let result = classify(&out, "cargo build --release", &patterns);
        if let Classification::Success { summary, .. } = result {
            assert!(summary.is_empty()); // quiet success
        } else {
            panic!("expected Success with empty summary");
        }
    }

    // --- CommandCategory detection and behavior ----------------------------------

    #[test]
    fn test_detect_category_status_commands() {
        assert_category(
            CommandCategory::Status,
            &[
                "cargo test",
                "cargo build",
                "cargo clippy",
                "cargo fmt",
                "pytest tests/",
                "jest",
                "eslint src/",
                "ruff check",
            ],
        );
    }

    #[test]
    fn test_detect_category_content_commands() {
        assert_category(
            CommandCategory::Content,
            &[
                "git show HEAD:file",
                "git diff HEAD~1",
                "cat file.txt",
                "bat src/main.rs",
            ],
        );
    }

    #[test]
    fn test_detect_category_data_commands() {
        assert_category(
            CommandCategory::Data,
            &[
                "git log",
                "git status",
                "gh issue list",
                "gh pr list",
                "ls -la",
                "find . -name test",
                "grep pattern file",
            ],
        );
    }

    #[test]
    fn test_detect_category_unknown_defaults() {
        assert_category(
            CommandCategory::Unknown,
            &[
                "curl https://example.com",
                "wget file.zip",
                "docker run image",
                "random-binary arg",
            ],
        );
    }

    #[test]
    fn test_status_no_pattern_quiet_success() {
        let result = classify(&big_success("x\n"), "cargo test", &[]);
        if let Classification::Success { label, summary } = result {
            assert_eq!(label, "cargo");
            assert!(summary.is_empty()); // quiet success
        } else {
            panic!("expected Success with empty summary for status command");
        }
    }

    #[test]
    fn test_content_always_passthrough() {
        // Content commands always pass through, however large the output.
        let result = classify(&big_success("x\n"), "git show HEAD:file", &[]);
        if !matches!(result, Classification::Passthrough { .. }) {
            panic!("expected Passthrough for content command");
        }
    }

    #[test]
    fn test_data_no_pattern_indexes() {
        let result = classify(&big_success("line\n"), "git log", &[]);
        if let Classification::Large { label, size, .. } = result {
            assert_eq!(label, "git");
            assert!(size > SMALL_THRESHOLD);
        } else {
            panic!("expected Large (indexed) for data command");
        }
    }

    #[test]
    fn test_unknown_defaults_to_passthrough() {
        // Unknown commands pass through (safe default).
        let result = classify(&big_success("x\n"), "curl https://example.com", &[]);
        if !matches!(result, Classification::Passthrough { .. }) {
            panic!("expected Passthrough for unknown command");
        }
    }

    #[test]
    fn test_pattern_overrides_category() {
        let patterns = pattern::builtins();
        let mut big = ".\n".repeat(3000);
        big.push_str("\n47 passed in 3.2s\n");
        let out = make_output(0, &big);
        // pytest is a status command, but its pattern extracts a summary, and a
        // pattern-derived summary takes precedence over the category fallback.
        let result = classify(&out, "pytest", &patterns);
        if let Classification::Success { summary, .. } = result {
            assert_eq!(summary, "47 passed, 3.2s");
        } else {
            panic!("expected pattern-matched Success");
        }
    }

    #[test]
    fn test_category_detection_with_full_paths() {
        assert_category(
            CommandCategory::Status,
            &["/usr/bin/cargo test", "/usr/local/bin/pytest"],
        );
        assert_category(
            CommandCategory::Content,
            &["/usr/bin/git show", "/bin/cat file.txt"],
        );
        assert_category(
            CommandCategory::Data,
            &["/usr/bin/gh issue list", "/bin/ls -la"],
        );
    }
}