Skip to main content

destructive_command_guard/
simulate.rs

1//! Simulation input parsing for `dcg simulate`.
2//!
3//! This module provides streaming, line-by-line parsing of command logs
4//! for replay/simulation against dcg policy. It supports multiple input
5//! formats with conservative auto-detection.
6//!
7//! # Supported input formats
8//!
9//! 1. **Plain command** - The entire line is a shell command
10//! 2. **Hook JSON** - `{"tool_name":"Bash","tool_input":{"command":"..."}}`
11//! 3. **Structured decision log** - Schema-versioned log entries (future)
12//!
13//! # Design principles
14//!
15//! - **Streaming**: Process line-by-line, never load entire file into memory
16//! - **Conservative**: Ambiguous lines are treated as malformed, not guessed
17//! - **Deterministic**: Same line always produces same format classification
18//! - **Panic-free**: Parser never panics on arbitrary input
19
20use serde::{Deserialize, Serialize};
21use std::io::{BufRead, BufReader, Read};
22
/// Schema version stamped into simulate output (for future compatibility).
///
/// Copied into `SimulationResult::schema_version` when an aggregation is
/// finalized so consumers can detect format changes.
pub const SIMULATE_SCHEMA_VERSION: u32 = 1;
25
/// Input format detected for a single line of simulate input.
///
/// Serialized in `snake_case` (`plain_command`, `hook_json`, `decision_log`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SimulateInputFormat {
    /// Plain command string (the entire trimmed line is the command).
    PlainCommand,
    /// Hook JSON: `{"tool_name":"Bash","tool_input":{"command":"..."}}`
    HookJson,
    /// Structured decision log entry: a `DCG_LOG_V`-prefixed, pipe-separated
    /// line whose fourth field is the base64-encoded command.
    DecisionLog,
}
37
/// Result of parsing a single input line.
///
/// Every line read by the parser maps to exactly one of these variants, and
/// each variant has a matching counter in `ParseStats`.
#[derive(Debug, Clone)]
pub enum ParsedLine {
    /// Successfully parsed command together with the format it was found in.
    Command {
        command: String,
        format: SimulateInputFormat,
    },
    /// Line is well-formed but should be ignored (e.g., non-Bash tool in hook JSON).
    Ignore { reason: &'static str },
    /// Line could not be parsed; `error` is a human-readable explanation.
    Malformed { error: String },
    /// Empty or whitespace-only line.
    Empty,
}
53
/// Limits for the streaming parser.
///
/// The line and byte limits are checked *before* each read, so the line that
/// crosses `max_bytes` is still returned in full.
#[derive(Debug, Clone)]
pub struct SimulateLimits {
    /// Maximum number of lines to process (None = unlimited)
    pub max_lines: Option<usize>,
    /// Maximum total bytes to read (None = unlimited)
    pub max_bytes: Option<usize>,
    /// Maximum command length in bytes (None = unlimited). Longer commands are
    /// not truncated; the line is reported as `ParsedLine::Malformed`.
    pub max_command_bytes: Option<usize>,
}
64
65impl Default for SimulateLimits {
66    fn default() -> Self {
67        Self {
68            max_lines: None,
69            max_bytes: None,
70            max_command_bytes: Some(64 * 1024), // 64KB default max command
71        }
72    }
73}
74
/// Running statistics collected while parsing.
///
/// Each successfully read line increments `lines_read` and exactly one of
/// `commands_extracted`, `malformed_count`, `ignored_count` or `empty_count`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ParseStats {
    /// Total lines read (also the 1-indexed number of the most recent line)
    pub lines_read: usize,
    /// Total bytes read, as reported by `read_line` for each line
    pub bytes_read: usize,
    /// Number of commands extracted
    pub commands_extracted: usize,
    /// Number of malformed lines
    pub malformed_count: usize,
    /// Number of ignored lines (e.g., non-Bash tools)
    pub ignored_count: usize,
    /// Number of empty lines
    pub empty_count: usize,
    /// Whether parsing stopped due to limits. The limit check runs before the
    /// next read, so this is also set when the limit is reached exactly at the
    /// end of the input.
    pub stopped_at_limit: bool,
    /// Which limit was hit (if any)
    pub limit_hit: Option<LimitHit>,
}
95
/// Which limit caused parsing to stop.
///
/// Serialized in `snake_case` (`max_lines`, `max_bytes`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LimitHit {
    /// `SimulateLimits::max_lines` was reached.
    MaxLines,
    /// `SimulateLimits::max_bytes` was reached.
    MaxBytes,
}
103
/// Streaming parser for simulate input.
///
/// Processes input line-by-line with configurable limits. Can be driven via
/// `next_line`, via its `Iterator` implementation, or drained in one go with
/// `collect_commands`.
pub struct SimulateParser<R: Read> {
    // Buffered wrapper around the caller-supplied reader.
    reader: BufReader<R>,
    // Line/byte/command-length limits applied while reading.
    limits: SimulateLimits,
    // Counters updated after every line.
    stats: ParseStats,
    // When true, a malformed line is surfaced as `ParseError::Malformed`.
    strict: bool,
}
113
114impl<R: Read> SimulateParser<R> {
115    /// Create a new parser with the given reader and limits.
116    pub fn new(reader: R, limits: SimulateLimits) -> Self {
117        Self {
118            reader: BufReader::new(reader),
119            limits,
120            stats: ParseStats::default(),
121            strict: false,
122        }
123    }
124
125    /// Enable strict mode (return error on first malformed line).
126    #[must_use]
127    pub const fn strict(mut self, strict: bool) -> Self {
128        self.strict = strict;
129        self
130    }
131
132    /// Get current parsing statistics.
133    pub const fn stats(&self) -> &ParseStats {
134        &self.stats
135    }
136
137    /// Consume the parser and return final statistics.
138    pub fn into_stats(self) -> ParseStats {
139        self.stats
140    }
141
142    /// Parse the next line from input.
143    ///
144    /// Returns `None` when input is exhausted or a limit is reached.
145    /// Returns `Some(Err(...))` in strict mode when a malformed line is encountered.
146    pub fn next_line(&mut self) -> Option<Result<ParsedLine, ParseError>> {
147        // Check limits before reading
148        if let Some(max_lines) = self.limits.max_lines {
149            if self.stats.lines_read >= max_lines {
150                self.stats.stopped_at_limit = true;
151                self.stats.limit_hit = Some(LimitHit::MaxLines);
152                return None;
153            }
154        }
155
156        if let Some(max_bytes) = self.limits.max_bytes {
157            if self.stats.bytes_read >= max_bytes {
158                self.stats.stopped_at_limit = true;
159                self.stats.limit_hit = Some(LimitHit::MaxBytes);
160                return None;
161            }
162        }
163
164        // Read next line
165        let mut line = String::new();
166        match self.reader.read_line(&mut line) {
167            Ok(0) => return None, // EOF
168            Ok(n) => {
169                self.stats.lines_read += 1;
170                self.stats.bytes_read += n;
171            }
172            Err(e) => {
173                return Some(Err(ParseError::Io(e.to_string())));
174            }
175        }
176
177        // Parse the line
178        let parsed = parse_line(&line, self.limits.max_command_bytes);
179
180        // Update stats
181        match &parsed {
182            ParsedLine::Command { .. } => self.stats.commands_extracted += 1,
183            ParsedLine::Malformed { error } => {
184                self.stats.malformed_count += 1;
185                if self.strict {
186                    return Some(Err(ParseError::Malformed {
187                        line: self.stats.lines_read,
188                        error: error.clone(),
189                    }));
190                }
191            }
192            ParsedLine::Ignore { .. } => self.stats.ignored_count += 1,
193            ParsedLine::Empty => self.stats.empty_count += 1,
194        }
195
196        Some(Ok(parsed))
197    }
198
199    /// Collect all parsed commands (for small inputs).
200    ///
201    /// Returns commands and final stats. In strict mode, stops on first error.
202    ///
203    /// # Errors
204    ///
205    /// Returns `ParseError::Io` on I/O failures, or `ParseError::Malformed` in strict
206    /// mode when encountering an unparseable line.
207    pub fn collect_commands(mut self) -> Result<(Vec<ParsedCommand>, ParseStats), ParseError> {
208        let mut commands = Vec::new();
209
210        while let Some(result) = self.next_line() {
211            match result? {
212                ParsedLine::Command { command, format } => {
213                    commands.push(ParsedCommand {
214                        command,
215                        format,
216                        line_number: self.stats.lines_read,
217                    });
218                }
219                ParsedLine::Ignore { .. } | ParsedLine::Malformed { .. } | ParsedLine::Empty => {
220                    // Continue (stats already updated)
221                }
222            }
223        }
224
225        Ok((commands, self.stats))
226    }
227}
228
229/// Iterator adapter for `SimulateParser`.
230impl<R: Read> Iterator for SimulateParser<R> {
231    type Item = Result<ParsedLine, ParseError>;
232
233    fn next(&mut self) -> Option<Self::Item> {
234        self.next_line()
235    }
236}
237
/// A successfully parsed command with metadata.
///
/// Produced by `SimulateParser::collect_commands` and consumed by
/// `run_simulation`.
#[derive(Debug, Clone, Serialize)]
pub struct ParsedCommand {
    /// The extracted command string
    pub command: String,
    /// Detected input format
    pub format: SimulateInputFormat,
    /// Line number in the input (1-indexed; counts every line read, including
    /// empty, ignored and malformed ones)
    pub line_number: usize,
}
248
/// Errors that can occur during parsing.
#[derive(Debug, Clone)]
pub enum ParseError {
    /// I/O error reading input (the underlying error rendered as text)
    Io(String),
    /// Malformed line in strict mode; `line` is the 1-indexed line number
    Malformed { line: usize, error: String },
}
257
258impl std::fmt::Display for ParseError {
259    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
260        match self {
261            Self::Io(e) => write!(f, "I/O error: {e}"),
262            Self::Malformed { line, error } => write!(f, "Line {line}: {error}"),
263        }
264    }
265}
266
// Marker impl: the default `std::error::Error` methods are used as-is (no `source`).
impl std::error::Error for ParseError {}
268
269// =============================================================================
270// Line parsing implementation
271// =============================================================================
272
273/// Parse a single line and detect its format.
274fn parse_line(line: &str, max_command_bytes: Option<usize>) -> ParsedLine {
275    let trimmed = line.trim();
276
277    // Empty line
278    if trimmed.is_empty() {
279        return ParsedLine::Empty;
280    }
281
282    // Try Decision Log format first (unambiguous prefix)
283    if trimmed.starts_with("DCG_LOG_V") {
284        return parse_decision_log(trimmed, max_command_bytes);
285    }
286
287    // Try Hook JSON format (starts with '{' and parses as valid hook JSON)
288    // Note: Shell brace blocks like `{ echo hello; }` also start with '{',
289    // so we must fall back to plain command if JSON parsing fails.
290    if trimmed.starts_with('{') {
291        if let Some(result) = try_parse_hook_json(trimmed, max_command_bytes) {
292            return result;
293        }
294        // Not valid hook JSON, treat as plain command (e.g., shell brace block)
295        return parse_plain_command(trimmed, max_command_bytes);
296    }
297
298    // Default: treat as plain command
299    parse_plain_command(trimmed, max_command_bytes)
300}
301
302/// Try to parse a line as hook JSON format.
303///
304/// Returns `Some(ParsedLine)` if the line is valid JSON that looks like hook input
305/// (including Malformed for missing/invalid fields), or `None` if the line is not
306/// valid JSON or does not resemble hook input (should fall back to plain command).
307fn try_parse_hook_json(line: &str, max_command_bytes: Option<usize>) -> Option<ParsedLine> {
308    // Minimal JSON structure we expect:
309    // {"tool_name":"Bash","tool_input":{"command":"..."}}
310
311    let value: serde_json::Value = serde_json::from_str(line).ok()?;
312    let serde_json::Value::Object(map) = value else {
313        return None;
314    };
315
316    let tool_name_value = map.get("tool_name")?;
317    let serde_json::Value::String(tool_name) = tool_name_value else {
318        return Some(ParsedLine::Malformed {
319            error: "tool_name must be a string".to_string(),
320        });
321    };
322
323    // Check if it's a Bash (Claude Code) or launch-process (Augment Code CLI) tool
324    if tool_name != "Bash" && tool_name != "launch-process" {
325        return Some(ParsedLine::Ignore {
326            reason: "non-Bash/launch-process tool",
327        });
328    }
329
330    let tool_input_value = map.get("tool_input").ok_or_else(|| ParsedLine::Malformed {
331        error: "missing tool_input".to_string(),
332    });
333    let tool_input_value = match tool_input_value {
334        Ok(value) => value,
335        Err(err) => return Some(err),
336    };
337
338    let serde_json::Value::Object(tool_input_map) = tool_input_value else {
339        return Some(ParsedLine::Malformed {
340            error: "tool_input must be an object".to_string(),
341        });
342    };
343
344    let command_value = tool_input_map
345        .get("command")
346        .ok_or_else(|| ParsedLine::Malformed {
347            error: "missing command in tool_input".to_string(),
348        });
349    let command_value = match command_value {
350        Ok(value) => value,
351        Err(err) => return Some(err),
352    };
353
354    let serde_json::Value::String(command) = command_value else {
355        return Some(ParsedLine::Malformed {
356            error: "command must be a string".to_string(),
357        });
358    };
359
360    // Check command length limit
361    if let Some(max_bytes) = max_command_bytes {
362        if command.len() > max_bytes {
363            return Some(ParsedLine::Malformed {
364                error: format!(
365                    "command exceeds max length ({} > {max_bytes} bytes)",
366                    command.len()
367                ),
368            });
369        }
370    }
371
372    Some(ParsedLine::Command {
373        command: command.clone(),
374        format: SimulateInputFormat::HookJson,
375    })
376}
377
378/// Parse a line as decision log format (future schema).
379fn parse_decision_log(line: &str, max_command_bytes: Option<usize>) -> ParsedLine {
380    use base64::Engine;
381
382    // Decision log format (v1):
383    // DCG_LOG_V1|timestamp|decision|command_base64|...
384    //
385    // For now, we'll implement a simple version.
386
387    let parts: Vec<&str> = line.splitn(5, '|').collect();
388
389    if parts.len() < 4 {
390        return ParsedLine::Malformed {
391            error: "invalid decision log format (expected at least 4 pipe-separated fields)"
392                .to_string(),
393        };
394    }
395
396    let version = parts[0];
397    if version != "DCG_LOG_V1" {
398        return ParsedLine::Malformed {
399            error: format!("unsupported log version: {version}"),
400        };
401    }
402
403    // parts[1] = timestamp (ignored for now)
404    // parts[2] = decision (allow/deny/warn - ignored for replay)
405    // parts[3] = command (base64 encoded)
406
407    let command_b64 = parts[3];
408
409    // Decode base64
410    let command = match base64::engine::general_purpose::STANDARD.decode(command_b64) {
411        Ok(bytes) => match String::from_utf8(bytes) {
412            Ok(s) => s,
413            Err(_) => {
414                return ParsedLine::Malformed {
415                    error: "command is not valid UTF-8".to_string(),
416                };
417            }
418        },
419        Err(e) => {
420            return ParsedLine::Malformed {
421                error: format!("invalid base64 in command field: {e}"),
422            };
423        }
424    };
425
426    // Check command length limit
427    if let Some(max_bytes) = max_command_bytes {
428        if command.len() > max_bytes {
429            return ParsedLine::Malformed {
430                error: format!(
431                    "command exceeds max length ({} > {max_bytes} bytes)",
432                    command.len()
433                ),
434            };
435        }
436    }
437
438    ParsedLine::Command {
439        command,
440        format: SimulateInputFormat::DecisionLog,
441    }
442}
443
444/// Parse a line as a plain command string.
445fn parse_plain_command(line: &str, max_command_bytes: Option<usize>) -> ParsedLine {
446    // Check command length limit
447    if let Some(max_bytes) = max_command_bytes {
448        if line.len() > max_bytes {
449            return ParsedLine::Malformed {
450                error: format!(
451                    "command exceeds max length ({} > {max_bytes} bytes)",
452                    line.len()
453                ),
454            };
455        }
456    }
457
458    ParsedLine::Command {
459        command: line.to_string(),
460        format: SimulateInputFormat::PlainCommand,
461    }
462}
463
464// =============================================================================
465// Evaluation Loop + Aggregation (git_safety_guard-1gt.8.2)
466// =============================================================================
467//
468// This section implements the core simulation loop that evaluates parsed commands
469// and aggregates results into actionable summaries.
470
471use crate::config::Config;
472use crate::evaluator::{EvaluationDecision, EvaluationResult, evaluate_command_with_pack_order};
473use crate::packs::REGISTRY;
474use std::collections::{HashMap, HashSet};
475
/// Default number of exemplars to keep per rule.
///
/// Used as the default for `SimulationConfig::exemplar_limit`.
pub const DEFAULT_EXEMPLAR_LIMIT: usize = 3;
478
/// Decision category for aggregation (maps to policy mode).
///
/// Serialized in `snake_case` (`allow`, `warn`, `deny`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SimulateDecision {
    /// Command was allowed (no pattern match, allowlisted, or log-only mode).
    Allow,
    /// Command matched a warn-level pattern (warn mode).
    Warn,
    /// Command matched a deny-level pattern (blocked).
    Deny,
}
490
491impl SimulateDecision {
492    /// Convert from evaluation result to simulation decision.
493    #[inline]
494    #[must_use]
495    pub const fn from_evaluation(result: &EvaluationResult) -> Self {
496        match result.decision {
497            EvaluationDecision::Allow => Self::Allow,
498            EvaluationDecision::Deny => {
499                // Check effective_mode for warn vs deny distinction
500                match result.effective_mode {
501                    Some(crate::packs::DecisionMode::Warn) => Self::Warn,
502                    Some(crate::packs::DecisionMode::Log) => Self::Allow,
503                    _ => Self::Deny,
504                }
505            }
506        }
507    }
508}
509
/// An exemplar command for a rule (sampled occurrence).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Exemplar {
    /// The command string (shortened with a trailing `...` when it exceeds
    /// the configured exemplar length).
    pub command: String,
    /// Line number in the input (1-indexed).
    pub line_number: usize,
    /// Original command length in bytes, before any shortening.
    pub original_length: usize,
}
520
/// Statistics for a single rule (`pack_id:pattern_name`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleStats {
    /// The rule ID (e.g., "core.git:reset-hard").
    pub rule_id: String,
    /// Pack ID (e.g., "core.git"); "unknown" when the match carried none.
    pub pack_id: String,
    /// Pattern name (e.g., "reset-hard"); "unknown" when the match carried none.
    pub pattern_name: String,
    /// Number of matches for this rule.
    pub count: usize,
    /// Decision for this rule (deny/warn/allow via allowlist), as recorded
    /// the first time the rule was seen.
    pub decision: SimulateDecision,
    /// Sample exemplars (first K occurrences by input order).
    pub exemplars: Vec<Exemplar>,
}
537
/// Statistics for a single pack.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackStats {
    /// The pack ID (e.g., "core.git").
    pub pack_id: String,
    /// Total matches across all patterns in this pack.
    pub count: usize,
    /// Breakdown by decision type, keyed by "allow", "warn" or "deny".
    /// Only decisions that occurred at least once have an entry.
    // NOTE(review): a `HashMap` has no stable iteration order, so the key
    // order of this map in serialized output may vary between runs.
    pub by_decision: HashMap<String, usize>,
}
548
/// Summary of simulation results.
///
/// `total_commands` equals `allow_count + warn_count + deny_count`, since
/// every recorded command lands in exactly one bucket.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SimulationSummary {
    /// Total commands evaluated.
    pub total_commands: usize,
    /// Commands that would be allowed.
    pub allow_count: usize,
    /// Commands that would trigger warnings.
    pub warn_count: usize,
    /// Commands that would be denied/blocked.
    pub deny_count: usize,
}
561
/// Complete simulation result, as produced by `SimulationAggregator::finalize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulationResult {
    /// Schema version for output compatibility (`SIMULATE_SCHEMA_VERSION`).
    pub schema_version: u32,
    /// Summary statistics.
    pub summary: SimulationSummary,
    /// Per-rule statistics (sorted by count desc, then `rule_id` asc).
    pub rules: Vec<RuleStats>,
    /// Per-pack statistics (sorted by count desc, then `pack_id` asc).
    pub packs: Vec<PackStats>,
    /// Parse statistics from the input, passed through unchanged.
    pub parse_stats: ParseStats,
}
576
/// Configuration for the simulation evaluator.
#[derive(Debug, Clone)]
pub struct SimulationConfig {
    /// Maximum exemplars to keep per rule.
    pub exemplar_limit: usize,
    /// Maximum command length in bytes for exemplars (truncate longer commands).
    pub max_exemplar_command_len: usize,
    /// Include allowlisted commands in the per-rule and per-pack results.
    /// They are counted in the summary totals either way.
    pub include_allowlisted: bool,
}
587
588impl Default for SimulationConfig {
589    fn default() -> Self {
590        Self {
591            exemplar_limit: DEFAULT_EXEMPLAR_LIMIT,
592            max_exemplar_command_len: 200,
593            include_allowlisted: true,
594        }
595    }
596}
597
/// Aggregator for simulation results.
///
/// Collects evaluation results one command at a time via `record` and turns
/// them into sorted rule and pack tables in `finalize`.
#[derive(Debug)]
pub struct SimulationAggregator {
    // Exemplar limits and allowlist reporting switch.
    config: SimulationConfig,
    // Running allow/warn/deny totals.
    summary: SimulationSummary,
    // Per-rule accumulators, keyed by `pack_id:pattern_name`.
    rule_builders: HashMap<String, RuleStatsBuilder>,
    // Per-pack match counts, broken down by decision.
    pack_counts: HashMap<String, HashMap<SimulateDecision, usize>>,
}
608
/// Builder for `RuleStats` (accumulates the match count and exemplars).
#[derive(Debug)]
struct RuleStatsBuilder {
    // Pack the rule belongs to.
    pack_id: String,
    // Pattern name within the pack.
    pattern_name: String,
    // Total matches seen so far.
    count: usize,
    // Decision captured when the builder was created.
    decision: SimulateDecision,
    // First `exemplar_limit` matching commands, in input order.
    exemplars: Vec<Exemplar>,
    // Maximum number of exemplars retained.
    exemplar_limit: usize,
}
619
620impl RuleStatsBuilder {
621    fn new(
622        pack_id: String,
623        pattern_name: String,
624        decision: SimulateDecision,
625        exemplar_limit: usize,
626    ) -> Self {
627        Self {
628            pack_id,
629            pattern_name,
630            count: 0,
631            decision,
632            exemplars: Vec::with_capacity(exemplar_limit),
633            exemplar_limit,
634        }
635    }
636
637    fn add_match(&mut self, command: &str, line_number: usize, max_len: usize) {
638        self.count += 1;
639        if self.exemplars.len() < self.exemplar_limit {
640            let truncated = if command.len() > max_len {
641                // Account for "..." suffix (3 chars) so total doesn't exceed max_len
642                let target = max_len.saturating_sub(3);
643                let mut end = target;
644                while end > 0 && !command.is_char_boundary(end) {
645                    end -= 1;
646                }
647                format!("{}...", &command[..end])
648            } else {
649                command.to_string()
650            };
651            self.exemplars.push(Exemplar {
652                command: truncated,
653                line_number,
654                original_length: command.len(),
655            });
656        }
657    }
658
659    fn build(self, rule_id: String) -> RuleStats {
660        RuleStats {
661            rule_id,
662            pack_id: self.pack_id,
663            pattern_name: self.pattern_name,
664            count: self.count,
665            decision: self.decision,
666            exemplars: self.exemplars,
667        }
668    }
669}
670
671impl SimulationAggregator {
672    /// Create a new aggregator with the given configuration.
673    #[must_use]
674    pub fn new(config: SimulationConfig) -> Self {
675        Self {
676            config,
677            summary: SimulationSummary::default(),
678            rule_builders: HashMap::new(),
679            pack_counts: HashMap::new(),
680        }
681    }
682
683    /// Record an evaluation result.
684    pub fn record(&mut self, command: &str, line_number: usize, result: &EvaluationResult) {
685        self.summary.total_commands += 1;
686        let decision = SimulateDecision::from_evaluation(result);
687
688        match decision {
689            SimulateDecision::Allow => self.summary.allow_count += 1,
690            SimulateDecision::Warn => self.summary.warn_count += 1,
691            SimulateDecision::Deny => self.summary.deny_count += 1,
692        }
693
694        if let Some(ref pattern_info) = result.pattern_info {
695            let pack_id = pattern_info
696                .pack_id
697                .as_deref()
698                .unwrap_or("unknown")
699                .to_string();
700            let pattern_name = pattern_info
701                .pattern_name
702                .as_deref()
703                .unwrap_or("unknown")
704                .to_string();
705            let rule_id = format!("{pack_id}:{pattern_name}");
706
707            let builder = self.rule_builders.entry(rule_id).or_insert_with(|| {
708                RuleStatsBuilder::new(
709                    pack_id.clone(),
710                    pattern_name,
711                    decision,
712                    self.config.exemplar_limit,
713                )
714            });
715            builder.add_match(command, line_number, self.config.max_exemplar_command_len);
716
717            let pack_decisions = self.pack_counts.entry(pack_id).or_default();
718            *pack_decisions.entry(decision).or_insert(0) += 1;
719        } else if let Some(ref allowlist_override) = result.allowlist_override {
720            if self.config.include_allowlisted {
721                let pack_id = allowlist_override
722                    .matched
723                    .pack_id
724                    .as_deref()
725                    .unwrap_or("unknown")
726                    .to_string();
727                let pattern_name = allowlist_override
728                    .matched
729                    .pattern_name
730                    .as_deref()
731                    .unwrap_or("unknown")
732                    .to_string();
733                let rule_id = format!("{pack_id}:{pattern_name}");
734
735                let builder = self.rule_builders.entry(rule_id).or_insert_with(|| {
736                    RuleStatsBuilder::new(
737                        pack_id.clone(),
738                        pattern_name,
739                        SimulateDecision::Allow,
740                        self.config.exemplar_limit,
741                    )
742                });
743                builder.add_match(command, line_number, self.config.max_exemplar_command_len);
744
745                let pack_decisions = self.pack_counts.entry(pack_id).or_default();
746                *pack_decisions.entry(SimulateDecision::Allow).or_insert(0) += 1;
747            }
748        }
749    }
750
751    /// Finalize aggregation and produce sorted results.
752    #[must_use]
753    pub fn finalize(self, parse_stats: ParseStats) -> SimulationResult {
754        let mut rules: Vec<RuleStats> = self
755            .rule_builders
756            .into_iter()
757            .map(|(rule_id, builder)| builder.build(rule_id))
758            .collect();
759
760        rules.sort_by(|a, b| {
761            b.count
762                .cmp(&a.count)
763                .then_with(|| a.rule_id.cmp(&b.rule_id))
764        });
765
766        let mut packs: Vec<PackStats> = self
767            .pack_counts
768            .into_iter()
769            .map(|(pack_id, decisions)| {
770                let count = decisions.values().sum();
771                let by_decision: HashMap<String, usize> = decisions
772                    .into_iter()
773                    .map(|(d, c)| {
774                        let key = match d {
775                            SimulateDecision::Allow => "allow",
776                            SimulateDecision::Warn => "warn",
777                            SimulateDecision::Deny => "deny",
778                        };
779                        (key.to_string(), c)
780                    })
781                    .collect();
782                PackStats {
783                    pack_id,
784                    count,
785                    by_decision,
786                }
787            })
788            .collect();
789
790        packs.sort_by(|a, b| {
791            b.count
792                .cmp(&a.count)
793                .then_with(|| a.pack_id.cmp(&b.pack_id))
794        });
795
796        SimulationResult {
797            schema_version: SIMULATE_SCHEMA_VERSION,
798            summary: self.summary,
799            rules,
800            packs,
801            parse_stats,
802        }
803    }
804}
805
806/// Run simulation on parsed commands.
807pub fn run_simulation<I>(
808    commands: I,
809    parse_stats: ParseStats,
810    config: &Config,
811    sim_config: SimulationConfig,
812) -> SimulationResult
813where
814    I: IntoIterator<Item = ParsedCommand>,
815{
816    let enabled_packs: HashSet<String> = config.enabled_pack_ids();
817    let ordered_packs = REGISTRY.expand_enabled_ordered(&enabled_packs);
818    let keywords = REGISTRY.collect_enabled_keywords(&enabled_packs);
819    let keyword_index = REGISTRY.build_enabled_keyword_index(&ordered_packs);
820    let compiled_overrides = config.overrides.compile();
821    let allowlists = crate::allowlist::load_default_allowlists();
822    let heredoc_settings = config.heredoc_settings();
823
824    let mut aggregator = SimulationAggregator::new(sim_config);
825
826    for cmd in commands {
827        let result = evaluate_command_with_pack_order(
828            &cmd.command,
829            &keywords,
830            &ordered_packs,
831            keyword_index.as_ref(),
832            &compiled_overrides,
833            &allowlists,
834            &heredoc_settings,
835        );
836        aggregator.record(&cmd.command, cmd.line_number, &result);
837    }
838
839    aggregator.finalize(parse_stats)
840}
841
842/// Run simulation from a reader (convenience wrapper).
843///
844/// # Errors
845///
846/// Returns `ParseError` if the input cannot be parsed.
847pub fn run_simulation_from_reader<R: std::io::Read>(
848    reader: R,
849    limits: SimulateLimits,
850    config: &Config,
851    sim_config: SimulationConfig,
852    strict: bool,
853) -> Result<SimulationResult, ParseError> {
854    let parser = SimulateParser::new(reader, limits).strict(strict);
855    let (commands, parse_stats) = parser.collect_commands()?;
856    Ok(run_simulation(commands, parse_stats, config, sim_config))
857}
858
859// =============================================================================
860// Output Formatting (git_safety_guard-1gt.8.3)
861// =============================================================================
862
863use crate::scan::ScanRedactMode;
864
/// Configuration for output formatting.
#[derive(Debug, Clone)]
pub struct SimulateOutputConfig {
    /// Redaction mode for sensitive data (applied before truncation).
    pub redact: ScanRedactMode,
    /// Maximum command length in bytes in output (0 = unlimited).
    pub truncate: usize,
    /// Limit to top N rules (0 = show all).
    pub top: usize,
    /// Show verbose output with exemplars.
    pub verbose: bool,
}
877
878impl Default for SimulateOutputConfig {
879    fn default() -> Self {
880        Self {
881            redact: ScanRedactMode::None,
882            truncate: 120,
883            top: 20,
884            verbose: false,
885        }
886    }
887}
888
/// JSON output structure for simulate command.
// NOTE(review): the code that fills this structure is not visible here;
// presumably it is derived from `SimulationResult` — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonOutput {
    /// Output schema version.
    pub schema_version: u32,
    /// Overall command counts.
    pub totals: SimulateJsonTotals,
    /// Per-rule entries.
    pub rules: Vec<SimulateJsonRule>,
    /// Parse problems and limit information.
    pub errors: SimulateJsonErrors,
}
897
/// Command totals section of the JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonTotals {
    /// Number of commands evaluated.
    pub commands: usize,
    /// Number of commands allowed.
    pub allowed: usize,
    /// Number of commands that triggered a warning.
    pub warned: usize,
    /// Number of commands denied.
    pub denied: usize,
}
905
/// One rule entry in the JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonRule {
    /// Identifier of the rule.
    pub rule_id: String,
    /// Count recorded for this rule in the simulation result.
    pub count: usize,
    /// Decision as a lowercase string: `"allow"`, `"warn"` or `"deny"`.
    pub decision: String,
    /// Exemplar commands, after redaction and truncation have been applied.
    pub exemplars: Vec<String>,
}
913
/// Parse diagnostics in the JSON output, taken from the parse statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonErrors {
    /// Number of malformed lines reported by the parse statistics.
    pub malformed_count: usize,
    /// Number of ignored lines reported by the parse statistics.
    pub ignored_count: usize,
    /// Whether the parse statistics report that a limit stopped parsing.
    pub stopped_at_limit: bool,
    /// `Debug` rendering of the limit that was hit, if any; the key is
    /// omitted from the serialized JSON when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub limit_hit: Option<String>,
}
922
923/// Apply redaction and truncation to a command string.
924#[must_use]
925pub fn redact_and_truncate_command(cmd: &str, config: &SimulateOutputConfig) -> String {
926    let redacted = match config.redact {
927        ScanRedactMode::None => cmd.to_string(),
928        ScanRedactMode::Quoted => crate::scan::redact_quoted_strings(cmd),
929        ScanRedactMode::Aggressive => crate::scan::redact_aggressively(cmd),
930    };
931
932    if config.truncate > 0 && redacted.len() > config.truncate {
933        let target = config.truncate.saturating_sub(3);
934        let mut end = target;
935        while end > 0 && !redacted.is_char_boundary(end) {
936            end -= 1;
937        }
938        format!("{}...", &redacted[..end])
939    } else {
940        redacted
941    }
942}
943
944/// Format simulation result as pretty-printed text.
945#[must_use]
946#[allow(clippy::format_push_string)]
947pub fn format_pretty_output(result: &SimulationResult, config: &SimulateOutputConfig) -> String {
948    let mut output = String::new();
949    output.push_str("Simulation Results\n==================\n\n");
950    output.push_str("Summary:\n");
951    output.push_str(&format!(
952        "  Total commands:  {}\n",
953        result.summary.total_commands
954    ));
955    output.push_str(&format!(
956        "  Allowed:         {}\n",
957        result.summary.allow_count
958    ));
959    output.push_str(&format!(
960        "  Warned:          {}\n",
961        result.summary.warn_count
962    ));
963    output.push_str(&format!(
964        "  Denied:          {}\n",
965        result.summary.deny_count
966    ));
967    output.push('\n');
968
969    if !result.rules.is_empty() {
970        output.push_str("Rules Triggered (sorted by count):\n");
971        let rules_to_show: Vec<_> = if config.top > 0 {
972            result.rules.iter().take(config.top).collect()
973        } else {
974            result.rules.iter().collect()
975        };
976        for rule in rules_to_show {
977            let decision_str = match rule.decision {
978                SimulateDecision::Allow => "allow",
979                SimulateDecision::Warn => "warn",
980                SimulateDecision::Deny => "DENY",
981            };
982            output.push_str(&format!(
983                "  {:>5} x {} [{}]\n",
984                rule.count, rule.rule_id, decision_str
985            ));
986            if config.verbose {
987                for ex in &rule.exemplars {
988                    let display_cmd = redact_and_truncate_command(&ex.command, config);
989                    output.push_str(&format!("         L{}: {}\n", ex.line_number, display_cmd));
990                }
991            }
992        }
993        if config.top > 0 && result.rules.len() > config.top {
994            output.push_str(&format!(
995                "  ... and {} more rules\n",
996                result.rules.len() - config.top
997            ));
998        }
999        output.push('\n');
1000    }
1001
1002    if !result.packs.is_empty() {
1003        output.push_str("Packs Summary:\n");
1004        for pack in &result.packs {
1005            output.push_str(&format!("  {:>5} x {}\n", pack.count, pack.pack_id));
1006        }
1007        output.push('\n');
1008    }
1009
1010    output.push_str("Parse Statistics:\n");
1011    output.push_str(&format!(
1012        "  Lines read:         {}\n",
1013        result.parse_stats.lines_read
1014    ));
1015    output.push_str(&format!(
1016        "  Commands extracted: {}\n",
1017        result.parse_stats.commands_extracted
1018    ));
1019    output.push_str(&format!(
1020        "  Malformed lines:    {}\n",
1021        result.parse_stats.malformed_count
1022    ));
1023    output.push_str(&format!(
1024        "  Ignored lines:      {}\n",
1025        result.parse_stats.ignored_count
1026    ));
1027    if result.parse_stats.stopped_at_limit {
1028        if let Some(ref limit) = result.parse_stats.limit_hit {
1029            output.push_str(&format!("  Stopped at limit:   {limit:?}\n"));
1030        }
1031    }
1032    output
1033}
1034
1035/// Format simulation result as JSON.
1036///
1037/// # Errors
1038///
1039/// Returns an error if JSON serialization fails.
1040pub fn format_json_output(
1041    result: SimulationResult,
1042    config: &SimulateOutputConfig,
1043) -> Result<String, serde_json::Error> {
1044    let rules_to_show: Vec<_> = if config.top > 0 {
1045        result.rules.into_iter().take(config.top).collect()
1046    } else {
1047        result.rules
1048    };
1049
1050    let json_rules: Vec<SimulateJsonRule> = rules_to_show
1051        .into_iter()
1052        .map(|r| {
1053            let exemplars: Vec<String> = r
1054                .exemplars
1055                .iter()
1056                .map(|ex| redact_and_truncate_command(&ex.command, config))
1057                .collect();
1058            SimulateJsonRule {
1059                rule_id: r.rule_id,
1060                count: r.count,
1061                decision: match r.decision {
1062                    SimulateDecision::Allow => "allow".to_string(),
1063                    SimulateDecision::Warn => "warn".to_string(),
1064                    SimulateDecision::Deny => "deny".to_string(),
1065                },
1066                exemplars,
1067            }
1068        })
1069        .collect();
1070
1071    let output = SimulateJsonOutput {
1072        schema_version: result.schema_version,
1073        totals: SimulateJsonTotals {
1074            commands: result.summary.total_commands,
1075            allowed: result.summary.allow_count,
1076            warned: result.summary.warn_count,
1077            denied: result.summary.deny_count,
1078        },
1079        rules: json_rules,
1080        errors: SimulateJsonErrors {
1081            malformed_count: result.parse_stats.malformed_count,
1082            ignored_count: result.parse_stats.ignored_count,
1083            stopped_at_limit: result.parse_stats.stopped_at_limit,
1084            limit_hit: result.parse_stats.limit_hit.map(|l| format!("{l:?}")),
1085        },
1086    };
1087
1088    serde_json::to_string_pretty(&output)
1089}
1090
1091// =============================================================================
1092// Tests
1093// =============================================================================
1094
#[cfg(test)]
mod tests {
    use super::*;

    // -------------------------------------------------------------------------
    // Format detection tests
    // -------------------------------------------------------------------------

    /// A bare shell command is classified as `PlainCommand` and kept verbatim.
    #[test]
    fn detect_plain_command() {
        match parse_line("git status --short", None) {
            ParsedLine::Command { command, format } => {
                assert_eq!(command, "git status --short");
                assert_eq!(format, SimulateInputFormat::PlainCommand);
            }
            other => panic!("expected Command, got {other:?}"),
        }
    }

    /// Hook JSON for the `Bash` tool yields its embedded command.
    #[test]
    fn detect_hook_json_bash() {
        let line = r#"{"tool_name":"Bash","tool_input":{"command":"git status"}}"#;
        match parse_line(line, None) {
            ParsedLine::Command { command, format } => {
                assert_eq!(command, "git status");
                assert_eq!(format, SimulateInputFormat::HookJson);
            }
            other => panic!("expected Command, got {other:?}"),
        }
    }

    /// Hook JSON for the `launch-process` tool is handled like `Bash`.
    #[test]
    fn detect_hook_json_launch_process() {
        // Augment Code CLI uses launch-process tool name
        let line = r#"{"tool_name":"launch-process","tool_input":{"command":"git status"}}"#;
        match parse_line(line, None) {
            ParsedLine::Command { command, format } => {
                assert_eq!(command, "git status");
                assert_eq!(format, SimulateInputFormat::HookJson);
            }
            other => panic!("expected Command, got {other:?}"),
        }
    }

    /// Hook JSON for any other tool is ignored rather than rejected.
    #[test]
    fn detect_hook_json_non_bash_ignored() {
        let line = r#"{"tool_name":"Read","tool_input":{"path":"/etc/passwd"}}"#;
        match parse_line(line, None) {
            ParsedLine::Ignore { reason } => {
                assert_eq!(reason, "non-Bash/launch-process tool");
            }
            other => panic!("expected Ignore, got {other:?}"),
        }
    }

    /// A non-string `command` value is reported as malformed.
    #[test]
    fn detect_hook_json_command_wrong_type() {
        let line = r#"{"tool_name":"Bash","tool_input":{"command":123}}"#;
        match parse_line(line, None) {
            ParsedLine::Malformed { error } => {
                assert_eq!(error, "command must be a string");
            }
            other => panic!("expected Malformed, got {other:?}"),
        }
    }

    /// A non-string `tool_name` value is reported as malformed.
    #[test]
    fn detect_hook_json_tool_name_wrong_type() {
        let line = r#"{"tool_name":42,"tool_input":{"command":"git status"}}"#;
        match parse_line(line, None) {
            ParsedLine::Malformed { error } => {
                assert_eq!(error, "tool_name must be a string");
            }
            other => panic!("expected Malformed, got {other:?}"),
        }
    }

    /// A `DCG_LOG_V1` entry is decoded into its original command.
    #[test]
    fn detect_decision_log() {
        // "git status" in base64 = "Z2l0IHN0YXR1cw=="
        let line = "DCG_LOG_V1|2026-01-09T00:00:00Z|allow|Z2l0IHN0YXR1cw==|";
        match parse_line(line, None) {
            ParsedLine::Command { command, format } => {
                assert_eq!(command, "git status");
                assert_eq!(format, SimulateInputFormat::DecisionLog);
            }
            other => panic!("expected Command, got {other:?}"),
        }
    }

    /// Empty and whitespace-only lines are classified as `Empty`.
    #[test]
    fn empty_line() {
        for blank in ["", "   ", "\t\n"] {
            assert!(matches!(parse_line(blank, None), ParsedLine::Empty));
        }
    }

    #[test]
    fn invalid_json_falls_back_to_plain_command() {
        // Invalid JSON starting with '{' should be treated as a plain command,
        // not malformed. This handles shell brace blocks like `{ echo hello; }`.
        match parse_line("{invalid json}", None) {
            ParsedLine::Command { command, format } => {
                assert_eq!(command, "{invalid json}");
                assert_eq!(format, SimulateInputFormat::PlainCommand);
            }
            other => panic!("expected Command (PlainCommand), got {other:?}"),
        }
    }

    #[test]
    fn shell_brace_block_as_plain_command() {
        // Shell brace blocks should be treated as plain commands
        match parse_line("{ echo hello; } | cat", None) {
            ParsedLine::Command { command, format } => {
                assert_eq!(command, "{ echo hello; } | cat");
                assert_eq!(format, SimulateInputFormat::PlainCommand);
            }
            other => panic!("expected Command (PlainCommand), got {other:?}"),
        }
    }

    #[test]
    fn valid_json_missing_command_is_malformed() {
        // Valid JSON with missing fields is still hook JSON format, just malformed
        // (not a plain command)
        let line = r#"{"tool_name":"Bash","tool_input":{}}"#;
        match parse_line(line, None) {
            ParsedLine::Malformed { error } => {
                assert!(error.contains("missing command"));
            }
            other => panic!("expected Malformed, got {other:?}"),
        }
    }

    /// An unknown decision-log version is rejected as malformed.
    #[test]
    fn malformed_decision_log_wrong_version() {
        let line = "DCG_LOG_V99|timestamp|allow|cmd|";
        match parse_line(line, None) {
            ParsedLine::Malformed { error } => {
                assert!(error.contains("unsupported log version"));
            }
            other => panic!("expected Malformed, got {other:?}"),
        }
    }

    // -------------------------------------------------------------------------
    // Limit tests
    // -------------------------------------------------------------------------

    /// A command longer than the configured maximum is malformed.
    #[test]
    fn command_length_limit() {
        let oversized = "x".repeat(1000);
        match parse_line(&oversized, Some(500)) {
            ParsedLine::Malformed { error } => {
                assert!(error.contains("exceeds max length"));
            }
            other => panic!("expected Malformed, got {other:?}"),
        }
    }

    /// A command shorter than the configured maximum is accepted.
    #[test]
    fn command_within_limit() {
        let parsed = parse_line("git status", Some(500));
        assert!(matches!(parsed, ParsedLine::Command { .. }));
    }

    // -------------------------------------------------------------------------
    // Streaming parser tests
    // -------------------------------------------------------------------------

    /// Mixed input is split into commands, ignored lines and empty lines.
    #[test]
    fn parser_collects_commands() {
        let input = r#"git status
{"tool_name":"Bash","tool_input":{"command":"git log"}}
{"tool_name":"Read","tool_input":{"path":"file.txt"}}

echo hello
"#;

        let (commands, stats) = SimulateParser::new(input.as_bytes(), SimulateLimits::default())
            .collect_commands()
            .unwrap();

        assert_eq!(commands.len(), 3);
        assert_eq!(commands[0].command, "git status");
        assert_eq!(commands[0].format, SimulateInputFormat::PlainCommand);
        assert_eq!(commands[1].command, "git log");
        assert_eq!(commands[1].format, SimulateInputFormat::HookJson);
        assert_eq!(commands[2].command, "echo hello");

        assert_eq!(stats.lines_read, 5);
        assert_eq!(stats.commands_extracted, 3);
        assert_eq!(stats.ignored_count, 1); // Read tool
        assert_eq!(stats.empty_count, 1);
        assert_eq!(stats.malformed_count, 0);
    }

    /// Parsing stops once `max_lines` lines have been read.
    #[test]
    fn parser_respects_line_limit() {
        let input = "line1\nline2\nline3\nline4\nline5\n";
        let limits = SimulateLimits {
            max_lines: Some(3),
            ..Default::default()
        };

        let (commands, stats) = SimulateParser::new(input.as_bytes(), limits)
            .collect_commands()
            .unwrap();

        assert_eq!(commands.len(), 3);
        assert_eq!(stats.lines_read, 3);
        assert!(stats.stopped_at_limit);
        assert!(matches!(stats.limit_hit, Some(LimitHit::MaxLines)));
    }

    #[test]
    fn parser_strict_mode_fails_on_malformed() {
        // Use valid JSON with missing command field to trigger malformed error
        let input = r#"git status
{"tool_name":"Bash","tool_input":{}}
echo hello
"#;

        let err = SimulateParser::new(input.as_bytes(), SimulateLimits::default())
            .strict(true)
            .collect_commands()
            .unwrap_err();

        assert!(matches!(err, ParseError::Malformed { line: 2, .. }));
    }

    #[test]
    fn parser_non_strict_continues_on_malformed() {
        // Use valid JSON with missing command field to trigger malformed error
        let input = r#"git status
{"tool_name":"Bash","tool_input":{}}
echo hello
"#;

        let (commands, stats) = SimulateParser::new(input.as_bytes(), SimulateLimits::default())
            .strict(false)
            .collect_commands()
            .unwrap();

        assert_eq!(commands.len(), 2); // git status and echo hello
        assert_eq!(stats.malformed_count, 1);
    }

    #[test]
    fn parser_treats_invalid_json_as_plain_command() {
        // Invalid JSON (like shell brace blocks) should be treated as plain commands
        let input = r"git status
{ echo hello; }
echo world
";

        let (commands, stats) = SimulateParser::new(input.as_bytes(), SimulateLimits::default())
            .collect_commands()
            .unwrap();

        assert_eq!(commands.len(), 3); // All three are plain commands
        assert_eq!(commands[1].command, "{ echo hello; }");
        assert_eq!(commands[1].format, SimulateInputFormat::PlainCommand);
        assert_eq!(stats.malformed_count, 0);
    }

    // -------------------------------------------------------------------------
    // Determinism tests
    // -------------------------------------------------------------------------

    /// Re-parsing the same line always yields the same `Debug` rendering.
    #[test]
    fn parsing_is_deterministic() {
        let lines = [
            "git status",
            r#"{"tool_name":"Bash","tool_input":{"command":"ls"}}"#,
            "{broken",
            "",
            "DCG_LOG_V1|ts|allow|Z2l0IHN0YXR1cw==|",
        ];

        // Parse each line 100 times and ensure same result
        for line in lines {
            let baseline = format!("{:?}", parse_line(line, None));
            for _ in 0..100 {
                let again = format!("{:?}", parse_line(line, None));
                assert_eq!(baseline, again, "Non-deterministic parsing for: {line}");
            }
        }
    }

    // -------------------------------------------------------------------------
    // Aggregation tests (git_safety_guard-1gt.8.2)
    // -------------------------------------------------------------------------

    /// Allowed and denied results are tallied into the matching counters.
    #[test]
    fn aggregator_counts_decisions_correctly() {
        let mut agg = SimulationAggregator::new(SimulationConfig::default());

        // Record some results
        agg.record("ls", 1, &EvaluationResult::allowed());
        agg.record("git status", 2, &EvaluationResult::allowed());
        agg.record(
            "rm -rf /",
            3,
            &EvaluationResult::denied_by_pack("core.filesystem", "destructive", None),
        );

        let result = agg.finalize(ParseStats {
            lines_read: 3,
            commands_extracted: 3,
            ..Default::default()
        });

        assert_eq!(result.summary.total_commands, 3);
        assert_eq!(result.summary.allow_count, 2);
        assert_eq!(result.summary.deny_count, 1);
        assert_eq!(result.summary.warn_count, 0);
    }

    /// Rules come out ordered by descending count regardless of insert order.
    #[test]
    fn aggregator_sorts_rules_deterministically() {
        let mut agg = SimulationAggregator::new(SimulationConfig::default());

        let deny_from = |pack_id: &'static str| {
            EvaluationResult::denied_by_pack_pattern(
                pack_id,
                "rule1",
                "test",
                None,
                crate::packs::Severity::Critical,
                &[],
            )
        };

        // Add rules with same count in different order
        for (line_no, pack_id) in [(1, "pack.b"), (2, "pack.a"), (3, "pack.b")] {
            agg.record(&format!("cmd{line_no}"), line_no, &deny_from(pack_id));
        }

        let result = agg.finalize(ParseStats::default());

        // Rules should be sorted by count desc, then rule_id asc
        assert_eq!(result.rules.len(), 2);
        let (most_hit, least_hit) = (&result.rules[0], &result.rules[1]);
        assert_eq!(most_hit.rule_id, "pack.b:rule1"); // count=2
        assert_eq!(most_hit.count, 2);
        assert_eq!(least_hit.rule_id, "pack.a:rule1"); // count=1
        assert_eq!(least_hit.count, 1);
    }

    /// Only the first `exemplar_limit` occurrences are kept as exemplars.
    #[test]
    fn aggregator_samples_first_k_exemplars() {
        let mut agg = SimulationAggregator::new(SimulationConfig {
            exemplar_limit: 2,
            ..Default::default()
        });

        // Add 5 occurrences of the same rule
        for i in 1..=5 {
            let denied = EvaluationResult::denied_by_pack_pattern(
                "pack.a",
                "rule1",
                "test",
                None,
                crate::packs::Severity::Critical,
                &[],
            );
            agg.record(&format!("cmd{i}"), i, &denied);
        }

        let result = agg.finalize(ParseStats::default());

        // Should only have first 2 exemplars
        let exemplars = &result.rules[0].exemplars;
        assert_eq!(exemplars.len(), 2);
        assert_eq!(exemplars[0].command, "cmd1");
        assert_eq!(exemplars[0].line_number, 1);
        assert_eq!(exemplars[1].command, "cmd2");
        assert_eq!(exemplars[1].line_number, 2);
    }

    /// Stored exemplars are shortened to `max_exemplar_command_len`.
    #[test]
    fn exemplar_truncation_respects_max_len() {
        let mut agg = SimulationAggregator::new(SimulationConfig {
            exemplar_limit: 1,
            max_exemplar_command_len: 10, // Total should be <= 10 chars
            include_allowlisted: true,
        });

        // Command is 20 chars, should be truncated to fit within 10 chars including "..."
        let denied = EvaluationResult::denied_by_pack_pattern(
            "pack.a",
            "rule1",
            "test",
            None,
            crate::packs::Severity::Critical,
            &[],
        );
        agg.record("12345678901234567890", 1, &denied);

        let result = agg.finalize(ParseStats::default());

        // Truncated command should be at most max_exemplar_command_len (10) chars
        let exemplar = &result.rules[0].exemplars[0];
        assert!(
            exemplar.command.len() <= 10,
            "Expected at most 10 chars, got {}: '{}'",
            exemplar.command.len(),
            exemplar.command
        );
        assert!(
            exemplar.command.ends_with("..."),
            "Expected ellipsis, got: '{}'",
            exemplar.command
        );
        assert_eq!(exemplar.original_length, 20);
    }

    /// Repeated simulation runs over the same input agree on counts and order.
    #[test]
    fn aggregation_is_deterministic() {
        let commands: Vec<ParsedCommand> = ["rm -rf /", "git reset --hard", "rm -rf /tmp"]
            .iter()
            .zip(1..)
            .map(|(cmd, line_number)| ParsedCommand {
                command: (*cmd).to_string(),
                format: SimulateInputFormat::PlainCommand,
                line_number,
            })
            .collect();

        let config = Config::default();
        let sim_config = SimulationConfig::default();

        // Run simulation multiple times
        let baseline = run_simulation(
            commands.clone(),
            ParseStats::default(),
            &config,
            sim_config.clone(),
        );

        for _ in 0..10 {
            let rerun = run_simulation(
                commands.clone(),
                ParseStats::default(),
                &config,
                sim_config.clone(),
            );

            // Compare summaries
            assert_eq!(baseline.summary.total_commands, rerun.summary.total_commands);
            assert_eq!(baseline.summary.allow_count, rerun.summary.allow_count);
            assert_eq!(baseline.summary.deny_count, rerun.summary.deny_count);

            // Compare rule order
            assert_eq!(baseline.rules.len(), rerun.rules.len());
            for (expected, actual) in baseline.rules.iter().zip(rerun.rules.iter()) {
                assert_eq!(expected.rule_id, actual.rule_id);
                assert_eq!(expected.count, actual.count);
            }
        }
    }
}