1use serde::{Deserialize, Serialize};
21use std::io::{BufRead, BufReader, Read};
22
/// Schema version stamped into every [`SimulationResult`] and carried over into the JSON output.
pub const SIMULATE_SCHEMA_VERSION: u32 = 1;
25
/// Input format a line was recognised as.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SimulateInputFormat {
    /// The whole trimmed line is the command.
    PlainCommand,
    /// A JSON object with a `tool_name` and a `tool_input.command` string.
    HookJson,
    /// A pipe-separated `DCG_LOG_V1` record whose fourth field is the base64-encoded command.
    DecisionLog,
}
37
/// Outcome of parsing a single input line.
#[derive(Debug, Clone)]
pub enum ParsedLine {
    /// A command was extracted from the line.
    Command {
        /// The extracted command text.
        command: String,
        /// The format the line was recognised as.
        format: SimulateInputFormat,
    },
    /// The line was understood but carries nothing to evaluate (e.g. a hook event for another tool).
    Ignore { reason: &'static str },
    /// The line looked like a structured record but was invalid, or its command exceeded the size limit.
    Malformed { error: String },
    /// The line was empty after trimming whitespace.
    Empty,
}
53
/// Optional caps applied while reading simulation input; `None` means "no cap".
#[derive(Debug, Clone)]
pub struct SimulateLimits {
    /// Stop once this many lines have been read.
    pub max_lines: Option<usize>,
    /// Stop once at least this many bytes have been read.
    pub max_bytes: Option<usize>,
    /// Commands longer than this many bytes are reported as malformed.
    pub max_command_bytes: Option<usize>,
}

impl Default for SimulateLimits {
    /// No line or byte cap; commands are capped at 64 KiB.
    fn default() -> Self {
        const KIB: usize = 1024;

        Self {
            max_command_bytes: Some(64 * KIB),
            max_lines: None,
            max_bytes: None,
        }
    }
}
74
/// Counters accumulated by [`SimulateParser`] while it reads input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ParseStats {
    /// Number of lines successfully read from the input.
    pub lines_read: usize,
    /// Total bytes consumed by those reads.
    pub bytes_read: usize,
    /// Lines that produced a command.
    pub commands_extracted: usize,
    /// Lines classified as malformed.
    pub malformed_count: usize,
    /// Lines classified as ignorable.
    pub ignored_count: usize,
    /// Lines that were empty after trimming.
    pub empty_count: usize,
    /// `true` once reading stopped because a configured limit was reached.
    pub stopped_at_limit: bool,
    /// Which limit stopped reading, if any.
    pub limit_hit: Option<LimitHit>,
}
95
/// Identifies the limit that made the parser stop early.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LimitHit {
    /// The `max_lines` limit was reached.
    MaxLines,
    /// The `max_bytes` limit was reached.
    MaxBytes,
}
103
/// Line-by-line parser over any [`Read`] source, tracking [`ParseStats`] as it goes.
pub struct SimulateParser<R: Read> {
    /// Buffered input source.
    reader: BufReader<R>,
    /// Limits checked before each read and applied to extracted commands.
    limits: SimulateLimits,
    /// Running counters.
    stats: ParseStats,
    /// When set, a malformed line is surfaced as an error instead of a parsed value.
    strict: bool,
}
113
114impl<R: Read> SimulateParser<R> {
115 pub fn new(reader: R, limits: SimulateLimits) -> Self {
117 Self {
118 reader: BufReader::new(reader),
119 limits,
120 stats: ParseStats::default(),
121 strict: false,
122 }
123 }
124
125 #[must_use]
127 pub const fn strict(mut self, strict: bool) -> Self {
128 self.strict = strict;
129 self
130 }
131
132 pub const fn stats(&self) -> &ParseStats {
134 &self.stats
135 }
136
137 pub fn into_stats(self) -> ParseStats {
139 self.stats
140 }
141
142 pub fn next_line(&mut self) -> Option<Result<ParsedLine, ParseError>> {
147 if let Some(max_lines) = self.limits.max_lines {
149 if self.stats.lines_read >= max_lines {
150 self.stats.stopped_at_limit = true;
151 self.stats.limit_hit = Some(LimitHit::MaxLines);
152 return None;
153 }
154 }
155
156 if let Some(max_bytes) = self.limits.max_bytes {
157 if self.stats.bytes_read >= max_bytes {
158 self.stats.stopped_at_limit = true;
159 self.stats.limit_hit = Some(LimitHit::MaxBytes);
160 return None;
161 }
162 }
163
164 let mut line = String::new();
166 match self.reader.read_line(&mut line) {
167 Ok(0) => return None, Ok(n) => {
169 self.stats.lines_read += 1;
170 self.stats.bytes_read += n;
171 }
172 Err(e) => {
173 return Some(Err(ParseError::Io(e.to_string())));
174 }
175 }
176
177 let parsed = parse_line(&line, self.limits.max_command_bytes);
179
180 match &parsed {
182 ParsedLine::Command { .. } => self.stats.commands_extracted += 1,
183 ParsedLine::Malformed { error } => {
184 self.stats.malformed_count += 1;
185 if self.strict {
186 return Some(Err(ParseError::Malformed {
187 line: self.stats.lines_read,
188 error: error.clone(),
189 }));
190 }
191 }
192 ParsedLine::Ignore { .. } => self.stats.ignored_count += 1,
193 ParsedLine::Empty => self.stats.empty_count += 1,
194 }
195
196 Some(Ok(parsed))
197 }
198
199 pub fn collect_commands(mut self) -> Result<(Vec<ParsedCommand>, ParseStats), ParseError> {
208 let mut commands = Vec::new();
209
210 while let Some(result) = self.next_line() {
211 match result? {
212 ParsedLine::Command { command, format } => {
213 commands.push(ParsedCommand {
214 command,
215 format,
216 line_number: self.stats.lines_read,
217 });
218 }
219 ParsedLine::Ignore { .. } | ParsedLine::Malformed { .. } | ParsedLine::Empty => {
220 }
222 }
223 }
224
225 Ok((commands, self.stats))
226 }
227}
228
/// Iterating a parser yields exactly what repeated `next_line` calls would.
impl<R: Read> Iterator for SimulateParser<R> {
    type Item = Result<ParsedLine, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.next_line()
    }
}
237
/// A command extracted from the input, with its provenance.
#[derive(Debug, Clone, Serialize)]
pub struct ParsedCommand {
    /// The command text.
    pub command: String,
    /// The input format the command was extracted from.
    pub format: SimulateInputFormat,
    /// 1-based number of the input line that produced the command.
    pub line_number: usize,
}
248
/// Errors surfaced while reading simulation input.
#[derive(Debug, Clone)]
pub enum ParseError {
    /// The underlying reader failed; carries the stringified I/O error.
    Io(String),
    /// A malformed line was encountered (reported only in strict mode).
    Malformed {
        /// 1-based line number of the offending line.
        line: usize,
        /// Description of what was wrong with it.
        error: String,
    },
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Malformed {
                line,
                error: reason,
            } => write!(f, "Line {line}: {reason}"),
            Self::Io(message) => write!(f, "I/O error: {message}"),
        }
    }
}

impl std::error::Error for ParseError {}
268
269fn parse_line(line: &str, max_command_bytes: Option<usize>) -> ParsedLine {
275 let trimmed = line.trim();
276
277 if trimmed.is_empty() {
279 return ParsedLine::Empty;
280 }
281
282 if trimmed.starts_with("DCG_LOG_V") {
284 return parse_decision_log(trimmed, max_command_bytes);
285 }
286
287 if trimmed.starts_with('{') {
291 if let Some(result) = try_parse_hook_json(trimmed, max_command_bytes) {
292 return result;
293 }
294 return parse_plain_command(trimmed, max_command_bytes);
296 }
297
298 parse_plain_command(trimmed, max_command_bytes)
300}
301
302fn try_parse_hook_json(line: &str, max_command_bytes: Option<usize>) -> Option<ParsedLine> {
308 let value: serde_json::Value = serde_json::from_str(line).ok()?;
312 let serde_json::Value::Object(map) = value else {
313 return None;
314 };
315
316 let tool_name_value = map.get("tool_name")?;
317 let serde_json::Value::String(tool_name) = tool_name_value else {
318 return Some(ParsedLine::Malformed {
319 error: "tool_name must be a string".to_string(),
320 });
321 };
322
323 if tool_name != "Bash" && tool_name != "launch-process" {
325 return Some(ParsedLine::Ignore {
326 reason: "non-Bash/launch-process tool",
327 });
328 }
329
330 let tool_input_value = map.get("tool_input").ok_or_else(|| ParsedLine::Malformed {
331 error: "missing tool_input".to_string(),
332 });
333 let tool_input_value = match tool_input_value {
334 Ok(value) => value,
335 Err(err) => return Some(err),
336 };
337
338 let serde_json::Value::Object(tool_input_map) = tool_input_value else {
339 return Some(ParsedLine::Malformed {
340 error: "tool_input must be an object".to_string(),
341 });
342 };
343
344 let command_value = tool_input_map
345 .get("command")
346 .ok_or_else(|| ParsedLine::Malformed {
347 error: "missing command in tool_input".to_string(),
348 });
349 let command_value = match command_value {
350 Ok(value) => value,
351 Err(err) => return Some(err),
352 };
353
354 let serde_json::Value::String(command) = command_value else {
355 return Some(ParsedLine::Malformed {
356 error: "command must be a string".to_string(),
357 });
358 };
359
360 if let Some(max_bytes) = max_command_bytes {
362 if command.len() > max_bytes {
363 return Some(ParsedLine::Malformed {
364 error: format!(
365 "command exceeds max length ({} > {max_bytes} bytes)",
366 command.len()
367 ),
368 });
369 }
370 }
371
372 Some(ParsedLine::Command {
373 command: command.clone(),
374 format: SimulateInputFormat::HookJson,
375 })
376}
377
378fn parse_decision_log(line: &str, max_command_bytes: Option<usize>) -> ParsedLine {
380 use base64::Engine;
381
382 let parts: Vec<&str> = line.splitn(5, '|').collect();
388
389 if parts.len() < 4 {
390 return ParsedLine::Malformed {
391 error: "invalid decision log format (expected at least 4 pipe-separated fields)"
392 .to_string(),
393 };
394 }
395
396 let version = parts[0];
397 if version != "DCG_LOG_V1" {
398 return ParsedLine::Malformed {
399 error: format!("unsupported log version: {version}"),
400 };
401 }
402
403 let command_b64 = parts[3];
408
409 let command = match base64::engine::general_purpose::STANDARD.decode(command_b64) {
411 Ok(bytes) => match String::from_utf8(bytes) {
412 Ok(s) => s,
413 Err(_) => {
414 return ParsedLine::Malformed {
415 error: "command is not valid UTF-8".to_string(),
416 };
417 }
418 },
419 Err(e) => {
420 return ParsedLine::Malformed {
421 error: format!("invalid base64 in command field: {e}"),
422 };
423 }
424 };
425
426 if let Some(max_bytes) = max_command_bytes {
428 if command.len() > max_bytes {
429 return ParsedLine::Malformed {
430 error: format!(
431 "command exceeds max length ({} > {max_bytes} bytes)",
432 command.len()
433 ),
434 };
435 }
436 }
437
438 ParsedLine::Command {
439 command,
440 format: SimulateInputFormat::DecisionLog,
441 }
442}
443
444fn parse_plain_command(line: &str, max_command_bytes: Option<usize>) -> ParsedLine {
446 if let Some(max_bytes) = max_command_bytes {
448 if line.len() > max_bytes {
449 return ParsedLine::Malformed {
450 error: format!(
451 "command exceeds max length ({} > {max_bytes} bytes)",
452 line.len()
453 ),
454 };
455 }
456 }
457
458 ParsedLine::Command {
459 command: line.to_string(),
460 format: SimulateInputFormat::PlainCommand,
461 }
462}
463
464use crate::config::Config;
472use crate::evaluator::{EvaluationDecision, EvaluationResult, evaluate_command_with_pack_order};
473use crate::packs::REGISTRY;
474use std::collections::{HashMap, HashSet};
475
/// Default number of example commands kept per rule.
pub const DEFAULT_EXEMPLAR_LIMIT: usize = 3;
478
/// Simulated outcome for a command.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SimulateDecision {
    /// The command would be allowed.
    Allow,
    /// The command matched a rule whose effective mode is warn.
    Warn,
    /// The command would be denied.
    Deny,
}
490
491impl SimulateDecision {
492 #[inline]
494 #[must_use]
495 pub const fn from_evaluation(result: &EvaluationResult) -> Self {
496 match result.decision {
497 EvaluationDecision::Allow => Self::Allow,
498 EvaluationDecision::Deny => {
499 match result.effective_mode {
501 Some(crate::packs::DecisionMode::Warn) => Self::Warn,
502 Some(crate::packs::DecisionMode::Log) => Self::Allow,
503 _ => Self::Deny,
504 }
505 }
506 }
507 }
508}
509
/// An example command that triggered a rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Exemplar {
    /// The command text, possibly shortened and suffixed with `...`.
    pub command: String,
    /// Line number the command was recorded with.
    pub line_number: usize,
    /// Byte length of the command before any shortening.
    pub original_length: usize,
}
520
/// Aggregated statistics for one rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleStats {
    /// Rule identifier in the form `pack_id:pattern_name`.
    pub rule_id: String,
    /// Pack the rule belongs to (`unknown` when the match carried none).
    pub pack_id: String,
    /// Pattern name within the pack (`unknown` when the match carried none).
    pub pattern_name: String,
    /// Number of commands that matched the rule.
    pub count: usize,
    /// Decision recorded when the rule was first seen.
    pub decision: SimulateDecision,
    /// Up to the configured number of example commands.
    pub exemplars: Vec<Exemplar>,
}
537
/// Aggregated statistics for one pack.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackStats {
    /// Pack identifier.
    pub pack_id: String,
    /// Total matches attributed to the pack.
    pub count: usize,
    /// Matches per decision, keyed by `allow`, `warn` or `deny`.
    pub by_decision: HashMap<String, usize>,
}
548
/// Overall decision counts for a simulation run.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SimulationSummary {
    /// Number of commands recorded.
    pub total_commands: usize,
    /// Commands whose simulated decision was allow.
    pub allow_count: usize,
    /// Commands whose simulated decision was warn.
    pub warn_count: usize,
    /// Commands whose simulated decision was deny.
    pub deny_count: usize,
}
561
/// Complete result of a simulation run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulationResult {
    /// Schema version of this result.
    pub schema_version: u32,
    /// Overall decision counts.
    pub summary: SimulationSummary,
    /// Per-rule statistics, ordered by descending count and then rule id.
    pub rules: Vec<RuleStats>,
    /// Per-pack statistics, ordered by descending count and then pack id.
    pub packs: Vec<PackStats>,
    /// Statistics from parsing the input.
    pub parse_stats: ParseStats,
}
576
/// Options controlling how simulation results are aggregated.
#[derive(Debug, Clone)]
pub struct SimulationConfig {
    /// Maximum number of exemplars kept per rule.
    pub exemplar_limit: usize,
    /// Exemplar commands longer than this many bytes are shortened.
    pub max_exemplar_command_len: usize,
    /// Whether matches overridden by an allowlist are recorded in rule and pack statistics.
    pub include_allowlisted: bool,
}
587
impl Default for SimulationConfig {
    /// Keeps `DEFAULT_EXEMPLAR_LIMIT` exemplars per rule, shortens exemplars
    /// beyond 200 bytes, and includes allowlisted matches.
    fn default() -> Self {
        Self {
            exemplar_limit: DEFAULT_EXEMPLAR_LIMIT,
            max_exemplar_command_len: 200,
            include_allowlisted: true,
        }
    }
}
597
/// Accumulates evaluation results into summary, per-rule and per-pack statistics.
#[derive(Debug)]
pub struct SimulationAggregator {
    /// Aggregation options.
    config: SimulationConfig,
    /// Running overall decision counts.
    summary: SimulationSummary,
    /// Per-rule accumulators keyed by `pack_id:pattern_name`.
    rule_builders: HashMap<String, RuleStatsBuilder>,
    /// Per-pack match counts, broken down by decision.
    pack_counts: HashMap<String, HashMap<SimulateDecision, usize>>,
}
608
/// Mutable accumulator for one rule; turned into `RuleStats` by `build`.
#[derive(Debug)]
struct RuleStatsBuilder {
    /// Pack the rule belongs to.
    pack_id: String,
    /// Pattern name within the pack.
    pattern_name: String,
    /// Number of matches recorded so far.
    count: usize,
    /// Decision supplied when the builder was created.
    decision: SimulateDecision,
    /// Exemplars collected so far (never more than `exemplar_limit`).
    exemplars: Vec<Exemplar>,
    /// Maximum number of exemplars to keep.
    exemplar_limit: usize,
}
619
620impl RuleStatsBuilder {
621 fn new(
622 pack_id: String,
623 pattern_name: String,
624 decision: SimulateDecision,
625 exemplar_limit: usize,
626 ) -> Self {
627 Self {
628 pack_id,
629 pattern_name,
630 count: 0,
631 decision,
632 exemplars: Vec::with_capacity(exemplar_limit),
633 exemplar_limit,
634 }
635 }
636
637 fn add_match(&mut self, command: &str, line_number: usize, max_len: usize) {
638 self.count += 1;
639 if self.exemplars.len() < self.exemplar_limit {
640 let truncated = if command.len() > max_len {
641 let target = max_len.saturating_sub(3);
643 let mut end = target;
644 while end > 0 && !command.is_char_boundary(end) {
645 end -= 1;
646 }
647 format!("{}...", &command[..end])
648 } else {
649 command.to_string()
650 };
651 self.exemplars.push(Exemplar {
652 command: truncated,
653 line_number,
654 original_length: command.len(),
655 });
656 }
657 }
658
659 fn build(self, rule_id: String) -> RuleStats {
660 RuleStats {
661 rule_id,
662 pack_id: self.pack_id,
663 pattern_name: self.pattern_name,
664 count: self.count,
665 decision: self.decision,
666 exemplars: self.exemplars,
667 }
668 }
669}
670
671impl SimulationAggregator {
672 #[must_use]
674 pub fn new(config: SimulationConfig) -> Self {
675 Self {
676 config,
677 summary: SimulationSummary::default(),
678 rule_builders: HashMap::new(),
679 pack_counts: HashMap::new(),
680 }
681 }
682
683 pub fn record(&mut self, command: &str, line_number: usize, result: &EvaluationResult) {
685 self.summary.total_commands += 1;
686 let decision = SimulateDecision::from_evaluation(result);
687
688 match decision {
689 SimulateDecision::Allow => self.summary.allow_count += 1,
690 SimulateDecision::Warn => self.summary.warn_count += 1,
691 SimulateDecision::Deny => self.summary.deny_count += 1,
692 }
693
694 if let Some(ref pattern_info) = result.pattern_info {
695 let pack_id = pattern_info
696 .pack_id
697 .as_deref()
698 .unwrap_or("unknown")
699 .to_string();
700 let pattern_name = pattern_info
701 .pattern_name
702 .as_deref()
703 .unwrap_or("unknown")
704 .to_string();
705 let rule_id = format!("{pack_id}:{pattern_name}");
706
707 let builder = self.rule_builders.entry(rule_id).or_insert_with(|| {
708 RuleStatsBuilder::new(
709 pack_id.clone(),
710 pattern_name,
711 decision,
712 self.config.exemplar_limit,
713 )
714 });
715 builder.add_match(command, line_number, self.config.max_exemplar_command_len);
716
717 let pack_decisions = self.pack_counts.entry(pack_id).or_default();
718 *pack_decisions.entry(decision).or_insert(0) += 1;
719 } else if let Some(ref allowlist_override) = result.allowlist_override {
720 if self.config.include_allowlisted {
721 let pack_id = allowlist_override
722 .matched
723 .pack_id
724 .as_deref()
725 .unwrap_or("unknown")
726 .to_string();
727 let pattern_name = allowlist_override
728 .matched
729 .pattern_name
730 .as_deref()
731 .unwrap_or("unknown")
732 .to_string();
733 let rule_id = format!("{pack_id}:{pattern_name}");
734
735 let builder = self.rule_builders.entry(rule_id).or_insert_with(|| {
736 RuleStatsBuilder::new(
737 pack_id.clone(),
738 pattern_name,
739 SimulateDecision::Allow,
740 self.config.exemplar_limit,
741 )
742 });
743 builder.add_match(command, line_number, self.config.max_exemplar_command_len);
744
745 let pack_decisions = self.pack_counts.entry(pack_id).or_default();
746 *pack_decisions.entry(SimulateDecision::Allow).or_insert(0) += 1;
747 }
748 }
749 }
750
751 #[must_use]
753 pub fn finalize(self, parse_stats: ParseStats) -> SimulationResult {
754 let mut rules: Vec<RuleStats> = self
755 .rule_builders
756 .into_iter()
757 .map(|(rule_id, builder)| builder.build(rule_id))
758 .collect();
759
760 rules.sort_by(|a, b| {
761 b.count
762 .cmp(&a.count)
763 .then_with(|| a.rule_id.cmp(&b.rule_id))
764 });
765
766 let mut packs: Vec<PackStats> = self
767 .pack_counts
768 .into_iter()
769 .map(|(pack_id, decisions)| {
770 let count = decisions.values().sum();
771 let by_decision: HashMap<String, usize> = decisions
772 .into_iter()
773 .map(|(d, c)| {
774 let key = match d {
775 SimulateDecision::Allow => "allow",
776 SimulateDecision::Warn => "warn",
777 SimulateDecision::Deny => "deny",
778 };
779 (key.to_string(), c)
780 })
781 .collect();
782 PackStats {
783 pack_id,
784 count,
785 by_decision,
786 }
787 })
788 .collect();
789
790 packs.sort_by(|a, b| {
791 b.count
792 .cmp(&a.count)
793 .then_with(|| a.pack_id.cmp(&b.pack_id))
794 });
795
796 SimulationResult {
797 schema_version: SIMULATE_SCHEMA_VERSION,
798 summary: self.summary,
799 rules,
800 packs,
801 parse_stats,
802 }
803 }
804}
805
806pub fn run_simulation<I>(
808 commands: I,
809 parse_stats: ParseStats,
810 config: &Config,
811 sim_config: SimulationConfig,
812) -> SimulationResult
813where
814 I: IntoIterator<Item = ParsedCommand>,
815{
816 let enabled_packs: HashSet<String> = config.enabled_pack_ids();
817 let ordered_packs = REGISTRY.expand_enabled_ordered(&enabled_packs);
818 let keywords = REGISTRY.collect_enabled_keywords(&enabled_packs);
819 let keyword_index = REGISTRY.build_enabled_keyword_index(&ordered_packs);
820 let compiled_overrides = config.overrides.compile();
821 let allowlists = crate::allowlist::load_default_allowlists();
822 let heredoc_settings = config.heredoc_settings();
823
824 let mut aggregator = SimulationAggregator::new(sim_config);
825
826 for cmd in commands {
827 let result = evaluate_command_with_pack_order(
828 &cmd.command,
829 &keywords,
830 &ordered_packs,
831 keyword_index.as_ref(),
832 &compiled_overrides,
833 &allowlists,
834 &heredoc_settings,
835 );
836 aggregator.record(&cmd.command, cmd.line_number, &result);
837 }
838
839 aggregator.finalize(parse_stats)
840}
841
842pub fn run_simulation_from_reader<R: std::io::Read>(
848 reader: R,
849 limits: SimulateLimits,
850 config: &Config,
851 sim_config: SimulationConfig,
852 strict: bool,
853) -> Result<SimulationResult, ParseError> {
854 let parser = SimulateParser::new(reader, limits).strict(strict);
855 let (commands, parse_stats) = parser.collect_commands()?;
856 Ok(run_simulation(commands, parse_stats, config, sim_config))
857}
858
859use crate::scan::ScanRedactMode;
864
/// Options controlling how simulation results are rendered.
#[derive(Debug, Clone)]
pub struct SimulateOutputConfig {
    /// Redaction applied to exemplar commands before display.
    pub redact: ScanRedactMode,
    /// Maximum displayed command length in bytes; `0` disables truncation.
    pub truncate: usize,
    /// Maximum number of rules to show; `0` shows all of them.
    pub top: usize,
    /// Whether the pretty output lists exemplars under each rule.
    pub verbose: bool,
}
877
impl Default for SimulateOutputConfig {
    /// No redaction, commands truncated to 120 bytes, top 20 rules, non-verbose.
    fn default() -> Self {
        Self {
            redact: ScanRedactMode::None,
            truncate: 120,
            top: 20,
            verbose: false,
        }
    }
}
888
/// Top-level document produced by the JSON output format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonOutput {
    /// Schema version copied from the simulation result.
    pub schema_version: u32,
    /// Overall decision counts.
    pub totals: SimulateJsonTotals,
    /// Rules that were triggered, possibly limited to the top entries.
    pub rules: Vec<SimulateJsonRule>,
    /// Parse problems and limit information.
    pub errors: SimulateJsonErrors,
}
897
/// Decision totals in the JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonTotals {
    /// Total number of commands evaluated.
    pub commands: usize,
    /// Commands that were allowed.
    pub allowed: usize,
    /// Commands that produced a warning.
    pub warned: usize,
    /// Commands that were denied.
    pub denied: usize,
}
905
/// One triggered rule in the JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonRule {
    /// Rule identifier.
    pub rule_id: String,
    /// Number of commands that matched the rule.
    pub count: usize,
    /// Decision as `allow`, `warn` or `deny`.
    pub decision: String,
    /// Example commands after redaction and truncation.
    pub exemplars: Vec<String>,
}
913
/// Parse problems and limit information in the JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimulateJsonErrors {
    /// Number of malformed input lines.
    pub malformed_count: usize,
    /// Number of ignored input lines.
    pub ignored_count: usize,
    /// Whether reading stopped because a limit was reached.
    pub stopped_at_limit: bool,
    /// Name of the limit that was hit; left out of the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub limit_hit: Option<String>,
}
922
923#[must_use]
925pub fn redact_and_truncate_command(cmd: &str, config: &SimulateOutputConfig) -> String {
926 let redacted = match config.redact {
927 ScanRedactMode::None => cmd.to_string(),
928 ScanRedactMode::Quoted => crate::scan::redact_quoted_strings(cmd),
929 ScanRedactMode::Aggressive => crate::scan::redact_aggressively(cmd),
930 };
931
932 if config.truncate > 0 && redacted.len() > config.truncate {
933 let target = config.truncate.saturating_sub(3);
934 let mut end = target;
935 while end > 0 && !redacted.is_char_boundary(end) {
936 end -= 1;
937 }
938 format!("{}...", &redacted[..end])
939 } else {
940 redacted
941 }
942}
943
944#[must_use]
946#[allow(clippy::format_push_string)]
947pub fn format_pretty_output(result: &SimulationResult, config: &SimulateOutputConfig) -> String {
948 let mut output = String::new();
949 output.push_str("Simulation Results\n==================\n\n");
950 output.push_str("Summary:\n");
951 output.push_str(&format!(
952 " Total commands: {}\n",
953 result.summary.total_commands
954 ));
955 output.push_str(&format!(
956 " Allowed: {}\n",
957 result.summary.allow_count
958 ));
959 output.push_str(&format!(
960 " Warned: {}\n",
961 result.summary.warn_count
962 ));
963 output.push_str(&format!(
964 " Denied: {}\n",
965 result.summary.deny_count
966 ));
967 output.push('\n');
968
969 if !result.rules.is_empty() {
970 output.push_str("Rules Triggered (sorted by count):\n");
971 let rules_to_show: Vec<_> = if config.top > 0 {
972 result.rules.iter().take(config.top).collect()
973 } else {
974 result.rules.iter().collect()
975 };
976 for rule in rules_to_show {
977 let decision_str = match rule.decision {
978 SimulateDecision::Allow => "allow",
979 SimulateDecision::Warn => "warn",
980 SimulateDecision::Deny => "DENY",
981 };
982 output.push_str(&format!(
983 " {:>5} x {} [{}]\n",
984 rule.count, rule.rule_id, decision_str
985 ));
986 if config.verbose {
987 for ex in &rule.exemplars {
988 let display_cmd = redact_and_truncate_command(&ex.command, config);
989 output.push_str(&format!(" L{}: {}\n", ex.line_number, display_cmd));
990 }
991 }
992 }
993 if config.top > 0 && result.rules.len() > config.top {
994 output.push_str(&format!(
995 " ... and {} more rules\n",
996 result.rules.len() - config.top
997 ));
998 }
999 output.push('\n');
1000 }
1001
1002 if !result.packs.is_empty() {
1003 output.push_str("Packs Summary:\n");
1004 for pack in &result.packs {
1005 output.push_str(&format!(" {:>5} x {}\n", pack.count, pack.pack_id));
1006 }
1007 output.push('\n');
1008 }
1009
1010 output.push_str("Parse Statistics:\n");
1011 output.push_str(&format!(
1012 " Lines read: {}\n",
1013 result.parse_stats.lines_read
1014 ));
1015 output.push_str(&format!(
1016 " Commands extracted: {}\n",
1017 result.parse_stats.commands_extracted
1018 ));
1019 output.push_str(&format!(
1020 " Malformed lines: {}\n",
1021 result.parse_stats.malformed_count
1022 ));
1023 output.push_str(&format!(
1024 " Ignored lines: {}\n",
1025 result.parse_stats.ignored_count
1026 ));
1027 if result.parse_stats.stopped_at_limit {
1028 if let Some(ref limit) = result.parse_stats.limit_hit {
1029 output.push_str(&format!(" Stopped at limit: {limit:?}\n"));
1030 }
1031 }
1032 output
1033}
1034
1035pub fn format_json_output(
1041 result: SimulationResult,
1042 config: &SimulateOutputConfig,
1043) -> Result<String, serde_json::Error> {
1044 let rules_to_show: Vec<_> = if config.top > 0 {
1045 result.rules.into_iter().take(config.top).collect()
1046 } else {
1047 result.rules
1048 };
1049
1050 let json_rules: Vec<SimulateJsonRule> = rules_to_show
1051 .into_iter()
1052 .map(|r| {
1053 let exemplars: Vec<String> = r
1054 .exemplars
1055 .iter()
1056 .map(|ex| redact_and_truncate_command(&ex.command, config))
1057 .collect();
1058 SimulateJsonRule {
1059 rule_id: r.rule_id,
1060 count: r.count,
1061 decision: match r.decision {
1062 SimulateDecision::Allow => "allow".to_string(),
1063 SimulateDecision::Warn => "warn".to_string(),
1064 SimulateDecision::Deny => "deny".to_string(),
1065 },
1066 exemplars,
1067 }
1068 })
1069 .collect();
1070
1071 let output = SimulateJsonOutput {
1072 schema_version: result.schema_version,
1073 totals: SimulateJsonTotals {
1074 commands: result.summary.total_commands,
1075 allowed: result.summary.allow_count,
1076 warned: result.summary.warn_count,
1077 denied: result.summary.deny_count,
1078 },
1079 rules: json_rules,
1080 errors: SimulateJsonErrors {
1081 malformed_count: result.parse_stats.malformed_count,
1082 ignored_count: result.parse_stats.ignored_count,
1083 stopped_at_limit: result.parse_stats.stopped_at_limit,
1084 limit_hit: result.parse_stats.limit_hit.map(|l| format!("{l:?}")),
1085 },
1086 };
1087
1088 serde_json::to_string_pretty(&output)
1089}
1090
1091#[cfg(test)]
1096mod tests {
1097 use super::*;
1098
1099 #[test]
1104 fn detect_plain_command() {
1105 let result = parse_line("git status --short", None);
1106 assert!(
1107 matches!(&result, ParsedLine::Command { .. }),
1108 "expected Command, got {result:?}"
1109 );
1110 if let ParsedLine::Command { command, format } = result {
1111 assert_eq!(command, "git status --short");
1112 assert_eq!(format, SimulateInputFormat::PlainCommand);
1113 }
1114 }
1115
1116 #[test]
1117 fn detect_hook_json_bash() {
1118 let line = r#"{"tool_name":"Bash","tool_input":{"command":"git status"}}"#;
1119 let result = parse_line(line, None);
1120 assert!(
1121 matches!(&result, ParsedLine::Command { .. }),
1122 "expected Command, got {result:?}"
1123 );
1124 if let ParsedLine::Command { command, format } = result {
1125 assert_eq!(command, "git status");
1126 assert_eq!(format, SimulateInputFormat::HookJson);
1127 }
1128 }
1129
1130 #[test]
1131 fn detect_hook_json_launch_process() {
1132 let line = r#"{"tool_name":"launch-process","tool_input":{"command":"git status"}}"#;
1134 let result = parse_line(line, None);
1135 assert!(
1136 matches!(&result, ParsedLine::Command { .. }),
1137 "expected Command, got {result:?}"
1138 );
1139 if let ParsedLine::Command { command, format } = result {
1140 assert_eq!(command, "git status");
1141 assert_eq!(format, SimulateInputFormat::HookJson);
1142 }
1143 }
1144
1145 #[test]
1146 fn detect_hook_json_non_bash_ignored() {
1147 let line = r#"{"tool_name":"Read","tool_input":{"path":"/etc/passwd"}}"#;
1148 let result = parse_line(line, None);
1149 assert!(
1150 matches!(&result, ParsedLine::Ignore { .. }),
1151 "expected Ignore, got {result:?}"
1152 );
1153 if let ParsedLine::Ignore { reason } = result {
1154 assert_eq!(reason, "non-Bash/launch-process tool");
1155 }
1156 }
1157
1158 #[test]
1159 fn detect_hook_json_command_wrong_type() {
1160 let line = r#"{"tool_name":"Bash","tool_input":{"command":123}}"#;
1161 let result = parse_line(line, None);
1162 assert!(
1163 matches!(&result, ParsedLine::Malformed { .. }),
1164 "expected Malformed, got {result:?}"
1165 );
1166 if let ParsedLine::Malformed { error } = result {
1167 assert_eq!(error, "command must be a string");
1168 }
1169 }
1170
1171 #[test]
1172 fn detect_hook_json_tool_name_wrong_type() {
1173 let line = r#"{"tool_name":42,"tool_input":{"command":"git status"}}"#;
1174 let result = parse_line(line, None);
1175 assert!(
1176 matches!(&result, ParsedLine::Malformed { .. }),
1177 "expected Malformed, got {result:?}"
1178 );
1179 if let ParsedLine::Malformed { error } = result {
1180 assert_eq!(error, "tool_name must be a string");
1181 }
1182 }
1183
1184 #[test]
1185 fn detect_decision_log() {
1186 let line = "DCG_LOG_V1|2026-01-09T00:00:00Z|allow|Z2l0IHN0YXR1cw==|";
1188 let result = parse_line(line, None);
1189 assert!(
1190 matches!(&result, ParsedLine::Command { .. }),
1191 "expected Command, got {result:?}"
1192 );
1193 if let ParsedLine::Command { command, format } = result {
1194 assert_eq!(command, "git status");
1195 assert_eq!(format, SimulateInputFormat::DecisionLog);
1196 }
1197 }
1198
1199 #[test]
1200 fn empty_line() {
1201 assert!(matches!(parse_line("", None), ParsedLine::Empty));
1202 assert!(matches!(parse_line(" ", None), ParsedLine::Empty));
1203 assert!(matches!(parse_line("\t\n", None), ParsedLine::Empty));
1204 }
1205
1206 #[test]
1207 fn invalid_json_falls_back_to_plain_command() {
1208 let result = parse_line("{invalid json}", None);
1211 assert!(
1212 matches!(&result, ParsedLine::Command { .. }),
1213 "expected Command (PlainCommand), got {result:?}"
1214 );
1215 if let ParsedLine::Command { command, format } = result {
1216 assert_eq!(command, "{invalid json}");
1217 assert_eq!(format, SimulateInputFormat::PlainCommand);
1218 }
1219 }
1220
1221 #[test]
1222 fn shell_brace_block_as_plain_command() {
1223 let result = parse_line("{ echo hello; } | cat", None);
1225 assert!(
1226 matches!(&result, ParsedLine::Command { .. }),
1227 "expected Command (PlainCommand), got {result:?}"
1228 );
1229 if let ParsedLine::Command { command, format } = result {
1230 assert_eq!(command, "{ echo hello; } | cat");
1231 assert_eq!(format, SimulateInputFormat::PlainCommand);
1232 }
1233 }
1234
1235 #[test]
1236 fn valid_json_missing_command_is_malformed() {
1237 let line = r#"{"tool_name":"Bash","tool_input":{}}"#;
1240 let result = parse_line(line, None);
1241 assert!(
1242 matches!(&result, ParsedLine::Malformed { .. }),
1243 "expected Malformed, got {result:?}"
1244 );
1245 if let ParsedLine::Malformed { error } = result {
1246 assert!(error.contains("missing command"));
1247 }
1248 }
1249
1250 #[test]
1251 fn malformed_decision_log_wrong_version() {
1252 let line = "DCG_LOG_V99|timestamp|allow|cmd|";
1253 let result = parse_line(line, None);
1254 assert!(
1255 matches!(&result, ParsedLine::Malformed { .. }),
1256 "expected Malformed, got {result:?}"
1257 );
1258 if let ParsedLine::Malformed { error } = result {
1259 assert!(error.contains("unsupported log version"));
1260 }
1261 }
1262
1263 #[test]
1268 fn command_length_limit() {
1269 let long_cmd = "x".repeat(1000);
1270 let result = parse_line(&long_cmd, Some(500));
1271 assert!(
1272 matches!(&result, ParsedLine::Malformed { .. }),
1273 "expected Malformed, got {result:?}"
1274 );
1275 if let ParsedLine::Malformed { error } = result {
1276 assert!(error.contains("exceeds max length"));
1277 }
1278 }
1279
1280 #[test]
1281 fn command_within_limit() {
1282 let cmd = "git status";
1283 let result = parse_line(cmd, Some(500));
1284 assert!(matches!(result, ParsedLine::Command { .. }));
1285 }
1286
1287 #[test]
1292 fn parser_collects_commands() {
1293 let input = r#"git status
1294{"tool_name":"Bash","tool_input":{"command":"git log"}}
1295{"tool_name":"Read","tool_input":{"path":"file.txt"}}
1296
1297echo hello
1298"#;
1299
1300 let parser = SimulateParser::new(input.as_bytes(), SimulateLimits::default());
1301 let (commands, stats) = parser.collect_commands().unwrap();
1302
1303 assert_eq!(commands.len(), 3);
1304 assert_eq!(commands[0].command, "git status");
1305 assert_eq!(commands[0].format, SimulateInputFormat::PlainCommand);
1306 assert_eq!(commands[1].command, "git log");
1307 assert_eq!(commands[1].format, SimulateInputFormat::HookJson);
1308 assert_eq!(commands[2].command, "echo hello");
1309
1310 assert_eq!(stats.lines_read, 5);
1311 assert_eq!(stats.commands_extracted, 3);
1312 assert_eq!(stats.ignored_count, 1); assert_eq!(stats.empty_count, 1);
1314 assert_eq!(stats.malformed_count, 0);
1315 }
1316
1317 #[test]
1318 fn parser_respects_line_limit() {
1319 let input = "line1\nline2\nline3\nline4\nline5\n";
1320
1321 let limits = SimulateLimits {
1322 max_lines: Some(3),
1323 ..Default::default()
1324 };
1325 let parser = SimulateParser::new(input.as_bytes(), limits);
1326 let (commands, stats) = parser.collect_commands().unwrap();
1327
1328 assert_eq!(commands.len(), 3);
1329 assert_eq!(stats.lines_read, 3);
1330 assert!(stats.stopped_at_limit);
1331 assert!(matches!(stats.limit_hit, Some(LimitHit::MaxLines)));
1332 }
1333
1334 #[test]
1335 fn parser_strict_mode_fails_on_malformed() {
1336 let input = r#"git status
1338{"tool_name":"Bash","tool_input":{}}
1339echo hello
1340"#;
1341
1342 let parser = SimulateParser::new(input.as_bytes(), SimulateLimits::default()).strict(true);
1343 let result = parser.collect_commands();
1344
1345 assert!(result.is_err());
1346 let err = result.unwrap_err();
1347 assert!(matches!(err, ParseError::Malformed { line: 2, .. }));
1348 }
1349
1350 #[test]
1351 fn parser_non_strict_continues_on_malformed() {
1352 let input = r#"git status
1354{"tool_name":"Bash","tool_input":{}}
1355echo hello
1356"#;
1357
1358 let parser = SimulateParser::new(input.as_bytes(), SimulateLimits::default()).strict(false);
1359 let (commands, stats) = parser.collect_commands().unwrap();
1360
1361 assert_eq!(commands.len(), 2); assert_eq!(stats.malformed_count, 1);
1363 }
1364
1365 #[test]
1366 fn parser_treats_invalid_json_as_plain_command() {
1367 let input = r"git status
1369{ echo hello; }
1370echo world
1371";
1372
1373 let parser = SimulateParser::new(input.as_bytes(), SimulateLimits::default());
1374 let (commands, stats) = parser.collect_commands().unwrap();
1375
1376 assert_eq!(commands.len(), 3); assert_eq!(commands[1].command, "{ echo hello; }");
1378 assert_eq!(commands[1].format, SimulateInputFormat::PlainCommand);
1379 assert_eq!(stats.malformed_count, 0);
1380 }
1381
1382 #[test]
1387 fn parsing_is_deterministic() {
1388 let lines = [
1389 "git status",
1390 r#"{"tool_name":"Bash","tool_input":{"command":"ls"}}"#,
1391 "{broken",
1392 "",
1393 "DCG_LOG_V1|ts|allow|Z2l0IHN0YXR1cw==|",
1394 ];
1395
1396 for line in lines {
1398 let first = parse_line(line, None);
1399 for _ in 0..100 {
1400 let result = parse_line(line, None);
1401 assert_eq!(
1402 format!("{first:?}"),
1403 format!("{result:?}"),
1404 "Non-deterministic parsing for: {line}"
1405 );
1406 }
1407 }
1408 }
1409
1410 #[test]
1415 fn aggregator_counts_decisions_correctly() {
1416 let config = SimulationConfig::default();
1417 let mut agg = SimulationAggregator::new(config);
1418
1419 agg.record("ls", 1, &EvaluationResult::allowed());
1421 agg.record("git status", 2, &EvaluationResult::allowed());
1422 agg.record(
1423 "rm -rf /",
1424 3,
1425 &EvaluationResult::denied_by_pack("core.filesystem", "destructive", None),
1426 );
1427
1428 let parse_stats = ParseStats {
1429 lines_read: 3,
1430 commands_extracted: 3,
1431 ..Default::default()
1432 };
1433 let result = agg.finalize(parse_stats);
1434
1435 assert_eq!(result.summary.total_commands, 3);
1436 assert_eq!(result.summary.allow_count, 2);
1437 assert_eq!(result.summary.deny_count, 1);
1438 assert_eq!(result.summary.warn_count, 0);
1439 }
1440
1441 #[test]
1442 fn aggregator_sorts_rules_deterministically() {
1443 let config = SimulationConfig::default();
1444 let mut agg = SimulationAggregator::new(config);
1445
1446 agg.record(
1448 "cmd1",
1449 1,
1450 &EvaluationResult::denied_by_pack_pattern(
1451 "pack.b",
1452 "rule1",
1453 "test",
1454 None,
1455 crate::packs::Severity::Critical,
1456 &[],
1457 ),
1458 );
1459 agg.record(
1460 "cmd2",
1461 2,
1462 &EvaluationResult::denied_by_pack_pattern(
1463 "pack.a",
1464 "rule1",
1465 "test",
1466 None,
1467 crate::packs::Severity::Critical,
1468 &[],
1469 ),
1470 );
1471 agg.record(
1472 "cmd3",
1473 3,
1474 &EvaluationResult::denied_by_pack_pattern(
1475 "pack.b",
1476 "rule1",
1477 "test",
1478 None,
1479 crate::packs::Severity::Critical,
1480 &[],
1481 ),
1482 );
1483
1484 let parse_stats = ParseStats::default();
1485 let result = agg.finalize(parse_stats);
1486
1487 assert_eq!(result.rules.len(), 2);
1489 assert_eq!(result.rules[0].rule_id, "pack.b:rule1"); assert_eq!(result.rules[0].count, 2);
1491 assert_eq!(result.rules[1].rule_id, "pack.a:rule1"); assert_eq!(result.rules[1].count, 1);
1493 }
1494
1495 #[test]
1496 fn aggregator_samples_first_k_exemplars() {
1497 let config = SimulationConfig {
1498 exemplar_limit: 2,
1499 ..Default::default()
1500 };
1501 let mut agg = SimulationAggregator::new(config);
1502
1503 for i in 1..=5 {
1505 agg.record(
1506 &format!("cmd{i}"),
1507 i,
1508 &EvaluationResult::denied_by_pack_pattern(
1509 "pack.a",
1510 "rule1",
1511 "test",
1512 None,
1513 crate::packs::Severity::Critical,
1514 &[],
1515 ),
1516 );
1517 }
1518
1519 let parse_stats = ParseStats::default();
1520 let result = agg.finalize(parse_stats);
1521
1522 assert_eq!(result.rules[0].exemplars.len(), 2);
1524 assert_eq!(result.rules[0].exemplars[0].command, "cmd1");
1525 assert_eq!(result.rules[0].exemplars[0].line_number, 1);
1526 assert_eq!(result.rules[0].exemplars[1].command, "cmd2");
1527 assert_eq!(result.rules[0].exemplars[1].line_number, 2);
1528 }
1529
1530 #[test]
1531 fn exemplar_truncation_respects_max_len() {
1532 let config = SimulationConfig {
1533 exemplar_limit: 1,
1534 max_exemplar_command_len: 10, include_allowlisted: true,
1536 };
1537 let mut agg = SimulationAggregator::new(config);
1538
1539 agg.record(
1541 "12345678901234567890",
1542 1,
1543 &EvaluationResult::denied_by_pack_pattern(
1544 "pack.a",
1545 "rule1",
1546 "test",
1547 None,
1548 crate::packs::Severity::Critical,
1549 &[],
1550 ),
1551 );
1552
1553 let parse_stats = ParseStats::default();
1554 let result = agg.finalize(parse_stats);
1555
1556 let exemplar = &result.rules[0].exemplars[0];
1558 assert!(
1559 exemplar.command.len() <= 10,
1560 "Expected at most 10 chars, got {}: '{}'",
1561 exemplar.command.len(),
1562 exemplar.command
1563 );
1564 assert!(
1565 exemplar.command.ends_with("..."),
1566 "Expected ellipsis, got: '{}'",
1567 exemplar.command
1568 );
1569 assert_eq!(exemplar.original_length, 20);
1570 }
1571
1572 #[test]
1573 fn aggregation_is_deterministic() {
1574 let commands = vec![
1575 ParsedCommand {
1576 command: "rm -rf /".to_string(),
1577 format: SimulateInputFormat::PlainCommand,
1578 line_number: 1,
1579 },
1580 ParsedCommand {
1581 command: "git reset --hard".to_string(),
1582 format: SimulateInputFormat::PlainCommand,
1583 line_number: 2,
1584 },
1585 ParsedCommand {
1586 command: "rm -rf /tmp".to_string(),
1587 format: SimulateInputFormat::PlainCommand,
1588 line_number: 3,
1589 },
1590 ];
1591
1592 let config = Config::default();
1593 let sim_config = SimulationConfig::default();
1594
1595 let first = run_simulation(
1597 commands.clone(),
1598 ParseStats::default(),
1599 &config,
1600 sim_config.clone(),
1601 );
1602
1603 for _ in 0..10 {
1604 let result = run_simulation(
1605 commands.clone(),
1606 ParseStats::default(),
1607 &config,
1608 sim_config.clone(),
1609 );
1610
1611 assert_eq!(first.summary.total_commands, result.summary.total_commands);
1613 assert_eq!(first.summary.allow_count, result.summary.allow_count);
1614 assert_eq!(first.summary.deny_count, result.summary.deny_count);
1615
1616 assert_eq!(first.rules.len(), result.rules.len());
1618 for (a, b) in first.rules.iter().zip(result.rules.iter()) {
1619 assert_eq!(a.rule_id, b.rule_id);
1620 assert_eq!(a.count, b.count);
1621 }
1622 }
1623 }
1624}