use std::collections::HashMap;
use serde::Deserialize;
/// Hook payload as deserialized from JSON (see `parse_reader`).
///
/// Both fields are `Option`s, so a payload that omits them or sets them to
/// `null` still deserializes; the fields are then `None`.
#[derive(Debug, Clone, Deserialize)]
pub struct HookInput {
/// Name of the tool being invoked, e.g. `"Bash"`, `"Write"`, or a name
/// starting with `mcp__`.
pub tool_name: Option<String>,
/// Arguments supplied to the tool, if present.
pub tool_input: Option<ToolInput>,
}
/// Tool arguments carried by a [`HookInput`].
///
/// The named fields capture the argument keys this module reads directly;
/// every other key of the JSON object lands in `extra`.
#[derive(Debug, Clone, Deserialize)]
pub struct ToolInput {
/// Value of the `command` key (read for `Bash` and `mcp__*` tools).
pub command: Option<String>,
/// Value of the `file_path` key.
pub file_path: Option<String>,
/// Value of the `content` key (read for the `Write` tool).
pub content: Option<String>,
/// Value of the `new_string` key (read for the `Edit` tool).
pub new_string: Option<String>,
/// Value of the `old_string` key.
pub old_string: Option<String>,
/// Value of the `new_source` key (read for the `NotebookEdit` tool).
pub new_source: Option<String>,
/// Catch-all for keys not matched by a named field above. Because of
/// `#[serde(flatten)]`, keys that ARE matched by a named field do not
/// appear here.
#[serde(flatten)]
pub extra: HashMap<String, serde_json::Value>,
}
/// Origin of a piece of text handed to the scanner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ScanContext {
    /// The `command` of a `Bash` tool call.
    BashCommand,
    /// The `content` of a `Write` tool call.
    WriteContent,
    /// The `new_string` of an `Edit` tool call.
    EditNewString,
    /// The `new_source` of a `NotebookEdit` tool call.
    NotebookCell,
    /// A string argument of an `mcp__*` tool call.
    McpCommand,
}

impl ScanContext {
    /// Returns `true` for the file-content contexts (`WriteContent`,
    /// `EditNewString`, `NotebookCell`) and `false` for the command contexts.
    ///
    /// NOTE(review): presumably callers use this to soften a "block" verdict
    /// for text that is written to a file rather than executed — confirm
    /// against the call sites.
    #[must_use]
    pub const fn downgrade_block(&self) -> bool {
        match self {
            Self::WriteContent | Self::EditNewString | Self::NotebookCell => true,
            Self::BashCommand | Self::McpCommand => false,
        }
    }

    /// Returns `true` when the text is file/notebook content; this is the same
    /// set of variants as [`Self::downgrade_block`].
    #[must_use]
    pub const fn is_content(&self) -> bool {
        self.downgrade_block()
    }

    /// Returns `true` for `BashCommand` and `McpCommand`.
    #[must_use]
    pub const fn is_command(&self) -> bool {
        match self {
            Self::BashCommand | Self::McpCommand => true,
            Self::WriteContent | Self::EditNewString | Self::NotebookCell => false,
        }
    }
}
/// One piece of text to scan, tagged with where it came from.
#[derive(Debug, Clone)]
pub struct ScannableContent {
/// Which kind of tool argument produced `text`.
pub context: ScanContext,
/// The text itself, copied out of the hook input.
pub text: String,
}
#[must_use]
pub fn extract_scannable_content(input: &HookInput) -> Vec<ScannableContent> {
let mut items = Vec::new();
let tool_name = input.tool_name.as_deref().unwrap_or("");
let Some(tool_input) = &input.tool_input else {
return items;
};
match tool_name {
"Bash" => {
if let Some(cmd) = &tool_input.command {
items.push(ScannableContent {
context: ScanContext::BashCommand,
text: cmd.clone(),
});
}
}
"Write" => {
if let Some(content) = &tool_input.content {
items.push(ScannableContent {
context: ScanContext::WriteContent,
text: content.clone(),
});
}
}
"Edit" => {
if let Some(new_str) = &tool_input.new_string {
items.push(ScannableContent {
context: ScanContext::EditNewString,
text: new_str.clone(),
});
}
}
"NotebookEdit" => {
if let Some(src) = &tool_input.new_source {
items.push(ScannableContent {
context: ScanContext::NotebookCell,
text: src.clone(),
});
}
}
_ if tool_name.starts_with("mcp__") => {
McpStringCollector { items: &mut items }.collect_from_map(&tool_input.extra);
if let Some(cmd) = &tool_input.command {
items.push(ScannableContent {
context: ScanContext::McpCommand,
text: cmd.clone(),
});
}
}
_ => {}
}
items
}
/// Nesting limit for `McpStringCollector::collect_value`: a value reached at
/// this depth (top-level argument values are depth 0) is not inspected.
const MCP_JSON_MAX_DEPTH: usize = 8;
/// `McpStringCollector` stops collecting once the shared item list holds this
/// many entries.
const MCP_JSON_MAX_STRINGS: usize = 50;
struct McpStringCollector<'a> {
items: &'a mut Vec<ScannableContent>,
}
impl McpStringCollector<'_> {
fn is_full(&self) -> bool {
self.items.len() >= MCP_JSON_MAX_STRINGS
}
fn collect_from_map(&mut self, map: &HashMap<String, serde_json::Value>) {
for value in map.values() {
if self.is_full() {
break;
}
self.collect_value(value, 0);
}
}
fn collect_value(&mut self, value: &serde_json::Value, depth: usize) {
if depth >= MCP_JSON_MAX_DEPTH || self.is_full() {
return;
}
match value {
serde_json::Value::String(s) if !s.is_empty() => {
self.items.push(ScannableContent {
context: ScanContext::McpCommand,
text: s.clone(),
});
}
serde_json::Value::Array(arr) => {
for v in arr {
self.collect_value(v, depth + 1);
}
}
serde_json::Value::Object(obj) => {
for v in obj.values() {
self.collect_value(v, depth + 1);
}
}
_ => {}
}
}
}
/// Borrows the `command` argument of `input`, regardless of the tool name.
///
/// Returns `None` when `tool_input` is absent or has no `command`.
#[must_use]
pub fn extract_command(input: &HookInput) -> Option<&str> {
    input
        .tool_input
        .as_ref()
        .and_then(|tool_input| tool_input.command.as_deref())
}
/// Reads `reader` to the end and parses the text as a JSON [`HookInput`].
///
/// # Errors
///
/// Returns an error if reading fails (including non-UTF-8 data) or if the
/// text is not valid JSON for [`HookInput`].
pub fn parse_reader<R: std::io::Read>(reader: R) -> anyhow::Result<HookInput> {
    let mut source = reader;
    let mut raw = String::new();
    std::io::Read::read_to_string(&mut source, &mut raw)?;
    let parsed: HookInput = serde_json::from_str(&raw)?;
    Ok(parsed)
}
/// Parses a [`HookInput`] from the process's standard input.
///
/// # Errors
///
/// Propagates any error from [`parse_reader`].
pub fn parse_stdin() -> anyhow::Result<HookInput> {
    let stdin = std::io::stdin().lock();
    parse_reader(stdin)
}
/// Splits `content` into lines and returns the ones that still need scanning.
///
/// Each line is trimmed. A line is dropped when it is empty after trimming,
/// when it starts with `#` or `//`, or when `PrefixPrefilter::is_safe`
/// returns `true` for it. The surviving lines are returned trimmed, in their
/// original order.
#[must_use]
pub fn scan_content_lines(content: &str) -> Vec<String> {
    use crate::engine::{PrefixPrefilter, Prefilter};

    let prefilter = PrefixPrefilter;
    let mut kept = Vec::new();
    for raw_line in content.lines() {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }
        if line.starts_with('#') || line.starts_with("//") {
            continue;
        }
        if prefilter.is_safe(line) {
            continue;
        }
        kept.push(String::from(line));
    }
    kept
}
// Unit tests for hook-input parsing, scannable-content extraction,
// `ScanContext` predicates and `scan_content_lines`.
#[cfg(test)]
mod tests {
use super::*;
// --- parse_reader / extract_command -------------------------------------
#[test]
fn parse_bash_hook() {
let json = r#"{"tool_name": "Bash", "tool_input": {"command": "ls -la"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert_eq!(extract_command(&input), Some("ls -la"));
}
#[test]
fn parse_missing_command() {
let json = r#"{"tool_name": "Write", "tool_input": {"file_path": "/tmp/test"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert_eq!(extract_command(&input), None);
}
// An empty JSON object is valid input: both fields are optional.
#[test]
fn parse_empty_input() {
let json = r#"{}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert_eq!(extract_command(&input), None);
}
#[test]
fn parse_invalid_json_returns_error() {
let result = parse_reader("not json".as_bytes());
assert!(result.is_err());
}
// Unknown top-level keys are tolerated.
#[test]
fn parse_reader_with_extra_fields() {
let json = r#"{"tool_name": "Bash", "tool_input": {"command": "ls"}, "extra": true}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert_eq!(extract_command(&input), Some("ls"));
}
#[test]
fn hook_input_is_clone() {
let json = r#"{"tool_name": "Bash", "tool_input": {"command": "ls"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let cloned = input.clone();
assert_eq!(extract_command(&cloned), Some("ls"));
}
// --- extract_scannable_content: one item per built-in tool --------------
#[test]
fn extract_bash_scannable() {
let json = r#"{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert_eq!(items.len(), 1);
assert_eq!(items[0].context, ScanContext::BashCommand);
assert_eq!(items[0].text, "rm -rf /");
}
#[test]
fn extract_write_scannable() {
let json = r#"{"tool_name": "Write", "tool_input": {"file_path": "/tmp/test.sh", "content": "rm -rf /"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert_eq!(items.len(), 1);
assert_eq!(items[0].context, ScanContext::WriteContent);
assert!(items[0].context.downgrade_block());
}
// Only `new_string` is extracted for Edit; `old_string` is not.
#[test]
fn extract_edit_scannable() {
let json = r#"{"tool_name": "Edit", "tool_input": {"file_path": "/tmp/test.py", "old_string": "pass", "new_string": "DROP TABLE users"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert_eq!(items.len(), 1);
assert_eq!(items[0].context, ScanContext::EditNewString);
}
#[test]
fn extract_notebook_scannable() {
let json = r#"{"tool_name": "NotebookEdit", "tool_input": {"new_source": "import os; os.system('rm -rf /')"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert_eq!(items.len(), 1);
assert_eq!(items[0].context, ScanContext::NotebookCell);
}
// For MCP tools both `command` and the other string arguments are extracted.
#[test]
fn extract_mcp_scannable() {
let json = r#"{"tool_name": "mcp__kubernetes__k8s-pod-exec", "tool_input": {"command": "kubectl delete namespace prod", "namespace": "production"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.len() >= 2); assert!(items.iter().any(|i| i.context == ScanContext::McpCommand));
}
// Tools without a dedicated branch produce nothing.
#[test]
fn extract_read_tool_empty() {
let json = r#"{"tool_name": "Read", "tool_input": {"file_path": "/etc/passwd"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
#[test]
fn extract_no_tool_input() {
let json = r#"{"tool_name": "Bash"}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
// --- ScanContext predicates ----------------------------------------------
#[test]
fn bash_context_does_not_downgrade() {
assert!(!ScanContext::BashCommand.downgrade_block());
}
#[test]
fn write_context_downgrades() {
assert!(ScanContext::WriteContent.downgrade_block());
}
#[test]
fn mcp_context_does_not_downgrade() {
assert!(!ScanContext::McpCommand.downgrade_block());
}
#[test]
fn edit_context_downgrades() {
assert!(ScanContext::EditNewString.downgrade_block());
}
#[test]
fn notebook_context_downgrades() {
assert!(ScanContext::NotebookCell.downgrade_block());
}
#[test]
fn scan_context_is_command() {
assert!(ScanContext::BashCommand.is_command());
assert!(ScanContext::McpCommand.is_command());
assert!(!ScanContext::WriteContent.is_command());
assert!(!ScanContext::EditNewString.is_command());
assert!(!ScanContext::NotebookCell.is_command());
}
#[test]
fn scan_context_is_content() {
assert!(!ScanContext::BashCommand.is_content());
assert!(!ScanContext::McpCommand.is_content());
assert!(ScanContext::WriteContent.is_content());
assert!(ScanContext::EditNewString.is_content());
assert!(ScanContext::NotebookCell.is_content());
}
// --- scan_content_lines ----------------------------------------------------
// The shebang, `#` and `//` lines and the blank line are filtered out; the
// test additionally expects `ls -la` to be dropped by the prefilter.
#[test]
fn scan_content_lines_filters_blanks_and_comments() {
let content = "#!/bin/bash\n# comment\n\nrm -rf /\n// js comment\nls -la\n";
let lines = scan_content_lines(content);
assert_eq!(lines, vec!["rm -rf /"]);
}
#[test]
fn scan_content_lines_empty() {
assert!(scan_content_lines("").is_empty());
assert!(scan_content_lines(" \n \n").is_empty());
}
// --- MCP collector limits ----------------------------------------------------
// 100 string arguments are supplied; at most MCP_JSON_MAX_STRINGS may come back.
#[test]
fn mcp_max_strings_enforced() {
let mut fields = String::new();
for i in 0..100 {
if i > 0 {
fields.push_str(", ");
}
fields.push_str(&format!(r#""field_{i}": "value_{i}""#));
}
let json = format!(
r#"{{"tool_name": "mcp__test__tool", "tool_input": {{{fields}}}}}"#
);
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(
items.len() <= MCP_JSON_MAX_STRINGS,
"expected at most {} items, got {}",
MCP_JSON_MAX_STRINGS,
items.len()
);
}
// Builds `"a": {"nested": {"nested": ... "deep_value" ...}}` with 20 levels of
// nesting, which is deeper than MCP_JSON_MAX_DEPTH.
#[test]
fn mcp_max_depth_enforced() {
let mut json = r#"{"tool_name": "mcp__test__deep", "tool_input": {"a": "#.to_owned();
for _ in 0..20 {
json.push_str(r#"{"nested": "#);
}
json.push_str(r#""deep_value""#);
for _ in 0..20 {
json.push('}');
}
json.push_str("}}");
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(
!items.iter().any(|i| i.text == "deep_value"),
"deep_value should not be collected at depth > {MCP_JSON_MAX_DEPTH}"
);
}
#[test]
fn mcp_with_command_field() {
let json = r#"{"tool_name": "mcp__k8s__exec", "tool_input": {"command": "kubectl get pods"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(
items.iter().any(|i| i.text == "kubectl get pods" && i.context == ScanContext::McpCommand),
"command field should be extracted for MCP tools"
);
}
// --- Missing-field cases: each tool yields nothing without its field ----
#[test]
fn extract_bash_no_command() {
let json = r#"{"tool_name": "Bash", "tool_input": {}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
#[test]
fn extract_write_no_content() {
let json = r#"{"tool_name": "Write", "tool_input": {"file_path": "/tmp/test"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
#[test]
fn extract_edit_no_new_string() {
let json = r#"{"tool_name": "Edit", "tool_input": {"file_path": "/tmp/test", "old_string": "old"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
#[test]
fn extract_notebook_no_new_source() {
let json = r#"{"tool_name": "NotebookEdit", "tool_input": {"cell_index": 0}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
// A tool name that is neither built-in nor `mcp__`-prefixed is not scanned.
#[test]
fn extract_unknown_tool_empty() {
let json = r#"{"tool_name": "SomeNewTool", "tool_input": {"data": "rm -rf /"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty());
}
// A missing tool name is treated as "" and therefore matches no branch.
#[test]
fn extract_no_tool_name() {
let json = r#"{"tool_input": {"command": "rm -rf /"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty(), "missing tool_name should match no branch");
}
// --- MCP collector: value shapes ---------------------------------------------
#[test]
fn mcp_nested_array_strings() {
let json = r#"{"tool_name": "mcp__test__arr", "tool_input": {"commands": ["rm -rf /", "ls -la"]}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.iter().any(|i| i.text == "rm -rf /"));
assert!(items.iter().any(|i| i.text == "ls -la"));
}
#[test]
fn mcp_nested_object_strings() {
let json = r#"{"tool_name": "mcp__test__obj", "tool_input": {"config": {"cmd": "terraform destroy", "env": "prod"}}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.iter().any(|i| i.text == "terraform destroy"));
assert!(items.iter().any(|i| i.text == "prod"));
}
#[test]
fn mcp_empty_string_skipped() {
let json = r#"{"tool_name": "mcp__test__empty", "tool_input": {"field": ""}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty(), "empty strings should be skipped");
}
#[test]
fn mcp_non_string_values_skipped() {
let json = r#"{"tool_name": "mcp__test__types", "tool_input": {"num": 42, "bool": true, "null_val": null}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.is_empty(), "non-string JSON values should be skipped");
}
// --- parse_reader edge cases -------------------------------------------------
#[test]
fn parse_reader_empty_string_is_error() {
let result = parse_reader("".as_bytes());
assert!(result.is_err());
}
// Explicit JSON nulls deserialize to `None`.
#[test]
fn parse_reader_null_fields() {
let json = r#"{"tool_name": null, "tool_input": null}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert!(input.tool_name.is_none());
assert!(input.tool_input.is_none());
}
// --- scan_content_lines edge cases -------------------------------------------
#[test]
fn scan_content_lines_only_comments() {
let content = "# comment 1\n# comment 2\n// js comment\n";
assert!(scan_content_lines(content).is_empty());
}
// Surrounding whitespace is trimmed from the returned line.
#[test]
fn scan_content_lines_indented_dangerous() {
let content = " rm -rf /tmp ";
let lines = scan_content_lines(content);
assert_eq!(lines.len(), 1);
assert_eq!(lines[0], "rm -rf /tmp");
}
// Lines the prefilter considers safe are dropped; the others are kept.
#[test]
fn scan_content_lines_prefilter_optimization() {
let content = "let x = 1;\nconst y = 2;\nfunction hello() {}";
assert!(scan_content_lines(content).is_empty());
let content = "rm -rf /tmp\nDROP TABLE users";
let lines = scan_content_lines(content);
assert_eq!(lines.len(), 2);
}
// --- MCP prefix matching -------------------------------------------------------
// The match is a plain `starts_with("mcp__")`, so a bare prefix qualifies.
#[test]
fn extract_mcp_bare_prefix() {
let json = r#"{"tool_name": "mcp__", "tool_input": {"field": "value"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(
items.iter().any(|i| i.text == "value"),
"mcp__ with bare prefix should still extract strings"
);
}
#[test]
fn extract_mcp_triple_underscore() {
let json = r#"{"tool_name": "mcp___foo", "tool_input": {"cmd": "terraform destroy"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let items = extract_scannable_content(&input);
assert!(items.iter().any(|i| i.text == "terraform destroy"));
}
// --- Derived trait smoke tests (Debug / Clone) ----------------------------
#[test]
fn scan_context_debug_all_variants() {
let variants = [
ScanContext::BashCommand,
ScanContext::WriteContent,
ScanContext::EditNewString,
ScanContext::NotebookCell,
ScanContext::McpCommand,
];
for ctx in variants {
let debug = format!("{ctx:?}");
assert!(!debug.is_empty());
}
}
#[test]
fn hook_input_debug() {
let json = r#"{"tool_name": "Bash", "tool_input": {"command": "ls"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
let debug = format!("{input:?}");
assert!(debug.contains("Bash"));
}
#[test]
fn scannable_content_debug() {
let item = ScannableContent {
context: ScanContext::BashCommand,
text: "ls -la".to_owned(),
};
let debug = format!("{item:?}");
assert!(debug.contains("BashCommand"));
assert!(debug.contains("ls -la"));
}
#[test]
fn scannable_content_clone() {
let item = ScannableContent {
context: ScanContext::WriteContent,
text: "content".to_owned(),
};
let cloned = item.clone();
assert_eq!(cloned.context, ScanContext::WriteContent);
assert_eq!(cloned.text, "content");
}
// --- extract_command with no command present ---------------------------------
#[test]
fn extract_command_from_write_tool() {
let json = r#"{"tool_name": "Write", "tool_input": {"file_path": "/tmp/test", "content": "data"}}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert!(extract_command(&input).is_none());
}
#[test]
fn extract_command_when_no_tool_input() {
let json = r#"{"tool_name": "Bash"}"#;
let input = parse_reader(json.as_bytes()).unwrap();
assert!(extract_command(&input).is_none());
}
// A shebang starts with `#`, so it is filtered like any other `#` line.
#[test]
fn scan_content_lines_shebang_filtered() {
let content = "#!/bin/bash\nrm -rf /tmp\n";
let lines = scan_content_lines(content);
assert!(!lines.iter().any(|l| l.starts_with("#!")));
assert!(lines.iter().any(|l| l.contains("rm")));
}
}