Skip to main content

keyhog_scanner/structured/parsers/
json.rs

1use super::{line::find_line_number, ExtractedPair};
2
3/// Parse Terraform state JSON and recursively extract `value` fields.
4pub fn parse_tfstate(text: &str) -> Vec<ExtractedPair> {
5    let mut pairs = Vec::new();
6    let value: serde_json::Value = match serde_json::from_str(text) {
7        Ok(v) => v,
8        Err(error) => {
9            tracing::debug!(target: "keyhog::structured", %error, "tfstate JSON parse failed");
10            return pairs;
11        }
12    };
13    extract_tfstate_values(&value, text, &mut pairs, 0);
14    pairs
15}
16
17/// Cap recursion depth on adversarial JSON. A 2 MiB document of nested arrays
18/// can exceed the default thread stack; 256 is beyond real Terraform state.
19const MAX_TFSTATE_DEPTH: usize = 256;
20
21fn extract_tfstate_values(
22    value: &serde_json::Value,
23    text: &str,
24    pairs: &mut Vec<ExtractedPair>,
25    depth: usize,
26) {
27    if depth >= MAX_TFSTATE_DEPTH {
28        return;
29    }
30    match value {
31        serde_json::Value::Object(map) => {
32            for (k, v) in map {
33                if k == "value" {
34                    let val_str = match v {
35                        serde_json::Value::String(s) => s.clone(),
36                        serde_json::Value::Number(n) => n.to_string(),
37                        serde_json::Value::Bool(b) => b.to_string(),
38                        _ => String::new(),
39                    };
40                    if !val_str.is_empty() {
41                        let line = find_line_number(text, &val_str).unwrap_or(1);
42                        pairs.push(ExtractedPair {
43                            context: "tfstate-value".to_string(),
44                            value: val_str,
45                            line,
46                        });
47                    }
48                }
49                extract_tfstate_values(v, text, pairs, depth + 1);
50            }
51        }
52        serde_json::Value::Array(arr) => {
53            for v in arr {
54                extract_tfstate_values(v, text, pairs, depth + 1);
55            }
56        }
57        _ => {}
58    }
59}
60
61/// Parse Jupyter notebook JSON and extract code cell sources.
62pub fn parse_jupyter(text: &str) -> Vec<ExtractedPair> {
63    let mut pairs = Vec::new();
64    let value: serde_json::Value = match serde_json::from_str(text) {
65        Ok(v) => v,
66        Err(error) => {
67            tracing::debug!(target: "keyhog::structured", %error, "Jupyter notebook JSON parse failed");
68            return pairs;
69        }
70    };
71    let cells = match value.get("cells") {
72        Some(serde_json::Value::Array(arr)) => arr,
73        _ => return pairs,
74    };
75    for (idx, cell) in cells.iter().enumerate() {
76        let cell_type = cell.get("cell_type").and_then(|c| c.as_str()).unwrap_or("");
77        if cell_type != "code" {
78            continue;
79        }
80        let source = match cell.get("source") {
81            Some(v) => v,
82            None => continue,
83        };
84        let (source_text, line) = match source {
85            serde_json::Value::String(s) => {
86                let line = find_line_number(text, s).unwrap_or(1);
87                (s.clone(), line)
88            }
89            serde_json::Value::Array(arr) => {
90                let parts: Vec<String> = arr
91                    .iter()
92                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
93                    .collect();
94                let joined = parts.join("");
95                let anchor = parts
96                    .iter()
97                    .find_map(|p| {
98                        let trimmed_end = p.trim_end_matches(['\n', '\r']);
99                        if trimmed_end.is_empty() {
100                            None
101                        } else {
102                            Some(trimmed_end.to_string())
103                        }
104                    })
105                    .unwrap_or_else(|| joined.clone());
106                let line = find_line_number(text, &anchor).unwrap_or(1);
107                (joined, line)
108            }
109            _ => continue,
110        };
111        if !source_text.trim().is_empty() {
112            pairs.push(ExtractedPair {
113                context: format!("jupyter-cell-{}", idx),
114                value: source_text,
115                line,
116            });
117        }
118    }
119    pairs
120}