Skip to main content

agm_core/validator/
code.rs

1//! Code block validation (spec S23, S23.6).
2//!
3//! Pass 3 (structural): validates code blocks for required fields, path safety,
4//! and absence of secrets.
5
6use std::sync::OnceLock;
7
8use regex::Regex;
9
10use crate::error::codes::ErrorCode;
11use crate::error::diagnostic::{AgmError, ErrorLocation};
12use crate::model::code::{CodeAction, CodeBlock};
13use crate::model::node::Node;
14
15// ---------------------------------------------------------------------------
16// Secret detection regexes
17// ---------------------------------------------------------------------------
18
/// Holds the compiled pattern handed out by `secret_keyword_regex`; filled on first use.
static SECRET_KEYWORD: OnceLock<Regex> = OnceLock::new();
/// Holds the compiled pattern handed out by `aws_key_regex`; filled on first use.
static AWS_KEY: OnceLock<Regex> = OnceLock::new();
/// Holds the compiled pattern handed out by `token_prefix_regex`; filled on first use.
static TOKEN_PREFIX: OnceLock<Regex> = OnceLock::new();
22
23fn secret_keyword_regex() -> &'static Regex {
24    SECRET_KEYWORD.get_or_init(|| {
25        Regex::new(
26            r#"(?i)(password|secret|api_key|api_secret|token|private_key)\s*[:=]\s*["'][^"']{8,}["']"#,
27        )
28        .unwrap()
29    })
30}
31
32fn aws_key_regex() -> &'static Regex {
33    AWS_KEY.get_or_init(|| Regex::new(r"(AKIA|ASIA)[A-Z0-9]{16}").unwrap())
34}
35
36fn token_prefix_regex() -> &'static Regex {
37    TOKEN_PREFIX.get_or_init(|| {
38        Regex::new(r"(?i)(sk-|pk_live_|pk_test_|ghp_|gho_|glpat-)[a-zA-Z0-9]{20,}").unwrap()
39    })
40}
41
42/// Returns true if the body appears to contain a secret credential.
43fn contains_secret(body: &str) -> bool {
44    secret_keyword_regex().is_match(body)
45        || aws_key_regex().is_match(body)
46        || token_prefix_regex().is_match(body)
47}
48
/// Returns true if the target path is unsafe (absolute or traversal).
///
/// A path is rejected when any of the following holds:
///
/// - it starts with `/` or `\` (rooted; this also covers UNC-style `\\host`),
/// - it starts with an ASCII letter followed by `:` (Windows drive-qualified,
///   e.g. `C:\dir`, `d:/dir` or the drive-relative `C:file`),
/// - it contains `..` anywhere. This is a substring check, so it is
///   conservative: names such as `a..b` are rejected as well.
///
/// The empty string is not considered unsafe by this function.
fn is_unsafe_path(path: &str) -> bool {
    match path.as_bytes() {
        // Rooted on Unix or Windows.
        [b'/' | b'\\', ..] => true,
        // Drive-qualified Windows path; never relative to the project root.
        [drive, b':', ..] if drive.is_ascii_alphabetic() => true,
        // Otherwise only traversal can make the path escape.
        _ => path.contains(".."),
    }
}
53
54/// Validates a single code block, returning any errors.
55fn validate_block(
56    block: &CodeBlock,
57    node_id: &str,
58    line: usize,
59    file_name: &str,
60    errors: &mut Vec<AgmError>,
61) {
62    let loc = ErrorLocation::full(file_name, line, node_id);
63
64    // V008 — lang must be present
65    if block.lang.is_none() {
66        errors.push(AgmError::new(
67            ErrorCode::V008,
68            "Code block missing required field: `lang`",
69            loc.clone(),
70        ));
71    }
72
73    // V008 — body must not be empty
74    if block.body.trim().is_empty() {
75        errors.push(AgmError::new(
76            ErrorCode::V008,
77            "Code block missing required field: `body` (empty)",
78            loc.clone(),
79        ));
80    }
81
82    // V008 — replace action requires `old`
83    if block.action == CodeAction::Replace && block.old.is_none() {
84        errors.push(AgmError::new(
85            ErrorCode::V008,
86            "Code block with `action: replace` missing required field: `old`",
87            loc.clone(),
88        ));
89    }
90
91    // V008 — insert_before / insert_after require `anchor`
92    if matches!(
93        block.action,
94        CodeAction::InsertBefore | CodeAction::InsertAfter
95    ) && block.anchor.is_none()
96    {
97        errors.push(AgmError::new(
98            ErrorCode::V008,
99            format!(
100                "Code block with `action: {}` missing required field: `anchor`",
101                block.action
102            ),
103            loc.clone(),
104        ));
105    }
106
107    // V015 — target path must be relative and traversal-free
108    if let Some(ref target) = block.target {
109        if is_unsafe_path(target) {
110            errors.push(AgmError::new(
111                ErrorCode::V015,
112                format!("`target` path is absolute or contains traversal: `{target}`"),
113                loc.clone(),
114            ));
115        }
116    }
117
118    // V008 — body must not contain secrets (security heuristic)
119    if contains_secret(&block.body) {
120        errors.push(AgmError::new(
121            ErrorCode::V008,
122            "Code block appears to contain a secret or credential",
123            loc,
124        ));
125    }
126}
127
128/// Validates all code blocks on a node (both `code` and `code_blocks`).
129///
130/// Rules: V008 (missing lang, empty body, replace without old,
131/// insert without anchor, secret detection), V015 (unsafe target path).
132#[must_use]
133pub fn validate_code(node: &Node, file_name: &str) -> Vec<AgmError> {
134    let mut errors = Vec::new();
135    let line = node.span.start_line;
136    let id = node.id.as_str();
137
138    if let Some(ref block) = node.code {
139        validate_block(block, id, line, file_name, &mut errors);
140    }
141
142    if let Some(ref blocks) = node.code_blocks {
143        for block in blocks {
144            validate_block(block, id, line, file_name, &mut errors);
145        }
146    }
147
148    errors
149}
150
#[cfg(test)]
mod tests {
    //! Unit tests for `validate_code`: one test per rule, plus the no-code,
    //! valid-block and `code_blocks` list cases.

    use super::*;
    use crate::model::code::{CodeAction, CodeBlock};
    use crate::model::fields::{NodeType, Span};
    use crate::model::node::Node;

    /// File name reported in every diagnostic produced by these tests.
    const FILE: &str = "test.agm";

    /// A node with no code attached.
    fn minimal_node() -> Node {
        Node {
            id: "test.node".to_owned(),
            node_type: NodeType::Facts,
            summary: "a test node".to_owned(),
            span: Span::new(5, 7),
            ..Default::default()
        }
    }

    /// A block that passes every check.
    fn valid_block() -> CodeBlock {
        CodeBlock {
            lang: Some("rust".to_owned()),
            target: Some("src/main.rs".to_owned()),
            action: CodeAction::Append,
            body: "fn hello() {}".to_owned(),
            anchor: None,
            old: None,
        }
    }

    /// Validates a minimal node whose `code` field holds `block`.
    fn errors_for(block: CodeBlock) -> Vec<AgmError> {
        let mut node = minimal_node();
        node.code = Some(block);
        validate_code(&node, FILE)
    }

    /// True when at least one error has the given code.
    fn has_code(errors: &[AgmError], code: ErrorCode) -> bool {
        errors.iter().any(|e| e.code == code)
    }

    /// True when at least one error has the given code and mentions `needle`.
    fn has_code_mentioning(errors: &[AgmError], code: ErrorCode, needle: &str) -> bool {
        errors
            .iter()
            .any(|e| e.code == code && e.message.contains(needle))
    }

    #[test]
    fn test_validate_code_no_code_returns_empty() {
        let errors = validate_code(&minimal_node(), FILE);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_code_valid_block_returns_empty() {
        let errors = errors_for(valid_block());
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_code_missing_lang_returns_v008() {
        let block = CodeBlock {
            lang: None,
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "lang"));
    }

    #[test]
    fn test_validate_code_empty_body_returns_v008() {
        let block = CodeBlock {
            body: "   ".to_owned(),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "body"));
    }

    #[test]
    fn test_validate_code_replace_no_old_returns_v008() {
        let block = CodeBlock {
            action: CodeAction::Replace,
            old: None,
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "`old`"));
    }

    #[test]
    fn test_validate_code_replace_with_old_returns_empty() {
        let block = CodeBlock {
            action: CodeAction::Replace,
            old: Some("old code".to_owned()),
            ..valid_block()
        };
        let errors = errors_for(block);
        // Only the absence of an `old`-related error is asserted here.
        assert!(!errors.iter().any(|e| e.message.contains("`old`")));
    }

    #[test]
    fn test_validate_code_insert_before_no_anchor_returns_v008() {
        let block = CodeBlock {
            action: CodeAction::InsertBefore,
            anchor: None,
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "`anchor`"));
    }

    #[test]
    fn test_validate_code_insert_after_no_anchor_returns_v008() {
        let block = CodeBlock {
            action: CodeAction::InsertAfter,
            anchor: None,
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "`anchor`"));
    }

    #[test]
    fn test_validate_code_absolute_target_returns_v015() {
        let block = CodeBlock {
            target: Some("/etc/passwd".to_owned()),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code(&errors, ErrorCode::V015));
    }

    #[test]
    fn test_validate_code_traversal_target_returns_v015() {
        let block = CodeBlock {
            target: Some("src/../etc/secret".to_owned()),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code(&errors, ErrorCode::V015));
    }

    #[test]
    fn test_validate_code_windows_absolute_target_returns_v015() {
        let block = CodeBlock {
            target: Some("\\Windows\\System32".to_owned()),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code(&errors, ErrorCode::V015));
    }

    #[test]
    fn test_validate_code_secret_password_returns_v008() {
        let block = CodeBlock {
            body: r#"password = "super_secret_pass123""#.to_owned(),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "secret"));
    }

    #[test]
    fn test_validate_code_secret_aws_key_returns_v008() {
        let block = CodeBlock {
            body: "AKIAIOSFODNN7EXAMPLE".to_owned(),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code(&errors, ErrorCode::V008));
    }

    #[test]
    fn test_validate_code_secret_github_token_returns_v008() {
        let block = CodeBlock {
            body: "ghp_abcdefghijklmnopqrstuvwxyz1234".to_owned(),
            ..valid_block()
        };
        let errors = errors_for(block);
        assert!(has_code(&errors, ErrorCode::V008));
    }

    #[test]
    fn test_validate_code_validates_code_blocks_vec() {
        let bad_block = CodeBlock {
            lang: None,
            ..valid_block()
        };
        let mut node = minimal_node();
        node.code_blocks = Some(vec![valid_block(), bad_block]);
        let errors = validate_code(&node, FILE);
        assert!(has_code_mentioning(&errors, ErrorCode::V008, "lang"));
    }
}