Skip to main content

agm_core/validator/
code.rs

1//! Code block validation (spec S23, S23.6).
2//!
3//! Pass 3 (structural): validates code blocks for required fields, path safety,
4//! and absence of secrets.
5
6use std::sync::OnceLock;
7
8use regex::Regex;
9
10use crate::error::codes::ErrorCode;
11use crate::error::diagnostic::{AgmError, ErrorLocation};
12use crate::model::code::{CodeAction, CodeBlock};
13use crate::model::node::Node;
14
15// ---------------------------------------------------------------------------
16// Secret detection regexes
17// ---------------------------------------------------------------------------
18
/// Process-wide cache for the compiled pattern returned by `secret_keyword_regex`.
static SECRET_KEYWORD: OnceLock<Regex> = OnceLock::new();
/// Process-wide cache for the compiled pattern returned by `aws_key_regex`.
static AWS_KEY: OnceLock<Regex> = OnceLock::new();
/// Process-wide cache for the compiled pattern returned by `token_prefix_regex`.
static TOKEN_PREFIX: OnceLock<Regex> = OnceLock::new();
22
23fn secret_keyword_regex() -> &'static Regex {
24    SECRET_KEYWORD.get_or_init(|| {
25        Regex::new(
26            r#"(?i)(password|secret|api_key|api_secret|token|private_key)\s*[:=]\s*["'][^"']{8,}["']"#,
27        )
28        .unwrap()
29    })
30}
31
32fn aws_key_regex() -> &'static Regex {
33    AWS_KEY.get_or_init(|| Regex::new(r"(AKIA|ASIA)[A-Z0-9]{16}").unwrap())
34}
35
36fn token_prefix_regex() -> &'static Regex {
37    TOKEN_PREFIX.get_or_init(|| {
38        Regex::new(r"(?i)(sk-|pk_live_|pk_test_|ghp_|gho_|glpat-)[a-zA-Z0-9]{20,}").unwrap()
39    })
40}
41
42/// Returns true if the body appears to contain a secret credential.
43fn contains_secret(body: &str) -> bool {
44    secret_keyword_regex().is_match(body)
45        || aws_key_regex().is_match(body)
46        || token_prefix_regex().is_match(body)
47}
48
/// Returns true if the target path is unsafe (absolute or traversal).
///
/// A path is rejected when any of the following holds:
///
/// - it starts with `/` or `\` (Unix absolute, Windows root-relative, or UNC);
/// - it starts with a Windows drive prefix such as `C:` — this covers both
///   fully qualified (`C:\dir`, `c:/dir`) and drive-relative (`C:dir`) forms,
///   neither of which stays inside the project root;
/// - it contains `..` anywhere. This is deliberately conservative: it also
///   rejects names such as `a..b`, trading a few false positives for never
///   letting a parent-directory segment through.
///
/// An empty path is considered safe by this function.
fn is_unsafe_path(path: &str) -> bool {
    let rooted = path.starts_with(['/', '\\']);

    // A single ASCII letter followed by `:` is a Windows drive designator.
    let has_drive_prefix = matches!(
        path.as_bytes(),
        [drive, b':', ..] if drive.is_ascii_alphabetic()
    );

    rooted || has_drive_prefix || path.contains("..")
}
53
54/// Validates a single code block, returning any errors.
55fn validate_block(
56    block: &CodeBlock,
57    node_id: &str,
58    line: usize,
59    file_name: &str,
60    errors: &mut Vec<AgmError>,
61) {
62    let loc = ErrorLocation::full(file_name, line, node_id);
63
64    // V008 — lang must be present
65    if block.lang.is_none() {
66        errors.push(AgmError::new(
67            ErrorCode::V008,
68            "Code block missing required field: `lang`",
69            loc.clone(),
70        ));
71    }
72
73    // V008 — body must not be empty
74    if block.body.trim().is_empty() {
75        errors.push(AgmError::new(
76            ErrorCode::V008,
77            "Code block missing required field: `body` (empty)",
78            loc.clone(),
79        ));
80    }
81
82    // V008 — replace action requires `old`
83    if block.action == CodeAction::Replace && block.old.is_none() {
84        errors.push(AgmError::new(
85            ErrorCode::V008,
86            "Code block with `action: replace` missing required field: `old`",
87            loc.clone(),
88        ));
89    }
90
91    // V008 — insert_before / insert_after require `anchor`
92    if matches!(
93        block.action,
94        CodeAction::InsertBefore | CodeAction::InsertAfter
95    ) && block.anchor.is_none()
96    {
97        errors.push(AgmError::new(
98            ErrorCode::V008,
99            format!(
100                "Code block with `action: {}` missing required field: `anchor`",
101                block.action
102            ),
103            loc.clone(),
104        ));
105    }
106
107    // V015 — target path must be relative and traversal-free
108    if let Some(ref target) = block.target {
109        if is_unsafe_path(target) {
110            errors.push(AgmError::new(
111                ErrorCode::V015,
112                format!("`target` path is absolute or contains traversal: `{target}`"),
113                loc.clone(),
114            ));
115        }
116    }
117
118    // V008 — body must not contain secrets (security heuristic)
119    if contains_secret(&block.body) {
120        errors.push(AgmError::new(
121            ErrorCode::V008,
122            "Code block appears to contain a secret or credential",
123            loc,
124        ));
125    }
126}
127
128/// Validates all code blocks on a node (both `code` and `code_blocks`).
129///
130/// Rules: V008 (missing lang, empty body, replace without old,
131/// insert without anchor, secret detection), V015 (unsafe target path).
132#[must_use]
133pub fn validate_code(node: &Node, file_name: &str) -> Vec<AgmError> {
134    let mut errors = Vec::new();
135    let line = node.span.start_line;
136    let id = node.id.as_str();
137
138    if let Some(ref block) = node.code {
139        validate_block(block, id, line, file_name, &mut errors);
140    }
141
142    if let Some(ref blocks) = node.code_blocks {
143        for block in blocks {
144            validate_block(block, id, line, file_name, &mut errors);
145        }
146    }
147
148    errors
149}
150
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;
    use crate::model::code::{CodeAction, CodeBlock};
    use crate::model::fields::{NodeType, Span};
    use crate::model::node::Node;

    /// File name handed to the validator in every test.
    const FILE_NAME: &str = "test.agm";

    /// Builds a node with every optional field unset and no code attached.
    fn minimal_node() -> Node {
        Node {
            id: "test.node".to_owned(),
            node_type: NodeType::Facts,
            summary: "a test node".to_owned(),
            priority: None,
            stability: None,
            confidence: None,
            status: None,
            depends: None,
            related_to: None,
            replaces: None,
            conflicts: None,
            see_also: None,
            items: None,
            steps: None,
            fields: None,
            input: None,
            output: None,
            detail: None,
            rationale: None,
            tradeoffs: None,
            resolution: None,
            examples: None,
            notes: None,
            code: None,
            code_blocks: None,
            verify: None,
            agent_context: None,
            target: None,
            execution_status: None,
            executed_by: None,
            executed_at: None,
            execution_log: None,
            retry_count: None,
            parallel_groups: None,
            memory: None,
            scope: None,
            applies_when: None,
            valid_from: None,
            valid_until: None,
            tags: None,
            aliases: None,
            keywords: None,
            extra_fields: BTreeMap::new(),
            span: Span::new(5, 7),
        }
    }

    /// Builds a code block that passes every rule.
    fn valid_block() -> CodeBlock {
        CodeBlock {
            lang: Some("rust".to_owned()),
            target: Some("src/main.rs".to_owned()),
            action: CodeAction::Append,
            body: "fn hello() {}".to_owned(),
            anchor: None,
            old: None,
        }
    }

    /// Validates a minimal node whose `code` field holds `block`.
    fn errors_for(block: CodeBlock) -> Vec<AgmError> {
        let node = Node {
            code: Some(block),
            ..minimal_node()
        };
        validate_code(&node, FILE_NAME)
    }

    /// True when some error carries `code`.
    fn has_code(errors: &[AgmError], code: ErrorCode) -> bool {
        errors.iter().any(|e| e.code == code)
    }

    /// True when some error carries `code` and mentions `needle` in its message.
    fn has_code_with_text(errors: &[AgmError], code: ErrorCode, needle: &str) -> bool {
        errors
            .iter()
            .any(|e| e.code == code && e.message.contains(needle))
    }

    #[test]
    fn test_validate_code_no_code_returns_empty() {
        let errors = validate_code(&minimal_node(), FILE_NAME);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_code_valid_block_returns_empty() {
        let errors = errors_for(valid_block());
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_code_missing_lang_returns_v008() {
        let errors = errors_for(CodeBlock {
            lang: None,
            ..valid_block()
        });
        assert!(has_code_with_text(&errors, ErrorCode::V008, "lang"));
    }

    #[test]
    fn test_validate_code_empty_body_returns_v008() {
        let errors = errors_for(CodeBlock {
            body: "   ".to_owned(),
            ..valid_block()
        });
        assert!(has_code_with_text(&errors, ErrorCode::V008, "body"));
    }

    #[test]
    fn test_validate_code_replace_no_old_returns_v008() {
        let errors = errors_for(CodeBlock {
            action: CodeAction::Replace,
            old: None,
            ..valid_block()
        });
        assert!(has_code_with_text(&errors, ErrorCode::V008, "`old`"));
    }

    #[test]
    fn test_validate_code_replace_with_old_returns_empty() {
        let errors = errors_for(CodeBlock {
            action: CodeAction::Replace,
            old: Some("old code".to_owned()),
            ..valid_block()
        });
        assert!(!errors.iter().any(|e| e.message.contains("`old`")));
    }

    #[test]
    fn test_validate_code_insert_before_no_anchor_returns_v008() {
        let errors = errors_for(CodeBlock {
            action: CodeAction::InsertBefore,
            anchor: None,
            ..valid_block()
        });
        assert!(has_code_with_text(&errors, ErrorCode::V008, "`anchor`"));
    }

    #[test]
    fn test_validate_code_insert_after_no_anchor_returns_v008() {
        let errors = errors_for(CodeBlock {
            action: CodeAction::InsertAfter,
            anchor: None,
            ..valid_block()
        });
        assert!(has_code_with_text(&errors, ErrorCode::V008, "`anchor`"));
    }

    #[test]
    fn test_validate_code_absolute_target_returns_v015() {
        let errors = errors_for(CodeBlock {
            target: Some("/etc/passwd".to_owned()),
            ..valid_block()
        });
        assert!(has_code(&errors, ErrorCode::V015));
    }

    #[test]
    fn test_validate_code_traversal_target_returns_v015() {
        let errors = errors_for(CodeBlock {
            target: Some("src/../etc/secret".to_owned()),
            ..valid_block()
        });
        assert!(has_code(&errors, ErrorCode::V015));
    }

    #[test]
    fn test_validate_code_windows_absolute_target_returns_v015() {
        let errors = errors_for(CodeBlock {
            target: Some("\\Windows\\System32".to_owned()),
            ..valid_block()
        });
        assert!(has_code(&errors, ErrorCode::V015));
    }

    #[test]
    fn test_validate_code_secret_password_returns_v008() {
        let errors = errors_for(CodeBlock {
            body: r#"password = "super_secret_pass123""#.to_owned(),
            ..valid_block()
        });
        assert!(has_code_with_text(&errors, ErrorCode::V008, "secret"));
    }

    #[test]
    fn test_validate_code_secret_aws_key_returns_v008() {
        let errors = errors_for(CodeBlock {
            body: "AKIAIOSFODNN7EXAMPLE".to_owned(),
            ..valid_block()
        });
        assert!(has_code(&errors, ErrorCode::V008));
    }

    #[test]
    fn test_validate_code_secret_github_token_returns_v008() {
        let errors = errors_for(CodeBlock {
            body: "ghp_abcdefghijklmnopqrstuvwxyz1234".to_owned(),
            ..valid_block()
        });
        assert!(has_code(&errors, ErrorCode::V008));
    }

    #[test]
    fn test_validate_code_validates_code_blocks_vec() {
        let missing_lang = CodeBlock {
            lang: None,
            ..valid_block()
        };
        let node = Node {
            code_blocks: Some(vec![valid_block(), missing_lang]),
            ..minimal_node()
        };
        let errors = validate_code(&node, FILE_NAME);
        assert!(has_code_with_text(&errors, ErrorCode::V008, "lang"));
    }
}
396}