Skip to main content

rsigma_runtime/sources/
include.rs

1//! Include expansion for dynamic pipelines.
2//!
3//! Expands `Transformation::Include { template }` directives by fetching the
4//! referenced source and parsing it as a list of transformation YAML objects,
5//! then splicing them into the transformations list.
6
7use std::collections::HashMap;
8
9use rsigma_eval::pipeline::sources::SourceType;
10use rsigma_eval::pipeline::transformations::Transformation;
11use rsigma_eval::{Pipeline, TransformationItem};
12
/// Maximum include nesting depth (prevents cycles).
///
/// `expand_includes_with_depth` returns an error for any call whose `depth`
/// argument is greater than this value.
const MAX_INCLUDE_DEPTH: usize = 1;
15
16/// Expand all `Include` transformations in a pipeline.
17///
18/// For each `Include { template }`, the template references a source ID.
19/// The resolved source data is expected to be a YAML array of transformation
20/// objects. These are parsed and spliced into the pipeline at the include position.
21///
22/// Security: if `allow_remote_include` is false, includes referencing HTTP or NATS
23/// sources produce an error.
24///
25/// Recursive includes are not allowed (max depth 1). If an included fragment
26/// itself contains `Include` directives, expansion fails with an error.
27pub fn expand_includes(
28    pipeline: &mut Pipeline,
29    resolved: &HashMap<String, serde_json::Value>,
30    allow_remote_include: bool,
31) -> Result<(), String> {
32    expand_includes_with_depth(pipeline, resolved, allow_remote_include, 0)
33}
34
35fn expand_includes_with_depth(
36    pipeline: &mut Pipeline,
37    resolved: &HashMap<String, serde_json::Value>,
38    allow_remote_include: bool,
39    depth: usize,
40) -> Result<(), String> {
41    if depth > MAX_INCLUDE_DEPTH {
42        return Err(
43            "recursive includes are not allowed (max depth 1); included content cannot itself contain include directives".to_string()
44        );
45    }
46
47    let mut expanded_transformations = Vec::new();
48    let mut had_include = false;
49
50    for item in &pipeline.transformations {
51        if let Transformation::Include { template } = &item.transformation {
52            had_include = true;
53            let source_id = extract_source_id(template);
54
55            // Security check: block remote includes if not allowed
56            if !allow_remote_include
57                && let Some(source) = pipeline.sources.iter().find(|s| s.id == source_id)
58            {
59                match &source.source_type {
60                    SourceType::Http { .. } | SourceType::Nats { .. } => {
61                        return Err(format!(
62                            "include references remote source '{source_id}'; use --allow-remote-include to permit"
63                        ));
64                    }
65                    _ => {}
66                }
67            }
68
69            if let Some(data) = resolved.get(&source_id) {
70                let items = parse_transformation_array(data)?;
71
72                // Check for nested includes (depth enforcement)
73                for parsed_item in &items {
74                    if matches!(parsed_item.transformation, Transformation::Include { .. }) {
75                        return Err(format!(
76                            "included content from source '{source_id}' contains nested include directives; recursive includes are not allowed (max depth 1)"
77                        ));
78                    }
79                }
80
81                expanded_transformations.extend(items);
82            } else {
83                return Err(format!(
84                    "include references unresolved source '{source_id}'"
85                ));
86            }
87        } else {
88            expanded_transformations.push(item.clone());
89        }
90    }
91
92    if had_include {
93        pipeline.transformations = expanded_transformations;
94    }
95
96    Ok(())
97}
98
/// Pull the source ID out of an include template.
///
/// Surrounding whitespace is ignored. For a template of the form
/// `${source.<id>}` or `${source.<id>.<rest>}` the result is `<id>`, i.e. the
/// text up to the first `.` after the `${source.` prefix. Any other input is
/// returned trimmed but otherwise unchanged.
fn extract_source_id(template: &str) -> String {
    let template = template.trim();

    let reference = template
        .strip_prefix("${source.")
        .and_then(|rest| rest.strip_suffix('}'));

    match reference {
        Some(path) => {
            // Keep only the leading path segment; a path without dots is the ID itself.
            let id = match path.split_once('.') {
                Some((head, _)) => head,
                None => path,
            };
            id.to_string()
        }
        None => template.to_string(),
    }
}
109
110/// Parse a JSON value as an array of transformation objects.
111///
112/// Each element should be a JSON object with at minimum a "type" field.
113/// Uses rsigma-eval's `parse_transformation_items` to handle the full
114/// transformation grammar.
115fn parse_transformation_array(data: &serde_json::Value) -> Result<Vec<TransformationItem>, String> {
116    if !data.is_array() {
117        return Err("include source data must be an array of transformation objects".to_string());
118    }
119
120    // Convert JSON -> YAML string -> yaml_serde::Value, then use the eval parser
121    let yaml_str =
122        serde_json::to_string(data).map_err(|e| format!("include serialization: {e}"))?;
123    let yaml_val: yaml_serde::Value = yaml_serde::from_str(&yaml_str)
124        .map_err(|e| format!("include data is not valid YAML: {e}"))?;
125
126    rsigma_eval::parse_transformation_items(&yaml_val)
127        .map_err(|e| format!("include parse error: {e}"))
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a pipeline whose only transformation is an unconditional include
    /// of `template`, with no declared sources.
    fn pipeline_with_include(template: &str) -> Pipeline {
        let include = TransformationItem {
            id: None,
            transformation: Transformation::Include {
                template: template.to_string(),
            },
            rule_conditions: vec![],
            rule_cond_expr: None,
            detection_item_conditions: vec![],
            field_name_conditions: vec![],
            field_name_cond_not: false,
        };

        Pipeline {
            name: "test".to_string(),
            priority: 0,
            vars: HashMap::new(),
            transformations: vec![include],
            finalizers: vec![],
            sources: vec![],
            source_refs: vec![],
        }
    }

    #[test]
    fn extract_source_id_simple() {
        let id = extract_source_id("${source.my_transforms}");
        assert_eq!(id, "my_transforms");
    }

    #[test]
    fn extract_source_id_with_path() {
        let id = extract_source_id("${source.config.transforms}");
        assert_eq!(id, "config");
    }

    #[test]
    fn extract_source_id_plain_string() {
        let id = extract_source_id("my_source");
        assert_eq!(id, "my_source");
    }

    #[test]
    fn nested_include_rejected() {
        let mut pipeline = pipeline_with_include("${source.transforms}");

        // The data behind the included source is itself an include directive.
        let resolved = HashMap::from([(
            "transforms".to_string(),
            serde_json::json!([
                {"type": "include", "include": "${source.other}"}
            ]),
        )]);

        let result = expand_includes(&mut pipeline, &resolved, true);
        assert!(result.is_err());

        let err = result.unwrap_err();
        let mentions_nesting = err.contains("nested include") || err.contains("recursive");
        assert!(mentions_nesting, "error should mention nesting: {err}");
    }
}