Skip to main content

rsigma_runtime/sources/
include.rs

//! Include expansion for dynamic pipelines.
//!
//! Expands `Transformation::Include { template }` directives by looking up the
//! already-resolved data of the referenced source, parsing it as a list of
//! transformation YAML objects, and splicing those into the transformations
//! list at the position of the directive.
6
7use std::collections::HashMap;
8
9use rsigma_eval::pipeline::sources::SourceType;
10use rsigma_eval::pipeline::transformations::Transformation;
11use rsigma_eval::{Pipeline, TransformationItem};
12
/// Maximum include nesting depth (prevents cycles).
///
/// `expand_includes_with_depth` returns an error as soon as its `depth`
/// argument is greater than this value.
const MAX_INCLUDE_DEPTH: usize = 1;
15
16/// Expand all `Include` transformations in a pipeline.
17///
18/// For each `Include { template }`, the template references a source ID.
19/// The resolved source data is expected to be a YAML array of transformation
20/// objects. These are parsed and spliced into the pipeline at the include position.
21///
22/// Security: if `allow_remote_include` is false, includes referencing HTTP or NATS
23/// sources produce an error.
24///
25/// Recursive includes are not allowed (max depth 1). If an included fragment
26/// itself contains `Include` directives, expansion fails with an error.
27pub fn expand_includes(
28    pipeline: &mut Pipeline,
29    resolved: &HashMap<String, serde_json::Value>,
30    allow_remote_include: bool,
31) -> Result<(), String> {
32    expand_includes_with_depth(pipeline, resolved, allow_remote_include, 0)
33}
34
35fn expand_includes_with_depth(
36    pipeline: &mut Pipeline,
37    resolved: &HashMap<String, serde_json::Value>,
38    allow_remote_include: bool,
39    depth: usize,
40) -> Result<(), String> {
41    if depth > MAX_INCLUDE_DEPTH {
42        return Err(
43            "recursive includes are not allowed (max depth 1); included content cannot itself contain include directives".to_string()
44        );
45    }
46
47    let mut expanded_transformations = Vec::new();
48    let mut had_include = false;
49
50    for item in &pipeline.transformations {
51        if let Transformation::Include { template } = &item.transformation {
52            had_include = true;
53            let source_id = extract_source_id(template);
54
55            // Security check: block remote includes if not allowed
56            if !allow_remote_include
57                && let Some(source) = pipeline.sources.iter().find(|s| s.id == source_id)
58            {
59                match &source.source_type {
60                    SourceType::Http { .. } | SourceType::Nats { .. } => {
61                        return Err(format!(
62                            "include references remote source '{}'; use --allow-remote-include to permit",
63                            source_id
64                        ));
65                    }
66                    _ => {}
67                }
68            }
69
70            if let Some(data) = resolved.get(&source_id) {
71                let items = parse_transformation_array(data)?;
72
73                // Check for nested includes (depth enforcement)
74                for parsed_item in &items {
75                    if matches!(parsed_item.transformation, Transformation::Include { .. }) {
76                        return Err(format!(
77                            "included content from source '{}' contains nested include directives; recursive includes are not allowed (max depth 1)",
78                            source_id
79                        ));
80                    }
81                }
82
83                expanded_transformations.extend(items);
84            } else {
85                return Err(format!(
86                    "include references unresolved source '{source_id}'"
87                ));
88            }
89        } else {
90            expanded_transformations.push(item.clone());
91        }
92    }
93
94    if had_include {
95        pipeline.transformations = expanded_transformations;
96    }
97
98    Ok(())
99}
100
/// Pull the source ID out of a template such as `${source.my_transforms}`.
///
/// Surrounding whitespace is ignored. When the template has the
/// `${source.<path>}` shape, the result is the first dot-separated segment of
/// `<path>`; any other input is returned trimmed but otherwise unchanged.
fn extract_source_id(template: &str) -> String {
    let candidate = template.trim();
    candidate
        .strip_prefix("${source.")
        .and_then(|rest| rest.strip_suffix('}'))
        // Only the leading segment identifies the source; trailing path parts are dropped.
        .map_or(candidate, |path| path.split('.').next().unwrap_or(path))
        .to_string()
}
111
112/// Parse a JSON value as an array of transformation objects.
113///
114/// Each element should be a JSON object with at minimum a "type" field.
115/// Uses rsigma-eval's `parse_transformation_items` to handle the full
116/// transformation grammar.
117fn parse_transformation_array(data: &serde_json::Value) -> Result<Vec<TransformationItem>, String> {
118    if !data.is_array() {
119        return Err("include source data must be an array of transformation objects".to_string());
120    }
121
122    // Convert JSON -> YAML string -> yaml_serde::Value, then use the eval parser
123    let yaml_str =
124        serde_json::to_string(data).map_err(|e| format!("include serialization: {e}"))?;
125    let yaml_val: yaml_serde::Value = yaml_serde::from_str(&yaml_str)
126        .map_err(|e| format!("include data is not valid YAML: {e}"))?;
127
128    rsigma_eval::parse_transformation_items(&yaml_val)
129        .map_err(|e| format!("include parse error: {e}"))
130}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed template yields the bare source ID.
    #[test]
    fn extract_source_id_simple() {
        let id = extract_source_id("${source.my_transforms}");
        assert_eq!(id, "my_transforms");
    }

    /// Only the first path segment after `source.` is kept.
    #[test]
    fn extract_source_id_with_path() {
        let id = extract_source_id("${source.config.transforms}");
        assert_eq!(id, "config");
    }

    /// Input without the `${source.…}` wrapper is passed through.
    #[test]
    fn extract_source_id_plain_string() {
        let id = extract_source_id("my_source");
        assert_eq!(id, "my_source");
    }

    /// An included fragment that itself contains an include must be refused.
    #[test]
    fn nested_include_rejected() {
        let include_item = TransformationItem {
            id: None,
            transformation: Transformation::Include {
                template: "${source.transforms}".to_string(),
            },
            rule_conditions: vec![],
            rule_cond_expr: None,
            detection_item_conditions: vec![],
            field_name_conditions: vec![],
            field_name_cond_not: false,
        };
        let mut pipeline = Pipeline {
            name: "test".to_string(),
            priority: 0,
            vars: HashMap::new(),
            transformations: vec![include_item],
            finalizers: vec![],
            sources: vec![],
            source_refs: vec![],
        };

        // The resolved source data contains an include directive itself.
        let resolved = HashMap::from([(
            "transforms".to_string(),
            serde_json::json!([
                {"type": "include", "include": "${source.other}"}
            ]),
        )]);

        let result = expand_includes(&mut pipeline, &resolved, true);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.contains("nested include") || err.contains("recursive"),
            "error should mention nesting: {err}"
        );
    }
}
191}