Skip to main content

bids_io/
path_builder.rs

1//! Build BIDS-compliant file paths from entity key-value pairs.
2//!
3//! Uses configurable patterns with optional sections (`[/ses-{session}]`),
4//! value constraints (`{suffix<T1w|T2w>}`), and defaults (`{datatype|anat}`)
5//! to construct paths that conform to BIDS naming conventions.
6
7use bids_core::entities::{Entities, EntityValue};
8use regex::Regex;
9use std::sync::LazyLock;
10
/// Matches one `{name<allowed|values>|default}` placeholder in a path pattern.
///
/// Capture groups:
/// 1. the entity name (word characters, may be empty),
/// 2. the optional allowed-values list found between `<` and `>`,
/// 3. the optional default value that follows `|`.
static PATTERN_FIND: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\{([\w\d]*?)(?:<([^>]+)>)?(?:\|((?:\.?[\w])+))?\}").unwrap());
13
14/// Build a file path given entities and a list of path patterns.
15///
16/// Supports list values in entities — returns a Vec of paths when any entity
17/// has multiple values (Cartesian product).
18///
19/// Returns `None` if no pattern matches all mandatory entities.
20pub fn build_path(entities: &Entities, patterns: &[&str], strict: bool) -> Option<String> {
21    let result = build_path_multi(entities, patterns, strict);
22    result.map(|v| match v.len() {
23        1 => v.into_iter().next().expect("length checked"),
24        _ => v.join(","),
25    })
26}
27
28/// Build potentially multiple paths when entities contain list values.
29pub fn build_path_multi(
30    entities: &Entities,
31    patterns: &[&str],
32    strict: bool,
33) -> Option<Vec<String>> {
34    // Check if any entity value contains list-like values (comma-separated)
35    // For now, we just expand the single entity set
36    for pattern in patterns {
37        if let Some(result) = try_build_single(entities, pattern, strict) {
38            return Some(vec![result]);
39        }
40    }
41    None
42}
43
44/// Build paths with entity expansion: when entities map to Vec of values,
45/// produce Cartesian product of all combinations.
46pub fn build_paths_expanded(
47    entities: &std::collections::HashMap<String, Vec<String>>,
48    patterns: &[&str],
49    strict: bool,
50) -> Vec<String> {
51    let keys: Vec<&String> = entities.keys().collect();
52    let value_lists: Vec<&Vec<String>> = keys.iter().map(|k| &entities[*k]).collect();
53
54    let combos = cartesian_product(&value_lists);
55    let mut results = Vec::new();
56
57    for combo in combos {
58        let mut ents = Entities::new();
59        for (i, key) in keys.iter().enumerate() {
60            ents.insert((*key).clone(), EntityValue::Str(combo[i].clone()));
61        }
62        if let Some(path) = build_path(&ents, patterns, strict) {
63            results.push(path);
64        }
65    }
66    results
67}
68
/// Compute the Cartesian product of `lists`.
///
/// Each output row contains one element from every input list, in list
/// order. Rows are ordered so that the first list varies slowest. An empty
/// `lists` slice yields a single empty row; an empty list anywhere yields no
/// rows at all.
fn cartesian_product(lists: &[&Vec<String>]) -> Vec<Vec<String>> {
    // Start from the single empty row and extend it one list at a time.
    let seed: Vec<Vec<String>> = vec![Vec::new()];
    lists.iter().fold(seed, |partials, choices| {
        let mut grown = Vec::new();
        for prefix in &partials {
            grown.extend(choices.iter().map(|choice| {
                let mut row = prefix.clone();
                row.push(choice.clone());
                row
            }));
        }
        grown
    })
}
87
88fn try_build_single(entities: &Entities, pattern: &str, strict: bool) -> Option<String> {
89    let matches: Vec<_> = PATTERN_FIND.captures_iter(pattern).collect();
90
91    let defined: Vec<String> = matches
92        .iter()
93        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
94        .collect();
95
96    if strict {
97        let defined_set: std::collections::HashSet<&str> =
98            defined.iter().map(std::string::String::as_str).collect();
99        for key in entities.keys() {
100            if entities
101                .get(key)
102                .is_some_and(|v| !v.as_str_lossy().is_empty())
103                && !defined_set.contains(key.as_str())
104            {
105                return None;
106            }
107        }
108    }
109
110    let mut new_path = pattern.to_string();
111    let mut tmp_entities = entities.clone();
112
113    // Remove None/empty entities
114    tmp_entities.retain(|_, v| {
115        let s = v.as_str_lossy();
116        !s.is_empty()
117    });
118
119    for cap in &matches {
120        let full = cap.get(0)?.as_str();
121        let name = cap.get(1)?.as_str();
122        let valid = cap.get(2).map(|m| m.as_str()).unwrap_or("");
123        let defval = cap.get(3).map(|m| m.as_str()).unwrap_or("");
124
125        if !valid.is_empty()
126            && let Some(ent_val) = tmp_entities.get(name)
127        {
128            let val_str = ent_val.as_str_lossy();
129            let expanded: Vec<String> = valid.split('|').flat_map(expand_options).collect();
130            if !expanded.iter().any(|v| v == &val_str) {
131                return None;
132            }
133        }
134
135        if !defval.is_empty() && !tmp_entities.contains_key(name) {
136            tmp_entities.insert(name.to_string(), EntityValue::Str(defval.to_string()));
137        }
138
139        new_path = new_path.replace(full, &format!("{{{name}}}"));
140    }
141
142    // Handle optional sections
143    static OPT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]*?)\]").unwrap());
144    static PH_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{(\w+)\}").unwrap());
145    loop {
146        let before = new_path.clone();
147        new_path = OPT_RE
148            .replace_all(&new_path, |caps: &regex::Captures| {
149                let inner = &caps[1];
150                for pcap in PH_RE.captures_iter(inner) {
151                    let ent_name = &pcap[1];
152                    if tmp_entities.contains_key(ent_name) {
153                        return inner.to_string();
154                    }
155                }
156                String::new()
157            })
158            .to_string();
159        if new_path == before {
160            break;
161        }
162    }
163
164    // Check all remaining placeholders have values
165    for cap in PH_RE.captures_iter(&new_path) {
166        let name = cap.get(1)?.as_str();
167        if !tmp_entities.contains_key(name) {
168            return None;
169        }
170    }
171
172    // Handle extension with/without leading dot
173    if let Some(ext_val) = tmp_entities.get("extension") {
174        let ext_str = ext_val.as_str_lossy();
175        if !ext_str.starts_with('.') {
176            tmp_entities.insert("extension".into(), EntityValue::Str(format!(".{ext_str}")));
177        }
178    }
179
180    // Replace all placeholders
181    for (name, val) in &tmp_entities {
182        let placeholder = format!("{{{name}}}");
183        new_path = new_path.replace(&placeholder, &val.as_str_lossy());
184    }
185
186    if new_path.is_empty() {
187        None
188    } else {
189        Some(new_path)
190    }
191}
192
/// Expand bracket options in value strings.
///
/// Every `[...]` group lists single-character alternatives; the result is
/// the list of all strings obtained by picking one character per group, with
/// earlier groups varying slowest. For example `"[Jj]son"` expands to
/// `["Json", "json"]`.
///
/// A group runs from a `[` to the first `]` after it. A `[` with no closing
/// `]` is kept as literal text. A value without any group is returned
/// unchanged as the only element. An empty group (`[]`) offers no
/// alternatives, so the result is empty.
pub fn expand_options(value: &str) -> Vec<String> {
    let mut results = vec![String::new()];
    // Unprocessed tail of `value`; consumed group by group, left to right.
    let mut rest = value;

    while let Some(open) = rest.find('[') {
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find(']') else {
            // Unterminated group: the remainder is plain text.
            break;
        };
        let literal = &rest[..open];
        let options = &after_open[..close];

        let mut expanded = Vec::new();
        for prefix in &results {
            for c in options.chars() {
                expanded.push(format!("{prefix}{literal}{c}"));
            }
        }
        results = expanded;
        rest = &after_open[close + 1..];
    }

    // Whatever follows the last group is shared by every expansion.
    for result in &mut results {
        result.push_str(rest);
    }
    results
}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Optional session sections are dropped when no session is given and a
    /// supplied extension takes precedence over the pattern default.
    #[test]
    fn test_build_path_basic() {
        let mut entities = Entities::new();
        entities.insert("subject".into(), EntityValue::Str("001".into()));
        entities.insert("suffix".into(), EntityValue::Str("T1w".into()));
        entities.insert("extension".into(), EntityValue::Str(".nii".into()));

        let patterns = &[
            "sub-{subject}[/ses-{session}]/anat/sub-{subject}[_ses-{session}]_{suffix<T1w|T2w>}{extension<.nii|.nii.gz>|.nii.gz}",
        ];

        let result = build_path(&entities, patterns, false);
        assert_eq!(result.as_deref(), Some("sub-001/anat/sub-001_T1w.nii"));
    }

    /// Missing entities are filled from `|default` values, also in strict mode.
    #[test]
    fn test_build_path_with_default() {
        let mut entities = Entities::new();
        entities.insert("subject".into(), EntityValue::Str("001".into()));
        entities.insert("extension".into(), EntityValue::Str(".bvec".into()));

        let patterns = &[
            "sub-{subject}[/ses-{session}]/{datatype|dwi}/sub-{subject}[_ses-{session}]_{suffix|dwi}{extension<.bval|.bvec|.json|.nii.gz|.nii>|.nii.gz}",
        ];

        let result = build_path(&entities, patterns, true);
        assert_eq!(result.as_deref(), Some("sub-001/dwi/sub-001_dwi.bvec"));
    }

    /// Values without brackets pass through; each bracket group multiplies
    /// the result by its characters, earlier groups varying slowest.
    #[test]
    fn test_expand_options() {
        assert_eq!(expand_options("json"), vec!["json"]);
        let expanded = expand_options("[Jj]son");
        assert_eq!(expanded, vec!["Json", "json"]);
        assert_eq!(
            expand_options("[Jj]son[12]"),
            vec!["Json1", "Json2", "json1", "json2"]
        );
    }

    /// The product keeps list order within each row and varies the first
    /// list slowest.
    #[test]
    fn test_cartesian_product() {
        let a = vec!["01".to_string(), "02".to_string()];
        let b = vec!["rest".to_string()];
        let result = cartesian_product(&[&a, &b]);
        let expected = vec![
            vec!["01".to_string(), "rest".to_string()],
            vec!["02".to_string(), "rest".to_string()],
        ];
        assert_eq!(result, expected);
    }

    /// One path is produced per subject value, in the order the values were
    /// supplied.
    #[test]
    fn test_build_paths_expanded() {
        let mut entities = std::collections::HashMap::new();
        entities.insert(
            "subject".to_string(),
            vec!["01".to_string(), "02".to_string()],
        );
        entities.insert("suffix".to_string(), vec!["T1w".to_string()]);
        entities.insert("extension".to_string(), vec![".nii.gz".to_string()]);

        let patterns = &["sub-{subject}/anat/sub-{subject}_{suffix}{extension}"];
        let results = build_paths_expanded(&entities, patterns, false);
        assert_eq!(
            results,
            vec![
                "sub-01/anat/sub-01_T1w.nii.gz",
                "sub-02/anat/sub-02_T1w.nii.gz",
            ]
        );
    }
}