Skip to main content

bids_core/
utils.rs

1//! Utility functions: entity matching, file grouping, and case conversion.
2//!
3//! Provides helper functions used throughout the crate ecosystem for comparing
4//! entity maps, grouping multi-echo/multi-part files, CamelCase↔snake_case
5//! conversion, and fuzzy "did you mean?" suggestions.
6
7use crate::entities::Entities;
8use crate::file::BidsFile;
9
10/// Check whether a file's entities match the target entities.
11///
12/// In non-strict mode, all target entities must be present with matching values
13/// in `file_entities`, but extra entities in the file are allowed.
14///
15/// In strict mode, both maps must have exactly the same keys.
16///
17/// Corresponds to PyBIDS' `matches_entities()`.
18#[must_use]
19pub fn matches_entities(file_entities: &Entities, target: &Entities, strict: bool) -> bool {
20    // Quick length check for strict mode avoids collecting into HashSets.
21    if strict && file_entities.len() != target.len() {
22        return false;
23    }
24
25    for (k, target_val) in target {
26        match file_entities.get(k) {
27            Some(current_val) if current_val == target_val => {}
28            Some(_) => return false,
29            None => {
30                if strict {
31                    return false;
32                }
33            }
34        }
35    }
36
37    // In strict mode, also verify no extra keys in file_entities.
38    if strict {
39        for k in file_entities.keys() {
40            if !target.contains_key(k) {
41                return false;
42            }
43        }
44    }
45
46    true
47}
48
49/// Group BIDSFiles with multiple files per acquisition (multi-echo, multi-part, etc.).
50///
51/// Corresponds to PyBIDS' `collect_associated_files()`.
52///
53/// Groups files that share the same base entities (excluding multi-contrast
54/// entities like echo, part, ch, direction, and suffix).
55#[must_use]
56pub fn collect_associated_files(files: &[BidsFile]) -> Vec<Vec<&BidsFile>> {
57    const MULTI_ENTITIES: &[&str] = &["echo", "part", "ch", "direction", "suffix"];
58
59    // Build a grouping key for each file — a sorted Vec of (name, value) pairs
60    // excluding multi-contrast entities. Use IndexMap to preserve order.
61    let mut groups: indexmap::IndexMap<Vec<(String, String)>, Vec<&BidsFile>> =
62        indexmap::IndexMap::new();
63
64    for f in files {
65        let mut key: Vec<(String, String)> = f
66            .entities
67            .iter()
68            .filter(|(k, _)| !MULTI_ENTITIES.contains(&k.as_str()))
69            .map(|(k, v)| (k.clone(), v.as_str_lossy().into_owned()))
70            .collect();
71        key.sort_by(|(a, _), (b, _)| a.cmp(b));
72        groups.entry(key).or_default().push(f);
73    }
74
75    groups.into_values().collect()
76}
77
78/// Convert CamelCase keys to snake_case recursively in a JSON value.
79///
80/// Corresponds to PyBIDS' `convert_JSON()`.
81pub fn convert_json_keys(value: &serde_json::Value) -> serde_json::Value {
82    match value {
83        serde_json::Value::Object(map) => {
84            let mut new_map = serde_json::Map::new();
85            for (k, v) in map {
86                let new_key = camel_to_snake(k);
87                new_map.insert(new_key, convert_json_keys(v));
88            }
89            serde_json::Value::Object(new_map)
90        }
91        serde_json::Value::Array(arr) => {
92            serde_json::Value::Array(arr.iter().map(convert_json_keys).collect())
93        }
94        other => other.clone(),
95    }
96}
97
/// Convert a CamelCase string to snake_case.
///
/// An underscore is inserted before an uppercase character (other than the
/// first character of the string) when either:
///
/// * the previous character is lowercase or an ASCII digit, or
/// * the next character is lowercase.
///
/// The second rule keeps acronyms together: "EEGReference" → "eeg_reference".
///
/// Every character is then lowercased using its full Unicode lowercase
/// mapping, so characters whose lowercase form is more than one `char` are
/// converted completely rather than truncated.
#[must_use]
pub fn camel_to_snake(s: &str) -> String {
    let mut result = String::with_capacity(s.len() + 4);
    let mut prev: Option<char> = None;
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        let next_is_lower = chars.peek().is_some_and(|n| n.is_lowercase());
        // `prev` is `None` only for the first character, which never gets a
        // leading underscore.
        let word_boundary = c.is_uppercase()
            && prev.is_some_and(|p| p.is_lowercase() || p.is_ascii_digit() || next_is_lower);
        if word_boundary {
            result.push('_');
        }
        // `to_lowercase` may yield several chars; keep all of them.
        result.extend(c.to_lowercase());
        prev = Some(c);
    }

    result
}
122
/// Convert a snake_case string to CamelCase.
///
/// The input is split on `_`. The first character of each non-empty piece is
/// uppercased and the rest of the piece is copied unchanged. Empty pieces
/// (from leading, trailing or doubled underscores) contribute nothing.
#[must_use]
pub fn snake_to_camel(s: &str) -> String {
    let mut camel = String::with_capacity(s.len());
    for piece in s.split('_') {
        let mut rest = piece.chars();
        if let Some(first) = rest.next() {
            // Uppercasing can produce more than one char.
            camel.extend(first.to_uppercase());
            camel.push_str(rest.as_str());
        }
    }
    camel
}
139
/// Find the closest matches for a string from a list of candidates.
///
/// Returns up to `n` suggestions sorted by ascending edit distance.
/// Candidates with equal distance keep their relative order from
/// `candidates`.
///
/// A candidate is only considered when its edit distance to `word` is at most
/// `max(3, number of characters in word)`. The cut-off is counted in
/// characters, the same unit as the edit distance, so non-ASCII words are not
/// given a looser threshold than ASCII words of the same length.
#[must_use]
pub fn get_close_matches(word: &str, candidates: &[String], n: usize) -> Vec<String> {
    // Loop-invariant, so compute it once rather than per candidate.
    let max_distance = word.chars().count().max(3);

    let mut scored: Vec<(usize, &String)> = candidates
        .iter()
        .map(|c| (edit_distance(word, c), c))
        .filter(|(d, _)| *d <= max_distance)
        .collect();
    // Stable sort: ties stay in candidate order.
    scored.sort_by_key(|(d, _)| *d);
    scored.into_iter().take(n).map(|(_, s)| s.clone()).collect()
}

/// Levenshtein distance between `a` and `b`, counted in `char`s.
///
/// Uses two rolling rows of the dynamic-programming table: `prev` holds the
/// distances for the previous character of `a`, `curr` is filled for the
/// current one.
fn edit_distance(a: &str, b: &str) -> usize {
    let a: Vec<char> = a.chars().collect();
    let b: Vec<char> = b.chars().collect();
    let n = b.len();
    // Row 0: turning the empty prefix of `a` into `b[..j]` takes `j` inserts.
    let mut prev = (0..=n).collect::<Vec<_>>();
    let mut curr = vec![0; n + 1];
    for (i, ca) in a.iter().enumerate() {
        // Column 0: turning `a[..=i]` into the empty string takes `i + 1` deletes.
        curr[0] = i + 1;
        for (j, cb) in b.iter().enumerate() {
            let cost = usize::from(ca != cb);
            curr[j + 1] = (prev[j + 1] + 1) // delete from `a`
                .min(curr[j] + 1) // insert into `a`
                .min(prev[j] + cost); // substitute or match
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    // After the final swap the last completed row lives in `prev`.
    prev[n]
}
169
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entities::EntityValue;

    #[test]
    fn test_camel_to_snake() {
        assert_eq!(camel_to_snake("RepetitionTime"), "repetition_time");
        assert_eq!(camel_to_snake("TaskName"), "task_name");
        // A run of capitals stays together as one word; the underscore goes
        // before the last capital, which starts the next word.
        assert_eq!(camel_to_snake("EEGReference"), "eeg_reference");
    }

    #[test]
    fn test_snake_to_camel() {
        assert_eq!(snake_to_camel("repetition_time"), "RepetitionTime");
        assert_eq!(snake_to_camel("task_name"), "TaskName");
    }

    #[test]
    fn test_matches_entities() {
        let mut file_ents = Entities::new();
        file_ents.insert("subject".into(), EntityValue::Str("01".into()));
        file_ents.insert("task".into(), EntityValue::Str("rest".into()));

        let mut target = Entities::new();
        target.insert("subject".into(), EntityValue::Str("01".into()));

        // Non-strict: extra entities in the file are fine.
        assert!(matches_entities(&file_ents, &target, false));
        // Strict: the key sets differ, so this must fail.
        assert!(!matches_entities(&file_ents, &target, true));

        target.insert("task".into(), EntityValue::Str("rest".into()));
        assert!(matches_entities(&file_ents, &target, true));

        // A shared key with a different value never matches.
        let mut other = Entities::new();
        other.insert("subject".into(), EntityValue::Str("02".into()));
        assert!(!matches_entities(&file_ents, &other, false));
    }

    #[test]
    fn test_close_matches() {
        let candidates = vec![
            "subject".to_string(),
            "session".to_string(),
            "suffix".to_string(),
            "task".to_string(),
            "run".to_string(),
        ];
        let matches = get_close_matches("suject", &candidates, 2);
        assert!(!matches.is_empty());
        // Never more suggestions than requested.
        assert!(matches.len() <= 2);
        assert_eq!(matches[0], "subject");
    }
}