Skip to main content

bids_schema/
lib.rs

1#![deny(unsafe_code)]
2//! BIDS specification schema loader and version tracking.
3//!
4//! Provides entity definitions, valid datatypes/suffixes/extensions,
5//! BIDS filename validation, and specification version management —
6//! replacing bidsschematools + bids-validator.
7//!
8//! # Spec Version Tracking
9//!
10//! All spec-derived knowledge is concentrated in this crate and in
11//! `bids-core/src/configs/`. When the BIDS spec releases a new version:
12//!
13//! 1. Add a [`SpecChange`](version::SpecChange) entry to
14//!    [`version::CHANGELOG`].
15//! 2. Update [`version::SUPPORTED_BIDS_VERSION`].
16//! 3. Update `BidsSchema::built_in()` with new entities/datatypes/suffixes.
17//! 4. Update `bids-core/src/configs/bids.json` with new patterns.
18//! 5. Run `cargo test --workspace` to catch regressions.
19//!
20//! See the [`version`] module for full documentation on the migration process.
21
22pub mod version;
23
24use regex::Regex;
25use serde::{Deserialize, Serialize};
26use std::collections::{HashMap, HashSet};
27
28pub use version::{BidsVersion, Compatibility, MIN_COMPATIBLE_VERSION, SUPPORTED_BIDS_VERSION};
29
30/// A BIDS entity definition from the schema.
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct EntityDef {
33    pub name: String,
34    /// The BIDS key prefix (e.g., "sub" for subject, "ses" for session).
35    pub key: String,
36    /// Format: "label", "index", etc.
37    pub format: String,
38    /// Whether this entity creates a directory level.
39    pub is_directory: bool,
40}
41
42/// Full BIDS schema.
43#[derive(Debug, Clone)]
44pub struct BidsSchema {
45    pub version: String,
46    pub entities: Vec<EntityDef>,
47    pub datatypes: HashSet<String>,
48    pub suffixes: HashSet<String>,
49    pub extensions: HashSet<String>,
50    /// Filename validation patterns: datatype → vec of regex.
51    pub file_patterns: HashMap<String, Vec<Regex>>,
52}
53
54impl BidsSchema {
55    /// Load the bundled default schema.
56    pub fn load() -> Self {
57        Self::built_in()
58    }
59
60    /// Check compatibility between this schema and a dataset's declared BIDS version.
61    ///
62    /// # Example
63    ///
64    /// ```
65    /// use bids_schema::{BidsSchema, BidsVersion};
66    ///
67    /// let schema = BidsSchema::load();
68    /// let compat = schema.check_dataset_version("1.8.0");
69    /// assert!(compat.is_ok());
70    /// ```
71    #[must_use]
72    pub fn check_dataset_version(&self, dataset_version_str: &str) -> Compatibility {
73        match BidsVersion::parse(dataset_version_str) {
74            Some(dv) => {
75                let lib_ver = BidsVersion::parse(&self.version).unwrap_or(SUPPORTED_BIDS_VERSION);
76                lib_ver.check_compatibility(&dv)
77            }
78            None => Compatibility::Incompatible {
79                reason: format!("Cannot parse BIDS version: '{dataset_version_str}'"),
80            },
81        }
82    }
83
84    /// Built-in schema derived from BIDS 1.9.0 specification.
85    ///
86    /// **Maintainer note:** When updating to a new BIDS spec version, update:
87    /// - The entity, datatype, suffix, and extension lists below
88    /// - The `version` field
89    /// - [`version::SUPPORTED_BIDS_VERSION`]
90    /// - [`version::CHANGELOG`]
91    /// - `bids-core/src/configs/bids.json`
92    fn built_in() -> Self {
93        let entities = vec![
94            ent("subject", "sub", "label", true),
95            ent("session", "ses", "label", true),
96            ent("sample", "sample", "label", false),
97            ent("task", "task", "label", false),
98            ent("tracksys", "tracksys", "label", false),
99            ent("acquisition", "acq", "label", false),
100            ent("ceagent", "ce", "label", false),
101            ent("staining", "stain", "label", false),
102            ent("tracer", "trc", "label", false),
103            ent("reconstruction", "rec", "label", false),
104            ent("direction", "dir", "label", false),
105            ent("run", "run", "index", false),
106            ent("modality", "mod", "label", false),
107            ent("echo", "echo", "index", false),
108            ent("flip", "flip", "index", false),
109            ent("inversion", "inv", "index", false),
110            ent("mtransfer", "mt", "label", false),
111            ent("part", "part", "label", false),
112            ent("processing", "proc", "label", false),
113            ent("hemisphere", "hemi", "label", false),
114            ent("space", "space", "label", false),
115            ent("split", "split", "index", false),
116            ent("recording", "recording", "label", false),
117            ent("chunk", "chunk", "index", false),
118            ent("atlas", "atlas", "label", false),
119            ent("resolution", "res", "label", false),
120            ent("density", "den", "label", false),
121            ent("label", "label", "label", false),
122            ent("description", "desc", "label", false),
123        ];
124
125        let datatypes: HashSet<String> = [
126            "anat", "beh", "dwi", "eeg", "fmap", "func", "ieeg", "meg", "micr", "motion", "mrs",
127            "nirs", "perf", "pet",
128        ]
129        .iter()
130        .map(std::string::ToString::to_string)
131        .collect();
132
133        let suffixes: HashSet<String> = [
134            "T1w",
135            "T2w",
136            "T2star",
137            "FLAIR",
138            "PD",
139            "PDT2",
140            "inplaneT1",
141            "inplaneT2",
142            "angio",
143            "defacemask",
144            "bold",
145            "cbv",
146            "sbref",
147            "phase",
148            "dwi",
149            "phasediff",
150            "magnitude1",
151            "magnitude2",
152            "phase1",
153            "phase2",
154            "fieldmap",
155            "epi",
156            "events",
157            "physio",
158            "stim",
159            "channels",
160            "electrodes",
161            "coordsystem",
162            "eeg",
163            "ieeg",
164            "meg",
165            "headshape",
166            "photo",
167            "pet",
168            "blood",
169            "asl",
170            "m0scan",
171            "aslcontext",
172            "asllabeling",
173            "motion",
174            "nirs",
175            "optodes",
176            "svs",
177            "mrsi",
178            "unloc",
179            "mrsref",
180            "TEM",
181            "SEM",
182            "uCT",
183            "BF",
184            "DF",
185            "PC",
186            "DIC",
187            "FLUO",
188            "CONF",
189            "participants",
190            "scans",
191            "sessions",
192            "regressors",
193            "timeseries",
194        ]
195        .iter()
196        .map(std::string::ToString::to_string)
197        .collect();
198
199        let extensions: HashSet<String> = [
200            ".nii", ".nii.gz", ".json", ".tsv", ".tsv.gz", ".bval", ".bvec", ".edf", ".bdf",
201            ".set", ".fdt", ".vhdr", ".vmrk", ".eeg", ".fif", ".dat", ".pos", ".sqd", ".con",
202            ".ds", ".snirf", ".mefd", ".nwb", ".png", ".tif", ".ome.tif", ".ome.btf", ".jpg",
203        ]
204        .iter()
205        .map(std::string::ToString::to_string)
206        .collect();
207
208        // Core filename validation patterns
209        let mut file_patterns: HashMap<String, Vec<Regex>> = HashMap::new();
210        let sub = r"sub-[a-zA-Z0-9]+";
211        let ses = r"(?:_ses-[a-zA-Z0-9]+)?";
212        let entities_pat = r"(?:_[a-z]+-[a-zA-Z0-9]+)*";
213
214        // Pattern with optional ses- directory AND ses- entity in filename
215        let ses_dir = r"(?:/ses-[a-zA-Z0-9]+)?";
216        for dt in &datatypes {
217            let pat = format!(
218                r"^{sub}{ses_dir}/{dt}/{sub}{ses}{entities_pat}_[a-zA-Z0-9]+\.[a-zA-Z0-9.]+$"
219            );
220            if let Ok(re) = Regex::new(&pat) {
221                file_patterns.entry(dt.clone()).or_default().push(re);
222            }
223        }
224
225        // Root-level files
226        let root_patterns = vec![
227            Regex::new(r"^participants\.tsv$").unwrap(),
228            Regex::new(r"^participants\.json$").unwrap(),
229            Regex::new(r"^dataset_description\.json$").unwrap(),
230            Regex::new(r"^README.*$").unwrap(),
231            Regex::new(r"^CHANGES$").unwrap(),
232            Regex::new(r"^LICENSE$").unwrap(),
233            // Task/acq-level sidecars
234            Regex::new(r"^(?:task-[a-zA-Z0-9]+_)?(?:acq-[a-zA-Z0-9]+_)?[a-zA-Z0-9]+\.json$")
235                .unwrap(),
236        ];
237        file_patterns.insert("root".into(), root_patterns);
238
239        // Scans files
240        let scans_pat = Regex::new(&format!(
241            r"^{sub}(?:/ses-[a-zA-Z0-9]+)?/{sub}(?:_ses-[a-zA-Z0-9]+)?_scans\.tsv$"
242        ))
243        .unwrap();
244        file_patterns
245            .entry("scans".into())
246            .or_default()
247            .push(scans_pat);
248
249        // Session files
250        let ses_pat = Regex::new(&format!(r"^{sub}/{sub}_sessions\.tsv$")).unwrap();
251        file_patterns
252            .entry("sessions".into())
253            .or_default()
254            .push(ses_pat);
255
256        Self {
257            version: SUPPORTED_BIDS_VERSION.to_string(),
258            entities,
259            datatypes,
260            suffixes,
261            extensions,
262            file_patterns,
263        }
264    }
265
266    /// Validate a relative file path against BIDS naming rules.
267    pub fn is_valid(&self, relative_path: &str) -> bool {
268        let path = relative_path.trim_start_matches('/');
269
270        // Check root-level files
271        if !path.contains('/') {
272            return self
273                .file_patterns
274                .get("root")
275                .is_some_and(|pats| pats.iter().any(|p| p.is_match(path)));
276        }
277
278        // Check all datatype patterns
279        for patterns in self.file_patterns.values() {
280            if patterns.iter().any(|p| p.is_match(path)) {
281                return true;
282            }
283        }
284        false
285    }
286
287    /// Get entity definition by name.
288    pub fn get_entity(&self, name: &str) -> Option<&EntityDef> {
289        self.entities.iter().find(|e| e.name == name)
290    }
291
292    /// Get entity definition by BIDS key (e.g., "sub", "ses").
293    pub fn get_entity_by_key(&self, key: &str) -> Option<&EntityDef> {
294        self.entities.iter().find(|e| e.key == key)
295    }
296
297    /// Check if a datatype is valid.
298    pub fn is_valid_datatype(&self, dt: &str) -> bool {
299        self.datatypes.contains(dt)
300    }
301
302    /// Check if a suffix is valid.
303    pub fn is_valid_suffix(&self, s: &str) -> bool {
304        self.suffixes.contains(s)
305    }
306
307    /// Check if an extension is valid.
308    pub fn is_valid_extension(&self, e: &str) -> bool {
309        self.extensions.contains(e)
310    }
311
312    /// Generate a regex pattern string for an entity.
313    pub fn entity_pattern(&self, name: &str) -> Option<String> {
314        let ent = self.get_entity(name)?;
315        let value_pattern = match ent.format.as_str() {
316            "index" => r"\d+",
317            _ => r"[a-zA-Z0-9]+",
318        };
319        if ent.is_directory {
320            Some(format!(
321                r"[/\\]+{}-({value})",
322                ent.key,
323                value = value_pattern
324            ))
325        } else {
326            Some(format!(
327                r"[_/\\]+{}-({value})",
328                ent.key,
329                value = value_pattern
330            ))
331        }
332    }
333}
334
335fn ent(name: &str, key: &str, format: &str, is_dir: bool) -> EntityDef {
336    EntityDef {
337        name: name.into(),
338        key: key.into(),
339        format: format.into(),
340        is_directory: is_dir,
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// The bundled schema reports the expected version and core vocabulary.
    #[test]
    fn test_load_schema() {
        let schema = BidsSchema::load();
        assert_eq!(schema.version, "1.9.0");
        assert!(schema.entities.len() >= 25);
        for datatype in ["eeg", "func"] {
            assert!(schema.datatypes.contains(datatype));
        }
        assert!(schema.suffixes.contains("bold"));
        assert!(schema.extensions.contains(".nii.gz"));
    }

    /// Generated entity patterns contain the key prefix / value class we expect.
    #[test]
    fn test_entity_pattern() {
        let schema = BidsSchema::load();
        for (entity, fragment) in [("subject", "sub-"), ("run", r"\d+")] {
            let pattern = schema.entity_pattern(entity).unwrap();
            assert!(pattern.contains(fragment));
        }
    }

    /// Well-formed root-level and datatype-level paths are accepted.
    #[test]
    fn test_is_valid() {
        let schema = BidsSchema::load();
        let accepted = [
            "participants.tsv",
            "dataset_description.json",
            "sub-01/eeg/sub-01_task-rest_eeg.edf",
            "sub-01/func/sub-01_task-rest_bold.nii.gz",
        ];
        for path in accepted {
            assert!(schema.is_valid(path));
        }
    }

    /// Vocabulary membership helpers accept known values and reject unknown ones.
    #[test]
    fn test_valid_types() {
        let schema = BidsSchema::load();
        assert!(schema.is_valid_datatype("eeg"));
        assert!(!schema.is_valid_datatype("xyz"));
        assert!(schema.is_valid_suffix("bold"));
        assert!(schema.is_valid_extension(".nii.gz"));
    }
}
385}