1#![deny(unsafe_code)]
2pub mod version;
23
24use regex::Regex;
25use serde::{Deserialize, Serialize};
26use std::collections::{HashMap, HashSet};
27
28pub use version::{BidsVersion, Compatibility, MIN_COMPATIBLE_VERSION, SUPPORTED_BIDS_VERSION};
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct EntityDef {
33 pub name: String,
34 pub key: String,
36 pub format: String,
38 pub is_directory: bool,
40}
41
42#[derive(Debug, Clone)]
44pub struct BidsSchema {
45 pub version: String,
46 pub entities: Vec<EntityDef>,
47 pub datatypes: HashSet<String>,
48 pub suffixes: HashSet<String>,
49 pub extensions: HashSet<String>,
50 pub file_patterns: HashMap<String, Vec<Regex>>,
52}
53
54impl BidsSchema {
55 pub fn load() -> Self {
57 Self::built_in()
58 }
59
60 #[must_use]
72 pub fn check_dataset_version(&self, dataset_version_str: &str) -> Compatibility {
73 match BidsVersion::parse(dataset_version_str) {
74 Some(dv) => {
75 let lib_ver = BidsVersion::parse(&self.version).unwrap_or(SUPPORTED_BIDS_VERSION);
76 lib_ver.check_compatibility(&dv)
77 }
78 None => Compatibility::Incompatible {
79 reason: format!("Cannot parse BIDS version: '{dataset_version_str}'"),
80 },
81 }
82 }
83
84 fn built_in() -> Self {
93 let entities = vec![
94 ent("subject", "sub", "label", true),
95 ent("session", "ses", "label", true),
96 ent("sample", "sample", "label", false),
97 ent("task", "task", "label", false),
98 ent("tracksys", "tracksys", "label", false),
99 ent("acquisition", "acq", "label", false),
100 ent("ceagent", "ce", "label", false),
101 ent("staining", "stain", "label", false),
102 ent("tracer", "trc", "label", false),
103 ent("reconstruction", "rec", "label", false),
104 ent("direction", "dir", "label", false),
105 ent("run", "run", "index", false),
106 ent("modality", "mod", "label", false),
107 ent("echo", "echo", "index", false),
108 ent("flip", "flip", "index", false),
109 ent("inversion", "inv", "index", false),
110 ent("mtransfer", "mt", "label", false),
111 ent("part", "part", "label", false),
112 ent("processing", "proc", "label", false),
113 ent("hemisphere", "hemi", "label", false),
114 ent("space", "space", "label", false),
115 ent("split", "split", "index", false),
116 ent("recording", "recording", "label", false),
117 ent("chunk", "chunk", "index", false),
118 ent("atlas", "atlas", "label", false),
119 ent("resolution", "res", "label", false),
120 ent("density", "den", "label", false),
121 ent("label", "label", "label", false),
122 ent("description", "desc", "label", false),
123 ];
124
125 let datatypes: HashSet<String> = [
126 "anat", "beh", "dwi", "eeg", "fmap", "func", "ieeg", "meg", "micr", "motion", "mrs",
127 "nirs", "perf", "pet",
128 ]
129 .iter()
130 .map(std::string::ToString::to_string)
131 .collect();
132
133 let suffixes: HashSet<String> = [
134 "T1w",
135 "T2w",
136 "T2star",
137 "FLAIR",
138 "PD",
139 "PDT2",
140 "inplaneT1",
141 "inplaneT2",
142 "angio",
143 "defacemask",
144 "bold",
145 "cbv",
146 "sbref",
147 "phase",
148 "dwi",
149 "phasediff",
150 "magnitude1",
151 "magnitude2",
152 "phase1",
153 "phase2",
154 "fieldmap",
155 "epi",
156 "events",
157 "physio",
158 "stim",
159 "channels",
160 "electrodes",
161 "coordsystem",
162 "eeg",
163 "ieeg",
164 "meg",
165 "headshape",
166 "photo",
167 "pet",
168 "blood",
169 "asl",
170 "m0scan",
171 "aslcontext",
172 "asllabeling",
173 "motion",
174 "nirs",
175 "optodes",
176 "svs",
177 "mrsi",
178 "unloc",
179 "mrsref",
180 "TEM",
181 "SEM",
182 "uCT",
183 "BF",
184 "DF",
185 "PC",
186 "DIC",
187 "FLUO",
188 "CONF",
189 "participants",
190 "scans",
191 "sessions",
192 "regressors",
193 "timeseries",
194 ]
195 .iter()
196 .map(std::string::ToString::to_string)
197 .collect();
198
199 let extensions: HashSet<String> = [
200 ".nii", ".nii.gz", ".json", ".tsv", ".tsv.gz", ".bval", ".bvec", ".edf", ".bdf",
201 ".set", ".fdt", ".vhdr", ".vmrk", ".eeg", ".fif", ".dat", ".pos", ".sqd", ".con",
202 ".ds", ".snirf", ".mefd", ".nwb", ".png", ".tif", ".ome.tif", ".ome.btf", ".jpg",
203 ]
204 .iter()
205 .map(std::string::ToString::to_string)
206 .collect();
207
208 let mut file_patterns: HashMap<String, Vec<Regex>> = HashMap::new();
210 let sub = r"sub-[a-zA-Z0-9]+";
211 let ses = r"(?:_ses-[a-zA-Z0-9]+)?";
212 let entities_pat = r"(?:_[a-z]+-[a-zA-Z0-9]+)*";
213
214 let ses_dir = r"(?:/ses-[a-zA-Z0-9]+)?";
216 for dt in &datatypes {
217 let pat = format!(
218 r"^{sub}{ses_dir}/{dt}/{sub}{ses}{entities_pat}_[a-zA-Z0-9]+\.[a-zA-Z0-9.]+$"
219 );
220 if let Ok(re) = Regex::new(&pat) {
221 file_patterns.entry(dt.clone()).or_default().push(re);
222 }
223 }
224
225 let root_patterns = vec![
227 Regex::new(r"^participants\.tsv$").unwrap(),
228 Regex::new(r"^participants\.json$").unwrap(),
229 Regex::new(r"^dataset_description\.json$").unwrap(),
230 Regex::new(r"^README.*$").unwrap(),
231 Regex::new(r"^CHANGES$").unwrap(),
232 Regex::new(r"^LICENSE$").unwrap(),
233 Regex::new(r"^(?:task-[a-zA-Z0-9]+_)?(?:acq-[a-zA-Z0-9]+_)?[a-zA-Z0-9]+\.json$")
235 .unwrap(),
236 ];
237 file_patterns.insert("root".into(), root_patterns);
238
239 let scans_pat = Regex::new(&format!(
241 r"^{sub}(?:/ses-[a-zA-Z0-9]+)?/{sub}(?:_ses-[a-zA-Z0-9]+)?_scans\.tsv$"
242 ))
243 .unwrap();
244 file_patterns
245 .entry("scans".into())
246 .or_default()
247 .push(scans_pat);
248
249 let ses_pat = Regex::new(&format!(r"^{sub}/{sub}_sessions\.tsv$")).unwrap();
251 file_patterns
252 .entry("sessions".into())
253 .or_default()
254 .push(ses_pat);
255
256 Self {
257 version: SUPPORTED_BIDS_VERSION.to_string(),
258 entities,
259 datatypes,
260 suffixes,
261 extensions,
262 file_patterns,
263 }
264 }
265
266 pub fn is_valid(&self, relative_path: &str) -> bool {
268 let path = relative_path.trim_start_matches('/');
269
270 if !path.contains('/') {
272 return self
273 .file_patterns
274 .get("root")
275 .is_some_and(|pats| pats.iter().any(|p| p.is_match(path)));
276 }
277
278 for patterns in self.file_patterns.values() {
280 if patterns.iter().any(|p| p.is_match(path)) {
281 return true;
282 }
283 }
284 false
285 }
286
287 pub fn get_entity(&self, name: &str) -> Option<&EntityDef> {
289 self.entities.iter().find(|e| e.name == name)
290 }
291
292 pub fn get_entity_by_key(&self, key: &str) -> Option<&EntityDef> {
294 self.entities.iter().find(|e| e.key == key)
295 }
296
297 pub fn is_valid_datatype(&self, dt: &str) -> bool {
299 self.datatypes.contains(dt)
300 }
301
302 pub fn is_valid_suffix(&self, s: &str) -> bool {
304 self.suffixes.contains(s)
305 }
306
307 pub fn is_valid_extension(&self, e: &str) -> bool {
309 self.extensions.contains(e)
310 }
311
312 pub fn entity_pattern(&self, name: &str) -> Option<String> {
314 let ent = self.get_entity(name)?;
315 let value_pattern = match ent.format.as_str() {
316 "index" => r"\d+",
317 _ => r"[a-zA-Z0-9]+",
318 };
319 if ent.is_directory {
320 Some(format!(
321 r"[/\\]+{}-({value})",
322 ent.key,
323 value = value_pattern
324 ))
325 } else {
326 Some(format!(
327 r"[_/\\]+{}-({value})",
328 ent.key,
329 value = value_pattern
330 ))
331 }
332 }
333}
334
335fn ent(name: &str, key: &str, format: &str, is_dir: bool) -> EntityDef {
336 EntityDef {
337 name: name.into(),
338 key: key.into(),
339 format: format.into(),
340 is_directory: is_dir,
341 }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in schema exposes the expected version and core vocabulary.
    #[test]
    fn test_load_schema() {
        let schema = BidsSchema::load();

        assert_eq!(schema.version, "1.9.0");
        assert!(schema.entities.len() >= 25);
        for datatype in ["eeg", "func"] {
            assert!(schema.datatypes.contains(datatype));
        }
        assert!(schema.suffixes.contains("bold"));
        assert!(schema.extensions.contains(".nii.gz"));
    }

    /// Entity patterns embed the entity key and the format-specific value class.
    #[test]
    fn test_entity_pattern() {
        let schema = BidsSchema::load();

        let subject = schema.entity_pattern("subject").unwrap();
        assert!(subject.contains("sub-"));

        let run = schema.entity_pattern("run").unwrap();
        assert!(run.contains(r"\d+"));
    }

    /// Root-level files and datatype files are both accepted.
    #[test]
    fn test_is_valid() {
        let schema = BidsSchema::load();

        for path in [
            "participants.tsv",
            "dataset_description.json",
            "sub-01/eeg/sub-01_task-rest_eeg.edf",
            "sub-01/func/sub-01_task-rest_bold.nii.gz",
        ] {
            assert!(schema.is_valid(path));
        }
    }

    /// Membership helpers accept known values and reject an unknown datatype.
    #[test]
    fn test_valid_types() {
        let schema = BidsSchema::load();

        assert!(schema.is_valid_datatype("eeg"));
        assert!(!schema.is_valid_datatype("xyz"));
        assert!(schema.is_valid_suffix("bold"));
        assert!(schema.is_valid_extension(".nii.gz"));
    }
}