Skip to main content

bids_validate/
lib.rs

1#![deny(unsafe_code)]
2//! BIDS dataset validation utilities.
3//!
4//! Provides functions for validating BIDS dataset roots, derivative directories,
5//! and file indexing patterns. Used by `bids-layout` during dataset indexing to
6//! determine which files to include or exclude.
7//!
8//! # Root Validation
9//!
10//! [`validate_root()`] checks that a path exists, is a directory, and contains
11//! a valid `dataset_description.json`. If validation is enabled, the description
12//! is parsed and checked for required fields.
13//!
14//! # Derivative Validation
15//!
16//! [`validate_derivative_path()`] ensures derivative datasets have a valid
17//! `dataset_description.json` with pipeline information (either `GeneratedBy`
18//! or the legacy `PipelineDescription`).
19//!
20//! # Ignore / Force Patterns
21//!
22//! Default ignore patterns exclude `code/`, `models/`, `sourcedata/`, `stimuli/`,
23//! hidden files (`.`-prefixed), and common non-BIDS directories. These can be
24//! overridden with custom patterns via [`validate_indexing_args()`].
25
26use bids_core::dataset_description::DatasetDescription;
27use bids_core::error::{BidsError, Result};
28use regex::Regex;
29use std::path::{Path, PathBuf};
30use std::sync::LazyLock;
31
32/// Directories to ignore by default during indexing.
33pub static DEFAULT_IGNORE: LazyLock<Vec<Regex>> = LazyLock::new(|| {
34    vec![
35        Regex::new(r"^/(code|models|sourcedata|stimuli)").unwrap(),
36        Regex::new(r"/\.").unwrap(), // dotfiles
37    ]
38});
39
40/// Validate the root directory of a BIDS dataset.
41pub fn validate_root(root: &Path, validate: bool) -> Result<(PathBuf, Option<DatasetDescription>)> {
42    let root = root
43        .canonicalize()
44        .map_err(|_| BidsError::RootNotFound(root.to_string_lossy().to_string()))?;
45
46    if !root.exists() {
47        return Err(BidsError::RootNotFound(root.to_string_lossy().to_string()));
48    }
49
50    let desc_path = root.join("dataset_description.json");
51    if !desc_path.exists() {
52        if validate {
53            return Err(BidsError::MissingDatasetDescription);
54        }
55        return Ok((root, None));
56    }
57
58    match DatasetDescription::from_dir(&root) {
59        Ok(desc) => {
60            if validate {
61                desc.validate()?;
62            }
63            Ok((root, Some(desc)))
64        }
65        Err(e) => {
66            if validate {
67                Err(e)
68            } else {
69                Ok((root, None))
70            }
71        }
72    }
73}
74
75/// Validate a derivatives directory and return the pipeline name.
76pub fn validate_derivative_path(path: &Path) -> Result<String> {
77    let desc = DatasetDescription::from_dir(path)?;
78    desc.pipeline_name()
79        .map(std::string::ToString::to_string)
80        .ok_or_else(|| {
81            BidsError::DerivativesValidation(
82                "Every valid BIDS-derivatives dataset must have a GeneratedBy.Name field \
83             set inside 'dataset_description.json'"
84                    .to_string(),
85            )
86        })
87}
88
89/// Check if a path should be ignored during indexing.
90pub fn should_ignore(path: &Path, root: &Path, ignore_patterns: &[Regex]) -> bool {
91    let rel = path
92        .strip_prefix(root)
93        .map(|p| format!("/{}", p.to_string_lossy()))
94        .unwrap_or_default();
95
96    ignore_patterns.iter().any(|pat| pat.is_match(&rel))
97}
98
/// Cheap, name-based check for whether a file looks like part of a BIDS dataset.
///
/// Only the final path component is inspected. It is accepted when it is one
/// of the well-known root-level files or starts with one of the recognised
/// entity prefixes. Paths without a UTF-8 file name are rejected.
pub fn is_bids_file(path: &Path) -> bool {
    const ROOT_FILES: [&str; 6] = [
        "dataset_description.json",
        "participants.tsv",
        "participants.json",
        "README",
        "CHANGES",
        "LICENSE",
    ];
    const ENTITY_PREFIXES: [&str; 4] = ["sub-", "task-", "acq-", "sample-"];

    match path.file_name().and_then(|n| n.to_str()) {
        Some(name) => {
            ROOT_FILES.contains(&name)
                || ENTITY_PREFIXES
                    .iter()
                    .any(|prefix| name.starts_with(*prefix))
        }
        None => false,
    }
}
118
/// Turn an `IntendedFor` entry into a path inside the dataset at `root`.
///
/// * `bids::<path>` (URI for the current dataset) resolves to `root/<path>`.
/// * Any other `bids:` URI names a different dataset and yields `None`.
/// * Everything else is treated as a path relative to the subject directory
///   `root/sub-<subject>`.
pub fn resolve_intended_for(intent: &str, root: &Path, subject: &str) -> Option<PathBuf> {
    match intent.strip_prefix("bids::") {
        Some(dataset_relative) => Some(root.join(dataset_relative)),
        // Named dataset URI: a cross-dataset reference we cannot resolve here.
        None if intent.starts_with("bids:") => None,
        None => {
            let subject_dir = root.join(format!("sub-{subject}"));
            Some(subject_dir.join(intent))
        }
    }
}
129
130/// Validate and sort indexing arguments.
131///
132/// Returns (ignore_patterns, force_index_patterns), both sorted from specific to general.
133pub fn validate_indexing_args(
134    ignore: Option<Vec<Regex>>,
135    force_index: Option<Vec<Regex>>,
136    _root: &Path,
137) -> Result<(Vec<Regex>, Vec<Regex>)> {
138    let mut ignore = ignore.unwrap_or_else(|| DEFAULT_IGNORE.clone());
139
140    // Always ignore dotfiles
141    let dotfile_re = Regex::new(r"/\.").unwrap();
142    if !ignore.iter().any(|r| r.as_str() == dotfile_re.as_str()) {
143        ignore.push(dotfile_re);
144    }
145
146    let force_index = force_index.unwrap_or_default();
147
148    // Validate no derivatives in force_index
149    for entry in &force_index {
150        if entry.as_str().contains("derivatives") {
151            return Err(BidsError::Validation(
152                "Do not pass 'derivatives' in force_index. Use add_derivatives() instead."
153                    .to_string(),
154            ));
155        }
156    }
157
158    Ok((ignore, force_index))
159}
160
/// One problem detected while validating a BIDS dataset.
#[derive(Debug, Clone)]
pub struct ValidationIssue {
    /// How serious the issue is: `"error"` or `"warning"`.
    pub severity: String,
    /// Short machine-friendly identifier
    /// (e.g., `"MISSING_DATASET_DESCRIPTION"`, `"INVALID_FILENAME"`).
    pub code: String,
    /// Explanation intended for humans.
    pub message: String,
    /// Path of the offending file, when the issue is tied to one.
    pub path: Option<String>,
}

/// Renders as `[SEVERITY] CODE: message`, followed by ` (path)` when a path is
/// attached. The severity is upper-cased for display.
impl std::fmt::Display for ValidationIssue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            severity,
            code,
            message,
            path,
        } = self;
        let label = severity.to_uppercase();

        match path {
            Some(location) => write!(f, "[{label}] {code}: {message} ({location})"),
            None => write!(f, "[{label}] {code}: {message}"),
        }
    }
}
189
190/// Result of a full dataset validation.
191#[derive(Debug, Clone)]
192pub struct ValidationResult {
193    /// All issues found.
194    pub issues: Vec<ValidationIssue>,
195}
196
197impl ValidationResult {
198    /// Returns `true` if no errors were found (warnings are OK).
199    #[must_use]
200    pub fn is_valid(&self) -> bool {
201        !self.issues.iter().any(|i| i.severity == "error")
202    }
203
204    /// Count of errors.
205    #[must_use]
206    pub fn error_count(&self) -> usize {
207        self.issues.iter().filter(|i| i.severity == "error").count()
208    }
209
210    /// Count of warnings.
211    #[must_use]
212    pub fn warning_count(&self) -> usize {
213        self.issues
214            .iter()
215            .filter(|i| i.severity == "warning")
216            .count()
217    }
218}
219
220impl std::fmt::Display for ValidationResult {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        writeln!(
223            f,
224            "Validation: {} errors, {} warnings",
225            self.error_count(),
226            self.warning_count()
227        )?;
228        for issue in &self.issues {
229            writeln!(f, "  {issue}")?;
230        }
231        Ok(())
232    }
233}
234
235/// Perform a full BIDS validation of a dataset directory.
236///
237/// Checks:
238/// - `dataset_description.json` exists and has required fields
239/// - `README` exists
240/// - All files under `sub-*` directories follow BIDS naming conventions
241/// - No unexpected files at the root level
242/// - Metadata consistency (TR values match across runs, etc.)
243///
244/// This is a lighter-weight alternative to the official `bids-validator`;
245/// it catches the most common structural issues.
246///
247/// # Errors
248///
249/// Returns an I/O error if the directory can't be read.
250pub fn validate_dataset(root: &Path) -> Result<ValidationResult> {
251    let mut issues = Vec::new();
252
253    // 1. Check dataset_description.json
254    let desc_path = root.join("dataset_description.json");
255    if !desc_path.exists() {
256        issues.push(ValidationIssue {
257            severity: "error".into(),
258            code: "MISSING_DATASET_DESCRIPTION".into(),
259            message: "dataset_description.json is required at the root".into(),
260            path: None,
261        });
262    } else {
263        match DatasetDescription::from_dir(root) {
264            Ok(desc) => {
265                if desc.name.is_empty() {
266                    issues.push(ValidationIssue {
267                        severity: "error".into(),
268                        code: "MISSING_NAME".into(),
269                        message: "Name field is required in dataset_description.json".into(),
270                        path: Some(desc_path.to_string_lossy().into()),
271                    });
272                }
273                if desc.bids_version.is_empty() {
274                    issues.push(ValidationIssue {
275                        severity: "error".into(),
276                        code: "MISSING_BIDS_VERSION".into(),
277                        message: "BIDSVersion field is required in dataset_description.json".into(),
278                        path: Some(desc_path.to_string_lossy().into()),
279                    });
280                }
281            }
282            Err(_) => {
283                issues.push(ValidationIssue {
284                    severity: "error".into(),
285                    code: "INVALID_DATASET_DESCRIPTION".into(),
286                    message: "dataset_description.json cannot be parsed".into(),
287                    path: Some(desc_path.to_string_lossy().into()),
288                });
289            }
290        }
291    }
292
293    // 2. Check README
294    let has_readme = root.join("README").exists()
295        || root.join("README.md").exists()
296        || root.join("README.rst").exists()
297        || root.join("README.txt").exists();
298    if !has_readme {
299        issues.push(ValidationIssue {
300            severity: "warning".into(),
301            code: "MISSING_README".into(),
302            message: "A README file is recommended at the dataset root".into(),
303            path: None,
304        });
305    }
306
307    // 3. Check subject directories
308    let mut has_subjects = false;
309    if let Ok(entries) = std::fs::read_dir(root) {
310        for entry in entries.flatten() {
311            let name = entry.file_name().to_string_lossy().to_string();
312            if name.starts_with("sub-") && entry.file_type().is_ok_and(|t| t.is_dir()) {
313                has_subjects = true;
314                validate_subject_dir(&entry.path(), root, &mut issues);
315            }
316        }
317    }
318
319    if !has_subjects {
320        issues.push(ValidationIssue {
321            severity: "error".into(),
322            code: "NO_SUBJECTS".into(),
323            message: "No subject directories (sub-*) found".into(),
324            path: None,
325        });
326    }
327
328    Ok(ValidationResult { issues })
329}
330
331fn validate_subject_dir(sub_dir: &Path, root: &Path, issues: &mut Vec<ValidationIssue>) {
332    let schema = bids_schema::BidsSchema::load();
333
334    let entries: Vec<walkdir::DirEntry> = walkdir::WalkDir::new(sub_dir)
335        .into_iter()
336        .filter_map(|e| e.ok())
337        .filter(|e| e.file_type().is_file())
338        .collect();
339
340    {
341        for entry in entries {
342            let path = entry.path();
343            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
344
345            // Skip hidden files and known non-BIDS
346            if name.starts_with('.') {
347                continue;
348            }
349
350            // Check filename follows BIDS pattern
351            let rel = path
352                .strip_prefix(root)
353                .map(|p| p.to_string_lossy().to_string())
354                .unwrap_or_default();
355
356            if !schema.is_valid(&rel) && !name.ends_with(".json") {
357                // Don't flag JSON sidecars as errors — they follow looser patterns
358                issues.push(ValidationIssue {
359                    severity: "warning".into(),
360                    code: "INVALID_FILENAME".into(),
361                    message: "File does not match any BIDS naming pattern".to_string(),
362                    path: Some(rel),
363                });
364            }
365        }
366    }
367}
368
369/// Check if a path should be force-indexed.
370pub fn should_force_index(path: &Path, root: &Path, force_patterns: &[Regex]) -> bool {
371    if force_patterns.is_empty() {
372        return false;
373    }
374    let rel = path
375        .strip_prefix(root)
376        .map(|p| format!("/{}", p.to_string_lossy()))
377        .unwrap_or_default();
378    force_patterns.iter().any(|pat| pat.is_match(&rel))
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers the three input forms: local BIDS URI, subject-relative path,
    /// and a URI that names another dataset.
    #[test]
    fn test_resolve_intended_for() {
        let dataset = Path::new("/data/bids");
        let expected = PathBuf::from("/data/bids/sub-01/anat/sub-01_T1w.nii.gz");

        let from_uri = resolve_intended_for("bids::sub-01/anat/sub-01_T1w.nii.gz", dataset, "01");
        assert_eq!(from_uri, Some(expected.clone()));

        let from_relative = resolve_intended_for("anat/sub-01_T1w.nii.gz", dataset, "01");
        assert_eq!(from_relative, Some(expected));

        let cross_dataset =
            resolve_intended_for("bids:other:sub-01/anat/sub-01_T1w.nii.gz", dataset, "01");
        assert_eq!(cross_dataset, None);
    }

    /// Without arguments the defaults apply: some ignore patterns, no force patterns.
    #[test]
    fn test_validate_indexing_args() {
        let (ignore, force) = validate_indexing_args(None, None, Path::new("/data")).unwrap();
        assert!(!ignore.is_empty());
        assert!(force.is_empty());
    }

    /// A force-index pattern that mentions `derivatives` is rejected.
    #[test]
    fn test_validate_indexing_args_no_derivatives() {
        let forced = vec![Regex::new("derivatives").unwrap()];
        let outcome = validate_indexing_args(None, Some(forced), Path::new("/data"));
        assert!(outcome.is_err());
    }

    /// Only paths matching a force pattern are force-indexed.
    #[test]
    fn test_should_force_index() {
        let root = Path::new("/data");
        let patterns = vec![Regex::new(r"/extra/").unwrap()];

        let cases = [
            ("/data/extra/file.txt", true),
            ("/data/sub-01/file.txt", false),
        ];
        for (candidate, expected) in cases {
            assert_eq!(
                should_force_index(Path::new(candidate), root, &patterns),
                expected
            );
        }
    }
}