#![deny(unsafe_code)]
use bids_core::dataset_description::DatasetDescription;
use bids_core::error::{BidsError, Result};
use regex::Regex;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
/// Ignore patterns applied when the caller does not supply its own list.
///
/// The regexes are written for dataset-relative paths that begin with `/`
/// (the form built by [`should_ignore`]):
///
/// * `^/(code|models|sourcedata|stimuli)` — paths starting with one of these
///   top-level names;
/// * `/\.` — any path containing a component that starts with a dot.
pub static DEFAULT_IGNORE: LazyLock<Vec<Regex>> = LazyLock::new(|| {
    // Both patterns are fixed literals, so compiling them cannot fail at runtime.
    [r"^/(code|models|sourcedata|stimuli)", r"/\."]
        .iter()
        .map(|pattern| Regex::new(pattern).unwrap())
        .collect()
});
/// Resolve a dataset root and, when possible, load its `dataset_description.json`.
///
/// The root is canonicalized first; the canonical path is what gets returned.
/// The second tuple element is `Some` only when the description file exists and
/// could be loaded.
///
/// With `validate == false` the function is lenient: a missing or unloadable
/// description yields `Ok((root, None))` instead of an error.
///
/// # Errors
///
/// * [`BidsError::RootNotFound`] when `root` cannot be canonicalized (the error
///   carries the path as given by the caller).
/// * [`BidsError::MissingDatasetDescription`] when `validate` is set and
///   `dataset_description.json` is absent.
/// * When `validate` is set, any error from loading the description or from
///   `DatasetDescription::validate` is propagated.
pub fn validate_root(root: &Path, validate: bool) -> Result<(PathBuf, Option<DatasetDescription>)> {
    // `canonicalize` fails for a path that does not exist, so a successful call
    // already proves existence; no separate `exists()` check is needed.
    let root = root
        .canonicalize()
        .map_err(|_| BidsError::RootNotFound(root.to_string_lossy().into_owned()))?;

    if !root.join("dataset_description.json").exists() {
        return if validate {
            Err(BidsError::MissingDatasetDescription)
        } else {
            Ok((root, None))
        };
    }

    match DatasetDescription::from_dir(&root) {
        Ok(desc) => {
            if validate {
                desc.validate()?;
            }
            Ok((root, Some(desc)))
        }
        // Strict mode surfaces the load failure; lenient mode just omits the description.
        Err(e) if validate => Err(e),
        Err(_) => Ok((root, None)),
    }
}
/// Load the dataset description found in `path` and return its pipeline name.
///
/// # Errors
///
/// Propagates any error from `DatasetDescription::from_dir`, and returns
/// [`BidsError::DerivativesValidation`] when the description has no pipeline
/// name.
pub fn validate_derivative_path(path: &Path) -> Result<String> {
    let description = DatasetDescription::from_dir(path)?;
    match description.pipeline_name() {
        Some(pipeline) => Ok(pipeline.to_string()),
        None => Err(BidsError::DerivativesValidation(
            "Every valid BIDS-derivatives dataset must have a GeneratedBy.Name field \
             set inside 'dataset_description.json'"
                .to_string(),
        )),
    }
}
/// Report whether `path` matches any of `ignore_patterns`.
///
/// The patterns are tested against the part of `path` below `root`, prefixed
/// with `/`. When `path` is not located under `root`, the patterns are tested
/// against the empty string instead.
pub fn should_ignore(path: &Path, root: &Path, ignore_patterns: &[Regex]) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(tail) => format!("/{}", tail.to_string_lossy()),
        Err(_) => String::new(),
    };
    for pattern in ignore_patterns {
        if pattern.is_match(&relative) {
            return true;
        }
    }
    false
}
/// Name-based check for whether `path` looks like a BIDS file.
///
/// Only the final path component is inspected; the file system is not touched.
/// The result is `true` when that component is one of a fixed set of root-level
/// file names or starts with `sub-`, `task-`, `acq-` or `sample-`. Paths without
/// a final component, or whose name is not valid UTF-8, yield `false`.
pub fn is_bids_file(path: &Path) -> bool {
    const ROOT_FILES: [&str; 6] = [
        "dataset_description.json",
        "participants.tsv",
        "participants.json",
        "README",
        "CHANGES",
        "LICENSE",
    ];
    const NAME_PREFIXES: [&str; 4] = ["sub-", "task-", "acq-", "sample-"];

    match path.file_name().and_then(|os_name| os_name.to_str()) {
        Some(file_name) => {
            ROOT_FILES.iter().any(|known| *known == file_name)
                || NAME_PREFIXES
                    .iter()
                    .any(|prefix| file_name.starts_with(*prefix))
        }
        None => false,
    }
}
/// Turn an `IntendedFor`-style reference into a path below `root`.
///
/// * `bids::<path>` resolves to `root/<path>`.
/// * Any other intent starting with `bids:` is not resolved here and gives
///   `None`.
/// * Everything else is treated as relative to the subject directory and
///   resolves to `root/sub-<subject>/<intent>`.
pub fn resolve_intended_for(intent: &str, root: &Path, subject: &str) -> Option<PathBuf> {
    match intent.strip_prefix("bids::") {
        Some(dataset_relative) => Some(root.join(dataset_relative)),
        None if intent.starts_with("bids:") => None,
        None => {
            let mut resolved = root.join(format!("sub-{subject}"));
            resolved.push(intent);
            Some(resolved)
        }
    }
}
/// Normalise the `ignore` / `force_index` arguments used for indexing.
///
/// * `ignore` falls back to a copy of [`DEFAULT_IGNORE`] when `None`; in either
///   case the dot-file pattern `/\.` is appended unless a regex with exactly
///   that source text is already present.
/// * `force_index` falls back to an empty list when `None`.
/// * `_root` is accepted but not used.
///
/// # Errors
///
/// Returns [`BidsError::Validation`] when any `force_index` pattern's source
/// text contains `derivatives`.
pub fn validate_indexing_args(
    ignore: Option<Vec<Regex>>,
    force_index: Option<Vec<Regex>>,
    _root: &Path,
) -> Result<(Vec<Regex>, Vec<Regex>)> {
    let mut ignore_patterns = match ignore {
        Some(custom) => custom,
        None => DEFAULT_IGNORE.clone(),
    };

    // Dot-files are always ignored, even when the caller passed a custom list.
    let dotfile_re = Regex::new(r"/\.").unwrap();
    let has_dotfile_rule = ignore_patterns
        .iter()
        .any(|re| re.as_str() == dotfile_re.as_str());
    if !has_dotfile_rule {
        ignore_patterns.push(dotfile_re);
    }

    let force_patterns = force_index.unwrap_or_default();
    let mentions_derivatives = force_patterns
        .iter()
        .any(|re| re.as_str().contains("derivatives"));
    if mentions_derivatives {
        return Err(BidsError::Validation(
            "Do not pass 'derivatives' in force_index. Use add_derivatives() instead."
                .to_string(),
        ));
    }

    Ok((ignore_patterns, force_patterns))
}
/// A single finding produced while validating a dataset.
#[derive(Debug, Clone)]
pub struct ValidationIssue {
    /// Severity label; `"error"` and `"warning"` are the values the rest of
    /// this file produces and counts.
    pub severity: String,
    /// Short identifier of the finding, e.g. `MISSING_README`.
    pub code: String,
    /// Human-readable explanation.
    pub message: String,
    /// Path the finding refers to, when it concerns a specific file.
    pub path: Option<String>,
}

/// Renders as `[SEVERITY] CODE: message`, followed by ` (path)` when a path is
/// attached. The severity is upper-cased for display.
impl std::fmt::Display for ValidationIssue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            severity,
            code,
            message,
            path,
        } = self;
        let level = severity.to_uppercase();
        match path {
            Some(location) => write!(f, "[{level}] {code}: {message} ({location})"),
            None => write!(f, "[{level}] {code}: {message}"),
        }
    }
}
/// The collected findings of a dataset validation run.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Every issue found, in the order it was recorded.
    pub issues: Vec<ValidationIssue>,
}

impl ValidationResult {
    /// `true` when no issue has severity `"error"`; warnings do not count.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.issues.iter().all(|issue| issue.severity != "error")
    }

    /// Number of issues whose severity is `"error"`.
    #[must_use]
    pub fn error_count(&self) -> usize {
        self.count_with_severity("error")
    }

    /// Number of issues whose severity is `"warning"`.
    #[must_use]
    pub fn warning_count(&self) -> usize {
        self.count_with_severity("warning")
    }

    /// Count the issues whose severity label equals `severity` exactly.
    fn count_with_severity(&self, severity: &str) -> usize {
        self.issues
            .iter()
            .filter(|issue| issue.severity == severity)
            .count()
    }
}

/// Prints a summary line with the error and warning totals, then one line per
/// issue.
impl std::fmt::Display for ValidationResult {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let errors = self.error_count();
        let warnings = self.warning_count();
        writeln!(f, "Validation: {errors} errors, {warnings} warnings")?;
        self.issues
            .iter()
            .try_for_each(|issue| writeln!(f, " {issue}"))
    }
}
pub fn validate_dataset(root: &Path) -> Result<ValidationResult> {
let mut issues = Vec::new();
let desc_path = root.join("dataset_description.json");
if !desc_path.exists() {
issues.push(ValidationIssue {
severity: "error".into(),
code: "MISSING_DATASET_DESCRIPTION".into(),
message: "dataset_description.json is required at the root".into(),
path: None,
});
} else {
match DatasetDescription::from_dir(root) {
Ok(desc) => {
if desc.name.is_empty() {
issues.push(ValidationIssue {
severity: "error".into(),
code: "MISSING_NAME".into(),
message: "Name field is required in dataset_description.json".into(),
path: Some(desc_path.to_string_lossy().into()),
});
}
if desc.bids_version.is_empty() {
issues.push(ValidationIssue {
severity: "error".into(),
code: "MISSING_BIDS_VERSION".into(),
message: "BIDSVersion field is required in dataset_description.json".into(),
path: Some(desc_path.to_string_lossy().into()),
});
}
}
Err(_) => {
issues.push(ValidationIssue {
severity: "error".into(),
code: "INVALID_DATASET_DESCRIPTION".into(),
message: "dataset_description.json cannot be parsed".into(),
path: Some(desc_path.to_string_lossy().into()),
});
}
}
}
let has_readme = root.join("README").exists()
|| root.join("README.md").exists()
|| root.join("README.rst").exists()
|| root.join("README.txt").exists();
if !has_readme {
issues.push(ValidationIssue {
severity: "warning".into(),
code: "MISSING_README".into(),
message: "A README file is recommended at the dataset root".into(),
path: None,
});
}
let mut has_subjects = false;
if let Ok(entries) = std::fs::read_dir(root) {
for entry in entries.flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if name.starts_with("sub-") && entry.file_type().is_ok_and(|t| t.is_dir()) {
has_subjects = true;
validate_subject_dir(&entry.path(), root, &mut issues);
}
}
}
if !has_subjects {
issues.push(ValidationIssue {
severity: "error".into(),
code: "NO_SUBJECTS".into(),
message: "No subject directories (sub-*) found".into(),
path: None,
});
}
Ok(ValidationResult { issues })
}
/// Check every file below `sub_dir` against the BIDS schema and record a
/// warning for each one whose name does not match.
///
/// * `sub_dir` — the subject directory to walk recursively.
/// * `root` — dataset root; reported paths and the schema lookup use the path
///   relative to it (empty if the file is not under `root`).
/// * `issues` — receives one `INVALID_FILENAME` warning per offending file.
///
/// Files whose name starts with `.` or ends with `.json` are never reported.
/// Directory entries that cannot be read are skipped silently.
fn validate_subject_dir(sub_dir: &Path, root: &Path, issues: &mut Vec<ValidationIssue>) {
    let schema = bids_schema::BidsSchema::load();

    // Stream the walk directly instead of collecting every entry first.
    // NOTE(review): walkdir does not follow symlinks unless asked to, so
    // symlinked files (e.g. annexed data) are presumably skipped by the
    // `is_file` filter — confirm this is intended.
    let files = walkdir::WalkDir::new(sub_dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file());

    for entry in files {
        let path = entry.path();
        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");

        // Hidden files and `.json` files are exempt; test that first so the
        // relative path and the schema lookup are only paid for real candidates.
        if name.starts_with('.') || name.ends_with(".json") {
            continue;
        }

        let rel = path
            .strip_prefix(root)
            .map(|p| p.to_string_lossy().to_string())
            .unwrap_or_default();
        if !schema.is_valid(&rel) {
            issues.push(ValidationIssue {
                severity: "warning".into(),
                code: "INVALID_FILENAME".into(),
                message: "File does not match any BIDS naming pattern".to_string(),
                path: Some(rel),
            });
        }
    }
}
/// Report whether `path` matches any of `force_patterns`.
///
/// An empty pattern list always gives `false`. Otherwise the patterns are
/// tested against the part of `path` below `root`, prefixed with `/`; a path
/// that is not under `root` is tested as the empty string.
pub fn should_force_index(path: &Path, root: &Path, force_patterns: &[Regex]) -> bool {
    !force_patterns.is_empty() && {
        let candidate = path.strip_prefix(root).map_or_else(
            |_| String::new(),
            |tail| format!("/{}", tail.to_string_lossy()),
        );
        force_patterns.iter().any(|re| re.is_match(&candidate))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `bids::` URIs and subject-relative intents resolve to the same file;
    /// URIs naming another dataset are not resolved.
    #[test]
    fn test_resolve_intended_for() {
        let root = Path::new("/data/bids");
        let resolved = Some(PathBuf::from("/data/bids/sub-01/anat/sub-01_T1w.nii.gz"));
        let cases = [
            ("bids::sub-01/anat/sub-01_T1w.nii.gz", resolved.clone()),
            ("anat/sub-01_T1w.nii.gz", resolved),
            ("bids:other:sub-01/anat/sub-01_T1w.nii.gz", None),
        ];
        for (intent, expected) in cases {
            assert_eq!(resolve_intended_for(intent, root, "01"), expected);
        }
    }

    /// With no arguments, the defaults provide ignore patterns and no forced ones.
    #[test]
    fn test_validate_indexing_args() {
        let (ignore, force) = validate_indexing_args(None, None, Path::new("/data")).unwrap();
        assert!(!ignore.is_empty());
        assert!(force.is_empty());
    }

    /// A `force_index` pattern mentioning `derivatives` is rejected.
    #[test]
    fn test_validate_indexing_args_no_derivatives() {
        let forced = vec![Regex::new("derivatives").unwrap()];
        let outcome = validate_indexing_args(None, Some(forced), Path::new("/data"));
        assert!(outcome.is_err());
    }

    /// Only paths whose root-relative form matches a pattern are force-indexed.
    #[test]
    fn test_should_force_index() {
        let root = Path::new("/data");
        let patterns = vec![Regex::new(r"/extra/").unwrap()];
        let cases = [
            ("/data/extra/file.txt", true),
            ("/data/sub-01/file.txt", false),
        ];
        for (candidate, expected) in cases {
            assert_eq!(
                should_force_index(Path::new(candidate), root, &patterns),
                expected
            );
        }
    }
}