1#![deny(unsafe_code)]
2use bids_core::dataset_description::DatasetDescription;
27use bids_core::error::{BidsError, Result};
28use regex::Regex;
29use std::path::{Path, PathBuf};
30use std::sync::LazyLock;
31
32pub static DEFAULT_IGNORE: LazyLock<Vec<Regex>> = LazyLock::new(|| {
34 vec![
35 Regex::new(r"^/(code|models|sourcedata|stimuli)").unwrap(),
36 Regex::new(r"/\.").unwrap(), ]
38});
39
40pub fn validate_root(root: &Path, validate: bool) -> Result<(PathBuf, Option<DatasetDescription>)> {
42 let root = root
43 .canonicalize()
44 .map_err(|_| BidsError::RootNotFound(root.to_string_lossy().to_string()))?;
45
46 if !root.exists() {
47 return Err(BidsError::RootNotFound(root.to_string_lossy().to_string()));
48 }
49
50 let desc_path = root.join("dataset_description.json");
51 if !desc_path.exists() {
52 if validate {
53 return Err(BidsError::MissingDatasetDescription);
54 }
55 return Ok((root, None));
56 }
57
58 match DatasetDescription::from_dir(&root) {
59 Ok(desc) => {
60 if validate {
61 desc.validate()?;
62 }
63 Ok((root, Some(desc)))
64 }
65 Err(e) => {
66 if validate {
67 Err(e)
68 } else {
69 Ok((root, None))
70 }
71 }
72 }
73}
74
75pub fn validate_derivative_path(path: &Path) -> Result<String> {
77 let desc = DatasetDescription::from_dir(path)?;
78 desc.pipeline_name()
79 .map(std::string::ToString::to_string)
80 .ok_or_else(|| {
81 BidsError::DerivativesValidation(
82 "Every valid BIDS-derivatives dataset must have a GeneratedBy.Name field \
83 set inside 'dataset_description.json'"
84 .to_string(),
85 )
86 })
87}
88
89pub fn should_ignore(path: &Path, root: &Path, ignore_patterns: &[Regex]) -> bool {
91 let rel = path
92 .strip_prefix(root)
93 .map(|p| format!("/{}", p.to_string_lossy()))
94 .unwrap_or_default();
95
96 ignore_patterns.iter().any(|pat| pat.is_match(&rel))
97}
98
/// Cheap, name-only check for whether `path` looks like a BIDS file.
///
/// Only the final path component is inspected. It is accepted when it is one
/// of a fixed set of root-level file names or when it starts with one of the
/// `sub-`, `task-`, `acq-` or `sample-` prefixes. Paths without a final
/// component, or whose name is not valid UTF-8, are rejected.
pub fn is_bids_file(path: &Path) -> bool {
    const ROOT_FILES: [&str; 6] = [
        "dataset_description.json",
        "participants.tsv",
        "participants.json",
        "README",
        "CHANGES",
        "LICENSE",
    ];
    const NAME_PREFIXES: [&str; 4] = ["sub-", "task-", "acq-", "sample-"];

    match path.file_name().and_then(std::ffi::OsStr::to_str) {
        Some(file_name) => {
            ROOT_FILES.contains(&file_name)
                || NAME_PREFIXES
                    .iter()
                    .any(|prefix| file_name.starts_with(*prefix))
        }
        None => false,
    }
}
118
/// Resolves an `IntendedFor`-style reference to a path inside the dataset.
///
/// Three input forms are recognised:
///
/// * `bids::<path>`: `<path>` is joined onto `root`.
/// * any other `bids:` reference (for example `bids:other:<path>`) is not
///   resolved and yields `None`.
/// * anything else is treated as a path relative to the subject directory,
///   i.e. `root/sub-<subject>/<intent>`.
///
/// Leading `/` characters on the path portion are dropped before joining.
/// `Path::join` would otherwise discard `root` entirely, and the result would
/// point outside the dataset.
pub fn resolve_intended_for(intent: &str, root: &Path, subject: &str) -> Option<PathBuf> {
    if let Some(rest) = intent.strip_prefix("bids::") {
        return Some(root.join(rest.trim_start_matches('/')));
    }

    if intent.starts_with("bids:") {
        // Reference to a named dataset; this function does not resolve those.
        return None;
    }

    let subject_dir = root.join(format!("sub-{subject}"));
    Some(subject_dir.join(intent.trim_start_matches('/')))
}
129
130pub fn validate_indexing_args(
134 ignore: Option<Vec<Regex>>,
135 force_index: Option<Vec<Regex>>,
136 _root: &Path,
137) -> Result<(Vec<Regex>, Vec<Regex>)> {
138 let mut ignore = ignore.unwrap_or_else(|| DEFAULT_IGNORE.clone());
139
140 let dotfile_re = Regex::new(r"/\.").unwrap();
142 if !ignore.iter().any(|r| r.as_str() == dotfile_re.as_str()) {
143 ignore.push(dotfile_re);
144 }
145
146 let force_index = force_index.unwrap_or_default();
147
148 for entry in &force_index {
150 if entry.as_str().contains("derivatives") {
151 return Err(BidsError::Validation(
152 "Do not pass 'derivatives' in force_index. Use add_derivatives() instead."
153 .to_string(),
154 ));
155 }
156 }
157
158 Ok((ignore, force_index))
159}
160
/// A single finding produced while validating a dataset.
#[derive(Debug, Clone)]
pub struct ValidationIssue {
    /// Severity label; this file uses `"error"` and `"warning"`.
    pub severity: String,
    /// Short machine-readable identifier, e.g. `"MISSING_README"`.
    pub code: String,
    /// Human-readable description of the problem.
    pub message: String,
    /// Path the issue refers to, when it concerns a specific file.
    pub path: Option<String>,
}

impl std::fmt::Display for ValidationIssue {
    /// Formats as `[SEVERITY] CODE: message`, followed by ` (path)` when a
    /// path is attached. The severity is upper-cased for display.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            severity,
            code,
            message,
            path,
        } = self;

        write!(f, "[{}] {code}: {message}", severity.to_uppercase())?;

        match path {
            Some(location) => write!(f, " ({location})"),
            None => Ok(()),
        }
    }
}
189
190#[derive(Debug, Clone)]
192pub struct ValidationResult {
193 pub issues: Vec<ValidationIssue>,
195}
196
197impl ValidationResult {
198 #[must_use]
200 pub fn is_valid(&self) -> bool {
201 !self.issues.iter().any(|i| i.severity == "error")
202 }
203
204 #[must_use]
206 pub fn error_count(&self) -> usize {
207 self.issues.iter().filter(|i| i.severity == "error").count()
208 }
209
210 #[must_use]
212 pub fn warning_count(&self) -> usize {
213 self.issues
214 .iter()
215 .filter(|i| i.severity == "warning")
216 .count()
217 }
218}
219
220impl std::fmt::Display for ValidationResult {
221 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222 writeln!(
223 f,
224 "Validation: {} errors, {} warnings",
225 self.error_count(),
226 self.warning_count()
227 )?;
228 for issue in &self.issues {
229 writeln!(f, " {issue}")?;
230 }
231 Ok(())
232 }
233}
234
235pub fn validate_dataset(root: &Path) -> Result<ValidationResult> {
251 let mut issues = Vec::new();
252
253 let desc_path = root.join("dataset_description.json");
255 if !desc_path.exists() {
256 issues.push(ValidationIssue {
257 severity: "error".into(),
258 code: "MISSING_DATASET_DESCRIPTION".into(),
259 message: "dataset_description.json is required at the root".into(),
260 path: None,
261 });
262 } else {
263 match DatasetDescription::from_dir(root) {
264 Ok(desc) => {
265 if desc.name.is_empty() {
266 issues.push(ValidationIssue {
267 severity: "error".into(),
268 code: "MISSING_NAME".into(),
269 message: "Name field is required in dataset_description.json".into(),
270 path: Some(desc_path.to_string_lossy().into()),
271 });
272 }
273 if desc.bids_version.is_empty() {
274 issues.push(ValidationIssue {
275 severity: "error".into(),
276 code: "MISSING_BIDS_VERSION".into(),
277 message: "BIDSVersion field is required in dataset_description.json".into(),
278 path: Some(desc_path.to_string_lossy().into()),
279 });
280 }
281 }
282 Err(_) => {
283 issues.push(ValidationIssue {
284 severity: "error".into(),
285 code: "INVALID_DATASET_DESCRIPTION".into(),
286 message: "dataset_description.json cannot be parsed".into(),
287 path: Some(desc_path.to_string_lossy().into()),
288 });
289 }
290 }
291 }
292
293 let has_readme = root.join("README").exists()
295 || root.join("README.md").exists()
296 || root.join("README.rst").exists()
297 || root.join("README.txt").exists();
298 if !has_readme {
299 issues.push(ValidationIssue {
300 severity: "warning".into(),
301 code: "MISSING_README".into(),
302 message: "A README file is recommended at the dataset root".into(),
303 path: None,
304 });
305 }
306
307 let mut has_subjects = false;
309 if let Ok(entries) = std::fs::read_dir(root) {
310 for entry in entries.flatten() {
311 let name = entry.file_name().to_string_lossy().to_string();
312 if name.starts_with("sub-") && entry.file_type().is_ok_and(|t| t.is_dir()) {
313 has_subjects = true;
314 validate_subject_dir(&entry.path(), root, &mut issues);
315 }
316 }
317 }
318
319 if !has_subjects {
320 issues.push(ValidationIssue {
321 severity: "error".into(),
322 code: "NO_SUBJECTS".into(),
323 message: "No subject directories (sub-*) found".into(),
324 path: None,
325 });
326 }
327
328 Ok(ValidationResult { issues })
329}
330
331fn validate_subject_dir(sub_dir: &Path, root: &Path, issues: &mut Vec<ValidationIssue>) {
332 let schema = bids_schema::BidsSchema::load();
333
334 let entries: Vec<walkdir::DirEntry> = walkdir::WalkDir::new(sub_dir)
335 .into_iter()
336 .filter_map(|e| e.ok())
337 .filter(|e| e.file_type().is_file())
338 .collect();
339
340 {
341 for entry in entries {
342 let path = entry.path();
343 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
344
345 if name.starts_with('.') {
347 continue;
348 }
349
350 let rel = path
352 .strip_prefix(root)
353 .map(|p| p.to_string_lossy().to_string())
354 .unwrap_or_default();
355
356 if !schema.is_valid(&rel) && !name.ends_with(".json") {
357 issues.push(ValidationIssue {
359 severity: "warning".into(),
360 code: "INVALID_FILENAME".into(),
361 message: "File does not match any BIDS naming pattern".to_string(),
362 path: Some(rel),
363 });
364 }
365 }
366 }
367}
368
369pub fn should_force_index(path: &Path, root: &Path, force_patterns: &[Regex]) -> bool {
371 if force_patterns.is_empty() {
372 return false;
373 }
374 let rel = path
375 .strip_prefix(root)
376 .map(|p| format!("/{}", p.to_string_lossy()))
377 .unwrap_or_default();
378 force_patterns.iter().any(|pat| pat.is_match(&rel))
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers the three reference forms: `bids::` URI, subject-relative path,
    /// and a `bids:` URI naming another dataset.
    #[test]
    fn test_resolve_intended_for() {
        let root = Path::new("/data/bids");
        let expected = PathBuf::from("/data/bids/sub-01/anat/sub-01_T1w.nii.gz");

        let from_uri = resolve_intended_for("bids::sub-01/anat/sub-01_T1w.nii.gz", root, "01");
        assert_eq!(from_uri, Some(expected.clone()));

        let from_relative = resolve_intended_for("anat/sub-01_T1w.nii.gz", root, "01");
        assert_eq!(from_relative, Some(expected));

        let cross_dataset =
            resolve_intended_for("bids:other:sub-01/anat/sub-01_T1w.nii.gz", root, "01");
        assert!(cross_dataset.is_none());
    }

    /// With no arguments the defaults apply: some ignore rules, no forced paths.
    #[test]
    fn test_validate_indexing_args() {
        let (ignore, force) = validate_indexing_args(None, None, Path::new("/data")).unwrap();
        assert!(!ignore.is_empty());
        assert!(force.is_empty());
    }

    /// A `force_index` pattern mentioning `derivatives` is rejected.
    #[test]
    fn test_validate_indexing_args_no_derivatives() {
        let forced = vec![Regex::new("derivatives").unwrap()];
        let outcome = validate_indexing_args(None, Some(forced), Path::new("/data"));
        assert!(outcome.is_err());
    }

    /// Only paths matching a force pattern are reported.
    #[test]
    fn test_should_force_index() {
        let root = Path::new("/data");
        let patterns = [Regex::new(r"/extra/").unwrap()];

        let cases = [
            ("/data/extra/file.txt", true),
            ("/data/sub-01/file.txt", false),
        ];
        for (candidate, expected) in cases {
            assert_eq!(
                should_force_index(Path::new(candidate), root, &patterns),
                expected
            );
        }
    }
}