Skip to main content

bids_layout/
layout.rs

1//! The main `BidsLayout` type and its builder.
2//!
3//! [`BidsLayout`] is the primary entry point for working with a BIDS dataset.
4//! It indexes the directory tree into a SQLite database and provides fluent
5//! query methods, metadata inheritance, derivative support, and path building.
6
7use bids_core::config::Config;
8use bids_core::dataset_description::DatasetDescription;
9use bids_core::entities::{Entities, EntityValue};
10use bids_core::error::{BidsError, Result};
11use bids_core::file::BidsFile;
12use bids_core::metadata::BidsMetadata;
13use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15
16use crate::db::Database;
17use crate::get_builder::GetBuilder;
18use crate::indexer::{self, IndexerOptions};
19use crate::query::{QueryFilter, Scope};
20
21/// The main entry point for interacting with a BIDS dataset.
22///
23/// `BidsLayout` indexes a BIDS dataset directory into a SQLite database and
24/// provides a fluent query API for finding files by their BIDS entities. It
25/// handles JSON sidecar metadata inheritance, file associations, derivative
26/// datasets, and path building.
27///
28/// This is the Rust equivalent of PyBIDS' `BIDSLayout` class.
29///
30/// # Thread Safety
31///
32/// `BidsLayout` wraps a `rusqlite::Connection` and is therefore `!Send` and
33/// `!Sync`. It **cannot** be shared across threads or sent to async tasks.
34///
35/// For multi-threaded or async workloads:
36///
37/// 1. **Save once, load per-thread** — Use [`save()`](Self::save) to persist
38///    the index, then [`load()`](Self::load) in each thread/task:
39///    ```no_run
40///    # use bids_layout::BidsLayout;
41///    let layout = BidsLayout::new("/data").unwrap();
42///    layout.save(std::path::Path::new("/tmp/index.sqlite")).unwrap();
43///
44///    // In each thread:
45///    let local = BidsLayout::load(std::path::Path::new("/tmp/index.sqlite")).unwrap();
46///    ```
47///
48/// 2. **Create per-thread** — Call `BidsLayout::new()` in each thread. The
49///    directory walk is fast for typical datasets (< 100ms for 10k files).
50///
51/// # Creating a Layout
52///
53/// ```no_run
54/// use bids_layout::BidsLayout;
55///
56/// // Simple: index with defaults (validation enabled, in-memory database)
57/// let layout = BidsLayout::new("/path/to/bids/dataset").unwrap();
58///
59/// // Builder: customize indexing behavior
60/// let layout = BidsLayout::builder("/path/to/dataset")
61///     .validate(false)                            // skip BIDS validation
62///     .database_path("/tmp/index.sqlite")         // persistent database
63///     .index_metadata(true)                       // index JSON sidecars
64///     .add_derivative("/path/to/derivatives/fmriprep")
65///     .build()
66///     .unwrap();
67/// ```
68///
69/// # Querying Files
70///
71/// ```no_run
72/// # use bids_layout::BidsLayout;
73/// # let layout = BidsLayout::new("/path").unwrap();
74/// // Fluent query API
75/// let files = layout.get()
76///     .suffix("bold")
77///     .extension(".nii.gz")
78///     .subject("01")
79///     .task("rest")
80///     .collect().unwrap();
81///
82/// // Get unique entity values
83/// let subjects = layout.get_subjects().unwrap();
84/// let tasks = layout.get_tasks().unwrap();
85///
86/// // Get metadata with BIDS inheritance
87/// let md = layout.get_metadata(&files[0].path).unwrap();
88/// let tr = md.get_f64("RepetitionTime");
89/// ```
90///
91/// # Derivatives
92///
93/// ```no_run
94/// # use bids_layout::BidsLayout;
95/// # let mut layout = BidsLayout::new("/path").unwrap();
96/// layout.add_derivatives("/path/to/derivatives").unwrap();
97///
98/// // Query across raw + derivatives
99/// let all_files = layout.get().scope("all").suffix("bold").collect().unwrap();
100///
101/// // Query derivatives only
102/// let deriv_files = layout.get().scope("derivatives").collect().unwrap();
103/// ```
pub struct BidsLayout {
    /// Dataset root directory; relative paths passed to lookups are joined onto it.
    root: PathBuf,
    /// Handle to the index database that backs every query.
    db: Database,
    /// Parsed `dataset_description.json`, when one could be read.
    description: Option<DatasetDescription>,
    /// Whether this layout is treated as a derivative dataset for scope matching.
    pub is_derivative: bool,
    /// Pipeline name compared against `Scope::Pipeline` during scope matching.
    pub source_pipeline: Option<String>,
    /// Nested derivative layouts, keyed by pipeline name.
    derivatives: HashMap<String, BidsLayout>,
    /// Loaded configurations supplying entity definitions and default path patterns.
    configs: Vec<Config>,
    // Stored on the layout but not read by any method visible in this part of
    // the file, hence the lint suppression. NOTE(review): confirm whether it is
    // still needed.
    #[allow(dead_code)]
    regex_search: bool,
    /// The BIDS spec version compatibility status for this dataset.
    ///
    /// Set during construction by comparing the dataset's declared
    /// `BIDSVersion` against the library's supported version.
    spec_compatibility: Option<bids_schema::Compatibility>,
}
120
121impl BidsLayout {
122    /// Create a new layout with default settings (validation enabled, in-memory DB).
123    ///
124    /// # Errors
125    ///
126    /// Returns an error if the root path doesn't exist, `dataset_description.json`
127    /// is missing or invalid, or the filesystem walk fails.
128    pub fn new(root: impl AsRef<Path>) -> Result<Self> {
129        Self::builder(root).build()
130    }
131
132    pub fn builder(root: impl AsRef<Path>) -> LayoutBuilder {
133        LayoutBuilder::new(root)
134    }
135
136    /// Load a layout from an existing database file.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if the database file doesn't exist, can't be opened,
141    /// or doesn't contain valid layout info.
142    pub fn load(database_path: &Path) -> Result<Self> {
143        let db = Database::open(database_path)?;
144        let (root_str, config_str) = db
145            .get_layout_info()?
146            .ok_or_else(|| BidsError::Database("No layout info in database".into()))?;
147        let root = PathBuf::from(&root_str);
148        let description = DatasetDescription::from_dir(&root).ok();
149        let config_names: Vec<String> = config_str
150            .split(',')
151            .map(std::string::ToString::to_string)
152            .collect();
153        let configs: Vec<Config> = config_names
154            .iter()
155            .filter_map(|name| Config::load(name).ok())
156            .collect();
157
158        let spec_compatibility = description.as_ref().map(|d| {
159            let schema = bids_schema::BidsSchema::load();
160            schema.check_dataset_version(&d.bids_version)
161        });
162
163        Ok(BidsLayout {
164            root,
165            db,
166            description,
167            is_derivative: false,
168            source_pipeline: None,
169            derivatives: HashMap::new(),
170            configs,
171            regex_search: false,
172            spec_compatibility,
173        })
174    }
175
176    /// Save the database to a file for later reloading with [`BidsLayout::load`].
177    ///
178    /// # Errors
179    ///
180    /// Returns a database error if the backup operation fails.
181    pub fn save(&self, path: &Path) -> Result<()> {
182        self.db.save_to(path)
183    }
184
185    #[must_use]
186    pub fn root(&self) -> &Path {
187        &self.root
188    }
189    #[must_use]
190    pub fn description(&self) -> Option<&DatasetDescription> {
191        self.description.as_ref()
192    }
193    pub(crate) fn db(&self) -> &Database {
194        &self.db
195    }
196
197    /// The BIDS specification version declared in `dataset_description.json`.
198    #[must_use]
199    pub fn bids_version(&self) -> Option<&str> {
200        self.description.as_ref().map(|d| d.bids_version.as_str())
201    }
202
203    /// Compatibility between the dataset's BIDS version and this library.
204    ///
205    /// Returns `None` if no `dataset_description.json` was found (e.g., for
206    /// derivative datasets loaded without validation).
207    #[must_use]
208    pub fn spec_compatibility(&self) -> Option<&bids_schema::Compatibility> {
209        self.spec_compatibility.as_ref()
210    }
211
212    /// Start building a query.
213    pub fn get(&self) -> GetBuilder<'_> {
214        GetBuilder::new(self)
215    }
216
217    pub fn get_subjects(&self) -> Result<Vec<String>> {
218        self.db.get_unique_entity_values("subject")
219    }
220    pub fn get_sessions(&self) -> Result<Vec<String>> {
221        self.db.get_unique_entity_values("session")
222    }
223    pub fn get_tasks(&self) -> Result<Vec<String>> {
224        self.db.get_unique_entity_values("task")
225    }
226    pub fn get_runs(&self) -> Result<Vec<String>> {
227        self.db.get_unique_entity_values("run")
228    }
229    pub fn get_datatypes(&self) -> Result<Vec<String>> {
230        self.db.get_unique_entity_values("datatype")
231    }
232    pub fn get_suffixes(&self) -> Result<Vec<String>> {
233        self.db.get_unique_entity_values("suffix")
234    }
235    pub fn get_entities(&self) -> Result<Vec<String>> {
236        self.db.get_entity_names()
237    }
238    pub fn get_entity_values(&self, entity: &str) -> Result<Vec<String>> {
239        self.db.get_unique_entity_values(entity)
240    }
241
242    /// Resolve a path (relative to root if not absolute) to an absolute string.
243    fn resolve_path(&self, path: impl AsRef<Path>) -> String {
244        let p = if path.as_ref().is_absolute() {
245            path.as_ref().to_path_buf()
246        } else {
247            self.root.join(path)
248        };
249        p.to_string_lossy().into_owned()
250    }
251
252    /// Get a specific file by path.
253    pub fn get_file(&self, path: impl AsRef<Path>) -> Result<Option<BidsFile>> {
254        let path_str = self.resolve_path(path);
255        let tags = self.db.get_tags(&path_str)?;
256        if tags.is_empty() {
257            let all = self.db.all_file_paths()?;
258            if !all.contains(&path_str) {
259                return Ok(None);
260            }
261        }
262        Ok(Some(self.reconstruct_file(&path_str)?))
263    }
264
265    /// Get metadata for a file.
266    pub fn get_metadata(&self, path: impl AsRef<Path>) -> Result<BidsMetadata> {
267        let path_str = self.resolve_path(path);
268        let tags = self.db.get_tags(&path_str)?;
269        let mut md = BidsMetadata::with_source(&path_str);
270        for (name, value, _dtype, is_metadata) in tags {
271            if is_metadata {
272                let json_val =
273                    serde_json::from_str(&value).unwrap_or(serde_json::Value::String(value));
274                md.insert(name, json_val);
275            }
276        }
277        Ok(md)
278    }
279
280    /// Get the scanning repetition time (TR) for matching runs.
281    pub fn get_tr(&self, filters: &[QueryFilter]) -> Result<f64> {
282        let mut all_filters: Vec<(String, Vec<String>, bool)> = QueryFilter::to_tuples(filters);
283        all_filters.push(("suffix".to_string(), vec!["bold".to_string()], false));
284        all_filters.push(("datatype".to_string(), vec!["func".to_string()], false));
285
286        let paths = self.db.query_files(&all_filters)?;
287        if paths.is_empty() {
288            return Err(BidsError::NoMatch(
289                "No functional images match criteria".into(),
290            ));
291        }
292
293        // Collect unique TRs, rounding to 10µs to avoid float comparison issues.
294        let mut trs = std::collections::HashSet::new();
295        for path in &paths {
296            let md = self.get_metadata(path)?;
297            if let Some(tr) = md.get_f64("RepetitionTime") {
298                trs.insert((tr * 100_000.0).round() as i64);
299            }
300        }
301
302        if trs.len() > 1 {
303            return Err(BidsError::NoMatch("Multiple unique TRs found".into()));
304        }
305
306        trs.into_iter()
307            .next()
308            .map(|v| v as f64 / 100_000.0)
309            .ok_or_else(|| BidsError::NoMatch("No RepetitionTime found in metadata".into()))
310    }
311
312    /// Get bvec file for a path.
313    pub fn get_bvec(&self, path: impl AsRef<Path>) -> Result<Option<BidsFile>> {
314        self.get_nearest(
315            path,
316            &[
317                QueryFilter::eq("extension", ".bvec"),
318                QueryFilter::eq("suffix", "dwi"),
319            ],
320        )
321    }
322
323    /// Load the gradient table (b-values + b-vectors) for a DWI file.
324    ///
325    /// Looks up the companion `.bval` and `.bvec` files and parses them
326    /// into a [`bids_io::gradient::GradientTable`].
327    ///
328    /// # Errors
329    ///
330    /// Returns an error if the companion files aren't found or can't be parsed.
331    pub fn get_gradient_table(
332        &self,
333        path: impl AsRef<Path>,
334    ) -> Result<bids_io::gradient::GradientTable> {
335        let bvec_file = self
336            .get_bvec(&path)?
337            .ok_or_else(|| BidsError::NoMatch("No .bvec file found".into()))?;
338        let bval_file = self
339            .get_bval(&path)?
340            .ok_or_else(|| BidsError::NoMatch("No .bval file found".into()))?;
341        bids_io::gradient::GradientTable::from_files(&bval_file.path, &bvec_file.path)
342    }
343
344    /// Get bval file for a path.
345    pub fn get_bval(&self, path: impl AsRef<Path>) -> Result<Option<BidsFile>> {
346        self.get_nearest(
347            path,
348            &[
349                QueryFilter::eq("extension", ".bval"),
350                QueryFilter::eq("suffix", "dwi"),
351            ],
352        )
353    }
354
355    /// Add a derivatives directory.
356    pub fn add_derivatives(&mut self, path: impl AsRef<Path>) -> Result<()> {
357        let path = path.as_ref();
358        let desc_path = path.join("dataset_description.json");
359
360        if desc_path.exists() {
361            let pipeline_name = bids_validate::validate_derivative_path(path)?;
362            if self.derivatives.contains_key(&pipeline_name) {
363                return Err(BidsError::DerivativesValidation(format!(
364                    "Pipeline '{pipeline_name}' already added"
365                )));
366            }
367            let deriv = LayoutBuilder::new(path)
368                .validate(false)
369                .is_derivative(true)
370                .build()?;
371            self.derivatives.insert(pipeline_name, deriv);
372        } else if let Ok(entries) = std::fs::read_dir(path) {
373            for entry in entries.flatten() {
374                if entry.file_type().is_ok_and(|t| t.is_dir()) {
375                    let sub_desc = entry.path().join("dataset_description.json");
376                    if sub_desc.exists() {
377                        self.add_derivatives(entry.path())?;
378                    }
379                }
380            }
381        }
382        Ok(())
383    }
384
385    #[must_use]
386    pub fn get_derivative(&self, name: &str) -> Option<&BidsLayout> {
387        self.derivatives.get(name)
388    }
389    #[must_use]
390    pub fn derivatives(&self) -> &HashMap<String, BidsLayout> {
391        &self.derivatives
392    }
393
394    /// Check if this layout is in the specified scope.
395    fn in_scope(&self, scope: &Scope) -> bool {
396        match scope {
397            Scope::All => true,
398            Scope::Self_ => true,
399            Scope::Raw => !self.is_derivative,
400            Scope::Derivatives => self.is_derivative,
401            Scope::Pipeline(name) => self.source_pipeline.as_deref() == Some(name.as_str()),
402        }
403    }
404
405    /// Get all layouts in the specified scope (recursive through derivatives).
406    fn get_layouts_in_scope(&self, scope: &Scope) -> Vec<&BidsLayout> {
407        if *scope == Scope::Self_ {
408            return vec![self];
409        }
410
411        let mut layouts = Vec::new();
412        if self.in_scope(scope) {
413            layouts.push(self);
414        }
415        for deriv in self.derivatives.values() {
416            layouts.extend(deriv.get_layouts_in_scope(scope));
417        }
418        layouts
419    }
420
421    /// Get the nearest file matching filters, walking up the directory tree.
422    pub fn get_nearest(
423        &self,
424        path: impl AsRef<Path>,
425        filters: &[QueryFilter],
426    ) -> Result<Option<BidsFile>> {
427        let path = path.as_ref();
428
429        // Get the suffix from the source file if not in filters
430        let has_suffix = filters.iter().any(|f| f.entity == "suffix");
431        let mut final_filters = filters.to_vec();
432        if !has_suffix
433            && let Some(bf) = self.get_file(path)?
434            && let Some(EntityValue::Str(s)) = bf.entities.get("suffix")
435        {
436            final_filters.push(QueryFilter::eq("suffix", s));
437        }
438
439        // Get the source file's entities for scoring
440        let source_entities: HashMap<String, String> = if let Some(bf) = self.get_file(path)? {
441            bf.entities
442                .iter()
443                .map(|(k, v)| (k.clone(), v.as_str_lossy().into_owned()))
444                .collect()
445        } else {
446            HashMap::new()
447        };
448
449        // Get all candidate files
450        let filter_tuples: Vec<_> = final_filters
451            .iter()
452            .map(|f| (f.entity.clone(), f.values.clone(), f.regex))
453            .collect();
454        let candidates = self.db.query_files(&filter_tuples)?;
455
456        // Group candidates by directory
457        let mut dir_files: HashMap<PathBuf, Vec<String>> = HashMap::new();
458        for c in &candidates {
459            let p = PathBuf::from(c);
460            let dir = p.parent().unwrap_or(Path::new("")).to_path_buf();
461            dir_files.entry(dir).or_default().push(c.clone());
462        }
463
464        // Walk up from the source file's directory
465        let mut dir = path.parent();
466        while let Some(current_dir) = dir {
467            if let Some(files_in_dir) = dir_files.get(current_dir) {
468                // Score candidates by matching entities
469                let mut best: Option<(usize, String)> = None;
470                for file_path in files_in_dir {
471                    let tags = self.db.get_tags(file_path)?;
472                    let file_ents: HashMap<String, String> = tags
473                        .iter()
474                        .filter(|(_, _, _, m)| !m)
475                        .map(|(n, v, _, _)| (n.clone(), v.clone()))
476                        .collect();
477
478                    let score: usize = source_entities
479                        .iter()
480                        .filter(|(k, v)| file_ents.get(*k) == Some(v))
481                        .count();
482                    if best.as_ref().is_none_or(|(s, _)| score > *s) {
483                        best = Some((score, file_path.clone()));
484                    }
485                }
486
487                if let Some((_, best_path)) = best {
488                    return Ok(Some(self.reconstruct_file(&best_path)?));
489                }
490            }
491
492            if current_dir == self.root {
493                break;
494            }
495            dir = current_dir.parent();
496        }
497
498        Ok(None)
499    }
500
501    /// Parse entities from a filename using this layout's config.
502    #[must_use]
503    pub fn parse_file_entities(&self, filename: &str) -> Entities {
504        let all_entities = self.all_entity_defs();
505        bids_core::entities::parse_file_entities(filename, &all_entities)
506    }
507
508    /// Collect all unique entity definitions from this layout's configs.
509    fn all_entity_defs(&self) -> Vec<bids_core::Entity> {
510        let mut all = Vec::new();
511        let mut seen = std::collections::HashSet::new();
512        for config in &self.configs {
513            for entity in &config.entities {
514                if seen.insert(&entity.name) {
515                    all.push(entity.clone());
516                }
517            }
518        }
519        all
520    }
521
522    /// Build a path from entities using this layout's config patterns.
523    pub fn build_path(
524        &self,
525        source: &Entities,
526        path_patterns: Option<&[&str]>,
527        strict: bool,
528    ) -> Result<String> {
529        let default_patterns: Vec<String>;
530        let patterns: Vec<&str> = if let Some(p) = path_patterns {
531            p.to_vec()
532        } else {
533            default_patterns = self
534                .configs
535                .iter()
536                .filter_map(|c| c.default_path_patterns.as_ref())
537                .flat_map(|p| p.iter().cloned())
538                .collect();
539            default_patterns
540                .iter()
541                .map(std::string::String::as_str)
542                .collect()
543        };
544
545        bids_io::path_builder::build_path(source, &patterns, strict).ok_or_else(|| {
546            BidsError::PathBuilding("Unable to construct path from provided entities".into())
547        })
548    }
549
550    /// Export file index as rows of (path, entity_name, value).
551    pub fn to_df(&self, metadata: bool) -> Result<Vec<(String, String, String)>> {
552        let paths = self.db.all_file_paths()?;
553        let mut rows = Vec::new();
554        for path in &paths {
555            let tags = self.db.get_tags(path)?;
556            for (name, value, _, is_meta) in &tags {
557                if metadata || !is_meta {
558                    rows.push((path.clone(), name.clone(), value.clone()));
559                }
560            }
561        }
562        Ok(rows)
563    }
564
565    /// Deep copy the layout.
566    pub fn clone_layout(&self) -> Result<Self> {
567        // Re-index from the same root
568        Self::builder(&self.root)
569            .validate(false)
570            .is_derivative(self.is_derivative)
571            .build()
572    }
573
574    /// Internal: execute a query with filters.
575    pub(crate) fn query_files_internal(
576        &self,
577        filters: &[(String, Vec<String>, bool)],
578        scope: &Scope,
579    ) -> Result<Vec<String>> {
580        let layouts = self.get_layouts_in_scope(scope);
581        let mut all_paths = Vec::new();
582        for layout in layouts {
583            let paths = layout.db.query_files(filters)?;
584            all_paths.extend(paths);
585        }
586        // Deduplicate
587        let mut seen = std::collections::HashSet::new();
588        all_paths.retain(|p| seen.insert(p.clone()));
589        Ok(all_paths)
590    }
591
592    /// Internal: reconstruct a BidsFile from its path in the database.
593    pub(crate) fn reconstruct_file(&self, path_str: &str) -> Result<BidsFile> {
594        let mut bf = BidsFile::new(path_str);
595        let tags = self.db.get_tags(path_str)?;
596        for (name, value, _dtype, is_metadata) in tags {
597            if !is_metadata {
598                bf.entities.insert(name, EntityValue::Str(value));
599            } else {
600                let json_val =
601                    serde_json::from_str(&value).unwrap_or(serde_json::Value::String(value));
602                bf.metadata.insert(name, json_val);
603            }
604        }
605        Ok(bf)
606    }
607
608    /// Get fieldmap(s) for a specified file path.
609    pub fn get_fieldmap(&self, path: impl AsRef<Path>) -> Result<Vec<HashMap<String, String>>> {
610        let path = path.as_ref();
611        let ents = self.parse_file_entities(&path.to_string_lossy());
612        let subject = ents
613            .get("subject")
614            .map(|v| v.as_str_lossy())
615            .unwrap_or_default();
616
617        let fmap_files = self
618            .get()
619            .subject(&subject)
620            .filter_regex("suffix", "(phasediff|magnitude[12]|phase[12]|fieldmap|epi)")
621            .filter_any("extension", &[".nii.gz", ".nii"])
622            .collect()?;
623
624        let mut fieldmap_set = Vec::new();
625        for file in &fmap_files {
626            let md = self.get_metadata(&file.path)?;
627            let intended = md.get("IntendedFor");
628            if intended.is_none() {
629                continue;
630            }
631
632            let intents: Vec<String> = match intended.unwrap() {
633                serde_json::Value::String(s) => vec![s.clone()],
634                serde_json::Value::Array(a) => a
635                    .iter()
636                    .filter_map(|v| v.as_str().map(String::from))
637                    .collect(),
638                _ => continue,
639            };
640
641            let path_str = path.to_string_lossy();
642            if !intents.iter().any(|i| path_str.ends_with(i)) {
643                continue;
644            }
645
646            let suffix = file
647                .entities
648                .get("suffix")
649                .map(|v| v.as_str_lossy())
650                .unwrap_or_default();
651            let mut fmap = HashMap::new();
652            let fp = file.path.to_string_lossy().to_string();
653
654            match &*suffix {
655                "phasediff" => {
656                    fmap.insert("phasediff".into(), fp.clone());
657                    fmap.insert("magnitude1".into(), fp.replace("phasediff", "magnitude1"));
658                    let mag2 = fp.replace("phasediff", "magnitude2");
659                    if std::path::Path::new(&mag2).exists() {
660                        fmap.insert("magnitude2".into(), mag2);
661                    }
662                    fmap.insert("suffix".into(), "phasediff".into());
663                }
664                "phase1" => {
665                    fmap.insert("phase1".into(), fp.clone());
666                    fmap.insert("magnitude1".into(), fp.replace("phase1", "magnitude1"));
667                    fmap.insert("phase2".into(), fp.replace("phase1", "phase2"));
668                    fmap.insert("magnitude2".into(), fp.replace("phase1", "magnitude2"));
669                    fmap.insert("suffix".into(), "phase".into());
670                }
671                "epi" => {
672                    fmap.insert("epi".into(), fp);
673                    fmap.insert("suffix".into(), "epi".into());
674                }
675                "fieldmap" => {
676                    fmap.insert("fieldmap".into(), fp.clone());
677                    fmap.insert("magnitude".into(), fp.replace("fieldmap", "magnitude"));
678                    fmap.insert("suffix".into(), "fieldmap".into());
679                }
680                _ => continue,
681            }
682            fieldmap_set.push(fmap);
683        }
684        Ok(fieldmap_set)
685    }
686
687    /// Copy BIDSFiles to new locations defined by path patterns.
688    pub fn copy_files(
689        &self,
690        path_patterns: &[&str],
691        mode: bids_core::file::CopyMode,
692        root: Option<&Path>,
693        filters: &[QueryFilter],
694    ) -> Result<Vec<PathBuf>> {
695        let root = root.unwrap_or(&self.root);
696        let tuples = QueryFilter::to_tuples(filters);
697        let files = self.query_files_internal(&tuples, &Scope::All)?;
698        let mut copied = Vec::new();
699        for path_str in &files {
700            let bf = self.reconstruct_file(path_str)?;
701            if let Ok(new_path_str) = self.build_path(&bf.entities, Some(path_patterns), false) {
702                let new_path = root.join(&new_path_str);
703                bf.copy_to(&new_path, mode)?;
704                copied.push(new_path);
705            }
706        }
707        Ok(copied)
708    }
709
710    /// Write data to a file defined by entities and path patterns.
711    pub fn write_to_file(
712        &self,
713        entities: &Entities,
714        path_patterns: Option<&[&str]>,
715        contents: &[u8],
716        strict: bool,
717    ) -> Result<PathBuf> {
718        let path_str = self.build_path(entities, path_patterns, strict)?;
719        let full_path = self.root.join(&path_str);
720        bids_io::writer::write_to_file(
721            &full_path,
722            Some(contents),
723            None,
724            None,
725            None,
726            bids_io::writer::ConflictStrategy::Fail,
727        )?;
728        Ok(full_path)
729    }
730
731    /// Auto-convert entity query values to correct dtype.
732    pub fn sanitize_query_dtypes(&self, entities: &mut Entities) {
733        // Look up entity definitions from config
734        for config in &self.configs {
735            for ent_def in &config.entities {
736                if let Some(val) = entities.get(&ent_def.name) {
737                    let val_str = val.as_str_lossy();
738                    let coerced = ent_def.coerce_value(&val_str);
739                    entities.insert(ent_def.name.clone(), coerced);
740                }
741            }
742        }
743    }
744
745    /// Get file associations from the database.
746    pub fn get_associations(&self, path: &str, kind: Option<&str>) -> Result<Vec<BidsFile>> {
747        let assocs = self.db.get_associations(path, kind)?;
748        let mut files = Vec::new();
749        for (dst, _kind) in &assocs {
750            if let Ok(bf) = self.reconstruct_file(dst) {
751                files.push(bf);
752            }
753        }
754        Ok(files)
755    }
756}
757
758impl std::fmt::Display for BidsLayout {
759    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
760        let root_str = self.root.to_string_lossy();
761        let root_display = if root_str.len() > 30 {
762            format!("...{}", &root_str[root_str.len() - 30..])
763        } else {
764            root_str.to_string()
765        };
766        let n_subjects = self.get_subjects().map(|s| s.len()).unwrap_or(0);
767        let n_sessions = self.get_sessions().map(|s| s.len()).unwrap_or(0);
768        let n_runs = self.get_runs().map(|s| s.len()).unwrap_or(0);
769        write!(
770            f,
771            "BIDS Layout: {root_display} | Subjects: {n_subjects} | Sessions: {n_sessions} | Runs: {n_runs}"
772        )
773    }
774}
775
776// ──────────────────────────────── LayoutBuilder ────────────────────────────────
777
778/// Builder for configuring and constructing a [`BidsLayout`].
779///
780/// Provides fine-grained control over dataset indexing, including validation,
781/// derivative paths, configuration files, database persistence, ignore/force
782/// patterns, and metadata indexing.
783///
784/// # Example
785///
786/// ```no_run
787/// # use bids_layout::BidsLayout;
788/// let layout = BidsLayout::builder("/path/to/dataset")
789///     .validate(true)
790///     .index_metadata(true)
791///     .database_path("/tmp/bids_index.sqlite")
792///     .add_derivative("/path/to/derivatives/fmriprep")
793///     .build()
794///     .unwrap();
795/// ```
pub struct LayoutBuilder {
    /// Dataset root directory to index.
    root: PathBuf,
    /// Whether to validate the dataset root (defaults to `true`).
    validate: bool,
    /// Derivative directories requested through the builder, if any.
    derivatives: Option<Vec<PathBuf>>,
    /// Names of the configurations to load (defaults to `["bids"]`).
    configs: Vec<String>,
    /// Regex-search flag (defaults to `false`).
    regex_search: bool,
    /// Database file path; `None` by default.
    database_path: Option<PathBuf>,
    /// Force the layout to be treated as a derivative dataset (defaults to `false`).
    is_derivative: bool,
    /// Whether to index metadata (defaults to `true`).
    index_metadata: bool,
    /// Patterns for paths to ignore while indexing, if any.
    ignore: Option<Vec<regex::Regex>>,
    /// Patterns for paths to force-index, if any.
    force_index: Option<Vec<regex::Regex>>,
    /// Layout configuration file name (defaults to `"layout_config.json"`).
    config_filename: String,
}
809
810impl LayoutBuilder {
811    pub fn new(root: impl AsRef<Path>) -> Self {
812        Self {
813            root: root.as_ref().to_path_buf(),
814            validate: true,
815            derivatives: None,
816            configs: vec!["bids".to_string()],
817            regex_search: false,
818            database_path: None,
819            is_derivative: false,
820            index_metadata: true,
821            ignore: None,
822            force_index: None,
823            config_filename: "layout_config.json".to_string(),
824        }
825    }
826
827    #[must_use]
828    pub fn validate(mut self, v: bool) -> Self {
829        self.validate = v;
830        self
831    }
832    #[must_use]
833    pub fn derivatives(mut self, paths: Vec<PathBuf>) -> Self {
834        self.derivatives = Some(paths);
835        self
836    }
837    #[must_use]
838    pub fn add_derivative(mut self, path: impl AsRef<Path>) -> Self {
839        self.derivatives
840            .get_or_insert_with(Vec::new)
841            .push(path.as_ref().to_path_buf());
842        self
843    }
844    #[must_use]
845    pub fn config(mut self, configs: Vec<String>) -> Self {
846        self.configs = configs;
847        self
848    }
849    #[must_use]
850    pub fn regex_search(mut self, v: bool) -> Self {
851        self.regex_search = v;
852        self
853    }
854    #[must_use]
855    pub fn database_path(mut self, path: impl AsRef<Path>) -> Self {
856        self.database_path = Some(path.as_ref().to_path_buf());
857        self
858    }
859    #[must_use]
860    pub fn is_derivative(mut self, v: bool) -> Self {
861        self.is_derivative = v;
862        self
863    }
864    #[must_use]
865    pub fn index_metadata(mut self, v: bool) -> Self {
866        self.index_metadata = v;
867        self
868    }
869    #[must_use]
870    pub fn ignore(mut self, patterns: Vec<regex::Regex>) -> Self {
871        self.ignore = Some(patterns);
872        self
873    }
874    #[must_use]
875    pub fn force_index(mut self, patterns: Vec<regex::Regex>) -> Self {
876        self.force_index = Some(patterns);
877        self
878    }
879    #[must_use]
880    pub fn config_filename(mut self, name: &str) -> Self {
881        self.config_filename = name.to_string();
882        self
883    }
884
885    pub fn build(self) -> Result<BidsLayout> {
886        let (root, description) = bids_validate::validate_root(&self.root, self.validate)?;
887
888        let is_derivative = self.is_derivative
889            || description
890                .as_ref()
891                .is_some_and(bids_core::DatasetDescription::is_derivative);
892        let source_pipeline = if is_derivative {
893            bids_validate::validate_derivative_path(&root).ok()
894        } else {
895            None
896        };
897
898        let default_configs = if is_derivative {
899            vec!["bids".to_string(), "derivatives".to_string()]
900        } else {
901            vec!["bids".to_string()]
902        };
903        let config_names = if self.configs.is_empty() {
904            default_configs
905        } else {
906            self.configs
907        };
908        let configs: Vec<Config> = config_names
909            .iter()
910            .filter_map(|name| Config::load(name).ok())
911            .collect();
912
913        let (ignore, force_index) =
914            bids_validate::validate_indexing_args(self.ignore, self.force_index, &root)?;
915
916        let db = match &self.database_path {
917            Some(path) if Database::exists(path) => Database::open(path)?,
918            db_path => {
919                let db = match db_path {
920                    Some(path) => Database::open(path)?,
921                    None => Database::in_memory()?,
922                };
923                let options = IndexerOptions {
924                    validate: self.validate && !is_derivative,
925                    index_metadata: self.index_metadata,
926                    ignore,
927                    force_index,
928                    config_filename: self.config_filename.clone(),
929                };
930                indexer::index_dataset(&root, &db, &configs, &options)?;
931                db
932            }
933        };
934
935        db.set_layout_info(&root.to_string_lossy(), &config_names.join(","))?;
936
937        let spec_compatibility = description.as_ref().map(|d| {
938            let schema = bids_schema::BidsSchema::load();
939            schema.check_dataset_version(&d.bids_version)
940        });
941
942        // Warn (via eprintln) if the dataset uses a newer spec than we support.
943        // This is non-fatal — the library will still try its best.
944        if let Some(compat) = &spec_compatibility {
945            if compat.has_warnings() {
946                log::warn!("{compat}");
947            }
948        }
949
950        let mut layout = BidsLayout {
951            root,
952            db,
953            description,
954            is_derivative,
955            source_pipeline,
956            derivatives: HashMap::new(),
957            configs,
958            regex_search: self.regex_search,
959            spec_compatibility,
960        };
961
962        if let Some(deriv_paths) = self.derivatives {
963            for path in deriv_paths {
964                layout.add_derivatives(path)?;
965            }
966        }
967
968        Ok(layout)
969    }
970}