Skip to main content

bids_core/
dataset_description.rs

//! Typed representation of `dataset_description.json`.
//!
//! Every BIDS dataset root contains a `dataset_description.json` file with
//! mandatory fields (`Name`, `BIDSVersion`) and optional fields (authors,
//! license, funding, etc.). Derivative datasets additionally declare
//! `GeneratedBy` (or the deprecated `PipelineDescription`).
7
8use serde::{Deserialize, Serialize};
9use serde_json::Value;
10use std::path::Path;
11
12use crate::error::{BidsError, Result};
13
14/// Contents of a BIDS `dataset_description.json` file.
15///
16/// See: <https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files.html>
17///
18/// # Example
19///
20/// ```
21/// use bids_core::DatasetDescription;
22///
23/// let json = r#"{"Name": "My Dataset", "BIDSVersion": "1.9.0"}"#;
24/// let desc: DatasetDescription = serde_json::from_str(json).unwrap();
25/// assert_eq!(desc.name, "My Dataset");
26/// assert!(!desc.is_derivative());
27/// ```
28#[derive(Debug, Clone, Serialize, Deserialize)]
29#[serde(rename_all = "PascalCase")]
30pub struct DatasetDescription {
31    /// Name of the dataset.
32    pub name: String,
33    /// The version of the BIDS standard used.
34    #[serde(rename = "BIDSVersion")]
35    pub bids_version: String,
36    /// What license the dataset is distributed under.
37    #[serde(default)]
38    pub license: Option<String>,
39    /// List of individuals who contributed to the creation/curation of the dataset.
40    #[serde(default)]
41    pub authors: Option<Vec<String>>,
42    /// Text acknowledging contributions of individuals or institutions.
43    #[serde(default)]
44    pub acknowledgements: Option<String>,
45    /// How to acknowledge this dataset when used in publications.
46    #[serde(default)]
47    pub how_to_acknowledge: Option<String>,
48    /// List of sources of funding.
49    #[serde(default)]
50    pub funding: Option<Vec<String>>,
51    /// List of ethics committee approvals.
52    #[serde(default)]
53    pub ethics_approvals: Option<Vec<String>>,
54    /// List of references to publications about the dataset.
55    #[serde(default)]
56    pub references_and_links: Option<Vec<String>>,
57    /// The DOI of the dataset.
58    #[serde(rename = "DatasetDOI", default)]
59    pub dataset_doi: Option<String>,
60    /// Type of dataset: "raw" or "derivative".
61    #[serde(default)]
62    pub dataset_type: Option<String>,
63    /// Information about the pipeline that generated a derivative dataset.
64    #[serde(default)]
65    pub generated_by: Option<Vec<GeneratedBy>>,
66    /// Datasets that were used to generate this derivative dataset.
67    #[serde(default)]
68    pub source_datasets: Option<Vec<Value>>,
69    /// Legacy field (deprecated in BIDS 1.4.0).
70    #[serde(default)]
71    pub pipeline_description: Option<PipelineDescription>,
72}
73
74/// Information about a pipeline that generated a derivative dataset.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76#[serde(rename_all = "PascalCase")]
77pub struct GeneratedBy {
78    pub name: String,
79    #[serde(default)]
80    pub version: Option<String>,
81    #[serde(default)]
82    pub description: Option<String>,
83    #[serde(default)]
84    pub code_url: Option<String>,
85    #[serde(default)]
86    pub container: Option<Value>,
87}
88
89/// Legacy pipeline description (deprecated).
90#[derive(Debug, Clone, Serialize, Deserialize)]
91#[serde(rename_all = "PascalCase")]
92pub struct PipelineDescription {
93    pub name: String,
94    #[serde(default)]
95    pub version: Option<String>,
96    #[serde(default)]
97    pub description: Option<String>,
98}
99
100impl DatasetDescription {
101    /// Load `dataset_description.json` from the given directory.
102    ///
103    /// # Errors
104    ///
105    /// Returns [`BidsError::MissingDatasetDescription`] if the file doesn't exist,
106    /// or an I/O or JSON error if the file can't be read or parsed.
107    pub fn from_dir(dir: &Path) -> Result<Self> {
108        let path = dir.join("dataset_description.json");
109        if !path.exists() {
110            return Err(BidsError::MissingDatasetDescription);
111        }
112        let contents = std::fs::read_to_string(&path)?;
113        let desc: Self = serde_json::from_str(&contents)?;
114        Ok(desc)
115    }
116
117    /// Validate that mandatory fields are present.
118    ///
119    /// # Errors
120    ///
121    /// Returns [`BidsError::MissingMandatoryField`] if `Name` or `BIDSVersion`
122    /// is empty.
123    pub fn validate(&self) -> Result<()> {
124        if self.name.is_empty() {
125            return Err(BidsError::MissingMandatoryField {
126                field: "Name".into(),
127            });
128        }
129        if self.bids_version.is_empty() {
130            return Err(BidsError::MissingMandatoryField {
131                field: "BIDSVersion".into(),
132            });
133        }
134        Ok(())
135    }
136
137    /// Whether this is a derivative dataset.
138    #[must_use]
139    pub fn is_derivative(&self) -> bool {
140        self.dataset_type.as_deref() == Some("derivative")
141    }
142
143    /// Get the pipeline name for derivative datasets.
144    #[must_use]
145    pub fn pipeline_name(&self) -> Option<&str> {
146        // Try GeneratedBy first (BIDS >= 1.4.0)
147        if let Some(generated_by) = &self.generated_by
148            && let Some(first) = generated_by.first()
149        {
150            return Some(&first.name);
151        }
152        // Fall back to PipelineDescription (deprecated)
153        if let Some(pd) = &self.pipeline_description {
154            return Some(&pd.name);
155        }
156        None
157    }
158
159    /// Save this description to a `dataset_description.json` file.
160    ///
161    /// # Errors
162    ///
163    /// Returns an I/O error if the directory doesn't exist or the file can't
164    /// be written, or a JSON error if serialization fails.
165    pub fn save_to(&self, dir: &Path) -> Result<()> {
166        let path = dir.join("dataset_description.json");
167        let json = serde_json::to_string_pretty(self)?;
168        std::fs::write(path, json)?;
169        Ok(())
170    }
171}
172
173impl std::fmt::Display for DatasetDescription {
174    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
175        write!(f, "{} (BIDS {})", self.name, self.bids_version)?;
176        if self.is_derivative() {
177            if let Some(pipeline) = self.pipeline_name() {
178                write!(f, " [derivative: {pipeline}]")?;
179            } else {
180                write!(f, " [derivative]")?;
181            }
182        }
183        Ok(())
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// A description without `DatasetType` parses and is not a derivative.
    #[test]
    fn test_parse_description() {
        let raw = r#"{
            "Name": "Test Dataset",
            "BIDSVersion": "1.6.0",
            "License": "CC0",
            "Authors": ["Test Author"]
        }"#;
        let parsed = serde_json::from_str::<DatasetDescription>(raw).unwrap();
        assert_eq!(parsed.name, "Test Dataset");
        assert_eq!(parsed.bids_version, "1.6.0");
        assert!(!parsed.is_derivative());
    }

    /// A derivative description reports the first `GeneratedBy` entry's name.
    #[test]
    fn test_derivative_description() {
        let raw = r#"{
            "Name": "fmriprep",
            "BIDSVersion": "1.6.0",
            "DatasetType": "derivative",
            "GeneratedBy": [{"Name": "fmriprep", "Version": "20.2.0"}]
        }"#;
        let parsed = serde_json::from_str::<DatasetDescription>(raw).unwrap();
        assert!(parsed.is_derivative());
        assert_eq!(parsed.pipeline_name(), Some("fmriprep"));
    }
}