Skip to main content

bids_variables/
io.rs

1//! Variable loading from BIDS datasets.
2//!
3//! Reads variables from all standard BIDS tabular files (participants.tsv,
4//! sessions.tsv, scans.tsv, events.tsv, physio/stim .tsv.gz, regressors.tsv)
5//! and organizes them into a [`NodeIndex`] hierarchy.
6//!
7//! The [`load_variables()`] function is the main entry point, accepting a
8//! `BidsLayout`, optional variable type filters, and an optional level filter.
9
10use bids_core::entities::StringEntities;
11use bids_core::error::Result;
12use bids_io::tsv::read_tsv;
13use bids_layout::BidsLayout;
14
15use crate::node::{NodeIndex, RunInfo};
16use crate::variables::{SimpleVariable, SparseRunVariable};
17
/// The types of variables that can be loaded.
///
/// Each variant selects one family of BIDS tabular files handled by
/// [`load_variables()`]. The enum is `Copy` and hashable, so it can be used
/// directly as a set or map key when tracking which types were requested.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VariableType {
    /// Columns of `events` `.tsv` files, loaded as sparse run variables.
    Events,
    /// Columns of `physio` `.tsv.gz` recordings, loaded as dense run variables.
    Physio,
    /// Columns of `stim` `.tsv.gz` recordings, loaded as dense run variables.
    Stim,
    /// Columns of `scans` `.tsv` files, attached to session-level nodes.
    Scans,
    /// Columns of the root `participants.tsv`, attached to the dataset node.
    Participants,
    /// Columns of `sessions` `.tsv` files, attached to subject-level nodes.
    Sessions,
    /// Columns of `regressors`/`timeseries` `.tsv` files, loaded as dense run
    /// variables.
    Regressors,
}
29
30/// Load variables from a BIDS dataset.
31pub fn load_variables(
32    layout: &BidsLayout,
33    types: Option<&[VariableType]>,
34    level: Option<&str>,
35) -> Result<NodeIndex> {
36    let types = resolve_types(types, level);
37    let mut index = NodeIndex::new();
38    for vtype in &types {
39        match vtype {
40            VariableType::Participants => load_participants(layout, &mut index)?,
41            VariableType::Sessions => load_sessions(layout, &mut index)?,
42            VariableType::Scans => load_scans(layout, &mut index)?,
43            VariableType::Events => load_events(layout, &mut index)?,
44            VariableType::Physio | VariableType::Stim | VariableType::Regressors => {
45                load_time_variables(layout, &mut index, None)?;
46            }
47        }
48    }
49    Ok(index)
50}
51
52fn resolve_types(types: Option<&[VariableType]>, level: Option<&str>) -> Vec<VariableType> {
53    if let Some(t) = types {
54        return t.to_vec();
55    }
56    match level {
57        Some("run") => vec![
58            VariableType::Events,
59            VariableType::Physio,
60            VariableType::Stim,
61            VariableType::Regressors,
62        ],
63        Some("session") => vec![VariableType::Scans],
64        Some("subject") => vec![VariableType::Sessions, VariableType::Scans],
65        Some("dataset") => vec![VariableType::Participants],
66        _ => vec![
67            VariableType::Events,
68            VariableType::Physio,
69            VariableType::Stim,
70            VariableType::Regressors,
71            VariableType::Scans,
72            VariableType::Sessions,
73            VariableType::Participants,
74        ],
75    }
76}
77
78static SUB_RE: std::sync::LazyLock<regex::Regex> =
79    std::sync::LazyLock::new(|| regex::Regex::new(r"sub-([a-zA-Z0-9]+)").unwrap());
80static SES_RE: std::sync::LazyLock<regex::Regex> =
81    std::sync::LazyLock::new(|| regex::Regex::new(r"ses-([a-zA-Z0-9]+)").unwrap());
82
83fn load_participants(layout: &BidsLayout, index: &mut NodeIndex) -> Result<()> {
84    let tsv_path = layout.root().join("participants.tsv");
85    if !tsv_path.exists() {
86        return Ok(());
87    }
88    let rows = read_tsv(&tsv_path)?;
89    if rows.is_empty() {
90        return Ok(());
91    }
92
93    let node_idx = index.create_node("dataset", StringEntities::new());
94    let columns: Vec<String> = rows[0]
95        .keys()
96        .filter(|k| k.as_str() != "participant_id")
97        .cloned()
98        .collect();
99
100    for col_name in &columns {
101        let mut values = Vec::new();
102        let mut row_index = Vec::new();
103        for row in &rows {
104            values.push(row.get(col_name).cloned().unwrap_or_default());
105            let mut ent = StringEntities::new();
106            if let Some(pid) = row.get("participant_id") {
107                ent.insert(
108                    "subject".into(),
109                    pid.strip_prefix("sub-").unwrap_or(pid).into(),
110                );
111            }
112            row_index.push(ent);
113        }
114        let var = SimpleVariable::new(col_name, "participants", values, row_index);
115        if let Some(node) = index.get_node_mut(node_idx) {
116            node.add_variable(var);
117        }
118    }
119    Ok(())
120}
121
122fn load_sessions(layout: &BidsLayout, index: &mut NodeIndex) -> Result<()> {
123    let session_files = layout
124        .get()
125        .suffix("sessions")
126        .extension("tsv")
127        .return_paths()?;
128    for tsv_path in &session_files {
129        let rows = read_tsv(tsv_path)?;
130        if rows.is_empty() {
131            continue;
132        }
133
134        let mut entities = StringEntities::new();
135        let path_str = tsv_path.to_string_lossy();
136        if let Some(caps) = SUB_RE.captures(&path_str) {
137            entities.insert("subject".into(), caps[1].to_string());
138        }
139        let node_idx = index.create_node("subject", entities);
140        let columns: Vec<String> = rows[0]
141            .keys()
142            .filter(|k| k.as_str() != "session_id")
143            .cloned()
144            .collect();
145
146        for col_name in &columns {
147            let mut values = Vec::new();
148            let mut row_index = Vec::new();
149            for row in &rows {
150                values.push(row.get(col_name).cloned().unwrap_or_default());
151                let mut ent = StringEntities::new();
152                if let Some(sid) = row.get("session_id") {
153                    ent.insert(
154                        "session".into(),
155                        sid.strip_prefix("ses-").unwrap_or(sid).into(),
156                    );
157                }
158                row_index.push(ent);
159            }
160            let var = SimpleVariable::new(col_name, "sessions", values, row_index);
161            if let Some(node) = index.get_node_mut(node_idx) {
162                node.add_variable(var);
163            }
164        }
165    }
166    Ok(())
167}
168
169fn load_scans(layout: &BidsLayout, index: &mut NodeIndex) -> Result<()> {
170    let scans_files = layout
171        .get()
172        .suffix("scans")
173        .extension("tsv")
174        .return_paths()?;
175    for tsv_path in &scans_files {
176        let rows = read_tsv(tsv_path)?;
177        if rows.is_empty() {
178            continue;
179        }
180
181        let mut entities = StringEntities::new();
182        let path_str = tsv_path.to_string_lossy();
183        if let Some(caps) = SUB_RE.captures(&path_str) {
184            entities.insert("subject".into(), caps[1].to_string());
185        }
186        if let Some(caps) = SES_RE.captures(&path_str) {
187            entities.insert("session".into(), caps[1].to_string());
188        }
189        let node_idx = index.create_node("session", entities);
190        let columns: Vec<String> = rows[0]
191            .keys()
192            .filter(|k| k.as_str() != "filename")
193            .cloned()
194            .collect();
195
196        for col_name in &columns {
197            let mut values = Vec::new();
198            let mut row_index = Vec::new();
199            for row in &rows {
200                values.push(row.get(col_name).cloned().unwrap_or_default());
201                row_index.push(StringEntities::new());
202            }
203            let var = SimpleVariable::new(col_name, "scans", values, row_index);
204            if let Some(node) = index.get_node_mut(node_idx) {
205                node.add_variable(var);
206            }
207        }
208    }
209    Ok(())
210}
211
212/// Load time-series variables (physio/stim/regressors) with scan_length fallback.
213fn load_time_variables(
214    layout: &BidsLayout,
215    index: &mut NodeIndex,
216    scan_length: Option<f64>,
217) -> Result<()> {
218    // Look for physio/stim files
219    for suffix in &["physio", "stim"] {
220        let files = layout.get().suffix(suffix).extension("tsv.gz").collect()?;
221        for f in &files {
222            let mut entities = StringEntities::new();
223            for (k, v) in &f.entities {
224                entities.insert(k.clone(), v.as_str_lossy().into_owned());
225            }
226            let md = layout.get_metadata(&f.path)?;
227            let sr = md.get_f64("SamplingFrequency").unwrap_or(1.0);
228            let start_time = md.get_f64("StartTime").unwrap_or(0.0);
229            let columns: Vec<String> = md
230                .get_array("Columns")
231                .map(|arr| {
232                    arr.iter()
233                        .filter_map(|v| v.as_str().map(String::from))
234                        .collect()
235                })
236                .unwrap_or_default();
237
238            let duration = scan_length.unwrap_or(0.0);
239            let node_idx = index.get_or_create_run_node(entities.clone(), None, duration, 0.0, 0);
240
241            if let Ok(rows) = bids_io::tsv::read_tsv_gz(&f.path) {
242                for (col_idx, col_name) in columns.iter().enumerate() {
243                    let values: Vec<f64> = rows
244                        .iter()
245                        .filter_map(|row| row.values().nth(col_idx).and_then(|v| v.parse().ok()))
246                        .collect();
247                    if values.is_empty() {
248                        continue;
249                    }
250
251                    // Trim/pad to match scan duration
252                    let n_expected = (duration * sr).ceil() as usize;
253                    let trimmed = if values.len() > n_expected && n_expected > 0 {
254                        let skip = if start_time < 0.0 {
255                            (-start_time * sr).floor() as usize
256                        } else {
257                            0
258                        };
259                        values[skip..].iter().take(n_expected).copied().collect()
260                    } else {
261                        values
262                    };
263
264                    let ri = index
265                        .get_run_node_mut(node_idx)
266                        .map(|rn| rn.get_info())
267                        .unwrap_or(crate::node::RunInfo {
268                            entities: entities.clone(),
269                            duration,
270                            tr: 0.0,
271                            image: None,
272                            n_vols: 0,
273                        });
274                    let var = crate::variables::DenseRunVariable::new(
275                        col_name,
276                        suffix,
277                        trimmed,
278                        sr,
279                        vec![ri],
280                    );
281                    if let Some(rn) = index.get_run_node_mut(node_idx) {
282                        rn.add_dense_variable(var);
283                    }
284                }
285            }
286        }
287    }
288
289    // Regressors/timeseries TSV files
290    let reg_files = layout
291        .get()
292        .filter_any("suffix", &["regressors", "timeseries"])
293        .extension("tsv")
294        .collect()?;
295    for f in &reg_files {
296        let mut entities = StringEntities::new();
297        for (k, v) in &f.entities {
298            entities.insert(k.clone(), v.as_str_lossy().into_owned());
299        }
300
301        let node_idx = index.get_or_create_run_node(
302            entities.clone(),
303            None,
304            scan_length.unwrap_or(0.0),
305            0.0,
306            0,
307        );
308
309        if let Ok(rows) = bids_io::tsv::read_tsv(&f.path) {
310            if rows.is_empty() {
311                continue;
312            }
313            let columns: Vec<String> = rows[0].keys().cloned().collect();
314            let tr = index
315                .get_run_node_mut(node_idx)
316                .map(|rn| rn.repetition_time)
317                .unwrap_or(1.0);
318            let sr = if tr > 0.0 { 1.0 / tr } else { 1.0 };
319
320            for col_name in &columns {
321                let values: Vec<f64> = rows
322                    .iter()
323                    .filter_map(|row| row.get(col_name).and_then(|v| v.parse().ok()))
324                    .collect();
325                if values.is_empty() {
326                    continue;
327                }
328
329                let ri = index
330                    .get_run_node_mut(node_idx)
331                    .map(|rn| rn.get_info())
332                    .unwrap_or(crate::node::RunInfo {
333                        entities: entities.clone(),
334                        duration: 0.0,
335                        tr,
336                        image: None,
337                        n_vols: 0,
338                    });
339                let var = crate::variables::DenseRunVariable::new(
340                    col_name,
341                    "regressors",
342                    values,
343                    sr,
344                    vec![ri],
345                );
346                if let Some(rn) = index.get_run_node_mut(node_idx) {
347                    rn.add_dense_variable(var);
348                }
349            }
350        }
351    }
352
353    Ok(())
354}
355
356fn load_events(layout: &BidsLayout, index: &mut NodeIndex) -> Result<()> {
357    let event_files = layout.get().suffix("events").extension("tsv").collect()?;
358    for ef in &event_files {
359        let rows = read_tsv(&ef.path)?;
360        if rows.is_empty() {
361            continue;
362        }
363
364        let mut entities = StringEntities::new();
365        for (k, v) in &ef.entities {
366            entities.insert(k.clone(), v.as_str_lossy().into_owned());
367        }
368
369        let node_idx = index.get_or_create_run_node(entities.clone(), None, 0.0, 0.0, 0);
370
371        let run_info = index
372            .get_run_node_mut(node_idx)
373            .map(|rn| rn.get_info())
374            .unwrap_or(RunInfo {
375                entities: entities.clone(),
376                duration: 0.0,
377                tr: 0.0,
378                image: None,
379                n_vols: 0,
380            });
381
382        let columns: Vec<String> = rows[0]
383            .keys()
384            .filter(|k| k.as_str() != "onset" && k.as_str() != "duration")
385            .cloned()
386            .collect();
387
388        for col_name in &columns {
389            let mut onset = Vec::new();
390            let mut duration = Vec::new();
391            let mut amplitude = Vec::new();
392            let mut row_index = Vec::new();
393
394            for row in &rows {
395                let o: f64 = row.get("onset").and_then(|v| v.parse().ok()).unwrap_or(0.0);
396                let d: f64 = row
397                    .get("duration")
398                    .and_then(|v| v.parse().ok())
399                    .unwrap_or(0.0);
400                let a = row.get(col_name).cloned().unwrap_or_default();
401                if a.is_empty() {
402                    continue;
403                }
404
405                onset.push(o);
406                duration.push(d);
407                amplitude.push(a);
408                row_index.push(entities.clone());
409            }
410            if onset.is_empty() {
411                continue;
412            }
413
414            let var = SparseRunVariable::new(
415                col_name,
416                "events",
417                onset,
418                duration,
419                amplitude,
420                row_index,
421                vec![run_info.clone()],
422            );
423            if let Some(rn) = index.get_run_node_mut(node_idx) {
424                rn.add_sparse_variable(var);
425            }
426        }
427    }
428    Ok(())
429}