1use bids_core::config::Config;
8use bids_core::entities::Entity;
9use bids_core::error::Result;
10use bids_core::file::BidsFile;
11use bids_io::json::read_json_sidecar;
12use bids_validate::{should_force_index, should_ignore};
13use regex::Regex;
14use std::collections::{HashMap, HashSet};
15use std::path::{Path, PathBuf};
16use walkdir::WalkDir;
17
18use crate::db::Database;
19
20fn collect_unique_entities(configs: &[Config]) -> Vec<Entity> {
22 let mut all = Vec::new();
23 let mut seen = HashSet::new();
24 for config in configs {
25 for entity in &config.entities {
26 if seen.insert(entity.name.clone()) {
27 all.push(entity.clone());
28 }
29 }
30 }
31 all
32}
33
34fn index_single_file(path: &Path, db: &Database, entities: &[Entity]) -> Result<()> {
36 let mut bf = BidsFile::new(path);
37 let path_str = path.to_string_lossy();
38 for entity in entities {
39 if let Some(val) = entity.match_path(&path_str) {
40 bf.entities.insert(entity.name.clone(), val);
41 }
42 }
43 db.insert_file(&bf)?;
44 let file_path_str = path_str.into_owned();
45 for (name, val) in &bf.entities {
46 db.insert_tag(&file_path_str, name, &val.as_str_lossy(), "str", false)?;
47 }
48 Ok(())
49}
50
51pub struct IndexerOptions {
56 pub validate: bool,
57 pub ignore: Vec<Regex>,
58 pub force_index: Vec<Regex>,
59 pub index_metadata: bool,
60 pub config_filename: String,
61}
62
63impl Default for IndexerOptions {
64 fn default() -> Self {
65 Self {
66 validate: true,
67 ignore: bids_validate::DEFAULT_IGNORE.clone(),
68 force_index: Vec::new(),
69 index_metadata: true,
70 config_filename: "layout_config.json".to_string(),
71 }
72 }
73}
74
75pub fn index_dataset(
88 root: &Path,
89 db: &Database,
90 configs: &[Config],
91 options: &IndexerOptions,
92) -> Result<()> {
93 let mut all_entities = collect_unique_entities(configs);
95
96 db.begin_transaction()?;
98
99 let result = index_files(root, db, &mut all_entities, options);
100 if result.is_err() {
101 let _ = db.rollback_transaction();
102 return result;
103 }
104
105 index_zarr_dirs(
107 root,
108 db,
109 &all_entities,
110 &options.ignore,
111 &options.force_index,
112 )?;
113
114 db.commit_transaction()?;
115
116 if options.index_metadata {
118 db.begin_transaction()?;
119 let md_result = index_metadata(root, db);
120 if md_result.is_err() {
121 let _ = db.rollback_transaction();
122 return md_result;
123 }
124 db.commit_transaction()?;
125 }
126
127 Ok(())
128}
129
130fn index_files(
132 root: &Path,
133 db: &Database,
134 all_entities: &mut Vec<Entity>,
135 options: &IndexerOptions,
136) -> Result<()> {
137 for entry in WalkDir::new(root)
139 .follow_links(true)
140 .into_iter()
141 .filter_entry(|e| {
142 if let Ok(rel) = e.path().strip_prefix(root) {
144 let rel_str = rel.to_string_lossy();
145 if rel_str == "derivatives" || rel_str.starts_with("derivatives/") {
146 return false;
147 }
148 }
149 if e.file_type().is_dir()
151 && should_ignore(e.path(), root, &options.ignore)
152 && !should_force_index(e.path(), root, &options.force_index)
153 {
154 return false;
155 }
156 true
157 })
158 .filter_map(std::result::Result::ok)
159 {
160 let path = entry.path();
161
162 if entry.file_type().is_dir() {
164 let config_file = path.join(&options.config_filename);
166 if config_file.exists()
167 && let Ok(cfg) = Config::from_file(&config_file)
168 {
169 for entity in &cfg.entities {
170 if !all_entities.iter().any(|e| e.name == entity.name) {
171 all_entities.push(entity.clone());
172 }
173 }
174 }
175 continue;
176 }
177
178 if path
180 .file_name()
181 .is_some_and(|n| n.to_str() == Some(&options.config_filename))
182 {
183 continue;
184 }
185
186 let is_ignored = should_ignore(path, root, &options.ignore);
188 let is_forced = should_force_index(path, root, &options.force_index);
189
190 if is_ignored && !is_forced {
191 continue;
192 }
193
194 if !is_forced && options.validate && !is_bids_valid(path, root) {
196 continue;
197 }
198
199 if path.is_dir() {
201 continue;
202 }
203
204 let path_str_raw = path.to_string_lossy();
206 if path_str_raw.contains(".zarr/") {
207 continue; }
209
210 index_single_file(path, db, all_entities)?;
211 }
212
213 Ok(())
214}
215
216fn index_zarr_dirs(
218 root: &Path,
219 db: &Database,
220 entities: &[Entity],
221 _ignore: &[Regex],
222 _force: &[Regex],
223) -> Result<()> {
224 for entry in WalkDir::new(root)
225 .follow_links(true)
226 .into_iter()
227 .filter_map(std::result::Result::ok)
228 {
229 let path = entry.path();
230 if entry.file_type().is_dir()
231 && let Some(ext) = path.extension()
232 && ext == "zarr"
233 {
234 index_single_file(path, db, entities)?;
235 }
236 }
237 Ok(())
238}
239
/// Returns whether `path` is in a location this indexer accepts.
///
/// A path is accepted when it lies directly in `root` (its root-relative form
/// contains no `/` or `\`), or when the first component below `root` starts
/// with `sub-`. Paths outside `root` are rejected.
fn is_bids_valid(path: &Path, root: &Path) -> bool {
    let Ok(relative) = path.strip_prefix(root) else {
        return false;
    };

    // Root-level entries are always accepted.
    let has_separator = relative.to_string_lossy().contains(['/', '\\']);
    if !has_separator {
        return true;
    }

    relative
        .components()
        .next()
        .and_then(|component| component.as_os_str().to_str())
        .is_some_and(|top_level| top_level.starts_with("sub-"))
}
261
262fn index_metadata(root: &Path, db: &Database) -> Result<()> {
264 let all_paths = db.all_file_paths()?;
265
266 let mut json_files: HashSet<PathBuf> = HashSet::new();
268 let mut data_files: Vec<String> = Vec::new();
269
270 for path_str in &all_paths {
271 let path = PathBuf::from(path_str);
272 if path.extension().is_some_and(|e| e == "json") {
273 json_files.insert(path);
274 } else {
275 data_files.push(path_str.clone());
276 }
277 }
278
279 let mut existing_tags: HashMap<String, String> = HashMap::new();
281 for path_str in &all_paths {
282 let tags = db.get_tags(path_str)?;
283 for (entity_name, value, _, _) in &tags {
284 existing_tags.insert(format!("{path_str}_{entity_name}"), value.clone());
285 }
286 }
287
288 let mut seen_assocs: HashSet<String> = HashSet::new();
289
290 for data_path_str in &data_files {
291 let data_path = PathBuf::from(data_path_str);
292 let data_tags = db.get_tags(data_path_str)?;
293
294 let suffix = data_tags
295 .iter()
296 .find(|(n, _, _, _)| n == "suffix")
297 .map(|(_, v, _, _)| v.clone());
298 let extension = data_tags
299 .iter()
300 .find(|(n, _, _, _)| n == "extension")
301 .map(|(_, v, _, _)| v.clone());
302
303 let suffix = match suffix {
304 Some(s) => s,
305 None => continue,
306 };
307
308 let data_entities: HashMap<String, String> = data_tags
309 .iter()
310 .filter(|(n, _, _, _)| n != "suffix" && n != "extension")
311 .map(|(n, v, _, _)| (n.clone(), v.clone()))
312 .collect();
313
314 let mut dir = data_path.parent();
316 let mut sidecar_stack: Vec<PathBuf> = Vec::new();
317
318 while let Some(current_dir) = dir {
319 for json_path in &json_files {
320 if json_path.parent() != Some(current_dir) {
321 continue;
322 }
323
324 let json_stem = json_path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
325 let json_suffix = json_stem.rsplit('_').next().unwrap_or("");
326 if json_suffix != suffix {
327 continue;
328 }
329
330 let json_entities = extract_kv_pairs(json_stem);
331 let all_match = json_entities
332 .iter()
333 .all(|(k, v)| data_entities.get(k).is_none_or(|dv| dv == v));
334
335 if all_match {
336 sidecar_stack.push(json_path.clone());
337
338 let assoc_key =
339 format!("{}#{}#Metadata", json_path.to_string_lossy(), data_path_str);
340 if seen_assocs.insert(assoc_key) {
341 db.insert_association(
342 &json_path.to_string_lossy(),
343 data_path_str,
344 "Metadata",
345 )?;
346 }
347 }
348 }
349
350 if current_dir == root {
351 break;
352 }
353 dir = current_dir.parent();
354 }
355
356 for i in 0..sidecar_stack.len() {
358 if i + 1 < sidecar_stack.len() {
359 let src = sidecar_stack[i].to_string_lossy().to_string();
360 let dst = sidecar_stack[i + 1].to_string_lossy().to_string();
361 let key1 = format!("{src}#{dst}#Child");
362 if seen_assocs.insert(key1) {
363 db.insert_association(&src, &dst, "Child")?;
364 db.insert_association(&dst, &src, "Parent")?;
365 }
366 }
367 }
368
369 sidecar_stack.reverse();
371 let mut merged_metadata: indexmap::IndexMap<String, serde_json::Value> =
372 indexmap::IndexMap::new();
373 for sidecar_path in &sidecar_stack {
374 if let Ok(md) = read_json_sidecar(sidecar_path) {
375 for (k, v) in md {
376 merged_metadata.insert(k, v);
377 }
378 }
379 }
380
381 for (key, value) in &merged_metadata {
383 if value.is_null() {
384 continue;
385 }
386
387 let tag_key = format!("{data_path_str}_{key}");
388 let val_str = match value {
389 serde_json::Value::String(s) => s.clone(),
390 other => other.to_string(),
391 };
392
393 if let Some(existing_val) = existing_tags.get(&tag_key) {
394 if *existing_val != val_str {
395 log::warn!(
396 "conflicting metadata for '{key}' on {data_path_str}: '{existing_val}' vs '{val_str}'"
397 );
398 }
399 continue;
400 }
401 db.insert_tag(data_path_str, key, &val_str, "json", true)?;
402 }
403
404 if let Some(intended) = merged_metadata.get("IntendedFor") {
406 let subject = data_entities.get("subject").cloned().unwrap_or_default();
407 index_intended_for(db, data_path_str, intended, root, &subject)?;
408 }
409
410 index_companion_associations(
412 db,
413 data_path_str,
414 &suffix,
415 extension.as_deref(),
416 &data_entities,
417 )?;
418 }
419
420 Ok(())
421}
422
423fn index_intended_for(
425 db: &Database,
426 data_path: &str,
427 intended: &serde_json::Value,
428 root: &Path,
429 subject: &str,
430) -> Result<()> {
431 let intents: Vec<&str> = match intended {
432 serde_json::Value::String(s) => vec![s.as_str()],
433 serde_json::Value::Array(arr) => arr.iter().filter_map(|v| v.as_str()).collect(),
434 _ => vec![],
435 };
436
437 for intent in intents {
438 if let Some(target) = bids_validate::resolve_intended_for(intent, root, subject) {
439 let target_str = target.to_string_lossy();
440 db.insert_association(data_path, &target_str, "IntendedFor")?;
441 db.insert_association(&target_str, data_path, "InformedBy")?;
442 }
443 }
444 Ok(())
445}
446
447fn index_companion_associations(
449 db: &Database,
450 data_path: &str,
451 suffix: &str,
452 extension: Option<&str>,
453 data_entities: &HashMap<String, String>,
454) -> Result<()> {
455 if extension.is_none() {
456 return Ok(());
457 }
458
459 if matches!(suffix, "events" | "physio" | "stim" | "sbref") {
460 let mut filters: Vec<(String, Vec<String>, bool)> = data_entities
461 .iter()
462 .filter(|(k, _)| matches!(k.as_str(), "subject" | "session" | "task" | "run"))
463 .map(|(k, v)| (k.clone(), vec![v.clone()], false))
464 .collect();
465 filters.push(("suffix".into(), vec!["bold".into(), "eeg".into()], false));
466
467 if let Ok(images) = db.query_files(&filters) {
468 for img in &images {
469 db.insert_association(data_path, img, "IntendedFor")?;
470 db.insert_association(img, data_path, "InformedBy")?;
471 }
472 }
473 }
474
475 if suffix == "dwi" && matches!(extension, Some(".bvec" | ".bval")) {
476 let mut filters: Vec<(String, Vec<String>, bool)> = data_entities
477 .iter()
478 .filter(|(k, _)| matches!(k.as_str(), "subject" | "session" | "run" | "acquisition"))
479 .map(|(k, v)| (k.clone(), vec![v.clone()], false))
480 .collect();
481 filters.push(("suffix".into(), vec!["dwi".into()], false));
482 filters.push((
483 "extension".into(),
484 vec![".nii".into(), ".nii.gz".into()],
485 false,
486 ));
487
488 if let Ok(images) = db.query_files(&filters) {
489 for img in &images {
490 db.insert_association(data_path, img, "IntendedFor")?;
491 db.insert_association(img, data_path, "InformedBy")?;
492 }
493 }
494 }
495
496 Ok(())
497}
498
/// Parses the `key-value` parts of a BIDS-style file stem.
///
/// The stem is split on `_`; each part containing a `-` is split at its first
/// `-` into key and value, and parts without a `-` (such as the trailing
/// suffix) are dropped. A fixed set of short keys is expanded to long entity
/// names (`sub` → `subject`, `ses` → `session`, …); any other key is returned
/// unchanged. Pairs are returned in order of appearance.
fn extract_kv_pairs(stem: &str) -> Vec<(String, String)> {
    stem.split('_')
        .filter_map(|segment| segment.split_once('-'))
        .map(|(short_key, value)| {
            let long_name = match short_key {
                "sub" => "subject",
                "ses" => "session",
                "acq" => "acquisition",
                "ce" => "ceagent",
                "rec" => "reconstruction",
                "dir" => "direction",
                "mod" => "modality",
                "trc" => "tracer",
                unmapped => unmapped,
            };
            (long_name.to_owned(), value.to_owned())
        })
        .collect()
}