1use bids_core::error::{BidsError, Result};
8use bids_core::file::BidsFile;
9use rusqlite::{Connection, functions::FunctionFlags, params};
10use std::path::Path;
11
12fn db_err(e: rusqlite::Error) -> BidsError {
14 BidsError::Database(e.to_string())
15}
16
17pub struct Database {
40 conn: Connection,
41}
42
43impl Database {
44 pub fn in_memory() -> Result<Self> {
46 let conn = Connection::open_in_memory().map_err(db_err)?;
47 let db = Self { conn };
48 db.register_regexp()?;
49 db.create_tables()?;
50 Ok(db)
51 }
52
53 pub fn open(path: &Path) -> Result<Self> {
55 if let Some(parent) = path.parent() {
56 std::fs::create_dir_all(parent)?;
57 }
58 let conn = Connection::open(path).map_err(db_err)?;
59 let db = Self { conn };
60 db.register_regexp()?;
61 db.create_tables()?;
62 Ok(db)
63 }
64
65 pub fn exists(path: &Path) -> bool {
67 path.exists()
68 }
69
70 fn register_regexp(&self) -> Result<()> {
75 use std::cell::RefCell;
76 use std::collections::HashMap;
77
78 thread_local! {
81 static CACHE: RefCell<HashMap<String, regex::Regex>> = RefCell::new(HashMap::new());
82 }
83
84 self.conn
85 .create_scalar_function(
86 "regexp",
87 2,
88 FunctionFlags::SQLITE_UTF8 | FunctionFlags::SQLITE_DETERMINISTIC,
89 |ctx| {
90 let pattern = ctx
91 .get_raw(0)
92 .as_str()
93 .map_err(|e| rusqlite::Error::UserFunctionError(Box::new(e)))?;
94 let value = ctx
95 .get_raw(1)
96 .as_str()
97 .map_err(|e| rusqlite::Error::UserFunctionError(Box::new(e)))?;
98 CACHE.with(|cache| {
99 let mut cache = cache.borrow_mut();
100 if let Some(re) = cache.get(pattern) {
101 return Ok(re.is_match(value));
102 }
103 let re = regex::Regex::new(pattern)
104 .map_err(|e| rusqlite::Error::UserFunctionError(Box::new(e)))?;
105 let result = re.is_match(value);
106 cache.insert(pattern.to_string(), re);
107 Ok(result)
108 })
109 },
110 )
111 .map_err(db_err)?;
112 Ok(())
113 }
114
115 fn create_tables(&self) -> Result<()> {
116 self.conn
117 .execute_batch(
118 "CREATE TABLE IF NOT EXISTS files (
119 path TEXT PRIMARY KEY,
120 filename TEXT NOT NULL,
121 dirname TEXT NOT NULL,
122 is_dir INTEGER NOT NULL DEFAULT 0,
123 file_type TEXT NOT NULL DEFAULT 'Generic'
124 );
125 CREATE TABLE IF NOT EXISTS tags (
126 file_path TEXT NOT NULL,
127 entity_name TEXT NOT NULL,
128 value TEXT NOT NULL,
129 dtype TEXT NOT NULL DEFAULT 'str',
130 is_metadata INTEGER NOT NULL DEFAULT 0,
131 PRIMARY KEY (file_path, entity_name)
132 );
133 CREATE TABLE IF NOT EXISTS associations (
134 src TEXT NOT NULL,
135 dst TEXT NOT NULL,
136 kind TEXT NOT NULL,
137 PRIMARY KEY (src, dst, kind)
138 );
139 CREATE TABLE IF NOT EXISTS layout_info (
140 root TEXT PRIMARY KEY,
141 config TEXT,
142 derivatives TEXT
143 );
144 CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
145 CREATE INDEX IF NOT EXISTS idx_tags_entity ON tags(entity_name);
146 CREATE INDEX IF NOT EXISTS idx_tags_value ON tags(value);
147 CREATE INDEX IF NOT EXISTS idx_assoc_src ON associations(src);
148 CREATE INDEX IF NOT EXISTS idx_assoc_dst ON associations(dst);
149 ",
150 )
151 .map_err(db_err)?;
152 Ok(())
153 }
154
155 pub fn begin_transaction(&self) -> Result<()> {
160 self.conn
161 .execute_batch("BEGIN TRANSACTION")
162 .map_err(db_err)?;
163 Ok(())
164 }
165
166 pub fn commit_transaction(&self) -> Result<()> {
168 self.conn.execute_batch("COMMIT").map_err(db_err)?;
169 Ok(())
170 }
171
172 pub fn rollback_transaction(&self) -> Result<()> {
174 self.conn.execute_batch("ROLLBACK").map_err(db_err)?;
175 Ok(())
176 }
177
178 pub fn insert_file(&self, file: &BidsFile) -> Result<()> {
180 self.conn
181 .execute(
182 "INSERT OR REPLACE INTO files (path, filename, dirname, is_dir, file_type)
183 VALUES (?1, ?2, ?3, ?4, ?5)",
184 params![
185 file.path.to_string_lossy().as_ref(),
186 file.filename,
187 file.dirname.to_string_lossy().as_ref(),
188 file.is_dir as i32,
189 format!("{:?}", file.file_type),
190 ],
191 )
192 .map_err(db_err)?;
193 Ok(())
194 }
195
196 pub fn insert_tag(
198 &self,
199 file_path: &str,
200 entity_name: &str,
201 value: &str,
202 dtype: &str,
203 is_metadata: bool,
204 ) -> Result<()> {
205 self.conn
206 .execute(
207 "INSERT OR REPLACE INTO tags (file_path, entity_name, value, dtype, is_metadata)
208 VALUES (?1, ?2, ?3, ?4, ?5)",
209 params![file_path, entity_name, value, dtype, is_metadata as i32],
210 )
211 .map_err(db_err)?;
212 Ok(())
213 }
214
215 pub fn insert_association(&self, src: &str, dst: &str, kind: &str) -> Result<()> {
217 self.conn
218 .execute(
219 "INSERT OR IGNORE INTO associations (src, dst, kind) VALUES (?1, ?2, ?3)",
220 params![src, dst, kind],
221 )
222 .map_err(db_err)?;
223 Ok(())
224 }
225
226 pub fn set_layout_info(&self, root: &str, config: &str) -> Result<()> {
228 self.conn
229 .execute(
230 "INSERT OR REPLACE INTO layout_info (root, config) VALUES (?1, ?2)",
231 params![root, config],
232 )
233 .map_err(db_err)?;
234 Ok(())
235 }
236
237 pub fn get_layout_info(&self) -> Result<Option<(String, String)>> {
239 let mut stmt = self
240 .conn
241 .prepare("SELECT root, config FROM layout_info LIMIT 1")
242 .map_err(db_err)?;
243
244 let result = stmt
245 .query_row([], |row| {
246 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
247 })
248 .ok();
249 Ok(result)
250 }
251
252 pub fn all_file_paths(&self) -> Result<Vec<String>> {
254 let mut stmt = self
255 .conn
256 .prepare("SELECT path FROM files WHERE is_dir = 0")
257 .map_err(db_err)?;
258 let paths: std::result::Result<Vec<String>, _> = stmt
259 .query_map([], |row| row.get(0))
260 .map_err(db_err)?
261 .collect();
262 paths.map_err(db_err)
263 }
264
265 pub fn get_tags(&self, file_path: &str) -> Result<Vec<(String, String, String, bool)>> {
267 let mut stmt = self
268 .conn
269 .prepare("SELECT entity_name, value, dtype, is_metadata FROM tags WHERE file_path = ?1")
270 .map_err(db_err)?;
271 let tags: std::result::Result<Vec<_>, _> = stmt
272 .query_map(params![file_path], |row| {
273 Ok((
274 row.get::<_, String>(0)?,
275 row.get::<_, String>(1)?,
276 row.get::<_, String>(2)?,
277 row.get::<_, bool>(3)?,
278 ))
279 })
280 .map_err(db_err)?
281 .collect();
282 tags.map_err(db_err)
283 }
284
285 pub fn get_unique_entity_values(&self, entity_name: &str) -> Result<Vec<String>> {
287 let mut stmt = self
288 .conn
289 .prepare("SELECT DISTINCT value FROM tags WHERE entity_name = ?1 ORDER BY value")
290 .map_err(db_err)?;
291 let values: std::result::Result<Vec<String>, _> = stmt
292 .query_map(params![entity_name], |row| row.get(0))
293 .map_err(db_err)?
294 .collect();
295 values.map_err(db_err)
296 }
297
298 pub fn get_entity_names(&self) -> Result<Vec<String>> {
300 let mut stmt = self
301 .conn
302 .prepare("SELECT DISTINCT entity_name FROM tags ORDER BY entity_name")
303 .map_err(db_err)?;
304 let names: std::result::Result<Vec<String>, _> = stmt
305 .query_map([], |row| row.get(0))
306 .map_err(db_err)?
307 .collect();
308 names.map_err(db_err)
309 }
310
311 pub fn query_files(&self, filters: &[(String, Vec<String>, bool)]) -> Result<Vec<String>> {
319 if filters.is_empty() {
320 return self.all_file_paths();
321 }
322
323 let mut sql = String::from("SELECT f.path FROM files f WHERE f.is_dir = 0");
324 let mut bind_values: Vec<String> = Vec::new();
325
326 for (i, (entity_name, values, is_regex)) in filters.iter().enumerate() {
327 use std::fmt::Write;
328
329 if values.len() == 1 {
331 match values[0].as_str() {
332 "__NONE__" => {
333 write!(sql, " AND NOT EXISTS (SELECT 1 FROM tags t{i} WHERE t{i}.file_path = f.path AND t{i}.entity_name = ?)").unwrap();
334 bind_values.push(entity_name.clone());
335 continue;
336 }
337 "__ANY__" => {
338 write!(sql, " AND EXISTS (SELECT 1 FROM tags t{i} WHERE t{i}.file_path = f.path AND t{i}.entity_name = ?)").unwrap();
339 bind_values.push(entity_name.clone());
340 continue;
341 }
342 "__OPTIONAL__" => continue,
343 _ => {}
344 }
345 }
346
347 if *is_regex && !values.is_empty() {
348 write!(sql, " AND EXISTS (SELECT 1 FROM tags t{i} WHERE t{i}.file_path = f.path AND t{i}.entity_name = ? AND t{i}.value REGEXP ?)").unwrap();
349 bind_values.push(entity_name.clone());
350 bind_values.push(values[0].clone());
351 } else if values.len() == 1 {
352 write!(sql, " AND EXISTS (SELECT 1 FROM tags t{i} WHERE t{i}.file_path = f.path AND t{i}.entity_name = ? AND t{i}.value = ?)").unwrap();
353 bind_values.push(entity_name.clone());
354 bind_values.push(values[0].clone());
355 } else if !values.is_empty() {
356 let placeholders = "?,".repeat(values.len());
357 let placeholders = &placeholders[..placeholders.len() - 1];
358 write!(sql, " AND EXISTS (SELECT 1 FROM tags t{i} WHERE t{i}.file_path = f.path AND t{i}.entity_name = ? AND t{i}.value IN ({placeholders}))").unwrap();
359 bind_values.push(entity_name.clone());
360 bind_values.extend(values.iter().cloned());
361 }
362 }
363
364 sql.push_str(" ORDER BY f.path");
365
366 let mut stmt = self.conn.prepare(&sql).map_err(db_err)?;
367 let params: Vec<&dyn rusqlite::types::ToSql> = bind_values
368 .iter()
369 .map(|v| v as &dyn rusqlite::types::ToSql)
370 .collect();
371 let paths: std::result::Result<Vec<String>, _> = stmt
372 .query_map(params.as_slice(), |row| row.get::<_, String>(0))
373 .map_err(db_err)?
374 .collect();
375 paths.map_err(db_err)
376 }
377
378 pub fn query_directories(
380 &self,
381 target_entity: &str,
382 filters: &[(String, Vec<String>, bool)],
383 ) -> Result<Vec<String>> {
384 let paths = self.query_files(filters)?;
385 let mut dirs = std::collections::BTreeSet::new();
386 for path_str in &paths {
387 let tags = self.get_tags(path_str)?;
389 if tags.iter().any(|(n, _, _, _)| n == target_entity)
390 && let Some(parent) = std::path::Path::new(path_str).parent()
391 {
392 dirs.insert(parent.to_string_lossy().to_string());
393 }
394 }
395 Ok(dirs.into_iter().collect())
396 }
397
398 pub fn get_associations(&self, src: &str, kind: Option<&str>) -> Result<Vec<(String, String)>> {
400 let (sql, params_vec): (String, Vec<String>) = if let Some(k) = kind {
401 (
402 "SELECT dst, kind FROM associations WHERE src = ?1 AND kind = ?2".to_string(),
403 vec![src.to_string(), k.to_string()],
404 )
405 } else {
406 (
407 "SELECT dst, kind FROM associations WHERE src = ?1".to_string(),
408 vec![src.to_string()],
409 )
410 };
411
412 let mut stmt = self.conn.prepare(&sql).map_err(db_err)?;
413 let params: Vec<&dyn rusqlite::types::ToSql> = params_vec
414 .iter()
415 .map(|v| v as &dyn rusqlite::types::ToSql)
416 .collect();
417 let assocs: std::result::Result<Vec<_>, _> = stmt
418 .query_map(params.as_slice(), |row| {
419 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
420 })
421 .map_err(db_err)?
422 .collect();
423 assocs.map_err(db_err)
424 }
425
426 pub fn file_count(&self) -> Result<usize> {
428 let mut stmt = self
429 .conn
430 .prepare("SELECT COUNT(*) FROM files WHERE is_dir = 0")
431 .map_err(db_err)?;
432 let count: i64 = stmt.query_row([], |row| row.get(0)).map_err(db_err)?;
433 Ok(count as usize)
434 }
435
436 pub fn save_to(&self, path: &Path) -> Result<()> {
438 if let Some(parent) = path.parent() {
439 std::fs::create_dir_all(parent)?;
440 }
441 let mut backup = Connection::open(path).map_err(db_err)?;
442 let b = rusqlite::backup::Backup::new(&self.conn, &mut backup).map_err(db_err)?;
443 b.step(-1).map_err(db_err)?;
444 Ok(())
445 }
446}
447
448#[cfg(test)]
449mod tests {
450 use super::*;
451 use bids_core::file::BidsFile;
452
453 #[test]
454 fn test_database_operations() {
455 let db = Database::in_memory().unwrap();
456 let file = BidsFile::new("/data/sub-01/eeg/sub-01_task-rest_eeg.edf");
457 db.insert_file(&file).unwrap();
458 db.insert_tag(
459 "/data/sub-01/eeg/sub-01_task-rest_eeg.edf",
460 "subject",
461 "01",
462 "str",
463 false,
464 )
465 .unwrap();
466 db.insert_tag(
467 "/data/sub-01/eeg/sub-01_task-rest_eeg.edf",
468 "task",
469 "rest",
470 "str",
471 false,
472 )
473 .unwrap();
474
475 let paths = db.all_file_paths().unwrap();
476 assert_eq!(paths.len(), 1);
477
478 let subjects = db.get_unique_entity_values("subject").unwrap();
479 assert_eq!(subjects, vec!["01"]);
480
481 let filters = vec![("subject".to_string(), vec!["01".to_string()], false)];
482 let results = db.query_files(&filters).unwrap();
483 assert_eq!(results.len(), 1);
484 }
485
486 #[test]
487 fn test_query_none_any() {
488 let db = Database::in_memory().unwrap();
489 let f1 = BidsFile::new("/data/sub-01_task-rest_eeg.edf");
490 let f2 = BidsFile::new("/data/sub-02_eeg.edf");
491 db.insert_file(&f1).unwrap();
492 db.insert_file(&f2).unwrap();
493 db.insert_tag(
494 "/data/sub-01_task-rest_eeg.edf",
495 "subject",
496 "01",
497 "str",
498 false,
499 )
500 .unwrap();
501 db.insert_tag(
502 "/data/sub-01_task-rest_eeg.edf",
503 "task",
504 "rest",
505 "str",
506 false,
507 )
508 .unwrap();
509 db.insert_tag("/data/sub-02_eeg.edf", "subject", "02", "str", false)
510 .unwrap();
511
512 let filters = vec![("task".to_string(), vec!["__ANY__".to_string()], false)];
514 let results = db.query_files(&filters).unwrap();
515 assert_eq!(results.len(), 1);
516 assert!(results[0].contains("sub-01"));
517
518 let filters = vec![("task".to_string(), vec!["__NONE__".to_string()], false)];
520 let results = db.query_files(&filters).unwrap();
521 assert_eq!(results.len(), 1);
522 assert!(results[0].contains("sub-02"));
523 }
524
525 #[test]
526 fn test_transactions() {
527 let db = Database::in_memory().unwrap();
528 db.begin_transaction().unwrap();
529
530 for i in 0..100 {
531 let path = format!("/data/sub-{:03}_eeg.edf", i);
532 let f = BidsFile::new(&path);
533 db.insert_file(&f).unwrap();
534 db.insert_tag(&path, "subject", &format!("{:03}", i), "str", false)
535 .unwrap();
536 }
537
538 db.commit_transaction().unwrap();
539
540 assert_eq!(db.file_count().unwrap(), 100);
541 let subjects = db.get_unique_entity_values("subject").unwrap();
542 assert_eq!(subjects.len(), 100);
543 }
544
545 #[test]
546 fn test_transaction_rollback() {
547 let db = Database::in_memory().unwrap();
548
549 let f = BidsFile::new("/data/sub-01_eeg.edf");
551 db.insert_file(&f).unwrap();
552 assert_eq!(db.file_count().unwrap(), 1);
553
554 db.begin_transaction().unwrap();
556 let f2 = BidsFile::new("/data/sub-02_eeg.edf");
557 db.insert_file(&f2).unwrap();
558 db.rollback_transaction().unwrap();
559
560 assert_eq!(db.file_count().unwrap(), 1);
562 }
563
564 #[test]
565 fn test_regexp_query() {
566 let db = Database::in_memory().unwrap();
567 let f1 = BidsFile::new("/data/sub-01_eeg.edf");
568 let f2 = BidsFile::new("/data/sub-02_eeg.edf");
569 db.insert_file(&f1).unwrap();
570 db.insert_file(&f2).unwrap();
571 db.insert_tag("/data/sub-01_eeg.edf", "subject", "01", "str", false)
572 .unwrap();
573 db.insert_tag("/data/sub-02_eeg.edf", "subject", "02", "str", false)
574 .unwrap();
575
576 let filters = vec![("subject".to_string(), vec!["0[12]".to_string()], true)];
577 let results = db.query_files(&filters).unwrap();
578 assert_eq!(results.len(), 2);
579 }
580}