Skip to main content

vaultdb_core/
vault.rs

1//! [`Vault`]: the library entry point. Discovers a vault from `.obsidian/`,
2//! lists files, loads records, runs structured queries, builds the link
3//! graph. Also defines [`LoadResult`], the parse-diagnostic-bearing return
4//! type from `Vault::load_records`.
5
6use std::path::{Path, PathBuf};
7
8use walkdir::WalkDir;
9
10use crate::error::{Result, VaultdbError};
11use crate::frontmatter;
12use crate::record::Record;
13
/// Records loaded from a folder, with per-file parse diagnostics.
///
/// Files with malformed YAML frontmatter appear in `parse_errors` rather than
/// being silently dropped. Files without frontmatter at all are loaded as
/// empty records (this is intentional — they remain queryable by virtual
/// fields like `_name` / `_path`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LoadResult {
    /// Every record that loaded, including the empty-field records created
    /// for files that have no frontmatter.
    pub records: Vec<Record>,
    /// One entry per file whose frontmatter was reported as invalid by the
    /// loader; such files contribute no entry to `records`.
    pub parse_errors: Vec<crate::error::ParseError>,
}
25
/// Represents a discovered Obsidian vault.
pub struct Vault {
    /// Vault root directory. `Vault::discover` sets this to the directory
    /// that contains `.obsidian/`; `Vault::with_root` stores whatever path
    /// it is given without checking it.
    pub root: PathBuf,
}
30
31impl Vault {
32    /// Discover vault root by walking up from `start` looking for `.obsidian/`.
33    pub fn discover(start: &Path) -> Result<Self> {
34        let mut current = start.to_path_buf();
35        loop {
36            if current.join(".obsidian").is_dir() {
37                return Ok(Vault { root: current });
38            }
39            if !current.pop() {
40                return Err(VaultdbError::VaultNotFound(start.display().to_string()));
41            }
42        }
43    }
44
45    /// Create a Vault with an explicit root path (skips discovery).
46    pub fn with_root(root: PathBuf) -> Self {
47        Vault { root }
48    }
49
50    /// Replay any pending journals from previously-crashed mutations.
51    ///
52    /// Currently the only mutation that writes a journal is
53    /// [`crate::RenameBuilder::execute`] — a rename that crashed between
54    /// the file rename and finishing every backlink rewrite leaves a
55    /// journal at `<vault>/.vaultdb/rename-journal/`. This method
56    /// replays each pending journal idempotently and returns the count
57    /// of journals processed.
58    ///
59    /// Long-lived consumers (eduport-tauri, etc.) should call this
60    /// at startup. Each mutation also runs replay implicitly under
61    /// the vault lock, so the only behavioural difference is timing:
62    /// explicit recovery surfaces leftover work earlier.
63    pub fn recover(&self) -> Result<usize> {
64        crate::lock::with_lock(&self.root, || crate::journal::replay_all(&self.root))
65    }
66
67    /// Resolve a folder argument (relative to vault root) to an absolute path.
68    pub fn resolve_folder(&self, folder: &str) -> Result<PathBuf> {
69        let path = self.root.join(folder);
70        if path.is_dir() {
71            Ok(path)
72        } else {
73            Err(VaultdbError::FolderNotFound(folder.to_string()))
74        }
75    }
76
77    /// List all .md files in a folder. If `recursive`, walks subdirectories.
78    pub fn list_files(&self, folder: &Path, recursive: bool) -> Result<Vec<PathBuf>> {
79        let mut files = Vec::new();
80
81        if recursive {
82            for entry in WalkDir::new(folder)
83                .follow_links(false)
84                .into_iter()
85                .filter_entry(|e| {
86                    // Skip hidden directories — but allow the root entry
87                    // itself, even when it lives under a hidden parent (e.g.
88                    // a TempDir whose name starts with `.tmp`).
89                    e.depth() == 0 || !e.file_name().to_str().is_some_and(|s| s.starts_with('.'))
90                })
91            {
92                let entry = entry.map_err(|e| std::io::Error::other(e.to_string()))?;
93                if entry.file_type().is_file()
94                    && entry.path().extension().is_some_and(|ext| ext == "md")
95                {
96                    files.push(entry.into_path());
97                }
98            }
99        } else {
100            for entry in std::fs::read_dir(folder)? {
101                let entry = entry?;
102                let path = entry.path();
103                if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
104                    files.push(path);
105                }
106            }
107        }
108
109        files.sort();
110        Ok(files)
111    }
112
113    /// Load records from a folder, collecting per-file parse diagnostics.
114    ///
115    /// Files with no frontmatter are loaded as empty records (queryable via
116    /// virtual fields). Files with invalid frontmatter are collected into
117    /// `LoadResult.parse_errors` rather than dropped.
118    ///
119    /// `verbose` is preserved for compatibility with the CLI's `-v` flag — it
120    /// causes parse errors to also be logged to stderr as they're encountered.
121    /// Library consumers that don't want stderr logging should pass `false` and
122    /// inspect `parse_errors` themselves.
123    pub fn load_records(
124        &self,
125        folder: &Path,
126        recursive: bool,
127        verbose: bool,
128    ) -> Result<LoadResult> {
129        let files = self.list_files(folder, recursive)?;
130        let mut records = Vec::new();
131        let mut parse_errors = Vec::new();
132
133        for path in files {
134            match frontmatter::load_record(&path) {
135                Ok(record) => records.push(record),
136                Err(VaultdbError::NoFrontmatter(_)) => {
137                    records.push(Record {
138                        path: path.clone(),
139                        fields: std::collections::BTreeMap::new(),
140                        raw_content: None,
141                    });
142                }
143                Err(VaultdbError::InvalidFrontmatter { file, reason }) => {
144                    if verbose {
145                        eprintln!("skipping (invalid frontmatter): {}: {}", file, reason);
146                    }
147                    parse_errors.push(crate::error::ParseError {
148                        file: std::path::PathBuf::from(&file),
149                        message: reason,
150                    });
151                }
152                Err(e) => return Err(e),
153            }
154        }
155
156        Ok(LoadResult {
157            records,
158            parse_errors,
159        })
160    }
161
162    /// Load records with raw content preserved (for write operations and link extraction),
163    /// collecting per-file parse diagnostics.
164    ///
165    /// Files with no frontmatter are loaded as empty records with their raw content set.
166    /// Files with invalid frontmatter are collected into `LoadResult.parse_errors` rather
167    /// than dropped.
168    pub fn load_records_with_content(
169        &self,
170        folder: &Path,
171        recursive: bool,
172        verbose: bool,
173    ) -> Result<LoadResult> {
174        let files = self.list_files(folder, recursive)?;
175        let mut records = Vec::new();
176        let mut parse_errors = Vec::new();
177
178        for path in files {
179            match frontmatter::load_record_with_content(&path) {
180                Ok(record) => records.push(record),
181                Err(VaultdbError::NoFrontmatter(_)) => {
182                    let content = std::fs::read_to_string(&path)?;
183                    records.push(Record {
184                        path: path.clone(),
185                        fields: std::collections::BTreeMap::new(),
186                        raw_content: Some(content),
187                    });
188                }
189                Err(VaultdbError::InvalidFrontmatter { file, reason }) => {
190                    if verbose {
191                        eprintln!("skipping (invalid frontmatter): {}: {}", file, reason);
192                    }
193                    parse_errors.push(crate::error::ParseError {
194                        file: std::path::PathBuf::from(&file),
195                        message: reason,
196                    });
197                }
198                Err(e) => return Err(e),
199            }
200        }
201
202        Ok(LoadResult {
203            records,
204            parse_errors,
205        })
206    }
207
208    /// Look up a single record by its filename (without the `.md` extension)
209    /// inside the given folder.
210    ///
211    /// Returns `Ok(None)` if no such file exists. Returns `Ok(Some(record))`
212    /// when the file exists and parses cleanly. Returns
213    /// `Err(VaultdbError::InvalidFrontmatter)` if the file exists but its
214    /// frontmatter is malformed — unlike `load_records`, single-record lookup
215    /// surfaces parse errors as a hard error because the caller asked for one
216    /// specific record.
217    pub fn find_by_name(&self, folder: &str, name: &str) -> Result<Option<Record>> {
218        let folder_path = self.resolve_folder(folder)?;
219        let candidate = folder_path.join(format!("{}.md", name));
220        if !candidate.is_file() {
221            return Ok(None);
222        }
223        match frontmatter::load_record(&candidate) {
224            Ok(record) => Ok(Some(record)),
225            Err(VaultdbError::NoFrontmatter(_)) => Ok(Some(Record {
226                path: candidate,
227                fields: std::collections::BTreeMap::new(),
228                raw_content: None,
229            })),
230            Err(e) => Err(e),
231        }
232    }
233
234    /// Build a link graph over the given scope.
235    ///
236    /// `GraphScope::All` walks the whole vault recursively. `Folder(name)`
237    /// scopes to one folder. `Where(expr)` first walks the whole vault, builds
238    /// a temporary graph for predicate evaluation (so link predicates work),
239    /// filters records, and rebuilds the graph from the filtered subset.
240    ///
241    /// Records are loaded with raw content so wikilinks can be extracted.
242    pub fn link_graph(&self, scope: crate::links::GraphScope) -> Result<crate::links::LinkGraph> {
243        use crate::links::{GraphScope, LinkGraph};
244        let records: Vec<Record> = match scope {
245            GraphScope::All => {
246                self.load_records_with_content(&self.root, true, false)?
247                    .records
248            }
249            GraphScope::Folder(folder) => {
250                let path = self.resolve_folder(&folder)?;
251                self.load_records_with_content(&path, true, false)?.records
252            }
253            GraphScope::Where(expr) => {
254                let all = self
255                    .load_records_with_content(&self.root, true, false)?
256                    .records;
257                let idx = LinkGraph::build_with_root(&all, Some(&self.root));
258                all.into_iter()
259                    .filter(|r| crate::filter::evaluate_expr(&expr, r, &self.root, Some(&idx)))
260                    .collect()
261            }
262        };
263        Ok(LinkGraph::build_with_root(&records, Some(&self.root)))
264    }
265
266    /// Run a structured query against the vault. Returns the matching records,
267    /// optionally projected, sorted, and limited per the `Query`'s fields.
268    ///
269    /// The records returned have `raw_content` set to `None` (use
270    /// `load_records_with_content` if you need the body text).
271    ///
272    /// Eager: loads, filters, sorts, limits, and projects all in memory.
273    /// Use [`Vault::query_iter`] for the streaming variant when memory
274    /// pressure matters (large vaults; bounded top-K with sort+limit).
275    pub fn query(&self, q: &crate::query::Query) -> Result<Vec<Record>> {
276        // Run query_iter and collect. The iterator's internal state
277        // already handles filter / sort / limit / projection; we just
278        // gather the result into a Vec. Errors mid-stream propagate.
279        self.query_iter(q)?.collect::<Result<Vec<_>>>()
280    }
281
282    /// Streaming variant of [`Vault::query`].
283    ///
284    /// Returns an iterator yielding `Result<Record>`. The implementation
285    /// chooses the most memory-efficient strategy compatible with the
286    /// query:
287    ///
288    /// - **No sort, no graph predicate, no body-search**: pure file-by-
289    ///   file streaming. Records are loaded one at a time and filtered
290    ///   inline; resident memory is O(1) regardless of vault size.
291    /// - **Sort + limit**: bounded top-K via a binary heap of size
292    ///   `limit`. Memory is O(limit), so "give me the most-recent 50
293    ///   records out of 100K" is cheap.
294    /// - **Sort, no limit; or graph/body predicates**: materializes the
295    ///   working set in memory the same way [`Vault::query`] does, then
296    ///   streams from the buffer. Memory is O(N) — same as the eager
297    ///   call. (We can't stream a sort without materializing, and graph
298    ///   predicates need the link graph built from all records.)
299    ///
300    /// The iterator yields `Err(...)` on per-file IO failures rather
301    /// than aborting the whole query; the caller decides whether to
302    /// stop or continue.
303    pub fn query_iter(&self, q: &crate::query::Query) -> Result<QueryIter> {
304        let folder_path = self.resolve_folder(&q.folder)?;
305        let needs_links = q
306            .filter
307            .as_ref()
308            .is_some_and(crate::filter::expr_uses_links);
309        // Body-content predicates (e.g. `_body contains "foo"`) need
310        // raw_content loaded but DON'T need the link graph. We track
311        // them separately so streaming with body predicates still works
312        // — only the load function changes per file.
313        let needs_body_content = q
314            .filter
315            .as_ref()
316            .is_some_and(crate::filter::expr_needs_body_content);
317
318        // Pure-streaming path: no sort, no graph predicates. We iterate
319        // file paths lazily, load each record on demand, filter, and
320        // yield. Body predicates are fine here — we just call
321        // load_record_with_content per file when needed. Vault size
322        // doesn't affect resident memory.
323        if !needs_links && q.sort.is_none() {
324            let paths = self.list_files(&folder_path, q.recursive)?;
325            let select_set: Option<std::collections::BTreeSet<String>> = q
326                .select
327                .as_ref()
328                .map(|fields| fields.iter().cloned().collect());
329            return Ok(QueryIter {
330                state: QueryIterState::Streaming(StreamingState {
331                    paths: paths.into_iter(),
332                    filter: q.filter.clone(),
333                    select_set,
334                    vault_root: self.root.clone(),
335                    limit: q.limit,
336                    yielded: 0,
337                    needs_content: needs_body_content,
338                }),
339            });
340        }
341
342        // Materialized path: load everything, filter, then sort+limit
343        // (with top-K when both are present and limit < total) and
344        // project. This degrades gracefully into the same behaviour as
345        // the previous eager implementation.
346        let load = if needs_links || needs_body_content {
347            self.load_records_with_content(&folder_path, q.recursive, false)?
348        } else {
349            self.load_records(&folder_path, q.recursive, false)?
350        };
351        let mut records = load.records;
352        let link_index = if needs_links {
353            Some(crate::links::LinkGraph::build(&records))
354        } else {
355            None
356        };
357
358        if let Some(filter) = &q.filter {
359            records.retain(|r| {
360                crate::filter::evaluate_expr(filter, r, &self.root, link_index.as_ref())
361            });
362        }
363
364        match (&q.sort, q.limit) {
365            (Some(sort_key), Some(limit)) if limit < records.len() => {
366                records = top_k_sorted(records, sort_key, limit, &self.root);
367            }
368            (Some(sort_key), maybe_limit) => {
369                sort_records(&mut records, sort_key, &self.root);
370                if let Some(limit) = maybe_limit {
371                    records.truncate(limit);
372                }
373            }
374            (None, Some(limit)) => {
375                records.truncate(limit);
376            }
377            (None, None) => {}
378        }
379
380        if let Some(select) = &q.select {
381            let select_set: std::collections::BTreeSet<&str> =
382                select.iter().map(|s| s.as_str()).collect();
383            for record in records.iter_mut() {
384                record.fields.retain(|k, _| select_set.contains(k.as_str()));
385            }
386        }
387
388        Ok(QueryIter {
389            state: QueryIterState::Materialized(records.into_iter()),
390        })
391    }
392}
393
/// Streaming iterator yielded by [`Vault::query_iter`]. Each `next()`
/// produces `Result<Record>` so per-file errors surface to the caller
/// instead of aborting the whole query.
pub struct QueryIter {
    /// Which strategy backs this iterator: lazy per-file loading or a
    /// buffer that was fully built before the iterator was returned.
    state: QueryIterState,
}
400
/// Internal strategy of a [`QueryIter`]; selected once by
/// `Vault::query_iter` and never switched afterwards.
enum QueryIterState {
    /// Pure streaming: pulls one file at a time, loads, filters, yields.
    Streaming(StreamingState),
    /// Pre-materialized: a Vec collected upfront (sort or graph
    /// predicates required).
    Materialized(std::vec::IntoIter<Record>),
}
408
/// Cursor and query parameters for the streaming strategy of [`QueryIter`].
struct StreamingState {
    /// Files still to be visited, in the order `Vault::list_files` returned.
    paths: std::vec::IntoIter<PathBuf>,
    /// Optional predicate; records that fail it are skipped.
    filter: Option<crate::query::Expr>,
    /// Optional projection: when set, only fields whose names are in the
    /// set are kept on each yielded record.
    select_set: Option<std::collections::BTreeSet<String>>,
    /// Vault root, passed through to filter evaluation.
    vault_root: PathBuf,
    /// Maximum number of records to yield, if any.
    limit: Option<usize>,
    /// Number of records yielded so far; compared against `limit`.
    yielded: usize,
    /// When true, each file is loaded with body content (raw_content
    /// populated) so body-search predicates can run. Otherwise we use
    /// the cheaper frontmatter-only load.
    needs_content: bool,
}
421
422impl Iterator for QueryIter {
423    type Item = Result<Record>;
424
425    fn next(&mut self) -> Option<Self::Item> {
426        match &mut self.state {
427            QueryIterState::Streaming(s) => s.next_record(),
428            QueryIterState::Materialized(iter) => iter.next().map(Ok),
429        }
430    }
431}
432
433impl StreamingState {
434    fn next_record(&mut self) -> Option<Result<Record>> {
435        // Stop early once limit is reached — this is part of why
436        // streaming + limit is so cheap on large vaults.
437        if let Some(limit) = self.limit
438            && self.yielded >= limit
439        {
440            return None;
441        }
442        loop {
443            let path = self.paths.next()?;
444            let load_result = if self.needs_content {
445                crate::frontmatter::load_record_with_content(&path)
446            } else {
447                crate::frontmatter::load_record(&path)
448            };
449            let record = match load_result {
450                Ok(r) => r,
451                Err(VaultdbError::NoFrontmatter(_)) => {
452                    // No frontmatter: yield an empty-fields record. If
453                    // body content was requested, populate raw_content
454                    // by reading the file directly so body predicates
455                    // can still run.
456                    let raw_content = if self.needs_content {
457                        std::fs::read_to_string(&path).ok()
458                    } else {
459                        None
460                    };
461                    Record {
462                        path: path.clone(),
463                        fields: std::collections::BTreeMap::new(),
464                        raw_content,
465                    }
466                }
467                Err(VaultdbError::InvalidFrontmatter { .. }) => {
468                    // Skip files with malformed YAML — same behaviour
469                    // as the eager load. Eduport-core / CLI consumers
470                    // that want to surface these should call
471                    // `Vault::load_records` and inspect parse_errors.
472                    continue;
473                }
474                Err(e) => return Some(Err(e)),
475            };
476
477            if let Some(filter) = &self.filter
478                && !crate::filter::evaluate_expr(filter, &record, &self.vault_root, None)
479            {
480                continue;
481            }
482
483            let mut record = record;
484            if let Some(select_set) = &self.select_set {
485                record.fields.retain(|k, _| select_set.contains(k));
486            }
487            self.yielded += 1;
488            return Some(Ok(record));
489        }
490    }
491}
492
493/// Sort `records` in place by the given sort key.
494fn sort_records(records: &mut [Record], sort_key: &crate::query::SortKey, vault_root: &Path) {
495    records.sort_by(|a, b| {
496        let av = a
497            .get(&sort_key.field, vault_root)
498            .unwrap_or(crate::record::Value::Null);
499        let bv = b
500            .get(&sort_key.field, vault_root)
501            .unwrap_or(crate::record::Value::Null);
502        let ord = crate::filter::compare_values(&av, &bv);
503        if sort_key.descending {
504            ord.reverse()
505        } else {
506            ord
507        }
508    });
509}
510
511/// Top-K via a bounded binary heap. Memory: O(k). Returns the K
512/// records with the smallest (or, if descending, largest) sort-key
513/// values, sorted in the requested order.
514///
515/// We use a max-heap (default `BinaryHeap`) wrapped in `Reverse` so it
516/// behaves as a min-heap by default, then push descending-aware
517/// comparisons through the wrapper. The final result is sorted at the
518/// end via `into_sorted_vec`.
519fn top_k_sorted(
520    records: Vec<Record>,
521    sort_key: &crate::query::SortKey,
522    k: usize,
523    vault_root: &Path,
524) -> Vec<Record> {
525    use std::cmp::Ordering;
526
527    if k == 0 {
528        return Vec::new();
529    }
530
531    // Wrapper that compares two records by the sort field. The order
532    // of cmp is chosen so that `BinaryHeap`'s default max-heap behaviour
533    // gives us the correct K records to *evict* — i.e. the heap holds
534    // the K best candidates so far, and the root is the worst of those.
535    struct Entry<'a> {
536        sort_key: &'a crate::query::SortKey,
537        vault_root: &'a Path,
538        record: Record,
539    }
540    impl PartialEq for Entry<'_> {
541        fn eq(&self, other: &Self) -> bool {
542            self.cmp(other) == Ordering::Equal
543        }
544    }
545    impl Eq for Entry<'_> {}
546    impl PartialOrd for Entry<'_> {
547        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
548            Some(self.cmp(other))
549        }
550    }
551    impl Ord for Entry<'_> {
552        fn cmp(&self, other: &Self) -> Ordering {
553            let av = self
554                .record
555                .get(&self.sort_key.field, self.vault_root)
556                .unwrap_or(crate::record::Value::Null);
557            let bv = other
558                .record
559                .get(&self.sort_key.field, other.vault_root)
560                .unwrap_or(crate::record::Value::Null);
561            let ord = crate::filter::compare_values(&av, &bv);
562            if self.sort_key.descending {
563                ord.reverse()
564            } else {
565                ord
566            }
567        }
568    }
569
570    let mut heap: std::collections::BinaryHeap<Entry> =
571        std::collections::BinaryHeap::with_capacity(k + 1);
572    for record in records {
573        let entry = Entry {
574            sort_key,
575            vault_root,
576            record,
577        };
578        if heap.len() < k {
579            heap.push(entry);
580        } else if let Some(top) = heap.peek()
581            && entry < *top
582        {
583            heap.pop();
584            heap.push(entry);
585        }
586    }
587
588    // `into_sorted_vec` returns ascending by `Ord`, which under our
589    // descending-aware Ord gives the user-requested order.
590    heap.into_sorted_vec()
591        .into_iter()
592        .map(|e| e.record)
593        .collect()
594}
595
596#[cfg(test)]
597mod tests {
598    use super::*;
599    use std::fs;
600    use tempfile::TempDir;
601
602    fn create_test_vault() -> TempDir {
603        let dir = TempDir::new().unwrap();
604        // Create .obsidian directory
605        fs::create_dir(dir.path().join(".obsidian")).unwrap();
606        // Create a notes folder
607        fs::create_dir(dir.path().join("notes")).unwrap();
608        // Create some .md files
609        fs::write(
610            dir.path().join("notes/test1.md"),
611            "---\ntags:\n  - type/concept\nstatus: active\n---\nBody 1\n",
612        )
613        .unwrap();
614        fs::write(
615            dir.path().join("notes/test2.md"),
616            "---\ntags:\n  - type/leaf\nstatus: draft\n---\nBody 2\n",
617        )
618        .unwrap();
619        // A file without frontmatter
620        fs::write(
621            dir.path().join("notes/no_fm.md"),
622            "# Just a heading\nNo frontmatter.\n",
623        )
624        .unwrap();
625        // A non-md file (should be ignored)
626        fs::write(dir.path().join("notes/readme.txt"), "not markdown").unwrap();
627        dir
628    }
629
630    #[test]
631    fn discover_vault_from_subfolder() {
632        let dir = create_test_vault();
633        let notes_dir = dir.path().join("notes");
634        let vault = Vault::discover(&notes_dir).unwrap();
635        assert_eq!(vault.root, dir.path());
636    }
637
638    #[test]
639    fn discover_vault_not_found() {
640        let dir = TempDir::new().unwrap();
641        let result = Vault::discover(dir.path());
642        assert!(matches!(result, Err(VaultdbError::VaultNotFound(_))));
643    }
644
645    #[test]
646    fn resolve_folder_existing() {
647        let dir = create_test_vault();
648        let vault = Vault::with_root(dir.path().to_path_buf());
649        let path = vault.resolve_folder("notes").unwrap();
650        assert_eq!(path, dir.path().join("notes"));
651    }
652
653    #[test]
654    fn resolve_folder_missing() {
655        let dir = create_test_vault();
656        let vault = Vault::with_root(dir.path().to_path_buf());
657        let result = vault.resolve_folder("nonexistent");
658        assert!(matches!(result, Err(VaultdbError::FolderNotFound(_))));
659    }
660
661    #[test]
662    fn list_files_only_md() {
663        let dir = create_test_vault();
664        let vault = Vault::with_root(dir.path().to_path_buf());
665        let files = vault.list_files(&dir.path().join("notes"), false).unwrap();
666        assert_eq!(files.len(), 3); // test1.md, test2.md, no_fm.md
667        assert!(files.iter().all(|f| f.extension().unwrap() == "md"));
668    }
669
670    #[test]
671    fn load_records_includes_no_frontmatter() {
672        let dir = create_test_vault();
673        let vault = Vault::with_root(dir.path().to_path_buf());
674        let records = vault
675            .load_records(&dir.path().join("notes"), false, false)
676            .unwrap()
677            .records;
678        // Should load all 3 .md files, including no_fm.md with empty fields
679        assert_eq!(records.len(), 3);
680
681        let no_fm = records
682            .iter()
683            .find(|r| r.virtual_name() == "no_fm")
684            .unwrap();
685        assert!(no_fm.fields.is_empty());
686    }
687
688    #[test]
689    fn load_records_surfaces_invalid_frontmatter_as_parse_errors() {
690        use std::fs;
691
692        let dir = create_test_vault();
693        // Add a file with malformed YAML frontmatter
694        fs::write(
695            dir.path().join("notes/broken.md"),
696            "---\n: : : not yaml\n---\nbody\n",
697        )
698        .unwrap();
699
700        let vault = Vault::with_root(dir.path().to_path_buf());
701        let result = vault
702            .load_records(&dir.path().join("notes"), false, false)
703            .unwrap();
704
705        // The 3 valid-or-empty files (test1, test2, no_fm) load as records;
706        // broken.md is collected as a parse error.
707        assert_eq!(result.records.len(), 3);
708        assert_eq!(result.parse_errors.len(), 1);
709        assert!(result.parse_errors[0].file.ends_with("broken.md"));
710        assert!(!result.parse_errors[0].message.is_empty());
711    }
712
713    #[test]
714    fn recursive_listing() {
715        let dir = create_test_vault();
716        let sub = dir.path().join("notes/sub");
717        fs::create_dir(&sub).unwrap();
718        fs::write(
719            sub.join("nested.md"),
720            "---\ntags:\n  - type/concept\n---\nNested.\n",
721        )
722        .unwrap();
723
724        let vault = Vault::with_root(dir.path().to_path_buf());
725        let files_flat = vault.list_files(&dir.path().join("notes"), false).unwrap();
726        let files_recursive = vault.list_files(&dir.path().join("notes"), true).unwrap();
727
728        assert_eq!(files_flat.len(), 3);
729        assert_eq!(files_recursive.len(), 4); // includes nested.md
730    }
731
732    #[test]
733    fn find_by_name_existing() {
734        let dir = create_test_vault();
735        let vault = Vault::with_root(dir.path().to_path_buf());
736        let r = vault.find_by_name("notes", "test1").unwrap();
737        assert!(r.is_some());
738        assert_eq!(r.unwrap().virtual_name(), "test1");
739    }
740
741    #[test]
742    fn find_by_name_missing() {
743        let dir = create_test_vault();
744        let vault = Vault::with_root(dir.path().to_path_buf());
745        let r = vault.find_by_name("notes", "no-such-record").unwrap();
746        assert!(r.is_none());
747    }
748
749    #[test]
750    fn find_by_name_no_frontmatter_loads_as_empty() {
751        let dir = create_test_vault();
752        let vault = Vault::with_root(dir.path().to_path_buf());
753        // create_test_vault() writes notes/no_fm.md with no frontmatter
754        let r = vault.find_by_name("notes", "no_fm").unwrap().unwrap();
755        assert!(r.fields.is_empty());
756        assert_eq!(r.virtual_name(), "no_fm");
757    }
758
759    #[test]
760    fn find_by_name_invalid_frontmatter_errors() {
761        use std::fs;
762        let dir = create_test_vault();
763        fs::write(dir.path().join("notes/broken.md"), "---\n: : :\n---\n").unwrap();
764        let vault = Vault::with_root(dir.path().to_path_buf());
765        let result = vault.find_by_name("notes", "broken");
766        assert!(matches!(
767            result,
768            Err(VaultdbError::InvalidFrontmatter { .. })
769        ));
770    }
771
772    // ------------------------------------------------------------------
773    // Vault::query tests (Task 3)
774    // ------------------------------------------------------------------
775
776    #[test]
777    fn query_basic_filter() {
778        use crate::query::{Expr, Predicate, Query};
779        use crate::record::Value;
780
781        let dir = create_test_vault();
782        let vault = Vault::with_root(dir.path().to_path_buf());
783
784        // create_test_vault() writes test1.md (status: active) and
785        // test2.md (status: draft), plus no_fm.md (no frontmatter).
786        let q = Query {
787            folder: "notes".into(),
788            filter: Some(Expr::Predicate(Predicate::Equals {
789                field: "status".into(),
790                value: Value::String("active".into()),
791            })),
792            select: None,
793            sort: None,
794            limit: None,
795            recursive: false,
796        };
797
798        let results = vault.query(&q).unwrap();
799        assert_eq!(results.len(), 1, "only test1 has status=active");
800        assert!(results.iter().all(|r| {
801            matches!(
802                r.get("status", &vault.root),
803                Some(Value::String(ref s)) if s == "active"
804            )
805        }));
806    }
807
808    #[test]
809    fn query_with_limit_and_sort() {
810        use crate::query::{Expr, Predicate, Query, SortKey};
811
812        let dir = create_test_vault();
813        let vault = Vault::with_root(dir.path().to_path_buf());
814
815        // Exists predicate on _name matches all 3 records; limit cuts to 2.
816        let q = Query {
817            folder: "notes".into(),
818            filter: Some(Expr::Predicate(Predicate::Exists {
819                field: "_name".into(),
820            })),
821            select: None,
822            sort: Some(SortKey {
823                field: "_name".into(),
824                descending: false,
825            }),
826            limit: Some(2),
827            recursive: false,
828        };
829
830        let results = vault.query(&q).unwrap();
831        assert!(results.len() <= 2, "limit must be respected");
832        // Verify ascending sort: first element's name <= second's
833        if results.len() == 2 {
834            let a = results[0].virtual_name();
835            let b = results[1].virtual_name();
836            assert!(a <= b, "expected ascending order, got {:?} then {:?}", a, b);
837        }
838    }
839
840    #[test]
841    fn query_with_projection() {
842        use crate::query::{Expr, Predicate, Query};
843
844        let dir = create_test_vault();
845        let vault = Vault::with_root(dir.path().to_path_buf());
846
847        // Select only "status"; after projection every record's fields map
848        // must contain only "status" (or be empty if the record had no status).
849        let q = Query {
850            folder: "notes".into(),
851            filter: Some(Expr::Predicate(Predicate::Exists {
852                field: "_name".into(),
853            })),
854            select: Some(vec!["status".into()]),
855            sort: None,
856            limit: None,
857            recursive: false,
858        };
859
860        let results = vault.query(&q).unwrap();
861        // All 3 records are returned (no_fm.md has _name), but after projection
862        // each record's frontmatter fields should only contain "status".
863        assert!(!results.is_empty());
864        let mut found_record_with_status = false;
865        for r in &results {
866            // Every record should have at most "status" in concrete fields
867            assert!(
868                r.fields.keys().all(|k| k == "status"),
869                "expected only 'status' in fields, got {:?}",
870                r.fields.keys().collect::<Vec<_>>()
871            );
872            if r.fields.contains_key("status") {
873                found_record_with_status = true;
874            }
875        }
876        // Some test record must actually have had "status" — otherwise we're testing nothing
877        assert!(
878            found_record_with_status,
879            "expected at least one record to retain 'status' after projection"
880        );
881    }
882
883    #[test]
884    fn query_links_to_target() {
885        use crate::query::{Expr, LinkPredicate, Query};
886        use std::fs;
887
888        let dir = create_test_vault();
889        // Add a record that links to test1
890        fs::write(
891            dir.path().join("notes/linker.md"),
892            "---\ntags:\n  - linker\n---\nLinks to [[test1]]\n",
893        )
894        .unwrap();
895
896        let vault = Vault::with_root(dir.path().to_path_buf());
897        let q = Query {
898            folder: "notes".into(),
899            filter: Some(Expr::LinksTo(LinkPredicate::Target("test1".into()))),
900            select: None,
901            sort: None,
902            limit: None,
903            recursive: false,
904        };
905
906        let results = vault.query(&q).unwrap();
907        // Only `linker` links to test1
908        let names: Vec<String> = results.iter().map(|r| r.virtual_name()).collect();
909        assert!(
910            names.contains(&"linker".to_string()),
911            "expected linker, got {:?}",
912            names
913        );
914    }
915
916    #[test]
917    fn vault_link_graph_all_walks_full_vault() {
918        use crate::links::GraphScope;
919        use std::fs;
920        let dir = create_test_vault();
921        // Add a file with an outgoing wikilink so the graph has at least one edge
922        fs::write(
923            dir.path().join("notes/with_link.md"),
924            "---\nstatus: active\n---\nLinks to [[test1]]\n",
925        )
926        .unwrap();
927        let vault = Vault::with_root(dir.path().to_path_buf());
928        let graph = vault.link_graph(GraphScope::All).unwrap();
929        assert!(
930            graph.incoming_links("test1").contains(&"with_link"),
931            "expected with_link in test1's backlinks"
932        );
933    }
934
935    #[test]
936    fn vault_link_graph_folder_scopes_correctly() {
937        use crate::links::GraphScope;
938        use std::fs;
939        let dir = create_test_vault();
940        fs::write(
941            dir.path().join("notes/with_link.md"),
942            "---\nstatus: active\n---\nLinks to [[test1]]\n",
943        )
944        .unwrap();
945        let vault = Vault::with_root(dir.path().to_path_buf());
946        let graph = vault
947            .link_graph(GraphScope::Folder("notes".into()))
948            .unwrap();
949        assert!(graph.outgoing_links("with_link").contains(&"test1"));
950    }
951
952    // ── query_iter tests ────────────────────────────────────────────────
953
954    #[test]
955    fn query_iter_pure_streaming_yields_all_records() {
956        use crate::query::Query;
957
958        let dir = create_test_vault();
959        let vault = Vault::with_root(dir.path().to_path_buf());
960
961        // No filter, no sort, no limit, no graph predicate ⇒ pure stream.
962        let q = Query {
963            folder: "notes".into(),
964            filter: None,
965            select: None,
966            sort: None,
967            limit: None,
968            recursive: false,
969        };
970        let records: Vec<_> = vault
971            .query_iter(&q)
972            .unwrap()
973            .collect::<Result<Vec<_>>>()
974            .unwrap();
975        // create_test_vault() writes test1.md, test2.md, no_fm.md.
976        assert_eq!(records.len(), 3);
977    }
978
979    #[test]
980    fn query_iter_pure_streaming_filters_inline() {
981        use crate::query::{Expr, Predicate, Query};
982        use crate::record::Value;
983
984        let dir = create_test_vault();
985        let vault = Vault::with_root(dir.path().to_path_buf());
986        let q = Query {
987            folder: "notes".into(),
988            filter: Some(Expr::Predicate(Predicate::Equals {
989                field: "status".into(),
990                value: Value::String("active".into()),
991            })),
992            select: None,
993            sort: None,
994            limit: None,
995            recursive: false,
996        };
997        let records: Vec<_> = vault
998            .query_iter(&q)
999            .unwrap()
1000            .collect::<Result<Vec<_>>>()
1001            .unwrap();
1002        assert_eq!(records.len(), 1, "only test1 has status=active");
1003        assert_eq!(records[0].virtual_name(), "test1");
1004    }
1005
1006    #[test]
1007    fn query_iter_streaming_respects_limit_without_loading_more() {
1008        // Streaming + limit should stop pulling files once `limit`
1009        // matches have been yielded. We can't directly observe the
1010        // load count from the public API, but we can at least verify
1011        // the limit is honored.
1012        use crate::query::{Expr, Predicate, Query};
1013
1014        let dir = create_test_vault();
1015        let vault = Vault::with_root(dir.path().to_path_buf());
1016        let q = Query {
1017            folder: "notes".into(),
1018            filter: Some(Expr::Predicate(Predicate::Exists {
1019                field: "_name".into(),
1020            })),
1021            select: None,
1022            sort: None,
1023            limit: Some(2),
1024            recursive: false,
1025        };
1026        let records: Vec<_> = vault
1027            .query_iter(&q)
1028            .unwrap()
1029            .collect::<Result<Vec<_>>>()
1030            .unwrap();
1031        assert_eq!(records.len(), 2);
1032    }
1033
1034    #[test]
1035    fn query_iter_top_k_when_sort_and_limit_set() {
1036        // Top-K via bounded heap: with N=3 records and limit=2, we should
1037        // see the smallest two (or descending=true: largest two) by name.
1038        use crate::query::{Expr, Predicate, Query, SortKey};
1039
1040        let dir = create_test_vault();
1041        let vault = Vault::with_root(dir.path().to_path_buf());
1042
1043        // create_test_vault() has test1, test2, no_fm. Sort ascending
1044        // by _name and limit 2 → should produce ["no_fm", "test1"].
1045        let q = Query {
1046            folder: "notes".into(),
1047            filter: Some(Expr::Predicate(Predicate::Exists {
1048                field: "_name".into(),
1049            })),
1050            select: None,
1051            sort: Some(SortKey {
1052                field: "_name".into(),
1053                descending: false,
1054            }),
1055            limit: Some(2),
1056            recursive: false,
1057        };
1058        let records: Vec<_> = vault
1059            .query_iter(&q)
1060            .unwrap()
1061            .collect::<Result<Vec<_>>>()
1062            .unwrap();
1063        assert_eq!(records.len(), 2);
1064        assert_eq!(records[0].virtual_name(), "no_fm");
1065        assert_eq!(records[1].virtual_name(), "test1");
1066
1067        // Descending: should produce ["test2", "test1"].
1068        let q_desc = Query {
1069            folder: "notes".into(),
1070            filter: Some(Expr::Predicate(Predicate::Exists {
1071                field: "_name".into(),
1072            })),
1073            select: None,
1074            sort: Some(SortKey {
1075                field: "_name".into(),
1076                descending: true,
1077            }),
1078            limit: Some(2),
1079            recursive: false,
1080        };
1081        let records: Vec<_> = vault
1082            .query_iter(&q_desc)
1083            .unwrap()
1084            .collect::<Result<Vec<_>>>()
1085            .unwrap();
1086        assert_eq!(records.len(), 2);
1087        assert_eq!(records[0].virtual_name(), "test2");
1088        assert_eq!(records[1].virtual_name(), "test1");
1089    }
1090
1091    #[test]
1092    fn query_iter_falls_back_to_buffered_for_graph_predicates() {
1093        // Graph predicates can't run in pure-streaming mode (would need
1094        // the full link graph built upfront). The query_iter call must
1095        // still succeed and return the expected results — it just goes
1096        // through the materialized path internally.
1097        use crate::query::{Expr, LinkPredicate, Query};
1098        use std::fs;
1099
1100        let dir = create_test_vault();
1101        fs::write(
1102            dir.path().join("notes/linker.md"),
1103            "---\ntags:\n  - linker\n---\nLinks to [[test1]]\n",
1104        )
1105        .unwrap();
1106        let vault = Vault::with_root(dir.path().to_path_buf());
1107        let q = Query {
1108            folder: "notes".into(),
1109            filter: Some(Expr::LinksTo(LinkPredicate::Target("test1".into()))),
1110            select: None,
1111            sort: None,
1112            limit: None,
1113            recursive: false,
1114        };
1115        let records: Vec<_> = vault
1116            .query_iter(&q)
1117            .unwrap()
1118            .collect::<Result<Vec<_>>>()
1119            .unwrap();
1120        let names: Vec<String> = records.iter().map(|r| r.virtual_name()).collect();
1121        assert!(
1122            names.contains(&"linker".to_string()),
1123            "expected linker, got {:?}",
1124            names
1125        );
1126    }
1127
1128    #[test]
1129    fn query_eager_and_query_iter_produce_identical_results() {
1130        // Property: for any query, `query()` and `query_iter().collect()`
1131        // should produce exactly the same Vec<Record>. This is a small
1132        // sample but it exercises filter + sort + limit + projection all
1133        // at once.
1134        use crate::query::{Expr, Predicate, Query, SortKey};
1135
1136        let dir = create_test_vault();
1137        let vault = Vault::with_root(dir.path().to_path_buf());
1138
1139        let q = Query {
1140            folder: "notes".into(),
1141            filter: Some(Expr::Predicate(Predicate::Exists {
1142                field: "_name".into(),
1143            })),
1144            select: Some(vec!["status".into()]),
1145            sort: Some(SortKey {
1146                field: "_name".into(),
1147                descending: false,
1148            }),
1149            limit: Some(3),
1150            recursive: false,
1151        };
1152
1153        let eager = vault.query(&q).unwrap();
1154        let streamed: Vec<_> = vault
1155            .query_iter(&q)
1156            .unwrap()
1157            .collect::<Result<Vec<_>>>()
1158            .unwrap();
1159
1160        assert_eq!(eager.len(), streamed.len());
1161        for (a, b) in eager.iter().zip(streamed.iter()) {
1162            assert_eq!(a.virtual_name(), b.virtual_name());
1163            assert_eq!(
1164                a.fields.keys().collect::<Vec<_>>(),
1165                b.fields.keys().collect::<Vec<_>>()
1166            );
1167        }
1168    }
1169
1170    #[test]
1171    fn query_iter_body_contains_finds_records_by_body_text() {
1172        // `_body contains "needle"` is the body-search predicate.
1173        // Records whose body (the file content after the frontmatter)
1174        // contains the needle should match. Frontmatter content does
1175        // NOT count.
1176        use crate::query::{Expr, Predicate, Query};
1177        use crate::record::Value;
1178        use std::fs;
1179
1180        let dir = TempDir::new().unwrap();
1181        fs::create_dir(dir.path().join(".obsidian")).unwrap();
1182        fs::create_dir(dir.path().join("notes")).unwrap();
1183
1184        // a.md: matches in body
1185        fs::write(
1186            dir.path().join("notes/a.md"),
1187            "---\nstatus: active\n---\nThis note discusses microservices.\n",
1188        )
1189        .unwrap();
1190        // b.md: needle appears in frontmatter, NOT body
1191        fs::write(
1192            dir.path().join("notes/b.md"),
1193            "---\ntags:\n  - microservices\n---\nNothing relevant.\n",
1194        )
1195        .unwrap();
1196        // c.md: doesn't match anywhere
1197        fs::write(
1198            dir.path().join("notes/c.md"),
1199            "---\nstatus: draft\n---\nIrrelevant text.\n",
1200        )
1201        .unwrap();
1202
1203        let vault = Vault::with_root(dir.path().to_path_buf());
1204        let q = Query {
1205            folder: "notes".into(),
1206            filter: Some(Expr::Predicate(Predicate::Contains {
1207                field: "_body".into(),
1208                value: Value::String("microservices".into()),
1209            })),
1210            select: None,
1211            sort: None,
1212            limit: None,
1213            recursive: false,
1214        };
1215
1216        let records: Vec<_> = vault
1217            .query_iter(&q)
1218            .unwrap()
1219            .collect::<Result<Vec<_>>>()
1220            .unwrap();
1221
1222        assert_eq!(
1223            records.len(),
1224            1,
1225            "only a.md has 'microservices' in its body, got: {:?}",
1226            records.iter().map(|r| r.virtual_name()).collect::<Vec<_>>()
1227        );
1228        assert_eq!(records[0].virtual_name(), "a");
1229    }
1230
1231    #[test]
1232    fn query_iter_body_matches_runs_regex_on_body_text() {
1233        use crate::query::{Expr, Predicate, Query};
1234        use std::fs;
1235
1236        let dir = TempDir::new().unwrap();
1237        fs::create_dir(dir.path().join(".obsidian")).unwrap();
1238        fs::create_dir(dir.path().join("notes")).unwrap();
1239        fs::write(
1240            dir.path().join("notes/intro.md"),
1241            "---\nstatus: active\n---\n# Introduction\n\nThis is the intro.\n",
1242        )
1243        .unwrap();
1244        fs::write(
1245            dir.path().join("notes/no_heading.md"),
1246            "---\nstatus: active\n---\nJust text, no heading.\n",
1247        )
1248        .unwrap();
1249
1250        let vault = Vault::with_root(dir.path().to_path_buf());
1251
1252        // Match files whose body starts with a level-1 heading.
1253        let q = Query {
1254            folder: "notes".into(),
1255            filter: Some(Expr::Predicate(Predicate::Matches {
1256                field: "_body".into(),
1257                regex: r"^\s*# ".into(),
1258            })),
1259            select: None,
1260            sort: None,
1261            limit: None,
1262            recursive: false,
1263        };
1264        let records: Vec<_> = vault
1265            .query_iter(&q)
1266            .unwrap()
1267            .collect::<Result<Vec<_>>>()
1268            .unwrap();
1269        assert_eq!(records.len(), 1);
1270        assert_eq!(records[0].virtual_name(), "intro");
1271    }
1272
1273    #[test]
1274    fn body_search_works_via_dsl_with_quoted_needle() {
1275        // End-to-end: parse a where-DSL string that uses _body, run
1276        // through query_iter, verify the right records come out.
1277        use crate::query::{Expr, Query};
1278        use std::fs;
1279
1280        let dir = TempDir::new().unwrap();
1281        fs::create_dir(dir.path().join(".obsidian")).unwrap();
1282        fs::create_dir(dir.path().join("notes")).unwrap();
1283        fs::write(
1284            dir.path().join("notes/match.md"),
1285            "---\nstatus: active\n---\nApplied to Stanford last week.\n",
1286        )
1287        .unwrap();
1288        fs::write(
1289            dir.path().join("notes/skip.md"),
1290            "---\nstatus: active\n---\nApplied to MIT.\n",
1291        )
1292        .unwrap();
1293
1294        let vault = Vault::with_root(dir.path().to_path_buf());
1295        let filter = Expr::parse(r#"_body contains "Stanford""#).unwrap();
1296        let q = Query {
1297            folder: "notes".into(),
1298            filter: Some(filter),
1299            select: None,
1300            sort: None,
1301            limit: None,
1302            recursive: false,
1303        };
1304        let records: Vec<_> = vault
1305            .query_iter(&q)
1306            .unwrap()
1307            .collect::<Result<Vec<_>>>()
1308            .unwrap();
1309        assert_eq!(records.len(), 1);
1310        assert_eq!(records[0].virtual_name(), "match");
1311    }
1312
1313    #[test]
1314    fn body_search_combines_with_frontmatter_and_uses_streaming_path() {
1315        // `status = active && _body contains "Stanford"` is exactly
1316        // the kind of query eduport's command palette will use. It
1317        // doesn't reference the link graph, so it should still go
1318        // through the streaming path (just with content loaded).
1319        use crate::query::{Expr, Query};
1320        use std::fs;
1321
1322        let dir = TempDir::new().unwrap();
1323        fs::create_dir(dir.path().join(".obsidian")).unwrap();
1324        fs::create_dir(dir.path().join("notes")).unwrap();
1325        fs::write(
1326            dir.path().join("notes/active_match.md"),
1327            "---\nstatus: active\n---\nApplied to Stanford.\n",
1328        )
1329        .unwrap();
1330        fs::write(
1331            dir.path().join("notes/draft_match.md"),
1332            "---\nstatus: draft\n---\nApplied to Stanford.\n",
1333        )
1334        .unwrap();
1335        fs::write(
1336            dir.path().join("notes/active_no_match.md"),
1337            "---\nstatus: active\n---\nApplied to MIT.\n",
1338        )
1339        .unwrap();
1340
1341        let vault = Vault::with_root(dir.path().to_path_buf());
1342        let filter = Expr::parse(r#"status = active && _body contains "Stanford""#).unwrap();
1343        let q = Query {
1344            folder: "notes".into(),
1345            filter: Some(filter),
1346            select: None,
1347            sort: None,
1348            limit: None,
1349            recursive: false,
1350        };
1351        let records: Vec<_> = vault
1352            .query_iter(&q)
1353            .unwrap()
1354            .collect::<Result<Vec<_>>>()
1355            .unwrap();
1356        assert_eq!(records.len(), 1);
1357        assert_eq!(records[0].virtual_name(), "active_match");
1358    }
1359
1360    #[test]
1361    fn query_iter_skips_invalid_frontmatter_in_streaming_mode() {
1362        // Streaming mode should silently skip files whose YAML
1363        // frontmatter is malformed. The eager path collects them as
1364        // parse_errors; the streaming path matches the eager-path
1365        // behaviour for record yield (the broken file just doesn't
1366        // appear in the result).
1367        use crate::query::Query;
1368        use std::fs;
1369
1370        let dir = create_test_vault();
1371        fs::write(
1372            dir.path().join("notes/broken.md"),
1373            "---\n: : : not yaml\n---\nbody\n",
1374        )
1375        .unwrap();
1376
1377        let vault = Vault::with_root(dir.path().to_path_buf());
1378        let q = Query {
1379            folder: "notes".into(),
1380            filter: None,
1381            select: None,
1382            sort: None,
1383            limit: None,
1384            recursive: false,
1385        };
1386        let records: Vec<_> = vault
1387            .query_iter(&q)
1388            .unwrap()
1389            .collect::<Result<Vec<_>>>()
1390            .unwrap();
1391        // 3 valid + no broken record = 3 (broken.md skipped silently in streaming mode).
1392        assert_eq!(records.len(), 3);
1393        let names: Vec<String> = records.iter().map(|r| r.virtual_name()).collect();
1394        assert!(!names.contains(&"broken".to_string()));
1395    }
1396}