siftdb_core/
lib.rs

1pub mod storage;
2pub mod types;
3pub mod ingest;
4pub mod index;
5pub mod query;
6pub mod bench;
7pub mod fst_index;
8pub mod incremental;
9pub mod inverted_index;
10pub mod tombstone;
11pub mod locking;
12pub mod compaction;
13
14pub use types::*;
15pub use bench::{SiftDBBenchmark, AdvancedBenchmark, AdvancedBenchmarkResults};
16pub use compaction::{CollectionCompactor, CompactionManager, CompactionStatus};
17
18use anyhow::Result;
19use crate::locking::{SWMRLockManager, ReadLock, WriteLock};
20
21/// SiftDB collection - the main entry point
22pub struct SiftDB {
23    path: std::path::PathBuf,
24    lock_manager: SWMRLockManager,
25}
26
27impl SiftDB {
28    /// Open an existing SiftDB collection
29    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
30        let path = path.as_ref().to_path_buf();
31        if !path.exists() {
32            anyhow::bail!("SiftDB collection does not exist at: {}", path.display());
33        }
34        let lock_manager = SWMRLockManager::new(&path);
35        Ok(Self { path, lock_manager })
36    }
37
38    /// Initialize a new SiftDB collection
39    pub fn init<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
40        let path = path.as_ref().to_path_buf();
41        
42        // Create directory structure
43        std::fs::create_dir_all(&path)?;
44        std::fs::create_dir_all(path.join("store"))?;
45        std::fs::create_dir_all(path.join("index"))?;
46        std::fs::create_dir_all(path.join("tmp"))?;
47        std::fs::create_dir_all(path.join("gc"))?;
48
49        // Create initial manifest
50        let manifest = Manifest::new(0);
51        manifest.write_to_file(&path.join("MANIFEST.a"))?;
52
53        let lock_manager = SWMRLockManager::new(&path);
54        Ok(Self { path, lock_manager })
55    }
56
57    /// Get a snapshot of the current state (with default timeout)
58    pub fn snapshot(&self) -> Result<Snapshot> {
59        self.snapshot_with_lock_config(30, "sift-cli".to_string())
60    }
61
62    /// Get a snapshot of the current state with custom lock configuration
63    pub fn snapshot_with_lock_config(&self, timeout_secs: u64, holder_info: String) -> Result<Snapshot> {
64        let manifest_path = self.path.join("MANIFEST.a");
65        let manifest = if manifest_path.exists() {
66            Manifest::read_from_file(&manifest_path)?
67        } else {
68            let manifest_path = self.path.join("MANIFEST.b");
69            if manifest_path.exists() {
70                Manifest::read_from_file(&manifest_path)?
71            } else {
72                anyhow::bail!("No valid manifest found");
73            }
74        };
75
76        // Load indexes once when creating snapshot
77        let path_index = crate::index::PathIndex::read_from_file(&self.path.join("index/path.json"))?;
78        let handles_map = crate::index::HandlesMap::read_from_file(&self.path.join("index/handles.json"))?;
79        
80        // Load inverted index (if it exists, otherwise create empty one)
81        let inverted_index = if self.path.join("index/terms.fst").exists() && self.path.join("index/posting_lists.json").exists() {
82            crate::inverted_index::InvertedIndex::load_from_files(
83                &self.path.join("index/terms.fst"),
84                &self.path.join("index/posting_lists.json")
85            )?
86        } else {
87            crate::inverted_index::InvertedIndex::new()
88        };
89
90        // For now, just create a placeholder for the lock - proper integration pending
91        let _read_lock = self.lock_manager.acquire_read_lock(timeout_secs, holder_info)?;
92        Ok(Snapshot {
93            collection_path: self.path.clone(),
94            epoch: manifest.epoch,
95            path_index,
96            handles_map,
97            inverted_index,
98            segment_cache: std::collections::HashMap::new(),
99            _read_lock: None, // Simplified for now to avoid lifetime issues
100        })
101    }
102
103    /// Perform incremental update of the collection
104    pub fn incremental_update(
105        &self,
106        source_path: &std::path::Path,
107        includes: &[String],
108        excludes: &[String],
109    ) -> Result<crate::incremental::DeltaManifest> {
110        let updater = crate::incremental::IncrementalUpdater::new(&self.path);
111        
112        // Scan for changes
113        let changes = updater.scan_for_changes(source_path, includes, excludes)?;
114        
115        if changes.is_empty() {
116            anyhow::bail!("No changes detected since last import");
117        }
118
119        println!("Found {} changed files:", changes.len());
120        for change in &changes {
121            match change.change_type {
122                crate::incremental::ChangeType::Added => {
123                    println!("  + {}", change.path.display());
124                }
125                crate::incremental::ChangeType::Modified => {
126                    println!("  M {}", change.path.display());
127                }
128                crate::incremental::ChangeType::Deleted => {
129                    println!("  - {}", change.path.display());
130                }
131            }
132        }
133
134        // Apply changes
135        let delta_manifest = updater.apply_changes(changes, source_path)?;
136        
137        Ok(delta_manifest)
138    }
139
140    /// Check if incremental update is available
141    pub fn has_incremental_cache(&self) -> bool {
142        let cache_path = self.path.join("index").join("file_cache.json");
143        cache_path.exists()
144    }
145}
146
/// Returns the sum of `left` and `right`.
///
/// Uses the plain `+` operator, so overflow is handled exactly as for any
/// other `u64` addition under the active build profile.
pub fn add(left: u64, right: u64) -> u64 {
    right + left
}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Sanity check for `add`: two plus two is four.
    #[test]
    fn it_works() {
        assert_eq!(add(2, 2), 4);
    }
}