mago_database/
lib.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use crate::change::Change;
8use crate::change::ChangeLog;
9use crate::error::DatabaseError;
10use crate::file::File;
11use crate::file::FileId;
12use crate::file::FileType;
13use crate::file::line_starts;
14
15mod utils;
16
17pub mod change;
18pub mod error;
19pub mod exclusion;
20pub mod file;
21pub mod loader;
22
23/// A mutable database for managing a collection of project files.
24///
25/// This struct acts as the primary "builder" for your file set. It is optimized
26/// for efficient additions, updates, and deletions. Once you have loaded all
27/// files and performed any initial modifications, you can create a high-performance,
28/// immutable snapshot for fast querying by calling [`read_only`](Self::read_only).
29#[derive(Debug, Default)]
30pub struct Database {
31    /// Maps a file's logical name to its `File` object for fast name-based access.
32    files: HashMap<Cow<'static, str>, Arc<File>>,
33    /// Maps a file's stable ID back to its logical name for fast ID-based mutations.
34    id_to_name: HashMap<FileId, Cow<'static, str>>,
35}
36
37/// An immutable, read-optimized snapshot of a file database.
38///
39/// This structure is designed for high-performance lookups and iteration. It stores
40/// all files in a contiguous, sorted vector and uses multiple `HashMap` indices
41/// to provide $O(1)$ average-time access to files by their ID, name, or path.
42///
43/// A `ReadDatabase` is created via [`Database::read_only`].
44#[derive(Debug)]
45pub struct ReadDatabase {
46    /// A contiguous list of all files, sorted by `FileId` for deterministic iteration.
47    files: Vec<Arc<File>>,
48    /// Maps a file's stable ID to its index in the `files` vector.
49    id_to_index: HashMap<FileId, usize>,
50    /// Maps a file's logical name to its index in the `files` vector.
51    name_to_index: HashMap<Cow<'static, str>, usize>,
52    /// Maps a file's absolute path to its index in the `files` vector.
53    path_to_index: HashMap<PathBuf, usize>,
54}
55
56impl Database {
57    /// Creates a new, empty `Database`.
58    pub fn new() -> Self {
59        Self::default()
60    }
61
62    /// Adds a file to the database, overwriting any existing file with the same name.
63    pub fn add(&mut self, file: File) {
64        let name = file.name.clone();
65        let id = file.id;
66
67        if let Some(old_file) = self.files.insert(name.clone(), Arc::new(file)) {
68            self.id_to_name.remove(&old_file.id);
69        }
70        self.id_to_name.insert(id, name);
71    }
72
73    /// Updates a file's content in-place using its stable `FileId`.
74    ///
75    /// This recalculates derived data like file size, line endings, and `FileRevision`.
76    /// Returns `true` if a file with the given ID was found and updated.
77    pub fn update(&mut self, id: FileId, new_contents: Cow<'static, str>) -> bool {
78        if let Some(name) = self.id_to_name.get(&id)
79            && let Some(file) = self.files.get_mut(name)
80            && let Some(file) = Arc::get_mut(file)
81        {
82            file.contents = new_contents;
83            file.size = file.contents.len() as u32;
84            file.lines = line_starts(file.contents.as_ref()).collect();
85            return true;
86        }
87        false
88    }
89
90    /// Deletes a file from the database using its stable `FileId`.
91    ///
92    /// Returns `true` if a file with the given ID was found and removed.
93    pub fn delete(&mut self, id: FileId) -> bool {
94        if let Some(name) = self.id_to_name.remove(&id) { self.files.remove(&name).is_some() } else { false }
95    }
96
97    /// Commits a [`ChangeLog`], applying all its recorded operations to the database.
98    ///
99    /// This method consumes the log and applies each `Change` sequentially.
100    /// It will fail if other references to the `ChangeLog` still exist.
101    ///
102    /// # Errors
103    ///
104    /// Returns a [`DatabaseError`] if the log cannot be consumed.
105    pub fn commit(&mut self, change_log: ChangeLog) -> Result<(), DatabaseError> {
106        for change in change_log.into_inner()? {
107            self.apply(change);
108        }
109        Ok(())
110    }
111
112    /// Applies a single `Change` operation to the database.
113    fn apply(&mut self, change: Change) {
114        match change {
115            Change::Add(file) => self.add(file),
116            Change::Update(id, contents) => {
117                self.update(id, contents);
118            }
119            Change::Delete(id) => {
120                self.delete(id);
121            }
122        }
123    }
124
125    /// Creates an independent, immutable snapshot of the database.
126    ///
127    /// This is a potentially expensive one-time operation as it **clones** all file
128    /// data. The resulting [`ReadDatabase`] is highly optimized for fast reads and
129    /// guarantees a deterministic iteration order. The original `Database` is not
130    /// consumed and can continue to be used.
131    pub fn read_only(&self) -> ReadDatabase {
132        let mut files_vec: Vec<Arc<File>> = self.files.values().cloned().collect();
133        files_vec.sort_unstable_by_key(|f| f.id);
134
135        let mut id_to_index = HashMap::with_capacity(files_vec.len());
136        let mut name_to_index = HashMap::with_capacity(files_vec.len());
137        let mut path_to_index = HashMap::with_capacity(files_vec.len());
138
139        for (index, file) in files_vec.iter().enumerate() {
140            id_to_index.insert(file.id, index);
141            name_to_index.insert(file.name.clone(), index);
142            if let Some(path) = &file.path {
143                path_to_index.insert(path.clone(), index);
144            }
145        }
146
147        ReadDatabase { files: files_vec, id_to_index, name_to_index, path_to_index }
148    }
149}
150
151impl ReadDatabase {
152    /// Creates a new `ReadDatabase` containing only a single file.
153    ///
154    /// This is a convenience constructor for situations, such as testing or
155    /// single-file tools, where an operation requires a [`DatabaseReader`]
156    /// implementation but only needs to be aware of one file.
157    ///
158    /// # Arguments
159    ///
160    /// * `file`: The single `File` to include in the database.
161    pub fn single(file: File) -> Self {
162        let mut id_to_index = HashMap::with_capacity(1);
163        let mut name_to_index = HashMap::with_capacity(1);
164        let mut path_to_index = HashMap::with_capacity(1);
165
166        // The index for the single file will always be 0.
167        id_to_index.insert(file.id, 0);
168        name_to_index.insert(file.name.clone(), 0);
169        if let Some(path) = &file.path {
170            path_to_index.insert(path.clone(), 0);
171        }
172
173        Self { files: vec![Arc::new(file)], id_to_index, name_to_index, path_to_index }
174    }
175}
176
177/// A universal interface for reading data from any database implementation.
178///
179/// This trait provides a common API for querying file data, abstracting over
180/// whether the underlying source is the mutable [`Database`] or the read-optimized
181/// [`ReadDatabase`]. This allows for writing generic code that can operate on either.
182pub trait DatabaseReader {
183    /// Retrieves a file's stable ID using its logical name.
184    fn get_id(&self, name: &str) -> Option<FileId>;
185
186    fn get_name(&self, id: &FileId) -> Option<&str> {
187        self.get_by_id(id).map(|file| file.name.as_ref()).ok()
188    }
189
190    /// Retrieves a reference to a file using its stable `FileId`.
191    ///
192    /// # Errors
193    ///
194    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
195    fn get_by_id(&self, id: &FileId) -> Result<&File, DatabaseError>;
196
197    /// Retrieves a reference to a file using its logical name.
198    ///
199    /// # Errors
200    ///
201    /// Returns `DatabaseError::FileNotFound` if no file with the given name exists.
202    fn get_by_name(&self, name: &str) -> Result<&File, DatabaseError>;
203
204    /// Retrieves a reference to a file by its absolute filesystem path.
205    ///
206    /// # Errors
207    ///
208    /// Returns `DatabaseError::FileNotFound` if no file with the given path exists.
209    fn get_by_path(&self, path: &Path) -> Result<&File, DatabaseError>;
210
211    /// Returns an iterator over all files in the database.
212    ///
213    /// The order is not guaranteed for `Database`, but is sorted by `FileId`
214    /// for `ReadDatabase`, providing deterministic iteration.
215    fn files(&self) -> impl Iterator<Item = &File>;
216
217    /// Returns an iterator over all files of a specific `FileType`.
218    fn files_with_type(&self, file_type: FileType) -> impl Iterator<Item = &File> {
219        self.files().filter(move |file| file.file_type == file_type)
220    }
221
222    /// Returns an iterator over all files that do not match a specific `FileType`.
223    fn files_without_type(&self, file_type: FileType) -> impl Iterator<Item = &File> {
224        self.files().filter(move |file| file.file_type != file_type)
225    }
226
227    /// Returns an iterator over the stable IDs of all files in the database.
228    fn file_ids(&self) -> impl Iterator<Item = FileId> {
229        self.files().map(|file| file.id)
230    }
231
232    /// Returns an iterator over the stable IDs of all files of a specific `FileType`.
233    fn file_ids_with_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
234        self.files_with_type(file_type).map(|file| file.id)
235    }
236
237    /// Returns an iterator over the stable IDs of all files that do not match a specific `FileType`.
238    fn file_ids_without_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
239        self.files_without_type(file_type).map(|file| file.id)
240    }
241
242    /// Returns the total number of files in the database.
243    fn len(&self) -> usize;
244
245    /// Returns `true` if the database contains no files.
246    fn is_empty(&self) -> bool {
247        self.len() == 0
248    }
249}
250
251impl DatabaseReader for Database {
252    fn get_id(&self, name: &str) -> Option<FileId> {
253        self.files.get(name).map(|f| f.id)
254    }
255
256    fn get_by_id(&self, id: &FileId) -> Result<&File, DatabaseError> {
257        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
258        let file = self.files.get(name).ok_or(DatabaseError::FileNotFound)?;
259
260        Ok(file.as_ref())
261    }
262
263    fn get_by_name(&self, name: &str) -> Result<&File, DatabaseError> {
264        self.files.get(name).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
265    }
266
267    fn get_by_path(&self, path: &Path) -> Result<&File, DatabaseError> {
268        self.files
269            .values()
270            .find(|file| file.path.as_deref() == Some(path))
271            .map(|file| file.as_ref())
272            .ok_or(DatabaseError::FileNotFound)
273    }
274
275    fn files(&self) -> impl Iterator<Item = &File> {
276        self.files.values().map(|file| file.as_ref())
277    }
278
279    fn len(&self) -> usize {
280        self.files.len()
281    }
282}
283
284impl DatabaseReader for ReadDatabase {
285    fn get_id(&self, name: &str) -> Option<FileId> {
286        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).map(|f| f.id)
287    }
288
289    fn get_by_id(&self, id: &FileId) -> Result<&File, DatabaseError> {
290        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
291
292        self.files.get(*index).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
293    }
294
295    fn get_by_name(&self, name: &str) -> Result<&File, DatabaseError> {
296        self.name_to_index
297            .get(name)
298            .and_then(|&i| self.files.get(i))
299            .map(|file| file.as_ref())
300            .ok_or(DatabaseError::FileNotFound)
301    }
302
303    fn get_by_path(&self, path: &Path) -> Result<&File, DatabaseError> {
304        self.path_to_index
305            .get(path)
306            .and_then(|&i| self.files.get(i))
307            .map(|file| file.as_ref())
308            .ok_or(DatabaseError::FileNotFound)
309    }
310
311    fn files(&self) -> impl Iterator<Item = &File> {
312        self.files.iter().map(|file| file.as_ref())
313    }
314
315    fn len(&self) -> usize {
316        self.files.len()
317    }
318}