mago_database/
lib.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use rayon::iter::IntoParallelIterator;
8use rayon::iter::ParallelIterator;
9use serde::Deserialize;
10use serde::Serialize;
11
12use crate::change::Change;
13use crate::change::ChangeLog;
14use crate::error::DatabaseError;
15use crate::file::File;
16use crate::file::FileId;
17use crate::file::FileType;
18use crate::file::line_starts;
19use crate::operation::FilesystemOperation;
20
21mod utils;
22
23pub mod change;
24pub mod error;
25pub mod exclusion;
26pub mod file;
27pub mod loader;
28
29mod operation;
30
/// A mutable database for managing a collection of project files.
///
/// This struct acts as the primary "builder" for your file set. It is optimized
/// for efficient additions, updates, and deletions. Once you have loaded all
/// files and performed any initial modifications, you can create a
/// read-optimized, immutable snapshot for fast querying by calling
/// [`read_only`](Self::read_only).
///
/// Files are indexed twice: by logical name (the primary map) and by stable
/// [`FileId`] (a secondary map that points back at the name). `add` and
/// `delete` update both maps together.
///
/// While this structure implements [`Clone`](std::clone::Clone), it is not intended
/// for frequent cloning. Instead, it is designed to be used as a single mutable
/// instance that you modify in place. Cloning is provided for scenarios where
/// you need to create a backup or checkpoint of the current state before making
/// further changes.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Database {
    /// Maps a file's logical name to its `File` object for fast name-based access.
    ///
    /// Files are held behind an `Arc` so that readers and snapshots can share
    /// them without copying.
    files: HashMap<Cow<'static, str>, Arc<File>>,
    /// Maps a file's stable ID back to its logical name for fast ID-based mutations.
    id_to_name: HashMap<FileId, Cow<'static, str>>,
}
50
/// An immutable, read-optimized snapshot of a file database.
///
/// This structure is designed for high-performance lookups and iteration. It stores
/// all files in a contiguous vector sorted by `FileId` and uses several `HashMap`
/// indices to provide `O(1)` average-time access to files by their ID, name, or path.
///
/// A `ReadDatabase` is created via [`Database::read_only`], or via
/// [`ReadDatabase::single`] when only one file is needed.
#[derive(Debug)]
pub struct ReadDatabase {
    /// A contiguous list of all files, sorted by `FileId` for deterministic iteration.
    files: Vec<Arc<File>>,
    /// Maps a file's stable ID to its index in the `files` vector.
    id_to_index: HashMap<FileId, usize>,
    /// Maps a file's logical name to its index in the `files` vector.
    name_to_index: HashMap<Cow<'static, str>, usize>,
    /// Maps a file's filesystem path to its index in the `files` vector.
    ///
    /// Only files that actually have a path get an entry here.
    path_to_index: HashMap<PathBuf, usize>,
}
69
70impl Database {
71    /// Creates a new, empty `Database`.
72    pub fn new() -> Self {
73        Self::default()
74    }
75
76    /// Adds a file to the database, overwriting any existing file with the same name.
77    pub fn add(&mut self, file: File) {
78        let name = file.name.clone();
79        let id = file.id;
80
81        if let Some(old_file) = self.files.insert(name.clone(), Arc::new(file)) {
82            self.id_to_name.remove(&old_file.id);
83        }
84        self.id_to_name.insert(id, name);
85    }
86
87    /// Updates a file's content in-place using its stable `FileId`.
88    ///
89    /// This recalculates derived data like file size, line endings, and `FileRevision`.
90    /// Returns `true` if a file with the given ID was found and updated.
91    pub fn update(&mut self, id: FileId, new_contents: Cow<'static, str>) -> bool {
92        if let Some(name) = self.id_to_name.get(&id)
93            && let Some(file) = self.files.get_mut(name)
94            && let Some(file) = Arc::get_mut(file)
95        {
96            file.contents = new_contents;
97            file.size = file.contents.len() as u32;
98            file.lines = line_starts(file.contents.as_ref()).collect();
99            return true;
100        }
101        false
102    }
103
104    /// Deletes a file from the database using its stable `FileId`.
105    ///
106    /// Returns `true` if a file with the given ID was found and removed.
107    pub fn delete(&mut self, id: FileId) -> bool {
108        if let Some(name) = self.id_to_name.remove(&id) { self.files.remove(&name).is_some() } else { false }
109    }
110
111    /// Commits a [`ChangeLog`], applying all its recorded operations to the database
112    /// and optionally writing them to the filesystem.
113    ///
114    /// # Arguments
115    ///
116    /// * `change_log`: The log of changes to apply.
117    /// * `write_to_disk`: If `true`, changes for files that have a filesystem
118    ///   path will be written to disk in parallel.
119    ///
120    /// # Errors
121    ///
122    /// Returns a [`DatabaseError`] if the log cannot be consumed or if any
123    /// filesystem operation fails.
124    pub fn commit(&mut self, change_log: ChangeLog, write_to_disk: bool) -> Result<(), DatabaseError> {
125        let changes = change_log.into_inner()?;
126        let mut fs_operations = if write_to_disk { Vec::new() } else { Vec::with_capacity(0) };
127
128        for change in changes {
129            match change {
130                Change::Add(file) => {
131                    if write_to_disk && let Some(path) = &file.path {
132                        fs_operations.push(FilesystemOperation::Write(path.clone(), file.contents.clone()));
133                    }
134
135                    self.add(file);
136                }
137                Change::Update(id, contents) => {
138                    if write_to_disk
139                        && let Ok(file) = self.get(&id)
140                        && let Some(path) = &file.path
141                    {
142                        fs_operations.push(FilesystemOperation::Write(path.clone(), contents.clone()));
143                    }
144
145                    self.update(id, contents);
146                }
147                Change::Delete(id) => {
148                    if write_to_disk
149                        && let Ok(file) = self.get(&id)
150                        && let Some(path) = &file.path
151                    {
152                        fs_operations.push(FilesystemOperation::Delete(path.clone()));
153                    }
154
155                    self.delete(id);
156                }
157            }
158        }
159
160        // If requested, perform all collected filesystem operations in parallel.
161        if write_to_disk {
162            fs_operations.into_par_iter().try_for_each(|op| -> Result<(), DatabaseError> { op.execute() })?;
163        }
164
165        Ok(())
166    }
167
168    /// Creates an independent, immutable snapshot of the database.
169    ///
170    /// This is a potentially expensive one-time operation as it **clones** all file
171    /// data. The resulting [`ReadDatabase`] is highly optimized for fast reads and
172    /// guarantees a deterministic iteration order. The original `Database` is not
173    /// consumed and can continue to be used.
174    pub fn read_only(&self) -> ReadDatabase {
175        let mut files_vec: Vec<Arc<File>> = self.files.values().cloned().collect();
176        files_vec.sort_unstable_by_key(|f| f.id);
177
178        let mut id_to_index = HashMap::with_capacity(files_vec.len());
179        let mut name_to_index = HashMap::with_capacity(files_vec.len());
180        let mut path_to_index = HashMap::with_capacity(files_vec.len());
181
182        for (index, file) in files_vec.iter().enumerate() {
183            id_to_index.insert(file.id, index);
184            name_to_index.insert(file.name.clone(), index);
185            if let Some(path) = &file.path {
186                path_to_index.insert(path.clone(), index);
187            }
188        }
189
190        ReadDatabase { files: files_vec, id_to_index, name_to_index, path_to_index }
191    }
192}
193
194impl ReadDatabase {
195    /// Creates a new `ReadDatabase` containing only a single file.
196    ///
197    /// This is a convenience constructor for situations, such as testing or
198    /// single-file tools, where an operation requires a [`DatabaseReader`]
199    /// implementation but only needs to be aware of one file.
200    ///
201    /// # Arguments
202    ///
203    /// * `file`: The single `File` to include in the database.
204    pub fn single(file: File) -> Self {
205        let mut id_to_index = HashMap::with_capacity(1);
206        let mut name_to_index = HashMap::with_capacity(1);
207        let mut path_to_index = HashMap::with_capacity(1);
208
209        // The index for the single file will always be 0.
210        id_to_index.insert(file.id, 0);
211        name_to_index.insert(file.name.clone(), 0);
212        if let Some(path) = &file.path {
213            path_to_index.insert(path.clone(), 0);
214        }
215
216        Self { files: vec![Arc::new(file)], id_to_index, name_to_index, path_to_index }
217    }
218}
219
220/// A universal interface for reading data from any database implementation.
221///
222/// This trait provides a common API for querying file data, abstracting over
223/// whether the underlying source is the mutable [`Database`] or the read-optimized
224/// [`ReadDatabase`]. This allows for writing generic code that can operate on either.
225pub trait DatabaseReader {
226    /// Retrieves a file's stable ID using its logical name.
227    fn get_id(&self, name: &str) -> Option<FileId>;
228
229    /// Retrieves a reference to a file using its stable `FileId`.
230    ///
231    /// # Errors
232    ///
233    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
234    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError>;
235
236    /// Retrieves a reference to a file using its stable `FileId`.
237    ///
238    /// # Errors
239    ///
240    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
241    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError>;
242
243    /// Retrieves a reference to a file using its logical name.
244    ///
245    /// # Errors
246    ///
247    /// Returns `DatabaseError::FileNotFound` if no file with the given name exists.
248    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError>;
249
250    /// Retrieves a reference to a file by its absolute filesystem path.
251    ///
252    /// # Errors
253    ///
254    /// Returns `DatabaseError::FileNotFound` if no file with the given path exists.
255    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError>;
256
257    /// Returns an iterator over all files in the database.
258    ///
259    /// The order is not guaranteed for `Database`, but is sorted by `FileId`
260    /// for `ReadDatabase`, providing deterministic iteration.
261    fn files(&self) -> impl Iterator<Item = Arc<File>>;
262
263    /// Returns an iterator over all files of a specific `FileType`.
264    fn files_with_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
265        self.files().filter(move |file| file.file_type == file_type)
266    }
267
268    /// Returns an iterator over all files that do not match a specific `FileType`.
269    fn files_without_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
270        self.files().filter(move |file| file.file_type != file_type)
271    }
272
273    /// Returns an iterator over the stable IDs of all files in the database.
274    fn file_ids(&self) -> impl Iterator<Item = FileId> {
275        self.files().map(|file| file.id)
276    }
277
278    /// Returns an iterator over the stable IDs of all files of a specific `FileType`.
279    fn file_ids_with_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
280        self.files_with_type(file_type).map(|file| file.id)
281    }
282
283    /// Returns an iterator over the stable IDs of all files that do not match a specific `FileType`.
284    fn file_ids_without_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
285        self.files_without_type(file_type).map(|file| file.id)
286    }
287
288    /// Returns the total number of files in the database.
289    fn len(&self) -> usize;
290
291    /// Returns `true` if the database contains no files.
292    fn is_empty(&self) -> bool {
293        self.len() == 0
294    }
295}
296
297impl DatabaseReader for Database {
298    fn get_id(&self, name: &str) -> Option<FileId> {
299        self.files.get(name).map(|f| f.id)
300    }
301
302    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
303        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
304        let file = self.files.get(name).ok_or(DatabaseError::FileNotFound)?;
305
306        Ok(file.clone())
307    }
308
309    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
310        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
311        self.files.get(name).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
312    }
313
314    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
315        self.files.get(name).cloned().ok_or(DatabaseError::FileNotFound)
316    }
317
318    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
319        self.files.values().find(|file| file.path.as_deref() == Some(path)).cloned().ok_or(DatabaseError::FileNotFound)
320    }
321
322    fn files(&self) -> impl Iterator<Item = Arc<File>> {
323        self.files.values().cloned()
324    }
325
326    fn len(&self) -> usize {
327        self.files.len()
328    }
329}
330
331impl DatabaseReader for ReadDatabase {
332    fn get_id(&self, name: &str) -> Option<FileId> {
333        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).map(|f| f.id)
334    }
335
336    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
337        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
338
339        self.files.get(*index).cloned().ok_or(DatabaseError::FileNotFound)
340    }
341
342    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
343        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
344
345        self.files.get(*index).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
346    }
347
348    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
349        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
350    }
351
352    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
353        self.path_to_index.get(path).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
354    }
355
356    fn files(&self) -> impl Iterator<Item = Arc<File>> {
357        self.files.iter().cloned()
358    }
359
360    fn len(&self) -> usize {
361        self.files.len()
362    }
363}