mago_database/
lib.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use rayon::iter::IntoParallelIterator;
8use rayon::iter::ParallelIterator;
9use serde::Deserialize;
10use serde::Serialize;
11
12use crate::change::Change;
13use crate::change::ChangeLog;
14use crate::error::DatabaseError;
15use crate::file::File;
16use crate::file::FileId;
17use crate::file::FileType;
18use crate::file::line_starts;
19use crate::operation::FilesystemOperation;
20
21mod utils;
22
23pub mod change;
24pub mod error;
25pub mod exclusion;
26pub mod file;
27pub mod loader;
28
29mod operation;
30
31/// A mutable database for managing a collection of project files.
32///
33/// This struct acts as the primary "builder" for your file set. It is optimized
34/// for efficient additions, updates, and deletions. Once you have loaded all
35/// files and performed any initial modifications, you can create a high-performance,
36/// immutable snapshot for fast querying by calling [`read_only`](Self::read_only).
37///
38/// While this structure implements [`Clone`](std::clone::Clone), it is not intended
39/// for frequent cloning. Instead, it is designed to be used as a single mutable
40/// instance that you modify in place. Cloning is provided for scenarios where
41/// you need to create a backup or checkpoint of the current state before making
42/// further changes.
43#[derive(Debug, Clone, Default, Serialize, Deserialize)]
44pub struct Database {
45    /// Maps a file's logical name to its `File` object for fast name-based access.
46    files: HashMap<Cow<'static, str>, Arc<File>>,
47    /// Maps a file's stable ID back to its logical name for fast ID-based mutations.
48    id_to_name: HashMap<FileId, Cow<'static, str>>,
49}
50
51/// An immutable, read-optimized snapshot of a file database.
52///
53/// This structure is designed for high-performance lookups and iteration. It stores
54/// all files in a contiguous, sorted vector and uses multiple `HashMap` indices
55/// to provide $O(1)$ average-time access to files by their ID, name, or path.
56///
57/// A `ReadDatabase` is created via [`Database::read_only`].
58#[derive(Debug)]
59pub struct ReadDatabase {
60    /// A contiguous list of all files, sorted by `FileId` for deterministic iteration.
61    files: Vec<Arc<File>>,
62    /// Maps a file's stable ID to its index in the `files` vector.
63    id_to_index: HashMap<FileId, usize>,
64    /// Maps a file's logical name to its index in the `files` vector.
65    name_to_index: HashMap<Cow<'static, str>, usize>,
66    /// Maps a file's absolute path to its index in the `files` vector.
67    path_to_index: HashMap<PathBuf, usize>,
68}
69
70impl Database {
71    /// Creates a new, empty `Database`.
72    pub fn new() -> Self {
73        Self::default()
74    }
75
76    /// Adds a file to the database, overwriting any existing file with the same name.
77    pub fn add(&mut self, file: File) -> FileId {
78        let name = file.name.clone();
79        let id = file.id;
80
81        if let Some(old_file) = self.files.insert(name.clone(), Arc::new(file)) {
82            self.id_to_name.remove(&old_file.id);
83        }
84
85        self.id_to_name.insert(id, name);
86
87        id
88    }
89
90    /// Updates a file's content in-place using its stable `FileId`.
91    ///
92    /// This recalculates derived data like file size, line endings, and `FileRevision`.
93    /// Returns `true` if a file with the given ID was found and updated.
94    pub fn update(&mut self, id: FileId, new_contents: Cow<'static, str>) -> bool {
95        if let Some(name) = self.id_to_name.get(&id)
96            && let Some(file) = self.files.get_mut(name)
97            && let Some(file) = Arc::get_mut(file)
98        {
99            file.contents = new_contents;
100            file.size = file.contents.len() as u32;
101            file.lines = line_starts(file.contents.as_ref()).collect();
102            return true;
103        }
104        false
105    }
106
107    /// Deletes a file from the database using its stable `FileId`.
108    ///
109    /// Returns `true` if a file with the given ID was found and removed.
110    pub fn delete(&mut self, id: FileId) -> bool {
111        if let Some(name) = self.id_to_name.remove(&id) { self.files.remove(&name).is_some() } else { false }
112    }
113
114    /// Commits a [`ChangeLog`], applying all its recorded operations to the database
115    /// and optionally writing them to the filesystem.
116    ///
117    /// # Arguments
118    ///
119    /// * `change_log`: The log of changes to apply.
120    /// * `write_to_disk`: If `true`, changes for files that have a filesystem
121    ///   path will be written to disk in parallel.
122    ///
123    /// # Errors
124    ///
125    /// Returns a [`DatabaseError`] if the log cannot be consumed or if any
126    /// filesystem operation fails.
127    pub fn commit(&mut self, change_log: ChangeLog, write_to_disk: bool) -> Result<(), DatabaseError> {
128        let changes = change_log.into_inner()?;
129        let mut fs_operations = if write_to_disk { Vec::new() } else { Vec::with_capacity(0) };
130
131        for change in changes {
132            match change {
133                Change::Add(file) => {
134                    if write_to_disk && let Some(path) = &file.path {
135                        fs_operations.push(FilesystemOperation::Write(path.clone(), file.contents.clone()));
136                    }
137
138                    self.add(file);
139                }
140                Change::Update(id, contents) => {
141                    if write_to_disk
142                        && let Ok(file) = self.get(&id)
143                        && let Some(path) = &file.path
144                    {
145                        fs_operations.push(FilesystemOperation::Write(path.clone(), contents.clone()));
146                    }
147
148                    self.update(id, contents);
149                }
150                Change::Delete(id) => {
151                    if write_to_disk
152                        && let Ok(file) = self.get(&id)
153                        && let Some(path) = &file.path
154                    {
155                        fs_operations.push(FilesystemOperation::Delete(path.clone()));
156                    }
157
158                    self.delete(id);
159                }
160            }
161        }
162
163        // If requested, perform all collected filesystem operations in parallel.
164        if write_to_disk {
165            fs_operations.into_par_iter().try_for_each(|op| -> Result<(), DatabaseError> { op.execute() })?;
166        }
167
168        Ok(())
169    }
170
171    /// Creates an independent, immutable snapshot of the database.
172    ///
173    /// This is a potentially expensive one-time operation as it **clones** all file
174    /// data. The resulting [`ReadDatabase`] is highly optimized for fast reads and
175    /// guarantees a deterministic iteration order. The original `Database` is not
176    /// consumed and can continue to be used.
177    pub fn read_only(&self) -> ReadDatabase {
178        let mut files_vec: Vec<Arc<File>> = self.files.values().cloned().collect();
179        files_vec.sort_unstable_by_key(|f| f.id);
180
181        let mut id_to_index = HashMap::with_capacity(files_vec.len());
182        let mut name_to_index = HashMap::with_capacity(files_vec.len());
183        let mut path_to_index = HashMap::with_capacity(files_vec.len());
184
185        for (index, file) in files_vec.iter().enumerate() {
186            id_to_index.insert(file.id, index);
187            name_to_index.insert(file.name.clone(), index);
188            if let Some(path) = &file.path {
189                path_to_index.insert(path.clone(), index);
190            }
191        }
192
193        ReadDatabase { files: files_vec, id_to_index, name_to_index, path_to_index }
194    }
195}
196
197impl ReadDatabase {
198    /// Creates a new `ReadDatabase` containing only a single file.
199    ///
200    /// This is a convenience constructor for situations, such as testing or
201    /// single-file tools, where an operation requires a [`DatabaseReader`]
202    /// implementation but only needs to be aware of one file.
203    ///
204    /// # Arguments
205    ///
206    /// * `file`: The single `File` to include in the database.
207    pub fn single(file: File) -> Self {
208        let mut id_to_index = HashMap::with_capacity(1);
209        let mut name_to_index = HashMap::with_capacity(1);
210        let mut path_to_index = HashMap::with_capacity(1);
211
212        // The index for the single file will always be 0.
213        id_to_index.insert(file.id, 0);
214        name_to_index.insert(file.name.clone(), 0);
215        if let Some(path) = &file.path {
216            path_to_index.insert(path.clone(), 0);
217        }
218
219        Self { files: vec![Arc::new(file)], id_to_index, name_to_index, path_to_index }
220    }
221}
222
223/// A universal interface for reading data from any database implementation.
224///
225/// This trait provides a common API for querying file data, abstracting over
226/// whether the underlying source is the mutable [`Database`] or the read-optimized
227/// [`ReadDatabase`]. This allows for writing generic code that can operate on either.
228pub trait DatabaseReader {
229    /// Retrieves a file's stable ID using its logical name.
230    fn get_id(&self, name: &str) -> Option<FileId>;
231
232    /// Retrieves a reference to a file using its stable `FileId`.
233    ///
234    /// # Errors
235    ///
236    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
237    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError>;
238
239    /// Retrieves a reference to a file using its stable `FileId`.
240    ///
241    /// # Errors
242    ///
243    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
244    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError>;
245
246    /// Retrieves a reference to a file using its logical name.
247    ///
248    /// # Errors
249    ///
250    /// Returns `DatabaseError::FileNotFound` if no file with the given name exists.
251    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError>;
252
253    /// Retrieves a reference to a file by its absolute filesystem path.
254    ///
255    /// # Errors
256    ///
257    /// Returns `DatabaseError::FileNotFound` if no file with the given path exists.
258    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError>;
259
260    /// Returns an iterator over all files in the database.
261    ///
262    /// The order is not guaranteed for `Database`, but is sorted by `FileId`
263    /// for `ReadDatabase`, providing deterministic iteration.
264    fn files(&self) -> impl Iterator<Item = Arc<File>>;
265
266    /// Returns an iterator over all files of a specific `FileType`.
267    fn files_with_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
268        self.files().filter(move |file| file.file_type == file_type)
269    }
270
271    /// Returns an iterator over all files that do not match a specific `FileType`.
272    fn files_without_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
273        self.files().filter(move |file| file.file_type != file_type)
274    }
275
276    /// Returns an iterator over the stable IDs of all files in the database.
277    fn file_ids(&self) -> impl Iterator<Item = FileId> {
278        self.files().map(|file| file.id)
279    }
280
281    /// Returns an iterator over the stable IDs of all files of a specific `FileType`.
282    fn file_ids_with_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
283        self.files_with_type(file_type).map(|file| file.id)
284    }
285
286    /// Returns an iterator over the stable IDs of all files that do not match a specific `FileType`.
287    fn file_ids_without_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
288        self.files_without_type(file_type).map(|file| file.id)
289    }
290
291    /// Returns the total number of files in the database.
292    fn len(&self) -> usize;
293
294    /// Returns `true` if the database contains no files.
295    fn is_empty(&self) -> bool {
296        self.len() == 0
297    }
298}
299
300impl DatabaseReader for Database {
301    fn get_id(&self, name: &str) -> Option<FileId> {
302        self.files.get(name).map(|f| f.id)
303    }
304
305    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
306        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
307        let file = self.files.get(name).ok_or(DatabaseError::FileNotFound)?;
308
309        Ok(file.clone())
310    }
311
312    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
313        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
314        self.files.get(name).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
315    }
316
317    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
318        self.files.get(name).cloned().ok_or(DatabaseError::FileNotFound)
319    }
320
321    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
322        self.files.values().find(|file| file.path.as_deref() == Some(path)).cloned().ok_or(DatabaseError::FileNotFound)
323    }
324
325    fn files(&self) -> impl Iterator<Item = Arc<File>> {
326        self.files.values().cloned()
327    }
328
329    fn len(&self) -> usize {
330        self.files.len()
331    }
332}
333
334impl DatabaseReader for ReadDatabase {
335    fn get_id(&self, name: &str) -> Option<FileId> {
336        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).map(|f| f.id)
337    }
338
339    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
340        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
341
342        self.files.get(*index).cloned().ok_or(DatabaseError::FileNotFound)
343    }
344
345    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
346        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
347
348        self.files.get(*index).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
349    }
350
351    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
352        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
353    }
354
355    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
356        self.path_to_index.get(path).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
357    }
358
359    fn files(&self) -> impl Iterator<Item = Arc<File>> {
360        self.files.iter().cloned()
361    }
362
363    fn len(&self) -> usize {
364        self.files.len()
365    }
366}