mago_database/
lib.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use rayon::iter::IntoParallelIterator;
8use rayon::iter::ParallelIterator;
9
10use crate::change::Change;
11use crate::change::ChangeLog;
12use crate::error::DatabaseError;
13use crate::file::File;
14use crate::file::FileId;
15use crate::file::FileType;
16use crate::file::line_starts;
17use crate::operation::FilesystemOperation;
18
19mod utils;
20
21pub mod change;
22pub mod error;
23pub mod exclusion;
24pub mod file;
25pub mod loader;
26
27mod operation;
28
29/// A mutable database for managing a collection of project files.
30///
31/// This struct acts as the primary "builder" for your file set. It is optimized
32/// for efficient additions, updates, and deletions. Once you have loaded all
33/// files and performed any initial modifications, you can create a high-performance,
34/// immutable snapshot for fast querying by calling [`read_only`](Self::read_only).
35#[derive(Debug, Default)]
36pub struct Database {
37    /// Maps a file's logical name to its `File` object for fast name-based access.
38    files: HashMap<Cow<'static, str>, Arc<File>>,
39    /// Maps a file's stable ID back to its logical name for fast ID-based mutations.
40    id_to_name: HashMap<FileId, Cow<'static, str>>,
41}
42
43/// An immutable, read-optimized snapshot of a file database.
44///
45/// This structure is designed for high-performance lookups and iteration. It stores
46/// all files in a contiguous, sorted vector and uses multiple `HashMap` indices
47/// to provide $O(1)$ average-time access to files by their ID, name, or path.
48///
49/// A `ReadDatabase` is created via [`Database::read_only`].
50#[derive(Debug)]
51pub struct ReadDatabase {
52    /// A contiguous list of all files, sorted by `FileId` for deterministic iteration.
53    files: Vec<Arc<File>>,
54    /// Maps a file's stable ID to its index in the `files` vector.
55    id_to_index: HashMap<FileId, usize>,
56    /// Maps a file's logical name to its index in the `files` vector.
57    name_to_index: HashMap<Cow<'static, str>, usize>,
58    /// Maps a file's absolute path to its index in the `files` vector.
59    path_to_index: HashMap<PathBuf, usize>,
60}
61
62impl Database {
63    /// Creates a new, empty `Database`.
64    pub fn new() -> Self {
65        Self::default()
66    }
67
68    /// Adds a file to the database, overwriting any existing file with the same name.
69    pub fn add(&mut self, file: File) {
70        let name = file.name.clone();
71        let id = file.id;
72
73        if let Some(old_file) = self.files.insert(name.clone(), Arc::new(file)) {
74            self.id_to_name.remove(&old_file.id);
75        }
76        self.id_to_name.insert(id, name);
77    }
78
79    /// Updates a file's content in-place using its stable `FileId`.
80    ///
81    /// This recalculates derived data like file size, line endings, and `FileRevision`.
82    /// Returns `true` if a file with the given ID was found and updated.
83    pub fn update(&mut self, id: FileId, new_contents: Cow<'static, str>) -> bool {
84        if let Some(name) = self.id_to_name.get(&id)
85            && let Some(file) = self.files.get_mut(name)
86            && let Some(file) = Arc::get_mut(file)
87        {
88            file.contents = new_contents;
89            file.size = file.contents.len() as u32;
90            file.lines = line_starts(file.contents.as_ref()).collect();
91            return true;
92        }
93        false
94    }
95
96    /// Deletes a file from the database using its stable `FileId`.
97    ///
98    /// Returns `true` if a file with the given ID was found and removed.
99    pub fn delete(&mut self, id: FileId) -> bool {
100        if let Some(name) = self.id_to_name.remove(&id) { self.files.remove(&name).is_some() } else { false }
101    }
102
103    /// Commits a [`ChangeLog`], applying all its recorded operations to the database
104    /// and optionally writing them to the filesystem.
105    ///
106    /// # Arguments
107    ///
108    /// * `change_log`: The log of changes to apply.
109    /// * `write_to_disk`: If `true`, changes for files that have a filesystem
110    ///   path will be written to disk in parallel.
111    ///
112    /// # Errors
113    ///
114    /// Returns a [`DatabaseError`] if the log cannot be consumed or if any
115    /// filesystem operation fails.
116    pub fn commit(&mut self, change_log: ChangeLog, write_to_disk: bool) -> Result<(), DatabaseError> {
117        let changes = change_log.into_inner()?;
118        let mut fs_operations = if write_to_disk { Vec::new() } else { Vec::with_capacity(0) };
119
120        for change in changes {
121            match change {
122                Change::Add(file) => {
123                    if write_to_disk && let Some(path) = &file.path {
124                        fs_operations.push(FilesystemOperation::Write(path.clone(), file.contents.clone()));
125                    }
126
127                    self.add(file);
128                }
129                Change::Update(id, contents) => {
130                    if write_to_disk
131                        && let Ok(file) = self.get(&id)
132                        && let Some(path) = &file.path
133                    {
134                        fs_operations.push(FilesystemOperation::Write(path.clone(), contents.clone()));
135                    }
136
137                    self.update(id, contents);
138                }
139                Change::Delete(id) => {
140                    if write_to_disk
141                        && let Ok(file) = self.get(&id)
142                        && let Some(path) = &file.path
143                    {
144                        fs_operations.push(FilesystemOperation::Delete(path.clone()));
145                    }
146
147                    self.delete(id);
148                }
149            }
150        }
151
152        // If requested, perform all collected filesystem operations in parallel.
153        if write_to_disk {
154            fs_operations.into_par_iter().try_for_each(|op| -> Result<(), DatabaseError> { op.execute() })?;
155        }
156
157        Ok(())
158    }
159
160    /// Creates an independent, immutable snapshot of the database.
161    ///
162    /// This is a potentially expensive one-time operation as it **clones** all file
163    /// data. The resulting [`ReadDatabase`] is highly optimized for fast reads and
164    /// guarantees a deterministic iteration order. The original `Database` is not
165    /// consumed and can continue to be used.
166    pub fn read_only(&self) -> ReadDatabase {
167        let mut files_vec: Vec<Arc<File>> = self.files.values().cloned().collect();
168        files_vec.sort_unstable_by_key(|f| f.id);
169
170        let mut id_to_index = HashMap::with_capacity(files_vec.len());
171        let mut name_to_index = HashMap::with_capacity(files_vec.len());
172        let mut path_to_index = HashMap::with_capacity(files_vec.len());
173
174        for (index, file) in files_vec.iter().enumerate() {
175            id_to_index.insert(file.id, index);
176            name_to_index.insert(file.name.clone(), index);
177            if let Some(path) = &file.path {
178                path_to_index.insert(path.clone(), index);
179            }
180        }
181
182        ReadDatabase { files: files_vec, id_to_index, name_to_index, path_to_index }
183    }
184}
185
186impl ReadDatabase {
187    /// Creates a new `ReadDatabase` containing only a single file.
188    ///
189    /// This is a convenience constructor for situations, such as testing or
190    /// single-file tools, where an operation requires a [`DatabaseReader`]
191    /// implementation but only needs to be aware of one file.
192    ///
193    /// # Arguments
194    ///
195    /// * `file`: The single `File` to include in the database.
196    pub fn single(file: File) -> Self {
197        let mut id_to_index = HashMap::with_capacity(1);
198        let mut name_to_index = HashMap::with_capacity(1);
199        let mut path_to_index = HashMap::with_capacity(1);
200
201        // The index for the single file will always be 0.
202        id_to_index.insert(file.id, 0);
203        name_to_index.insert(file.name.clone(), 0);
204        if let Some(path) = &file.path {
205            path_to_index.insert(path.clone(), 0);
206        }
207
208        Self { files: vec![Arc::new(file)], id_to_index, name_to_index, path_to_index }
209    }
210}
211
212/// A universal interface for reading data from any database implementation.
213///
214/// This trait provides a common API for querying file data, abstracting over
215/// whether the underlying source is the mutable [`Database`] or the read-optimized
216/// [`ReadDatabase`]. This allows for writing generic code that can operate on either.
217pub trait DatabaseReader {
218    /// Retrieves a file's stable ID using its logical name.
219    fn get_id(&self, name: &str) -> Option<FileId>;
220
221    /// Retrieves a reference to a file using its stable `FileId`.
222    ///
223    /// # Errors
224    ///
225    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
226    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError>;
227
228    /// Retrieves a reference to a file using its stable `FileId`.
229    ///
230    /// # Errors
231    ///
232    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
233    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError>;
234
235    /// Retrieves a reference to a file using its logical name.
236    ///
237    /// # Errors
238    ///
239    /// Returns `DatabaseError::FileNotFound` if no file with the given name exists.
240    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError>;
241
242    /// Retrieves a reference to a file by its absolute filesystem path.
243    ///
244    /// # Errors
245    ///
246    /// Returns `DatabaseError::FileNotFound` if no file with the given path exists.
247    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError>;
248
249    /// Returns an iterator over all files in the database.
250    ///
251    /// The order is not guaranteed for `Database`, but is sorted by `FileId`
252    /// for `ReadDatabase`, providing deterministic iteration.
253    fn files(&self) -> impl Iterator<Item = Arc<File>>;
254
255    /// Returns an iterator over all files of a specific `FileType`.
256    fn files_with_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
257        self.files().filter(move |file| file.file_type == file_type)
258    }
259
260    /// Returns an iterator over all files that do not match a specific `FileType`.
261    fn files_without_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
262        self.files().filter(move |file| file.file_type != file_type)
263    }
264
265    /// Returns an iterator over the stable IDs of all files in the database.
266    fn file_ids(&self) -> impl Iterator<Item = FileId> {
267        self.files().map(|file| file.id)
268    }
269
270    /// Returns an iterator over the stable IDs of all files of a specific `FileType`.
271    fn file_ids_with_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
272        self.files_with_type(file_type).map(|file| file.id)
273    }
274
275    /// Returns an iterator over the stable IDs of all files that do not match a specific `FileType`.
276    fn file_ids_without_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
277        self.files_without_type(file_type).map(|file| file.id)
278    }
279
280    /// Returns the total number of files in the database.
281    fn len(&self) -> usize;
282
283    /// Returns `true` if the database contains no files.
284    fn is_empty(&self) -> bool {
285        self.len() == 0
286    }
287}
288
289impl DatabaseReader for Database {
290    fn get_id(&self, name: &str) -> Option<FileId> {
291        self.files.get(name).map(|f| f.id)
292    }
293
294    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
295        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
296        let file = self.files.get(name).ok_or(DatabaseError::FileNotFound)?;
297
298        Ok(file.clone())
299    }
300
301    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
302        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
303        self.files.get(name).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
304    }
305
306    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
307        self.files.get(name).cloned().ok_or(DatabaseError::FileNotFound)
308    }
309
310    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
311        self.files.values().find(|file| file.path.as_deref() == Some(path)).cloned().ok_or(DatabaseError::FileNotFound)
312    }
313
314    fn files(&self) -> impl Iterator<Item = Arc<File>> {
315        self.files.values().cloned()
316    }
317
318    fn len(&self) -> usize {
319        self.files.len()
320    }
321}
322
323impl DatabaseReader for ReadDatabase {
324    fn get_id(&self, name: &str) -> Option<FileId> {
325        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).map(|f| f.id)
326    }
327
328    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
329        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
330
331        self.files.get(*index).cloned().ok_or(DatabaseError::FileNotFound)
332    }
333
334    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
335        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
336
337        self.files.get(*index).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
338    }
339
340    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
341        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
342    }
343
344    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
345        self.path_to_index.get(path).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
346    }
347
348    fn files(&self) -> impl Iterator<Item = Arc<File>> {
349        self.files.iter().cloned()
350    }
351
352    fn len(&self) -> usize {
353        self.files.len()
354    }
355}