mago_database/
lib.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::Path;
4use std::path::PathBuf;
5use std::sync::Arc;
6
7use rayon::iter::IntoParallelIterator;
8use rayon::iter::ParallelIterator;
9use serde::Deserialize;
10use serde::Serialize;
11
12use crate::change::Change;
13use crate::change::ChangeLog;
14use crate::error::DatabaseError;
15use crate::file::File;
16use crate::file::FileId;
17use crate::file::FileType;
18use crate::file::line_starts;
19use crate::operation::FilesystemOperation;
20
21mod utils;
22
23pub mod change;
24pub mod error;
25pub mod exclusion;
26pub mod file;
27pub mod loader;
28
29mod operation;
30
31/// A mutable database for managing a collection of project files.
32///
33/// This struct acts as the primary "builder" for your file set. It is optimized
34/// for efficient additions, updates, and deletions. Once you have loaded all
35/// files and performed any initial modifications, you can create a high-performance,
36/// immutable snapshot for fast querying by calling [`read_only`](Self::read_only).
37///
38/// While this structure implements [`Clone`](std::clone::Clone), it is not intended
39/// for frequent cloning. Instead, it is designed to be used as a single mutable
40/// instance that you modify in place. Cloning is provided for scenarios where
41/// you need to create a backup or checkpoint of the current state before making
42/// further changes.
43#[derive(Debug, Clone, Default, Serialize, Deserialize)]
44pub struct Database {
45    /// Maps a file's logical name to its `File` object for fast name-based access.
46    files: HashMap<Cow<'static, str>, Arc<File>>,
47    /// Maps a file's stable ID back to its logical name for fast ID-based mutations.
48    id_to_name: HashMap<FileId, Cow<'static, str>>,
49}
50
51/// An immutable, read-optimized snapshot of a file database.
52///
53/// This structure is designed for high-performance lookups and iteration. It stores
54/// all files in a contiguous, sorted vector and uses multiple `HashMap` indices
55/// to provide $O(1)$ average-time access to files by their ID, name, or path.
56///
57/// A `ReadDatabase` is created via [`Database::read_only`].
58#[derive(Debug)]
59pub struct ReadDatabase {
60    /// A contiguous list of all files, sorted by `FileId` for deterministic iteration.
61    files: Vec<Arc<File>>,
62    /// Maps a file's stable ID to its index in the `files` vector.
63    id_to_index: HashMap<FileId, usize>,
64    /// Maps a file's logical name to its index in the `files` vector.
65    name_to_index: HashMap<Cow<'static, str>, usize>,
66    /// Maps a file's absolute path to its index in the `files` vector.
67    path_to_index: HashMap<PathBuf, usize>,
68}
69
70impl Database {
71    /// Creates a new, empty `Database`.
72    pub fn new() -> Self {
73        Self::default()
74    }
75
76    /// Creates a new `Database` containing only a single file.
77    ///
78    /// This is a convenience constructor for situations, such as testing or
79    /// single-file tools, where an operation requires a [`Database`]
80    /// implementation but only needs to be aware of one file.
81    pub fn single(file: File) -> Self {
82        let mut db = Self::new();
83        db.add(file);
84        db
85    }
86
87    /// Adds a file to the database, overwriting any existing file with the same name.
88    pub fn add(&mut self, file: File) -> FileId {
89        let name = file.name.clone();
90        let id = file.id;
91
92        if let Some(old_file) = self.files.insert(name.clone(), Arc::new(file)) {
93            self.id_to_name.remove(&old_file.id);
94        }
95
96        self.id_to_name.insert(id, name);
97
98        id
99    }
100
101    /// Updates a file's content in-place using its stable `FileId`.
102    ///
103    /// This recalculates derived data like file size, line endings, and `FileRevision`.
104    /// Returns `true` if a file with the given ID was found and updated.
105    pub fn update(&mut self, id: FileId, new_contents: Cow<'static, str>) -> bool {
106        if let Some(name) = self.id_to_name.get(&id)
107            && let Some(file) = self.files.get_mut(name)
108            && let Some(file) = Arc::get_mut(file)
109        {
110            file.contents = new_contents;
111            file.size = file.contents.len() as u32;
112            file.lines = line_starts(file.contents.as_ref()).collect();
113            return true;
114        }
115        false
116    }
117
118    /// Deletes a file from the database using its stable `FileId`.
119    ///
120    /// Returns `true` if a file with the given ID was found and removed.
121    pub fn delete(&mut self, id: FileId) -> bool {
122        if let Some(name) = self.id_to_name.remove(&id) { self.files.remove(&name).is_some() } else { false }
123    }
124
125    /// Commits a [`ChangeLog`], applying all its recorded operations to the database
126    /// and optionally writing them to the filesystem.
127    ///
128    /// # Arguments
129    ///
130    /// * `change_log`: The log of changes to apply.
131    /// * `write_to_disk`: If `true`, changes for files that have a filesystem
132    ///   path will be written to disk in parallel.
133    ///
134    /// # Errors
135    ///
136    /// Returns a [`DatabaseError`] if the log cannot be consumed or if any
137    /// filesystem operation fails.
138    pub fn commit(&mut self, change_log: ChangeLog, write_to_disk: bool) -> Result<(), DatabaseError> {
139        let changes = change_log.into_inner()?;
140        let mut fs_operations = if write_to_disk { Vec::new() } else { Vec::with_capacity(0) };
141
142        for change in changes {
143            match change {
144                Change::Add(file) => {
145                    if write_to_disk && let Some(path) = &file.path {
146                        fs_operations.push(FilesystemOperation::Write(path.clone(), file.contents.clone()));
147                    }
148
149                    self.add(file);
150                }
151                Change::Update(id, contents) => {
152                    if write_to_disk
153                        && let Ok(file) = self.get(&id)
154                        && let Some(path) = &file.path
155                    {
156                        fs_operations.push(FilesystemOperation::Write(path.clone(), contents.clone()));
157                    }
158
159                    self.update(id, contents);
160                }
161                Change::Delete(id) => {
162                    if write_to_disk
163                        && let Ok(file) = self.get(&id)
164                        && let Some(path) = &file.path
165                    {
166                        fs_operations.push(FilesystemOperation::Delete(path.clone()));
167                    }
168
169                    self.delete(id);
170                }
171            }
172        }
173
174        // If requested, perform all collected filesystem operations in parallel.
175        if write_to_disk {
176            fs_operations.into_par_iter().try_for_each(|op| -> Result<(), DatabaseError> { op.execute() })?;
177        }
178
179        Ok(())
180    }
181
182    /// Creates an independent, immutable snapshot of the database.
183    ///
184    /// This is a potentially expensive one-time operation as it **clones** all file
185    /// data. The resulting [`ReadDatabase`] is highly optimized for fast reads and
186    /// guarantees a deterministic iteration order. The original `Database` is not
187    /// consumed and can continue to be used.
188    pub fn read_only(&self) -> ReadDatabase {
189        let mut files_vec: Vec<Arc<File>> = self.files.values().cloned().collect();
190        files_vec.sort_unstable_by_key(|f| f.id);
191
192        let mut id_to_index = HashMap::with_capacity(files_vec.len());
193        let mut name_to_index = HashMap::with_capacity(files_vec.len());
194        let mut path_to_index = HashMap::with_capacity(files_vec.len());
195
196        for (index, file) in files_vec.iter().enumerate() {
197            id_to_index.insert(file.id, index);
198            name_to_index.insert(file.name.clone(), index);
199            if let Some(path) = &file.path {
200                path_to_index.insert(path.clone(), index);
201            }
202        }
203
204        ReadDatabase { files: files_vec, id_to_index, name_to_index, path_to_index }
205    }
206}
207
208impl ReadDatabase {
209    /// Creates a new `ReadDatabase` containing only a single file.
210    ///
211    /// This is a convenience constructor for situations, such as testing or
212    /// single-file tools, where an operation requires a [`DatabaseReader`]
213    /// implementation but only needs to be aware of one file.
214    ///
215    /// # Arguments
216    ///
217    /// * `file`: The single `File` to include in the database.
218    pub fn single(file: File) -> Self {
219        let mut id_to_index = HashMap::with_capacity(1);
220        let mut name_to_index = HashMap::with_capacity(1);
221        let mut path_to_index = HashMap::with_capacity(1);
222
223        // The index for the single file will always be 0.
224        id_to_index.insert(file.id, 0);
225        name_to_index.insert(file.name.clone(), 0);
226        if let Some(path) = &file.path {
227            path_to_index.insert(path.clone(), 0);
228        }
229
230        Self { files: vec![Arc::new(file)], id_to_index, name_to_index, path_to_index }
231    }
232}
233
234/// A universal interface for reading data from any database implementation.
235///
236/// This trait provides a common API for querying file data, abstracting over
237/// whether the underlying source is the mutable [`Database`] or the read-optimized
238/// [`ReadDatabase`]. This allows for writing generic code that can operate on either.
239pub trait DatabaseReader {
240    /// Retrieves a file's stable ID using its logical name.
241    fn get_id(&self, name: &str) -> Option<FileId>;
242
243    /// Retrieves a reference to a file using its stable `FileId`.
244    ///
245    /// # Errors
246    ///
247    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
248    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError>;
249
250    /// Retrieves a reference to a file using its stable `FileId`.
251    ///
252    /// # Errors
253    ///
254    /// Returns `DatabaseError::FileNotFound` if no file with the given ID exists.
255    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError>;
256
257    /// Retrieves a reference to a file using its logical name.
258    ///
259    /// # Errors
260    ///
261    /// Returns `DatabaseError::FileNotFound` if no file with the given name exists.
262    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError>;
263
264    /// Retrieves a reference to a file by its absolute filesystem path.
265    ///
266    /// # Errors
267    ///
268    /// Returns `DatabaseError::FileNotFound` if no file with the given path exists.
269    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError>;
270
271    /// Returns an iterator over all files in the database.
272    ///
273    /// The order is not guaranteed for `Database`, but is sorted by `FileId`
274    /// for `ReadDatabase`, providing deterministic iteration.
275    fn files(&self) -> impl Iterator<Item = Arc<File>>;
276
277    /// Returns an iterator over all files of a specific `FileType`.
278    fn files_with_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
279        self.files().filter(move |file| file.file_type == file_type)
280    }
281
282    /// Returns an iterator over all files that do not match a specific `FileType`.
283    fn files_without_type(&self, file_type: FileType) -> impl Iterator<Item = Arc<File>> {
284        self.files().filter(move |file| file.file_type != file_type)
285    }
286
287    /// Returns an iterator over the stable IDs of all files in the database.
288    fn file_ids(&self) -> impl Iterator<Item = FileId> {
289        self.files().map(|file| file.id)
290    }
291
292    /// Returns an iterator over the stable IDs of all files of a specific `FileType`.
293    fn file_ids_with_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
294        self.files_with_type(file_type).map(|file| file.id)
295    }
296
297    /// Returns an iterator over the stable IDs of all files that do not match a specific `FileType`.
298    fn file_ids_without_type(&self, file_type: FileType) -> impl Iterator<Item = FileId> {
299        self.files_without_type(file_type).map(|file| file.id)
300    }
301
302    /// Returns the total number of files in the database.
303    fn len(&self) -> usize;
304
305    /// Returns `true` if the database contains no files.
306    fn is_empty(&self) -> bool {
307        self.len() == 0
308    }
309}
310
311impl DatabaseReader for Database {
312    fn get_id(&self, name: &str) -> Option<FileId> {
313        self.files.get(name).map(|f| f.id)
314    }
315
316    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
317        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
318        let file = self.files.get(name).ok_or(DatabaseError::FileNotFound)?;
319
320        Ok(file.clone())
321    }
322
323    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
324        let name = self.id_to_name.get(id).ok_or(DatabaseError::FileNotFound)?;
325        self.files.get(name).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
326    }
327
328    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
329        self.files.get(name).cloned().ok_or(DatabaseError::FileNotFound)
330    }
331
332    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
333        self.files.values().find(|file| file.path.as_deref() == Some(path)).cloned().ok_or(DatabaseError::FileNotFound)
334    }
335
336    fn files(&self) -> impl Iterator<Item = Arc<File>> {
337        self.files.values().cloned()
338    }
339
340    fn len(&self) -> usize {
341        self.files.len()
342    }
343}
344
345impl DatabaseReader for ReadDatabase {
346    fn get_id(&self, name: &str) -> Option<FileId> {
347        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).map(|f| f.id)
348    }
349
350    fn get(&self, id: &FileId) -> Result<Arc<File>, DatabaseError> {
351        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
352
353        self.files.get(*index).cloned().ok_or(DatabaseError::FileNotFound)
354    }
355
356    fn get_ref(&self, id: &FileId) -> Result<&File, DatabaseError> {
357        let index = self.id_to_index.get(id).ok_or(DatabaseError::FileNotFound)?;
358
359        self.files.get(*index).map(|file| file.as_ref()).ok_or(DatabaseError::FileNotFound)
360    }
361
362    fn get_by_name(&self, name: &str) -> Result<Arc<File>, DatabaseError> {
363        self.name_to_index.get(name).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
364    }
365
366    fn get_by_path(&self, path: &Path) -> Result<Arc<File>, DatabaseError> {
367        self.path_to_index.get(path).and_then(|&i| self.files.get(i)).cloned().ok_or(DatabaseError::FileNotFound)
368    }
369
370    fn files(&self) -> impl Iterator<Item = Arc<File>> {
371        self.files.iter().cloned()
372    }
373
374    fn len(&self) -> usize {
375        self.files.len()
376    }
377}