bonsaidb_files/
lib.rs

1//! Large file storage support for BonsaiDb.
2//!
3//! This crate provides support for storing large files in
4//! [BonsaiDb](https://bonsaidb.io/). While BonsaiDb's document size limit is 4
5//! gigabytes, the requirement that each document is loaded in memory fully can
6//! cause higher memory usage when storing larger files.
7//!
8//! # `FileConfig`
9//!
10//! The [`FileConfig`] trait allows customizing the [`CollectionName`]s and
11//! block size. If you want to use smaller or larger blocks, you can. If you
12//! want to store more than one set of files in the same database, you can use
13//! two [`FileConfig`] implementors with different [`CollectionName`]s.
14//!
//! The provided implementation, [`BonsaiFiles`], will work for most users.
17//!
18//! # Basic Example
19//!
20//! ```rust
21#![doc = include_str!("../examples/basic-files.rs")]
22//! ```
23//! 
24//! # Async Support
25//!
26//! This crate adds implementations of `tokio::io::AsyncRead` and
27//! `tokio::io::AsyncWrite` when the `async` feature flag is enabled.
28#![cfg_attr(feature = "async", doc = "```rust")]
29#![cfg_attr(feature = "async", doc = include_str!("../examples/basic-files-async.rs"))]
30#![cfg_attr(feature = "async", doc = "```")]
31#![forbid(unsafe_code)]
32#![warn(
33    clippy::cargo,
34    missing_docs,
35    // clippy::missing_docs_in_private_items,
36    clippy::pedantic,
37    future_incompatible,
38    rust_2018_idioms,
39)]
40#![allow(
41    clippy::missing_errors_doc, // TODO clippy::missing_errors_doc
42    clippy::option_if_let_else,
43    clippy::module_name_repetitions,
44)]
45
46use std::fmt::Debug;
47use std::marker::PhantomData;
48
49#[cfg(feature = "async")]
50use bonsaidb_core::async_trait::async_trait;
51#[cfg(feature = "async")]
52use bonsaidb_core::connection::AsyncConnection;
53use bonsaidb_core::connection::Connection;
54use bonsaidb_core::key::time::TimestampAsNanoseconds;
55use bonsaidb_core::schema::{
56    CollectionName, InsertError, Qualified, Schema, SchemaName, Schematic,
57};
58pub use bonsaidb_macros::FileConfig;
59use derive_where::derive_where;
60use serde::de::DeserializeOwned;
61use serde::Serialize;
62
63mod schema;
64
65/// Types for accessing files directly from a connection to a database. These
66/// types perform no permission checking beyond what BonsaiDb normally checks as
67/// part of accessing/updating the underlying collections.
68pub mod direct;
69
70/// A configuration for a set of [stored files](direct::File).
71#[cfg_attr(feature = "async", async_trait)]
72pub trait FileConfig: Sized + Send + Sync + Unpin + 'static {
73    /// The type of the `metadata` stored in [`File`](direct::File). If you do
74    /// not need to store metadata, you can set this type to `()`.
75    type Metadata: Serialize + DeserializeOwned + Send + Sync + Debug + Clone;
76
77    /// The maximum size for each write to an underlying file. The file will be
78    /// stored by breaking the data written into chunks no larger than
79    /// `BLOCK_SIZE`.
80    const BLOCK_SIZE: usize;
81    /// Returns the unique collection name to use to store [`File`s][direct::File].
82    fn files_name() -> CollectionName;
83    /// Returns the unique collection name to use to store file blocks.
84    fn blocks_name() -> CollectionName;
85
86    /// Registers the collections for this configuration into `schema`.
87    fn register_collections(schema: &mut Schematic) -> Result<(), bonsaidb_core::Error> {
88        schema.define_collection::<schema::file::File<Self>>()?;
89        schema.define_collection::<schema::block::Block<Self>>()?;
90
91        Ok(())
92    }
93
94    /// Builds a new file. If `name_or_path` starts with a `/`, the argument is
95    /// treated as a full path to the file being built. Otherwise, the argument
96    /// is treated as the file's name.
97    fn build<NameOrPath: AsRef<str>>(name_or_path: NameOrPath) -> direct::FileBuilder<'static, Self>
98    where
99        Self::Metadata: Default,
100    {
101        direct::FileBuilder::new(name_or_path, <Self::Metadata as Default>::default())
102    }
103
104    /// Builds a new file. If `name_or_path` starts with a `/`, the argument is
105    /// treated as a full path to the file being built. Otherwise, the argument
106    /// is treated as the file's name. The file's metadata will be `metadata`
107    /// upon creation. The file's metadata will be `metadata` upon creation.
108    fn build_with_metadata<NameOrPath: AsRef<str>>(
109        name_or_path: NameOrPath,
110        metadata: Self::Metadata,
111    ) -> direct::FileBuilder<'static, Self> {
112        direct::FileBuilder::new(name_or_path, metadata)
113    }
114
115    /// Returns the file with the unique `id` given, if found. This function
116    /// only loads metadata about the file, it does not load the contents of the
117    /// file.
118    fn get<Database: Connection + Clone>(
119        id: u32,
120        database: &Database,
121    ) -> Result<Option<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
122        direct::File::<_, Self>::get(id, database)
123    }
124
125    /// Returns the file located at `path`, if found. This function
126    /// only loads metadata about the file, it does not load the contents of the
127    /// file.
128    fn load<Database: Connection + Clone>(
129        path: &str,
130        database: &Database,
131    ) -> Result<Option<direct::File<direct::Blocking<Database>, Self>>, Error> {
132        direct::File::<_, Self>::load(path, database)
133    }
134
135    /// Returns the file locate at `path`, or creates an empty file if not
136    /// currently present.
137    ///
138    /// If `expect_present` is true, this function will first check for an
139    /// existing file before attempting to create the file. This parameter is
140    /// purely an optimization, and the function will work regardless of the
141    /// value. Pass true if you expect the file to be present a majority of the
142    /// time this function is invoked. For example, using this function to
143    /// retrieve a file created once and append to the same path in the future,
144    /// passing true will make this function slightly more optimized for the
145    /// most common flow.
146    ///
147    /// Regardless whether `expect_present` is true or false, this function will
148    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
149    /// ACID-compliance to notify of a conflict if another request succeeds
150    /// before this one. If a conflict occurs, this function will then attempt
151    /// to load the document. If the document has been deleted, the
152    /// [`Error::Deleted`] will be returned.
153    fn load_or_create<Database: Connection + Clone>(
154        path: &str,
155        expect_present: bool,
156        database: &Database,
157    ) -> Result<direct::File<direct::Blocking<Database>, Self>, Error>
158    where
159        Self::Metadata: Default,
160    {
161        Self::load_or_create_with_metadata(
162            path,
163            <Self::Metadata as Default>::default(),
164            expect_present,
165            database,
166        )
167    }
168
169    /// Returns the file locate at `path`, or creates an empty file if not
170    /// currently present.
171    ///
172    /// If `expect_present` is true, this function will first check for an
173    /// existing file before attempting to create the file. This parameter is
174    /// purely an optimization, and the function will work regardless of the
175    /// value. Pass true if you expect the file to be present a majority of the
176    /// time this function is invoked. For example, using this function to
177    /// retrieve a file created once and append to the same path in the future,
178    /// passing true will make this function slightly more optimized for the
179    /// most common flow.
180    ///
181    /// Regardless whether `expect_present` is true or false, this function will
182    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
183    /// ACID-compliance to notify of a conflict if another request succeeds
184    /// before this one. If a conflict occurs, this function will then attempt
185    /// to load the document. If the document has been deleted, the
186    /// [`Error::Deleted`] will be returned.
187    fn load_or_create_with_metadata<Database: Connection + Clone>(
188        path: &str,
189        metadata: Self::Metadata,
190        expect_present: bool,
191        database: &Database,
192    ) -> Result<direct::File<direct::Blocking<Database>, Self>, Error> {
193        // First, try loading the file if we expect the file will be present
194        // (ie, a singleton file that is always preseent after the first
195        // launch).
196        if expect_present {
197            if let Some(file) = direct::File::<_, Self>::load(path, database)? {
198                return Ok(file);
199            }
200        }
201
202        // File not found, or we are going to assume the file isn't present.
203        match Self::build_with_metadata(path, metadata).create(database) {
204            Ok(file) => Ok(file),
205            Err(Error::AlreadyExists) => {
206                // Rather than continue to loop, we will just propogate the
207                // previous error in the situation where the file was deleted
208                // between our failed attempt to create and the attempt to
209                // retrieve the conflicted document.
210                direct::File::<_, Self>::load(path, database)?.ok_or(Error::Deleted)
211            }
212            Err(other) => Err(other),
213        }
214    }
215
216    /// Deletes the file at `path`. Returns true if a file was deleted. Does not
217    /// error if the file is not found.
218    fn delete<Database: Connection + Clone>(
219        path: &str,
220        database: &Database,
221    ) -> Result<bool, Error> {
222        if let Some(file) = direct::File::<_, Self>::load(path, database)? {
223            file.delete()?;
224            Ok(true)
225        } else {
226            Ok(false)
227        }
228    }
229
230    /// Returns all files that have a containing path of exactly `path`. It will
231    /// only return files that have been created, and will not return "virtual"
232    /// directories that are part of a file's path but have never been created.
233    ///
234    /// This function only loads metadata about the files, it does not load the
235    /// contents of the files.
236    fn list<Database: Connection + Clone>(
237        path: &str,
238        database: &Database,
239    ) -> Result<Vec<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
240        direct::File::<_, Self>::list(path, database)
241    }
242
243    /// Returns all files that have a path starting with `path`.
244    ///
245    /// This function only loads metadata about the files, it does not load the
246    /// contents of the files.
247    fn list_recursive<Database: Connection + Clone>(
248        path: &str,
249        database: &Database,
250    ) -> Result<Vec<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
251        direct::File::<_, Self>::list_recursive(path, database)
252    }
253
254    /// Returns statistics for all files contained within this collection. This
255    /// is equivalent to calling [`Self::stats_for_path`] with `"/"` for the
256    /// path.
257    fn stats<Database: Connection + Clone>(
258        database: &Database,
259    ) -> Result<Statistics, bonsaidb_core::Error> {
260        Self::stats_for_path("/", database)
261    }
262
263    /// Returns statistics for all files whose path starts with `path`.
264    fn stats_for_path<Database: Connection + Clone>(
265        path: &str,
266        database: &Database,
267    ) -> Result<Statistics, bonsaidb_core::Error> {
268        direct::File::<_, Self>::stats_for_path(path, database)
269    }
270
271    /// Returns the file with the unique `id` given, if found. This function
272    /// only loads metadata about the file, it does not load the contents of the
273    /// file.
274    #[cfg(feature = "async")]
275    async fn get_async<Database: AsyncConnection + Clone>(
276        id: u32,
277        database: &Database,
278    ) -> Result<Option<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
279        direct::File::<_, Self>::get_async(id, database).await
280    }
281
282    /// Returns the file located at `path`, if found. This function
283    /// only loads metadata about the file, it does not load the contents of the
284    /// file.
285    #[cfg(feature = "async")]
286    async fn load_async<Database: AsyncConnection + Clone>(
287        path: &str,
288        database: &Database,
289    ) -> Result<Option<direct::File<direct::Async<Database>, Self>>, Error> {
290        direct::File::<_, Self>::load_async(path, database).await
291    }
292
293    /// Returns the file locate at `path`, or creates an empty file if not
294    /// currently present.
295    ///
296    /// If `expect_present` is true, this function will first check for an
297    /// existing file before attempting to create the file. This parameter is
298    /// purely an optimization, and the function will work regardless of the
299    /// value. Pass true if you expect the file to be present a majority of the
300    /// time this function is invoked. For example, using this function to
301    /// retrieve a file created once and append to the same path in the future,
302    /// passing true will make this function slightly more optimized for the
303    /// most common flow.
304    ///
305    /// Regardless whether `expect_present` is true or false, this function will
306    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
307    /// ACID-compliance to notify of a conflict if another request succeeds
308    /// before this one. If a conflict occurs, this function will then attempt
309    /// to load the document. If the document has been deleted, the
310    /// [`Error::Deleted`] will be returned.
311    #[cfg(feature = "async")]
312    async fn load_or_create_async<Database: AsyncConnection + Clone>(
313        path: &str,
314        expect_present: bool,
315        database: &Database,
316    ) -> Result<direct::File<direct::Async<Database>, Self>, Error>
317    where
318        Self::Metadata: Default,
319    {
320        Self::load_or_create_with_metadata_async(
321            path,
322            <Self::Metadata as Default>::default(),
323            expect_present,
324            database,
325        )
326        .await
327    }
328
329    /// Returns the file locate at `path`, or creates an empty file if not
330    /// currently present.
331    ///
332    /// If `expect_present` is true, this function will first check for an
333    /// existing file before attempting to create the file. This parameter is
334    /// purely an optimization, and the function will work regardless of the
335    /// value. Pass true if you expect the file to be present a majority of the
336    /// time this function is invoked. For example, using this function to
337    /// retrieve a file created once and append to the same path in the future,
338    /// passing true will make this function slightly more optimized for the
339    /// most common flow.
340    ///
341    /// Regardless whether `expect_present` is true or false, this function will
342    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
343    /// ACID-compliance to notify of a conflict if another request succeeds
344    /// before this one. If a conflict occurs, this function will then attempt
345    /// to load the document. If the document has been deleted, the
346    /// [`Error::Deleted`] will be returned.
347    #[cfg(feature = "async")]
348    async fn load_or_create_with_metadata_async<Database: AsyncConnection + Clone>(
349        path: &str,
350        metadata: Self::Metadata,
351        expect_present: bool,
352        database: &Database,
353    ) -> Result<direct::File<direct::Async<Database>, Self>, Error> {
354        // First, try loading the file if we expect the file will be present
355        // (ie, a singleton file that is always preseent after the first
356        // launch).
357        if expect_present {
358            if let Some(file) = direct::File::<_, Self>::load_async(path, database).await? {
359                return Ok(file);
360            }
361        }
362
363        // File not found, or we are going to assume the file isn't present.
364        match Self::build_with_metadata(path, metadata)
365            .create_async(database)
366            .await
367        {
368            Ok(file) => Ok(file),
369            Err(Error::AlreadyExists) => {
370                // Rather than continue to loop, we will just propogate the
371                // previous error in the situation where the file was deleted
372                // between our failed attempt to create and the attempt to
373                // retrieve the conflicted document.
374                direct::File::<_, Self>::load_async(path, database)
375                    .await?
376                    .ok_or(Error::Deleted)
377            }
378            Err(other) => Err(other),
379        }
380    }
381
382    /// Deletes the file at `path`. Returns true if a file was deleted. Does not
383    /// error if the file is not found.
384    #[cfg(feature = "async")]
385    async fn delete_async<Database: AsyncConnection + Clone>(
386        path: &str,
387        database: &Database,
388    ) -> Result<bool, Error> {
389        if let Some(file) = direct::File::<_, Self>::load_async(path, database).await? {
390            file.delete().await?;
391            Ok(true)
392        } else {
393            Ok(false)
394        }
395    }
396
397    /// Returns all files that have a containing path of exactly `path`. It will
398    /// only return files that have been created, and will not return "virtual"
399    /// directories that are part of a file's path but have never been created.
400    ///
401    /// This function only loads metadata about the files, it does not load the
402    /// contents of the files.
403    #[cfg(feature = "async")]
404    async fn list_async<Database: AsyncConnection + Clone>(
405        path: &str,
406        database: &Database,
407    ) -> Result<Vec<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
408        direct::File::<_, Self>::list_async(path, database).await
409    }
410
411    /// Returns all files that have a path starting with `path`.
412    ///
413    /// This function only loads metadata about the files, it does not load the
414    /// contents of the files.
415    #[cfg(feature = "async")]
416    async fn list_recursive_async<Database: AsyncConnection + Clone>(
417        path: &str,
418        database: &Database,
419    ) -> Result<Vec<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
420        direct::File::<_, Self>::list_recursive_async(path, database).await
421    }
422
423    /// Returns statistics for all files contained within this collection. This
424    /// is equivalent to calling [`Self::stats_for_path_async`] with `"/"` for the
425    /// path.
426    #[cfg(feature = "async")]
427    async fn stats_async<Database: AsyncConnection + Clone>(
428        database: &Database,
429    ) -> Result<Statistics, bonsaidb_core::Error> {
430        Self::stats_for_path_async("/", database).await
431    }
432
433    /// Returns statistics for all files whose path starts with `path`.
434    #[cfg(feature = "async")]
435    async fn stats_for_path_async<Database: AsyncConnection + Clone>(
436        path: &str,
437        database: &Database,
438    ) -> Result<Statistics, bonsaidb_core::Error> {
439        direct::File::<_, Self>::stats_for_path_async(path, database).await
440    }
441}
442
443/// A default configuration for storing files within BonsaiDb.
444#[derive(Debug)]
445pub struct BonsaiFiles;
446
447impl FileConfig for BonsaiFiles {
448    type Metadata = Option<()>;
449
450    const BLOCK_SIZE: usize = 65_536;
451
452    fn files_name() -> CollectionName {
453        CollectionName::new("bonsaidb", "files")
454    }
455
456    fn blocks_name() -> CollectionName {
457        CollectionName::new("bonsaidb", "blocks")
458    }
459}
460
461/// A schema implementation that allows using any [`FileConfig`] as a [`Schema`]
462/// without manually implementing [`Schema`].
463#[derive_where(Default, Debug)]
464pub struct FilesSchema<Config: FileConfig = BonsaiFiles>(PhantomData<Config>);
465
466impl<Config: FileConfig> Schema for FilesSchema<Config> {
467    fn schema_name() -> SchemaName {
468        SchemaName::from(Config::files_name())
469    }
470
471    fn define_collections(schema: &mut Schematic) -> Result<(), bonsaidb_core::Error> {
472        Config::register_collections(schema)
473    }
474}
475
476/// Errors that can be returned when interacting with files.
477#[derive(thiserror::Error, Debug)]
478pub enum Error {
479    /// An underlying database error was returned.
480    #[error("database error: {0}")]
481    Database(bonsaidb_core::Error),
482    /// A name contained an invalid character. Currently, the only disallowed
483    /// character is `/`.
484    #[error("names must not contain '/'")]
485    InvalidName,
486    /// An absolute path was expected, but the path provided did not include a
487    /// leading `/`.
488    #[error("all paths must start with a leading '/'")]
489    InvalidPath,
490    /// An attempt at creating a file failed because a file already existed.
491    #[error("a file already exists at the path provided")]
492    AlreadyExists,
493    /// The file was deleted during the operation.
494    #[error("the file was deleted during the operation")]
495    Deleted,
496}
497
498impl<T> From<InsertError<T>> for Error {
499    fn from(err: InsertError<T>) -> Self {
500        Self::from(err.error)
501    }
502}
503
504impl From<bonsaidb_core::Error> for Error {
505    fn from(err: bonsaidb_core::Error) -> Self {
506        match err {
507            bonsaidb_core::Error::UniqueKeyViolation { .. } => Self::AlreadyExists,
508            other => Self::Database(other),
509        }
510    }
511}
512
513impl From<Error> for bonsaidb_core::Error {
514    fn from(err: Error) -> Self {
515        match err {
516            Error::Database(err) => err,
517            other => Self::other("bonsaidb-files", other),
518        }
519    }
520}
521
/// Selects which end of a file loses data when the file is truncated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Truncate {
    /// Truncating removes data from the start (head) of the file.
    RemovingStart,
    /// Truncating removes data from the end (tail) of the file.
    RemovingEnd,
}
530
531/// Statistics about a set of files contained in a collection.
532#[derive(Debug, Clone, Copy, Eq, PartialEq)]
533pub struct Statistics {
534    /// The total number of bytes contained within the path queried. This only
535    /// counts bytes that would be returned when reading the contents of files.
536    /// No metadata information is included in this calculation.
537    pub total_bytes: u64,
538    /// The number of files contained within the path queried.
539    pub file_count: usize,
540    /// The last timestamp data was appended to a file within the path queried.
541    /// This contains `None` if there is no data present.
542    pub last_appended_at: Option<TimestampAsNanoseconds>,
543}
544
545#[cfg(test)]
546mod tests;