bonsaidb_files/lib.rs
1//! Large file storage support for BonsaiDb.
2//!
3//! This crate provides support for storing large files in
4//! [BonsaiDb](https://bonsaidb.io/). While BonsaiDb's document size limit is 4
5//! gigabytes, the requirement that each document is loaded in memory fully can
6//! cause higher memory usage when storing larger files.
7//!
8//! # `FileConfig`
9//!
10//! The [`FileConfig`] trait allows customizing the [`CollectionName`]s and
11//! block size. If you want to use smaller or larger blocks, you can. If you
12//! want to store more than one set of files in the same database, you can use
13//! two [`FileConfig`] implementors with different [`CollectionName`]s.
14//!
//! The provided implementation, [`BonsaiFiles`], will work for most users.
17//!
18//! # Basic Example
19//!
20//! ```rust
21#![doc = include_str!("../examples/basic-files.rs")]
22//! ```
23//!
24//! # Async Support
25//!
26//! This crate adds implementations of `tokio::io::AsyncRead` and
27//! `tokio::io::AsyncWrite` when the `async` feature flag is enabled.
28#![cfg_attr(feature = "async", doc = "```rust")]
29#![cfg_attr(feature = "async", doc = include_str!("../examples/basic-files-async.rs"))]
30#![cfg_attr(feature = "async", doc = "```")]
31#![forbid(unsafe_code)]
32#![warn(
33 clippy::cargo,
34 missing_docs,
35 // clippy::missing_docs_in_private_items,
36 clippy::pedantic,
37 future_incompatible,
38 rust_2018_idioms,
39)]
40#![allow(
41 clippy::missing_errors_doc, // TODO clippy::missing_errors_doc
42 clippy::option_if_let_else,
43 clippy::module_name_repetitions,
44)]
45
46use std::fmt::Debug;
47use std::marker::PhantomData;
48
49#[cfg(feature = "async")]
50use bonsaidb_core::async_trait::async_trait;
51#[cfg(feature = "async")]
52use bonsaidb_core::connection::AsyncConnection;
53use bonsaidb_core::connection::Connection;
54use bonsaidb_core::key::time::TimestampAsNanoseconds;
55use bonsaidb_core::schema::{
56 CollectionName, InsertError, Qualified, Schema, SchemaName, Schematic,
57};
58pub use bonsaidb_macros::FileConfig;
59use derive_where::derive_where;
60use serde::de::DeserializeOwned;
61use serde::Serialize;
62
63mod schema;
64
65/// Types for accessing files directly from a connection to a database. These
66/// types perform no permission checking beyond what BonsaiDb normally checks as
67/// part of accessing/updating the underlying collections.
68pub mod direct;
69
70/// A configuration for a set of [stored files](direct::File).
71#[cfg_attr(feature = "async", async_trait)]
72pub trait FileConfig: Sized + Send + Sync + Unpin + 'static {
73 /// The type of the `metadata` stored in [`File`](direct::File). If you do
74 /// not need to store metadata, you can set this type to `()`.
75 type Metadata: Serialize + DeserializeOwned + Send + Sync + Debug + Clone;
76
77 /// The maximum size for each write to an underlying file. The file will be
78 /// stored by breaking the data written into chunks no larger than
79 /// `BLOCK_SIZE`.
80 const BLOCK_SIZE: usize;
81 /// Returns the unique collection name to use to store [`File`s][direct::File].
82 fn files_name() -> CollectionName;
83 /// Returns the unique collection name to use to store file blocks.
84 fn blocks_name() -> CollectionName;
85
86 /// Registers the collections for this configuration into `schema`.
87 fn register_collections(schema: &mut Schematic) -> Result<(), bonsaidb_core::Error> {
88 schema.define_collection::<schema::file::File<Self>>()?;
89 schema.define_collection::<schema::block::Block<Self>>()?;
90
91 Ok(())
92 }
93
94 /// Builds a new file. If `name_or_path` starts with a `/`, the argument is
95 /// treated as a full path to the file being built. Otherwise, the argument
96 /// is treated as the file's name.
97 fn build<NameOrPath: AsRef<str>>(name_or_path: NameOrPath) -> direct::FileBuilder<'static, Self>
98 where
99 Self::Metadata: Default,
100 {
101 direct::FileBuilder::new(name_or_path, <Self::Metadata as Default>::default())
102 }
103
104 /// Builds a new file. If `name_or_path` starts with a `/`, the argument is
105 /// treated as a full path to the file being built. Otherwise, the argument
106 /// is treated as the file's name. The file's metadata will be `metadata`
107 /// upon creation. The file's metadata will be `metadata` upon creation.
108 fn build_with_metadata<NameOrPath: AsRef<str>>(
109 name_or_path: NameOrPath,
110 metadata: Self::Metadata,
111 ) -> direct::FileBuilder<'static, Self> {
112 direct::FileBuilder::new(name_or_path, metadata)
113 }
114
115 /// Returns the file with the unique `id` given, if found. This function
116 /// only loads metadata about the file, it does not load the contents of the
117 /// file.
118 fn get<Database: Connection + Clone>(
119 id: u32,
120 database: &Database,
121 ) -> Result<Option<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
122 direct::File::<_, Self>::get(id, database)
123 }
124
125 /// Returns the file located at `path`, if found. This function
126 /// only loads metadata about the file, it does not load the contents of the
127 /// file.
128 fn load<Database: Connection + Clone>(
129 path: &str,
130 database: &Database,
131 ) -> Result<Option<direct::File<direct::Blocking<Database>, Self>>, Error> {
132 direct::File::<_, Self>::load(path, database)
133 }
134
135 /// Returns the file locate at `path`, or creates an empty file if not
136 /// currently present.
137 ///
138 /// If `expect_present` is true, this function will first check for an
139 /// existing file before attempting to create the file. This parameter is
140 /// purely an optimization, and the function will work regardless of the
141 /// value. Pass true if you expect the file to be present a majority of the
142 /// time this function is invoked. For example, using this function to
143 /// retrieve a file created once and append to the same path in the future,
144 /// passing true will make this function slightly more optimized for the
145 /// most common flow.
146 ///
147 /// Regardless whether `expect_present` is true or false, this function will
148 /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
149 /// ACID-compliance to notify of a conflict if another request succeeds
150 /// before this one. If a conflict occurs, this function will then attempt
151 /// to load the document. If the document has been deleted, the
152 /// [`Error::Deleted`] will be returned.
153 fn load_or_create<Database: Connection + Clone>(
154 path: &str,
155 expect_present: bool,
156 database: &Database,
157 ) -> Result<direct::File<direct::Blocking<Database>, Self>, Error>
158 where
159 Self::Metadata: Default,
160 {
161 Self::load_or_create_with_metadata(
162 path,
163 <Self::Metadata as Default>::default(),
164 expect_present,
165 database,
166 )
167 }
168
169 /// Returns the file locate at `path`, or creates an empty file if not
170 /// currently present.
171 ///
172 /// If `expect_present` is true, this function will first check for an
173 /// existing file before attempting to create the file. This parameter is
174 /// purely an optimization, and the function will work regardless of the
175 /// value. Pass true if you expect the file to be present a majority of the
176 /// time this function is invoked. For example, using this function to
177 /// retrieve a file created once and append to the same path in the future,
178 /// passing true will make this function slightly more optimized for the
179 /// most common flow.
180 ///
181 /// Regardless whether `expect_present` is true or false, this function will
182 /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
183 /// ACID-compliance to notify of a conflict if another request succeeds
184 /// before this one. If a conflict occurs, this function will then attempt
185 /// to load the document. If the document has been deleted, the
186 /// [`Error::Deleted`] will be returned.
187 fn load_or_create_with_metadata<Database: Connection + Clone>(
188 path: &str,
189 metadata: Self::Metadata,
190 expect_present: bool,
191 database: &Database,
192 ) -> Result<direct::File<direct::Blocking<Database>, Self>, Error> {
193 // First, try loading the file if we expect the file will be present
194 // (ie, a singleton file that is always preseent after the first
195 // launch).
196 if expect_present {
197 if let Some(file) = direct::File::<_, Self>::load(path, database)? {
198 return Ok(file);
199 }
200 }
201
202 // File not found, or we are going to assume the file isn't present.
203 match Self::build_with_metadata(path, metadata).create(database) {
204 Ok(file) => Ok(file),
205 Err(Error::AlreadyExists) => {
206 // Rather than continue to loop, we will just propogate the
207 // previous error in the situation where the file was deleted
208 // between our failed attempt to create and the attempt to
209 // retrieve the conflicted document.
210 direct::File::<_, Self>::load(path, database)?.ok_or(Error::Deleted)
211 }
212 Err(other) => Err(other),
213 }
214 }
215
216 /// Deletes the file at `path`. Returns true if a file was deleted. Does not
217 /// error if the file is not found.
218 fn delete<Database: Connection + Clone>(
219 path: &str,
220 database: &Database,
221 ) -> Result<bool, Error> {
222 if let Some(file) = direct::File::<_, Self>::load(path, database)? {
223 file.delete()?;
224 Ok(true)
225 } else {
226 Ok(false)
227 }
228 }
229
230 /// Returns all files that have a containing path of exactly `path`. It will
231 /// only return files that have been created, and will not return "virtual"
232 /// directories that are part of a file's path but have never been created.
233 ///
234 /// This function only loads metadata about the files, it does not load the
235 /// contents of the files.
236 fn list<Database: Connection + Clone>(
237 path: &str,
238 database: &Database,
239 ) -> Result<Vec<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
240 direct::File::<_, Self>::list(path, database)
241 }
242
243 /// Returns all files that have a path starting with `path`.
244 ///
245 /// This function only loads metadata about the files, it does not load the
246 /// contents of the files.
247 fn list_recursive<Database: Connection + Clone>(
248 path: &str,
249 database: &Database,
250 ) -> Result<Vec<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
251 direct::File::<_, Self>::list_recursive(path, database)
252 }
253
254 /// Returns statistics for all files contained within this collection. This
255 /// is equivalent to calling [`Self::stats_for_path`] with `"/"` for the
256 /// path.
257 fn stats<Database: Connection + Clone>(
258 database: &Database,
259 ) -> Result<Statistics, bonsaidb_core::Error> {
260 Self::stats_for_path("/", database)
261 }
262
263 /// Returns statistics for all files whose path starts with `path`.
264 fn stats_for_path<Database: Connection + Clone>(
265 path: &str,
266 database: &Database,
267 ) -> Result<Statistics, bonsaidb_core::Error> {
268 direct::File::<_, Self>::stats_for_path(path, database)
269 }
270
271 /// Returns the file with the unique `id` given, if found. This function
272 /// only loads metadata about the file, it does not load the contents of the
273 /// file.
274 #[cfg(feature = "async")]
275 async fn get_async<Database: AsyncConnection + Clone>(
276 id: u32,
277 database: &Database,
278 ) -> Result<Option<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
279 direct::File::<_, Self>::get_async(id, database).await
280 }
281
282 /// Returns the file located at `path`, if found. This function
283 /// only loads metadata about the file, it does not load the contents of the
284 /// file.
285 #[cfg(feature = "async")]
286 async fn load_async<Database: AsyncConnection + Clone>(
287 path: &str,
288 database: &Database,
289 ) -> Result<Option<direct::File<direct::Async<Database>, Self>>, Error> {
290 direct::File::<_, Self>::load_async(path, database).await
291 }
292
293 /// Returns the file locate at `path`, or creates an empty file if not
294 /// currently present.
295 ///
296 /// If `expect_present` is true, this function will first check for an
297 /// existing file before attempting to create the file. This parameter is
298 /// purely an optimization, and the function will work regardless of the
299 /// value. Pass true if you expect the file to be present a majority of the
300 /// time this function is invoked. For example, using this function to
301 /// retrieve a file created once and append to the same path in the future,
302 /// passing true will make this function slightly more optimized for the
303 /// most common flow.
304 ///
305 /// Regardless whether `expect_present` is true or false, this function will
306 /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
307 /// ACID-compliance to notify of a conflict if another request succeeds
308 /// before this one. If a conflict occurs, this function will then attempt
309 /// to load the document. If the document has been deleted, the
310 /// [`Error::Deleted`] will be returned.
311 #[cfg(feature = "async")]
312 async fn load_or_create_async<Database: AsyncConnection + Clone>(
313 path: &str,
314 expect_present: bool,
315 database: &Database,
316 ) -> Result<direct::File<direct::Async<Database>, Self>, Error>
317 where
318 Self::Metadata: Default,
319 {
320 Self::load_or_create_with_metadata_async(
321 path,
322 <Self::Metadata as Default>::default(),
323 expect_present,
324 database,
325 )
326 .await
327 }
328
329 /// Returns the file locate at `path`, or creates an empty file if not
330 /// currently present.
331 ///
332 /// If `expect_present` is true, this function will first check for an
333 /// existing file before attempting to create the file. This parameter is
334 /// purely an optimization, and the function will work regardless of the
335 /// value. Pass true if you expect the file to be present a majority of the
336 /// time this function is invoked. For example, using this function to
337 /// retrieve a file created once and append to the same path in the future,
338 /// passing true will make this function slightly more optimized for the
339 /// most common flow.
340 ///
341 /// Regardless whether `expect_present` is true or false, this function will
342 /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
343 /// ACID-compliance to notify of a conflict if another request succeeds
344 /// before this one. If a conflict occurs, this function will then attempt
345 /// to load the document. If the document has been deleted, the
346 /// [`Error::Deleted`] will be returned.
347 #[cfg(feature = "async")]
348 async fn load_or_create_with_metadata_async<Database: AsyncConnection + Clone>(
349 path: &str,
350 metadata: Self::Metadata,
351 expect_present: bool,
352 database: &Database,
353 ) -> Result<direct::File<direct::Async<Database>, Self>, Error> {
354 // First, try loading the file if we expect the file will be present
355 // (ie, a singleton file that is always preseent after the first
356 // launch).
357 if expect_present {
358 if let Some(file) = direct::File::<_, Self>::load_async(path, database).await? {
359 return Ok(file);
360 }
361 }
362
363 // File not found, or we are going to assume the file isn't present.
364 match Self::build_with_metadata(path, metadata)
365 .create_async(database)
366 .await
367 {
368 Ok(file) => Ok(file),
369 Err(Error::AlreadyExists) => {
370 // Rather than continue to loop, we will just propogate the
371 // previous error in the situation where the file was deleted
372 // between our failed attempt to create and the attempt to
373 // retrieve the conflicted document.
374 direct::File::<_, Self>::load_async(path, database)
375 .await?
376 .ok_or(Error::Deleted)
377 }
378 Err(other) => Err(other),
379 }
380 }
381
382 /// Deletes the file at `path`. Returns true if a file was deleted. Does not
383 /// error if the file is not found.
384 #[cfg(feature = "async")]
385 async fn delete_async<Database: AsyncConnection + Clone>(
386 path: &str,
387 database: &Database,
388 ) -> Result<bool, Error> {
389 if let Some(file) = direct::File::<_, Self>::load_async(path, database).await? {
390 file.delete().await?;
391 Ok(true)
392 } else {
393 Ok(false)
394 }
395 }
396
397 /// Returns all files that have a containing path of exactly `path`. It will
398 /// only return files that have been created, and will not return "virtual"
399 /// directories that are part of a file's path but have never been created.
400 ///
401 /// This function only loads metadata about the files, it does not load the
402 /// contents of the files.
403 #[cfg(feature = "async")]
404 async fn list_async<Database: AsyncConnection + Clone>(
405 path: &str,
406 database: &Database,
407 ) -> Result<Vec<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
408 direct::File::<_, Self>::list_async(path, database).await
409 }
410
411 /// Returns all files that have a path starting with `path`.
412 ///
413 /// This function only loads metadata about the files, it does not load the
414 /// contents of the files.
415 #[cfg(feature = "async")]
416 async fn list_recursive_async<Database: AsyncConnection + Clone>(
417 path: &str,
418 database: &Database,
419 ) -> Result<Vec<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
420 direct::File::<_, Self>::list_recursive_async(path, database).await
421 }
422
423 /// Returns statistics for all files contained within this collection. This
424 /// is equivalent to calling [`Self::stats_for_path_async`] with `"/"` for the
425 /// path.
426 #[cfg(feature = "async")]
427 async fn stats_async<Database: AsyncConnection + Clone>(
428 database: &Database,
429 ) -> Result<Statistics, bonsaidb_core::Error> {
430 Self::stats_for_path_async("/", database).await
431 }
432
433 /// Returns statistics for all files whose path starts with `path`.
434 #[cfg(feature = "async")]
435 async fn stats_for_path_async<Database: AsyncConnection + Clone>(
436 path: &str,
437 database: &Database,
438 ) -> Result<Statistics, bonsaidb_core::Error> {
439 direct::File::<_, Self>::stats_for_path_async(path, database).await
440 }
441}
442
/// A default configuration for storing files within BonsaiDb.
///
/// This is a unit type that is only used through its [`FileConfig`]
/// implementation. It is also the default `Config` parameter of
/// [`FilesSchema`].
#[derive(Debug)]
pub struct BonsaiFiles;
446
447impl FileConfig for BonsaiFiles {
448 type Metadata = Option<()>;
449
450 const BLOCK_SIZE: usize = 65_536;
451
452 fn files_name() -> CollectionName {
453 CollectionName::new("bonsaidb", "files")
454 }
455
456 fn blocks_name() -> CollectionName {
457 CollectionName::new("bonsaidb", "blocks")
458 }
459}
460
/// A schema implementation that allows using any [`FileConfig`] as a [`Schema`]
/// without manually implementing [`Schema`].
///
/// `Config` defaults to [`BonsaiFiles`]. The struct holds no data of its own;
/// `Config` is carried only as a type parameter through [`PhantomData`].
// NOTE(review): `derive_where` is presumably used so that `Default` and `Debug`
// do not require `Config` itself to implement them — confirm against the
// `derive_where` documentation.
#[derive_where(Default, Debug)]
pub struct FilesSchema<Config: FileConfig = BonsaiFiles>(PhantomData<Config>);
465
466impl<Config: FileConfig> Schema for FilesSchema<Config> {
467 fn schema_name() -> SchemaName {
468 SchemaName::from(Config::files_name())
469 }
470
471 fn define_collections(schema: &mut Schematic) -> Result<(), bonsaidb_core::Error> {
472 Config::register_collections(schema)
473 }
474}
475
/// Errors that can be returned when interacting with files.
///
/// Conversions exist in both directions between this type and
/// [`bonsaidb_core::Error`].
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// An underlying database error was returned.
    #[error("database error: {0}")]
    Database(bonsaidb_core::Error),
    /// A name contained an invalid character. Currently, the only disallowed
    /// character is `/`.
    #[error("names must not contain '/'")]
    InvalidName,
    /// An absolute path was expected, but the path provided did not include a
    /// leading `/`.
    #[error("all paths must start with a leading '/'")]
    InvalidPath,
    /// An attempt at creating a file failed because a file already existed.
    ///
    /// This variant is also what a `bonsaidb_core::Error::UniqueKeyViolation`
    /// is converted into.
    #[error("a file already exists at the path provided")]
    AlreadyExists,
    /// The file was deleted during the operation.
    ///
    /// For example, the `load_or_create` family of functions returns this when
    /// creation reports a conflict but the conflicting file can no longer be
    /// loaded.
    #[error("the file was deleted during the operation")]
    Deleted,
}
497
498impl<T> From<InsertError<T>> for Error {
499 fn from(err: InsertError<T>) -> Self {
500 Self::from(err.error)
501 }
502}
503
504impl From<bonsaidb_core::Error> for Error {
505 fn from(err: bonsaidb_core::Error) -> Self {
506 match err {
507 bonsaidb_core::Error::UniqueKeyViolation { .. } => Self::AlreadyExists,
508 other => Self::Database(other),
509 }
510 }
511}
512
513impl From<Error> for bonsaidb_core::Error {
514 fn from(err: Error) -> Self {
515 match err {
516 Error::Database(err) => err,
517 other => Self::other("bonsaidb-files", other),
518 }
519 }
520}
521
/// Controls which location of a file to remove data from during a truncation.
///
/// The two variants select opposite ends of the file: its start (head) or its
/// end (tail).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Truncate {
    /// Remove data from the start (head) of the file when truncating.
    RemovingStart,
    /// Remove data from the end (tail) of the file when truncating.
    RemovingEnd,
}
530
/// Statistics about a set of files contained in a collection.
///
/// This is the value returned by the `stats` and `stats_for_path` functions of
/// [`FileConfig`] and by their `_async` counterparts.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Statistics {
    /// The total number of bytes contained within the path queried. This only
    /// counts bytes that would be returned when reading the contents of files.
    /// No metadata information is included in this calculation.
    pub total_bytes: u64,
    /// The number of files contained within the path queried.
    pub file_count: usize,
    /// The last timestamp data was appended to a file within the path queried.
    /// This contains `None` if there is no data present.
    pub last_appended_at: Option<TimestampAsNanoseconds>,
}
544
545#[cfg(test)]
546mod tests;