lmdb_zero/
lib.rs

1// Copyright 2016 FullContact, Inc
2// Copyright 2017 Jason Lingle
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! Near-zero-cost, mostly-safe idiomatic bindings to LMDB.
11//!
12//! This crate provides an interface to LMDB which as much as possible is not
13//! abstracted from the model LMDB itself provides, except as necessary to
14//! integrate with the borrow checker. This means that you don't get easy
15//! iterators, but also that you can do almost anything with LMDB through these
16//! bindings as you can through C.
17//!
18//! # Example
19//!
20//! ```
21//! extern crate lmdb_zero as lmdb;
22//! extern crate tempdir;
23//!
24//! # fn main() {
25//! #   let tmp = tempdir::TempDir::new_in(".", "lmdbzero").unwrap();
26//! #   example(tmp.path().to_str().unwrap());
27//! # }
28//! #
29//! fn example(path: &str) {
30//!   // Create the environment, that is, the file containing the database(s).
31//!   // This is unsafe because you need to ensure certain things about the
32//!   // host environment that these bindings can't help you with.
33//!   let env = unsafe {
34//!     lmdb::EnvBuilder::new().unwrap().open(
35//!       path, lmdb::open::Flags::empty(), 0o600).unwrap()
36//!   };
37//!   // Open the default database.
38//!   let db = lmdb::Database::open(
39//!     &env, None, &lmdb::DatabaseOptions::defaults())
40//!     .unwrap();
41//!   {
42//!     // Write some data in a transaction
43//!     let txn = lmdb::WriteTransaction::new(&env).unwrap();
44//!     // An accessor is used to control memory access.
45//!     // NB You can only have one live accessor from a particular transaction
46//!     // at a time. Violating this results in a panic at runtime.
47//!     {
48//!       let mut access = txn.access();
49//!       access.put(&db, "Germany", "Berlin", lmdb::put::Flags::empty()).unwrap();
50//!       access.put(&db, "France", "Paris", lmdb::put::Flags::empty()).unwrap();
51//!       access.put(&db, "Latvia", "Rīga", lmdb::put::Flags::empty()).unwrap();
52//!     }
53//!     // Commit the changes so they are visible to later transactions
54//!     txn.commit().unwrap();
55//!   }
56//!
57//!   {
58//!     // Now let's read the data back
59//!     let txn = lmdb::ReadTransaction::new(&env).unwrap();
60//!     let access = txn.access();
61//!
62//!     // Get the capital of Latvia. Note that the string is *not* copied; the
63//!     // reference actually points into the database memory, and is valid
64//!     // until the transaction is dropped or the accessor is mutated.
65//!     let capital_of_latvia: &str = access.get(&db, "Latvia").unwrap();
66//!     assert_eq!("Rīga", capital_of_latvia);
67//!
68//!     // We can also use cursors to move over the contents of the database.
69//!     let mut cursor = txn.cursor(&db).unwrap();
70//!     assert_eq!(("France", "Paris"), cursor.first(&access).unwrap());
71//!     assert_eq!(("Germany", "Berlin"), cursor.next(&access).unwrap());
72//!     assert_eq!(("Latvia", "Rīga"), cursor.next(&access).unwrap());
73//!     assert!(cursor.next::<str,str>(&access).is_err());
74//!   }
75//! }
76//! ```
77//!
78//! # Anatomy of this crate
79//!
80//! `Environment` is the top-level structure. It is created with an
81//! `EnvBuilder`. An `Environment` is a single file (by default in a
82//! subdirectory) which stores the actual data of all the databases it
83//! contains. It corresponds to an `MDB_env` in the C API.
84//!
85//! A `Database` is a single table of key/value pairs within an environment.
86//! Each environment has a single anonymous database, and may contain a number
87//! of named databases. Note that if you want to use named databases, you need
88//! to use `EnvBuilder::set_maxdbs` before creating the `Environment` to make
89//! room for the handles. A database can only have one `Database` handle per
90//! environment at a time.
91//!
92//! All accesses to the data within an environment are done through
93//! transactions. For this, there are the `ReadTransaction` and
94//! `WriteTransaction` structs. Both of these deref to a `ConstTransaction`,
95//! which provides most of the read-only functionality and can be used for
96//! writing code that can run within either type of transaction. Note that read
97//! transactions are far cheaper than write transactions.
98//!
99//! `ReadTransaction`s can be reused by using `reset()` to turn them into
100//! `ResetTransaction`s and then `refresh()` to turn them into fresh
101//! `ReadTransaction`s.
102//!
//! One unusual aspect of this crate is its `ConstAccessor` and
104//! `WriteAccessor` structs, which are obtained once from a transaction and
105//! used to perform actual data manipulation. These are needed to work with the
106//! borrow checker: Cursors have a lifetime bound by their transaction and thus
107//! borrow it, so we need something else to permit borrowing mutable data. The
108//! accessors reflect this borrowing: Reading from the database requires an
109//! immutable borrow of the accessor, while writing (which may invalidate
110//! pointers) requires a mutable borrow of the accessor, thus causing the
111//! borrow checker to ensure that all read accesses are disposed before any
112//! write.
113//!
114//! Finally, the `Cursor` struct can be created from a transaction to permit
115//! more flexible access to a database. Each `Cursor` corresponds to a
116//! `MDB_cursor`. Accessing data through a cursor requires borrowing
117//! appropriately from the accessor of the transaction owning the cursor.
118//!
119//! If you want to define your own types to store in the database, see the
120//! `lmdb_zero::traits` submodule.
121//!
122//! # Lifetimes and Ownership
123//!
124//! Lmdb-zero heavily uses lifetime parameters to allow user code to safely
125//! retain handles into LMDB without extra runtime overhead.
126//!
127//! While this makes the library very flexible, it also makes it somewhat
128//! harder to use when its types need to be referenced explicitly, for example
129//! as struct members. The documentation for each type with lifetime parameters
130//! therefore includes a short discussion of how the lifetimes are intended to
131//! interact and how best to work with them.
132//!
133//! It is also possible to opt-out of compile-time lifetime tracking and
134//! instead use `Arc` or `Rc` around various handles. In this case, all the
135//! lifetime parameters simply become `'static`. See the next section for
136//! details.
137//!
138//! ## Ownership Modes
139//!
140//! As of version 0.4.0, most APIs which construct a value which holds on to
141//! some "parent" value (e.g., creating a `Database` within an `Environment`)
142//! accept anything that can be converted into a [`Supercow`](https://docs.rs/supercow/0.1.0/supercow/).
143//! Deep understanding of `Supercow` itself is not required to use `lmdb-zero`.
144//! The only thing you need to know is that an `Into<Supercow<T>>` means that
145//! you can pass in one of three classes of arguments:
146//!
147//! - `&T`. This is "borrowed mode". The majority of the documentation in this
148//! crate uses borrowed mode. This is zero-overhead and is statically
149//! verifiable (i.e., all usage is checked at compile-time), so it is
150//! recommended that borrowed mode be used whenever reasonably possible. This
151//! mode causes the "child" value to hold a normal reference to the parent,
152//! which means that lifetimes must be tracked in the lifetime parameters. But
153//! because of this, this mode can be inflexible; for example, you cannot use
154//! safe Rust to create a `struct` holding both an `Environment` and its
155//! `Database`s using borrowed mode.
156//!
157//! - `Arc<T>`. This is "shared mode". For `NonSyncSupercow`, `Rc<T>` may also
158//! be used. The child will hold the `Arc` or `Rc`, thus ensuring the parent
159//! lives at least as long as the child. Because of this, the related lifetime
160//! parameters can simply be written as `'static`. It also means that
161//! `Arc`/`Rc` references to the child and parent can be placed together in the
162//! same struct with safe Rust. This comes at a cost: Constructing values in
163//! shared mode incurs allocation; additionally, the ability to statically
164//! verify the lifetime of the parent values is lost.
165//!
166//! - `T`. This is "owned mode". The parent is moved into the child value and
167//! owned by the child from thereon. This is most useful when you only ever
168//! want one child and don't care about retaining ownership of the parent. As
169//! with shared mode, it also allows simply using `'static` as the relevant
170//! lifetime parameters.
171//!
172//! # Major Differences from the LMDB C API
173//!
174//! Databases cannot be created or destroyed within a transaction due to the
175//! awkward memory management semantics. For similar reasons, opening a
176//! database more than once is not permitted (though note that LMDB doesn't
177//! strictly allow this either --- it just silently returns an existing
178//! handle).
179//!
180//! Access to data within the environment is guarded by transaction-specific
181//! "accessors", which must be used in conjunction with the cursor or
182//! transaction. This is how these bindings integrate with the borrow checker.
183//!
184//! APIs which obtain a reference to the owner of an object are not supported.
185//!
186//! Various APIs which radically change behaviour (including memory semantics)
187//! in response to flags are separated into different calls which each express
188//! their memory semantics clearly.
189//!
190//! # Non-Zero Cost
191//!
//! There are four general areas where this wrapper adds non-zero-cost
193//! abstractions:
194//!
195//! - Opening and closing databases adds locking overhead, since in LMDB it is
196//!   unsynchronised. This shouldn't have much impact since one rarely opens
197//!   and closes databases at a very high rate.
198//!
199//! - There is additional overhead in tracking what database handles are
200//!   currently open so that attempts to reopen one can be prevented.
201//!
202//! - Cursors and transactions track their owners separately. Additionally,
203//!   when two are used in conjunction, a runtime test is required to ensure
204//!   that they actually can be used together. This means that the handle
205//!   values are slightly larger and some function calls have an extra (very
206//!   predictable) branch if the optimiser does not optimise the branch away
207//!   entirely.
208//!
209//! - Using ownership modes other than borrowed (i.e., mundane references)
//!   incurs extra allocations in addition to the overhead inherent in that
211//!   ownership mode.
212//!
213//! # Using Zero-Copy
214//!
215//! This crate is primarily focussed on supporting zero-copy on all operations
216//! where this is possible. The examples above demonstrate one aspect of this:
217//! the `&str`s returned when querying for items are pointers into the database
218//! itself, valid as long as the accessor is.
219//!
220//! The main traits to look at are `lmdb_zero::traits::AsLmdbBytes` and
221//! `lmdb_zero::traits::FromLmdbBytes`, which are used to cast between byte
222//! arrays and the types to be stored in the database.
223//! `lmdb_zero::traits::FromReservedLmdbBytes` is used if you want to use the
224//! `reserve` methods (in which you write the key only to the database and get
225//! a pointer to a value to fill in after the fact). If you have a
226//! `#[repr(C)]`, `Copy` struct, you can also use `lmdb_zero::traits::LmdbRaw`
227//! if you just want to shove the raw struct itself into the database. All of
//! these have caveats which can be found in the documentation of each trait.
229//!
230//! Be aware that using zero-copy to save anything more interesting than byte
231//! strings means your databases will not be portable to other architectures.
232//! This mainly concerns byte-order, but types like `usize` whose size varies
233//! by platform can also cause problems.
234//!
235//! # Notes on Memory Safety
236//!
237//! It is not possible to use lmdb-zero without at least one unsafe block,
238//! because doing anything with a memory-mapped file requires making
239//! assumptions about the host environment. Lmdb-zero is not in a position to
240//! decide these assumptions, and so they are passed up to the caller.
241//!
242//! However, if these assumptions are met, it should be impossible to cause
243//! memory unsafety (eg, aliasing mutable references; dangling pointers; buffer
244//! under/overflows) by use of lmdb-zero's safe API.
245//!
246//! # Unavailable LMDB APIs
247//!
248//! - `mdb_env_copy`, `mdb_env_copyfd`: Only the `2`-suffixed versions that
249//! take flags are exposed.
250//!
251//! - `mdb_env_set_userctx`, `mdb_env_get_userctx`: Not generally useful for
252//! Rust; unclear how ownership would be expressed; would likely end up forcing
253//! an almost-never-used generic arg on `Environment` on everyone.
254//!
255//! - `mdb_env_set_assert`: Does not seem useful enough to expose.
256//!
257//! - `mdb_txn_env`, `mdb_cursor_txn`, `mdb_cursor_dbi`: Would allow violating
258//! borrow semantics.
259//!
260//! - `mdb_cmp`, `mdb_dcmp`: Doesn't seem useful; this would basically be a
261//! reinterpret cast from the input values to whatever the table comparator
262//! expects and then invoking the `Ord` implementation. If the types match,
263//! this is strictly inferior to just using `Ord` directly; if they don't, it
264//! at best is obfuscating, and at worst completely broken.
265//!
266//! - `mdb_set_relfunc`, `mdb_set_relctx`: Currently a noop in LMDB. Even if it
267//! weren't, it is unlikely that there is any remotely safe or convenient way
268//! to provide an interface to it.
269//!
270//! - `mdb_reader_list`: Doesn't seem useful enough to expose.
271
272#![deny(missing_docs)]
273
274extern crate liblmdb_sys as ffi;
275extern crate libc;
276extern crate supercow;
277#[macro_use] extern crate bitflags;
278#[cfg(test)] extern crate tempdir;
279
280use std::ffi::CStr;
281
282pub use ffi::mdb_mode_t as FileMode;
283pub use ffi::mdb_filehandle_t as Fd;
284
// Evaluates an expression yielding an integer status code exactly once.
// A status of zero lets execution continue past the macro; any other status
// makes the *enclosing function* return `Err(Error::Code(status))`, so this
// may only be used inside functions whose error type accepts that value.
macro_rules! lmdb_call {
    ($x:expr) => {{
        // Bind first so the expression is evaluated once and any temporaries
        // it creates are released before the status is inspected.
        let status = $x;
        match status {
            0 => {}
            _ => return Err($crate::Error::Code(status)),
        }
    }};
}
293
294/// Returns the LMDB version as a string.
295pub fn version_str() -> &'static str {
296    let mut major: libc::c_int = 0;
297    let mut minor: libc::c_int = 0;
298    let mut rev: libc::c_int = 0;
299    unsafe {
300        CStr::from_ptr(ffi::mdb_version(&mut major, &mut minor, &mut rev))
301            .to_str().unwrap_or("(invalid)")
302    }
303}
304
305/// Returns the LMDB version as (major, minor, revision).
306pub fn version() -> (i32, i32, i32) {
307    let mut major: libc::c_int = 0;
308    let mut minor: libc::c_int = 0;
309    let mut rev: libc::c_int = 0;
310    unsafe {
311        ffi::mdb_version(&mut major, &mut minor, &mut rev);
312    }
313    (major as i32, minor as i32, rev as i32)
314}
315
/// Empty type used to indicate "don't care" when reading values from LMDB.
///
/// `FromLmdbBytes` is implemented for this type by simply returning its only
/// value without inspecting anything.
///
/// Being a unit struct, it carries no data. It is meant for positions where a
/// type to read must be named but the bytes themselves are not of interest.
pub struct Ignore;
321
322mod mdb_vals;
323mod ffi2;
324#[cfg(test)] mod test_helpers;
325
326pub mod error;
327pub use error::{Error, LmdbResultExt, Result};
328
329mod env;
330pub use env::{open, copy, EnvBuilder, Environment, Stat, EnvInfo};
331
332mod dbi;
333pub use dbi::{db, Database, DatabaseOptions};
334
335pub mod traits;
336mod unaligned;
337pub use unaligned::{Unaligned, unaligned};
338
339mod tx;
340pub use tx::{ConstTransaction, ReadTransaction, WriteTransaction};
341pub use tx::ResetTransaction;
342pub use tx::{ConstAccessor, WriteAccessor};
343pub use tx::{put, del};
344
345mod cursor;
346pub use cursor::{StaleCursor, Cursor};
347
348mod iter;
349pub use iter::{CursorIter, MaybeOwned};