rbson/raw/
mod.rs

1//! An API for interacting with raw BSON bytes.
2//!
3//! This module provides two document types, [`RawDocumentBuf`] and [`RawDocument`] (akin to
4//! [`std::string::String`] and [`str`]), for working with raw BSON documents. These types differ
5//! from the regular [`crate::Document`] type in that their storage is BSON bytes rather than a
6//! hash-map like Rust type. In certain circumstances, these types can be leveraged for increased
7//! performance.
8//!
9//! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a
10//! [`RawArray`] type for modeling a borrowed slice of a document containing a BSON array element.
11//!
12//! A [`RawDocumentBuf`] can be created from a `Vec<u8>` containing raw BSON data. A
13//! [`RawDocument`] can be created from anything that can be borrowed as a `&[u8]`. Both types
14//! can access elements via methods similar to those available on the [`crate::Document`] type.
//! Note that [`RawDocument::get`] (which [`RawDocumentBuf`] calls through to via its `Deref`
//! implementation) returns a `Result`, since the bytes contained in the document are not fully
//! validated until trying to access the contained data.
18//!
19//! ```rust
20//! use bson::raw::{
21//!     RawBson,
22//!     RawDocumentBuf,
23//! };
24//!
25//! // See http://bsonspec.org/spec.html for details on the binary encoding of BSON.
26//! let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?;
27//! let elem = doc.get("hi")?.unwrap();
28//!
29//! assert_eq!(
30//!   elem.as_str(),
31//!   Some("y'all"),
32//! );
33//! # Ok::<(), bson::raw::Error>(())
34//! ```
35//!
36//! ### [`crate::Document`] interop
37//!
//! A [`RawDocumentBuf`] can be created from a [`crate::Document`]. Internally, this
//! serializes the [`crate::Document`] to a `Vec<u8>`, and then includes those bytes in the
//! [`RawDocumentBuf`].
41//!
42//! ```rust
43//! use bson::{
44//!     raw::RawDocumentBuf,
45//!     doc,
46//! };
47//!
48//! let document = doc! {
49//!    "goodbye": {
50//!        "cruel": "world"
51//!    }
52//! };
53//!
54//! let raw = RawDocumentBuf::from_document(&document)?;
55//! let value = raw
56//!     .get_document("goodbye")?
57//!     .get_str("cruel")?;
58//!
59//! assert_eq!(
60//!     value,
61//!     "world",
62//! );
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ### Reference type ([`RawDocument`])
67//!
68//! A BSON document can also be accessed with the [`RawDocument`] type, which is an
69//! unsized type that represents the BSON payload as a `[u8]`. This allows accessing nested
70//! documents without reallocation. [`RawDocument`] must always be accessed via a pointer type,
71//! similar to `[T]` and `str`.
72//!
//! The example below constructs a BSON document in a fixed-size byte array
//! and extracts a `&str` from it, performing no heap allocation.
//!
75//! ```rust
76//! use bson::raw::RawDocument;
77//!
78//! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00";
79//! assert_eq!(RawDocument::new(bytes)?.get_str("hi")?, "y'all");
80//! # Ok::<(), Box<dyn std::error::Error>>(())
81//! ```
82//!
83//! ### Iteration
84//!
//! [`RawDocument`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be
//! accessed via [`RawDocumentBuf::iter`].
//!
88//! ```rust
89//! use bson::{
90//!    raw::{
91//!        RawBson,
92//!        RawDocumentBuf,
93//!    },
94//!    doc,
95//! };
96//!
97//! let original_doc = doc! {
98//!     "crate": "bson",
99//!     "year": "2021",
100//! };
101//!
102//! let doc = RawDocumentBuf::from_document(&original_doc)?;
103//! let mut doc_iter = doc.iter();
104//!
105//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?;
106//! assert_eq!(key, "crate");
107//! assert_eq!(value.as_str(), Some("bson"));
108//!
109//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?;
110//! assert_eq!(key, "year");
111//! assert_eq!(value.as_str(), Some("2021"));
112//! # Ok::<(), bson::raw::Error>(())
113//! ```
114
115mod array;
116mod bson;
117mod document;
118mod document_buf;
119mod error;
120mod iter;
121#[cfg(test)]
122mod test;
123
124use std::convert::{TryFrom, TryInto};
125
126use crate::de::MIN_BSON_STRING_SIZE;
127
128pub use self::{
129    array::{RawArray, RawArrayIter},
130    bson::{RawBinary, RawBson},
131    document::RawDocument,
132    document_buf::RawDocumentBuf,
133    error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult},
134    iter::Iter,
135};
136
137pub(crate) use self::bson::RawBsonVisitor;
138
/// Reserved newtype name signalling that the value being (de)serialized is a raw BSON document.
pub(crate) const RAW_DOCUMENT_NEWTYPE: &str = "$__private__bson_RawDocument";

/// Reserved newtype name signalling that the value being (de)serialized is a raw BSON array.
pub(crate) const RAW_ARRAY_NEWTYPE: &str = "$__private__bson_RawArray";

/// Reserved newtype name signalling that the value being (de)serialized is a raw BSON value.
pub(crate) const RAW_BSON_NEWTYPE: &str = "$__private__bson_RawBson";
147
148/// Given a u8 slice, return an i32 calculated from the first four bytes in
149/// little endian order.
150fn f64_from_slice(val: &[u8]) -> Result<f64> {
151    let arr = val
152        .get(0..8)
153        .and_then(|s| s.try_into().ok())
154        .ok_or_else(|| {
155            Error::new_without_key(ErrorKind::MalformedValue {
156                message: format!("expected 8 bytes to read double, instead got {}", val.len()),
157            })
158        })?;
159    Ok(f64::from_le_bytes(arr))
160}
161
162/// Given a u8 slice, return an i32 calculated from the first four bytes in
163/// little endian order.
164fn i32_from_slice(val: &[u8]) -> Result<i32> {
165    let arr = val
166        .get(0..4)
167        .and_then(|s| s.try_into().ok())
168        .ok_or_else(|| {
169            Error::new_without_key(ErrorKind::MalformedValue {
170                message: format!("expected 4 bytes to read i32, instead got {}", val.len()),
171            })
172        })?;
173    Ok(i32::from_le_bytes(arr))
174}
175
176/// Given an u8 slice, return an i64 calculated from the first 8 bytes in
177/// little endian order.
178fn i64_from_slice(val: &[u8]) -> Result<i64> {
179    let arr = val
180        .get(0..8)
181        .and_then(|s| s.try_into().ok())
182        .ok_or_else(|| {
183            Error::new_without_key(ErrorKind::MalformedValue {
184                message: format!("expected 8 bytes to read i64, instead got {}", val.len()),
185            })
186        })?;
187    Ok(i64::from_le_bytes(arr))
188}
189
190
191/// Given a u8 slice, return an i32 calculated from the first four bytes in
192/// little endian order.
193fn u32_from_slice(val: &[u8]) -> Result<u32> {
194    let arr = val
195        .get(0..4)
196        .and_then(|s| s.try_into().ok())
197        .ok_or_else(|| {
198            Error::new_without_key(ErrorKind::MalformedValue {
199                message: format!("expected 4 bytes to read i32, instead got {}", val.len()),
200            })
201        })?;
202    Ok(u32::from_le_bytes(arr))
203}
204
205/// Given an u8 slice, return an i64 calculated from the first 8 bytes in
206/// little endian order.
207fn u64_from_slice(val: &[u8]) -> Result<u64> {
208    let arr = val
209        .get(0..8)
210        .and_then(|s| s.try_into().ok())
211        .ok_or_else(|| {
212            Error::new_without_key(ErrorKind::MalformedValue {
213                message: format!("expected 8 bytes to read i64, instead got {}", val.len()),
214            })
215        })?;
216    Ok(u64::from_le_bytes(arr))
217}
218
219fn read_nullterminated(buf: &[u8]) -> Result<&str> {
220    let mut splits = buf.splitn(2, |x| *x == 0);
221    let value = splits.next().ok_or_else(|| {
222        Error::new_without_key(ErrorKind::MalformedValue {
223            message: "no value".into(),
224        })
225    })?;
226    if splits.next().is_some() {
227        Ok(try_to_str(value)?)
228    } else {
229        Err(Error::new_without_key(ErrorKind::MalformedValue {
230            message: "expected null terminator".into(),
231        }))
232    }
233}
234
235fn read_lenencoded(buf: &[u8]) -> Result<&str> {
236    let length = i32_from_slice(&buf[..4])?;
237    let end = checked_add(usize_try_from_i32(length)?, 4)?;
238
239    if end < MIN_BSON_STRING_SIZE as usize {
240        return Err(Error::new_without_key(ErrorKind::MalformedValue {
241            message: format!(
242                "BSON length encoded string needs to be at least {} bytes, instead got {}",
243                MIN_BSON_STRING_SIZE, end
244            ),
245        }));
246    }
247
248    if buf.len() < end {
249        return Err(Error::new_without_key(ErrorKind::MalformedValue {
250            message: format!(
251                "expected buffer to contain at least {} bytes, but it only has {}",
252                end,
253                buf.len()
254            ),
255        }));
256    }
257
258    if buf[end - 1] != 0 {
259        return Err(Error::new_without_key(ErrorKind::MalformedValue {
260            message: "expected string to be null-terminated".to_string(),
261        }));
262    }
263
264    // exclude null byte
265    try_to_str(&buf[4..(end - 1)])
266}
267
268fn try_to_str(data: &[u8]) -> Result<&str> {
269    std::str::from_utf8(data).map_err(|e| Error::new_without_key(ErrorKind::Utf8EncodingError(e)))
270}
271
272fn usize_try_from_i32(i: i32) -> Result<usize> {
273    usize::try_from(i).map_err(|e| {
274        Error::new_without_key(ErrorKind::MalformedValue {
275            message: e.to_string(),
276        })
277    })
278}
279
280fn checked_add(lhs: usize, rhs: usize) -> Result<usize> {
281    lhs.checked_add(rhs).ok_or_else(|| {
282        Error::new_without_key(ErrorKind::MalformedValue {
283            message: "attempted to add with overflow".to_string(),
284        })
285    })
286}