rbson/raw/mod.rs
1//! An API for interacting with raw BSON bytes.
2//!
3//! This module provides two document types, [`RawDocumentBuf`] and [`RawDocument`] (akin to
4//! [`std::string::String`] and [`str`]), for working with raw BSON documents. These types differ
5//! from the regular [`crate::Document`] type in that their storage is BSON bytes rather than a
6//! hash-map like Rust type. In certain circumstances, these types can be leveraged for increased
7//! performance.
8//!
9//! This module also provides a [`RawBson`] type for modeling any borrowed BSON element and a
10//! [`RawArray`] type for modeling a borrowed slice of a document containing a BSON array element.
11//!
12//! A [`RawDocumentBuf`] can be created from a `Vec<u8>` containing raw BSON data. A
13//! [`RawDocument`] can be created from anything that can be borrowed as a `&[u8]`. Both types
14//! can access elements via methods similar to those available on the [`crate::Document`] type.
//! Note that [`RawDocument::get`] (which [`RawDocumentBuf`] calls through to via its `Deref`
//! implementation) returns a `Result`, since the bytes contained in the document are not fully
//! validated until trying to access the contained data.
18//!
19//! ```rust
20//! use bson::raw::{
21//! RawBson,
22//! RawDocumentBuf,
23//! };
24//!
25//! // See http://bsonspec.org/spec.html for details on the binary encoding of BSON.
26//! let doc = RawDocumentBuf::new(b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00".to_vec())?;
27//! let elem = doc.get("hi")?.unwrap();
28//!
29//! assert_eq!(
30//! elem.as_str(),
31//! Some("y'all"),
32//! );
33//! # Ok::<(), bson::raw::Error>(())
34//! ```
35//!
36//! ### [`crate::Document`] interop
37//!
//! A [`RawDocumentBuf`] can be created from a [`crate::Document`]. Internally, this
//! serializes the [`crate::Document`] to a `Vec<u8>`, and then includes those bytes in the
//! [`RawDocumentBuf`].
41//!
42//! ```rust
43//! use bson::{
44//! raw::RawDocumentBuf,
45//! doc,
46//! };
47//!
48//! let document = doc! {
49//! "goodbye": {
50//! "cruel": "world"
51//! }
52//! };
53//!
54//! let raw = RawDocumentBuf::from_document(&document)?;
55//! let value = raw
56//! .get_document("goodbye")?
57//! .get_str("cruel")?;
58//!
59//! assert_eq!(
60//! value,
61//! "world",
62//! );
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ### Reference type ([`RawDocument`])
67//!
68//! A BSON document can also be accessed with the [`RawDocument`] type, which is an
69//! unsized type that represents the BSON payload as a `[u8]`. This allows accessing nested
70//! documents without reallocation. [`RawDocument`] must always be accessed via a pointer type,
71//! similar to `[T]` and `str`.
72//!
//! The example below reads a BSON document directly from a byte-string literal
//! and extracts a `&str` from it, performing no heap allocation.
75//! ```rust
76//! use bson::raw::RawDocument;
77//!
78//! let bytes = b"\x13\x00\x00\x00\x02hi\x00\x06\x00\x00\x00y'all\x00\x00";
79//! assert_eq!(RawDocument::new(bytes)?.get_str("hi")?, "y'all");
80//! # Ok::<(), Box<dyn std::error::Error>>(())
81//! ```
82//!
83//! ### Iteration
84//!
85//! [`RawDocument`] implements [`IntoIterator`](std::iter::IntoIterator), which can also be
86//! accessed via [`RawDocumentBuf::iter`].
//!
88//! ```rust
89//! use bson::{
90//! raw::{
91//! RawBson,
92//! RawDocumentBuf,
93//! },
94//! doc,
95//! };
96//!
97//! let original_doc = doc! {
98//! "crate": "bson",
99//! "year": "2021",
100//! };
101//!
102//! let doc = RawDocumentBuf::from_document(&original_doc)?;
103//! let mut doc_iter = doc.iter();
104//!
105//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?;
106//! assert_eq!(key, "crate");
107//! assert_eq!(value.as_str(), Some("bson"));
108//!
109//! let (key, value): (&str, RawBson) = doc_iter.next().unwrap()?;
110//! assert_eq!(key, "year");
111//! assert_eq!(value.as_str(), Some("2021"));
112//! # Ok::<(), bson::raw::Error>(())
113//! ```
114
115mod array;
116mod bson;
117mod document;
118mod document_buf;
119mod error;
120mod iter;
121#[cfg(test)]
122mod test;
123
124use std::convert::{TryFrom, TryInto};
125
126use crate::de::MIN_BSON_STRING_SIZE;
127
128pub use self::{
129 array::{RawArray, RawArrayIter},
130 bson::{RawBinary, RawBson},
131 document::RawDocument,
132 document_buf::RawDocumentBuf,
133 error::{Error, ErrorKind, Result, ValueAccessError, ValueAccessErrorKind, ValueAccessResult},
134 iter::Iter,
135};
136
137pub(crate) use self::bson::RawBsonVisitor;
138
139/// Special newtype name indicating that the type being (de)serialized is a raw BSON document.
140pub(crate) const RAW_DOCUMENT_NEWTYPE: &str = "$__private__bson_RawDocument";
141
142/// Special newtype name indicating that the type being (de)serialized is a raw BSON array.
143pub(crate) const RAW_ARRAY_NEWTYPE: &str = "$__private__bson_RawArray";
144
145/// Special newtype name indicating that the type being (de)serialized is a raw BSON value.
146pub(crate) const RAW_BSON_NEWTYPE: &str = "$__private__bson_RawBson";
147
148/// Given a u8 slice, return an i32 calculated from the first four bytes in
149/// little endian order.
150fn f64_from_slice(val: &[u8]) -> Result<f64> {
151 let arr = val
152 .get(0..8)
153 .and_then(|s| s.try_into().ok())
154 .ok_or_else(|| {
155 Error::new_without_key(ErrorKind::MalformedValue {
156 message: format!("expected 8 bytes to read double, instead got {}", val.len()),
157 })
158 })?;
159 Ok(f64::from_le_bytes(arr))
160}
161
162/// Given a u8 slice, return an i32 calculated from the first four bytes in
163/// little endian order.
164fn i32_from_slice(val: &[u8]) -> Result<i32> {
165 let arr = val
166 .get(0..4)
167 .and_then(|s| s.try_into().ok())
168 .ok_or_else(|| {
169 Error::new_without_key(ErrorKind::MalformedValue {
170 message: format!("expected 4 bytes to read i32, instead got {}", val.len()),
171 })
172 })?;
173 Ok(i32::from_le_bytes(arr))
174}
175
176/// Given an u8 slice, return an i64 calculated from the first 8 bytes in
177/// little endian order.
178fn i64_from_slice(val: &[u8]) -> Result<i64> {
179 let arr = val
180 .get(0..8)
181 .and_then(|s| s.try_into().ok())
182 .ok_or_else(|| {
183 Error::new_without_key(ErrorKind::MalformedValue {
184 message: format!("expected 8 bytes to read i64, instead got {}", val.len()),
185 })
186 })?;
187 Ok(i64::from_le_bytes(arr))
188}
189
190
191/// Given a u8 slice, return an i32 calculated from the first four bytes in
192/// little endian order.
193fn u32_from_slice(val: &[u8]) -> Result<u32> {
194 let arr = val
195 .get(0..4)
196 .and_then(|s| s.try_into().ok())
197 .ok_or_else(|| {
198 Error::new_without_key(ErrorKind::MalformedValue {
199 message: format!("expected 4 bytes to read i32, instead got {}", val.len()),
200 })
201 })?;
202 Ok(u32::from_le_bytes(arr))
203}
204
205/// Given an u8 slice, return an i64 calculated from the first 8 bytes in
206/// little endian order.
207fn u64_from_slice(val: &[u8]) -> Result<u64> {
208 let arr = val
209 .get(0..8)
210 .and_then(|s| s.try_into().ok())
211 .ok_or_else(|| {
212 Error::new_without_key(ErrorKind::MalformedValue {
213 message: format!("expected 8 bytes to read i64, instead got {}", val.len()),
214 })
215 })?;
216 Ok(u64::from_le_bytes(arr))
217}
218
219fn read_nullterminated(buf: &[u8]) -> Result<&str> {
220 let mut splits = buf.splitn(2, |x| *x == 0);
221 let value = splits.next().ok_or_else(|| {
222 Error::new_without_key(ErrorKind::MalformedValue {
223 message: "no value".into(),
224 })
225 })?;
226 if splits.next().is_some() {
227 Ok(try_to_str(value)?)
228 } else {
229 Err(Error::new_without_key(ErrorKind::MalformedValue {
230 message: "expected null terminator".into(),
231 }))
232 }
233}
234
235fn read_lenencoded(buf: &[u8]) -> Result<&str> {
236 let length = i32_from_slice(&buf[..4])?;
237 let end = checked_add(usize_try_from_i32(length)?, 4)?;
238
239 if end < MIN_BSON_STRING_SIZE as usize {
240 return Err(Error::new_without_key(ErrorKind::MalformedValue {
241 message: format!(
242 "BSON length encoded string needs to be at least {} bytes, instead got {}",
243 MIN_BSON_STRING_SIZE, end
244 ),
245 }));
246 }
247
248 if buf.len() < end {
249 return Err(Error::new_without_key(ErrorKind::MalformedValue {
250 message: format!(
251 "expected buffer to contain at least {} bytes, but it only has {}",
252 end,
253 buf.len()
254 ),
255 }));
256 }
257
258 if buf[end - 1] != 0 {
259 return Err(Error::new_without_key(ErrorKind::MalformedValue {
260 message: "expected string to be null-terminated".to_string(),
261 }));
262 }
263
264 // exclude null byte
265 try_to_str(&buf[4..(end - 1)])
266}
267
268fn try_to_str(data: &[u8]) -> Result<&str> {
269 std::str::from_utf8(data).map_err(|e| Error::new_without_key(ErrorKind::Utf8EncodingError(e)))
270}
271
272fn usize_try_from_i32(i: i32) -> Result<usize> {
273 usize::try_from(i).map_err(|e| {
274 Error::new_without_key(ErrorKind::MalformedValue {
275 message: e.to_string(),
276 })
277 })
278}
279
280fn checked_add(lhs: usize, rhs: usize) -> Result<usize> {
281 lhs.checked_add(rhs).ok_or_else(|| {
282 Error::new_without_key(ErrorKind::MalformedValue {
283 message: "attempted to add with overflow".to_string(),
284 })
285 })
286}