bson/binary/
vector.rs

1use std::{
2    convert::{TryFrom, TryInto},
3    mem::size_of,
4};
5
6use super::{Binary, Error, Result};
7use crate::{spec::BinarySubtype, Bson, RawBson};
8
/// Data type byte identifying a vector of `i8` values. `Vector::from_bytes` reads this value
/// from the first byte of the input to select the `Vector::Int8` variant.
const INT8: u8 = 0x03;
/// Data type byte identifying a vector of `f32` values (selects `Vector::Float32`).
const FLOAT32: u8 = 0x27;
/// Data type byte identifying a vector of packed bits (selects `Vector::PackedBit`). This is the
/// only data type for which `Vector::from_bytes` accepts a non-zero padding byte.
const PACKED_BIT: u8 = 0x10;
12
/// A vector of numeric values. This type can be converted into a [`Binary`] of subtype
/// [`BinarySubtype::Vector`], and converted back from such a [`Binary`] with `TryFrom`.
///
/// ```rust
/// # use bson::binary::{Binary, Vector};
/// let vector = Vector::Int8(vec![0, 1, 2]);
/// let binary = Binary::from(vector);
/// ```
///
/// `Vector` serializes to and deserializes from a `Binary`.
///
/// ```rust
/// # use serde::{Serialize, Deserialize};
/// # use bson::{binary::Vector, error::Result, spec::ElementType};
/// #[derive(Serialize, Deserialize)]
/// struct Data {
///     vector: Vector,
/// }
///
/// let data = Data { vector: Vector::Int8(vec![0, 1, 2]) };
/// let document = bson::serialize_to_document(&data).unwrap();
/// assert_eq!(document.get("vector").unwrap().element_type(), ElementType::Binary);
///
/// let data: Data = bson::deserialize_from_document(document).unwrap();
/// assert_eq!(data.vector, Vector::Int8(vec![0, 1, 2]));
/// ```
///
/// See the
/// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md)
/// for more details.
#[derive(Clone, Debug, PartialEq)]
pub enum Vector {
    /// A vector of `i8` values. Each value occupies one byte in the binary representation.
    Int8(Vec<i8>),

    /// A vector of `f32` values. Each value is stored as four little-endian bytes in the binary
    /// representation.
    Float32(Vec<f32>),

    /// A vector of packed bits. See [`PackedBitVector::new`] for more details.
    PackedBit(PackedBitVector),
}
54
/// A vector of packed bits. This type can be constructed by calling [`PackedBitVector::new`],
/// which validates the padding against the provided bytes.
#[derive(Clone, Debug, PartialEq)]
pub struct PackedBitVector {
    // The packed bytes, stored exactly as passed to `PackedBitVector::new`.
    vector: Vec<u8>,
    // Number of least-significant bits of the final byte that are ignored. `new` only accepts
    // values in 0-7, and only 0 when `vector` is empty.
    padding: u8,
}
61
62impl PackedBitVector {
63    /// Construct a new `PackedBitVector`. Each `u8` value in the provided `vector` represents 8
64    /// single-bit elements in little-endian format. For example, the following vector:
65    ///
66    /// ```rust
67    /// # use bson::{binary::PackedBitVector, error::Result};
68    /// # fn main() -> Result<()> {
69    /// let packed_bits = vec![238, 224];
70    /// let vector = PackedBitVector::new(packed_bits, 0)?;
71    /// # Ok(())
72    /// # }
73    /// ```
74    ///
75    /// represents a 16-bit vector containing the following values:
76    ///
77    /// ```text
78    /// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0]
79    /// ```
80    ///
81    /// Padding can optionally be specified to ignore a number of least-significant bits in the
82    /// final byte. For example, the vector in the previous example with a padding of 4 would
83    /// represent a 12-bit vector containing the following values:
84    ///
85    /// ```text
86    /// [1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0]
87    /// ```
88    ///
89    /// Padding must be within 0-7 inclusive. Padding must be 0 or unspecified if the provided
90    /// vector is empty. The ignored bits in the vector must all be 0.
91    pub fn new(vector: Vec<u8>, padding: impl Into<Option<u8>>) -> Result<Self> {
92        let padding = padding.into().unwrap_or(0);
93        if !(0..8).contains(&padding) {
94            return Err(Error::binary(format!(
95                "vector padding must be within 0-7 inclusive, got {padding}"
96            )));
97        }
98        match vector.last() {
99            Some(last) => {
100                if last.trailing_zeros() < u32::from(padding) {
101                    return Err(Error::binary(
102                        "the ignored bits in a packed bit vector must all be 0",
103                    ));
104                }
105            }
106            None => {
107                if padding != 0 {
108                    return Err(Error::binary(format!(
109                        "cannot specify non-zero padding if the provided vector is empty, got \
110                         {padding}"
111                    )));
112                }
113            }
114        }
115        Ok(Self { vector, padding })
116    }
117}
118
119impl Vector {
120    /// Construct a [`Vector`] from the given bytes. See the
121    /// [specification](https://github.com/mongodb/specifications/blob/master/source/bson-binary-vector/bson-binary-vector.md#specification)
122    /// for details on the expected byte format.
123    pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self> {
124        let bytes = bytes.as_ref();
125
126        if bytes.len() < 2 {
127            return Err(Error::binary(format!(
128                "the provided vector bytes must have a length of at least 2, got {}",
129                bytes.len()
130            )));
131        }
132
133        let d_type = bytes[0];
134        let padding = bytes[1];
135        if d_type != PACKED_BIT && padding != 0 {
136            return Err(Error::binary(format!(
137                "padding can only be specified for a packed bit vector (data type {}), got type {}",
138                PACKED_BIT, d_type
139            )));
140        }
141        let number_bytes = &bytes[2..];
142
143        match d_type {
144            INT8 => {
145                let vector = number_bytes
146                    .iter()
147                    .map(|n| i8::from_le_bytes([*n]))
148                    .collect();
149                Ok(Self::Int8(vector))
150            }
151            FLOAT32 => {
152                const F32_BYTES: usize = size_of::<f32>();
153
154                let mut vector = Vec::new();
155                for chunk in number_bytes.chunks(F32_BYTES) {
156                    let bytes: [u8; F32_BYTES] = chunk.try_into().map_err(|_| {
157                        Error::binary(format!(
158                            "f32 vector values must be {} bytes, got {:?}",
159                            F32_BYTES, chunk,
160                        ))
161                    })?;
162                    vector.push(f32::from_le_bytes(bytes));
163                }
164                Ok(Self::Float32(vector))
165            }
166            PACKED_BIT => {
167                let packed_bit_vector = PackedBitVector::new(number_bytes.to_vec(), padding)?;
168                Ok(Self::PackedBit(packed_bit_vector))
169            }
170            other => Err(Error::binary(format!(
171                "unsupported vector data type: {other}"
172            ))),
173        }
174    }
175
176    fn d_type(&self) -> u8 {
177        match self {
178            Self::Int8(_) => INT8,
179            Self::Float32(_) => FLOAT32,
180            Self::PackedBit(_) => PACKED_BIT,
181        }
182    }
183
184    fn padding(&self) -> u8 {
185        match self {
186            Self::Int8(_) => 0,
187            Self::Float32(_) => 0,
188            Self::PackedBit(PackedBitVector { padding, .. }) => *padding,
189        }
190    }
191}
192
193impl From<&Vector> for Binary {
194    fn from(vector: &Vector) -> Self {
195        let d_type = vector.d_type();
196        let padding = vector.padding();
197        let mut bytes = vec![d_type, padding];
198
199        match vector {
200            Vector::Int8(vector) => {
201                for n in vector {
202                    bytes.extend_from_slice(&n.to_le_bytes());
203                }
204            }
205            Vector::Float32(vector) => {
206                for n in vector {
207                    bytes.extend_from_slice(&n.to_le_bytes());
208                }
209            }
210            Vector::PackedBit(PackedBitVector { vector, .. }) => {
211                for n in vector {
212                    bytes.extend_from_slice(&n.to_le_bytes());
213                }
214            }
215        }
216
217        Self {
218            subtype: BinarySubtype::Vector,
219            bytes,
220        }
221    }
222}
223
224impl From<Vector> for Binary {
225    fn from(vector: Vector) -> Binary {
226        Self::from(&vector)
227    }
228}
229
230impl TryFrom<&Binary> for Vector {
231    type Error = Error;
232
233    fn try_from(binary: &Binary) -> Result<Self> {
234        if binary.subtype != BinarySubtype::Vector {
235            return Err(Error::binary(format!(
236                "expected vector binary subtype, got {:?}",
237                binary.subtype
238            )));
239        }
240        Self::from_bytes(&binary.bytes)
241    }
242}
243
244impl TryFrom<Binary> for Vector {
245    type Error = Error;
246
247    fn try_from(binary: Binary) -> std::result::Result<Self, Self::Error> {
248        Self::try_from(&binary)
249    }
250}
251
252// Convenience impl to allow passing a Vector directly into the doc! macro. From<&Vector> is already
253// implemented by a blanket impl in src/bson.rs.
254impl From<Vector> for Bson {
255    fn from(vector: Vector) -> Self {
256        Self::Binary(Binary::from(vector))
257    }
258}
259
260// Convenience impls to allow passing a Vector directly into the rawdoc! macro
261impl From<&Vector> for RawBson {
262    fn from(vector: &Vector) -> Self {
263        Self::Binary(Binary::from(vector))
264    }
265}
266
267impl From<Vector> for RawBson {
268    fn from(vector: Vector) -> Self {
269        Self::from(&vector)
270    }
271}
272
#[cfg(feature = "serde")]
impl serde::Serialize for Vector {
    /// Serialize this vector by converting it to a [`Binary`] and serializing that value.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        Binary::from(self).serialize(serializer)
    }
}
283
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Vector {
    /// Deserialize a [`Binary`] and decode it into a [`Vector`]. A failed decoding is reported
    /// as a custom deserializer error.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        Binary::deserialize(deserializer)
            .and_then(|binary| Self::try_from(binary).map_err(serde::de::Error::custom))
    }
}