serde_csv_core/
de.rs

1//! Deserialize CSV data into a Rust data structure.
2
3use core::borrow::Borrow;
4use lexical_parse_float::FromLexical;
5use serde::{de::DeserializeSeed, Deserialize};
6
/// Wrapper for [`csv_core::Reader`] that provides methods for deserialization using [`serde`].
///
/// `N` is the capacity, in bytes, of an internal buffer that's used to temporarily store
/// unescaped fields.
#[derive(Debug)]
pub struct Reader<const N: usize> {
    /// Underlying `csv_core` parser that splits the input into fields.
    inner: csv_core::Reader,
    /// Scratch buffer that the parser writes the bytes of the current field into.
    field_buffer: [u8; N],
}
15
16impl<const N: usize> Default for Reader<N> {
17    fn default() -> Self {
18        Self::from_builder(csv_core::ReaderBuilder::new())
19    }
20}
21
22impl<const N: usize> Reader<N> {
23    /// Constructs a new reader.
24    pub fn new() -> Self {
25        Self::default()
26    }
27
28    /// Constructs a new reader from [`csv_core::ReaderBuilder`].
29    ///
30    /// # Example
31    /// ```
32    /// use serde_csv_core::csv_core;
33    ///
34    /// let reader = serde_csv_core::Reader::<16>::from_builder(
35    ///     csv_core::ReaderBuilder::new()
36    ///         .delimiter(b'-')
37    /// );
38    /// ```
39    pub fn from_builder(builder: impl Borrow<csv_core::ReaderBuilder>) -> Self {
40        Self {
41            inner: builder.borrow().build(),
42            field_buffer: [0; N],
43        }
44    }
45
46    /// Deserializes a given CSV byte slice into a value of type `T`.
47    ///
48    /// The second element of the resulting tuple is a number of bytes read.
49    ///
50    /// # Example
51    /// ```
52    /// use heapless::String;
53    /// use serde::Deserialize;
54    ///
55    /// #[derive(Debug, PartialEq, Eq, Deserialize)]
56    /// struct Record {
57    ///     pub country: String<32>,
58    ///     pub city: String<32>,
59    ///     pub population: u32,
60    /// }
61    ///
62    /// let csv = b"Poland,Cracow,766683\n";
63    ///
64    /// let mut reader = serde_csv_core::Reader::<32>::new();
65    /// let (record, nread)  = reader.deserialize::<Record>(&csv[..])?;
66    ///
67    /// assert_eq!(record, Record {
68    ///     country: "Poland".into(),
69    ///     city: "Cracow".into(),
70    ///     population: 766_683,
71    /// });
72    /// assert_eq!(nread, 21);
73    /// # Ok::<(), serde_csv_core::de::Error>(())
74    /// ```
75    pub fn deserialize<'de, T>(&mut self, input: &[u8]) -> Result<(T, usize)>
76    where
77        T: Deserialize<'de>,
78    {
79        let mut deserializer = Deserializer::new(self, input);
80        let value = T::deserialize(&mut deserializer)?;
81        Ok((value, deserializer.bytes_read()))
82    }
83}
84
/// This type represents all possible errors that can occur when deserializing CSV data.
///
/// Every variant is a plain tag without payload, so the type is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// Buffer overflow.
    Overflow,
    /// Expected an empty field.
    ExpectedEmpty,
    /// Invalid boolean value. Expected either `true` or `false`.
    InvalidBool,
    /// Invalid integer.
    InvalidInt,
    /// Invalid floating-point number.
    InvalidFloat,
    /// Invalid UTF-8 encoded character.
    InvalidUtf8Char,
    /// Invalid UTF-8 encoded string.
    InvalidUtf8String,
    /// Error with a custom message had to be discarded.
    Custom,
}
105
// Expands to a `match` over every `Error` variant that emits the variant's message through
// the given write-style macro (`$emit`) into the given formatter (`$sink`).
//
// The messages are kept as literal format strings in each arm so that the same expansion can
// be reused with different `write!`-like macros.
macro_rules! impl_format {
    ($err:ident, $emit:ident, $sink:ident) => {
        match $err {
            Self::Custom => $emit!($sink, "CSV does not match deserializer's expected format."),
            Self::InvalidUtf8String => $emit!($sink, "Invalid UTF-8 encoded string."),
            Self::InvalidUtf8Char => $emit!($sink, "Invalid UTF-8 encoded character."),
            Self::InvalidFloat => $emit!($sink, "Invalid floating-point number."),
            Self::InvalidInt => $emit!($sink, "Invalid integer."),
            Self::InvalidBool => $emit!(
                $sink,
                "Invalid boolean value. Expected either `true` or `false`."
            ),
            Self::ExpectedEmpty => $emit!($sink, "Expected an empty field."),
            Self::Overflow => $emit!($sink, "Buffer overflow."),
        }
    };
}
125
/// Alias for a `core::result::Result` with the error type `serde_csv_core::de::Error`.
///
/// Used as the return type of the deserialization functions in this module.
pub type Result<T> = core::result::Result<T, Error>;
128
129impl core::fmt::Display for Error {
130    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
131        impl_format!(self, write, f)
132    }
133}
134
// Empty impl: relies entirely on the trait's provided methods.
// NOTE(review): presumably needed because `serde::de::Error` requires `StdError` — confirm.
impl serde::de::StdError for Error {}
136
137impl serde::de::Error for Error {
138    fn custom<T>(_msg: T) -> Self
139    where
140        T: core::fmt::Display,
141    {
142        Self::Custom
143    }
144}
145
#[cfg(feature = "defmt")]
impl defmt::Format for Error {
    /// Emits the message of the error variant through `defmt`.
    fn format(&self, fmt: defmt::Formatter) {
        // `impl_format!` takes the write macro as a bare identifier, hence the local import.
        use defmt::write;
        impl_format!(self, write, fmt)
    }
}
153
/// Serde deserializer that pulls CSV fields one at a time out of a byte slice, using the
/// parser and the field buffer of a borrowed [`Reader`].
#[derive(Debug)]
struct Deserializer<'a, const N: usize> {
    /// Reader that provides the CSV parser and the buffer fields are written into.
    reader: &'a mut Reader<N>,
    /// CSV bytes being deserialized.
    input: &'a [u8],
    /// Number of bytes of `input` consumed so far.
    nread: usize,
    /// Set once the parser reports that the current record has ended.
    record_end: bool,
    /// Length of a field that `peek_bytes` has already read into the buffer but that has not
    /// been consumed by `read_bytes` yet.
    peeked: Option<usize>,
}
162
163impl<'a, const N: usize> Deserializer<'a, N> {
164    pub fn new(reader: &'a mut Reader<N>, input: &'a [u8]) -> Self {
165        Self {
166            reader,
167            input,
168            nread: 0,
169            record_end: false,
170            peeked: None,
171        }
172    }
173
174    pub fn bytes_read(&self) -> usize {
175        self.nread
176    }
177
178    fn read_bytes_impl(&mut self) -> Result<usize> {
179        let (result, r, w) = self
180            .reader
181            .inner
182            .read_field(&self.input[self.nread..], &mut self.reader.field_buffer);
183        self.nread += r;
184        match result {
185            csv_core::ReadFieldResult::InputEmpty => {}
186            csv_core::ReadFieldResult::OutputFull => return Err(Error::Overflow),
187            csv_core::ReadFieldResult::Field { record_end } => self.record_end = record_end,
188            csv_core::ReadFieldResult::End => {}
189        }
190        Ok(w)
191    }
192
193    fn peek_bytes(&mut self) -> Result<&[u8]> {
194        let len = match self.peeked {
195            Some(len) => len,
196            None => {
197                let len = self.read_bytes_impl()?;
198                self.peeked = Some(len);
199                len
200            }
201        };
202        Ok(&self.reader.field_buffer[..len])
203    }
204
205    fn read_bytes(&mut self) -> Result<&[u8]> {
206        let len = match self.peeked.take() {
207            Some(len) => len,
208            None => self.read_bytes_impl()?,
209        };
210        Ok(&self.reader.field_buffer[..len])
211    }
212
213    fn read_int<T: atoi::FromRadix10SignedChecked>(&mut self) -> Result<T> {
214        atoi::atoi(self.read_bytes()?).ok_or(Error::InvalidInt)
215    }
216
217    fn read_float<T: FromLexical>(&mut self) -> Result<T> {
218        T::from_lexical(self.read_bytes()?).map_err(|_| Error::InvalidFloat)
219    }
220
221    fn read_str(&mut self) -> Result<&str> {
222        core::str::from_utf8(self.read_bytes()?).map_err(|_| Error::InvalidUtf8String)
223    }
224}
225
226impl<'de, const N: usize> serde::de::Deserializer<'de> for &mut Deserializer<'_, N> {
227    type Error = Error;
228
229    fn deserialize_any<V>(self, _visitor: V) -> Result<V::Value>
230    where
231        V: serde::de::Visitor<'de>,
232    {
233        unimplemented!("`Deserializer::deserialize_any` is not supported");
234    }
235
236    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
237    where
238        V: serde::de::Visitor<'de>,
239    {
240        match self.read_bytes()? {
241            b"true" => visitor.visit_bool(true),
242            b"false" => visitor.visit_bool(false),
243            _ => Err(Error::InvalidBool),
244        }
245    }
246
247    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value>
248    where
249        V: serde::de::Visitor<'de>,
250    {
251        self.read_int().and_then(|v| visitor.visit_i8(v))
252    }
253
254    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value>
255    where
256        V: serde::de::Visitor<'de>,
257    {
258        self.read_int().and_then(|v| visitor.visit_i16(v))
259    }
260
261    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value>
262    where
263        V: serde::de::Visitor<'de>,
264    {
265        self.read_int().and_then(|v| visitor.visit_i32(v))
266    }
267
268    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
269    where
270        V: serde::de::Visitor<'de>,
271    {
272        self.read_int().and_then(|v| visitor.visit_i64(v))
273    }
274
275    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value>
276    where
277        V: serde::de::Visitor<'de>,
278    {
279        self.read_int().and_then(|v| visitor.visit_u8(v))
280    }
281
282    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value>
283    where
284        V: serde::de::Visitor<'de>,
285    {
286        self.read_int().and_then(|v| visitor.visit_u16(v))
287    }
288
289    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
290    where
291        V: serde::de::Visitor<'de>,
292    {
293        self.read_int().and_then(|v| visitor.visit_u32(v))
294    }
295
296    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
297    where
298        V: serde::de::Visitor<'de>,
299    {
300        self.read_int().and_then(|v| visitor.visit_u64(v))
301    }
302
303    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
304    where
305        V: serde::de::Visitor<'de>,
306    {
307        self.read_float().and_then(|v| visitor.visit_f32(v))
308    }
309
310    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
311    where
312        V: serde::de::Visitor<'de>,
313    {
314        self.read_float().and_then(|v| visitor.visit_f64(v))
315    }
316
317    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value>
318    where
319        V: serde::de::Visitor<'de>,
320    {
321        let str = self.read_str()?;
322        let mut iter = str.chars();
323        let c = iter.next().ok_or(Error::InvalidUtf8Char)?;
324        if iter.next().is_some() {
325            return Err(Error::InvalidUtf8Char);
326        }
327        visitor.visit_char(c)
328    }
329
330    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
331    where
332        V: serde::de::Visitor<'de>,
333    {
334        visitor.visit_str(self.read_str()?)
335    }
336
337    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
338    where
339        V: serde::de::Visitor<'de>,
340    {
341        visitor.visit_str(self.read_str()?)
342    }
343
344    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
345    where
346        V: serde::de::Visitor<'de>,
347    {
348        self.read_bytes().and_then(|v| visitor.visit_bytes(v))
349    }
350
351    fn deserialize_byte_buf<V>(self, _visitor: V) -> Result<V::Value>
352    where
353        V: serde::de::Visitor<'de>,
354    {
355        unimplemented!("`Deserializer::deserialize_byte_buf` is not supported");
356    }
357
358    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
359    where
360        V: serde::de::Visitor<'de>,
361    {
362        let bytes = self.peek_bytes()?;
363        if bytes.is_empty() {
364            visitor.visit_none()
365        } else {
366            visitor.visit_some(self)
367        }
368    }
369
370    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
371    where
372        V: serde::de::Visitor<'de>,
373    {
374        let bytes = self.read_bytes()?;
375        if !bytes.is_empty() {
376            return Err(Error::ExpectedEmpty);
377        }
378        visitor.visit_unit()
379    }
380
381    fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
382    where
383        V: serde::de::Visitor<'de>,
384    {
385        self.deserialize_unit(visitor)
386    }
387
388    fn deserialize_newtype_struct<V>(self, _name: &'static str, _visitor: V) -> Result<V::Value>
389    where
390        V: serde::de::Visitor<'de>,
391    {
392        unimplemented!("`Deserializer::deserialize_newtype_struct` is not supported");
393    }
394
395    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
396    where
397        V: serde::de::Visitor<'de>,
398    {
399        visitor.visit_seq(self)
400    }
401
402    fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value>
403    where
404        V: serde::de::Visitor<'de>,
405    {
406        visitor.visit_seq(self)
407    }
408
409    fn deserialize_tuple_struct<V>(
410        self,
411        _name: &'static str,
412        _len: usize,
413        visitor: V,
414    ) -> Result<V::Value>
415    where
416        V: serde::de::Visitor<'de>,
417    {
418        visitor.visit_seq(self)
419    }
420
421    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
422    where
423        V: serde::de::Visitor<'de>,
424    {
425        visitor.visit_seq(self)
426    }
427
428    fn deserialize_struct<V>(
429        self,
430        _name: &'static str,
431        _fields: &'static [&'static str],
432        visitor: V,
433    ) -> Result<V::Value>
434    where
435        V: serde::de::Visitor<'de>,
436    {
437        visitor.visit_seq(self)
438    }
439
440    fn deserialize_enum<V>(
441        self,
442        _name: &'static str,
443        _variants: &'static [&'static str],
444        visitor: V,
445    ) -> Result<V::Value>
446    where
447        V: serde::de::Visitor<'de>,
448    {
449        visitor.visit_enum(self)
450    }
451
452    fn deserialize_identifier<V>(self, _visitor: V) -> Result<V::Value>
453    where
454        V: serde::de::Visitor<'de>,
455    {
456        unimplemented!("`Deserializer::deserialize_identifier` is not supported");
457    }
458
459    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
460    where
461        V: serde::de::Visitor<'de>,
462    {
463        let _ = self.read_bytes()?;
464        visitor.visit_unit()
465    }
466}
467
468impl<'de, const N: usize> serde::de::VariantAccess<'de> for &mut Deserializer<'_, N> {
469    type Error = Error;
470
471    fn unit_variant(self) -> Result<()> {
472        Ok(())
473    }
474
475    fn newtype_variant_seed<U: DeserializeSeed<'de>>(self, _seed: U) -> Result<U::Value> {
476        unimplemented!("`VariantAccess::newtype_variant_seed` is not supported");
477    }
478
479    fn tuple_variant<V: serde::de::Visitor<'de>>(
480        self,
481        _len: usize,
482        _visitor: V,
483    ) -> Result<V::Value> {
484        unimplemented!("`VariantAccess::tuple_variant` is not supported");
485    }
486
487    fn struct_variant<V: serde::de::Visitor<'de>>(
488        self,
489        _fields: &'static [&'static str],
490        _visitor: V,
491    ) -> Result<V::Value> {
492        unimplemented!("`VariantAccess::struct_variant` is not supported");
493    }
494}
495
496impl<'de, const N: usize> serde::de::EnumAccess<'de> for &mut Deserializer<'_, N> {
497    type Error = Error;
498
499    type Variant = Self;
500
501    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
502    where
503        V: DeserializeSeed<'de>,
504    {
505        use serde::de::IntoDeserializer;
506        let variant_name = self.read_bytes()?;
507        seed.deserialize(variant_name.into_deserializer())
508            .map(|v| (v, self))
509    }
510}
511
512impl<'de, const N: usize> serde::de::SeqAccess<'de> for &mut Deserializer<'_, N> {
513    type Error = Error;
514
515    fn next_element_seed<V>(&mut self, seed: V) -> Result<Option<V::Value>>
516    where
517        V: DeserializeSeed<'de>,
518    {
519        if self.record_end {
520            Ok(None)
521        } else {
522            seed.deserialize(&mut **self).map(Some)
523        }
524    }
525}