use core::fmt;
use core::iter;
use core::num;
use core::str;
use std::error::Error as StdError;
use serde::de::value::BorrowedBytesDeserializer;
use serde::de::{
Deserialize, DeserializeSeed, Deserializer, EnumAccess, Error as SerdeError, IntoDeserializer,
MapAccess, SeqAccess, Unexpected, VariantAccess, Visitor,
};
use serde::serde_if_integer128;
use crate::error::{Error, ErrorKind};
use crate::raw::{RawRecord, RawRecordArena, RawRecordIter};
use self::DeserializeErrorKind as DEK;
/// Deserializes every record of `records` into a `D` and appends the results
/// to `output`.
///
/// Returns the number of values appended. On the first record that fails to
/// deserialize, an `ErrorKind::Deserialize` error is returned carrying the
/// record's position within the arena and an owned copy of its raw data;
/// values appended before the failure stay in `output`.
///
/// NOTE(review): `headers` is accepted but never consulted — each record's
/// deserializer is built with `headers: None`, so structs and maps are always
/// filled positionally. Confirm whether that is intentional.
pub fn deserialize_byte_record_arena<'de, D: Deserialize<'de>>(
    records: &'de RawRecordArena,
    headers: Option<&'de RawRecord>,
    output: &mut Vec<D>,
) -> Result<usize, Error> {
    let initial_len = output.len();
    for (index, record) in records.iter().enumerate() {
        let cursor = DeByteRecord {
            it: record.iter().peekable(),
            headers: None,
            field: 0,
        };
        let mut wrapped = DeRecordWrap(cursor);
        let value = D::deserialize(&mut wrapped).map_err(|err| {
            Error::new(ErrorKind::Deserialize {
                index,
                err,
                field_data: record.field_data.to_owned(),
                field_ends: record.field_ends.to_owned(),
            })
        })?;
        output.push(value);
    }
    Ok(output.len() - initial_len)
}
// Smoke test: fills an arena from a small CSV document and deserializes it
// into borrowed-string records. It makes no assertions beyond the `unwrap()`
// on deserialization; the arena and the resulting records are only printed.
// NOTE(review): `ByteRecordArena` and `Reader` are not imported in the visible
// part of this file — confirm where they come from.
#[test]
fn test_deser() {
// One header row followed by one data row; every value is quoted except the
// timestamp, and two values contain non-ASCII text.
let record = r#""BSCD","RSCD","CCCD","NOLN","CBCD","STHB","BSNJ","BSNK","STYB","CRE_YMD","CRE_PGM_ID"
"01000002","R2000119","0040001","&1","04101","20170923","台湾・古龍華","コリユウカ","19981221",2019/10/09 8:10:09,"C006"
"#;
// Target type: eleven optional string slices, one per column of the input.
#[derive(serde::Deserialize, Debug, Clone)]
struct Record<'i> {
bscd: Option<&'i str>,
rscd: Option<&'i str>,
cccd: Option<&'i str>,
noln: Option<&'i str>,
cbcd: Option<&'i str>,
sthb: Option<&'i str>,
bsnj: Option<&'i str>,
bsnk: Option<&'i str>,
styb: Option<&'i str>,
cre_ymd: Option<&'i str>,
cre_pgm_id: Option<&'i str>,
}
let mut arena = ByteRecordArena::new();
// presumably (has_headers = true, delimiter = b',') — verify against `Reader::new`
let mut reader = Reader::new(true, b',');
let mut records = Vec::<Record>::new();
reader.fill_arena(record.as_bytes(), &mut arena);
arena.deserialize(&mut records).unwrap();
println!("{:?}", arena);
println!("{:?}", records);
}
/// Cursor-style view of one record being deserialized: a stream of fields
/// plus, optionally, a stream of header names.
///
/// `'r` is the lifetime of the field and header data handed out by the cursor.
trait DeRecord<'r> {
/// Returns true when header names are available for this record.
fn has_headers(&self) -> bool;
/// Returns the next header name as text, or `Ok(None)` when none is left.
fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError>;
/// Returns the next header name as raw bytes, or `Ok(None)` when none is left.
fn next_header_bytes(&mut self) -> Result<Option<&'r [u8]>, DeserializeError>;
/// Consumes and returns the next field as text.
fn next_field(&mut self) -> Result<&'r str, DeserializeError>;
/// Consumes and returns the next field as raw bytes.
fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError>;
/// Returns the next field without consuming it, or `None` at the end of the record.
fn peek_field(&mut self) -> Option<&'r [u8]>;
/// Builds a `DeserializeError` of the given kind for the current position.
fn error(&self, kind: DeserializeErrorKind) -> DeserializeError;
/// Consumes the next field and hands it to `visitor` using whichever
/// representation the implementation infers from the field's contents.
fn infer_deserialize<'de, V: Visitor<'de>>(
&mut self,
visitor: V,
) -> Result<V::Value, DeserializeError>;
}
/// Newtype around a `DeRecord` implementation.
///
/// The serde traits in this file are implemented for `&mut DeRecordWrap<T>`;
/// the wrapper itself only forwards every `DeRecord` call to the inner value.
struct DeRecordWrap<T>(T);

impl<'r, T: DeRecord<'r>> DeRecord<'r> for DeRecordWrap<T> {
    #[inline]
    fn has_headers(&self) -> bool {
        T::has_headers(&self.0)
    }

    #[inline]
    fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError> {
        T::next_header(&mut self.0)
    }

    #[inline]
    fn next_header_bytes(&mut self) -> Result<Option<&'r [u8]>, DeserializeError> {
        T::next_header_bytes(&mut self.0)
    }

    #[inline]
    fn next_field(&mut self) -> Result<&'r str, DeserializeError> {
        T::next_field(&mut self.0)
    }

    #[inline]
    fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError> {
        T::next_field_bytes(&mut self.0)
    }

    #[inline]
    fn peek_field(&mut self) -> Option<&'r [u8]> {
        T::peek_field(&mut self.0)
    }

    #[inline]
    fn error(&self, kind: DeserializeErrorKind) -> DeserializeError {
        T::error(&self.0, kind)
    }

    #[inline]
    fn infer_deserialize<'de, V: Visitor<'de>>(
        &mut self,
        visitor: V,
    ) -> Result<V::Value, DeserializeError> {
        T::infer_deserialize(&mut self.0, visitor)
    }
}
struct DeByteRecord<'r> {
it: iter::Peekable<RawRecordIter<'r>>,
headers: Option<RawRecordIter<'r>>,
field: u64,
}
impl<'r> DeRecord<'r> for DeByteRecord<'r> {
#[inline]
fn has_headers(&self) -> bool {
self.headers.is_some()
}
#[inline]
fn next_header(&mut self) -> Result<Option<&'r str>, DeserializeError> {
match self.next_header_bytes() {
Ok(Some(field)) => Ok(Some(
str::from_utf8(field).map_err(|err| self.error(DEK::InvalidUtf8(err)))?,
)),
Ok(None) => Ok(None),
Err(err) => Err(err),
}
}
#[inline]
fn next_header_bytes(&mut self) -> Result<Option<&'r [u8]>, DeserializeError> {
Ok(self.headers.as_mut().and_then(|it| it.next()))
}
#[inline]
fn next_field(&mut self) -> Result<&'r str, DeserializeError> {
self.next_field_bytes().and_then(|field| {
str::from_utf8(field).map_err(|err| self.error(DEK::InvalidUtf8(err)))
})
}
#[inline]
fn next_field_bytes(&mut self) -> Result<&'r [u8], DeserializeError> {
match self.it.next() {
Some(field) => {
self.field += 1;
Ok(field)
}
None => Err(DeserializeError {
field: None,
kind: DEK::UnexpectedEndOfRow,
}),
}
}
#[inline]
fn peek_field(&mut self) -> Option<&'r [u8]> {
self.it.peek().map(|s| *s)
}
fn error(&self, kind: DeserializeErrorKind) -> DeserializeError {
DeserializeError {
field: Some(self.field.saturating_sub(1)),
kind: kind,
}
}
fn infer_deserialize<'de, V: Visitor<'de>>(
&mut self,
visitor: V,
) -> Result<V::Value, DeserializeError> {
let x = self.next_field_bytes()?;
if x == b"true" {
return visitor.visit_bool(true);
} else if x == b"false" {
return visitor.visit_bool(false);
} else if let Some(n) = try_positive_integer64_bytes(x) {
return visitor.visit_u64(n);
} else if let Some(n) = try_negative_integer64_bytes(x) {
return visitor.visit_i64(n);
}
serde_if_integer128! {
if let Some(n) = try_positive_integer128_bytes(x) {
return visitor.visit_u128(n);
} else if let Some(n) = try_negative_integer128_bytes(x) {
return visitor.visit_i128(n);
}
}
if let Some(n) = try_float_bytes(x) {
visitor.visit_f64(n)
} else if let Ok(s) = str::from_utf8(x) {
visitor.visit_str(s)
} else {
visitor.visit_bytes(x)
}
}
}
/// Generates one integer `deserialize_*` method for the `Deserializer` impl.
///
/// The generated method consumes the next field as text, parses it as
/// `$inttype` (base 16 when the text starts with `0x`, the type's default
/// `parse` otherwise) and passes the value to `$visit` on the visitor.
/// Parse failures become `ParseInt` errors tagged with the field position.
macro_rules! deserialize_int {
    ($method:ident, $visit:ident, $inttype:ty) => {
        fn $method<V: Visitor<'de>>(
            self,
            visitor: V,
        ) -> Result<V::Value, Self::Error> {
            let text = self.next_field()?;
            let parsed = match text.starts_with("0x") {
                true => <$inttype>::from_str_radix(&text[2..], 16),
                false => text.parse(),
            };
            match parsed {
                Ok(value) => visitor.$visit(value),
                Err(err) => Err(self.error(DEK::ParseInt(err))),
            }
        }
    }
}
impl<'a, 'de: 'a, T: DeRecord<'de>> Deserializer<'de> for &'a mut DeRecordWrap<T> {
type Error = DeserializeError;
fn deserialize_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.infer_deserialize(visitor)
}
fn deserialize_bool<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
visitor.visit_bool(
self.next_field()?
.parse()
.map_err(|err| self.error(DEK::ParseBool(err)))?,
)
}
deserialize_int!(deserialize_u8, visit_u8, u8);
deserialize_int!(deserialize_u16, visit_u16, u16);
deserialize_int!(deserialize_u32, visit_u32, u32);
deserialize_int!(deserialize_u64, visit_u64, u64);
serde_if_integer128! {
deserialize_int!(deserialize_u128, visit_u128, u128);
}
deserialize_int!(deserialize_i8, visit_i8, i8);
deserialize_int!(deserialize_i16, visit_i16, i16);
deserialize_int!(deserialize_i32, visit_i32, i32);
deserialize_int!(deserialize_i64, visit_i64, i64);
serde_if_integer128! {
deserialize_int!(deserialize_i128, visit_i128, i128);
}
fn deserialize_f32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
visitor.visit_f32(
self.next_field()?
.parse()
.map_err(|err| self.error(DEK::ParseFloat(err)))?,
)
}
fn deserialize_f64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
visitor.visit_f64(
self.next_field()?
.parse()
.map_err(|err| self.error(DEK::ParseFloat(err)))?,
)
}
fn deserialize_char<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
let field = self.next_field()?;
let len = field.chars().count();
if len != 1 {
return Err(self.error(DEK::Message(format!(
"expected single character but got {} characters in '{}'",
len, field
))));
}
visitor.visit_char(field.chars().next().unwrap())
}
fn deserialize_str<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.next_field()
.and_then(|f| visitor.visit_borrowed_str(f))
}
fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.next_field().and_then(|f| visitor.visit_str(f.into()))
}
fn deserialize_bytes<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.next_field_bytes()
.and_then(|f| visitor.visit_borrowed_bytes(f))
}
fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.next_field_bytes()
.and_then(|f| visitor.visit_byte_buf(f.to_vec()))
}
fn deserialize_option<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
match self.peek_field() {
None => visitor.visit_none(),
Some(f) if f.is_empty() => {
self.next_field().expect("empty field");
visitor.visit_none()
}
Some(_) => visitor.visit_some(self),
}
}
fn deserialize_unit<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
visitor.visit_unit()
}
fn deserialize_unit_struct<V: Visitor<'de>>(
self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, Self::Error> {
visitor.visit_unit()
}
fn deserialize_newtype_struct<V: Visitor<'de>>(
self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, Self::Error> {
visitor.visit_newtype_struct(self)
}
fn deserialize_seq<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
visitor.visit_seq(self)
}
fn deserialize_tuple<V: Visitor<'de>>(
self,
_len: usize,
visitor: V,
) -> Result<V::Value, Self::Error> {
visitor.visit_seq(self)
}
fn deserialize_tuple_struct<V: Visitor<'de>>(
self,
_name: &'static str,
_len: usize,
visitor: V,
) -> Result<V::Value, Self::Error> {
visitor.visit_seq(self)
}
fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
if !self.has_headers() {
visitor.visit_seq(self)
} else {
visitor.visit_map(self)
}
}
fn deserialize_struct<V: Visitor<'de>>(
self,
_name: &'static str,
_fields: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Self::Error> {
if !self.has_headers() {
visitor.visit_seq(self)
} else {
visitor.visit_map(self)
}
}
fn deserialize_identifier<V: Visitor<'de>>(self, _visitor: V) -> Result<V::Value, Self::Error> {
Err(self.error(DEK::Unsupported("deserialize_identifier".into())))
}
fn deserialize_enum<V: Visitor<'de>>(
self,
_name: &'static str,
_variants: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Self::Error> {
visitor.visit_enum(self)
}
fn deserialize_ignored_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
let _ = self.next_field()?;
visitor.visit_unit()
}
}
/// Enum support: the next field's text names the variant.
impl<'a, 'de: 'a, T: DeRecord<'de>> EnumAccess<'de> for &'a mut DeRecordWrap<T> {
    type Error = DeserializeError;
    type Variant = Self;

    /// Consumes one field, deserializes the variant identifier from its text
    /// and returns it together with this deserializer as the variant accessor.
    fn variant_seed<V: DeserializeSeed<'de>>(
        self,
        seed: V,
    ) -> Result<(V::Value, Self::Variant), Self::Error> {
        let name = self.next_field()?;
        match seed.deserialize(name.into_deserializer()) {
            Ok(variant) => Ok((variant, self)),
            Err(err) => Err(err),
        }
    }
}
/// Variant payload access: only unit variants can be read from a field;
/// every payload-carrying variant shape is rejected with an `invalid_type`
/// error.
impl<'a, 'de: 'a, T: DeRecord<'de>> VariantAccess<'de> for &'a mut DeRecordWrap<T> {
    type Error = DeserializeError;

    /// Unit variants carry no data, so nothing further is consumed.
    fn unit_variant(self) -> Result<(), Self::Error> {
        Ok(())
    }

    fn newtype_variant_seed<U: DeserializeSeed<'de>>(
        self,
        _seed: U,
    ) -> Result<U::Value, Self::Error> {
        Err(DeserializeError::invalid_type(
            Unexpected::UnitVariant,
            &"newtype variant",
        ))
    }

    fn tuple_variant<V: Visitor<'de>>(
        self,
        _len: usize,
        _visitor: V,
    ) -> Result<V::Value, Self::Error> {
        Err(DeserializeError::invalid_type(
            Unexpected::UnitVariant,
            &"tuple variant",
        ))
    }

    fn struct_variant<V: Visitor<'de>>(
        self,
        _fields: &'static [&'static str],
        _visitor: V,
    ) -> Result<V::Value, Self::Error> {
        Err(DeserializeError::invalid_type(
            Unexpected::UnitVariant,
            &"struct variant",
        ))
    }
}
/// Sequence access over the remaining fields of the record.
impl<'a, 'de: 'a, T: DeRecord<'de>> SeqAccess<'de> for &'a mut DeRecordWrap<T> {
    type Error = DeserializeError;

    /// Yields the next element while at least one field remains; the
    /// sequence ends (`Ok(None)`) once the record is exhausted.
    fn next_element_seed<U: DeserializeSeed<'de>>(
        &mut self,
        seed: U,
    ) -> Result<Option<U::Value>, Self::Error> {
        match self.peek_field() {
            Some(_) => seed.deserialize(&mut **self).map(Some),
            None => Ok(None),
        }
    }
}
/// Map access: keys come from the header names, values from the fields.
impl<'a, 'de: 'a, T: DeRecord<'de>> MapAccess<'de> for &'a mut DeRecordWrap<T> {
    type Error = DeserializeError;

    /// Deserializes the next key from the next header's raw bytes; the map
    /// ends (`Ok(None)`) when no header is left.
    ///
    /// Panics if the record has no headers, since map access is only handed
    /// out when headers are present.
    fn next_key_seed<K: DeserializeSeed<'de>>(
        &mut self,
        seed: K,
    ) -> Result<Option<K::Value>, Self::Error> {
        assert!(self.has_headers());
        match self.next_header_bytes()? {
            Some(name) => seed
                .deserialize(BorrowedBytesDeserializer::new(name))
                .map(Some),
            None => Ok(None),
        }
    }

    /// Deserializes the value for the current key from the record's fields.
    fn next_value_seed<K: DeserializeSeed<'de>>(
        &mut self,
        seed: K,
    ) -> Result<K::Value, Self::Error> {
        let record = &mut **self;
        seed.deserialize(record)
    }
}
/// An error produced while deserializing a single record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DeserializeError {
/// Zero-based index of the field the error refers to, when known. It is
/// `None` for errors raised at the end of a row and for messages created
/// through serde's `custom` constructor.
field: Option<u64>,
/// What went wrong.
kind: DeserializeErrorKind,
}
/// The specific cause of a `DeserializeError`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DeserializeErrorKind {
/// A free-form message, e.g. one created through serde's `custom` constructor.
Message(String),
/// A deserializer method that is not supported; carries the method name.
Unsupported(String),
/// A field was requested but the record had no fields left.
UnexpectedEndOfRow,
/// A field or header was not valid UTF-8 where text was required.
InvalidUtf8(str::Utf8Error),
/// A field could not be parsed as a boolean.
ParseBool(str::ParseBoolError),
/// A field could not be parsed as an integer.
ParseInt(num::ParseIntError),
/// A field could not be parsed as a floating-point number.
ParseFloat(num::ParseFloatError),
}
/// Lets serde report its own failures as `DeserializeError`s.
impl SerdeError for DeserializeError {
    /// Wraps an arbitrary displayable message; no field index is attached.
    fn custom<T: fmt::Display>(msg: T) -> DeserializeError {
        let text = msg.to_string();
        DeserializeError {
            field: None,
            kind: DEK::Message(text),
        }
    }
}
impl StdError for DeserializeError {
fn description(&self) -> &str {
self.kind.description()
}
}
/// Renders the error kind, prefixed with `field N: ` when a field index is known.
impl fmt::Display for DeserializeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.field {
            Some(index) => write!(f, "field {}: {}", index, self.kind),
            None => write!(f, "{}", self.kind),
        }
    }
}
impl fmt::Display for DeserializeErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::DeserializeErrorKind::*;
match *self {
Message(ref msg) => write!(f, "{}", msg),
Unsupported(ref which) => write!(f, "unsupported deserializer method: {}", which),
UnexpectedEndOfRow => write!(f, "{}", self.description()),
InvalidUtf8(ref err) => err.fmt(f),
ParseBool(ref err) => err.fmt(f),
ParseInt(ref err) => err.fmt(f),
ParseFloat(ref err) => err.fmt(f),
}
}
}
impl DeserializeError {
/// Returns the zero-based index of the field this error refers to, if one
/// was recorded.
pub fn field(&self) -> Option<u64> {
self.field
}
/// Returns the kind of this error.
pub fn kind(&self) -> &DeserializeErrorKind {
&self.kind
}
}
impl DeserializeErrorKind {
fn description(&self) -> &str {
use self::DeserializeErrorKind::*;
match *self {
Message(_) => "deserialization error",
Unsupported(_) => "unsupported deserializer method",
UnexpectedEndOfRow => "expected field, but got end of row",
InvalidUtf8(ref err) => err.description(),
ParseBool(ref err) => err.description(),
ParseInt(ref err) => err.description(),
ParseFloat(ref err) => err.description(),
}
}
}
serde_if_integer128! {
    /// Parses `s` as a `u128`, returning `None` when it is not one.
    fn try_positive_integer128(s: &str) -> Option<u128> {
        match s.parse::<u128>() {
            Ok(value) => Some(value),
            Err(_) => None,
        }
    }

    /// Parses `s` as an `i128`, returning `None` when it is not one.
    fn try_negative_integer128(s: &str) -> Option<i128> {
        match s.parse::<i128>() {
            Ok(value) => Some(value),
            Err(_) => None,
        }
    }
}
/// Parses `s` as a `u64`, returning `None` when it is not one.
fn try_positive_integer64(s: &str) -> Option<u64> {
    match s.parse::<u64>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Parses `s` as an `i64`, returning `None` when it is not one.
fn try_negative_integer64(s: &str) -> Option<i64> {
    match s.parse::<i64>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Parses `s` as an `f64`, returning `None` when it is not one.
fn try_float(s: &str) -> Option<f64> {
    match s.parse::<f64>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Parses `s` as a `u64`; `None` when the bytes are not UTF-8 or not a `u64`.
fn try_positive_integer64_bytes(s: &[u8]) -> Option<u64> {
    match str::from_utf8(s) {
        Ok(text) => text.parse().ok(),
        Err(_) => None,
    }
}
/// Parses `s` as an `i64`; `None` when the bytes are not UTF-8 or not an `i64`.
fn try_negative_integer64_bytes(s: &[u8]) -> Option<i64> {
    match str::from_utf8(s) {
        Ok(text) => text.parse().ok(),
        Err(_) => None,
    }
}
serde_if_integer128! {
    /// Parses `s` as a `u128`; `None` when the bytes are not UTF-8 or not a `u128`.
    fn try_positive_integer128_bytes(s: &[u8]) -> Option<u128> {
        match str::from_utf8(s) {
            Ok(text) => text.parse().ok(),
            Err(_) => None,
        }
    }

    /// Parses `s` as an `i128`; `None` when the bytes are not UTF-8 or not an `i128`.
    fn try_negative_integer128_bytes(s: &[u8]) -> Option<i128> {
        match str::from_utf8(s) {
            Ok(text) => text.parse().ok(),
            Err(_) => None,
        }
    }
}
/// Parses `s` as an `f64`; `None` when the bytes are not UTF-8 or not an `f64`.
fn try_float_bytes(s: &[u8]) -> Option<f64> {
    match str::from_utf8(s) {
        Ok(text) => text.parse().ok(),
        Err(_) => None,
    }
}