extern crate alloc;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use facet_format::{
ContainerKind, DeserializeErrorKind, FormatParser, ParseError, ParseEvent, ParseEventKind,
SavePoint, ScalarTypeHint, ScalarValue,
};
use facet_reflect::Span;
/// Where the parser currently is in the event stream it produces.
#[derive(Clone, Debug)]
enum ParserState {
    /// No struct is open; this is also the state reported when the state stack is empty.
    Ready,
    /// A struct is open.
    InStruct {
        /// Number of fields of the open struct that have not been announced yet.
        remaining_fields: usize,
    },
}
/// One CSV field together with its location in the input.
#[derive(Clone, Copy, Debug)]
struct FieldSpan<'de> {
    /// Field text after whitespace trimming and removal of surrounding quotes.
    value: &'de str,
    /// Byte offset at which `value` starts.
    offset: usize,
    /// Length of `value` in bytes.
    len: usize,
}
/// Event-based parser over a single row of CSV text.
///
/// CSV carries no structural information, so the parser relies on hints
/// (number of struct fields, expected scalar type) supplied by its driver
/// before it can emit the corresponding events.
pub struct CsvParser<'de> {
    /// The input text exactly as it was passed to the constructor.
    input: &'de str,
    /// Fields of the row, split up front.
    fields: Vec<FieldSpan<'de>>,
    /// Count of fields announced so far; the current field is `fields[field_index - 1]`.
    field_index: usize,
    /// Stack of open containers; an empty stack is treated as `ParserState::Ready`.
    state_stack: Vec<ParserState>,
    /// Event that was produced by a peek and has not been consumed yet.
    peeked: Option<ParseEvent<'de>>,
    /// Field count from the most recent struct hint, consumed by the next generated event.
    pending_struct_fields: Option<usize>,
    /// Scalar type from the most recent scalar hint, consumed by the next generated event.
    pending_scalar_type: Option<ScalarTypeHint>,
}
impl<'de> CsvParser<'de> {
    /// Builds a parser for `input`.
    ///
    /// Surrounding whitespace is trimmed and the remainder is split into
    /// fields as one row. Input that is empty after trimming yields no fields.
    /// Field offsets are shifted by the amount of leading whitespace removed,
    /// so they refer to positions in the untrimmed `input`.
    pub fn new(input: &'de str) -> Self {
        let leading_ws = input.len() - input.trim_start().len();
        let row = input.trim();
        let fields = match row {
            "" => Vec::new(),
            _ => parse_csv_row_with_spans(row, leading_ws),
        };
        Self {
            input,
            fields,
            field_index: 0,
            state_stack: Vec::new(),
            peeked: None,
            pending_struct_fields: None,
            pending_scalar_type: None,
        }
    }

    /// Returns the innermost open state, or `Ready` when nothing is open.
    fn current_state(&self) -> &ParserState {
        match self.state_stack.last() {
            Some(state) => state,
            None => &ParserState::Ready,
        }
    }

    /// Span of the most recently announced field.
    ///
    /// Falls back to an empty span at the end of the input when no field has
    /// been announced yet or the parser has moved past the last field.
    fn current_field_span(&self) -> Span {
        let current = self
            .field_index
            .checked_sub(1)
            .and_then(|idx| self.fields.get(idx));
        match current {
            Some(field) => Span::new(field.offset, field.len),
            None => Span::new(self.input.len(), 0),
        }
    }

    /// Produces the next event, giving priority to pending hints.
    ///
    /// Order of precedence:
    /// 1. a pending scalar hint turns the current field into a `Scalar` event,
    /// 2. a pending struct hint opens a struct and emits `StructStart`,
    /// 3. otherwise the innermost open struct emits `OrderedField` for each
    ///    remaining field and `StructEnd` once none remain.
    ///
    /// # Errors
    ///
    /// * `UnexpectedEof` if a scalar hint is pending but there is no current field.
    /// * `InvalidValue` if no struct is open and no hint is pending.
    fn generate_next_event(&mut self) -> Result<ParseEvent<'de>, ParseError> {
        if let Some(hint) = self.pending_scalar_type.take() {
            let current = self
                .field_index
                .checked_sub(1)
                .and_then(|idx| self.fields.get(idx));
            return match current {
                Some(field) => {
                    let scalar = parse_scalar_with_hint(field.value, hint);
                    Ok(self.event(ParseEventKind::Scalar(scalar)))
                }
                None => Err(ParseError::new(
                    Span::new(self.input.len(), 0),
                    DeserializeErrorKind::UnexpectedEof {
                        expected: "field for scalar hint",
                    },
                )),
            };
        }

        if let Some(num_fields) = self.pending_struct_fields.take() {
            self.state_stack.push(ParserState::InStruct {
                remaining_fields: num_fields,
            });
            return Ok(self.event(ParseEventKind::StructStart(ContainerKind::Object)));
        }

        // Read the remaining count first so the stack can be mutated afterwards.
        let remaining = match self.state_stack.last() {
            Some(ParserState::InStruct { remaining_fields }) => *remaining_fields,
            Some(ParserState::Ready) | None => {
                return Err(ParseError::new(
                    Span::new(0, self.input.len()),
                    DeserializeErrorKind::InvalidValue {
                        message: "CSV parser requires hint_struct_fields to know field count"
                            .into(),
                    },
                ));
            }
        };

        if remaining == 0 {
            self.state_stack.pop();
            return Ok(self.event(ParseEventKind::StructEnd));
        }

        if let Some(ParserState::InStruct { remaining_fields }) = self.state_stack.last_mut() {
            *remaining_fields -= 1;
        }
        // Advance before building the event so its span covers the new field.
        self.field_index += 1;
        Ok(self.event(ParseEventKind::OrderedField))
    }
}
/// Splits one CSV row into fields, recording where each field's value lives.
///
/// Commas separate fields unless they appear between double quotes; every
/// `"` byte toggles the quoted state. `base_offset` is added to each field
/// offset. The result always contains at least one field, even for empty
/// input.
fn parse_csv_row_with_spans(input: &str, base_offset: usize) -> Vec<FieldSpan<'_>> {
    /// Converts the raw slice that begins at `start` into a `FieldSpan`.
    fn span_for<'a>(raw: &'a str, start: usize, base: usize) -> FieldSpan<'a> {
        let (value, value_offset) = unquote_field_with_offset(raw, start);
        FieldSpan {
            value,
            offset: base + value_offset,
            len: value.len(),
        }
    }

    let mut spans = Vec::new();
    let mut quoted = false;
    let mut start = 0;

    for (pos, byte) in input.bytes().enumerate() {
        if byte == b'"' {
            quoted = !quoted;
        } else if byte == b',' && !quoted {
            spans.push(span_for(&input[start..pos], start, base_offset));
            start = pos + 1;
        }
    }

    // Whatever follows the last separator is the final field.
    spans.push(span_for(&input[start..], start, base_offset));
    spans
}
/// Trims a raw field and strips one pair of surrounding double quotes.
///
/// `field_start` is the offset of `field` within its row. Returns the
/// cleaned value together with the offset at which that value begins,
/// accounting for removed leading whitespace and the opening quote.
/// A field consisting of a single `"` is returned unchanged.
fn unquote_field_with_offset(field: &str, field_start: usize) -> (&str, usize) {
    let leading_ws = field.len() - field.trim_start().len();
    let body = field.trim();
    let body_start = field_start + leading_ws;

    let unquoted = body
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));

    match unquoted {
        // Skip the opening quote when reporting where the value starts.
        Some(inner) => (inner, body_start + 1),
        None => (body, body_start),
    }
}
/// Interprets the text of a CSV field according to the type the caller expects.
///
/// * `Bool` — `true`, `1` and `yes` (compared ASCII case-insensitively, so
///   `True` or `Yes` are accepted as well) produce `true`; any other text
///   produces `false`.
/// * Unsigned and signed integer hints up to 64 bits are parsed as `u64` /
///   `i64`; the 128-bit hints use `u128` / `i128`.
/// * Float hints are parsed as `f64`.
/// * `String`, `Char` and `Bytes` return the text unchanged.
///
/// When numeric parsing fails the original text is returned as a string
/// value instead of an error.
fn parse_scalar_with_hint(value: &str, hint: ScalarTypeHint) -> ScalarValue<'_> {
    // Shared fallback: hand the raw text back untouched.
    let as_text = || ScalarValue::Str(Cow::Borrowed(value));

    match hint {
        ScalarTypeHint::Bool => {
            // Case-insensitive so that spellings such as "True" or "Yes" are
            // not silently read as `false`.
            let truthy = ["true", "1", "yes"]
                .iter()
                .any(|token| value.eq_ignore_ascii_case(token));
            // NOTE(review): unrecognised text still maps to `false` rather
            // than falling back to a string like the numeric branches do;
            // kept lenient to avoid rejecting input that used to be accepted.
            ScalarValue::Bool(truthy)
        }
        ScalarTypeHint::U8
        | ScalarTypeHint::U16
        | ScalarTypeHint::U32
        | ScalarTypeHint::U64
        | ScalarTypeHint::Usize => match value.parse::<u64>() {
            Ok(n) => ScalarValue::U64(n),
            Err(_) => as_text(),
        },
        ScalarTypeHint::U128 => match value.parse::<u128>() {
            Ok(n) => ScalarValue::U128(n),
            Err(_) => as_text(),
        },
        ScalarTypeHint::I8
        | ScalarTypeHint::I16
        | ScalarTypeHint::I32
        | ScalarTypeHint::I64
        | ScalarTypeHint::Isize => match value.parse::<i64>() {
            Ok(n) => ScalarValue::I64(n),
            Err(_) => as_text(),
        },
        ScalarTypeHint::I128 => match value.parse::<i128>() {
            Ok(n) => ScalarValue::I128(n),
            Err(_) => as_text(),
        },
        ScalarTypeHint::F32 | ScalarTypeHint::F64 => match value.parse::<f64>() {
            Ok(n) => ScalarValue::F64(n),
            Err(_) => as_text(),
        },
        ScalarTypeHint::String | ScalarTypeHint::Char => as_text(),
        // Bytes are passed through as text as well.
        ScalarTypeHint::Bytes => as_text(),
    }
}
impl<'de> FormatParser<'de> for CsvParser<'de> {
    /// Returns the buffered peeked event if there is one, otherwise generates
    /// a fresh event. This implementation never returns `Ok(None)`.
    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
        match self.peeked.take() {
            Some(buffered) => Ok(Some(buffered)),
            None => self.generate_next_event().map(Some),
        }
    }

    /// Returns a copy of the upcoming event without consuming it, generating
    /// and buffering it first if necessary.
    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
        let upcoming = match self.peeked.take() {
            Some(buffered) => buffered,
            None => self.generate_next_event()?,
        };
        self.peeked = Some(upcoming.clone());
        Ok(Some(upcoming))
    }

    /// Advances past one field, unless the field counter has already reached
    /// the number of fields in the row.
    fn skip_value(&mut self) -> Result<(), ParseError> {
        let has_more = self.field_index < self.fields.len();
        if has_more {
            self.field_index += 1;
        }
        Ok(())
    }

    /// Not supported.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn save(&mut self) -> SavePoint {
        unimplemented!("save/restore not supported for CSV (positional format)")
    }

    /// Not supported.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn restore(&mut self, _save_point: SavePoint) {
        unimplemented!("save/restore not supported for CSV (positional format)")
    }

    /// CSV is positional, so the parser reports itself as not self-describing.
    fn is_self_describing(&self) -> bool {
        false
    }

    /// Records how many fields the next struct has.
    ///
    /// A buffered `OrderedField` event is dropped so that the hint decides
    /// which event is produced next.
    fn hint_struct_fields(&mut self, num_fields: usize) {
        self.pending_struct_fields = Some(num_fields);
        let stale_field = self
            .peeked
            .as_ref()
            .map_or(false, |ev| matches!(ev.kind, ParseEventKind::OrderedField));
        if stale_field {
            self.peeked = None;
        }
    }

    /// Records the scalar type expected for the current field.
    ///
    /// A buffered `OrderedField` event is dropped so that the hint decides
    /// which event is produced next.
    fn hint_scalar_type(&mut self, hint: ScalarTypeHint) {
        self.pending_scalar_type = Some(hint);
        let stale_field = self
            .peeked
            .as_ref()
            .map_or(false, |ev| matches!(ev.kind, ParseEventKind::OrderedField));
        if stale_field {
            self.peeked = None;
        }
    }

    /// Span of the current field; always `Some`.
    fn current_span(&self) -> Option<Span> {
        let span = self.current_field_span();
        Some(span)
    }
}
impl<'de> CsvParser<'de> {
    /// Wraps `kind` in an event tagged with the span of the current field.
    #[inline]
    fn event(&self, kind: ParseEventKind<'de>) -> ParseEvent<'de> {
        let span = self.current_field_span();
        ParseEvent::new(kind, span)
    }
}