use az::SaturatingAs;
use typst_syntax::Spanned;
use crate::diag::{LineCol, LoadError, LoadedWithin, ReportPos, SourceResult, bail};
use crate::engine::Engine;
use crate::foundations::{Array, Dict, IntoValue, Type, Value, cast, func, scope};
use crate::loading::{DataSource, Load, Readable};
/// Reads CSV data and returns its records as an array.
///
/// The `source` is loaded through the engine's world and parsed using the
/// given `delimiter`. When `row_type` is `RowType::Dict`, the first record is
/// consumed as a header row and every following record becomes a dictionary
/// that pairs each header field with the record's field at the same position.
/// Otherwise every record becomes an array of its fields.
///
/// Loading failures are propagated as-is. Parse failures of the header or of
/// any record are converted by `format_csv_error` and reported within the
/// loaded source.
#[func(scope, title = "CSV")]
pub fn csv(
    engine: &mut Engine,
    // The CSV data to load.
    source: Spanned<DataSource>,
    // The character separating fields; defaults to `Delimiter::default()`.
    #[named]
    #[default]
    delimiter: Delimiter,
    // Whether rows are returned as arrays or as dictionaries.
    #[named]
    #[default(RowType::Array)]
    row_type: RowType,
) -> SourceResult<Array> {
    let loaded = source.load(engine.world)?;

    // A header row is only consumed when rows are returned as dictionaries.
    let wants_dicts = matches!(row_type, RowType::Dict);
    let mut reader = ::csv::ReaderBuilder::new()
        .has_headers(wants_dicts)
        // The `char` cast for `Delimiter` only accepts ASCII, and the default
        // is `,`, so this narrowing keeps the full character.
        .delimiter(delimiter.0 as u8)
        .from_reader(loaded.data.as_slice());

    let headers: Option<::csv::StringRecord> = if wants_dicts {
        let record = reader
            .headers()
            .cloned()
            // The header row is always reported as line 1.
            .map_err(|err| format_csv_error(err, 1))
            .within(&loaded)?;
        Some(record)
    } else {
        None
    };

    // Data records are numbered from line 1, or from line 2 when a header row
    // precedes them. The number is derived from the record index rather than
    // taken from the error itself.
    let first_line: usize = if headers.is_some() { 2 } else { 1 };

    let mut rows = Array::new();
    for (index, outcome) in reader.records().enumerate() {
        let line = first_line + index;
        let record = outcome
            .map_err(|err| format_csv_error(err, line))
            .within(&loaded)?;

        let item = match &headers {
            Some(keys) => {
                let mut dict = Dict::new();
                for (key, cell) in keys.iter().zip(record.iter()) {
                    dict.insert(key.into(), cell.into_value());
                }
                dict.into_value()
            }
            None => {
                let cells = record.iter().map(|cell| cell.into_value()).collect();
                Value::Array(cells)
            }
        };
        rows.push(item);
    }

    Ok(rows)
}
#[scope]
impl csv {
    /// Reads CSV data from already-available readable data.
    ///
    /// Deprecated: this converts `data` into a source and forwards it,
    /// together with `delimiter` and `row_type`, to `csv`, which should be
    /// called directly instead.
    #[func(title = "Decode CSV")]
    #[deprecated(
        message = "`csv.decode` is deprecated, directly pass bytes to `csv` instead",
        until = "0.15.0"
    )]
    pub fn decode(
        engine: &mut Engine,
        // The CSV data to parse.
        data: Spanned<Readable>,
        // The character separating fields; defaults to `Delimiter::default()`.
        #[named]
        #[default]
        delimiter: Delimiter,
        // Whether rows are returned as arrays or as dictionaries.
        #[named]
        #[default(RowType::Array)]
        row_type: RowType,
    ) -> SourceResult<Array> {
        // Keep the span of the argument while turning the payload into a source.
        let source = data.map(|readable| readable.into_source());
        csv(engine, source, delimiter, row_type)
    }
}
/// The character that separates fields when parsing CSV data.
pub struct Delimiter(char);

impl Default for Delimiter {
    /// Returns the comma delimiter.
    fn default() -> Self {
        Delimiter(',')
    }
}
// Value conversions for `Delimiter`.
cast! {
Delimiter,
// Converting a delimiter into a value yields its wrapped character.
self => self.0.into_value(),
// A character is accepted only if it is ASCII; any other character is
// rejected with an error. `csv` later narrows the character to a `u8`.
c: char => if c.is_ascii() {
Self(c)
} else {
bail!("delimiter must be an ASCII character")
},
}
/// How `csv` represents each parsed row in its result.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum RowType {
/// Each row is returned as an array of its fields.
Array,
/// Each row is returned as a dictionary keyed by the fields of the header
/// row; selecting this makes `csv` consume the first record as headers.
Dict,
}
// Value conversions for `RowType`.
cast! {
RowType,
// Converting a row type into a value yields the matching type: the array
// type for `Array` and the dictionary type for `Dict`.
self => match self {
Self::Array => Type::of::<Array>(),
Self::Dict => Type::of::<Dict>(),
}.into_value(),
// Only the array and dictionary types are accepted; any other type is
// rejected with an error.
ty: Type => {
if ty == Type::of::<Array>() {
Self::Array
} else if ty == Type::of::<Dict>() {
Self::Dict
} else {
bail!("expected `array` or `dictionary`");
}
},
}
/// Builds the `LoadError` reported for a failure raised by the `csv` crate.
///
/// `line` is the one-based line number the caller assigns to the failing
/// record. It is used in the field-count message and as the fallback error
/// position when the error itself carries no position.
fn format_csv_error(err: ::csv::Error, line: usize) -> LoadError {
    let msg = "failed to parse CSV";

    // Prefer the byte offset reported by the error; otherwise point at the
    // first column of the caller-provided line.
    let pos = match err.kind().position() {
        Some(position) => {
            let offset = position.byte().saturating_as();
            ReportPos::from(offset..offset)
        }
        None => LineCol::one_based(line, 1).into(),
    };

    match err.kind() {
        ::csv::ErrorKind::Utf8 { .. } => {
            LoadError::new(pos, msg, "file is not valid utf-8")
        }
        ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
            let detail =
                format!("found {len} instead of {expected_len} fields in line {line}");
            LoadError::new(pos, msg, detail)
        }
        // Every other kind is reported using the error itself as the detail.
        _ => LoadError::new(pos, msg, err),
    }
}