typst_library/loading/
csv.rs1use az::SaturatingAs;
2use typst_syntax::Spanned;
3
4use crate::diag::{LineCol, LoadError, LoadedWithin, ReportPos, SourceResult, bail};
5use crate::engine::Engine;
6use crate::foundations::{Array, Dict, IntoValue, Type, Value, cast, func, scope};
7use crate::loading::{DataSource, Load, Readable};
8
9#[func(scope, title = "CSV")]
27pub fn csv(
28    engine: &mut Engine,
29    source: Spanned<DataSource>,
31    #[named]
34    #[default]
35    delimiter: Delimiter,
36    #[named]
44    #[default(RowType::Array)]
45    row_type: RowType,
46) -> SourceResult<Array> {
47    let loaded = source.load(engine.world)?;
48
49    let mut builder = ::csv::ReaderBuilder::new();
50    let has_headers = row_type == RowType::Dict;
51    builder.has_headers(has_headers);
52    builder.delimiter(delimiter.0 as u8);
53
54    let mut line_offset: usize = 1;
56    let mut reader = builder.from_reader(loaded.data.as_slice());
57    let mut headers: Option<::csv::StringRecord> = None;
58
59    if has_headers {
60        line_offset += 1;
62        headers = Some(
63            reader
64                .headers()
65                .cloned()
66                .map_err(|err| format_csv_error(err, 1))
67                .within(&loaded)?,
68        );
69    }
70
71    let mut array = Array::new();
72    for (line, result) in reader.records().enumerate() {
73        let line = line + line_offset;
77        let row = result.map_err(|err| format_csv_error(err, line)).within(&loaded)?;
78        let item = if let Some(headers) = &headers {
79            let mut dict = Dict::new();
80            for (field, value) in headers.iter().zip(&row) {
81                dict.insert(field.into(), value.into_value());
82            }
83            dict.into_value()
84        } else {
85            let sub = row.into_iter().map(|field| field.into_value()).collect();
86            Value::Array(sub)
87        };
88        array.push(item);
89    }
90
91    Ok(array)
92}
93
94#[scope]
95impl csv {
96    #[func(title = "Decode CSV")]
98    #[deprecated(
99        message = "`csv.decode` is deprecated, directly pass bytes to `csv` instead",
100        until = "0.15.0"
101    )]
102    pub fn decode(
103        engine: &mut Engine,
104        data: Spanned<Readable>,
106        #[named]
109        #[default]
110        delimiter: Delimiter,
111        #[named]
119        #[default(RowType::Array)]
120        row_type: RowType,
121    ) -> SourceResult<Array> {
122        csv(engine, data.map(Readable::into_source), delimiter, row_type)
123    }
124}
125
/// The single character that separates fields when parsing CSV.
pub struct Delimiter(char);

impl Default for Delimiter {
    /// Returns a comma as the separator.
    fn default() -> Self {
        Delimiter(',')
    }
}
134
135cast! {
136    Delimiter,
137    self => self.0.into_value(),
138    c: char => if c.is_ascii() {
139        Self(c)
140    } else {
141        bail!("delimiter must be an ASCII character")
142    },
143}
144
/// The representation used for each parsed CSV row.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum RowType {
    /// Each row is an array of its fields.
    Array,
    /// Each row is a dictionary keyed by the entries of the header record.
    Dict,
}
151
152cast! {
153    RowType,
154    self => match self {
155        Self::Array => Type::of::<Array>(),
156        Self::Dict => Type::of::<Dict>(),
157    }.into_value(),
158    ty: Type => {
159        if ty == Type::of::<Array>() {
160            Self::Array
161        } else if ty == Type::of::<Dict>() {
162            Self::Dict
163        } else {
164            bail!("expected `array` or `dictionary`");
165        }
166    },
167}
168
169fn format_csv_error(err: ::csv::Error, line: usize) -> LoadError {
171    let msg = "failed to parse CSV";
172    let pos = (err.kind().position())
173        .map(|pos| {
174            let start = pos.byte().saturating_as();
175            ReportPos::from(start..start)
176        })
177        .unwrap_or(LineCol::one_based(line, 1).into());
178    match err.kind() {
179        ::csv::ErrorKind::Utf8 { .. } => {
180            LoadError::new(pos, msg, "file is not valid utf-8")
181        }
182        ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
183            let err =
184                format!("found {len} instead of {expected_len} fields in line {line}");
185            LoadError::new(pos, msg, err)
186        }
187        _ => LoadError::new(pos, "failed to parse CSV", err),
188    }
189}