Skip to main content

typst_library/loading/
csv.rs

1use az::SaturatingAs;
2use typst_syntax::Spanned;
3
4use crate::diag::{LineCol, LoadError, LoadedWithin, ReportTextPos, SourceResult, bail};
5use crate::engine::Engine;
6use crate::foundations::{Array, Dict, IntoValue, Type, Value, cast, func};
7use crate::loading::{DataSource, Load};
8
9/// Reads structured data from a CSV file.
10///
11/// The CSV file will be read and parsed into a 2-dimensional array of strings:
12/// Each row in the CSV file will be represented as an array of strings, and all
13/// rows will be collected into a single array. Header rows will not be
14/// stripped.
15///
16/// = Example <example>
17/// ```example
18/// #let results = csv("example.csv")
19///
20/// #table(
21///   columns: 2,
22///   [*Condition*], [*Result*],
23///   ..results.flatten(),
24/// )
25/// ```
26#[func(title = "CSV")]
27pub fn csv(
28    engine: &mut Engine,
29    /// A path to a CSV file or raw CSV bytes.
30    source: Spanned<DataSource>,
31    /// The delimiter that separates columns in the CSV file. Must be a single
32    /// ASCII character.
33    #[named]
34    #[default]
35    delimiter: Delimiter,
36    /// How to represent the file's rows.
37    ///
38    /// - If set to `array`, each row is represented as a plain array of
39    ///   strings.
40    /// - If set to `dictionary`, each row is represented as a dictionary
41    ///   mapping from header keys to strings. This option only makes sense when
42    ///   a header row is present in the CSV file.
43    #[named]
44    #[default(RowType::Array)]
45    row_type: RowType,
46) -> SourceResult<Array> {
47    let loaded = source.load(engine.world)?;
48
49    let mut builder = ::csv::ReaderBuilder::new();
50    let has_headers = row_type == RowType::Dict;
51    builder.has_headers(has_headers);
52    builder.delimiter(delimiter.0 as u8);
53
54    // Counting lines from 1 by default.
55    let mut line_offset: usize = 1;
56    let mut reader = builder.from_reader(loaded.data.as_slice());
57    let mut headers: Option<::csv::StringRecord> = None;
58
59    if has_headers {
60        // Counting lines from 2 because we have a header.
61        line_offset += 1;
62        headers = Some(
63            reader
64                .headers()
65                .cloned()
66                .map_err(|err| format_csv_error(err, 1))
67                .within(&loaded)?,
68        );
69    }
70
71    let mut array = Array::new();
72    for (line, result) in reader.records().enumerate() {
73        // Original solution was to use line from error, but that is
74        // incorrect with `has_headers` set to `false`. See issue:
75        // https://github.com/BurntSushi/rust-csv/issues/184
76        let line = line + line_offset;
77        let row = result.map_err(|err| format_csv_error(err, line)).within(&loaded)?;
78        let item = if let Some(headers) = &headers {
79            let mut dict = Dict::new();
80            for (field, value) in headers.iter().zip(&row) {
81                dict.insert(field.into(), value.into_value());
82            }
83            dict.into_value()
84        } else {
85            let sub = row.into_iter().map(|field| field.into_value()).collect();
86            Value::Array(sub)
87        };
88        array.push(item);
89    }
90
91    Ok(array)
92}
93
/// The delimiter to use when parsing CSV files.
///
/// Wraps the single character that separates columns. The common value-type
/// traits are derived so the delimiter can be printed, copied, compared, and
/// hashed like the sibling `RowType`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Delimiter(char);

impl Default for Delimiter {
    /// Returns the comma (`,`) delimiter.
    fn default() -> Self {
        Self(',')
    }
}
102
103cast! {
104    Delimiter,
105    self => self.0.into_value(),
106    c: char => if c.is_ascii() {
107        Self(c)
108    } else {
109        bail!("delimiter must be an ASCII character")
110    },
111}
112
/// The shape used to represent each parsed CSV row.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum RowType {
    /// Each row becomes a plain array of strings.
    Array,
    /// Each row becomes a dictionary mapping header keys to strings.
    Dict,
}
119
120cast! {
121    RowType,
122    self => match self {
123        Self::Array => Type::of::<Array>(),
124        Self::Dict => Type::of::<Dict>(),
125    }.into_value(),
126    ty: Type => {
127        if ty == Type::of::<Array>() {
128            Self::Array
129        } else if ty == Type::of::<Dict>() {
130            Self::Dict
131        } else {
132            bail!("expected `array` or `dictionary`");
133        }
134    },
135}
136
137/// Format the user-facing CSV error message.
138fn format_csv_error(err: ::csv::Error, line: usize) -> LoadError {
139    let msg = "failed to parse CSV";
140    let pos = (err.kind().position())
141        .map(|pos| {
142            let start = pos.byte().saturating_as();
143            ReportTextPos::from(start..start)
144        })
145        .unwrap_or(LineCol::one_based(line, 1).into());
146    match err.kind() {
147        ::csv::ErrorKind::Utf8 { .. } => {
148            LoadError::text(pos, msg, "file is not valid UTF-8")
149        }
150        ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
151            let err =
152                format!("found {len} instead of {expected_len} fields in line {line}");
153            LoadError::text(pos, msg, err)
154        }
155        _ => LoadError::text(pos, "failed to parse CSV", err),
156    }
157}