typst_library/loading/
csv.rs

1use ecow::{eco_format, EcoString};
2use typst_syntax::Spanned;
3
4use crate::diag::{bail, At, SourceResult};
5use crate::engine::Engine;
6use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
7use crate::loading::{DataSource, Load, Readable};
8
9/// Reads structured data from a CSV file.
10///
11/// The CSV file will be read and parsed into a 2-dimensional array of strings:
12/// Each row in the CSV file will be represented as an array of strings, and all
13/// rows will be collected into a single array. Header rows will not be
14/// stripped.
15///
16/// # Example
17/// ```example
18/// #let results = csv("example.csv")
19///
20/// #table(
21///   columns: 2,
22///   [*Condition*], [*Result*],
23///   ..results.flatten(),
24/// )
25/// ```
26#[func(scope, title = "CSV")]
27pub fn csv(
28    engine: &mut Engine,
29    /// A [path]($syntax/#paths) to a CSV file or raw CSV bytes.
30    source: Spanned<DataSource>,
31    /// The delimiter that separates columns in the CSV file.
32    /// Must be a single ASCII character.
33    #[named]
34    #[default]
35    delimiter: Delimiter,
36    /// How to represent the file's rows.
37    ///
38    /// - If set to `array`, each row is represented as a plain array of
39    ///   strings.
40    /// - If set to `dictionary`, each row is represented as a dictionary
41    ///   mapping from header keys to strings. This option only makes sense when
42    ///   a header row is present in the CSV file.
43    #[named]
44    #[default(RowType::Array)]
45    row_type: RowType,
46) -> SourceResult<Array> {
47    let data = source.load(engine.world)?;
48
49    let mut builder = ::csv::ReaderBuilder::new();
50    let has_headers = row_type == RowType::Dict;
51    builder.has_headers(has_headers);
52    builder.delimiter(delimiter.0 as u8);
53
54    // Counting lines from 1 by default.
55    let mut line_offset: usize = 1;
56    let mut reader = builder.from_reader(data.as_slice());
57    let mut headers: Option<::csv::StringRecord> = None;
58
59    if has_headers {
60        // Counting lines from 2 because we have a header.
61        line_offset += 1;
62        headers = Some(
63            reader
64                .headers()
65                .map_err(|err| format_csv_error(err, 1))
66                .at(source.span)?
67                .clone(),
68        );
69    }
70
71    let mut array = Array::new();
72    for (line, result) in reader.records().enumerate() {
73        // Original solution was to use line from error, but that is
74        // incorrect with `has_headers` set to `false`. See issue:
75        // https://github.com/BurntSushi/rust-csv/issues/184
76        let line = line + line_offset;
77        let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?;
78        let item = if let Some(headers) = &headers {
79            let mut dict = Dict::new();
80            for (field, value) in headers.iter().zip(&row) {
81                dict.insert(field.into(), value.into_value());
82            }
83            dict.into_value()
84        } else {
85            let sub = row.into_iter().map(|field| field.into_value()).collect();
86            Value::Array(sub)
87        };
88        array.push(item);
89    }
90
91    Ok(array)
92}
93
94#[scope]
95impl csv {
96    /// Reads structured data from a CSV string/bytes.
97    #[func(title = "Decode CSV")]
98    #[deprecated = "`csv.decode` is deprecated, directly pass bytes to `csv` instead"]
99    pub fn decode(
100        engine: &mut Engine,
101        /// CSV data.
102        data: Spanned<Readable>,
103        /// The delimiter that separates columns in the CSV file.
104        /// Must be a single ASCII character.
105        #[named]
106        #[default]
107        delimiter: Delimiter,
108        /// How to represent the file's rows.
109        ///
110        /// - If set to `array`, each row is represented as a plain array of
111        ///   strings.
112        /// - If set to `dictionary`, each row is represented as a dictionary
113        ///   mapping from header keys to strings. This option only makes sense
114        ///   when a header row is present in the CSV file.
115        #[named]
116        #[default(RowType::Array)]
117        row_type: RowType,
118    ) -> SourceResult<Array> {
119        csv(engine, data.map(Readable::into_source), delimiter, row_type)
120    }
121}
122
/// The delimiter to use when parsing CSV files.
///
/// Wraps a single character. Values cast from user input are only accepted if
/// the character is ASCII, which is what allows `csv` to narrow it to a `u8`
/// when configuring the reader.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Delimiter(char);
125
126impl Default for Delimiter {
127    fn default() -> Self {
128        Self(',')
129    }
130}
131
132cast! {
133    Delimiter,
134    self => self.0.into_value(),
135    c: char => if c.is_ascii() {
136        Self(c)
137    } else {
138        bail!("delimiter must be an ASCII character")
139    },
140}
141
/// The type of parsed rows.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RowType {
    /// Each row is an array of strings.
    Array,
    /// Each row is a dictionary mapping header keys to strings.
    Dict,
}
148
149cast! {
150    RowType,
151    self => match self {
152        Self::Array => Type::of::<Array>(),
153        Self::Dict => Type::of::<Dict>(),
154    }.into_value(),
155    ty: Type => {
156        if ty == Type::of::<Array>() {
157            Self::Array
158        } else if ty == Type::of::<Dict>() {
159            Self::Dict
160        } else {
161            bail!("expected `array` or `dictionary`");
162        }
163    },
164}
165
166/// Format the user-facing CSV error message.
167fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
168    match err.kind() {
169        ::csv::ErrorKind::Utf8 { .. } => "file is not valid utf-8".into(),
170        ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
171            eco_format!(
172                "failed to parse CSV (found {len} instead of \
173                 {expected_len} fields in line {line})"
174            )
175        }
176        _ => eco_format!("failed to parse CSV ({err})"),
177    }
178}