typst_library/loading/
csv.rs1use ecow::{eco_format, EcoString};
2use typst_syntax::Spanned;
3
4use crate::diag::{bail, At, SourceResult};
5use crate::engine::Engine;
6use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
7use crate::loading::{DataSource, Load, Readable};
8
9#[func(scope, title = "CSV")]
27pub fn csv(
28 engine: &mut Engine,
29 source: Spanned<DataSource>,
31 #[named]
34 #[default]
35 delimiter: Delimiter,
36 #[named]
44 #[default(RowType::Array)]
45 row_type: RowType,
46) -> SourceResult<Array> {
47 let data = source.load(engine.world)?;
48
49 let mut builder = ::csv::ReaderBuilder::new();
50 let has_headers = row_type == RowType::Dict;
51 builder.has_headers(has_headers);
52 builder.delimiter(delimiter.0 as u8);
53
54 let mut line_offset: usize = 1;
56 let mut reader = builder.from_reader(data.as_slice());
57 let mut headers: Option<::csv::StringRecord> = None;
58
59 if has_headers {
60 line_offset += 1;
62 headers = Some(
63 reader
64 .headers()
65 .map_err(|err| format_csv_error(err, 1))
66 .at(source.span)?
67 .clone(),
68 );
69 }
70
71 let mut array = Array::new();
72 for (line, result) in reader.records().enumerate() {
73 let line = line + line_offset;
77 let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?;
78 let item = if let Some(headers) = &headers {
79 let mut dict = Dict::new();
80 for (field, value) in headers.iter().zip(&row) {
81 dict.insert(field.into(), value.into_value());
82 }
83 dict.into_value()
84 } else {
85 let sub = row.into_iter().map(|field| field.into_value()).collect();
86 Value::Array(sub)
87 };
88 array.push(item);
89 }
90
91 Ok(array)
92}
93
94#[scope]
95impl csv {
96 #[func(title = "Decode CSV")]
98 #[deprecated = "`csv.decode` is deprecated, directly pass bytes to `csv` instead"]
99 pub fn decode(
100 engine: &mut Engine,
101 data: Spanned<Readable>,
103 #[named]
106 #[default]
107 delimiter: Delimiter,
108 #[named]
116 #[default(RowType::Array)]
117 row_type: RowType,
118 ) -> SourceResult<Array> {
119 csv(engine, data.map(Readable::into_source), delimiter, row_type)
120 }
121}
122
/// The character that separates fields when parsing CSV.
///
/// The wrapped character is narrowed to a single byte before it is handed to
/// the CSV reader, so values are expected to be ASCII.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Delimiter(char);
125
126impl Default for Delimiter {
127 fn default() -> Self {
128 Self(',')
129 }
130}
131
132cast! {
133 Delimiter,
134 self => self.0.into_value(),
135 c: char => if c.is_ascii() {
136 Self(c)
137 } else {
138 bail!("delimiter must be an ASCII character")
139 },
140}
141
/// How each CSV record is represented in the loaded result.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum RowType {
    /// Each record is an array of its fields.
    Array,
    /// Each record is a dictionary keyed by the fields of the header row.
    Dict,
}
148
149cast! {
150 RowType,
151 self => match self {
152 Self::Array => Type::of::<Array>(),
153 Self::Dict => Type::of::<Dict>(),
154 }.into_value(),
155 ty: Type => {
156 if ty == Type::of::<Array>() {
157 Self::Array
158 } else if ty == Type::of::<Dict>() {
159 Self::Dict
160 } else {
161 bail!("expected `array` or `dictionary`");
162 }
163 },
164}
165
166fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
168 match err.kind() {
169 ::csv::ErrorKind::Utf8 { .. } => "file is not valid utf-8".into(),
170 ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
171 eco_format!(
172 "failed to parse CSV (found {len} instead of \
173 {expected_len} fields in line {line})"
174 )
175 }
176 _ => eco_format!("failed to parse CSV ({err})"),
177 }
178}