// typst_library/loading/csv.rs
use az::SaturatingAs;
use typst_syntax::Spanned;

use crate::diag::{LineCol, LoadError, LoadedWithin, ReportPos, SourceResult, bail};
use crate::engine::Engine;
use crate::foundations::{Array, Dict, IntoValue, Type, Value, cast, func, scope};
use crate::loading::{DataSource, Load, Readable};

9#[func(scope, title = "CSV")]
27pub fn csv(
28 engine: &mut Engine,
29 source: Spanned<DataSource>,
31 #[named]
34 #[default]
35 delimiter: Delimiter,
36 #[named]
44 #[default(RowType::Array)]
45 row_type: RowType,
46) -> SourceResult<Array> {
47 let loaded = source.load(engine.world)?;
48
49 let mut builder = ::csv::ReaderBuilder::new();
50 let has_headers = row_type == RowType::Dict;
51 builder.has_headers(has_headers);
52 builder.delimiter(delimiter.0 as u8);
53
54 let mut line_offset: usize = 1;
56 let mut reader = builder.from_reader(loaded.data.as_slice());
57 let mut headers: Option<::csv::StringRecord> = None;
58
59 if has_headers {
60 line_offset += 1;
62 headers = Some(
63 reader
64 .headers()
65 .cloned()
66 .map_err(|err| format_csv_error(err, 1))
67 .within(&loaded)?,
68 );
69 }
70
71 let mut array = Array::new();
72 for (line, result) in reader.records().enumerate() {
73 let line = line + line_offset;
77 let row = result.map_err(|err| format_csv_error(err, line)).within(&loaded)?;
78 let item = if let Some(headers) = &headers {
79 let mut dict = Dict::new();
80 for (field, value) in headers.iter().zip(&row) {
81 dict.insert(field.into(), value.into_value());
82 }
83 dict.into_value()
84 } else {
85 let sub = row.into_iter().map(|field| field.into_value()).collect();
86 Value::Array(sub)
87 };
88 array.push(item);
89 }
90
91 Ok(array)
92}
93
94#[scope]
95impl csv {
96 #[func(title = "Decode CSV")]
98 #[deprecated(
99 message = "`csv.decode` is deprecated, directly pass bytes to `csv` instead",
100 until = "0.15.0"
101 )]
102 pub fn decode(
103 engine: &mut Engine,
104 data: Spanned<Readable>,
106 #[named]
109 #[default]
110 delimiter: Delimiter,
111 #[named]
119 #[default(RowType::Array)]
120 row_type: RowType,
121 ) -> SourceResult<Array> {
122 csv(engine, data.map(Readable::into_source), delimiter, row_type)
123 }
124}
125
/// The delimiter to use when parsing CSV data.
///
/// Wraps a single character. The value cast for this type only accepts ASCII
/// characters, and the default is a comma.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Delimiter(char);

impl Default for Delimiter {
    /// Returns the comma delimiter.
    fn default() -> Self {
        Self(',')
    }
}

135cast! {
136 Delimiter,
137 self => self.0.into_value(),
138 c: char => if c.is_ascii() {
139 Self(c)
140 } else {
141 bail!("delimiter must be an ASCII character")
142 },
143}
144
/// The type of parsed rows.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum RowType {
    /// Each row is represented as an array of its fields.
    Array,
    /// Each row is represented as a dictionary keyed by the header names.
    Dict,
}

152cast! {
153 RowType,
154 self => match self {
155 Self::Array => Type::of::<Array>(),
156 Self::Dict => Type::of::<Dict>(),
157 }.into_value(),
158 ty: Type => {
159 if ty == Type::of::<Array>() {
160 Self::Array
161 } else if ty == Type::of::<Dict>() {
162 Self::Dict
163 } else {
164 bail!("expected `array` or `dictionary`");
165 }
166 },
167}
168
169fn format_csv_error(err: ::csv::Error, line: usize) -> LoadError {
171 let msg = "failed to parse CSV";
172 let pos = (err.kind().position())
173 .map(|pos| {
174 let start = pos.byte().saturating_as();
175 ReportPos::from(start..start)
176 })
177 .unwrap_or(LineCol::one_based(line, 1).into());
178 match err.kind() {
179 ::csv::ErrorKind::Utf8 { .. } => {
180 LoadError::new(pos, msg, "file is not valid utf-8")
181 }
182 ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
183 let err =
184 format!("found {len} instead of {expected_len} fields in line {line}");
185 LoadError::new(pos, msg, err)
186 }
187 _ => LoadError::new(pos, "failed to parse CSV", err),
188 }
189}