typst_library/loading/
csv.rs1use az::SaturatingAs;
2use typst_syntax::Spanned;
3
4use crate::diag::{LineCol, LoadError, LoadedWithin, ReportTextPos, SourceResult, bail};
5use crate::engine::Engine;
6use crate::foundations::{Array, Dict, IntoValue, Type, Value, cast, func};
7use crate::loading::{DataSource, Load};
8
9#[func(title = "CSV")]
27pub fn csv(
28 engine: &mut Engine,
29 source: Spanned<DataSource>,
31 #[named]
34 #[default]
35 delimiter: Delimiter,
36 #[named]
44 #[default(RowType::Array)]
45 row_type: RowType,
46) -> SourceResult<Array> {
47 let loaded = source.load(engine.world)?;
48
49 let mut builder = ::csv::ReaderBuilder::new();
50 let has_headers = row_type == RowType::Dict;
51 builder.has_headers(has_headers);
52 builder.delimiter(delimiter.0 as u8);
53
54 let mut line_offset: usize = 1;
56 let mut reader = builder.from_reader(loaded.data.as_slice());
57 let mut headers: Option<::csv::StringRecord> = None;
58
59 if has_headers {
60 line_offset += 1;
62 headers = Some(
63 reader
64 .headers()
65 .cloned()
66 .map_err(|err| format_csv_error(err, 1))
67 .within(&loaded)?,
68 );
69 }
70
71 let mut array = Array::new();
72 for (line, result) in reader.records().enumerate() {
73 let line = line + line_offset;
77 let row = result.map_err(|err| format_csv_error(err, line)).within(&loaded)?;
78 let item = if let Some(headers) = &headers {
79 let mut dict = Dict::new();
80 for (field, value) in headers.iter().zip(&row) {
81 dict.insert(field.into(), value.into_value());
82 }
83 dict.into_value()
84 } else {
85 let sub = row.into_iter().map(|field| field.into_value()).collect();
86 Value::Array(sub)
87 };
88 array.push(item);
89 }
90
91 Ok(array)
92}
93
/// The single-character column separator used when parsing CSV data.
///
/// The default delimiter is a comma.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Delimiter(char);

impl Default for Delimiter {
    /// Returns the comma delimiter.
    fn default() -> Self {
        Self(',')
    }
}
102
103cast! {
104 Delimiter,
105 self => self.0.into_value(),
106 c: char => if c.is_ascii() {
107 Self(c)
108 } else {
109 bail!("delimiter must be an ASCII character")
110 },
111}
112
/// How each parsed CSV row is represented in the result.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum RowType {
    /// Each row is an array of its fields.
    Array,
    /// Each row is a dictionary keyed by the names from the header row.
    Dict,
}
119
120cast! {
121 RowType,
122 self => match self {
123 Self::Array => Type::of::<Array>(),
124 Self::Dict => Type::of::<Dict>(),
125 }.into_value(),
126 ty: Type => {
127 if ty == Type::of::<Array>() {
128 Self::Array
129 } else if ty == Type::of::<Dict>() {
130 Self::Dict
131 } else {
132 bail!("expected `array` or `dictionary`");
133 }
134 },
135}
136
137fn format_csv_error(err: ::csv::Error, line: usize) -> LoadError {
139 let msg = "failed to parse CSV";
140 let pos = (err.kind().position())
141 .map(|pos| {
142 let start = pos.byte().saturating_as();
143 ReportTextPos::from(start..start)
144 })
145 .unwrap_or(LineCol::one_based(line, 1).into());
146 match err.kind() {
147 ::csv::ErrorKind::Utf8 { .. } => {
148 LoadError::text(pos, msg, "file is not valid UTF-8")
149 }
150 ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
151 let err =
152 format!("found {len} instead of {expected_len} fields in line {line}");
153 LoadError::text(pos, msg, err)
154 }
155 _ => LoadError::text(pos, "failed to parse CSV", err),
156 }
157}