1use calamine::*;
2use chrono::{Local, LocalResult, Offset, TimeZone, Utc};
3use indexmap::IndexMap;
4use nu_engine::command_prelude::*;
5
6use std::io::Cursor;
7
8#[derive(Clone)]
9pub struct FromXlsx;
10
11impl Command for FromXlsx {
12 fn name(&self) -> &str {
13 "from xlsx"
14 }
15
16 fn signature(&self) -> Signature {
17 Signature::build("from xlsx")
18 .input_output_types(vec![(Type::Binary, Type::table())])
19 .allow_variants_without_examples(true)
20 .named(
21 "sheets",
22 SyntaxShape::List(Box::new(SyntaxShape::String)),
23 "Only convert specified sheets.",
24 Some('s'),
25 )
26 .named(
27 "header-row",
28 SyntaxShape::OneOf(vec![SyntaxShape::Int, SyntaxShape::Nothing]),
29 "Specify row (0-indexed) to designate the header (default first non-empty row) or null for no header",
30 Some('r'),
31 )
32 .category(Category::Formats)
33 }
34
35 fn description(&self) -> &str {
36 "Parse binary Excel(.xlsx) data and create table."
37 }
38
39 fn run(
40 &self,
41 engine_state: &EngineState,
42 stack: &mut Stack,
43 call: &Call,
44 mut input: PipelineData,
45 ) -> Result<PipelineData, ShellError> {
46 let head = call.head;
47
48 let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
49 call.get_flag(engine_state, stack, "sheets")?
50 {
51 convert_columns(columns.as_slice())?
52 } else {
53 vec![]
54 };
55
56 let header_row = match call.get_flag(engine_state, stack, "header-row")? {
57 Some(Value::Int { val, .. }) => Some(HeaderRow::Row(
58 val.try_into()
59 .expect("Header row index should not exceed u32"),
60 )),
61 Some(Value::Nothing { .. }) => None,
62 _ => Some(HeaderRow::FirstNonEmptyRow),
63 };
64
65 let metadata = input.take_metadata().map(|md| md.with_content_type(None));
66 from_xlsx(input, head, sel_sheets, header_row).map(|pd| pd.set_metadata(metadata))
67 }
68
69 fn examples(&self) -> Vec<Example<'_>> {
70 vec![
71 Example {
72 description: "Convert binary .xlsx data to a table.",
73 example: "open --raw test.xlsx | from xlsx",
74 result: None,
75 },
76 Example {
77 description: "Convert binary .xlsx data to a table, specifying the tables.",
78 example: "open --raw test.xlsx | from xlsx --sheets [Spreadsheet1]",
79 result: None,
80 },
81 Example {
82 description: "Convert binary .xlsx data to a table, specifying the tables and specifying no header row.",
83 example: "open --raw test.xlsx | from xlsx --sheets [Spreadsheet1] --header-row null",
84 result: None,
85 },
86 ]
87 }
88}
89
90fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
91 let res = columns
92 .iter()
93 .map(|value| match &value {
94 Value::String { val: s, .. } => Ok(s.clone()),
95 _ => Err(ShellError::IncompatibleParametersSingle {
96 msg: "Incorrect column format, Only string as column name".to_string(),
97 span: value.span(),
98 }),
99 })
100 .collect::<Result<Vec<String>, _>>()?;
101
102 Ok(res)
103}
104
105fn collect_binary(input: PipelineData, span: Span) -> Result<Vec<u8>, ShellError> {
106 if let PipelineData::ByteStream(stream, ..) = input {
107 stream.into_bytes()
108 } else {
109 let mut bytes = vec![];
110 let mut values = input.into_iter();
111
112 loop {
113 match values.next() {
114 Some(Value::Binary { val: b, .. }) => {
115 bytes.extend_from_slice(&b);
116 }
117 Some(x) => {
118 return Err(ShellError::UnsupportedInput {
119 msg: "Expected binary from pipeline".to_string(),
120 input: "value originates from here".into(),
121 msg_span: span,
122 input_span: x.span(),
123 });
124 }
125 None => break,
126 }
127 }
128
129 Ok(bytes)
130 }
131}
132
133fn from_xlsx(
134 input: PipelineData,
135 head: Span,
136 sel_sheets: Vec<String>,
137 header_row: Option<HeaderRow>,
138) -> Result<PipelineData, ShellError> {
139 let span = input.span();
140 let bytes = collect_binary(input, head)?;
141 let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
142 let mut xlsx = Xlsx::<_>::new(buf).map_err(|_| ShellError::UnsupportedInput {
143 msg: "Could not load XLSX file".to_string(),
144 input: "value originates from here".into(),
145 msg_span: head,
146 input_span: span.unwrap_or(head),
147 })?;
148
149 let mut dict = IndexMap::new();
150
151 let mut sheet_names = xlsx.sheet_names();
152 if !sel_sheets.is_empty() {
153 sheet_names.retain(|e| sel_sheets.contains(e));
154 }
155
156 let tz = match Local.timestamp_opt(0, 0) {
157 LocalResult::Single(tz) => *tz.offset(),
158 _ => Utc.fix(),
159 };
160
161 for sheet_name in sheet_names {
162 let mut sheet_output = vec![];
163
164 if let Some(hr) = header_row {
165 xlsx.with_header_row(hr);
166 }
167
168 if let Ok(current_sheet) = xlsx.worksheet_range(&sheet_name) {
169 let sheet_width = current_sheet.width();
170 let default_headers = (0..sheet_width)
171 .map(|i| {
172 format!(
173 "column{:0width$}",
174 i,
175 width = (sheet_width.ilog10() + 1) as usize
176 )
177 })
178 .collect::<Vec<String>>();
179
180 let headers = current_sheet
181 .headers()
182 .unwrap_or_else(|| vec!["".to_string(); sheet_width])
183 .into_iter()
184 .zip(default_headers.into_iter())
185 .map(
186 |(name, default)| {
187 if name.is_empty() { default } else { name }
188 },
189 );
190
191 for row in current_sheet.rows().skip(header_row.is_some() as usize) {
192 let record = headers
193 .clone()
194 .zip(row.iter())
195 .map(|(header_name, cell)| {
196 let value = match cell {
197 Data::Empty => Value::nothing(head),
198 Data::String(s) => Value::string(s, head),
199 Data::Float(f) => Value::float(*f, head),
200 Data::Int(i) => Value::int(*i, head),
201 Data::Bool(b) => Value::bool(*b, head),
202 Data::DateTime(d) => d
203 .as_datetime()
204 .and_then(|d| match tz.from_local_datetime(&d) {
205 LocalResult::Single(d) => Some(d),
206 _ => None,
207 })
208 .map(|d| Value::date(d, head))
209 .unwrap_or(Value::nothing(head)),
210 _ => Value::nothing(head),
211 };
212
213 (header_name, value)
214 })
215 .collect();
216
217 sheet_output.push(Value::record(record, head));
218 }
219
220 dict.insert(sheet_name, Value::list(sheet_output, head));
221 } else {
222 return Err(ShellError::UnsupportedInput {
223 msg: "Could not load sheet".to_string(),
224 input: "value originates from here".into(),
225 msg_span: head,
226 input_span: span.unwrap_or(head),
227 });
228 }
229 }
230
231 Ok(PipelineData::value(
232 Value::record(dict.into_iter().collect(), head),
233 None,
234 ))
235}
236
237#[cfg(test)]
238mod tests {
239 use super::*;
240
241 #[test]
242 fn test_examples() -> nu_test_support::Result {
243 nu_test_support::test().examples(FromXlsx)
244 }
245}