nu_command/formats/from/
xlsx.rs1use calamine::*;
2use chrono::{Local, LocalResult, Offset, TimeZone, Utc};
3use indexmap::IndexMap;
4use nu_engine::command_prelude::*;
5
6use std::io::Cursor;
7
8#[derive(Clone)]
9pub struct FromXlsx;
10
11impl Command for FromXlsx {
12 fn name(&self) -> &str {
13 "from xlsx"
14 }
15
16 fn signature(&self) -> Signature {
17 Signature::build("from xlsx")
18 .input_output_types(vec![(Type::Binary, Type::table())])
19 .allow_variants_without_examples(true)
20 .named(
21 "sheets",
22 SyntaxShape::List(Box::new(SyntaxShape::String)),
23 "Only convert specified sheets",
24 Some('s'),
25 )
26 .category(Category::Formats)
27 }
28
29 fn description(&self) -> &str {
30 "Parse binary Excel(.xlsx) data and create table."
31 }
32
33 fn run(
34 &self,
35 engine_state: &EngineState,
36 stack: &mut Stack,
37 call: &Call,
38 input: PipelineData,
39 ) -> Result<PipelineData, ShellError> {
40 let head = call.head;
41
42 let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
43 call.get_flag(engine_state, stack, "sheets")?
44 {
45 convert_columns(columns.as_slice())?
46 } else {
47 vec![]
48 };
49
50 let metadata = input.metadata().map(|md| md.with_content_type(None));
51 from_xlsx(input, head, sel_sheets).map(|pd| pd.set_metadata(metadata))
52 }
53
54 fn examples(&self) -> Vec<Example<'_>> {
55 vec![
56 Example {
57 description: "Convert binary .xlsx data to a table",
58 example: "open --raw test.xlsx | from xlsx",
59 result: None,
60 },
61 Example {
62 description: "Convert binary .xlsx data to a table, specifying the tables",
63 example: "open --raw test.xlsx | from xlsx --sheets [Spreadsheet1]",
64 result: None,
65 },
66 ]
67 }
68}
69
70fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
71 let res = columns
72 .iter()
73 .map(|value| match &value {
74 Value::String { val: s, .. } => Ok(s.clone()),
75 _ => Err(ShellError::IncompatibleParametersSingle {
76 msg: "Incorrect column format, Only string as column name".to_string(),
77 span: value.span(),
78 }),
79 })
80 .collect::<Result<Vec<String>, _>>()?;
81
82 Ok(res)
83}
84
85fn collect_binary(input: PipelineData, span: Span) -> Result<Vec<u8>, ShellError> {
86 if let PipelineData::ByteStream(stream, ..) = input {
87 stream.into_bytes()
88 } else {
89 let mut bytes = vec![];
90 let mut values = input.into_iter();
91
92 loop {
93 match values.next() {
94 Some(Value::Binary { val: b, .. }) => {
95 bytes.extend_from_slice(&b);
96 }
97 Some(x) => {
98 return Err(ShellError::UnsupportedInput {
99 msg: "Expected binary from pipeline".to_string(),
100 input: "value originates from here".into(),
101 msg_span: span,
102 input_span: x.span(),
103 });
104 }
105 None => break,
106 }
107 }
108
109 Ok(bytes)
110 }
111}
112
113fn from_xlsx(
114 input: PipelineData,
115 head: Span,
116 sel_sheets: Vec<String>,
117) -> Result<PipelineData, ShellError> {
118 let span = input.span();
119 let bytes = collect_binary(input, head)?;
120 let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
121 let mut xlsx = Xlsx::<_>::new(buf).map_err(|_| ShellError::UnsupportedInput {
122 msg: "Could not load XLSX file".to_string(),
123 input: "value originates from here".into(),
124 msg_span: head,
125 input_span: span.unwrap_or(head),
126 })?;
127
128 let mut dict = IndexMap::new();
129
130 let mut sheet_names = xlsx.sheet_names();
131 if !sel_sheets.is_empty() {
132 sheet_names.retain(|e| sel_sheets.contains(e));
133 }
134
135 let tz = match Local.timestamp_opt(0, 0) {
136 LocalResult::Single(tz) => *tz.offset(),
137 _ => Utc.fix(),
138 };
139
140 for sheet_name in sheet_names {
141 let mut sheet_output = vec![];
142
143 if let Ok(current_sheet) = xlsx.worksheet_range(&sheet_name) {
144 for row in current_sheet.rows() {
145 let record = row
146 .iter()
147 .enumerate()
148 .map(|(i, cell)| {
149 let value = match cell {
150 Data::Empty => Value::nothing(head),
151 Data::String(s) => Value::string(s, head),
152 Data::Float(f) => Value::float(*f, head),
153 Data::Int(i) => Value::int(*i, head),
154 Data::Bool(b) => Value::bool(*b, head),
155 Data::DateTime(d) => d
156 .as_datetime()
157 .and_then(|d| match tz.from_local_datetime(&d) {
158 LocalResult::Single(d) => Some(d),
159 _ => None,
160 })
161 .map(|d| Value::date(d, head))
162 .unwrap_or(Value::nothing(head)),
163 _ => Value::nothing(head),
164 };
165
166 (format!("column{i}"), value)
167 })
168 .collect();
169
170 sheet_output.push(Value::record(record, head));
171 }
172
173 dict.insert(sheet_name, Value::list(sheet_output, head));
174 } else {
175 return Err(ShellError::UnsupportedInput {
176 msg: "Could not load sheet".to_string(),
177 input: "value originates from here".into(),
178 msg_span: head,
179 input_span: span.unwrap_or(head),
180 });
181 }
182 }
183
184 Ok(PipelineData::value(
185 Value::record(dict.into_iter().collect(), head),
186 None,
187 ))
188}
189
190#[cfg(test)]
191mod tests {
192 use super::*;
193
194 #[test]
195 fn test_examples() {
196 use crate::test_examples;
197
198 test_examples(FromXlsx {})
199 }
200}