Skip to main content

nu_command/formats/from/
xlsx.rs

1use calamine::*;
2use chrono::{Local, LocalResult, Offset, TimeZone, Utc};
3use indexmap::IndexMap;
4use nu_engine::command_prelude::*;
5
6use std::io::Cursor;
7
8#[derive(Clone)]
9pub struct FromXlsx;
10
11impl Command for FromXlsx {
12    fn name(&self) -> &str {
13        "from xlsx"
14    }
15
16    fn signature(&self) -> Signature {
17        Signature::build("from xlsx")
18            .input_output_types(vec![(Type::Binary, Type::table())])
19            .allow_variants_without_examples(true)
20            .named(
21                "sheets",
22                SyntaxShape::List(Box::new(SyntaxShape::String)),
23                "Only convert specified sheets.",
24                Some('s'),
25            )
26            .named(
27                "header-row",
28                SyntaxShape::OneOf(vec![SyntaxShape::Int, SyntaxShape::Nothing]),
29                "Specify row (0-indexed) to designate the header (default first non-empty row) or null for no header",
30                Some('r'),
31            )
32            .category(Category::Formats)
33    }
34
35    fn description(&self) -> &str {
36        "Parse binary Excel(.xlsx) data and create table."
37    }
38
39    fn run(
40        &self,
41        engine_state: &EngineState,
42        stack: &mut Stack,
43        call: &Call,
44        mut input: PipelineData,
45    ) -> Result<PipelineData, ShellError> {
46        let head = call.head;
47
48        let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
49            call.get_flag(engine_state, stack, "sheets")?
50        {
51            convert_columns(columns.as_slice())?
52        } else {
53            vec![]
54        };
55
56        let header_row = match call.get_flag(engine_state, stack, "header-row")? {
57            Some(Value::Int { val, .. }) => Some(HeaderRow::Row(
58                val.try_into()
59                    .expect("Header row index should not exceed u32"),
60            )),
61            Some(Value::Nothing { .. }) => None,
62            _ => Some(HeaderRow::FirstNonEmptyRow),
63        };
64
65        let metadata = input.take_metadata().map(|md| md.with_content_type(None));
66        from_xlsx(input, head, sel_sheets, header_row).map(|pd| pd.set_metadata(metadata))
67    }
68
69    fn examples(&self) -> Vec<Example<'_>> {
70        vec![
71            Example {
72                description: "Convert binary .xlsx data to a table.",
73                example: "open --raw test.xlsx | from xlsx",
74                result: None,
75            },
76            Example {
77                description: "Convert binary .xlsx data to a table, specifying the tables.",
78                example: "open --raw test.xlsx | from xlsx --sheets [Spreadsheet1]",
79                result: None,
80            },
81            Example {
82                description: "Convert binary .xlsx data to a table, specifying the tables and specifying no header row.",
83                example: "open --raw test.xlsx | from xlsx --sheets [Spreadsheet1] --header-row null",
84                result: None,
85            },
86        ]
87    }
88}
89
90fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
91    let res = columns
92        .iter()
93        .map(|value| match &value {
94            Value::String { val: s, .. } => Ok(s.clone()),
95            _ => Err(ShellError::IncompatibleParametersSingle {
96                msg: "Incorrect column format, Only string as column name".to_string(),
97                span: value.span(),
98            }),
99        })
100        .collect::<Result<Vec<String>, _>>()?;
101
102    Ok(res)
103}
104
105fn collect_binary(input: PipelineData, span: Span) -> Result<Vec<u8>, ShellError> {
106    if let PipelineData::ByteStream(stream, ..) = input {
107        stream.into_bytes()
108    } else {
109        let mut bytes = vec![];
110        let mut values = input.into_iter();
111
112        loop {
113            match values.next() {
114                Some(Value::Binary { val: b, .. }) => {
115                    bytes.extend_from_slice(&b);
116                }
117                Some(x) => {
118                    return Err(ShellError::UnsupportedInput {
119                        msg: "Expected binary from pipeline".to_string(),
120                        input: "value originates from here".into(),
121                        msg_span: span,
122                        input_span: x.span(),
123                    });
124                }
125                None => break,
126            }
127        }
128
129        Ok(bytes)
130    }
131}
132
133fn from_xlsx(
134    input: PipelineData,
135    head: Span,
136    sel_sheets: Vec<String>,
137    header_row: Option<HeaderRow>,
138) -> Result<PipelineData, ShellError> {
139    let span = input.span();
140    let bytes = collect_binary(input, head)?;
141    let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
142    let mut xlsx = Xlsx::<_>::new(buf).map_err(|_| ShellError::UnsupportedInput {
143        msg: "Could not load XLSX file".to_string(),
144        input: "value originates from here".into(),
145        msg_span: head,
146        input_span: span.unwrap_or(head),
147    })?;
148
149    let mut dict = IndexMap::new();
150
151    let mut sheet_names = xlsx.sheet_names();
152    if !sel_sheets.is_empty() {
153        sheet_names.retain(|e| sel_sheets.contains(e));
154    }
155
156    let tz = match Local.timestamp_opt(0, 0) {
157        LocalResult::Single(tz) => *tz.offset(),
158        _ => Utc.fix(),
159    };
160
161    for sheet_name in sheet_names {
162        let mut sheet_output = vec![];
163
164        if let Some(hr) = header_row {
165            xlsx.with_header_row(hr);
166        }
167
168        if let Ok(current_sheet) = xlsx.worksheet_range(&sheet_name) {
169            let sheet_width = current_sheet.width();
170            let default_headers = (0..sheet_width)
171                .map(|i| {
172                    format!(
173                        "column{:0width$}",
174                        i,
175                        width = (sheet_width.ilog10() + 1) as usize
176                    )
177                })
178                .collect::<Vec<String>>();
179
180            let headers = current_sheet
181                .headers()
182                .unwrap_or_else(|| vec!["".to_string(); sheet_width])
183                .into_iter()
184                .zip(default_headers.into_iter())
185                .map(
186                    |(name, default)| {
187                        if name.is_empty() { default } else { name }
188                    },
189                );
190
191            for row in current_sheet.rows().skip(header_row.is_some() as usize) {
192                let record = headers
193                    .clone()
194                    .zip(row.iter())
195                    .map(|(header_name, cell)| {
196                        let value = match cell {
197                            Data::Empty => Value::nothing(head),
198                            Data::String(s) => Value::string(s, head),
199                            Data::Float(f) => Value::float(*f, head),
200                            Data::Int(i) => Value::int(*i, head),
201                            Data::Bool(b) => Value::bool(*b, head),
202                            Data::DateTime(d) => d
203                                .as_datetime()
204                                .and_then(|d| match tz.from_local_datetime(&d) {
205                                    LocalResult::Single(d) => Some(d),
206                                    _ => None,
207                                })
208                                .map(|d| Value::date(d, head))
209                                .unwrap_or(Value::nothing(head)),
210                            _ => Value::nothing(head),
211                        };
212
213                        (header_name, value)
214                    })
215                    .collect();
216
217                sheet_output.push(Value::record(record, head));
218            }
219
220            dict.insert(sheet_name, Value::list(sheet_output, head));
221        } else {
222            return Err(ShellError::UnsupportedInput {
223                msg: "Could not load sheet".to_string(),
224                input: "value originates from here".into(),
225                msg_span: head,
226                input_span: span.unwrap_or(head),
227            });
228        }
229    }
230
231    Ok(PipelineData::value(
232        Value::record(dict.into_iter().collect(), head),
233        None,
234    ))
235}
236
237#[cfg(test)]
238mod tests {
239    use super::*;
240
241    #[test]
242    fn test_examples() -> nu_test_support::Result {
243        nu_test_support::test().examples(FromXlsx)
244    }
245}