nu_command/formats/from/
xlsx.rs

1use calamine::*;
2use chrono::{Local, LocalResult, Offset, TimeZone, Utc};
3use indexmap::IndexMap;
4use nu_engine::command_prelude::*;
5
6use std::io::Cursor;
7
8#[derive(Clone)]
9pub struct FromXlsx;
10
11impl Command for FromXlsx {
12    fn name(&self) -> &str {
13        "from xlsx"
14    }
15
16    fn signature(&self) -> Signature {
17        Signature::build("from xlsx")
18            .input_output_types(vec![(Type::Binary, Type::table())])
19            .allow_variants_without_examples(true)
20            .named(
21                "sheets",
22                SyntaxShape::List(Box::new(SyntaxShape::String)),
23                "Only convert specified sheets",
24                Some('s'),
25            )
26            .category(Category::Formats)
27    }
28
29    fn description(&self) -> &str {
30        "Parse binary Excel(.xlsx) data and create table."
31    }
32
33    fn run(
34        &self,
35        engine_state: &EngineState,
36        stack: &mut Stack,
37        call: &Call,
38        input: PipelineData,
39    ) -> Result<PipelineData, ShellError> {
40        let head = call.head;
41
42        let sel_sheets = if let Some(Value::List { vals: columns, .. }) =
43            call.get_flag(engine_state, stack, "sheets")?
44        {
45            convert_columns(columns.as_slice())?
46        } else {
47            vec![]
48        };
49
50        let metadata = input.metadata().map(|md| md.with_content_type(None));
51        from_xlsx(input, head, sel_sheets).map(|pd| pd.set_metadata(metadata))
52    }
53
54    fn examples(&self) -> Vec<Example<'_>> {
55        vec![
56            Example {
57                description: "Convert binary .xlsx data to a table",
58                example: "open --raw test.xlsx | from xlsx",
59                result: None,
60            },
61            Example {
62                description: "Convert binary .xlsx data to a table, specifying the tables",
63                example: "open --raw test.xlsx | from xlsx --sheets [Spreadsheet1]",
64                result: None,
65            },
66        ]
67    }
68}
69
70fn convert_columns(columns: &[Value]) -> Result<Vec<String>, ShellError> {
71    let res = columns
72        .iter()
73        .map(|value| match &value {
74            Value::String { val: s, .. } => Ok(s.clone()),
75            _ => Err(ShellError::IncompatibleParametersSingle {
76                msg: "Incorrect column format, Only string as column name".to_string(),
77                span: value.span(),
78            }),
79        })
80        .collect::<Result<Vec<String>, _>>()?;
81
82    Ok(res)
83}
84
85fn collect_binary(input: PipelineData, span: Span) -> Result<Vec<u8>, ShellError> {
86    if let PipelineData::ByteStream(stream, ..) = input {
87        stream.into_bytes()
88    } else {
89        let mut bytes = vec![];
90        let mut values = input.into_iter();
91
92        loop {
93            match values.next() {
94                Some(Value::Binary { val: b, .. }) => {
95                    bytes.extend_from_slice(&b);
96                }
97                Some(x) => {
98                    return Err(ShellError::UnsupportedInput {
99                        msg: "Expected binary from pipeline".to_string(),
100                        input: "value originates from here".into(),
101                        msg_span: span,
102                        input_span: x.span(),
103                    });
104                }
105                None => break,
106            }
107        }
108
109        Ok(bytes)
110    }
111}
112
113fn from_xlsx(
114    input: PipelineData,
115    head: Span,
116    sel_sheets: Vec<String>,
117) -> Result<PipelineData, ShellError> {
118    let span = input.span();
119    let bytes = collect_binary(input, head)?;
120    let buf: Cursor<Vec<u8>> = Cursor::new(bytes);
121    let mut xlsx = Xlsx::<_>::new(buf).map_err(|_| ShellError::UnsupportedInput {
122        msg: "Could not load XLSX file".to_string(),
123        input: "value originates from here".into(),
124        msg_span: head,
125        input_span: span.unwrap_or(head),
126    })?;
127
128    let mut dict = IndexMap::new();
129
130    let mut sheet_names = xlsx.sheet_names();
131    if !sel_sheets.is_empty() {
132        sheet_names.retain(|e| sel_sheets.contains(e));
133    }
134
135    let tz = match Local.timestamp_opt(0, 0) {
136        LocalResult::Single(tz) => *tz.offset(),
137        _ => Utc.fix(),
138    };
139
140    for sheet_name in sheet_names {
141        let mut sheet_output = vec![];
142
143        if let Ok(current_sheet) = xlsx.worksheet_range(&sheet_name) {
144            for row in current_sheet.rows() {
145                let record = row
146                    .iter()
147                    .enumerate()
148                    .map(|(i, cell)| {
149                        let value = match cell {
150                            Data::Empty => Value::nothing(head),
151                            Data::String(s) => Value::string(s, head),
152                            Data::Float(f) => Value::float(*f, head),
153                            Data::Int(i) => Value::int(*i, head),
154                            Data::Bool(b) => Value::bool(*b, head),
155                            Data::DateTime(d) => d
156                                .as_datetime()
157                                .and_then(|d| match tz.from_local_datetime(&d) {
158                                    LocalResult::Single(d) => Some(d),
159                                    _ => None,
160                                })
161                                .map(|d| Value::date(d, head))
162                                .unwrap_or(Value::nothing(head)),
163                            _ => Value::nothing(head),
164                        };
165
166                        (format!("column{i}"), value)
167                    })
168                    .collect();
169
170                sheet_output.push(Value::record(record, head));
171            }
172
173            dict.insert(sheet_name, Value::list(sheet_output, head));
174        } else {
175            return Err(ShellError::UnsupportedInput {
176                msg: "Could not load sheet".to_string(),
177                input: "value originates from here".into(),
178                msg_span: head,
179                input_span: span.unwrap_or(head),
180            });
181        }
182    }
183
184    Ok(PipelineData::value(
185        Value::record(dict.into_iter().collect(), head),
186        None,
187    ))
188}
189
190#[cfg(test)]
191mod tests {
192    use super::*;
193
194    #[test]
195    fn test_examples() {
196        use crate::test_examples;
197
198        test_examples(FromXlsx {})
199    }
200}