spreadsheet_to_json/
data_set.rs

1use std::{fs::File, io::BufReader};
2use calamine::{Reader, Sheets};
3use heck::ToSnakeCase;
4use indexmap::IndexMap;
5use serde::Serialize;
6use serde_json::{json, Value};
7
8use crate::{OptionSet, PathData, ReadMode};
9
10
11/// Core info about a spreadsheet with extension, matched worksheet name and index an all worksheet keys
12#[derive(Debug, Clone)]
13pub struct WorkbookInfo {
14    pub filename: String,
15    pub extension: String,
16    pub selected: Option<Vec<String>>,
17    pub sheets: Vec<String>,
18}
19
20impl WorkbookInfo {
21    pub fn new(path_data: &PathData, selected: &[String], sheet_refs: &[String]) -> Self {
22        WorkbookInfo {
23            extension: path_data.extension(),
24            filename: path_data.filename(), 
25            selected: Some(selected.to_vec()),
26            sheets: sheet_refs.to_vec(),
27        }
28    }
29
30    pub fn simple(path_data: &PathData) -> Self {
31        let sheet_name = "single";
32        WorkbookInfo {
33            extension: path_data.extension(),
34            filename: path_data.filename(), 
35            selected: None,
36            sheets: vec![sheet_name.to_owned()],
37        }
38    }
39
40    pub fn ext(&self) -> String {
41        self.extension.to_owned()
42    }
43
44    pub fn name(&self) -> String {
45        self.filename.to_owned()
46    }
47
48    pub fn sheet(&self, index: usize) -> (String, usize) {
49      let sheet_name = self.sheets.get(index).unwrap_or(&"single".to_owned()).to_owned();
50      (sheet_name, index)
51    }
52
53    pub fn sheets(&self) -> Vec<String> {
54        self.sheets.clone()
55    }
56}
57
58
59// Result set
60#[derive(Debug, Clone)]
61pub struct ResultSet {
62    pub filename: String,
63    pub extension: String,
64    pub selected: Option<Vec<String>>,
65    pub sheets: Vec<String>,
66    pub keys: Vec<String>,
67    pub num_rows: usize,
68    pub data: SpreadData,
69    pub out_ref: Option<String>,
70    pub opts: OptionSet,
71}
72
73impl ResultSet {
74
75  /// Instantiate with Core workbook info, header keys, data set and optional output reference
76  pub fn new(info: &WorkbookInfo, keys: &[String], data_set: DataSet, opts: &OptionSet, out_ref: Option<&str>) -> Self {
77    let (num_rows, data) = match data_set {
78      DataSet::WithRows(size, rows) => (size, rows),
79      DataSet::Count(size) => (size, vec![])
80    };
81    ResultSet {
82      extension: info.ext(),
83      filename: info.name(), 
84      selected: info.selected.clone(),
85      sheets: info.sheets(),
86      keys: keys.to_vec(),
87      num_rows,
88      data: SpreadData::from_single(data),
89      out_ref: out_ref.map(|s| s.to_string()),
90      opts: opts.to_owned()
91    }
92  }
93
94  pub fn from_multiple(sheets: &[SheetDataSet], info: &WorkbookInfo, opts: &OptionSet) -> Self {
95    let selected = None;
96    let mut sheet_names = vec![];
97    let filename = info.filename.clone();
98    let extension = info.extension.clone();
99    let mut keys: Vec<String> = vec![];
100    let mut num_rows = 0;
101    let mut sheet_index: usize = 0;
102    for sheet in sheets {
103      num_rows += sheet.num_rows;
104      sheet_names.push(sheet.name());
105      if sheet_index == 0 {
106        keys = sheet.keys.clone();
107      }
108      sheet_index += 1;
109    }
110    ResultSet {
111      extension,
112      filename, 
113      selected,
114      sheets: sheet_names,
115      keys,
116      num_rows,
117      data: SpreadData::Multiple(sheets.to_vec()),
118      out_ref: None,
119      opts: opts.to_owned()
120    }
121  }
122
123
124  pub fn multimode(&self) -> bool {
125    match self.data {
126      SpreadData::Multiple(_) => true,
127      _ => false
128    }
129  }
130
131  /// Full result set as JSON with criteria, options and data in synchronous mode
132  pub fn to_json(&self) -> Value {
133    let mut result = json!({
134      "name": self.filename,
135      "extension": self.extension,
136      "selected": self.selected.clone().unwrap_or(vec![]),
137      "sheets": self.sheets,
138      "num_rows": self.num_rows,
139      "fields": self.keys,
140      "multimode": self.multimode(),
141      "data": self.data.to_json(),
142      "opts": self.opts.to_json()
143    });
144    if let Some(out_ref_str) = self.out_ref.clone() {
145      result["outref"] = json!(out_ref_str);
146    }
147    result
148  }
149
150   /// Full result set as CLI-friendly lines
151   pub fn to_output_lines(&self, json_lines: bool) -> Vec<String> {
152    let selected_names = self.selected.clone().unwrap_or(vec![]);
153    let num_selected = selected_names.len();
154    let plural = if num_selected > 1 {
155      "s"
156    } else {
157      ""
158    };
159    let mut lines = vec![
160      format!("name:{}", self.filename),
161      format!("extension: {}", self.extension),
162      
163      format!("sheets: {}", self.sheets.join(", ")),
164    ];
165    if num_selected > 0 {
166      lines.push(format!("selected sheet{}: {}", plural, selected_names.join(", ")));
167    }
168    lines.push(format!("row count: {}", self.num_rows));
169    lines.push(format!("fields: {}", self.keys.join(",")));
170    lines.push(format!("multimode: {}", self.multimode()));
171    for opt_line in self.opts.to_lines() {
172      lines.push(opt_line);
173    }
174    if let Some(out_ref_str) = self.out_ref.clone() {
175      lines.push(format!("output reference: {}", out_ref_str));
176    } else {
177      let has_many_sheets = self.sheets.len() > 1;
178      if !has_many_sheets || !self.multimode() {
179        lines.push("data:".to_owned());
180      }
181      if json_lines {
182        for sheet in &self.data.sheets() {
183          if has_many_sheets {
184            lines.push(format!("Sheet `{}` ({}):", sheet.name(), sheet.num_rows));
185          }
186          for item in &sheet.rows {
187            lines.push(format!("{}", json!(item)));
188          }
189        }
190      } else {
191        if self.multimode() {
192          for sheet in self.data.sheets() {
193            lines.push(format!("Sheet `{}` ({}):", sheet.name(), sheet.num_rows));
194            lines.push(format!("{}", json!(sheet)));
195          }
196        } else {
197          lines.push(format!("{}", self.data.to_json()));
198        }
199      }
200    }
201    lines
202  }
203
204  /// Extract the vector of rows as Index Maps of JSON values
205  /// Good for post-processing results
206  pub fn to_vec(&self) -> Vec<IndexMap<String, Value>> {
207    self.data.first_sheet().clone()
208  }
209  
210  /// JSON object of row arrays only
211  pub fn json_data(&self) -> Value {
212    json!(self.data)
213  }
214
215  /// final output as vector of JSON-serializable array
216  pub fn rows(&self) -> Vec<String> {
217    let sheet = self.data.first_sheet();
218    let mut lines = Vec::with_capacity(sheet.len());
219    for row in &self.data.first_sheet() {
220      lines.push(json!(row).to_string());
221    }
222    lines
223  }
224
225}
226
227#[derive(Debug, Clone, Serialize)]
228pub struct SheetDataSet {
229  pub sheet: (String, String),
230  pub num_rows: usize,
231  pub keys: Vec<String>,
232  pub rows: Vec<IndexMap<String, Value>>
233}
234
235impl SheetDataSet {
236
237  
238
239  pub fn new(name: &str, keys: &[String], rows: &[IndexMap<String, Value>], total: usize) -> Self {
240    Self {
241      sheet: (name.to_string(), name.to_snake_case()),
242      keys: keys.to_vec(),
243      rows: rows.to_vec(),
244      num_rows: total
245    }
246  }
247
248  pub fn key(&self) -> String {
249    self.sheet.1.clone()
250  }
251
252  pub fn name(&self) -> String {
253    self.sheet.0.clone()
254  }
255}
256
257#[derive(Debug, Clone, Serialize)]
258pub enum SpreadData {
259   Single(Vec<IndexMap<String, Value>>),
260   Multiple(Vec<SheetDataSet>)
261}
262
263impl SpreadData {
264  pub fn from_single(rows: Vec<IndexMap<String, Value>>) -> Self {
265    SpreadData::Single(rows)
266  }
267
268  pub fn from_multiple(sheet_data: &[SheetDataSet]) -> Self {
269    SpreadData::Multiple(sheet_data.to_owned())
270  }
271
272  pub fn first_sheet(&self) -> Vec<IndexMap<String, Value>> {
273    match self {
274      SpreadData::Single(rows) => rows.to_owned(),
275      SpreadData::Multiple(sheets) => {
276        if let Some(sheet) = sheets.get(0) {
277          sheet.rows.to_owned()
278        } else {
279          vec![]
280        }
281      }
282    }
283  }
284
285  // Only for preview multiple mode
286  pub fn sheets(&self) -> Vec<SheetDataSet> {
287    match self {
288      SpreadData::Single(_) => vec![],
289      SpreadData::Multiple(sheets) => sheets.to_owned()
290    }
291  }
292
293  pub fn to_json(&self) -> Value {
294    match self {
295      SpreadData::Single(sheet) => json!(sheet),
296      SpreadData::Multiple(sheet_map) => json!(sheet_map)
297    }
298  }
299}
300
301
302#[derive(Debug, Clone, Serialize)]
303pub enum DataSet {
304   WithRows(usize, Vec<IndexMap<String, Value>>),
305   Count(usize) 
306}
307
308impl DataSet {
309  pub fn from_count_and_rows(count: usize, rows: Vec<IndexMap<String, Value>>, opts: &OptionSet) -> Self {
310    match opts.read_mode() {
311      ReadMode::Sync | ReadMode::PreviewMultiple => DataSet::WithRows(count, rows),
312      ReadMode::Async => DataSet::Count(count),
313    }
314  }
315}
316
317
318pub fn to_index_map(row: &[serde_json::Value], headers: &[String]) -> IndexMap<String, Value> {
319    let mut hm: IndexMap<String, serde_json::Value> = IndexMap::new();
320    let mut sub_index = 0;
321    for hk in headers {
322        if let Some(cell) = row.get(sub_index) {
323            hm.insert(hk.to_owned(), cell.to_owned());
324        } 
325        sub_index += 1;
326    }
327    hm
328}
329
330pub fn match_sheet_name_and_index(workbook: &mut Sheets<BufReader<File>>, opts: &OptionSet) -> (Vec<String>, Vec<String>, Vec<usize>) {
331  let mut sheet_indices = vec![];
332  let mut selected_names: Vec<String> = vec![];
333  let sheet_names = workbook.worksheets().into_iter().map(|ws| ws.0).collect::<Vec<String>>();
334  if let Some(sheet_keys) = opts.selected.clone() {
335      for sheet_key in sheet_keys {
336          if let Some(sheet_index) = sheet_names.iter().position(|s| s.to_snake_case() == sheet_key.to_snake_case()) {
337              sheet_indices.push(sheet_index);
338              selected_names.push(sheet_names[sheet_index].clone());
339          }
340      }
341  }
342  if sheet_indices.len() < 1 && opts.indices.len() > 0 {
343    for s_index in opts.indices.clone() {
344      let sheet_index = s_index as usize;
345      if let Some(sheet_name) = sheet_names.get(sheet_index) {
346          sheet_indices.push(sheet_index);
347          selected_names.push(sheet_name.to_owned());
348      }
349    }
350  }
351  if sheet_indices.len() < 1 {
352    sheet_indices = vec![0];
353    if sheet_names.len() > 0 {
354      selected_names.push(sheet_names[0].clone());
355    }
356  }
357  (selected_names, sheet_names, sheet_indices)
358}
359