1use heck::ToSnakeCase;
2use indexmap::IndexMap;
3use serde_json::{json, Error, Value};
4use simple_string_patterns::{SimpleMatch, StripCharacters, ToSegments};
5use std::{path::Path, str::FromStr, sync::Arc};
6
7use crate::is_truthy::{extract_truth_patterns, to_truth_options, TruthyOption};
/// Row cap applied when no explicit maximum is configured and the read mode
/// is anything other than preview.
pub const DEFAULT_MAX_ROWS: usize = 10_000;
/// Row cap applied in preview mode when no explicit maximum is configured.
pub const DEFAULT_MAX_ROWS_PREVIEW: usize = 1_000;
12
13#[derive(Debug, Clone, Default)]
15pub struct RowOptionSet {
16 pub columns: Vec<Column>,
17 pub decimal_comma: bool, pub date_only: bool,
19}
20
21impl RowOptionSet {
22
23 pub fn simple(cols: &[Column]) -> Self {
25 RowOptionSet {
26 decimal_comma: false,
27 date_only: false,
28 columns: cols.to_vec()
29 }
30 }
31
32 pub fn new(cols: &[Column], decimal_comma: bool, date_only: bool) -> Self {
34 RowOptionSet {
35 decimal_comma: decimal_comma,
36 date_only,
37 columns: cols.to_vec()
38 }
39 }
40
41 pub fn column(&self, index: usize) -> Option<&Column> {
42 self.columns.get(index)
43 }
44
45 pub fn date_mode(&self) -> String {
46 if self.date_only {
47 "date only"
48 } else {
49 "date/time"
50 }.to_string()
51 }
52
53 pub fn decimal_separator(&self) -> String {
54 if self.decimal_comma {
55 ","
56 } else {
57 "."
58 }.to_string()
59 }
60}
61
62#[derive(Debug, Clone, Default)]
64pub struct OptionSet {
65 pub selected: Option<Vec<String>>, pub indices: Vec<u32>, pub path: Option<String>, pub rows: RowOptionSet,
69 pub jsonl: bool,
70 pub max: Option<u32>,
71 pub omit_header: bool,
72 pub header_row: u8,
73 pub read_mode: ReadMode,
74 pub field_mode: FieldNameMode
75}
76
77impl OptionSet {
78 pub fn new(path_str: &str) -> Self {
80 OptionSet {
81 selected: None,
82 indices: vec![0],
83 path: Some(path_str.to_string()),
84 rows: RowOptionSet::default(),
85 jsonl: false,
86 max: None,
87 omit_header: false,
88 header_row: 0,
89 read_mode: ReadMode::Sync,
90 field_mode: FieldNameMode::AutoA1,
91 }
92 }
93
94 pub fn sheet_name(mut self, name: &str) -> Self {
96 self.selected = Some(vec![name.to_string()]);
97 self
98 }
99
100 pub fn sheet_names(mut self, names: &[String]) -> Self {
102 self.selected = Some(names.to_vec());
103 self
104 }
105
106 pub fn sheet_index(mut self, index: u32) -> Self {
108 self.indices = vec![index];
109 self
110 }
111
112 pub fn sheet_indices(mut self, indices: &[u32]) -> Self {
114 self.indices = indices.to_vec();
115 self
116}
117
118 pub fn json_lines(mut self) -> Self {
120 self.jsonl = true;
121 self
122 }
123
124 pub fn set_json_lines(mut self, mode: bool) -> Self {
126 self.jsonl = mode;
127 self
128 }
129
130 pub fn omit_header(mut self) -> Self {
132 self.omit_header = true;
133 self
134 }
135
136 pub fn header_row(mut self, row: u8) -> Self {
138 self.header_row = row;
139 self
140 }
141
142 pub fn max_row_count(mut self, max: u32) -> Self {
144 self.max = Some(max);
145 self
146 }
147
148 pub fn read_mode_async(mut self) -> Self {
151 self.read_mode = ReadMode::Async;
152 self
153 }
154
155 pub fn read_mode_preview(mut self) -> Self {
158 self.read_mode = ReadMode::PreviewMultiple;
159 self
160}
161
162 pub fn set_read_mode(mut self, key: &str) -> Self {
166 self.read_mode = ReadMode::from_key(key);
167 self
168 }
169
170 pub fn multimode(&self) -> bool {
171 self.read_mode.is_multimode()
172 }
173
174 pub fn file_name(&self) -> Option<String> {
175 if let Some(path_str) = self.path.clone() {
176 Path::new(&path_str).file_name().map(|f| f.to_string_lossy().to_string())
177 } else {
178 None
179 }
180 }
181
182 pub fn override_headers(mut self, keys: &[&str]) -> Self {
184 let mut columns: Vec<Column> = Vec::with_capacity(keys.len());
185 for ck in keys {
186 columns.push(Column::new(Some(&ck.to_snake_case())));
187 }
188 self.rows = RowOptionSet::simple(&columns);
189 self
190 }
191
192 pub fn override_columns(mut self, cols: &[Value]) -> Self {
194 let mut columns: Vec<Column> = Vec::with_capacity(cols.len());
195 for json_value in cols {
196 columns.push(Column::from_json(json_value));
197 }
198 self.rows = RowOptionSet::simple(&columns);
199 self
200 }
201
202 pub fn field_name_mode(mut self, system: &str, override_header: bool) -> Self {
204 self.field_mode = FieldNameMode::from_key(system, override_header);
205 self
206 }
207
208 pub fn row_mode(&self) -> String {
209 if self.jsonl {
210 "JSON lines"
211 } else {
212 "JSON"
213 }.to_string()
214 }
215
216 pub fn header_mode(&self) -> String {
217 if self.omit_header {
218 "ignore"
219 } else {
220 "capture"
221 }.to_string()
222 }
223
224 pub fn to_json(&self) -> Value {
226
227 let mut output: IndexMap<String, Value> = IndexMap::new();
228 if let Some(selected) = self.selected.clone() {
229 let selected = if self.multimode() {
230 json!({
231 "sheets": selected,
232 "indices": self.indices.clone()
233 })
234 } else {
235 json!({
236 "sheet": selected.first().unwrap_or(&"".to_string()),
237 "index": self.indices.get(0).unwrap_or(&0)
238 })
239 };
240 output.insert("selected".to_string(), selected.into());
241 }
242 if let Some(fname) = self.file_name() {
243 output.insert("file name".to_string(), fname.into());
244 }
245 if let Some(max_val) = self.max {
246 output.insert("max".to_string(), max_val.into());
247 }
248 output.insert("omit_header".to_string(), self.omit_header.into());
249 output.insert("header_row".to_string(), self.header_row.into());
250 output.insert("read_mode".to_string(), self.read_mode.to_string().into());
251 output.insert("jsonl".to_string(), self.jsonl.into());
252 output.insert("decimal_separator".to_string(), self.rows.decimal_separator().into());
253 output.insert("date_only".to_string(), self.rows.date_only.into());
254 if self.columns().len() > 0 {
255 let columns: Vec<Value> = self.rows.columns.clone().into_iter().map(|c| c.to_json()).collect();
256 output.insert("columns".to_string(), columns.into());
257 }
258 json!(output)
259 }
260
261 pub fn index_list(&self) -> String {
262 self.indices.clone().into_iter().map(|s| s.to_string()).collect::<Vec<String>>().join(", ")
263 }
264
265 pub fn to_lines(&self) -> Vec<String> {
268 let mut lines = vec![];
269 if let Some(s_names) = self.selected.clone() {
270 let plural = if s_names.len() > 1 {
271 "s"
272 } else {
273 ""
274 };
275 lines.push(format!("sheet name{}: {}", plural, s_names.join(",")));
276 } else if self.indices.len() > 0 {
277 lines.push(format!("sheet indices: {}", self.index_list()));
278 }
279 if let Some(fname) = self.file_name() {
280 lines.push(format!("file name: {}", fname));
281 }
282 if self.max.is_some() {
283 let max_val = self.max.unwrap_or(0);
284 if max_val > 0 {
285 lines.push(format!("max rows: {}", max_val));
286 }
287 }
288 lines.extend(vec![
289 format!("mode: {}", self.row_mode()),
290 format!("headers: {}", self.header_mode()),
291 format!("header row: {}", self.header_row),
292 format!("decimal separator: {}", self.rows.decimal_separator()),
293 format!("date mode: {}", self.rows.date_mode()),
294 format!("column style: {}", self.field_mode.to_string())
295 ]);
296
297 if self.columns().len() > 0 {
298 lines.push("columns:".to_string());
299 for col in self.rows.columns.clone() {
300 lines.push(col.to_line());
301 }
302 }
303 lines
304 }
305
306 pub fn header_row_index(&self) -> usize {
308 self.header_row as usize
309 }
310
311 pub fn max_rows(&self) -> usize {
313 if let Some(mr) = self.max {
314 mr as usize
315 } else {
316 match self.read_mode {
317 ReadMode::PreviewMultiple => DEFAULT_MAX_ROWS_PREVIEW,
318 _ => DEFAULT_MAX_ROWS
319 }
320 }
321 }
322
323 #[allow(dead_code)]
325 pub fn columns(&self) -> Vec<Column> {
326 self.rows.columns.clone()
327 }
328
329 pub fn read_mode(&self) -> ReadMode {
331 self.read_mode.clone()
332 }
333
334 pub fn is_async(&self) -> bool {
336 self.read_mode.is_async()
337 }
338
339 pub fn capture_rows(&self) -> bool {
341 match self.read_mode {
342 ReadMode::Async => false,
343 _ => true
344 }
345 }
346
347}
348
349
350#[derive(Debug, Clone)]
352pub enum Format {
353 Auto, Text, Integer, Decimal(u8), Float, Boolean, Date, DateTime, DateTimeCustom(Arc<str>),
362 Truthy, #[allow(dead_code)]
364 TruthyCustom(Vec<TruthyOption>) }
366
367impl ToString for Format {
368 fn to_string(&self) -> String {
369 let result = match self {
370 Self::Auto => "auto",
371 Self::Text => "text",
372 Self::Integer => "integer",
373 Self::Decimal(n) => &format!("decimal({})", n),
374 Self::Float => "float",
375 Self::Boolean => "boolean",
376 Self::Date => "date",
377 Self::DateTime => "datetime",
378 Self::DateTimeCustom(fmt) => &format!("datetime({})", fmt),
379 Self::Truthy => "truthy",
380 Self::TruthyCustom(opts) => {
381 let true_str: Vec<String> = extract_truth_patterns(&opts, true);
382 let false_str: Vec<String> = extract_truth_patterns(&opts, false);
383 &format!("truthy({},{})", true_str.join("|"), false_str.join("|"))
384 },
385 };
386 result.to_string() }
388}
389
390impl FromStr for Format {
391 type Err = Error;
392 fn from_str(key: &str) -> Result<Self, Self::Err> {
393 let fmt = match key {
394 "s" | "str" | "string" | "t" | "txt" | "text" => Self::Text,
395 "i" | "int" | "integer" => Self::Integer,
396 "d1" | "decimal_1" => Self::Decimal(1),
397 "d2" | "decimal_2" => Self::Decimal(2),
398 "d3" | "decimal_3" => Self::Decimal(3),
399 "d4" | "decimal_4" => Self::Decimal(4),
400 "d5" | "decimal_5" => Self::Decimal(5),
401 "d6" | "decimal_6" => Self::Decimal(6),
402 "d7" | "decimal_7" => Self::Decimal(7),
403 "d8" | "decimal_8" => Self::Decimal(6),
404 "fl" | "f" | "float" => Self::Float,
405 "b" | "bool" | "boolean" => Self::Boolean,
406 "da" | "date" => Self::Date,
407 "dt" | "datetime" => Self::DateTime,
408 "tr" | "truthy" => Self::Truthy,
409 _ => {
410 if let Some(str) = match_custom_dt(key) {
411 Self::DateTimeCustom(Arc::from(str))
412 } else if let Some((yes, no)) = match_custom_truthy(key) {
413 Self::TruthyCustom(to_truth_options(&yes, &no, false,false))
414 } else {
415 Self::Auto
416 }
417 },
418 };
419 Ok(fmt)
420 }
421}
422
/// Extracts the pattern from a custom date-time key of the form `dt:<pattern>`.
///
/// The key is trimmed first and the `dt:` prefix is matched
/// case-insensitively. Returns `None` when the prefix is absent. An empty
/// pattern (`"dt:"`) yields `Some("")`.
fn match_custom_dt(key: &str) -> Option<String> {
    const PREFIX: &str = "dt:";
    let test_str = key.trim();
    // Checked slicing: `get` returns `None` for short input or when the
    // split point is not a character boundary, so this cannot panic.
    let head = test_str.get(..PREFIX.len())?;
    let pattern = test_str.get(PREFIX.len()..)?;
    head.eq_ignore_ascii_case(PREFIX)
        .then(|| pattern.to_string())
}
431
432fn match_custom_truthy(key: &str) -> Option<(String,String)> {
433 let test_str = key.trim();
434 let (head, tail) = test_str.to_head_tail(":");
435 if tail.len() > 1 && head.len() > 1 && head.starts_with_ci("tr") {
436 let (yes, no) = tail.to_head_tail(",");
437 if yes.len() > 0 && no.len() > 0 {
438 return Some((yes, no));
439 }
440 }
441 None
442}
443
444impl Format {
445 #[allow(dead_code)]
446 pub fn truthy_custom(yes: &str, no: &str) -> Self {
447 Format::TruthyCustom(to_truth_options(yes, no, false, false))
448 }
449}
450
451#[derive(Debug, Clone)]
452pub struct Column {
453 pub key: Option<Arc<str>>,
454 pub format: Format,
455 pub default: Option<Value>,
456 pub date_only: bool, pub decimal_comma: bool, }
459
460impl Column {
461
462 pub fn new(key_opt: Option<&str>) -> Self {
464 Self::from_key_ref_with_format(key_opt, Format::Auto, None, false, false)
465 }
466
467 pub fn new_format(fmt: Format, default: Option<Value>) -> Self {
469 Self::from_key_ref_with_format(None, fmt, default, false, false)
470 }
471
472 pub fn from_json(json: &Value) -> Self {
474 let key_opt = json.get("key").map(|v| v.as_str().unwrap_or(""));
475 let fmt = match json.get("format") {
476 Some(fmt_val) => {
477 match Format::from_str(fmt_val.as_str().unwrap()) {
478 Ok(fmt) => fmt,
479 Err(_) => Format::Auto
480 }
481 },
482 None => Format::Auto
483 };
484 let default = match json.get("default") {
485 Some(def_val) => {
486 match def_val {
487 Value::String(s) => Some(Value::String(s.clone())),
488 Value::Number(n) => Some(Value::Number(n.clone())),
489 Value::Bool(b) => Some(Value::Bool(b.clone())),
490 _ => None
491 }
492 },
493 None => None
494 };
495 let date_only = match json.get("date_only") {
496 Some(date_val) => date_val.as_bool().unwrap_or(false),
497 None => false
498 };
499 let dec_commas_keys = ["decimal_comma", "dec_comma"];
500 let mut decimal_comma = false;
501
502 for key in &dec_commas_keys {
503 if let Some(euro_val) = json.get(*key) {
504 decimal_comma = euro_val.as_bool().unwrap_or(false);
505 break;
506 }
507 }
508 Column::from_key_ref_with_format(key_opt, fmt, default, date_only, decimal_comma)
509}
510
511
512 #[allow(dead_code)]
514 pub fn set_format(mut self, fmt: Format) -> Self {
515 self.format = fmt;
516 self
517 }
518
519 #[allow(dead_code)]
520 pub fn set_default(mut self, val: Value) -> Self {
521 self.default = Some(val);
522 self
523 }
524
525 #[allow(dead_code)]
526 pub fn set_date_only(mut self, val: bool) -> Self {
527 self.date_only = val;
528 self
529 }
530
531 #[allow(dead_code)]
532 pub fn set_decimal_comma(mut self, val: bool) -> Self {
533 self.decimal_comma = val;
534 self
535 }
536
537 pub fn from_key_ref_with_format(key_opt: Option<&str>, format: Format, default: Option<Value>, date_only: bool, decimal_comma: bool) -> Self {
538 let mut key = None;
539 if let Some(k_str) = key_opt {
540 key = Some(Arc::from(k_str));
541 }
542 Column {
543 key,
544 format,
545 default,
546 date_only,
547 decimal_comma
548 }
549 }
550
551 pub fn key_name(&self) -> String {
552 self.key.clone().unwrap_or(Arc::from("")).to_string()
553 }
554
555 pub fn to_json(&self) -> Value {
556 json!({
557 "key": self.key_name(),
558 "format": self.format.to_string(),
559 "default": self.default,
560 "date_only": self.date_only,
561 "decimal_comma": self.decimal_comma
562 })
563 }
564
565 pub fn to_line(&self) -> String {
566 let date_only_str = if self.date_only {
567 ", date only"
568 } else {
569 ""
570 }.to_owned();
571 let def_string = if let Some(def_val) = self.default.clone() {
572 format!("default: {}", def_val.to_string())
573 } else {
574 "".to_string()
575 };
576 let comma_str = if self.decimal_comma {
577 ", decimal comma"
578 } else {
579 ""
580 };
581 format!(
582 "\tkey {}, format {}{}{}{}",
583 self.key_name(),
584 self.format.to_string(),
585 def_string,
586 date_only_str,
587 comma_str)
588 }
589
590}
591
592
/// File type recognized from a path's extension.
#[derive(Debug, Clone, Copy)]
pub enum Extension {
    /// Missing, non-UTF-8 or unrecognized extension.
    Unmatched,
    Ods,
    Xlsx,
    Xlsb,
    Xls,
    Csv,
    Tsv,
}

impl Extension {
    /// Detects the file type from the extension of `path`, ignoring case.
    /// Returns `Extension::Unmatched` when the path has no extension, the
    /// extension is not valid UTF-8, or it is not one of the known types.
    pub fn from_path(path: &Path) -> Extension {
        match path.extension().and_then(|ext| ext.to_str()) {
            Some(ext_str) => match ext_str.to_lowercase().as_str() {
                "ods" => Extension::Ods,
                "xlsx" => Extension::Xlsx,
                "xlsb" => Extension::Xlsb,
                "xls" => Extension::Xls,
                "csv" => Extension::Csv,
                "tsv" => Extension::Tsv,
                _ => Extension::Unmatched,
            },
            None => Extension::Unmatched,
        }
    }

    /// True for the spreadsheet types (ods, xlsx, xlsb, xls).
    pub fn use_calamine(&self) -> bool {
        matches!(self, Self::Ods | Self::Xlsx | Self::Xlsb | Self::Xls)
    }

    /// True for the delimited text types (csv, tsv).
    #[allow(dead_code)]
    pub fn use_csv(&self) -> bool {
        matches!(self, Self::Csv | Self::Tsv)
    }
}

/// Lower-case extension text; `Unmatched` renders as an empty string.
///
/// Implemented as `Display` rather than `ToString`; `to_string()` remains
/// available through the standard blanket implementation. The match is
/// exhaustive so a new variant cannot silently render as "".
impl std::fmt::Display for Extension {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Ods => "ods",
            Self::Xlsx => "xlsx",
            Self::Xlsb => "xlsb",
            Self::Xls => "xls",
            Self::Csv => "csv",
            Self::Tsv => "tsv",
            Self::Unmatched => "",
        })
    }
}
658
659pub struct PathData<'a> {
660 path: &'a Path,
661 ext: Extension
662}
663
664impl<'a> PathData<'a> {
665 pub fn new(path: &'a Path) -> Self {
666 PathData {
667 path,
668 ext: Extension::from_path(path)
669 }
670 }
671
672 pub fn mode(&self) -> Extension {
673 self.ext
674 }
675
676 pub fn extension(&self) -> String {
677 self.ext.to_string()
678 }
679
680 pub fn ext(&self) -> Extension {
681 self.ext
682 }
683
684 pub fn path(&self) -> &Path {
685 self.path
686 }
687
688 pub fn is_valid(&self) -> bool {
689 match self.ext {
690 Extension::Unmatched => false,
691 _ => true
692 }
693 }
694
695 pub fn use_calamine(&self) -> bool {
696 self.ext.use_calamine()
697 }
698
699 pub fn filename(&self) -> String {
700 if let Some(file_ref) = self.path.file_name() {
701 file_ref.to_string_lossy().to_string()
702 } else {
703 "".to_owned()
704 }
705 }
706}
707
708
709
/// How rows are read from the source.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ReadMode {
    /// Direct reading; the default.
    #[default]
    Sync,
    /// Preview reading across multiple sheets.
    PreviewMultiple,
    /// Deferred reading.
    Async,
}
717
718impl ReadMode {
720
721 pub fn from_key(key: &str) -> Self {
722 let sample = key.to_lowercase().strip_non_alphanum();
723 match sample.as_str() {
724 "async" | "defer" | "deferred" | "a" => ReadMode::Async,
725 "preview" | "p" | "pre" | "multimode" | "multiple" | "previewmultiple" | "previewmulti" | "m" => ReadMode::PreviewMultiple,
726 _ => ReadMode::Sync
727 }
728 }
729
730 pub fn is_async(&self) -> bool {
731 match self {
732 Self::Async => true,
733 _ => false
734 }
735 }
736
737 pub fn is_multimode(&self) -> bool {
739 match self {
740 Self::PreviewMultiple => true,
741 _ => false
742 }
743 }
744}
745
746impl ToString for ReadMode {
747
748 fn to_string(&self) -> String {
749 match self {
750 Self::Async => "deferred",
751 Self::PreviewMultiple => "preview",
752 _ => "direct"
753 }.to_string()
754 }
755}
756
/// Naming scheme for field names.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum FieldNameMode {
    /// A1 style, headers kept; the default.
    #[default]
    AutoA1,
    /// C01 style (padded numbers), headers kept.
    AutoNumPadded,
    /// A1 style, headers overridden.
    A1,
    /// C01 style (padded numbers), headers overridden.
    NumPadded,
}
766
767impl FieldNameMode {
769
770
771 pub fn from_key(system: &str, override_header: bool) -> Self {
772 if system.starts_with_ci("a1") {
773 if override_header {
774 FieldNameMode::A1
775 } else {
776 FieldNameMode::AutoA1
777 }
778 } else if system.starts_with_ci("c") || system.starts_with_ci("n") {
779 if override_header {
780 FieldNameMode::NumPadded
781 } else {
782 FieldNameMode::AutoNumPadded
783 }
784 } else {
785 FieldNameMode::AutoA1
786 }
787 }
788
789
790 pub fn use_a1(&self) -> bool {
792 match self {
793 Self::AutoA1 | Self::A1 => true,
794 _ => false
795 }
796 }
797
798 pub fn use_c01(&self) -> bool {
800 match self {
801 Self::AutoNumPadded | Self::NumPadded => true,
802 _ => false
803 }
804 }
805
806 pub fn override_headers(&self) -> bool {
808 match self {
809 Self::NumPadded | Self::A1 => true,
810 _ => false
811 }
812 }
813
814 pub fn keep_headers(&self) -> bool {
816 self.override_headers() == false
817 }
818}
819
820impl ToString for FieldNameMode {
821 fn to_string(&self) -> String {
822 match self {
823 Self::AutoNumPadded => "C01 auto",
824 Self::NumPadded => "C01 override",
825 Self::A1 => "A1 override",
826 _ => "A1 auto",
827 }.to_string()
828 }
829}
830
#[cfg(test)]
mod tests {
    use super::*;

    /// A custom truthy format renders its yes/no patterns in its label.
    #[test]
    fn test_format_mode() {
        let label = Format::truthy_custom("si", "no").to_string();
        assert_eq!(label, "truthy(si,no)");
    }

    /// A `tr:` key splits into its yes and no parts.
    #[test]
    fn test_match_truthy_custom() {
        let parsed = match_custom_truthy("tr:si,no");
        let (true_keys, false_keys) = parsed.unwrap();
        assert_eq!("si", true_keys);
        assert_eq!("no", false_keys);
    }
}