spreadsheet_to_json/
headers.rs

1
2use heck::ToSnakeCase;
3use indexmap::IndexMap;
4use serde_json::Value;
5
6use crate::{Column, FieldNameMode};
7
/// Convert a zero-based column index to a lower-case spreadsheet-style letter key.
///
/// The sequence runs `a`, `b`, … `z`, `aa`, `ab`, … `zz`, `aaa`, … so index `0` yields
/// `"a"`, `25` yields `"z"`, `26` yields `"aa"` and `702` yields `"aaa"`.
///
/// The arithmetic stays in `usize`, so every representable index produces a
/// non-empty key (no narrowing cast that could wrap to a negative number).
pub fn to_a1_col_key(index: usize) -> String {
    // Letters are produced least-significant first and reversed at the end.
    let mut letters: Vec<char> = Vec::new();
    let mut n = index;
    loop {
        // `n % 26` is always below 26, so narrowing to u8 cannot truncate.
        letters.push(char::from(b'a' + (n % 26) as u8));
        if n < 26 {
            break;
        }
        // Bijective base-26: there is no zero digit, hence the extra `- 1`.
        n = n / 26 - 1;
    }
    letters.into_iter().rev().collect()
}
19
20pub fn to_padded_col_key(prefix: &str, index: usize, num_cols: usize) -> String {
21    build_padded_col_key(prefix, false, index, num_cols)
22}
23
24pub fn to_padded_col_suffix(prefix: &str, index: usize, num_cols: usize) -> String {
25  build_padded_col_key(prefix, true, index, num_cols)
26}
27
28
/// Shared implementation for the padded column-key builders.
///
/// Concatenates `prefix`, an optional `_` separator (when `underscore` is true)
/// and the one-based column number (`index + 1`) left-padded with zeros.
/// The minimum width is 2 digits for fewer than 100 columns, 3 below 1,000,
/// 4 below 10,000 and 5 otherwise; longer numbers are never truncated.
fn build_padded_col_key(prefix: &str, underscore: bool, index: usize, num_cols: usize) -> String {
    let width: usize = match num_cols {
        0..=99 => 2,
        100..=999 => 3,
        1000..=9999 => 4,
        _ => 5,
    };
    let separator = if underscore { "_" } else { "" };
    // Keys are numbered from 1 rather than 0.
    let ordinal = index + 1;
    format!("{}{}{:0pad$}", prefix, separator, ordinal, pad = width)
}
43
44pub fn to_c01_col_key(index: usize, num_cols: usize) -> String {
45  to_padded_col_key("c", index, num_cols)
46}
47
48pub fn to_head_key(index: usize, field_mode: &FieldNameMode, num_cols: usize) -> String {
49  if field_mode.use_c01() {
50      to_c01_col_key(index, num_cols)
51  } else {
52      to_a1_col_key(index)
53  }
54}
55
56pub fn to_head_key_default(index: usize) -> String {
57    to_c01_col_key(index, 1000)
58}
59
60/// Build header keys from the first row of a CSV file or headers captured from a spreadsheet
61pub fn build_header_keys(first_row: &[String], columns: &[Column], field_mode: &FieldNameMode) -> Vec<String> {
62let mut h_index = 0;
63    let mut headers: Vec<String> = vec![];
64    let num_cols = first_row.len();
65    let keep_headers = field_mode.keep_headers();
66    for h_row in first_row.to_owned() {
67        let sn = h_row.to_snake_case();
68        let mut has_override = false;
69        if let Some(col) = columns.get(h_index) {
70            // only apply override if key is not empty
71            if let Some(k_str) = &col.key {
72              let h_key = if headers.contains(&k_str.to_string()) {
73                to_padded_col_suffix(k_str, h_index, num_cols)
74              } else {
75                k_str.to_string()
76              };
77              headers.push(h_key);
78              has_override = true;
79            }
80        }
81        if !has_override {
82            if keep_headers && sn.len() > 0 {
83                let sn_key = if headers.contains(&sn) {
84                    to_padded_col_suffix(&sn, h_index, num_cols)
85                } else {
86                    sn
87                };
88                headers.push(sn_key);
89            } else {
90                headers.push(to_head_key(h_index, field_mode, num_cols));
91            }
92        }
93        h_index += 1;
94    }
95    headers
96}
97
98/// Assign keys with A1+ notation
99pub fn build_a1_headers(first_row: &[String]) -> Vec<String> {
100    build_header_keys(first_row, &[], &FieldNameMode::A1)
101}
102
103/// Assign keys as c + zero-padded number
104pub fn build_c01_headers(first_row: &[String]) -> Vec<String> {
105    build_header_keys(first_row, &[], &FieldNameMode::NumPadded)
106}
107
108/// check if the row is not a header row. Always return true if row_index is greater than 0
109pub(crate) fn is_not_header_row(row_map: &IndexMap<String, Value>, row_index: usize, headers: &[String]) -> bool {
110  if row_index > 0 {
111      return true;
112  }
113  let mut h_index = 0;
114  let mut num_matched: usize = 0;
115  for (_key, value) in row_map.iter() {
116    let ref_key = value.to_string().to_snake_case();
117    if let Some(hk) = headers.get(h_index) {
118      let sn = hk.to_snake_case();
119      if sn == ref_key || sn.len() == 0 {
120        num_matched += 1;
121      }
122    }
123    h_index += 1;
124  }
125  num_matched < headers.len()
126}
127
#[cfg(test)]
mod tests {
    use simple_string_patterns::ToStrings;

    use crate::Format;

    use super::*;

    #[test]
    fn test_cell_letters_1() {
        // index 26 is the first two-letter key
        assert_eq!(to_a1_col_key(26), "aa");
    }

    #[test]
    fn test_cell_letters_2() {
        // index 701 is the last two-letter key
        assert_eq!(to_a1_col_key(701), "zz");
    }

    #[test]
    fn test_cell_letters_3() {
        // index 702 is the first three-letter key
        assert_eq!(to_a1_col_key(702), "aaa");
    }

    #[test]
    fn test_cell_letters_4() {
        // fewer than 100 columns: two-digit padding
        assert_eq!(to_c01_col_key(8, 60), "c09");
    }

    #[test]
    fn test_cell_letters_5() {
        // fewer than 1,000 columns: three-digit padding
        assert_eq!(to_c01_col_key(20, 750), "c021");
    }

    #[test]
    fn test_cell_letters_6() {
        // fewer than 10,000 columns: four-digit padding
        assert_eq!(to_c01_col_key(20, 2000), "c0021");
    }

    #[test]
    fn test_first_row() {
        // header labels as captured from the top row
        let first_row = ["Viscosity", "Rating", "", ""].to_strings();
        let cols = vec![
            Column::from_key_ref_with_format(None, Format::Float, None, false, false),
            Column::from_key_ref_with_format(Some("points"), Format::Decimal(3), None, false, false),
            Column::from_key_ref_with_format(Some("adjusted"), Format::Float, None, false, false),
        ];
        let headers = build_header_keys(&first_row, &cols, &FieldNameMode::AutoA1);
        // no override key: the captured label is snake-cased to `viscosity`
        assert_eq!(headers[0], "viscosity");
        // the captured label `Rating` is overridden by the column key `points`
        assert_eq!(headers[1], "points");
        // empty label, but the column key `adjusted` applies
        assert_eq!(headers[2], "adjusted");
        // fourth column has an empty label and no override: A1-style key `d`
        assert_eq!(headers[3], "d");
    }

    #[test]
    fn test_headers_a1_override() {
        // header labels as captured from the top row
        let first_row = ["Viscosity", "Rating", "Weighted", "Class"].to_strings();

        let headers = build_a1_headers(&first_row);
        // the captured label is replaced by the A1-style key `a`
        assert_eq!(headers[0], "a");
        // the fourth column should be `d`
        assert_eq!(headers[3], "d");
    }

    #[test]
    fn test_headers_c01_override() {
        // build a header row with 200 alphanumeric labels (A0, B3, C6, ...)
        let first_row: Vec<String> = (0u32..200)
            .map(|x| {
                let letter = char::from_u32(65 + (x % 26)).unwrap_or('_');
                format!("{}{}", letter, x * 3)
            })
            .collect();

        let headers = build_c01_headers(&first_row);
        // 200 columns means three-digit padding: the first column is `c001`
        assert_eq!(headers[0], "c001");
        // the fourth column is `c004`
        assert_eq!(headers[3], "c004");
    }
}