use crate::state::{
attribute::{AttributeBuilder, AttributeType, Entries, Entry},
encoding::Encoding,
entries::EntriesElement,
entry_codes::EntryCodes,
language::Language,
oca::{OCABuilder, OCA},
};
use calamine::{open_workbook_auto, DataType, Reader};
use std::collections::{BTreeMap, HashMap};
/// Outcome of [`parse`]: the OCA object assembled from the workbook together
/// with the translation languages that were found in it.
pub struct ParsedResult {
    /// OCA produced by finalizing the `OCABuilder` filled from the workbook.
    pub oca: OCA,
    /// Names of the translation sheets, in workbook order. These are all
    /// sheets after the first one, once the "READ ME" sheet has been dropped.
    pub languages: Vec<Language>,
}
// Column positions read from the main sheet (the first sheet that is not
// "READ ME"). They are used as the second component of the `(row, column)`
// tuple passed to `get_value`.
const CLASSIFICATION_INDEX: u32 = 0;
const ATTR_NAME_INDEX: u32 = 1;
const ATTR_TYPE_INDEX: u32 = 2;
const PII_FLAG_INDEX: u32 = 3;
const ENCODING_INDEX: u32 = 4;
const FORMAT_INDEX: u32 = 5;
const ENTRY_CODES_INDEX: u32 = 6;
// Column positions read from every translation sheet. These overlap
// numerically with the main-sheet columns above because the two kinds of
// sheet are read with different column sets.
const LABEL_INDEX: u32 = 3;
const ENTRIES_INDEX: u32 = 4;
const INFORMATION_INDEX: u32 = 5;
// Hint appended to the "missing sheets" errors returned by `parse`.
const SAMPLE_TEMPLATE_MSG: &str = "Sample file template can be found here: https://github.com/THCLab/oca-rust/blob/main/tests/assets/oca_template.xlsx";
pub fn parse(path: String) -> Result<ParsedResult, Box<dyn std::error::Error>> {
let mut workbook = open_workbook_auto(path).or(Err(
"Provided file cannot be parsed. Check if file exists and format is XLS(X)",
))?;
let mut sheet_names = workbook.sheet_names().to_vec();
let mut languages = vec![];
sheet_names.retain(|n| n != "READ ME");
let main_sheet_name = sheet_names
.first()
.ok_or(format!("Missing sheets. {}", SAMPLE_TEMPLATE_MSG))?;
let main_sheet = workbook.worksheet_range(main_sheet_name).unwrap().unwrap();
let translation_sheet_names = sheet_names.split_off(1);
let mut translation_sheets: Vec<(Language, _)> = vec![];
for translation_sheet_name in translation_sheet_names {
languages.push(translation_sheet_name.clone());
translation_sheets.push((
translation_sheet_name.clone(),
workbook
.worksheet_range(&translation_sheet_name.clone())
.unwrap()
.unwrap(),
));
}
let first_translation_sheet = &translation_sheets
.first()
.ok_or(format!(
"Missing translation sheets. {}",
SAMPLE_TEMPLATE_MSG
))?
.1;
let start: u32 = 3;
let oca_range = (start, first_translation_sheet.height() as u32);
let mut oca_builder = OCABuilder::new(Encoding::Utf8);
let mut classification = String::new();
let classification_value = main_sheet.get_value((oca_range.0, CLASSIFICATION_INDEX));
if let Some(class) = classification_value {
classification = class.to_string();
}
oca_builder = oca_builder.add_classification(classification);
let mut attribute_names = vec![];
let mut attribute_builders: Vec<(u32, AttributeBuilder)> = vec![];
for attr_index in oca_range.0..oca_range.1 {
let mut attribute_name = main_sheet
.get_value((attr_index, ATTR_NAME_INDEX))
.unwrap()
.to_string();
let attribute_count = attribute_names
.iter()
.filter(|&name| *name == attribute_name)
.count();
if attribute_count > 0 {
attribute_name = format!("{}-{}", attribute_name, attribute_count);
}
attribute_names.push(attribute_name.clone());
let mut attribute_builder = AttributeBuilder::new(
attribute_name.clone(),
serde_json::from_str::<AttributeType>(&format!(
r#""{}""#,
&main_sheet.get_value((attr_index, ATTR_TYPE_INDEX)).unwrap()
))
.or_else(|e| {
Err(format!(
"Parsing attribute type in row {} ({}) failed. {}",
attr_index + 1,
attribute_name,
e.to_string()
))
})?,
);
if let Some(DataType::String(_value)) = main_sheet.get_value((attr_index, PII_FLAG_INDEX)) {
attribute_builder = attribute_builder.set_pii();
}
if let Some(DataType::String(encoding_value)) =
main_sheet.get_value((attr_index, ENCODING_INDEX))
{
let encoding = serde_json::from_str::<Encoding>(&format!(r#""{}""#, encoding_value))
.or_else(|e| {
Err(format!(
"Parsing character encoding in row {} failed. {}",
attr_index + 1,
e.to_string()
))
})?;
attribute_builder = attribute_builder.add_encoding(encoding);
}
if let Some(DataType::String(format_value)) =
main_sheet.get_value((attr_index, FORMAT_INDEX))
{
attribute_builder = attribute_builder.add_format(format_value.clone());
}
if let Some(DataType::String(entry_codes_value)) =
main_sheet.get_value((attr_index, ENTRY_CODES_INDEX))
{
if entry_codes_value != &"[SAI]".to_string() {
let entry_codes: EntryCodes;
if entry_codes_value.starts_with("SAI:") {
let sai = entry_codes_value.strip_prefix("SAI:").unwrap();
entry_codes = EntryCodes::Sai(sai.to_string());
} else {
let codes: Vec<String> = entry_codes_value
.split("|")
.collect::<Vec<&str>>()
.iter()
.map(|c| c.to_string())
.collect();
entry_codes = EntryCodes::Array(codes);
}
attribute_builder = attribute_builder.add_entry_codes(entry_codes);
}
}
attribute_builders.push((attr_index, attribute_builder));
}
let mut name_trans: HashMap<Language, String> = HashMap::new();
let mut description_trans: HashMap<Language, String> = HashMap::new();
let mut label_trans: HashMap<u32, HashMap<Language, String>> = HashMap::new();
let mut entries_trans: HashMap<u32, HashMap<Language, EntriesElement>> = HashMap::new();
let mut information_trans: HashMap<u32, HashMap<Language, String>> = HashMap::new();
for (lang, sheet) in translation_sheets.iter() {
name_trans.insert(
lang.to_string(),
sheet.get_value((oca_range.0, 0)).unwrap().to_string(),
);
description_trans.insert(
lang.to_string(),
sheet.get_value((oca_range.0, 1)).unwrap().to_string(),
);
for attr_index in (oca_range.0)..(oca_range.1) {
if let Some(DataType::String(label_value)) = sheet.get_value((attr_index, LABEL_INDEX))
{
match label_trans.get_mut(&attr_index) {
Some(attr_label_tr) => {
attr_label_tr.insert(lang.to_string(), label_value.clone());
}
None => {
let mut attr_label_tr: HashMap<Language, String> = HashMap::new();
attr_label_tr.insert(lang.to_string(), label_value.clone());
label_trans.insert(attr_index, attr_label_tr);
}
}
}
if let Some(DataType::String(entries_value)) =
sheet.get_value((attr_index, ENTRIES_INDEX))
{
let entries_el: EntriesElement;
if entries_value.starts_with("SAI:") {
let sai = entries_value.strip_prefix("SAI:").unwrap();
entries_el = EntriesElement::Sai(sai.to_string());
} else {
let entries_obj = entries_value.split("|").collect::<Vec<&str>>().iter().fold(
BTreeMap::new(),
|mut acc, x| {
let splitted = x.split(":").collect::<Vec<&str>>();
acc.insert(
splitted.get(0).unwrap().to_string(),
splitted.get(1).unwrap().to_string(),
);
acc
},
);
entries_el = EntriesElement::Object(entries_obj);
}
match entries_trans.get_mut(&attr_index) {
Some(attr_entries_tr) => {
if attr_entries_tr.get(lang).is_none() {
attr_entries_tr.insert(lang.to_string(), entries_el);
}
}
None => {
let mut attr_entries_tr: HashMap<Language, EntriesElement> = HashMap::new();
attr_entries_tr.insert(lang.to_string(), entries_el);
entries_trans.insert(attr_index, attr_entries_tr);
}
}
}
if let Some(DataType::String(information_value)) =
sheet.get_value((attr_index, INFORMATION_INDEX))
{
match information_trans.get_mut(&attr_index) {
Some(attr_info_tr) => {
attr_info_tr.insert(lang.to_string(), information_value.clone());
}
None => {
let mut attr_info_tr: HashMap<Language, String> = HashMap::new();
attr_info_tr.insert(lang.to_string(), information_value.clone());
information_trans.insert(attr_index, attr_info_tr);
}
}
}
}
}
for (i, mut attribute_builder) in attribute_builders {
if let Some(label_tr) = label_trans.get(&i).cloned() {
attribute_builder = attribute_builder.add_label(label_tr);
}
if let Some(lang_entries_tr) = entries_trans.get(&i).cloned() {
let mut entries: Option<Entries> = None;
for (lang, entries_tr) in lang_entries_tr.iter() {
match entries_tr {
EntriesElement::Sai(sai) => match entries {
Some(Entries::Sai(ref mut lang_sai)) => {
lang_sai.insert(lang.to_string(), sai.to_string());
}
Some(Entries::Object(_)) => {}
None => {
let mut lang_sai: HashMap<Language, String> = HashMap::new();
lang_sai.insert(lang.to_string(), sai.to_string());
entries = Some(Entries::Sai(lang_sai));
}
},
EntriesElement::Object(entries_obj) => match entries {
Some(Entries::Sai(_)) => {}
Some(Entries::Object(ref mut entry_vec)) => {
for (e_key, e_val) in entries_obj.iter() {
let lang_entry = &mut entry_vec
.iter_mut()
.find(|el| &el.id == e_key)
.ok_or(format!(
"Unknown entry code in {} translation: {}",
lang, e_key
))?
.translations;
lang_entry.insert(lang.to_string(), e_val.clone());
}
}
None => {
let mut entry_vec: Vec<Entry> = vec![];
for (e_key, e_val) in entries_obj.iter() {
let mut lang_entry: HashMap<Language, String> = HashMap::new();
lang_entry.insert(lang.to_string(), e_val.clone());
entry_vec.push(Entry::new(e_key.to_string(), lang_entry))
}
entries = Some(Entries::Object(entry_vec));
}
},
}
}
if let Some(ent) = entries {
attribute_builder = attribute_builder.add_entries(ent);
}
}
if let Some(info_tr) = information_trans.get(&i).cloned() {
attribute_builder = attribute_builder.add_information(info_tr);
}
oca_builder = oca_builder.add_attribute(attribute_builder.build());
}
oca_builder = oca_builder.add_name(name_trans);
oca_builder = oca_builder.add_description(description_trans);
let oca = oca_builder.finalize();
Ok(ParsedResult { oca, languages })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the workbook at `path` and checks that it yields the attribute
    /// and language counts expected from the bundled OCA template.
    fn assert_template_parses(path: String) {
        let outcome = parse(path);
        assert!(outcome.is_ok());
        if let Ok(parsed) = outcome {
            let attribute_total = parsed.oca.capture_base.attributes.len();
            let language_total = parsed.languages.len();
            assert_eq!(attribute_total, 18);
            assert_eq!(language_total, 2);
        }
    }

    /// The XLSX flavour of the template is parsed successfully.
    #[test]
    fn parse_xlsx_file() {
        assert_template_parses(
            concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/tests/assets/oca_template.xlsx"
            )
            .to_string(),
        );
    }

    /// The legacy XLS flavour of the template is parsed successfully.
    #[test]
    fn parse_xls_file() {
        assert_template_parses(
            concat!(env!("CARGO_MANIFEST_DIR"), "/tests/assets/oca_template.xls").to_string(),
        );
    }

    /// A file that is not a spreadsheet is rejected with an error.
    #[test]
    fn return_error_when_file_type_is_invalid() {
        let path = concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/tests/assets/invalid_format.txt"
        )
        .to_string();
        assert!(parse(path).is_err());
    }
}