codes-un-m49 0.1.2

This package contains an implementation of the UN M49 - Standard Country or Area Codes for Statistical Use (Series M, No. 49) specification.
Documentation
use codes_common::build::{
    default_finalize_for, input_file_name, make_default_renderer, process, Data, SimpleData,
    DEFAULT_NUMERIC_CODE_TYPE,
};
use std::str::FromStr;
use tera::{Map, Value};

/// Type name passed as the first argument to `SimpleData::new_with_inner` in `main`.
// NOTE(review): `main` references this constant, so `allow(dead_code)` looks
// redundant here — confirm before removing.
#[allow(dead_code)]
const TYPE_NAME: &str = "RegionClassificationCode";

/// Runs the `codes_common::build` pipeline for the M49 region codes.
///
/// The pipeline is given, in order: an initializer that creates an empty
/// `SimpleData` for [`TYPE_NAME`] with `DEFAULT_NUMERIC_CODE_TYPE` as its inner
/// type, `process_input_data` to fill it, `default_finalize_for`, and a
/// renderer built from `"lib._rs"` and `"generated.rs"` (presumably the
/// template and output file names — confirm against `codes_common`).
///
/// # Errors
///
/// Returns whatever error `process` reports.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let renderer = make_default_renderer("lib._rs", "generated.rs");

    process(
        || Ok(SimpleData::new_with_inner(TYPE_NAME, DEFAULT_NUMERIC_CODE_TYPE)),
        process_input_data,
        default_finalize_for,
        renderer,
    )
}

fn process_input_data(mut data: SimpleData) -> Result<SimpleData, Box<dyn std::error::Error>> {
    use scraper::{Html, Selector};
    use std::fs;

    fn element_text(element: &scraper::element_ref::ElementRef) -> String {
        element
            .text()
            .map(|s| s.to_string())
            .collect::<Vec<String>>()
            .join("")
            .trim()
            .to_string()
    }

    let file_name = input_file_name("m49-overview.html");
    let source = fs::read_to_string(file_name)?;

    let document = Html::parse_document(&source);
    let row_selector = Selector::parse("#downloadTableEN tbody tr").unwrap();
    let data_selector = Selector::parse("td").unwrap();

    for row in document.select(&row_selector) {
        let mut strings = row
            .select(&data_selector)
            .enumerate()
            .filter_map(|(i, e)| {
                if let 0..=11 = &i {
                    Some(element_text(&e))
                } else {
                    None
                }
            })
            .collect::<Vec<String>>();
        assert_eq!(strings.len(), 12);
        let country_name = strings.remove(8);
        strings.insert(9, country_name);

        let code = strings.get(0).unwrap();
        if !code.is_empty() && !data.contains(code) {
            let mut row: Map<String, Value> = Default::default();

            row.insert(
                "code_as_int".to_string(),
                u16::from_str(code).unwrap().into(),
            );

            row.insert(
                "name".to_string(),
                strings.get(1).unwrap().to_string().into(),
            );

            row.insert("kind".to_string(), String::from("Global").into());

            data.insert_row(code, row);
        }

        let code = strings.get(2).unwrap();
        if !code.is_empty() && !data.contains(code) {
            let mut row: Map<String, Value> = Default::default();

            row.insert(
                "code_as_int".to_string(),
                u16::from_str(code).unwrap().into(),
            );

            row.insert(
                "name".to_string(),
                strings.get(3).unwrap().to_string().into(),
            );

            row.insert("kind".to_string(), String::from("Region").into());

            row.insert(
                "parent_code".to_string(),
                strings.get(0).unwrap().to_string().into(),
            );

            data.insert_row(code, row);
        }

        let code = strings.get(4).unwrap();
        if !code.is_empty() && !data.contains(code) {
            let mut row: Map<String, Value> = Default::default();

            row.insert(
                "code_as_int".to_string(),
                u16::from_str(code).unwrap().into(),
            );

            row.insert(
                "name".to_string(),
                strings.get(5).unwrap().to_string().into(),
            );

            row.insert("kind".to_string(), String::from("SubRegion").into());

            row.insert(
                "parent_code".to_string(),
                strings.get(2).unwrap().to_string().into(),
            );

            data.insert_row(code, row);
        }

        let code = strings.get(6).unwrap();
        if !code.is_empty() && !data.contains(code) {
            let mut row: Map<String, Value> = Default::default();

            row.insert(
                "code_as_int".to_string(),
                u16::from_str(code).unwrap().into(),
            );

            row.insert(
                "name".to_string(),
                strings.get(7).unwrap().to_string().into(),
            );

            row.insert(
                "kind".to_string(),
                String::from("IntermediateRegion").into(),
            );

            row.insert(
                "parent_code".to_string(),
                strings.get(4).unwrap().to_string().into(),
            );

            data.insert_row(code, row);
        }

        let code = strings.get(8).unwrap();
        if !code.is_empty() && !data.contains(code) {
            let mut row: Map<String, Value> = Default::default();

            row.insert(
                "code_as_int".to_string(),
                u16::from_str(code).unwrap().into(),
            );

            row.insert(
                "name".to_string(),
                strings.get(9).unwrap().to_string().into(),
            );

            let country_alpha_2_code = strings.get(10).unwrap();
            if country_alpha_2_code.is_empty() {
                row.insert("kind".to_string(), String::from("Area").into());
            } else {
                row.insert("kind".to_string(), String::from("Country").into());

                row.insert(
                    "country_alpha_2_code".to_string(),
                    country_alpha_2_code.to_string().into(),
                );

                row.insert(
                    "country_alpha_3_code".to_string(),
                    strings.get(11).unwrap().to_string().into(),
                );
            }

            for parent in [6, 4, 2] {
                let parent_code = strings.get(parent as usize).unwrap();
                if !parent_code.is_empty() {
                    row.insert("parent_code".to_string(), parent_code.to_string().into());
                    break;
                }
            }

            data.insert_row(code, row);
        }
    }

    Ok(data)
}