use std::io::BufReader;
use proc_macro::TokenStream;
use quote::{format_ident, quote};
use serde::Deserialize;
// General category tag selecting decimal-digit characters
// ("Nd" = Number, decimal digit) in UnicodeData.txt column 2.
const NUMERIC_CATEGORY: &str = "Nd";
/// One row of `UnicodeData.txt`, deserialized positionally — the file is
/// semicolon-delimited and headerless (see `parse_digit_mappings`), so the
/// field ORDER here must match the file's column order exactly. Do not
/// reorder or remove fields. Column meanings follow UAX #44.
#[derive(Debug, Deserialize)]
#[allow(unused)] // every column must be declared for positional deserialization, even unused ones
struct UnicodeRecord {
    /// Column 0: code point as bare hex digits, e.g. `"0660"`.
    pub code_point: String,
    /// Column 1: character name.
    pub name: String,
    /// Column 2: general category, e.g. `"Nd"`, `"Lu"`.
    pub general_category: String,
    /// Column 3: canonical combining class.
    pub canonical_combining_class: u8,
    /// Column 4: bidirectional category.
    pub bidi_category: String,
    /// Column 5: decomposition mapping (empty for most characters).
    pub decomposition: Option<String>,
    /// Column 6: decimal digit value — this crate relies on it being
    /// present for every `Nd` character (see `parse_digit_mappings`).
    pub decimal_digit_value: Option<u32>,
    /// Column 7: digit value (broader than column 6).
    pub digit_value: Option<u32>,
    /// Column 8: numeric value — kept as a string since it may be a fraction.
    pub numeric_value: Option<String>,
    /// Column 9: `"Y"`/`"N"` bidi-mirrored flag.
    pub bidi_mirrored: String,
    /// Column 10: obsolete Unicode 1.0 name.
    pub unicode_1_name: Option<String>,
    /// Column 11: obsolete ISO comment.
    pub iso_comment: Option<String>,
    /// Column 12: simple uppercase mapping (hex code point).
    pub simple_uppercase_mapping: Option<String>,
    /// Column 13: simple lowercase mapping (hex code point).
    pub simple_lowercase_mapping: Option<String>,
    /// Column 14: simple titlecase mapping (hex code point).
    pub simple_titlecase_mapping: Option<String>,
}
/// One entry of the generated digit-normalization table: a Unicode
/// decimal-digit character paired with the value it maps to.
#[derive(Debug)]
struct NormalizationReplacement {
    /// The Unicode decimal-digit (`Nd`) character matched against.
    normalized_unicode_char: char,
    // NOTE(review): despite the name, this holds the digit's numeric value
    // (0-9, taken from `decimal_digit_value`), not an ASCII code point.
    ascii_char: u32
}
/// Converts a bare hex code-point string (e.g. `"0660"`, as found in
/// column 0 of UnicodeData.txt) into the corresponding `char`.
///
/// # Panics
/// Panics if `hex_code_digits` is not valid hexadecimal, or if the parsed
/// value is not a valid Unicode scalar value (e.g. a surrogate).
fn extract_unicode_char(hex_code_digits: &str) -> char {
    let code_point = u32::from_str_radix(hex_code_digits, 16)
        .expect("code point field MUST be valid hexadecimal");
    char::from_u32(code_point).expect("Hex MUST be valid unicode")
}
/// Parses the bundled `UnicodeData.txt` and returns one replacement entry
/// for every character in the `Nd` (decimal digit) general category,
/// pairing each character with its decimal digit value.
///
/// # Panics
/// Panics at macro-expansion time if any row fails to deserialize or an
/// `Nd` row lacks a decimal digit value — a broken data file should fail
/// the build rather than silently produce an incomplete table.
fn parse_digit_mappings() -> Vec<NormalizationReplacement> {
    // The data file is embedded at compile time: a semicolon-delimited,
    // headerless table with one code point per row.
    let file = include_str!("../assets/UnicodeData.txt");
    let reader = BufReader::new(file.as_bytes());
    let mut parser = csv::ReaderBuilder::new()
        .has_headers(false)
        .delimiter(b';')
        .from_reader(reader);
    parser
        .deserialize::<UnicodeRecord>()
        // Fail loudly on a malformed row instead of silently skipping it;
        // a skipped `Nd` row would yield a wrong normalization table.
        .map(|record| record.expect("every row of UnicodeData.txt must deserialize"))
        .filter(|record| record.general_category == NUMERIC_CATEGORY)
        .map(|record| NormalizationReplacement {
            normalized_unicode_char: extract_unicode_char(&record.code_point),
            ascii_char: record
                .decimal_digit_value
                .expect("all \\Nd should have decimal value"),
        })
        .collect()
}
/// Function-like proc macro that expands to a `match` over the identifier
/// passed as input, mapping every Unicode decimal-digit (`Nd`) character
/// to `Some(<digit value> as u8)`, with a final `_ => None` arm.
///
/// The macro input is expected to be a single identifier naming the
/// `char` expression to match on.
#[proc_macro]
pub fn digit_parse_mappings(item: TokenStream) -> TokenStream {
    // Interpret the raw macro input as the match scrutinee identifier.
    let scrutinee = format_ident!("{}", item.to_string());

    // Build one match arm per Unicode decimal digit.
    let mut arms = Vec::new();
    for replacement in parse_digit_mappings() {
        let unicode_digit = replacement.normalized_unicode_char;
        let digit_value = replacement.ascii_char;
        arms.push(quote! {
            #unicode_digit => Some(#digit_value as u8)
        });
    }

    let expanded = quote! {
        match #scrutinee {
            #(#arms,)*
            _ => None
        }
    };
    expanded.into()
}