use csv::Writer;
use std::collections::{BTreeMap, BTreeSet};
/// Builds the character replacement table and writes it to `src/replacements.csv`.
///
/// The table maps a character to the set of characters it may stand for. It is
/// assembled from four sources, all merged into one `BTreeMap`:
///
/// 1. `unicode_confusables.txt`: `;`-separated lines whose first two fields are
///    hexadecimal code points (`#` lines and blank lines are skipped).
/// 2. `unicode_fonts.csv`: lines of the form `<case>,<26 characters>`, where
///    `<case>` is `u` or `l`.
/// 3. Every Unicode scalar value whose lowercase mapping starts with a
///    different character.
/// 4. `replacements_extra.csv`: lines of the form `<char or \u{HEX}>,<replacements>`.
///
/// # Panics
///
/// Panics on malformed input in the embedded data files and on any I/O error
/// while writing the output file.
fn main() {
    /// Parses a bare hexadecimal code point into a `char`, returning `None`
    /// when the text is not valid hex or is not a Unicode scalar value.
    fn parse_hex_char(hex: &str) -> Option<char> {
        u32::from_str_radix(hex, 16).ok().and_then(char::from_u32)
    }

    let mut replacements: BTreeMap<char, BTreeSet<char>> = BTreeMap::new();
    // Merges every character of `v` into the replacement set of `k`.
    let mut append_replacement = |(k, v): (char, String)| {
        replacements.entry(k).or_default().extend(v.chars());
    };

    // Source 1: confusables. Lines that do not parse are skipped on purpose.
    include_str!("unicode_confusables.txt")
        .lines()
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| {
            let mut fields = line.split(';');
            let (find, replace) = fields.next().zip(fields.next())?;
            let find = parse_hex_char(find.trim())?;
            let replace = parse_hex_char(replace.trim())?.to_ascii_lowercase();
            if replace.is_digit(36) {
                // Only mappings onto an ASCII letter or digit are kept.
                println!("{find} -> {replace}");
                // The character also maps to itself.
                Some((find, format!("{replace}{find}")))
            } else if find.is_digit(36) {
                // An ASCII alphanumeric on the left-hand side means the data
                // is not in the direction this script expects.
                panic!("reversed!");
            } else {
                None
            }
        })
        .for_each(&mut append_replacement);

    // Source 2: styled alphabets, one 26-character alphabet per line.
    for line in include_str!("unicode_fonts.csv")
        .lines()
        .filter(|line| !line.is_empty())
    {
        let (case, alphabet) = line
            .split_once(',')
            .unwrap_or_else(|| panic!("unicode_fonts.csv: missing ',' in line: {line}"));
        if alphabet.chars().count() != 26 {
            panic!("alphabet doesn't have 26 chars: {}", alphabet);
        }
        // Pair each styled character with the ASCII letter at the same position.
        for (lower, styled) in ('a'..='z').zip(alphabet.chars()) {
            if styled.is_ascii() {
                continue;
            }
            let plain = match case {
                "u" => format!("{lower}{}", lower.to_ascii_uppercase()),
                "l" => lower.to_string(),
                other => panic!("unicode_fonts.csv: unknown case marker {other:?}"),
            };
            append_replacement((styled, plain));
        }
    }

    // Source 3: every scalar value whose lowercase form starts with another
    // character. Iterating a `char` range visits only valid scalar values, so
    // nothing past `char::MAX` (and no surrogate) is scanned.
    ('\0'..=char::MAX)
        .filter_map(|c| {
            c.to_lowercase()
                .next()
                .filter(|&lower| lower != c)
                .map(|lower| (c, lower.to_string()))
        })
        .for_each(&mut append_replacement);

    // Source 4: hand-written extras. `lines()` (rather than splitting on
    // "\n") keeps a trailing '\r' out of the replacement set when the file
    // has CRLF line endings, matching how the other two files are read.
    include_str!("replacements_extra.csv")
        .lines()
        .enumerate()
        .filter(|(_, line)| !line.is_empty())
        .map(|(n, line)| {
            use finl_unicode::categories::CharacterCategories;
            use unicode_normalization::UnicodeNormalization;

            let line_no = n + 1;
            let (key, value) = line
                .split_once(',')
                .unwrap_or_else(|| panic!("line {line_no}: missing ','"));

            // The key is either one literal character or a `\u{HEX}` escape.
            let mut key_chars = key.chars();
            let c = match (key_chars.next(), key_chars.next()) {
                (Some(only), None) => only,
                _ => {
                    let escape = key
                        .strip_prefix("\\u{")
                        .and_then(|rest| rest.strip_suffix('}'))
                        .unwrap_or_else(|| panic!("line {line_no}"));
                    parse_hex_char(escape).unwrap_or_else(|| {
                        panic!("line {line_no}: invalid code point escape {key:?}")
                    })
                }
            };

            // Decompose, drop nonspacing marks, and recompose the key.
            let original = String::from(c);
            let stripped = original
                .nfd()
                .filter(|ch| !ch.is_mark_nonspacing())
                .nfc()
                .collect::<String>();
            if original != stripped {
                println!("Warning (Mn): {original} -> {stripped}");
            }
            assert_eq!(stripped.chars().count(), 1, "line {}", line_no);
            (
                stripped
                    .chars()
                    .next()
                    .expect("exactly one char was asserted above"),
                String::from(value),
            )
        })
        .for_each(&mut append_replacement);

    let mut writer = Writer::from_path("src/replacements.csv")
        .expect("failed to create src/replacements.csv");
    for (find, mut replace) in replacements {
        // ASCII characters always map to themselves as well.
        if find.is_ascii() {
            replace.insert(find);
        }
        // Iterate over a snapshot because the set is extended inside the loop.
        for c in replace.clone() {
            let lower = c
                .to_lowercase()
                .next()
                .expect("to_lowercase yields at least one char");
            if c.is_uppercase() && !replace.contains(&lower) {
                println!("WARNING: Replacing {find} with {replace:?}, so adding {lower}");
                replace.insert(lower);
            }
        }
        let replace_field: String = replace.iter().collect();
        writer
            .write_record(&[find.to_string(), replace_field])
            .expect("failed to write record to src/replacements.csv");
    }
    writer
        .flush()
        .expect("failed to flush src/replacements.csv");
}