use crate::Error;
use std::path::Path;
use std::str::FromStr;
use ucd_parse::UcdFile;
/// A property record of `HangulSyllableType.txt` (the path reported by this
/// type's `UcdFile::relative_file_path`).
///
/// This is a thin wrapper over `ucd_parse::Property`: its `FromStr` impl
/// delegates to `ucd_parse::Property::from_str` and stores the result as-is.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct HangulSyllableType {
/// The parsed property; its `codepoints` field is what
/// `UcdFileByCodepoint::codepoints` iterates over.
pub prop: ucd_parse::Property,
}
impl ucd_parse::UcdFile for HangulSyllableType {
    /// Returns the path of the data file backing this type, relative to the
    /// UCD directory.
    fn relative_file_path() -> &'static Path {
        const FILE_NAME: &str = "HangulSyllableType.txt";
        Path::new(FILE_NAME)
    }
}
impl ucd_parse::UcdFileByCodepoint for HangulSyllableType {
    /// Iterates over the codepoints stored in the wrapped property.
    fn codepoints(&self) -> ucd_parse::CodepointIter {
        let covered = self.prop.codepoints;
        IntoIterator::into_iter(covered)
    }
}
impl FromStr for HangulSyllableType {
    type Err = ucd_parse::Error;

    /// Parses `line` as a `ucd_parse::Property` and wraps it.
    ///
    /// # Errors
    ///
    /// Returns whatever error `ucd_parse::Property`'s parser produces for
    /// `line`.
    fn from_str(line: &str) -> Result<Self, Self::Err> {
        line.parse::<ucd_parse::Property>()
            .map(|prop| Self { prop })
    }
}
/// A property record of `extracted/DerivedJoiningType.txt` (the path reported
/// by this type's `UcdFile::relative_file_path`).
///
/// This is a thin wrapper over `ucd_parse::Property`: its `FromStr` impl
/// delegates to `ucd_parse::Property::from_str` and stores the result as-is.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DerivedJoiningType {
/// The parsed property; its `codepoints` field is what
/// `UcdFileByCodepoint::codepoints` iterates over.
pub prop: ucd_parse::Property,
}
impl ucd_parse::UcdFile for DerivedJoiningType {
    /// Returns the path of the data file backing this type, relative to the
    /// UCD directory.
    fn relative_file_path() -> &'static Path {
        const FILE_NAME: &str = "extracted/DerivedJoiningType.txt";
        Path::new(FILE_NAME)
    }
}
impl ucd_parse::UcdFileByCodepoint for DerivedJoiningType {
    /// Iterates over the codepoints stored in the wrapped property.
    fn codepoints(&self) -> ucd_parse::CodepointIter {
        let covered = self.prop.codepoints;
        IntoIterator::into_iter(covered)
    }
}
impl FromStr for DerivedJoiningType {
    type Err = ucd_parse::Error;

    /// Parses `line` as a `ucd_parse::Property` and wraps it.
    ///
    /// # Errors
    ///
    /// Returns whatever error `ucd_parse::Property`'s parser produces for
    /// `line`.
    fn from_str(line: &str) -> Result<Self, Self::Err> {
        line.parse::<ucd_parse::Property>()
            .map(|prop| Self { prop })
    }
}
/// One record produced by `UnicodeData::parse`.
///
/// It carries the same fields as a raw `ucd_parse::UnicodeData` row, except
/// that the row's single `codepoint` is replaced by `codepoints`, so that a
/// range-start / range-end pair of raw rows is represented by one record.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct UnicodeData {
/// Either the single codepoint of the raw row, or the whole range spanned
/// by a range-start / range-end pair of raw rows.
pub codepoints: ucd_parse::Codepoints,
// Every field below is copied unchanged from the same-named field of the
// raw `ucd_parse::UnicodeData` row. For a range, `parse` takes them from
// the range-end row; the range-start row contributes only the first
// codepoint.
pub name: String,
pub general_category: String,
pub canonical_combining_class: u8,
pub bidi_class: String,
pub decomposition: ucd_parse::UnicodeDataDecomposition,
pub numeric_type_decimal: Option<u8>,
pub numeric_type_digit: Option<u8>,
pub numeric_type_numeric: Option<ucd_parse::UnicodeDataNumeric>,
pub bidi_mirrored: bool,
pub unicode1_name: String,
pub iso_comment: String,
pub simple_uppercase_mapping: Option<ucd_parse::Codepoint>,
pub simple_lowercase_mapping: Option<ucd_parse::Codepoint>,
pub simple_titlecase_mapping: Option<ucd_parse::Codepoint>,
}
impl UnicodeData {
    /// Parses the raw `ucd_parse::UnicodeData` rows found under `ucd_dir`
    /// and collapses every range-start / range-end pair of rows into a
    /// single record covering the whole range.
    ///
    /// Rows that are neither a range start nor a range end become records
    /// with `Codepoints::Single`. For a range, all fields other than
    /// `codepoints` are taken from the range-end row.
    ///
    /// # Errors
    ///
    /// Returns an error when the underlying `ucd_parse::parse` call fails,
    /// or when the range markers are inconsistent:
    ///
    /// * a range start is not immediately followed by a range end,
    /// * a range end appears without a preceding range start,
    /// * a range's start codepoint is greater than its end codepoint,
    /// * the input ends while a range is still open.
    pub fn parse(ucd_dir: &Path) -> Result<Vec<UnicodeData>, Error> {
        let raws: Vec<ucd_parse::UnicodeData> = ucd_parse::parse(ucd_dir)?;

        // Only evaluated on error paths. Rendering it with `display()`
        // keeps a non-UTF-8 directory from turning an error into a panic.
        let file = || ucd_parse::UnicodeData::file_path(ucd_dir);

        // Each raw row yields at most one record, so this is an upper bound.
        let mut xs = Vec::with_capacity(raws.len());
        // The range opened by the previous row, if that row was a range start.
        let mut range: Option<ucd_parse::CodepointRange> = None;

        for udata in &raws {
            match range.as_mut() {
                Some(r) => {
                    // The row right after a range start must close it.
                    if !udata.is_range_end() {
                        return err!(
                            "Expected end range after codepoint {:#06x}. Current codepoint {:#06x}. File: {}",
                            r.start.value(),
                            udata.codepoint.value(),
                            file().display()
                        );
                    }
                    r.end = udata.codepoint;
                    if r.start.value() > r.end.value() {
                        return err!(
                            "Start range {:#06x} is greater than end range {:#06x}. File: {}",
                            r.start.value(),
                            r.end.value(),
                            file().display()
                        );
                    }
                }
                None => {
                    if udata.is_range_end() {
                        return err!(
                            "Found end range without starting. Current codepoint {:#06x}. File: {}",
                            udata.codepoint.value(),
                            file().display()
                        );
                    }
                }
            }

            if udata.is_range_start() {
                if let Some(open) = range {
                    return err!(
                        "Previous range started with codepoint {:#06x} has not yet finished. File: {}",
                        open.start.value(),
                        file().display()
                    );
                }
                // Remember the start; the record is emitted with the end row.
                range = Some(ucd_parse::CodepointRange {
                    start: udata.codepoint,
                    end: udata.codepoint,
                });
                continue;
            }

            // If a range is open here, the checks above guarantee this row
            // is its end, so taking it also closes the range.
            let codepoints = match range.take() {
                Some(r) => ucd_parse::Codepoints::Range(r),
                None => ucd_parse::Codepoints::Single(udata.codepoint),
            };
            xs.push(UnicodeData {
                codepoints,
                name: udata.name.clone(),
                general_category: udata.general_category.clone(),
                canonical_combining_class: udata.canonical_combining_class,
                bidi_class: udata.bidi_class.clone(),
                decomposition: udata.decomposition.clone(),
                numeric_type_decimal: udata.numeric_type_decimal,
                numeric_type_digit: udata.numeric_type_digit,
                numeric_type_numeric: udata.numeric_type_numeric,
                bidi_mirrored: udata.bidi_mirrored,
                unicode1_name: udata.unicode1_name.clone(),
                iso_comment: udata.iso_comment.clone(),
                simple_uppercase_mapping: udata.simple_uppercase_mapping,
                simple_lowercase_mapping: udata.simple_lowercase_mapping,
                simple_titlecase_mapping: udata.simple_titlecase_mapping,
            });
        }

        // A range start on the very last row would otherwise be dropped
        // silently and its codepoints lost.
        if let Some(open) = range {
            return err!(
                "Range started with codepoint {:#06x} has no end range. File: {}",
                open.start.value(),
                file().display()
            );
        }

        Ok(xs)
    }
}