ucd_parse/
arabic_shaping.rs1use std::path::Path;
2
3use crate::{
4 common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint},
5 error::Error,
6};
7
8#[derive(Clone, Debug, Default, Eq, PartialEq)]
12pub struct ArabicShaping {
13 pub codepoint: Codepoint,
15 pub schematic_name: String,
22 pub joining_type: JoiningType,
24 pub joining_group: String,
26}
27
/// The joining type recorded for a codepoint in `ArabicShaping.txt`.
///
/// Each variant corresponds to exactly one single-letter code in the data
/// file; that letter is noted on the variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum JoiningType {
    /// Written as `R` in the data file.
    RightJoining,
    /// Written as `L` in the data file.
    LeftJoining,
    /// Written as `D` in the data file.
    DualJoining,
    /// Written as `C` in the data file.
    JoinCausing,
    /// Written as `U` in the data file.
    NonJoining,
    /// Written as `T` in the data file.
    Transparent,
}
38
39impl JoiningType {
40 pub fn as_str(&self) -> &str {
41 match self {
42 JoiningType::RightJoining => "R",
43 JoiningType::LeftJoining => "L",
44 JoiningType::DualJoining => "D",
45 JoiningType::JoinCausing => "C",
46 JoiningType::NonJoining => "U",
47 JoiningType::Transparent => "T",
48 }
49 }
50}
51
52impl Default for JoiningType {
53 fn default() -> JoiningType {
54 JoiningType::NonJoining
55 }
56}
57
58impl std::str::FromStr for JoiningType {
59 type Err = Error;
60
61 fn from_str(s: &str) -> Result<JoiningType, Error> {
62 match s {
63 "R" => Ok(JoiningType::RightJoining),
64 "L" => Ok(JoiningType::LeftJoining),
65 "D" => Ok(JoiningType::DualJoining),
66 "C" => Ok(JoiningType::JoinCausing),
67 "U" => Ok(JoiningType::NonJoining),
68 "T" => Ok(JoiningType::Transparent),
69 _ => err!(
70 "unrecognized joining type: '{}' \
71 (must be one of R, L, D, C, U or T)",
72 s
73 ),
74 }
75 }
76}
77
78impl UcdFile for ArabicShaping {
79 fn relative_file_path() -> &'static Path {
80 Path::new("ArabicShaping.txt")
81 }
82}
83
84impl UcdFileByCodepoint for ArabicShaping {
85 fn codepoints(&self) -> CodepointIter {
86 self.codepoint.into_iter()
87 }
88}
89
90impl std::str::FromStr for ArabicShaping {
91 type Err = Error;
92
93 fn from_str(line: &str) -> Result<ArabicShaping, Error> {
94 let re_parts = regex!(
95 r"(?x)
96 ^
97 \s*(?P<codepoint>[A-F0-9]+)\s*;
98 \s*(?P<name>[^;]+)\s*;
99 \s*(?P<joining_type>[^;]+)\s*;
100 \s*(?P<joining_group>[^;]+)
101 $
102 ",
103 );
104 let caps = match re_parts.captures(line.trim()) {
105 Some(caps) => caps,
106 None => return err!("invalid ArabicShaping line"),
107 };
108
109 Ok(ArabicShaping {
110 codepoint: caps["codepoint"].parse()?,
111 schematic_name: caps["name"].to_string(),
112 joining_type: caps["joining_type"].parse()?,
113 joining_group: caps["joining_group"].to_string(),
114 })
115 }
116}
117
#[cfg(test)]
mod tests {
    use crate::common::Codepoint;

    use super::{ArabicShaping, JoiningType};

    /// Builds the row a test expects the parser to produce.
    fn row(
        cp: u32,
        name: &str,
        joining_type: JoiningType,
        group: &str,
    ) -> ArabicShaping {
        ArabicShaping {
            codepoint: Codepoint::from_u32(cp).unwrap(),
            schematic_name: name.to_string(),
            joining_type,
            joining_group: group.to_string(),
        }
    }

    // A non-joining row whose joining group is the placeholder value.
    #[test]
    fn parse1() {
        let parsed = "0600; ARABIC NUMBER SIGN; U; No_Joining_Group\n"
            .parse::<ArabicShaping>()
            .unwrap();
        let expected = row(
            0x0600,
            "ARABIC NUMBER SIGN",
            JoiningType::NonJoining,
            "No_Joining_Group",
        );
        assert_eq!(parsed, expected);
    }

    // A dual-joining row whose name and group both contain spaces.
    #[test]
    fn parse2() {
        let parsed = "063D; FARSI YEH WITH INVERTED V ABOVE; D; FARSI YEH\n"
            .parse::<ArabicShaping>()
            .unwrap();
        let expected = row(
            0x063D,
            "FARSI YEH WITH INVERTED V ABOVE",
            JoiningType::DualJoining,
            "FARSI YEH",
        );
        assert_eq!(parsed, expected);
    }

    // A row whose codepoint has five hexadecimal digits.
    #[test]
    fn parse3() {
        let parsed =
            "10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA\n"
                .parse::<ArabicShaping>()
                .unwrap();
        let expected = row(
            0x10D23,
            "HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE",
            JoiningType::DualJoining,
            "HANIFI ROHINGYA KINNA YA",
        );
        assert_eq!(parsed, expected);
    }
}