hyeong/core/parse.rs
1use crate::core::area::Area;
2use crate::core::code::UnOptCode;
3
/// The six single-syllable command characters.
///
/// The order matches the command type numbering used by the parser
/// (`형` is `0`, `항` is `1`, ... `흑` is `5`).
pub(crate) const COMMANDS: &[char] = &['형', '항', '핫', '흣', '흡', '흑'];

/// Every character that is accepted as a heart inside the area part.
///
/// The parser uses the position in this slice plus `2` as the type of the heart node,
/// so the order of the elements must not be changed.
const HEARTS: &[char] = &[
    '♥', '❤', '💕', '💖', '💗', '💘', '💙', '💚', '💛', '💜', '💝', '♡',
];
8
/// Tell whether `c` is a precomposed hangul syllable.
///
/// A character counts as a hangul syllable when its code point lies in the
/// inclusive range `U+AC00` (`가`) ..= `U+D7A3` (`힣`).
///
/// # Example
///
/// ```
/// use hyeong::core::parse;
///
/// assert!(parse::is_hangul_syllable('가'));
/// assert!(parse::is_hangul_syllable('힣'));
/// assert!(!parse::is_hangul_syllable('a'));
/// assert!(!parse::is_hangul_syllable('م'));
/// assert!(!parse::is_hangul_syllable('ý'));
/// assert!(!parse::is_hangul_syllable('ם'));
/// assert!(!parse::is_hangul_syllable('न'));
/// assert!(!parse::is_hangul_syllable('こ'));
/// assert!(!parse::is_hangul_syllable('你'));
/// assert!(!parse::is_hangul_syllable('д'));
/// ```
pub fn is_hangul_syllable(c: char) -> bool {
    let syllables = '\u{AC00}'..='\u{D7A3}';
    syllables.contains(&c)
}
30
31/// Parse the code to unoptimized code
32/// Since the language itself has no compile error, it never returns error.
33///
34/// # State
35///
36/// This parsing algorithm is made with state.
37/// - `0`: before command starts: hangul, dot, area can come
38/// - `1`: when hangul part starts: hangul can come
39/// - `2`: when area part starts: hangul, area can come
40///
41/// # Terms
42///
43/// - starting character: `혀`, `하` or `흐`
44/// - ending character: `엉`, `앙`, `앗`, `읏`, `읍` or `윽`
45/// - area character: `?`, `!`, `♥`, `❤`, `💕`, `💖`, `💗`, `💘`, `💙`, `💚`, `💛`, `💜`, `💝` or `♡`
46/// - heart character: `♥`, `❤`, `💕`, `💖`, `💗`, `💘`, `💙`, `💚`, `💛`, `💜`, `💝` or `♡`
47/// - hangul part, dot part, area part:
48/// ```text
49/// 혀어어어어어어어어어어엉 .............. 💙?💕?♥!💝!!💘
50/// <- hangul part -> <- dot part -> <- area part ->
51/// ```
52///
53/// # Algorithm
54///
55/// ## Preprocessing
56///
57/// First, we have to preprocess the code to check if each character is valid.
58/// In greedy method, if the corresponing character(`엉` for `혀`, `앙` or `앗` for `하`, etc.)
59/// is not present after each starting character,
60///
61/// ## Main Algorithm
62///
63/// ### 0 State
64///
65/// In 0 state, we can have different scenarios.
66///
67/// 1. Before starting the whole command.
68/// - goto 1 state when starting character appears.
69/// 2. After finishing hangul part.
70/// - count dot
71/// 3. Before starting the area part. (Similar to 2)
72/// - goto 2 state when area character appears.
73///
74/// ### 1 State
75///
76/// In 1 state, just count hangul syllables until ending character appears.
77/// Then goto state 0(1)
78///
79/// ### 2 State
80///
81/// In 2 state, there are two binary operators: `?` and `!`
82/// So, we will create two [binary tree](../code/enum.Area.html)s for each operators.
83///
84/// - `?` operator
85/// 1. if tree is empty, put `?` as root
86/// 2. if most right node is heart character, change to to `?` and put it to the left.
87/// 3. if most right node is `?`, add to the right.
88/// - `!` operator
89/// 1. same as above.
90/// - heart character
91/// 1. if tree is empty, put in
92/// 2. if most right node is heart character, ignore.
93/// 3. if most right node is operator, add to the right.
94///
95/// # Time Complexity
96///
97/// - `O(n)` where `n := code.len()`
98/// - Iterates only twice: once for main loop, once for checking if the character is valid.
99///
100/// # Example
101///
102/// ```
103/// use hyeong::core::parse;
104///
105/// let parsed = parse::parse(String::from("형...?💖?"));
106///
107/// assert_eq!("type: 0, cnt1: 1, cnt2: 3, area: \"?_?💖_\"", format!("{:?}", parsed[0]));
108/// ```
109pub fn parse(code: String) -> Vec<UnOptCode> {
110 let mut res: Vec<UnOptCode> = Vec::new();
111
112 let mut hangul_count = 0usize;
113 let mut dot_count = 0usize;
114 let mut type_ = 10u8;
115 let mut loc = (1usize, 0usize);
116
117 let mut state = 0u8;
118 let mut area = Area::Nil;
119 let mut leaf = &mut area;
120 let mut qu_area = Area::Nil;
121 let mut qu_leaf = &mut qu_area;
122
123 let mut line_count = 0;
124 let mut last_line_started = 0;
125 let mut raw_command = String::new();
126
127 let mut max_pos = [0usize, 0usize, 0usize];
128 for (i, c) in code.chars().enumerate() {
129 if let Some(t) = "엉앙앗읏읍윽".find(c) {
130 max_pos[if t == 0 {
131 0
132 } else if t <= 6 {
133 1
134 } else {
135 2
136 }] = i;
137 }
138 }
139
140 for (i, c) in code.chars().enumerate() {
141 if c.is_whitespace() {
142 if c == '\n' {
143 line_count += 1;
144 last_line_started = i + 1;
145 }
146 continue;
147 }
148
149 state = match state {
150 0 | 2 => {
151 if let Some(mut t) = "형항핫흣흡흑혀하흐".find(c) {
152 t /= 3;
153
154 if t >= 6 && max_pos[t - 6] <= i {
155 continue;
156 }
157
158 if type_ != 10 {
159 res.push(UnOptCode::new(
160 type_,
161 hangul_count,
162 dot_count,
163 loc,
164 match qu_leaf {
165 Area::Val {
166 type_: _,
167 left: _,
168 ref mut right,
169 } => {
170 *right = Box::new(area);
171 qu_area
172 }
173 Area::Nil => area,
174 },
175 raw_command,
176 ));
177
178 area = Area::Nil;
179 leaf = &mut area;
180 qu_area = Area::Nil;
181 qu_leaf = &mut qu_area;
182 }
183
184 type_ = t as u8;
185 hangul_count = 1;
186 dot_count = 0;
187 loc = (line_count + 1, i - last_line_started);
188 raw_command = c.to_string();
189
190 if t < 6 {
191 0
192 } else {
193 1
194 }
195 } else if ".…⋯⋮".contains(c) {
196 if state == 0 {
197 dot_count += if c == '.' { 1 } else { 3 };
198 raw_command.push(c);
199 }
200 state
201 } else if c == '?' {
202 match qu_leaf {
203 Area::Val {
204 type_: _,
205 left: _,
206 ref mut right,
207 } => {
208 *right = Box::new(Area::Val {
209 type_: 0,
210 left: Box::new(area),
211 right: Box::new(Area::Nil),
212 });
213 qu_leaf = &mut *right;
214 }
215
216 Area::Nil => {
217 qu_area = Area::Val {
218 type_: 0,
219 left: Box::new(area),
220 right: Box::new(Area::Nil),
221 };
222 qu_leaf = &mut qu_area;
223 }
224 }
225
226 area = Area::Nil;
227 leaf = &mut area;
228 raw_command.push(c);
229 2
230 } else if c == '!' {
231 match leaf {
232 Area::Val {
233 ref type_,
234 left: _,
235 ref mut right,
236 } => {
237 if *type_ <= 1 {
238 *right = match right.as_ref() {
239 Area::Val {
240 type_: t,
241 left: _,
242 right: _,
243 } => Box::new(Area::Val {
244 type_: 1,
245 left: Box::new(Area::new(*t)),
246 right: Box::new(Area::Nil),
247 }),
248 Area::Nil => Box::new(Area::new(1)),
249 };
250 leaf = &mut *right;
251 } else {
252 area = Area::Val {
253 type_: 1,
254 left: Box::new(Area::new(*type_)),
255 right: Box::new(Area::Nil),
256 };
257 leaf = &mut area;
258 }
259 }
260 Area::Nil => {
261 area = Area::new(1);
262 leaf = &mut area;
263 }
264 }
265 raw_command.push(c);
266 2
267 } else if let Some(mut t) = HEARTS.iter().position(|&x| x == c) {
268 t += 2;
269 match leaf {
270 Area::Val {
271 ref type_,
272 left: _,
273 ref mut right,
274 } => {
275 if *type_ <= 1 {
276 match right.as_ref() {
277 Area::Nil => {
278 *right = Box::new(Area::new(t as u8));
279 }
280 _ => {}
281 }
282 }
283 }
284 Area::Nil => {
285 area = Area::new(t as u8);
286 leaf = &mut area;
287 }
288 }
289 raw_command.push(c);
290 2
291 } else {
292 continue;
293 }
294 }
295
296 // 1
297 _ => {
298 if is_hangul_syllable(c) {
299 hangul_count += 1;
300 raw_command.push(c);
301 }
302 match type_ {
303 6 => {
304 if "엉".contains(c) {
305 type_ = 0;
306 dot_count = 0;
307 0
308 } else {
309 1
310 }
311 }
312
313 7 => {
314 if let Some(t) = "앙앗".find(c) {
315 type_ = (t / 3 + 1) as u8;
316 dot_count = 0;
317 0
318 } else {
319 1
320 }
321 }
322
323 // 8
324 _ => {
325 if let Some(t) = "읏읍윽".find(c) {
326 type_ = (t / 3 + 3) as u8;
327 dot_count = 0;
328 0
329 } else {
330 1
331 }
332 }
333 }
334 }
335 };
336 }
337
338 if type_ != 10 {
339 res.push(UnOptCode::new(
340 type_,
341 hangul_count,
342 dot_count,
343 loc,
344 match qu_leaf {
345 Area::Val {
346 type_: _,
347 left: _,
348 ref mut right,
349 } => {
350 *right = Box::new(area);
351 qu_area
352 }
353 Area::Nil => area,
354 },
355 raw_command,
356 ));
357 }
358 res
359}