Skip to main content

hyeong/core/
parse.rs

1use crate::core::area::Area;
2use crate::core::code::UnOptCode;
3
/// Single-syllable command characters.
///
/// They are listed in the same order in which `parse` looks them up, so the
/// index of a character here equals the command type (`0..=5`) `parse` assigns to it.
pub(crate) const COMMANDS: &[char] = &['형', '항', '핫', '흣', '흡', '흑'];
/// Heart characters that may appear in the area part of a command.
///
/// `parse` uses a heart's index in this slice plus `2` as its area type code
/// (`0` and `1` are taken by the `?` and `!` operators).
const HEARTS: &[char] = &[
    '♥', '❤', '💕', '💖', '💗', '💘', '💙', '💚', '💛', '💜', '💝', '♡',
];
8
/// Check if the character is a precomposed hangul syllable
///
/// Returns `true` exactly when `c` lies in the inclusive code point range
/// `U+AC00` (`가`) to `U+D7A3` (`힣`). Standalone jamo such as `ㄱ` are outside
/// that range and therefore yield `false`.
///
/// # Example
///
/// ```
/// use hyeong::core::parse;
///
/// assert_eq!(true, parse::is_hangul_syllable('가'));
/// assert_eq!(true, parse::is_hangul_syllable('힣'));
/// assert_eq!(false, parse::is_hangul_syllable('a'));
/// assert_eq!(false, parse::is_hangul_syllable('م'));
/// assert_eq!(false, parse::is_hangul_syllable('ý'));
/// assert_eq!(false, parse::is_hangul_syllable('ם'));
/// assert_eq!(false, parse::is_hangul_syllable('न'));
/// assert_eq!(false, parse::is_hangul_syllable('こ'));
/// assert_eq!(false, parse::is_hangul_syllable('你'));
/// assert_eq!(false, parse::is_hangul_syllable('д'));
/// ```
pub fn is_hangul_syllable(c: char) -> bool {
    ('\u{AC00}'..='\u{D7A3}').contains(&c)
}
30
31/// Parse the code to unoptimized code
32/// Since the language itself has no compile error, it never returns error.
33///
34/// # State
35///
36/// This parsing algorithm is made with state.
37/// - `0`: before command starts: hangul, dot, area can come
38/// - `1`: when hangul part starts: hangul can come
39/// - `2`: when area part starts: hangul, area can come
40///
41/// # Terms
42///
43/// - starting character: `혀`, `하` or `흐`
44/// - ending character: `엉`, `앙`, `앗`, `읏`, `읍` or `윽`
45/// - area character: `?`, `!`, `♥`, `❤`, `💕`, `💖`, `💗`, `💘`, `💙`, `💚`, `💛`, `💜`, `💝` or `♡`
46/// - heart character: `♥`, `❤`, `💕`, `💖`, `💗`, `💘`, `💙`, `💚`, `💛`, `💜`, `💝` or `♡`
47/// - hangul part, dot part, area part:
48///   ```text
49///   혀어어어어어어어어어어엉 .............. 💙?💕?♥!💝!!💘
50///   <- hangul part -> <- dot part -> <- area part ->
51///   ```
52///
53/// # Algorithm
54///
55/// ## Preprocessing
56///
57/// First, we have to preprocess the code to check if each character is valid.
58/// In greedy method, if the corresponing character(`엉` for `혀`, `앙` or `앗` for `하`, etc.)
59/// is not present after each starting character,
60///
61/// ## Main Algorithm
62///
63/// ### 0 State
64///
65/// In 0 state, we can have different scenarios.
66///
67/// 1. Before starting the whole command.
68///    - goto 1 state when starting character appears.
69/// 2. After finishing hangul part.
70///    - count dot
71/// 3. Before starting the area part. (Similar to 2)
72///    - goto 2 state when area character appears.
73///
74/// ### 1 State
75///
76/// In 1 state, just count hangul syllables until ending character appears.
77/// Then goto state 0(1)
78///
79/// ### 2 State
80///
81/// In 2 state, there are two binary operators: `?` and `!`
82/// So, we will create two [binary tree](../code/enum.Area.html)s for each operators.
83///
84/// - `?` operator
85///   1. if tree is empty, put `?` as root
86///   2. if most right node is heart character, change to to `?` and put it to the left.
87///   3. if most right node is `?`, add to the right.
88/// - `!` operator
89///   1. same as above.
90/// - heart character
91///   1. if tree is empty, put in
92///   2. if most right node is heart character, ignore.
93///   3. if most right node is operator, add to the right.
94///
95/// # Time Complexity
96///
97/// - `O(n)` where `n := code.len()`
98/// - Iterates only twice: once for main loop, once for checking if the character is valid.
99///
100/// # Example
101///
102/// ```
103/// use hyeong::core::parse;
104///
105/// let parsed = parse::parse(String::from("형...?💖?"));
106///
107/// assert_eq!("type: 0, cnt1: 1, cnt2: 3, area: \"?_?💖_\"", format!("{:?}", parsed[0]));
108/// ```
109pub fn parse(code: String) -> Vec<UnOptCode> {
110    let mut res: Vec<UnOptCode> = Vec::new();
111
112    let mut hangul_count = 0usize;
113    let mut dot_count = 0usize;
114    let mut type_ = 10u8;
115    let mut loc = (1usize, 0usize);
116
117    let mut state = 0u8;
118    let mut area = Area::Nil;
119    let mut leaf = &mut area;
120    let mut qu_area = Area::Nil;
121    let mut qu_leaf = &mut qu_area;
122
123    let mut line_count = 0;
124    let mut last_line_started = 0;
125    let mut raw_command = String::new();
126
127    let mut max_pos = [0usize, 0usize, 0usize];
128    for (i, c) in code.chars().enumerate() {
129        if let Some(t) = "엉앙앗읏읍윽".find(c) {
130            max_pos[if t == 0 {
131                0
132            } else if t <= 6 {
133                1
134            } else {
135                2
136            }] = i;
137        }
138    }
139
140    for (i, c) in code.chars().enumerate() {
141        if c.is_whitespace() {
142            if c == '\n' {
143                line_count += 1;
144                last_line_started = i + 1;
145            }
146            continue;
147        }
148
149        state = match state {
150            0 | 2 => {
151                if let Some(mut t) = "형항핫흣흡흑혀하흐".find(c) {
152                    t /= 3;
153
154                    if t >= 6 && max_pos[t - 6] <= i {
155                        continue;
156                    }
157
158                    if type_ != 10 {
159                        res.push(UnOptCode::new(
160                            type_,
161                            hangul_count,
162                            dot_count,
163                            loc,
164                            match qu_leaf {
165                                Area::Val {
166                                    type_: _,
167                                    left: _,
168                                    ref mut right,
169                                } => {
170                                    *right = Box::new(area);
171                                    qu_area
172                                }
173                                Area::Nil => area,
174                            },
175                            raw_command,
176                        ));
177
178                        area = Area::Nil;
179                        leaf = &mut area;
180                        qu_area = Area::Nil;
181                        qu_leaf = &mut qu_area;
182                    }
183
184                    type_ = t as u8;
185                    hangul_count = 1;
186                    dot_count = 0;
187                    loc = (line_count + 1, i - last_line_started);
188                    raw_command = c.to_string();
189
190                    if t < 6 {
191                        0
192                    } else {
193                        1
194                    }
195                } else if ".…⋯⋮".contains(c) {
196                    if state == 0 {
197                        dot_count += if c == '.' { 1 } else { 3 };
198                        raw_command.push(c);
199                    }
200                    state
201                } else if c == '?' {
202                    match qu_leaf {
203                        Area::Val {
204                            type_: _,
205                            left: _,
206                            ref mut right,
207                        } => {
208                            *right = Box::new(Area::Val {
209                                type_: 0,
210                                left: Box::new(area),
211                                right: Box::new(Area::Nil),
212                            });
213                            qu_leaf = &mut *right;
214                        }
215
216                        Area::Nil => {
217                            qu_area = Area::Val {
218                                type_: 0,
219                                left: Box::new(area),
220                                right: Box::new(Area::Nil),
221                            };
222                            qu_leaf = &mut qu_area;
223                        }
224                    }
225
226                    area = Area::Nil;
227                    leaf = &mut area;
228                    raw_command.push(c);
229                    2
230                } else if c == '!' {
231                    match leaf {
232                        Area::Val {
233                            ref type_,
234                            left: _,
235                            ref mut right,
236                        } => {
237                            if *type_ <= 1 {
238                                *right = match right.as_ref() {
239                                    Area::Val {
240                                        type_: t,
241                                        left: _,
242                                        right: _,
243                                    } => Box::new(Area::Val {
244                                        type_: 1,
245                                        left: Box::new(Area::new(*t)),
246                                        right: Box::new(Area::Nil),
247                                    }),
248                                    Area::Nil => Box::new(Area::new(1)),
249                                };
250                                leaf = &mut *right;
251                            } else {
252                                area = Area::Val {
253                                    type_: 1,
254                                    left: Box::new(Area::new(*type_)),
255                                    right: Box::new(Area::Nil),
256                                };
257                                leaf = &mut area;
258                            }
259                        }
260                        Area::Nil => {
261                            area = Area::new(1);
262                            leaf = &mut area;
263                        }
264                    }
265                    raw_command.push(c);
266                    2
267                } else if let Some(mut t) = HEARTS.iter().position(|&x| x == c) {
268                    t += 2;
269                    match leaf {
270                        Area::Val {
271                            ref type_,
272                            left: _,
273                            ref mut right,
274                        } => {
275                            if *type_ <= 1 {
276                                match right.as_ref() {
277                                    Area::Nil => {
278                                        *right = Box::new(Area::new(t as u8));
279                                    }
280                                    _ => {}
281                                }
282                            }
283                        }
284                        Area::Nil => {
285                            area = Area::new(t as u8);
286                            leaf = &mut area;
287                        }
288                    }
289                    raw_command.push(c);
290                    2
291                } else {
292                    continue;
293                }
294            }
295
296            // 1
297            _ => {
298                if is_hangul_syllable(c) {
299                    hangul_count += 1;
300                    raw_command.push(c);
301                }
302                match type_ {
303                    6 => {
304                        if "엉".contains(c) {
305                            type_ = 0;
306                            dot_count = 0;
307                            0
308                        } else {
309                            1
310                        }
311                    }
312
313                    7 => {
314                        if let Some(t) = "앙앗".find(c) {
315                            type_ = (t / 3 + 1) as u8;
316                            dot_count = 0;
317                            0
318                        } else {
319                            1
320                        }
321                    }
322
323                    // 8
324                    _ => {
325                        if let Some(t) = "읏읍윽".find(c) {
326                            type_ = (t / 3 + 3) as u8;
327                            dot_count = 0;
328                            0
329                        } else {
330                            1
331                        }
332                    }
333                }
334            }
335        };
336    }
337
338    if type_ != 10 {
339        res.push(UnOptCode::new(
340            type_,
341            hangul_count,
342            dot_count,
343            loc,
344            match qu_leaf {
345                Area::Val {
346                    type_: _,
347                    left: _,
348                    ref mut right,
349                } => {
350                    *right = Box::new(area);
351                    qu_area
352                }
353                Area::Nil => area,
354            },
355            raw_command,
356        ));
357    }
358    res
359}