hangul-cd 0.1.1

hangul-cd (compose-decompose): Rust helpers for composing and decomposing Hangul syllable blocks and words.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
use std::fmt::Debug;

use thiserror::Error;

use crate::{block::*, jamo::*};

/// A composer for a single Hangul word, made up of multiple syllable blocks.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum WordError {
    /// Occurs when there is an error related to syllable blocks.
    #[error("Block error: {0}")]
    BlockError(#[from] BlockError),

    /// Occurs when there is an error related to Jamo letters.
    #[error("Jamo error: {0}")]
    JamoError(#[from] JamoError),

    /// Tried to start a new block while pushing Jamo, but it was not possible.
    /// The reason is provided in the `BlockPushResult`.
    #[error("Could not start new block with character '{0}'; reason: {1:?}")]
    CouldNotStartNewBlock(char, BlockPushResult),

    /// Tried popping from an empty word (no Jamo to pop).
    #[error("Tried popping from empty word")]
    NothingToPop,

    /// Tried to complete the current block, but it only contains one Jamo.
    #[error("Cannot complete current block; currently contains only one Jamo: {0:?}")]
    CannotCompleteCurrentBlock(Jamo),
}

/// A composer for a single Hangul word, made up of multiple syllable blocks.
/// The `HangulWordComposer` maintains a list of completed `HangulBlock`s and a
/// `BlockComposer` for the current syllable block being composed.
///
/// **API:**
/// ```rust
/// use hangul_cd::word::{HangulWordComposer, WordPushResult};
/// use hangul_cd::jamo::{Jamo, JamoConsonantSingular, JamoVowelSingular};
///
/// let mut composer = HangulWordComposer::new();
///
/// // Push characters to form Hangul syllables
/// assert_eq!(composer.push_char('ㅇ').unwrap(), WordPushResult::Continue);
/// assert_eq!(composer.push_char('ㅏ').unwrap(), WordPushResult::Continue);
/// assert_eq!(composer.push_char('ㄴ').unwrap(), WordPushResult::Continue);
/// assert_eq!(composer.push_char('ㄴ').unwrap(), WordPushResult::Continue);
/// assert_eq!(composer.push_char('ㅕ').unwrap(), WordPushResult::Continue);
/// assert_eq!(composer.push_char('ㅇ').unwrap(), WordPushResult::Continue);
///
/// // Get the composed string
/// let result = composer.as_string().unwrap();
/// assert_eq!(result, "안녕".to_string());
///
/// // Popping characters removes jamo in reverse order
/// assert_eq!(
///     composer.pop().unwrap(),
///     Some(Jamo::Consonant(JamoConsonantSingular::Ieung))
/// );
/// assert_eq!(
///     composer.pop().unwrap(),
///     Some(Jamo::Vowel(JamoVowelSingular::Yeo))
/// );
/// assert_eq!(
///     composer.pop().unwrap(),
///     Some(Jamo::Consonant(JamoConsonantSingular::Nieun))
/// );
/// assert_eq!(composer.as_string().unwrap(), "안".to_string());
/// ```
#[derive(Debug)]
pub struct HangulWordComposer {
    prev_blocks: Vec<HangulBlock>,
    cur_block: BlockComposer,
}

/// The result of attempting to push a character into the `HangulWordComposer`.
#[derive(Debug, PartialEq, Eq)]
pub enum WordPushResult {
    /// The character was successfully pushed and composition can continue.
    Continue,

    /// The character could not be pushed because it would result in an invalid
    /// Hangul syllable.
    InvalidHangul,

    /// The character was not pushed because it is not a Hangul character.
    NonHangul,
}

impl HangulWordComposer {
    /// Creates a new, empty `HangulWordComposer`.
    pub fn new() -> Self {
        HangulWordComposer {
            prev_blocks: Vec::new(),
            cur_block: BlockComposer::new(),
        }
    }

    /// Pushes a character into the `HangulWordComposer` if valid and returns a
    /// result indicating the outcome.
    ///
    /// If pushing would make a valid Hangul syllable, the new character is
    /// appended and `WordPushResult::Continue` is returned.
    ///
    /// If pushing the character would result in an invalid Hangul syllable,
    /// but the character is Hangul and can start a new syllable block, the current
    /// block is completed, a new block is started with the pushed character,
    /// and `WordPushResult::Continue` is returned.
    ///
    /// If the character is Hangul but cannot form a valid syllable in either
    /// the current or a new block, `WordPushResult::InvalidHangul` is returned.
    ///
    /// If the character is not Hangul, `WordPushResult::NonHangul` is returned.
    pub fn push_char(&mut self, c: char) -> Result<WordPushResult, WordError> {
        match Character::from_char(c)? {
            Character::Hangul(jamo) => self.push(&jamo),
            Character::NonHangul(_) => Ok(WordPushResult::NonHangul),
        }
    }

    /// Pushes a Jamo letter into the `HangulWordComposer`. Acts the same as
    /// `push_char`, but takes a `Jamo` instead of a `char`.
    /// Pushing appends to the current syllable block if that would make a
    /// valid Hangul syllable; otherwise, it completes the current block and
    /// creates a new block with the pushed character.
    pub fn push(&mut self, letter: &Jamo) -> Result<WordPushResult, WordError> {
        match self.cur_block.push(letter) {
            BlockPushResult::Success => Ok(WordPushResult::Continue),
            BlockPushResult::InvalidHangul => Ok(WordPushResult::InvalidHangul),
            BlockPushResult::NonHangul => Ok(WordPushResult::NonHangul),
            BlockPushResult::StartNewBlockNoPop => match self.start_new_block(letter.clone()) {
                Ok(_) => Ok(WordPushResult::Continue),
                Err(e) => Err(e),
            },
            BlockPushResult::PopAndStartNewBlock => {
                match self.pop_and_start_new_block(letter.clone()) {
                    Ok(_) => Ok(WordPushResult::Continue),
                    Err(e) => Err(e),
                }
            }
        }
    }

    /// Pops the last Jamo letter from the `HangulWordComposer`.
    /// If the current syllable block has letters, it will remove the last
    /// letter from it. If the current block is empty, it will set the last
    /// completed block as the currently-active block and remove one Jamo
    /// from it if possible.
    ///
    /// Returns `Ok(Some(Jamo))` if a letter was successfully removed,
    /// `Ok(None)` if there are no letters to remove, or `Err(String)` if an
    /// error occurred during the operation.
    pub fn pop(&mut self) -> Result<Option<Jamo>, WordError> {
        match self.cur_block.pop() {
            BlockPopStatus::PoppedAndNonEmpty(l) => Ok(Some(l)),
            BlockPopStatus::PoppedAndEmpty(l) => {
                self.prev_block_to_cur()?;
                Ok(Some(l))
            }
            BlockPopStatus::None => {
                self.prev_block_to_cur()?;
                Ok(None)
            }
        }
    }

    fn prev_block_to_cur(&mut self) -> Result<(), WordError> {
        if let Some(last_block) = self.prev_blocks.pop() {
            self.cur_block = BlockComposer::from_composed_block(&last_block)?;
            Ok(())
        } else {
            Ok(())
        }
    }

    fn pop_and_start_new_block(&mut self, letter: Jamo) -> Result<(), WordError> {
        match self.cur_block.pop_end_consonant() {
            Some(l) => {
                self.complete_current_block()?;
                self.cur_block.push(&l);
                match self.cur_block.push(&letter) {
                    BlockPushResult::Success => Ok(()),
                    other => Err(WordError::CouldNotStartNewBlock(
                        letter.char_compatibility(),
                        other,
                    )),
                }
            }
            None => Err(WordError::NothingToPop),
        }
    }

    fn start_new_block(&mut self, letter: Jamo) -> Result<(), WordError> {
        self.complete_current_block()?;
        match self.cur_block.push(&letter) {
            BlockPushResult::Success => Ok(()),
            other => Err(WordError::CouldNotStartNewBlock(
                letter.char_compatibility(),
                other,
            )),
        }
    }

    /// Returns the composed string for the current Hangul word.
    /// This includes all completed syllable blocks and the current block,
    /// even if it is incomplete.
    pub fn as_string(&self) -> Result<String, WordError> {
        let mut result = hangul_blocks_vec_to_string(&self.prev_blocks)?;
        let cur_as_char = self.cur_block.block_as_string()?;
        if let Some(c) = cur_as_char {
            result.push(c);
        }
        Ok(result)
    }

    fn complete_current_block(&mut self) -> Result<(), WordError> {
        match self.cur_block.try_as_complete_block()? {
            BlockCompletionStatus::Complete(block) => {
                self.prev_blocks.push(block);
                self.cur_block = BlockComposer::new();
                Ok(())
            }
            BlockCompletionStatus::Incomplete(c) => Err(WordError::CannotCompleteCurrentBlock(c)),
            BlockCompletionStatus::Empty => {
                // Nothing to complete
                Ok(())
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn start_new_block_valid() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue),);
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue),);
        assert_eq!(
            composer.prev_blocks,
            vec![HangulBlock {
                initial: Jamo::Consonant(JamoConsonantSingular::Giyeok),
                vowel: Jamo::Vowel(JamoVowelSingular::A),
                final_optional: Some(Jamo::Consonant(JamoConsonantSingular::Nieun)),
            }]
        );
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue),);
        assert_eq!(
            composer.prev_blocks,
            vec![
                HangulBlock {
                    initial: Jamo::Consonant(JamoConsonantSingular::Giyeok),
                    vowel: Jamo::Vowel(JamoVowelSingular::A),
                    final_optional: Some(Jamo::Consonant(JamoConsonantSingular::Nieun)),
                },
                HangulBlock {
                    initial: Jamo::Consonant(JamoConsonantSingular::Ieung),
                    vowel: Jamo::Vowel(JamoVowelSingular::Yo),
                    final_optional: None,
                }
            ]
        );
    }

    #[test]
    fn start_new_block_invalid() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(
            composer.start_new_block(Jamo::Vowel(JamoVowelSingular::A)),
            Err(WordError::CouldNotStartNewBlock(
                '',
                BlockPushResult::InvalidHangul
            ))
        );
        let _ = composer.push_char('');
        assert_eq!(
            composer.start_new_block(Jamo::CompositeVowel(JamoVowelComposite::Wae)),
            Err(WordError::CannotCompleteCurrentBlock(Jamo::Consonant(
                JamoConsonantSingular::Giyeok
            )))
        );
    }

    #[test]
    fn push_char_valid() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue),);
    }

    #[test]
    fn push_char_invalid_hangul() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::InvalidHangul));
    }

    #[test]
    fn push_char_next_block() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
    }

    #[test]
    fn push_char_non_hangul() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char('A'), Ok(WordPushResult::NonHangul));
    }

    #[test]
    fn test_single_word_안녕하세요_as_string() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        let result_string = composer.as_string().unwrap();
        assert_eq!(result_string, "안녕하세요".to_string());
    }

    #[test]
    fn test_single_word_앖어요_as_string() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        let result_string = composer.as_string().unwrap();
        assert_eq!(result_string, "없어요".to_string());
    }

    #[test]
    fn test_incomplete_block_as_string() {
        let mut composer = HangulWordComposer::new();

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        let result_string = composer.as_string().unwrap();
        assert_eq!(result_string, "".to_string());
    }

    #[test]
    fn test_deletions() {
        let mut composer = HangulWordComposer::new();
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop(), Ok(None));
    }

    #[test]
    fn test_deletion_then_write_again() {
        let mut composer = HangulWordComposer::new();
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');

        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        let result_string = composer.as_string().unwrap();
        assert_eq!(result_string, "".to_string());
    }

    #[test]
    fn deletion_removes_empty_block() {
        let mut composer = HangulWordComposer::new();
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        assert_eq!(composer.pop().unwrap().unwrap().char_compatibility(), '');
        // if current block is still empty, as_string should fail
        assert_eq!(composer.as_string().unwrap(), "".to_string());
    }

    #[test]
    fn test_complete_current_block() {
        let mut composer = HangulWordComposer::new();
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));
        assert_eq!(composer.push_char(''), Ok(WordPushResult::Continue));

        assert!(composer.complete_current_block().is_ok());

        assert_eq!(composer.prev_blocks.len(), 1);
        assert_eq!(composer.cur_block, BlockComposer::new());

        let result_string = composer.as_string().unwrap();
        assert_eq!(result_string, "".to_string());
    }
}