japanese_text/
lib.rs

1//! # japanese-text
2//!
3//! 日本語テキスト正規化のための軽量なRustライブラリ
4//!
5//! ## 特徴
6//!
7//! - 全角⇔半角変換（ASCII文字）
8//! - カタカナ⇔ひらがな変換
9//! - シンプルでゼロ依存の実装
10//!
11//! ## 使用例
12//!
13//! ```
14//! use japanese_text::*;
15//!
16//! // 全角→半角変換
17//! assert_eq!(to_half_width("ＡＢＣ１２３"), "ABC123");
18//!
19//! // 半角→全角変換
20//! assert_eq!(to_full_width("ABC123"), "ＡＢＣ１２３");
21//!
22//! // カタカナ→ひらがな変換
23//! assert_eq!(to_hiragana("カタカナ"), "かたかな");
24//!
25//! // ひらがな→カタカナ変換
26//! assert_eq!(to_katakana("ひらがな"), "ヒラガナ");
27//! ```
28
/// Converts full-width ASCII variants to their half-width counterparts.
///
/// Characters in the full-width range U+FF01–U+FF5E are shifted down to the
/// matching ASCII characters U+0021–U+007E, and the full-width space is
/// replaced by an ordinary space. Every other character is copied unchanged.
///
/// # Examples
///
/// ```
/// use japanese_text::to_half_width;
///
/// assert_eq!(to_half_width("ＡＢＣ"), "ABC");
/// assert_eq!(to_half_width("１２３"), "123");
/// assert_eq!(to_half_width("！＠＃"), "!@#");
/// assert_eq!(to_half_width("Hello　World"), "Hello World");
/// ```
pub fn to_half_width(input: &str) -> String {
    const FULL_WIDTH_FIRST: u32 = 0xFF01;
    const FULL_WIDTH_LAST: u32 = 0xFF5E;
    const HALF_WIDTH_FIRST: u32 = 0x0021;

    let mut narrowed = String::new();
    for ch in input.chars() {
        let code = ch as u32;
        let replacement = if ch == '　' {
            // The full-width space lives outside the FF01–FF5E range.
            ' '
        } else if (FULL_WIDTH_FIRST..=FULL_WIDTH_LAST).contains(&code) {
            char::from_u32(code - FULL_WIDTH_FIRST + HALF_WIDTH_FIRST).unwrap_or(ch)
        } else {
            ch
        };
        narrowed.push(replacement);
    }
    narrowed
}
61
/// Converts half-width ASCII characters to their full-width counterparts.
///
/// Printable ASCII characters U+0021–U+007E are shifted up to the full-width
/// range U+FF01–U+FF5E, and an ordinary space becomes the full-width space.
/// Every other character is copied unchanged.
///
/// # Examples
///
/// ```
/// use japanese_text::to_full_width;
///
/// assert_eq!(to_full_width("ABC"), "ＡＢＣ");
/// assert_eq!(to_full_width("123"), "１２３");
/// assert_eq!(to_full_width("!@#"), "！＠＃");
/// assert_eq!(to_full_width("Hello World"), "Ｈｅｌｌｏ　Ｗｏｒｌｄ");
/// ```
pub fn to_full_width(input: &str) -> String {
    /// Widens a single character, leaving anything outside ASCII untouched.
    fn widen(ch: char) -> char {
        if ch == ' ' {
            return '　';
        }
        if ('\u{0021}'..='\u{007E}').contains(&ch) {
            let shifted = ch as u32 - 0x0021 + 0xFF01;
            return char::from_u32(shifted).unwrap_or(ch);
        }
        ch
    }

    input.chars().map(widen).collect()
}
94
/// Converts katakana to hiragana.
///
/// Katakana in the range U+30A1–U+30F6 are shifted to the corresponding
/// hiragana in U+3041–U+3096. Characters outside that range (including the
/// prolonged sound mark `ー`) are copied unchanged.
///
/// # Examples
///
/// ```
/// use japanese_text::to_hiragana;
///
/// assert_eq!(to_hiragana("カタカナ"), "かたかな");
/// assert_eq!(to_hiragana("コンニチハ"), "こんにちは");
/// assert_eq!(to_hiragana("ヴァイオリン"), "ゔぁいおりん");
/// ```
pub fn to_hiragana(input: &str) -> String {
    const KATAKANA_FIRST: u32 = 0x30A1;
    const KATAKANA_LAST: u32 = 0x30F6;
    const HIRAGANA_FIRST: u32 = 0x3041;

    let mut output = String::new();
    for ch in input.chars() {
        let code = ch as u32;
        if code < KATAKANA_FIRST || code > KATAKANA_LAST {
            output.push(ch);
            continue;
        }
        let shifted = code - KATAKANA_FIRST + HIRAGANA_FIRST;
        output.push(char::from_u32(shifted).unwrap_or(ch));
    }
    output
}
124
/// Converts hiragana to katakana.
///
/// Hiragana in the range U+3041–U+3096 are shifted to the corresponding
/// katakana in U+30A1–U+30F6. Every other character is copied unchanged.
///
/// # Examples
///
/// ```
/// use japanese_text::to_katakana;
///
/// assert_eq!(to_katakana("ひらがな"), "ヒラガナ");
/// assert_eq!(to_katakana("こんにちは"), "コンニチハ");
/// assert_eq!(to_katakana("ゔぁいおりん"), "ヴァイオリン");
/// ```
pub fn to_katakana(input: &str) -> String {
    // Distance between a hiragana code point and its katakana twin.
    const KANA_OFFSET: u32 = 0x30A1 - 0x3041;

    input
        .chars()
        .map(|ch| {
            if ('\u{3041}'..='\u{3096}').contains(&ch) {
                char::from_u32(ch as u32 + KANA_OFFSET).unwrap_or(ch)
            } else {
                ch
            }
        })
        .collect()
}
154
/// Returns `true` when `c` is a hiragana character in U+3041–U+3096.
///
/// # Examples
///
/// ```
/// use japanese_text::is_hiragana;
///
/// assert!(is_hiragana('あ'));
/// assert!(!is_hiragana('ア'));
/// assert!(!is_hiragana('A'));
/// ```
pub fn is_hiragana(c: char) -> bool {
    ('\u{3041}'..='\u{3096}').contains(&c)
}
169
/// Returns `true` when `c` is a full-width katakana character in U+30A1–U+30F6.
///
/// Characters outside that range, such as the prolonged sound mark `ー`
/// (U+30FC), are not counted as katakana by this function.
///
/// # Examples
///
/// ```
/// use japanese_text::is_katakana;
///
/// assert!(is_katakana('ア'));
/// assert!(!is_katakana('あ'));
/// assert!(!is_katakana('A'));
/// ```
pub fn is_katakana(c: char) -> bool {
    ('\u{30A1}'..='\u{30F6}').contains(&c)
}
184
/// Returns `true` when `c` lies in the half-width katakana range U+FF61–U+FF9F.
///
/// The range also contains the half-width CJK punctuation marks and the
/// half-width voiced / semi-voiced sound marks.
///
/// # Examples
///
/// ```
/// use japanese_text::is_half_width_katakana;
///
/// assert!(is_half_width_katakana('ｱ'));
/// assert!(!is_half_width_katakana('ア'));
/// assert!(!is_half_width_katakana('A'));
/// ```
pub fn is_half_width_katakana(c: char) -> bool {
    ('\u{FF61}'..='\u{FF9F}').contains(&c)
}
199
/// Returns `true` when `c` is a kanji in the CJK Unified Ideographs range
/// U+4E00–U+9FFF.
///
/// Ideographs outside that range are not recognised.
///
/// # Examples
///
/// ```
/// use japanese_text::is_kanji;
///
/// assert!(is_kanji('漢'));
/// assert!(is_kanji('字'));
/// assert!(!is_kanji('あ'));
/// assert!(!is_kanji('A'));
/// ```
pub fn is_kanji(c: char) -> bool {
    ('\u{4E00}'..='\u{9FFF}').contains(&c)
}
215
/// Returns `true` when `c` is a full-width ASCII variant (U+FF01–U+FF5E) or
/// the full-width space.
///
/// # Examples
///
/// ```
/// use japanese_text::is_full_width;
///
/// assert!(is_full_width('Ａ'));
/// assert!(is_full_width('１'));
/// assert!(!is_full_width('A'));
/// ```
pub fn is_full_width(c: char) -> bool {
    c == '　' || ('\u{FF01}'..='\u{FF5E}').contains(&c)
}
230
231/// 文字列内の各文字種の数をカウントします。
232///
233/// # 使用例
234///
235/// ```
236/// use japanese_text::count_character_types;
237///
238/// let counts = count_character_types("あア漢ABC123");
239/// assert_eq!(counts.hiragana, 1);
240/// assert_eq!(counts.katakana, 1);
241/// assert_eq!(counts.kanji, 1);
242/// assert_eq!(counts.ascii, 6);
243/// ```
244#[derive(Debug, Clone, PartialEq, Eq)]
245pub struct CharacterTypes {
246    pub hiragana: usize,
247    pub katakana: usize,
248    pub half_width_katakana: usize,
249    pub kanji: usize,
250    pub ascii: usize,
251    pub full_width: usize,
252    pub other: usize,
253}
254
255pub fn count_character_types(input: &str) -> CharacterTypes {
256    let mut counts = CharacterTypes {
257        hiragana: 0,
258        katakana: 0,
259        half_width_katakana: 0,
260        kanji: 0,
261        ascii: 0,
262        full_width: 0,
263        other: 0,
264    };
265
266    for c in input.chars() {
267        if is_hiragana(c) {
268            counts.hiragana += 1;
269        } else if is_katakana(c) {
270            counts.katakana += 1;
271        } else if is_half_width_katakana(c) {
272            counts.half_width_katakana += 1;
273        } else if is_kanji(c) {
274            counts.kanji += 1;
275        } else if c.is_ascii() {
276            counts.ascii += 1;
277        } else if is_full_width(c) {
278            counts.full_width += 1;
279        } else {
280            counts.other += 1;
281        }
282    }
283
284    counts
285}
286
/// Normalizes whitespace in `input`.
///
/// Every run of Unicode whitespace (spaces, tabs, newlines, the full-width
/// space U+3000, ...) is collapsed into a single ASCII space, and leading and
/// trailing whitespace is removed.
///
/// # Examples
///
/// ```
/// use japanese_text::normalize_whitespace;
///
/// assert_eq!(normalize_whitespace("Hello　World"), "Hello World");
/// assert_eq!(normalize_whitespace("A\t\tB"), "A B");
/// assert_eq!(normalize_whitespace("  padded  "), "padded");
/// ```
pub fn normalize_whitespace(input: &str) -> String {
    // `split_whitespace` already splits on every Unicode whitespace character,
    // U+3000 included, so no pre-pass over the characters is needed.
    input.split_whitespace().collect::<Vec<_>>().join(" ")
}
312
/// Converts half-width katakana (and half-width CJK punctuation) to full-width.
///
/// A base character followed by the half-width voiced sound mark (U+FF9E) or
/// semi-voiced sound mark (U+FF9F) is merged into the single precomposed
/// full-width character when one exists (for example `ｶﾞ` becomes `ガ`).
/// A sound mark that cannot be merged with the character before it is emitted
/// as the full-width spacing mark (U+309B / U+309C) so that no half-width
/// katakana remains in the output. All other characters are copied unchanged.
///
/// # Examples
///
/// ```
/// use japanese_text::half_width_katakana_to_full_width;
///
/// assert_eq!(half_width_katakana_to_full_width("ｶﾀｶﾅ"), "カタカナ");
/// assert_eq!(half_width_katakana_to_full_width("ｶﾞｷﾞｸﾞｹﾞｺﾞ"), "ガギグゲゴ");
/// assert_eq!(half_width_katakana_to_full_width("ﾊﾟﾋﾟﾌﾟﾍﾟﾎﾟ"), "パピプペポ");
/// ```
pub fn half_width_katakana_to_full_width(input: &str) -> String {
    const HALF_WIDTH_VOICED_MARK: char = '\u{FF9E}';
    const HALF_WIDTH_SEMI_VOICED_MARK: char = '\u{FF9F}';

    // Half-width and full-width katakana both take three bytes in UTF-8, so
    // the output never exceeds the input length.
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        // Look one character ahead to see whether a sound mark follows.
        let composed = match chars.peek().copied() {
            Some(HALF_WIDTH_VOICED_MARK) => hw_katakana_voiced(c),
            Some(HALF_WIDTH_SEMI_VOICED_MARK) => hw_katakana_semi_voiced(c),
            _ => None,
        };

        match composed {
            Some(full) => {
                result.push(full);
                // The sound mark has been folded into `full`; consume it.
                chars.next();
            }
            None => result.push(hw_katakana_plain(c)),
        }
    }

    result
}

/// Returns the precomposed voiced full-width katakana for a half-width base, if one exists.
fn hw_katakana_voiced(base: char) -> Option<char> {
    let full = match base {
        'ｶ' => 'ガ', 'ｷ' => 'ギ', 'ｸ' => 'グ', 'ｹ' => 'ゲ', 'ｺ' => 'ゴ',
        'ｻ' => 'ザ', 'ｼ' => 'ジ', 'ｽ' => 'ズ', 'ｾ' => 'ゼ', 'ｿ' => 'ゾ',
        'ﾀ' => 'ダ', 'ﾁ' => 'ヂ', 'ﾂ' => 'ヅ', 'ﾃ' => 'デ', 'ﾄ' => 'ド',
        'ﾊ' => 'バ', 'ﾋ' => 'ビ', 'ﾌ' => 'ブ', 'ﾍ' => 'ベ', 'ﾎ' => 'ボ',
        'ｳ' => 'ヴ',
        // Voiced WA (U+30F7) and voiced WO (U+30FA).
        'ﾜ' => '\u{30F7}', 'ｦ' => '\u{30FA}',
        _ => return None,
    };
    Some(full)
}

/// Returns the precomposed semi-voiced full-width katakana for a half-width base, if one exists.
fn hw_katakana_semi_voiced(base: char) -> Option<char> {
    let full = match base {
        'ﾊ' => 'パ', 'ﾋ' => 'ピ', 'ﾌ' => 'プ', 'ﾍ' => 'ペ', 'ﾎ' => 'ポ',
        _ => return None,
    };
    Some(full)
}

/// Maps a single half-width katakana or punctuation character to full-width;
/// any other character is returned unchanged.
fn hw_katakana_plain(c: char) -> char {
    match c {
        'ｦ' => 'ヲ', 'ｧ' => 'ァ', 'ｨ' => 'ィ', 'ｩ' => 'ゥ', 'ｪ' => 'ェ', 'ｫ' => 'ォ',
        'ｬ' => 'ャ', 'ｭ' => 'ュ', 'ｮ' => 'ョ', 'ｯ' => 'ッ', 'ｰ' => 'ー',
        'ｱ' => 'ア', 'ｲ' => 'イ', 'ｳ' => 'ウ', 'ｴ' => 'エ', 'ｵ' => 'オ',
        'ｶ' => 'カ', 'ｷ' => 'キ', 'ｸ' => 'ク', 'ｹ' => 'ケ', 'ｺ' => 'コ',
        'ｻ' => 'サ', 'ｼ' => 'シ', 'ｽ' => 'ス', 'ｾ' => 'セ', 'ｿ' => 'ソ',
        'ﾀ' => 'タ', 'ﾁ' => 'チ', 'ﾂ' => 'ツ', 'ﾃ' => 'テ', 'ﾄ' => 'ト',
        'ﾅ' => 'ナ', 'ﾆ' => 'ニ', 'ﾇ' => 'ヌ', 'ﾈ' => 'ネ', 'ﾉ' => 'ノ',
        'ﾊ' => 'ハ', 'ﾋ' => 'ヒ', 'ﾌ' => 'フ', 'ﾍ' => 'ヘ', 'ﾎ' => 'ホ',
        'ﾏ' => 'マ', 'ﾐ' => 'ミ', 'ﾑ' => 'ム', 'ﾒ' => 'メ', 'ﾓ' => 'モ',
        'ﾔ' => 'ヤ', 'ﾕ' => 'ユ', 'ﾖ' => 'ヨ',
        'ﾗ' => 'ラ', 'ﾘ' => 'リ', 'ﾙ' => 'ル', 'ﾚ' => 'レ', 'ﾛ' => 'ロ',
        'ﾜ' => 'ワ', 'ﾝ' => 'ン',
        '｡' => '。', '｢' => '「', '｣' => '」', '､' => '、', '･' => '・',
        // Stand-alone sound marks become the full-width spacing marks.
        '\u{FF9E}' => '\u{309B}',
        '\u{FF9F}' => '\u{309C}',
        _ => c,
    }
}
398
/// Normalizes prolonged sound marks.
///
/// The tilde-like characters `〜` and `～` are replaced by the katakana
/// prolonged sound mark `ー`; every other character is copied unchanged.
///
/// # Examples
///
/// ```
/// use japanese_text::normalize_prolonged_sound;
///
/// assert_eq!(normalize_prolonged_sound("コーヒー"), "コーヒー");
/// assert_eq!(normalize_prolonged_sound("コ〜ヒ〜"), "コーヒー");
/// ```
pub fn normalize_prolonged_sound(input: &str) -> String {
    let mut normalized = String::new();
    for ch in input.chars() {
        if ch == '〜' || ch == '～' {
            normalized.push('ー');
        } else {
            normalized.push(ch);
        }
    }
    normalized
}
418
419/// 繰り返し記号を展開します。
420///
421/// ひらがな・カタカナの繰り返し記号（ゝ、ゞ、ヽ、ヾ）を実際の文字に展開します。
422///
423/// # 使用例
424///
425/// ```
426/// use japanese_text::expand_iteration_marks;
427///
428/// assert_eq!(expand_iteration_marks("いろゝ"), "いろろ");
429/// assert_eq!(expand_iteration_marks("かゞ"), "かが");
430/// ```
431pub fn expand_iteration_marks(input: &str) -> String {
432    let chars: Vec<char> = input.chars().collect();
433    let mut result = String::new();
434
435    for (i, &c) in chars.iter().enumerate() {
436        match c {
437            // ひらがな繰り返し記号（無声音）
438            'ゝ' => {
439                if i > 0 {
440                    result.push(chars[i - 1]);
441                } else {
442                    result.push(c);
443                }
444            }
445            // ひらがな繰り返し記号（濁音）
446            'ゞ' => {
447                if i > 0 {
448                    let prev = chars[i - 1];
449                    let voiced = add_dakuten(prev);
450                    result.push(voiced);
451                } else {
452                    result.push(c);
453                }
454            }
455            // カタカナ繰り返し記号（無声音）
456            'ヽ' => {
457                if i > 0 {
458                    result.push(chars[i - 1]);
459                } else {
460                    result.push(c);
461                }
462            }
463            // カタカナ繰り返し記号（濁音）
464            'ヾ' => {
465                if i > 0 {
466                    let prev = chars[i - 1];
467                    let voiced = add_dakuten(prev);
468                    result.push(voiced);
469                } else {
470                    result.push(c);
471                }
472            }
473            _ => result.push(c),
474        }
475    }
476
477    result
478}
479
/// Adds a dakuten (voiced sound mark) to a kana character (internal helper).
///
/// Only the unvoiced ka-, sa-, ta- and ha-row kana (hiragana and katakana)
/// are changed; any other character is returned as-is.
fn add_dakuten(c: char) -> char {
    let has_voiced_form = matches!(
        c,
        // Hiragana
        'か' | 'き' | 'く' | 'け' | 'こ'
            | 'さ' | 'し' | 'す' | 'せ' | 'そ'
            | 'た' | 'ち' | 'つ' | 'て' | 'と'
            | 'は' | 'ひ' | 'ふ' | 'へ' | 'ほ'
            // Katakana
            | 'カ' | 'キ' | 'ク' | 'ケ' | 'コ'
            | 'サ' | 'シ' | 'ス' | 'セ' | 'ソ'
            | 'タ' | 'チ' | 'ツ' | 'テ' | 'ト'
            | 'ハ' | 'ヒ' | 'フ' | 'ヘ' | 'ホ'
    );

    if has_voiced_form {
        // For each of these kana the voiced form is the next code point.
        char::from_u32(c as u32 + 1).unwrap_or(c)
    } else {
        c
    }
}
496
/// Unit tests covering every public function of the crate.
#[cfg(test)]
mod tests {
    use super::*;

    /// Full-width ASCII and the full-width space are narrowed; kana is untouched.
    #[test]
    fn test_to_half_width() {
        assert_eq!(to_half_width("ＡＢＣ"), "ABC");
        assert_eq!(to_half_width("１２３"), "123");
        assert_eq!(to_half_width("！＠＃"), "!@#");
        assert_eq!(to_half_width("　"), " ");
        assert_eq!(to_half_width("Ｈｅｌｌｏ　Ｗｏｒｌｄ"), "Hello World");
        // Mixed content
        assert_eq!(to_half_width("ＡＢＣあいう"), "ABCあいう");
    }

    /// ASCII and the ordinary space are widened; kana is untouched.
    #[test]
    fn test_to_full_width() {
        assert_eq!(to_full_width("ABC"), "ＡＢＣ");
        assert_eq!(to_full_width("123"), "１２３");
        assert_eq!(to_full_width("!@#"), "！＠＃");
        assert_eq!(to_full_width(" "), "　");
        assert_eq!(to_full_width("Hello World"), "Ｈｅｌｌｏ　Ｗｏｒｌｄ");
        // Mixed content
        assert_eq!(to_full_width("ABCあいう"), "ＡＢＣあいう");
    }

    /// Katakana is converted to hiragana; ASCII is untouched.
    #[test]
    fn test_to_hiragana() {
        assert_eq!(to_hiragana("カタカナ"), "かたかな");
        assert_eq!(to_hiragana("コンニチハ"), "こんにちは");
        assert_eq!(to_hiragana("アイウエオ"), "あいうえお");
        assert_eq!(to_hiragana("ヴァイオリン"), "ゔぁいおりん");
        // Mixed content
        assert_eq!(to_hiragana("カタカナABC"), "かたかなABC");
    }

    /// Hiragana is converted to katakana; ASCII is untouched.
    #[test]
    fn test_to_katakana() {
        assert_eq!(to_katakana("ひらがな"), "ヒラガナ");
        assert_eq!(to_katakana("こんにちは"), "コンニチハ");
        assert_eq!(to_katakana("あいうえお"), "アイウエオ");
        assert_eq!(to_katakana("ゔぁいおりん"), "ヴァイオリン");
        // Mixed content
        assert_eq!(to_katakana("ひらがなABC"), "ヒラガナABC");
    }

    /// Widening then narrowing returns the original ASCII text.
    #[test]
    fn test_roundtrip_full_half_width() {
        let original = "ABC123!@#";
        let full = to_full_width(original);
        let back = to_half_width(&full);
        assert_eq!(original, back);
    }

    /// Hiragana → katakana → hiragana returns the original text.
    #[test]
    fn test_roundtrip_hiragana_katakana() {
        let original = "こんにちは";
        let katakana = to_katakana(original);
        let back = to_hiragana(&katakana);
        assert_eq!(original, back);
    }

    /// The converters map the empty string to the empty string.
    #[test]
    fn test_empty_string() {
        assert_eq!(to_half_width(""), "");
        assert_eq!(to_full_width(""), "");
        assert_eq!(to_hiragana(""), "");
        assert_eq!(to_katakana(""), "");
    }

    #[test]
    fn test_is_hiragana() {
        assert!(is_hiragana('あ'));
        assert!(is_hiragana('ん'));
        assert!(!is_hiragana('ア'));
        assert!(!is_hiragana('A'));
        assert!(!is_hiragana('漢'));
    }

    #[test]
    fn test_is_katakana() {
        assert!(is_katakana('ア'));
        assert!(is_katakana('ン'));
        assert!(!is_katakana('あ'));
        assert!(!is_katakana('A'));
    }

    #[test]
    fn test_is_half_width_katakana() {
        assert!(is_half_width_katakana('ｱ'));
        assert!(is_half_width_katakana('ﾝ'));
        assert!(!is_half_width_katakana('ア'));
        assert!(!is_half_width_katakana('A'));
    }

    #[test]
    fn test_is_kanji() {
        assert!(is_kanji('漢'));
        assert!(is_kanji('字'));
        assert!(!is_kanji('あ'));
        assert!(!is_kanji('A'));
    }

    #[test]
    fn test_is_full_width() {
        assert!(is_full_width('Ａ'));
        assert!(is_full_width('１'));
        assert!(is_full_width('　'));
        assert!(!is_full_width('A'));
    }

    /// Each character is counted in the expected category.
    #[test]
    fn test_count_character_types() {
        let counts = count_character_types("あア漢ABC123ｱｲｳ");
        assert_eq!(counts.hiragana, 1);
        assert_eq!(counts.katakana, 1);
        assert_eq!(counts.kanji, 1);
        assert_eq!(counts.ascii, 6);
        assert_eq!(counts.half_width_katakana, 3);
    }

    /// Whitespace runs collapse to one space and the ends are trimmed.
    #[test]
    fn test_normalize_whitespace() {
        assert_eq!(normalize_whitespace("Hello　World"), "Hello World");
        assert_eq!(normalize_whitespace("A\t\t\tB"), "A B");
        assert_eq!(normalize_whitespace("  Multiple   Spaces  "), "Multiple Spaces");
    }

    /// Plain, voiced and semi-voiced half-width katakana are all widened.
    #[test]
    fn test_half_width_katakana_to_full_width() {
        assert_eq!(half_width_katakana_to_full_width("ｶﾀｶﾅ"), "カタカナ");
        assert_eq!(half_width_katakana_to_full_width("ｶﾞｷﾞｸﾞｹﾞｺﾞ"), "ガギグゲゴ");
        assert_eq!(half_width_katakana_to_full_width("ﾊﾟﾋﾟﾌﾟﾍﾟﾎﾟ"), "パピプペポ");
        assert_eq!(half_width_katakana_to_full_width("ｺﾝﾆﾁﾊ"), "コンニチハ");
    }

    /// Tilde-like marks become the prolonged sound mark; existing marks stay.
    #[test]
    fn test_normalize_prolonged_sound() {
        assert_eq!(normalize_prolonged_sound("コーヒー"), "コーヒー");
        assert_eq!(normalize_prolonged_sound("コ〜ヒ〜"), "コーヒー");
        assert_eq!(normalize_prolonged_sound("ラーメン"), "ラーメン");
    }

    /// Hiragana and katakana iteration marks, voiced and unvoiced, are expanded.
    #[test]
    fn test_expand_iteration_marks() {
        assert_eq!(expand_iteration_marks("いろゝ"), "いろろ");
        assert_eq!(expand_iteration_marks("かゞ"), "かが");
        assert_eq!(expand_iteration_marks("トヽキ"), "トトキ");
        assert_eq!(expand_iteration_marks("カヾ"), "カガ");
    }
}
japanese_text/lib.rs

japanese_text/
lib.rs