1pub mod range;
2pub mod table;
3pub mod util;
4
5use util::{
6 is_cjk, is_close_parentheses, is_colon, is_common_symbols, is_enclosed_cjk_letters_and_months,
7 is_greek_and_coptic, is_latin1_supplement, is_open_parentheses,
8 is_western_sentence_punctuation,
9};
10use wasm_bindgen::prelude::wasm_bindgen;
11
12#[derive(PartialEq, Debug)]
13#[wasm_bindgen]
14pub enum CharType {
15 Number,
16 Alphabet,
17 CJK,
18 Colon,
19 Other,
20}
21
22#[derive(Debug, Copy, Clone)]
23#[wasm_bindgen]
24pub struct SpacingOptions {
25 pub punctuations: bool,
26}
27
28#[derive(Debug, Clone, Copy)]
29#[wasm_bindgen]
30pub struct Options {
31 pub spacing: Option<SpacingOptions>,
32}
33
34#[wasm_bindgen]
35pub fn get_char_type(c: char) -> CharType {
36 if c.is_ascii_digit() {
37 CharType::Number
38 } else if c.is_ascii_alphabetic() {
39 CharType::Alphabet
40 } else if is_cjk(c) {
41 CharType::CJK
42 } else if is_colon(c) {
43 CharType::Colon
44 } else {
45 CharType::Other
46 }
47}
48
49fn spacing(
50 pre_char: char,
51 pre_type: &CharType,
52 cur_char: char,
53 cur_type: &CharType,
54 options: Option<SpacingOptions>,
55) -> bool {
56 let spacing_opts = options.expect("spacing options should be set");
57 match (pre_type, cur_type) {
58 (CharType::Alphabet, CharType::Number) => false,
59 (CharType::Alphabet, CharType::CJK) => true,
60 (CharType::Alphabet, CharType::Other) => is_open_parentheses(cur_char),
61 (CharType::CJK, CharType::Number) => true,
62 (CharType::CJK, CharType::Alphabet) => true,
63 (CharType::CJK, CharType::Other) => {
64 is_common_symbols(cur_char)
65 || is_latin1_supplement(cur_char)
66 || is_greek_and_coptic(cur_char)
67 || is_enclosed_cjk_letters_and_months(cur_char)
68 || is_open_parentheses(cur_char)
69 }
70 (CharType::Number, CharType::Alphabet) => false,
71 (CharType::Number, CharType::CJK) => true,
72 (CharType::Number, CharType::Other) => false,
73 (CharType::Other, CharType::CJK) => {
74 is_common_symbols(pre_char)
75 || is_latin1_supplement(pre_char)
76 || is_greek_and_coptic(pre_char)
77 || is_enclosed_cjk_letters_and_months(pre_char)
78 || is_close_parentheses(pre_char)
79 || (spacing_opts.punctuations && is_western_sentence_punctuation(pre_char))
80 }
81 (CharType::Other, CharType::Alphabet) => {
82 is_close_parentheses(pre_char)
83 || (spacing_opts.punctuations && is_western_sentence_punctuation(pre_char))
84 }
85 (CharType::Other, CharType::Number) => {
86 spacing_opts.punctuations && is_western_sentence_punctuation(pre_char)
87 }
88 (CharType::Colon, CharType::Alphabet | CharType::CJK | CharType::Number) => true,
89 (CharType::Colon, CharType::Other) => !cur_char.is_whitespace(),
90 _ => false,
91 }
92}
93
94#[wasm_bindgen]
95pub fn format(text: &str, options: Option<Options>) -> String {
96 let mut formatted = String::new();
97 if text.is_empty() {
98 return formatted;
99 }
100 let mut chars = text.chars();
101 let first_char = chars.next().expect("should have at least one char");
102 let mut pre_char = first_char;
103 formatted.push(first_char);
104 let mut pre_char_type = get_char_type(first_char);
105 for cur_char in chars {
106 let cur_char_type = get_char_type(cur_char);
107 let default_spacing_opts = Some(SpacingOptions { punctuations: true });
108 let spacing_opts = options.map_or(default_spacing_opts, |o| o.spacing);
109 if cur_char_type != pre_char_type
110 && spacing(
111 pre_char,
112 &pre_char_type,
113 cur_char,
114 &cur_char_type,
115 spacing_opts,
116 )
117 {
118 formatted.push('\u{0020}');
119 }
120 formatted.push(cur_char);
121 pre_char_type = cur_char_type;
122 pre_char = cur_char;
123 }
124
125 formatted
126}