qi_rs/
lib.rs

1pub mod range;
2pub mod table;
3pub mod util;
4
5use util::{
6    is_cjk, is_close_parentheses, is_colon, is_common_symbols, is_enclosed_cjk_letters_and_months,
7    is_greek_and_coptic, is_latin1_supplement, is_open_parentheses,
8    is_western_sentence_punctuation,
9};
10use wasm_bindgen::prelude::wasm_bindgen;
11
12#[derive(PartialEq, Debug)]
13#[wasm_bindgen]
14pub enum CharType {
15    Number,
16    Alphabet,
17    CJK,
18    Colon,
19    Other,
20}
21
22#[derive(Debug, Copy, Clone)]
23#[wasm_bindgen]
24pub struct SpacingOptions {
25    pub punctuations: bool,
26}
27
28#[derive(Debug, Clone, Copy)]
29#[wasm_bindgen]
30pub struct Options {
31    pub spacing: Option<SpacingOptions>,
32}
33
34#[wasm_bindgen]
35pub fn get_char_type(c: char) -> CharType {
36    if c.is_ascii_digit() {
37        CharType::Number
38    } else if c.is_ascii_alphabetic() {
39        CharType::Alphabet
40    } else if is_cjk(c) {
41        CharType::CJK
42    } else if is_colon(c) {
43        CharType::Colon
44    } else {
45        CharType::Other
46    }
47}
48
49fn spacing(
50    pre_char: char,
51    pre_type: &CharType,
52    cur_char: char,
53    cur_type: &CharType,
54    options: Option<SpacingOptions>,
55) -> bool {
56    let spacing_opts = options.expect("spacing options should be set");
57    match (pre_type, cur_type) {
58        (CharType::Alphabet, CharType::Number) => false,
59        (CharType::Alphabet, CharType::CJK) => true,
60        (CharType::Alphabet, CharType::Other) => is_open_parentheses(cur_char),
61        (CharType::CJK, CharType::Number) => true,
62        (CharType::CJK, CharType::Alphabet) => true,
63        (CharType::CJK, CharType::Other) => {
64            is_common_symbols(cur_char)
65                || is_latin1_supplement(cur_char)
66                || is_greek_and_coptic(cur_char)
67                || is_enclosed_cjk_letters_and_months(cur_char)
68                || is_open_parentheses(cur_char)
69        }
70        (CharType::Number, CharType::Alphabet) => false,
71        (CharType::Number, CharType::CJK) => true,
72        (CharType::Number, CharType::Other) => false,
73        (CharType::Other, CharType::CJK) => {
74            is_common_symbols(pre_char)
75                || is_latin1_supplement(pre_char)
76                || is_greek_and_coptic(pre_char)
77                || is_enclosed_cjk_letters_and_months(pre_char)
78                || is_close_parentheses(pre_char)
79                || (spacing_opts.punctuations && is_western_sentence_punctuation(pre_char))
80        }
81        (CharType::Other, CharType::Alphabet) => {
82            is_close_parentheses(pre_char)
83                || (spacing_opts.punctuations && is_western_sentence_punctuation(pre_char))
84        }
85        (CharType::Other, CharType::Number) => {
86            spacing_opts.punctuations && is_western_sentence_punctuation(pre_char)
87        }
88        (CharType::Colon, CharType::Alphabet | CharType::CJK | CharType::Number) => true,
89        (CharType::Colon, CharType::Other) => !cur_char.is_whitespace(),
90        _ => false,
91    }
92}
93
94#[wasm_bindgen]
95pub fn format(text: &str, options: Option<Options>) -> String {
96    let mut formatted = String::new();
97    if text.is_empty() {
98        return formatted;
99    }
100    let mut chars = text.chars();
101    let first_char = chars.next().expect("should have at least one char");
102    let mut pre_char = first_char;
103    formatted.push(first_char);
104    let mut pre_char_type = get_char_type(first_char);
105    for cur_char in chars {
106        let cur_char_type = get_char_type(cur_char);
107        let default_spacing_opts = Some(SpacingOptions { punctuations: true });
108        let spacing_opts = options.map_or(default_spacing_opts, |o| o.spacing);
109        if cur_char_type != pre_char_type
110            && spacing(
111                pre_char,
112                &pre_char_type,
113                cur_char,
114                &cur_char_type,
115                spacing_opts,
116            )
117        {
118            formatted.push('\u{0020}');
119        }
120        formatted.push(cur_char);
121        pre_char_type = cur_char_type;
122        pre_char = cur_char;
123    }
124
125    formatted
126}