oxihuman_core/
char_classifier.rs1#[allow(dead_code)]
/// Coarse category assigned to a single character.
///
/// The enum carries no data, so it is `Copy` and can be used as a key in
/// hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CharClass {
    /// ASCII letter (`a`-`z`, `A`-`Z`).
    Alpha,
    /// ASCII decimal digit (`0`-`9`).
    Digit,
    /// ASCII letter or digit, as a combined category.
    ///
    /// `classify_char` reports the more specific `Alpha` or `Digit` instead,
    /// so it never returns this variant.
    AlphaNum,
    /// Whitespace character.
    Whitespace,
    /// ASCII punctuation character.
    Punctuation,
    /// Anything that fits none of the other categories.
    Other,
}
16
/// Settings for character classification.
///
/// `Default` yields a configuration with `locale_aware` set to `false`, the
/// same value produced by `default_classifier_config`.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ClassifierConfig {
    /// Whether classification should take the locale into account.
    ///
    /// NOTE(review): none of the classifier functions visible next to this
    /// type read this flag — confirm whether any caller relies on it.
    pub locale_aware: bool,
}
22
23#[allow(dead_code)]
24pub fn default_classifier_config() -> ClassifierConfig {
25 ClassifierConfig {
26 locale_aware: false,
27 }
28}
29
30#[allow(dead_code)]
31pub fn classify_char(c: char) -> CharClass {
32 if c.is_ascii_alphabetic() {
33 CharClass::Alpha
34 } else if c.is_ascii_digit() {
35 CharClass::Digit
36 } else if c.is_ascii_alphanumeric() {
37 CharClass::AlphaNum
38 } else if c.is_whitespace() {
39 CharClass::Whitespace
40 } else if c.is_ascii_punctuation() {
41 CharClass::Punctuation
42 } else {
43 CharClass::Other
44 }
45}
46
/// Returns `true` when `c` is an ASCII letter (`a`-`z` or `A`-`Z`).
///
/// Non-ASCII letters are not accepted.
#[allow(dead_code)]
pub fn is_alpha(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z')
}
51
/// Returns `true` when `c` is an ASCII decimal digit (`0`-`9`).
///
/// Digits from other scripts are not accepted.
#[allow(dead_code)]
pub fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
56
/// Returns `true` when `c` is an ASCII letter or an ASCII decimal digit.
#[allow(dead_code)]
pub fn is_alnum(c: char) -> bool {
    let letter = c.is_ascii_alphabetic();
    let digit = c.is_ascii_digit();
    letter || digit
}
61
/// Returns `true` when `c` is whitespace according to [`char::is_whitespace`].
///
/// Unlike the other predicates here, this check is not limited to ASCII.
#[allow(dead_code)]
pub fn is_whitespace(c: char) -> bool {
    char::is_whitespace(c)
}
66
/// Returns `true` when `c` is an ASCII punctuation character.
///
/// The accepted characters are the four ASCII ranges `!`-`/`, `:`-`@`,
/// `[`-`` ` `` and `{`-`~`.
#[allow(dead_code)]
pub fn is_punctuation(c: char) -> bool {
    matches!(c, '!'..='/' | ':'..='@' | '['..='`' | '{'..='~')
}
71
/// Converts an ASCII uppercase letter to its lowercase form.
///
/// Any other character, including non-ASCII uppercase letters, is returned
/// unchanged.
#[allow(dead_code)]
pub fn to_ascii_lower(c: char) -> char {
    let mut lowered = c;
    lowered.make_ascii_lowercase();
    lowered
}
76
/// Converts an ASCII lowercase letter to its uppercase form.
///
/// Any other character, including non-ASCII lowercase letters, is returned
/// unchanged.
#[allow(dead_code)]
pub fn to_ascii_upper(c: char) -> char {
    let mut raised = c;
    raised.make_ascii_uppercase();
    raised
}
81
82#[allow(dead_code)]
83pub fn classify_str(s: &str) -> Vec<CharClass> {
84 s.chars().map(classify_char).collect()
85}
86
/// Unit tests for the character classifier.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_config() {
        let cfg = default_classifier_config();
        assert!(!cfg.locale_aware);
    }

    #[test]
    fn test_classify_alpha() {
        assert_eq!(classify_char('a'), CharClass::Alpha);
        assert_eq!(classify_char('Z'), CharClass::Alpha);
    }

    #[test]
    fn test_classify_digit() {
        assert_eq!(classify_char('0'), CharClass::Digit);
        assert_eq!(classify_char('9'), CharClass::Digit);
    }

    #[test]
    fn test_classify_whitespace() {
        assert_eq!(classify_char(' '), CharClass::Whitespace);
        assert_eq!(classify_char('\t'), CharClass::Whitespace);
        // Whitespace detection is not limited to ASCII.
        assert_eq!(classify_char('\u{00A0}'), CharClass::Whitespace);
    }

    #[test]
    fn test_classify_punctuation() {
        assert_eq!(classify_char('!'), CharClass::Punctuation);
        assert_eq!(classify_char('.'), CharClass::Punctuation);
    }

    #[test]
    fn test_classify_other() {
        // Non-ASCII letters and ASCII control characters match no category.
        assert_eq!(classify_char('é'), CharClass::Other);
        assert_eq!(classify_char('\u{0}'), CharClass::Other);
        assert_eq!(classify_char('\u{7f}'), CharClass::Other);
    }

    #[test]
    fn test_classify_never_alphanum() {
        // Letters and digits are always reported with their specific class.
        assert!((0u8..=127)
            .map(char::from)
            .all(|c| classify_char(c) != CharClass::AlphaNum));
    }

    #[test]
    fn test_is_alpha_digit() {
        assert!(is_alpha('a'));
        assert!(!is_alpha('1'));
        assert!(is_digit('5'));
        assert!(!is_digit('a'));
    }

    #[test]
    fn test_is_alnum() {
        assert!(is_alnum('a'));
        assert!(is_alnum('3'));
        assert!(!is_alnum('!'));
    }

    #[test]
    fn test_is_whitespace() {
        assert!(is_whitespace(' '));
        assert!(is_whitespace('\n'));
        assert!(!is_whitespace('a'));
    }

    #[test]
    fn test_is_punctuation() {
        assert!(is_punctuation(','));
        assert!(is_punctuation('~'));
        assert!(!is_punctuation('a'));
        assert!(!is_punctuation(' '));
    }

    #[test]
    fn test_to_ascii_lower_upper() {
        assert_eq!(to_ascii_lower('A'), 'a');
        assert_eq!(to_ascii_upper('a'), 'A');
        assert_eq!(to_ascii_lower('z'), 'z');
        // Non-letters pass through untouched.
        assert_eq!(to_ascii_lower('1'), '1');
        assert_eq!(to_ascii_upper('!'), '!');
    }

    #[test]
    fn test_classify_str() {
        // Comparing the whole vector also pins the result length.
        let classes = classify_str("a1 !");
        assert_eq!(
            classes,
            vec![
                CharClass::Alpha,
                CharClass::Digit,
                CharClass::Whitespace,
                CharClass::Punctuation,
            ]
        );
    }

    #[test]
    fn test_classify_str_empty() {
        assert!(classify_str("").is_empty());
    }
}