Skip to main content

oxihuman_core/
char_classifier.rs

// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
// SPDX-License-Identifier: Apache-2.0

//! Character classification utilities.
5
/// Coarse category assigned to a single character.
///
/// The enum is a plain tag with no payload, so it is `Copy` and can be used
/// as a key in hashed or ordered-by-equality collections.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CharClass {
    /// An ASCII letter (`a-z` or `A-Z`).
    Alpha,
    /// An ASCII decimal digit (`0-9`).
    Digit,
    /// An ASCII letter or digit. `classify_char` in this file reports letters
    /// as `Alpha` and digits as `Digit` first, so it never yields this variant.
    AlphaNum,
    /// A whitespace character.
    Whitespace,
    /// An ASCII punctuation character.
    Punctuation,
    /// Anything that fits none of the categories above.
    Other,
}
16
/// Options for character classification.
///
/// `ClassifierConfig::default()` yields `locale_aware: false`, the same value
/// that `default_classifier_config` builds.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ClassifierConfig {
    /// Whether classification should be locale-aware.
    ///
    /// NOTE(review): none of the functions visible in this file read this
    /// flag; confirm where (or whether) it is consumed.
    pub locale_aware: bool,
}
22
23#[allow(dead_code)]
24pub fn default_classifier_config() -> ClassifierConfig {
25    ClassifierConfig {
26        locale_aware: false,
27    }
28}
29
30#[allow(dead_code)]
31pub fn classify_char(c: char) -> CharClass {
32    if c.is_ascii_alphabetic() {
33        CharClass::Alpha
34    } else if c.is_ascii_digit() {
35        CharClass::Digit
36    } else if c.is_ascii_alphanumeric() {
37        CharClass::AlphaNum
38    } else if c.is_whitespace() {
39        CharClass::Whitespace
40    } else if c.is_ascii_punctuation() {
41        CharClass::Punctuation
42    } else {
43        CharClass::Other
44    }
45}
46
/// Returns `true` when `c` is an ASCII letter (`a-z` or `A-Z`).
#[allow(dead_code)]
pub fn is_alpha(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z')
}
51
/// Returns `true` when `c` is an ASCII decimal digit (`0-9`).
#[allow(dead_code)]
pub fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
56
/// Returns `true` when `c` is an ASCII letter or an ASCII decimal digit.
#[allow(dead_code)]
pub fn is_alnum(c: char) -> bool {
    c.is_ascii_alphabetic() || c.is_ascii_digit()
}
61
/// Returns `true` when `c` is whitespace according to `char::is_whitespace`.
///
/// Unlike the ASCII-only helpers in this file, this also accepts non-ASCII
/// whitespace characters.
#[allow(dead_code)]
pub fn is_whitespace(c: char) -> bool {
    char::is_whitespace(c)
}
66
/// Returns `true` when `c` is an ASCII punctuation character.
///
/// The accepted characters are the four ASCII ranges `!`-`/`, `:`-`@`,
/// `[`-`` ` `` and `{`-`~`.
#[allow(dead_code)]
pub fn is_punctuation(c: char) -> bool {
    matches!(c, '!'..='/' | ':'..='@' | '['..='`' | '{'..='~')
}
71
/// Returns `c` with ASCII uppercase letters mapped to lowercase.
///
/// Every other character, including non-ASCII letters, is returned unchanged.
#[allow(dead_code)]
pub fn to_ascii_lower(c: char) -> char {
    let mut lowered = c;
    lowered.make_ascii_lowercase();
    lowered
}
76
/// Returns `c` with ASCII lowercase letters mapped to uppercase.
///
/// Every other character, including non-ASCII letters, is returned unchanged.
#[allow(dead_code)]
pub fn to_ascii_upper(c: char) -> char {
    let mut raised = c;
    raised.make_ascii_uppercase();
    raised
}
81
82#[allow(dead_code)]
83pub fn classify_str(s: &str) -> Vec<CharClass> {
84    s.chars().map(classify_char).collect()
85}
86
/// Unit tests for the classification helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_config() {
        let cfg = default_classifier_config();
        assert!(!cfg.locale_aware);
    }

    #[test]
    fn test_classify_alpha() {
        assert_eq!(classify_char('a'), CharClass::Alpha);
        assert_eq!(classify_char('Z'), CharClass::Alpha);
    }

    #[test]
    fn test_classify_digit() {
        assert_eq!(classify_char('0'), CharClass::Digit);
        assert_eq!(classify_char('9'), CharClass::Digit);
    }

    #[test]
    fn test_classify_whitespace() {
        assert_eq!(classify_char(' '), CharClass::Whitespace);
        assert_eq!(classify_char('\t'), CharClass::Whitespace);
        // The whitespace check is Unicode-aware, unlike the other checks.
        assert_eq!(classify_char('\u{3000}'), CharClass::Whitespace);
    }

    #[test]
    fn test_classify_punctuation() {
        assert_eq!(classify_char('!'), CharClass::Punctuation);
        assert_eq!(classify_char('.'), CharClass::Punctuation);
    }

    #[test]
    fn test_classify_other() {
        // Non-ASCII letters and control characters match none of the checks.
        assert_eq!(classify_char('\u{e9}'), CharClass::Other);
        assert_eq!(classify_char('\u{0}'), CharClass::Other);
    }

    #[test]
    fn test_classify_never_alphanum() {
        // Letters and digits are always reported as Alpha / Digit.
        assert!((0u8..=127)
            .map(char::from)
            .all(|c| classify_char(c) != CharClass::AlphaNum));
    }

    #[test]
    fn test_is_alpha_digit() {
        assert!(is_alpha('a'));
        assert!(!is_alpha('1'));
        assert!(is_digit('5'));
        assert!(!is_digit('a'));
    }

    #[test]
    fn test_is_alnum() {
        assert!(is_alnum('a'));
        assert!(is_alnum('3'));
        assert!(!is_alnum('!'));
    }

    #[test]
    fn test_is_whitespace() {
        assert!(is_whitespace(' '));
        assert!(is_whitespace('\n'));
        assert!(!is_whitespace('a'));
    }

    #[test]
    fn test_is_punctuation() {
        assert!(is_punctuation('!'));
        assert!(is_punctuation('~'));
        assert!(!is_punctuation('a'));
        assert!(!is_punctuation(' '));
    }

    #[test]
    fn test_to_ascii_lower_upper() {
        assert_eq!(to_ascii_lower('A'), 'a');
        assert_eq!(to_ascii_upper('a'), 'A');
        assert_eq!(to_ascii_lower('z'), 'z');
    }

    #[test]
    fn test_case_conversion_leaves_non_letters() {
        assert_eq!(to_ascii_lower('1'), '1');
        assert_eq!(to_ascii_upper('\u{e9}'), '\u{e9}');
    }

    #[test]
    fn test_classify_str() {
        // Comparing the whole vector also pins the result length.
        let classes = classify_str("a1 !");
        assert_eq!(
            classes,
            vec![
                CharClass::Alpha,
                CharClass::Digit,
                CharClass::Whitespace,
                CharClass::Punctuation,
            ]
        );
    }

    #[test]
    fn test_classify_str_empty() {
        assert!(classify_str("").is_empty());
    }
}