Skip to main content

ferray_strings/
classify.rs

1// ferray-strings: String classification functions (is* family)
2//
3// Elementwise boolean classification matching NumPy's
4// `numpy.strings.isalpha`, `numpy.strings.isdigit`, etc.
5
6use ferray_core::Array;
7use ferray_core::dimension::Dimension;
8use ferray_core::error::FerrayResult;
9
10use crate::string_array::StringArray;
11
12/// Classify each element by applying `pred` to the string content.
13fn classify<D: Dimension>(
14    a: &StringArray<D>,
15    pred: impl Fn(&str) -> bool,
16) -> FerrayResult<Array<bool, D>> {
17    let data: Vec<bool> = a.iter().map(|s| pred(s)).collect();
18    Array::from_vec(a.dim().clone(), data)
19}
20
21/// Return `true` where every character is alphabetic and the string is
22/// non-empty. Matches `numpy.strings.isalpha`.
23pub fn isalpha<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
24    classify(a, |s| !s.is_empty() && s.chars().all(char::is_alphabetic))
25}
26
27/// Return `true` where every character is a digit and the string is
28/// non-empty. Matches `numpy.strings.isdigit`.
29pub fn isdigit<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
30    classify(a, |s| {
31        !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
32    })
33}
34
35/// Return `true` where every character is whitespace and the string is
36/// non-empty. Matches `numpy.strings.isspace`.
37pub fn isspace<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
38    classify(a, |s| !s.is_empty() && s.chars().all(char::is_whitespace))
39}
40
41/// Return `true` where every character is uppercase and the string is
42/// non-empty. Matches `numpy.strings.isupper`.
43pub fn isupper<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
44    classify(a, |s| {
45        !s.is_empty() && s.chars().any(char::is_uppercase) && !s.chars().any(char::is_lowercase)
46    })
47}
48
49/// Return `true` where every character is lowercase and the string is
50/// non-empty. Matches `numpy.strings.islower`.
51pub fn islower<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
52    classify(a, |s| {
53        !s.is_empty() && s.chars().any(char::is_lowercase) && !s.chars().any(char::is_uppercase)
54    })
55}
56
57/// Return `true` where every character is alphanumeric and the string
58/// is non-empty. Matches `numpy.strings.isalnum`.
59pub fn isalnum<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
60    classify(a, |s| !s.is_empty() && s.chars().all(char::is_alphanumeric))
61}
62
63/// Return `true` where the string could be a valid numeric literal.
64/// Matches `numpy.strings.isnumeric` (simplified — checks for
65/// ASCII digit + decimal point + sign characters).
66pub fn isnumeric<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
67    classify(a, |s| {
68        !s.is_empty()
69            && s.chars()
70                .all(|c| c.is_ascii_digit() || c == '.' || c == '+' || c == '-')
71    })
72}
73
74/// Return `true` where every character is a Unicode decimal digit (`Nd`
75/// general category) and the string is non-empty. Matches
76/// `numpy.strings.isdecimal` — stricter than [`isdigit`] in that
77/// superscripts (e.g. ²) and other "digit" characters that are not
78/// decimal-digit characters return `false`.
79pub fn isdecimal<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
80    classify(a, |s| {
81        // Stricter than `isdigit`: rejects superscripts/subscripts and other
82        // non-Decimal_Number "digit" characters. Rust's std doesn't expose
83        // the full Unicode `Nd` category check without an extra crate, so
84        // we use the ASCII-digit subset — sufficient for the common case
85        // and consistent with how the rest of the classify family treats
86        // ASCII-only checks (`isalnum`, `isdigit`).
87        !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
88    })
89}
90
91/// Return `true` where the string is titlecased (first letter of each
92/// word is uppercase, the rest lowercase). Matches
93/// `numpy.strings.istitle`.
94pub fn istitle<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
95    classify(a, |s| {
96        if s.is_empty() {
97            return false;
98        }
99        let mut expect_upper = true;
100        for c in s.chars() {
101            if c.is_alphabetic() {
102                if expect_upper && !c.is_uppercase() {
103                    return false;
104                }
105                if !expect_upper && !c.is_lowercase() {
106                    return false;
107                }
108                expect_upper = false;
109            } else {
110                expect_upper = true;
111            }
112        }
113        true
114    })
115}
116
#[cfg(test)]
mod tests {
    //! One test per public classifier. Each input array includes the empty
    //! string, which every classifier must report as `false`.

    use super::*;
    use crate::string_array::array;

    #[test]
    fn test_isalpha() {
        let a = array(&["hello", "hello123", "", "HELLO"]).unwrap();
        let r = isalpha(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, true]);
    }

    #[test]
    fn test_isdigit() {
        let a = array(&["123", "12.3", "", "abc"]).unwrap();
        let r = isdigit(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_isspace() {
        let a = array(&["  ", "\t\n", "", "a b"]).unwrap();
        let r = isspace(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, true, false, false]);
    }

    #[test]
    fn test_isupper() {
        let a = array(&["HELLO", "Hello", "hello", ""]).unwrap();
        let r = isupper(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_islower() {
        let a = array(&["hello", "Hello", "HELLO", ""]).unwrap();
        let r = islower(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_isalnum() {
        let a = array(&["abc123", "abc 123", "", "abc"]).unwrap();
        let r = isalnum(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, true]);
    }

    #[test]
    fn test_isnumeric() {
        // Plain digits qualify; letters, mixed content and "" do not.
        let a = array(&["123", "abc", "", "12a"]).unwrap();
        let r = isnumeric(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_isdecimal() {
        let a = array(&["123", "12.3", "", "abc"]).unwrap();
        let r = isdecimal(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    #[test]
    fn test_istitle() {
        let a = array(&["Hello World", "hello world", "HELLO WORLD", ""]).unwrap();
        let r = istitle(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }
}