Skip to main content

ferray_strings/
classify.rs

1// ferray-strings: String classification functions (is* family)
2//
3// Elementwise boolean classification matching NumPy's
4// `numpy.strings.isalpha`, `numpy.strings.isdigit`, etc.
5
6use ferray_core::Array;
7use ferray_core::dimension::Dimension;
8use ferray_core::error::FerrayResult;
9
10use crate::string_array::StringArray;
11
12/// Classify each element by applying `pred` to the string content.
13fn classify<D: Dimension>(
14    a: &StringArray<D>,
15    pred: impl Fn(&str) -> bool,
16) -> FerrayResult<Array<bool, D>> {
17    let data: Vec<bool> = a.iter().map(|s| pred(s)).collect();
18    Array::from_vec(a.dim().clone(), data)
19}
20
21/// Return `true` where every character is alphabetic and the string is
22/// non-empty. Matches `numpy.strings.isalpha`.
23pub fn isalpha<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
24    classify(a, |s| !s.is_empty() && s.chars().all(|c| c.is_alphabetic()))
25}
26
27/// Return `true` where every character is a digit and the string is
28/// non-empty. Matches `numpy.strings.isdigit`.
29pub fn isdigit<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
30    classify(a, |s| {
31        !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
32    })
33}
34
35/// Return `true` where every character is whitespace and the string is
36/// non-empty. Matches `numpy.strings.isspace`.
37pub fn isspace<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
38    classify(a, |s| !s.is_empty() && s.chars().all(|c| c.is_whitespace()))
39}
40
41/// Return `true` where every character is uppercase and the string is
42/// non-empty. Matches `numpy.strings.isupper`.
43pub fn isupper<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
44    classify(a, |s| {
45        !s.is_empty() && s.chars().any(char::is_uppercase) && !s.chars().any(char::is_lowercase)
46    })
47}
48
49/// Return `true` where every character is lowercase and the string is
50/// non-empty. Matches `numpy.strings.islower`.
51pub fn islower<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
52    classify(a, |s| {
53        !s.is_empty() && s.chars().any(char::is_lowercase) && !s.chars().any(char::is_uppercase)
54    })
55}
56
57/// Return `true` where every character is alphanumeric and the string
58/// is non-empty. Matches `numpy.strings.isalnum`.
59pub fn isalnum<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
60    classify(a, |s| {
61        !s.is_empty() && s.chars().all(|c| c.is_alphanumeric())
62    })
63}
64
65/// Return `true` where the string could be a valid numeric literal.
66/// Matches `numpy.strings.isnumeric` (simplified — checks for
67/// ASCII digit + decimal point + sign characters).
68pub fn isnumeric<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
69    classify(a, |s| {
70        !s.is_empty()
71            && s.chars()
72                .all(|c| c.is_ascii_digit() || c == '.' || c == '+' || c == '-')
73    })
74}
75
76/// Return `true` where the string is titlecased (first letter of each
77/// word is uppercase, the rest lowercase). Matches
78/// `numpy.strings.istitle`.
79pub fn istitle<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
80    classify(a, |s| {
81        if s.is_empty() {
82            return false;
83        }
84        let mut expect_upper = true;
85        for c in s.chars() {
86            if c.is_alphabetic() {
87                if expect_upper && !c.is_uppercase() {
88                    return false;
89                }
90                if !expect_upper && !c.is_lowercase() {
91                    return false;
92                }
93                expect_upper = false;
94            } else {
95                expect_upper = true;
96            }
97        }
98        true
99    })
100}
101
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// Letters only qualify; digits and the empty string do not.
    #[test]
    fn test_isalpha() {
        let input = array(&["hello", "hello123", "", "HELLO"]).unwrap();
        let expected = [true, false, false, true];
        let flags = isalpha(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    /// A decimal point, letters, or an empty string are not all-digit.
    #[test]
    fn test_isdigit() {
        let input = array(&["123", "12.3", "", "abc"]).unwrap();
        let expected = [true, false, false, false];
        let flags = isdigit(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    /// Spaces, tabs and newlines count; mixed content and "" do not.
    #[test]
    fn test_isspace() {
        let input = array(&["  ", "\t\n", "", "a b"]).unwrap();
        let expected = [true, true, false, false];
        let flags = isspace(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    /// Only the fully uppercase word passes.
    #[test]
    fn test_isupper() {
        let input = array(&["HELLO", "Hello", "hello", ""]).unwrap();
        let expected = [true, false, false, false];
        let flags = isupper(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    /// Only the fully lowercase word passes.
    #[test]
    fn test_islower() {
        let input = array(&["hello", "Hello", "HELLO", ""]).unwrap();
        let expected = [true, false, false, false];
        let flags = islower(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    /// Letters and digits pass; an embedded space or "" does not.
    #[test]
    fn test_isalnum() {
        let input = array(&["abc123", "abc 123", "", "abc"]).unwrap();
        let expected = [true, false, false, true];
        let flags = isalnum(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    /// Only "Hello World" is titlecased.
    #[test]
    fn test_istitle() {
        let input = array(&["Hello World", "hello world", "HELLO WORLD", ""]).unwrap();
        let expected = [true, false, false, false];
        let flags = istitle(&input).unwrap();
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }
}