Skip to main content

ferray_strings/
classify.rs

1// ferray-strings: String classification functions (is* family)
2//
3// Elementwise boolean classification matching NumPy's
4// `numpy.strings.isalpha`, `numpy.strings.isdigit`, etc.
5
6use ferray_core::Array;
7use ferray_core::dimension::Dimension;
8use ferray_core::error::FerrayResult;
9
10use crate::string_array::StringArray;
11
12/// Classify each element by applying `pred` to the string content.
13fn classify<D: Dimension>(
14    a: &StringArray<D>,
15    pred: impl Fn(&str) -> bool,
16) -> FerrayResult<Array<bool, D>> {
17    let data: Vec<bool> = a.iter().map(|s| pred(s)).collect();
18    Array::from_vec(a.dim().clone(), data)
19}
20
21/// Return `true` where every character is alphabetic and the string is
22/// non-empty. Matches `numpy.strings.isalpha`.
23pub fn isalpha<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
24    classify(a, |s| !s.is_empty() && s.chars().all(char::is_alphabetic))
25}
26
27/// Return `true` where every character is a digit and the string is
28/// non-empty. Matches `numpy.strings.isdigit`.
29pub fn isdigit<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
30    classify(a, |s| {
31        !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
32    })
33}
34
35/// Return `true` where every character is whitespace and the string is
36/// non-empty. Matches `numpy.strings.isspace`.
37pub fn isspace<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
38    classify(a, |s| !s.is_empty() && s.chars().all(char::is_whitespace))
39}
40
41/// Return `true` where every character is uppercase and the string is
42/// non-empty. Matches `numpy.strings.isupper`.
43pub fn isupper<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
44    classify(a, |s| {
45        !s.is_empty() && s.chars().any(char::is_uppercase) && !s.chars().any(char::is_lowercase)
46    })
47}
48
49/// Return `true` where every character is lowercase and the string is
50/// non-empty. Matches `numpy.strings.islower`.
51pub fn islower<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
52    classify(a, |s| {
53        !s.is_empty() && s.chars().any(char::is_lowercase) && !s.chars().any(char::is_uppercase)
54    })
55}
56
57/// Return `true` where every character is alphanumeric and the string
58/// is non-empty. Matches `numpy.strings.isalnum`.
59pub fn isalnum<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
60    classify(a, |s| !s.is_empty() && s.chars().all(char::is_alphanumeric))
61}
62
63/// Return `true` where the string could be a valid numeric literal.
64/// Matches `numpy.strings.isnumeric` (simplified — checks for
65/// ASCII digit + decimal point + sign characters).
66pub fn isnumeric<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
67    classify(a, |s| {
68        !s.is_empty()
69            && s.chars()
70                .all(|c| c.is_ascii_digit() || c == '.' || c == '+' || c == '-')
71    })
72}
73
74/// Return `true` where the string is titlecased (first letter of each
75/// word is uppercase, the rest lowercase). Matches
76/// `numpy.strings.istitle`.
77pub fn istitle<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
78    classify(a, |s| {
79        if s.is_empty() {
80            return false;
81        }
82        let mut expect_upper = true;
83        for c in s.chars() {
84            if c.is_alphabetic() {
85                if expect_upper && !c.is_uppercase() {
86                    return false;
87                }
88                if !expect_upper && !c.is_lowercase() {
89                    return false;
90                }
91                expect_upper = false;
92            } else {
93                expect_upper = true;
94            }
95        }
96        true
97    })
98}
99
/// Unit tests for the `is*` classification functions. Each test builds a
/// small string array and compares the elementwise result with the expected
/// boolean mask.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// Letters only; digits and the empty string are rejected.
    #[test]
    fn test_isalpha() {
        let a = array(&["hello", "hello123", "", "HELLO"]).unwrap();
        let r = isalpha(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, true]);
    }

    /// ASCII digits only; a decimal point, the empty string and letters are rejected.
    #[test]
    fn test_isdigit() {
        let a = array(&["123", "12.3", "", "abc"]).unwrap();
        let r = isdigit(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    /// Spaces, tabs and newlines count as whitespace; the empty string does not.
    #[test]
    fn test_isspace() {
        let a = array(&["  ", "\t\n", "", "a b"]).unwrap();
        let r = isspace(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, true, false, false]);
    }

    /// Any lowercase letter, or the absence of uppercase letters, yields `false`.
    #[test]
    fn test_isupper() {
        let a = array(&["HELLO", "Hello", "hello", ""]).unwrap();
        let r = isupper(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    /// Any uppercase letter, or the absence of lowercase letters, yields `false`.
    #[test]
    fn test_islower() {
        let a = array(&["hello", "Hello", "HELLO", ""]).unwrap();
        let r = islower(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    /// Letters and digits may be mixed; a space or the empty string is rejected.
    #[test]
    fn test_isalnum() {
        let a = array(&["abc123", "abc 123", "", "abc"]).unwrap();
        let r = isalnum(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, true]);
    }

    /// Plain digit strings are numeric; letters and the empty string are not.
    #[test]
    fn test_isnumeric() {
        let a = array(&["123", "abc", "", "12a"]).unwrap();
        let r = isnumeric(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }

    /// Only the capitalised-words form is titlecased.
    #[test]
    fn test_istitle() {
        let a = array(&["Hello World", "hello world", "HELLO WORLD", ""]).unwrap();
        let r = istitle(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, false, false]);
    }
}