Skip to main content

ferray_strings/
str_ops.rs

1// ferray-strings: Miscellaneous string operations
2//
3// str_len, swapcase, and elementwise comparison functions.
4//
5// ## REQ status
6//
7// These ops are not numbered in the `.design/ferray-strings.md` REQ list
8// (which enumerates REQ-1..14 for the core surface); they are the
9// `numpy.strings` extras tracked by their issue ids. All SHIPPED & audited:
10//
11// SHIPPED:
12//   - `str_len` (`pub fn`, #518) -> `Array<i64, D>`: counts Unicode code
13//     points (`s.chars().count()`), matching numpy's `str_len` ufunc which
14//     writes a signed `npy_intp` from `buf.num_codepoints()` (NOT bytes).
15//   - `swapcase` (`pub fn`, #515): per-character case inversion, matching
16//     CPython `str.swapcase` / `numpy.strings.swapcase`.
17//   - Elementwise comparisons (#516) `equal`, `not_equal`, `less`,
18//     `greater`, `less_equal`, `greater_equal` (all `pub fn`) ->
19//     `Array<bool, D>`, lexicographic by Unicode scalar value, matching
20//     `numpy.strings`/`numpy.char` comparison ufuncs.
21//
22// Consumers (non-test): re-exported from the crate root
23// (`ferray-strings/src/lib.rs` `pub use str_ops::{equal, greater,
24// greater_equal, less, less_equal, not_equal, str_len, swapcase}`) and bound
25// at the Python surface in `ferray-python/src/char.rs` — `str_len` and
26// `swapcase` (`bind_unary_string_op!(swapcase, fs::swapcase)`) shims, and the
27// comparison shims generated via `bind_string_compare!(equal, fs::equal)`
28// (and the other five) — which back `numpy.char`/`numpy.strings`.
29
30use ferray_core::Array;
31use ferray_core::dimension::Dimension;
32use ferray_core::error::FerrayResult;
33
34use crate::string_array::StringArray;
35
36/// Return the length of each string element. Matches
37/// `numpy.strings.str_len` (#518).
38pub fn str_len<D: Dimension>(a: &StringArray<D>) -> FerrayResult<Array<i64, D>> {
39    // numpy `string_ufuncs.cpp:118`: `*(npy_intp *)out = buf.num_codepoints();`
40    // — signed `npy_intp` counting Unicode code points, NOT UTF-8 bytes.
41    let data: Vec<i64> = a.iter().map(|s| s.chars().count() as i64).collect();
42    Array::from_vec(a.dim().clone(), data)
43}
44
45/// Swap the case of each character in every element. Matches
46/// `numpy.strings.swapcase` (#515).
47pub fn swapcase<D: Dimension>(a: &StringArray<D>) -> FerrayResult<StringArray<D>> {
48    a.map(|s| {
49        s.chars()
50            .map(|c| {
51                if c.is_uppercase() {
52                    c.to_lowercase().collect::<String>()
53                } else if c.is_lowercase() {
54                    c.to_uppercase().collect::<String>()
55                } else {
56                    c.to_string()
57                }
58            })
59            .collect()
60    })
61}
62
63// ---------------------------------------------------------------------------
64// Elementwise string comparison (#516)
65// ---------------------------------------------------------------------------
66
67/// Elementwise string equality. Both arrays must have the same shape.
68pub fn equal<D: Dimension>(a: &StringArray<D>, b: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
69    let data: Vec<bool> = a.iter().zip(b.iter()).map(|(x, y)| x == y).collect();
70    Array::from_vec(a.dim().clone(), data)
71}
72
73/// Elementwise string inequality.
74pub fn not_equal<D: Dimension>(
75    a: &StringArray<D>,
76    b: &StringArray<D>,
77) -> FerrayResult<Array<bool, D>> {
78    let data: Vec<bool> = a.iter().zip(b.iter()).map(|(x, y)| x != y).collect();
79    Array::from_vec(a.dim().clone(), data)
80}
81
82/// Elementwise lexicographic less-than.
83pub fn less<D: Dimension>(a: &StringArray<D>, b: &StringArray<D>) -> FerrayResult<Array<bool, D>> {
84    let data: Vec<bool> = a.iter().zip(b.iter()).map(|(x, y)| x < y).collect();
85    Array::from_vec(a.dim().clone(), data)
86}
87
88/// Elementwise lexicographic greater-than.
89pub fn greater<D: Dimension>(
90    a: &StringArray<D>,
91    b: &StringArray<D>,
92) -> FerrayResult<Array<bool, D>> {
93    let data: Vec<bool> = a.iter().zip(b.iter()).map(|(x, y)| x > y).collect();
94    Array::from_vec(a.dim().clone(), data)
95}
96
97/// Elementwise lexicographic less-or-equal.
98pub fn less_equal<D: Dimension>(
99    a: &StringArray<D>,
100    b: &StringArray<D>,
101) -> FerrayResult<Array<bool, D>> {
102    let data: Vec<bool> = a.iter().zip(b.iter()).map(|(x, y)| x <= y).collect();
103    Array::from_vec(a.dim().clone(), data)
104}
105
106/// Elementwise lexicographic greater-or-equal.
107pub fn greater_equal<D: Dimension>(
108    a: &StringArray<D>,
109    b: &StringArray<D>,
110) -> FerrayResult<Array<bool, D>> {
111    let data: Vec<bool> = a.iter().zip(b.iter()).map(|(x, y)| x >= y).collect();
112    Array::from_vec(a.dim().clone(), data)
113}
114
#[cfg(test)]
mod tests {
    //! Unit tests for the string helpers in this module: one basic case per
    //! public function, plus non-ASCII cases pinning the documented
    //! code-point and full-case-mapping behavior.

    use super::*;
    use crate::string_array::array;

    #[test]
    fn test_str_len() {
        let a = array(&["hello", "", "abc", "hi"]).unwrap();
        let r = str_len(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[5, 0, 3, 2]);
    }

    /// Lengths are counted in code points, not UTF-8 bytes: "héllo" is
    /// 6 bytes but 5 code points, and "日本語" is 9 bytes but 3 code points.
    #[test]
    fn test_str_len_counts_code_points() {
        let a = array(&["h\u{e9}llo", "\u{65e5}\u{672c}\u{8a9e}"]).unwrap();
        let r = str_len(&a).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[5, 3]);
    }

    #[test]
    fn test_swapcase() {
        let a = array(&["Hello World", "ABC", "abc", "123"]).unwrap();
        let r = swapcase(&a).unwrap();
        assert_eq!(r.as_slice(), &["hELLO wORLD", "abc", "ABC", "123"]);
    }

    /// A single character may expand to several: lowercase sharp s
    /// uppercases to "SS".
    #[test]
    fn test_swapcase_multi_char_expansion() {
        let a = array(&["stra\u{df}e"]).unwrap();
        let r = swapcase(&a).unwrap();
        assert_eq!(r.as_slice(), &["STRASSE"]);
    }

    #[test]
    fn test_equal() {
        let a = array(&["abc", "def", "ghi"]).unwrap();
        let b = array(&["abc", "xyz", "ghi"]).unwrap();
        let r = equal(&a, &b).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, true]);
    }

    #[test]
    fn test_not_equal() {
        let a = array(&["abc", "def", "ghi"]).unwrap();
        let b = array(&["abc", "xyz", "ghi"]).unwrap();
        let r = not_equal(&a, &b).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[false, true, false]);
    }

    #[test]
    fn test_less() {
        let a = array(&["abc", "xyz"]).unwrap();
        let b = array(&["abd", "abc"]).unwrap();
        let r = less(&a, &b).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false]);
    }

    #[test]
    fn test_greater() {
        let a = array(&["xyz", "abc"]).unwrap();
        let b = array(&["abc", "xyz"]).unwrap();
        let r = greater(&a, &b).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false]);
    }

    /// Equal elements satisfy `<=`; a strict prefix sorts before the longer
    /// string.
    #[test]
    fn test_less_equal() {
        let a = array(&["abc", "abd", "ab"]).unwrap();
        let b = array(&["abc", "abc", "abc"]).unwrap();
        let r = less_equal(&a, &b).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, false, true]);
    }

    /// Equal elements satisfy `>=`; a strict prefix does not.
    #[test]
    fn test_greater_equal() {
        let a = array(&["abc", "abd", "ab"]).unwrap();
        let b = array(&["abc", "abc", "abc"]).unwrap();
        let r = greater_equal(&a, &b).unwrap();
        assert_eq!(r.as_slice().unwrap(), &[true, true, false]);
    }
}