Skip to main content

ferray_strings/
lib.rs

1// ferray-strings: Vectorized string operations on arrays of strings
2//
3// Implements `numpy.strings` (NumPy 2.0+): vectorized elementwise string
4// operations on arrays of strings with broadcasting. Covers case manipulation,
5// alignment/padding, stripping, find/replace, splitting/joining, and regex
6// support. Operates on `StringArray` — a separate array type backed by
7// `Vec<String>`.
8
9//! # ferray-strings
10//!
11//! Vectorized string operations on arrays of strings, analogous to
12//! `numpy.strings` in NumPy 2.0+.
13//!
14//! The primary type is [`StringArray`], a specialized N-dimensional array
15//! backed by `Vec<String>`. Since `String` does not implement
16//! [`ferray_core::Element`], this type is separate from `NdArray<T, D>`.
17//!
18//! # Quick Start
19//!
20//! ```ignore
21//! use ferray_strings::*;
22//!
23//! let a = array(&["hello", "world"]).unwrap();
24//! let b = upper(&a).unwrap();
25//! assert_eq!(b.as_slice(), &["HELLO", "WORLD"]);
26//! ```
27
28pub mod align;
29pub mod case;
30pub mod concat;
31pub mod regex_ops;
32pub mod search;
33pub mod split_join;
34pub mod string_array;
35pub mod strip;
36
37// Re-export types
38pub use string_array::{StringArray, StringArray1, StringArray2, array};
39
40// Re-export operations for flat namespace (like numpy.strings.upper etc.)
41pub use align::{center, ljust, rjust, zfill};
42pub use case::{capitalize, lower, title, upper};
43pub use concat::{add, multiply};
44pub use regex_ops::{extract, match_};
45pub use search::{count, endswith, find, replace, startswith};
46pub use split_join::{join, join_array, split};
47pub use strip::{lstrip, rstrip, strip};
48
#[cfg(test)]
mod integration_tests {
    //! Integration tests that exercise the operations re-exported at the
    //! crate root, one operation family per test.

    use super::*;

    /// AC-1: `upper` applied to `["hello", "world"]` yields `["HELLO", "WORLD"]`.
    #[test]
    fn ac1_upper() {
        let words = array(&["hello", "world"]).unwrap();
        let shouted = upper(&words).unwrap();
        assert_eq!(shouted.as_slice(), &["HELLO", "WORLD"]);
    }

    /// AC-2: `add` broadcasts a single-element array against a longer one.
    #[test]
    fn ac2_add_broadcast_scalar() {
        let words = array(&["hello", "world"]).unwrap();
        let bang = array(&["!"]).unwrap();
        let combined = add(&words, &bang).unwrap();
        assert_eq!(combined.as_slice(), &["hello!", "world!"]);
    }

    /// AC-3: `find` reports the match position per element, `-1` when absent.
    #[test]
    fn ac3_find_indices() {
        let words = array(&["hello", "world"]).unwrap();
        let positions = find(&words, "ll").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[2_i64, -1]);
    }

    /// AC-4: `split` on `"-"` produces one list of pieces per input element.
    #[test]
    fn ac4_split() {
        let pairs = array(&["a-b", "c-d"]).unwrap();
        let pieces = split(&pairs, "-").unwrap();

        let expected = vec![
            vec![String::from("a"), String::from("b")],
            vec![String::from("c"), String::from("d")],
        ];
        assert_eq!(pieces, expected);
    }

    /// AC-5: `match_` flags elements containing digits and `extract` returns
    /// the captured digits (empty string where nothing matched).
    #[test]
    fn ac5_regex() {
        let samples = array(&["abc123", "def", "ghi456"]).unwrap();

        let has_digits = match_(&samples, r"\d+").unwrap();
        assert_eq!(has_digits.as_slice().unwrap(), &[true, false, true]);

        let digits = extract(&samples, r"(\d+)").unwrap();
        assert_eq!(digits.as_slice(), &["123", "", "456"]);
    }

    /// End-to-end chain: strip whitespace, upper-case, append a suffix, then
    /// check the suffix with `endswith`.
    #[test]
    fn full_pipeline() {
        let padded = array(&["  Hello  ", " World "]).unwrap();
        let trimmed = strip(&padded, None).unwrap();
        let shouted = upper(&trimmed).unwrap();
        let bang = array(&["!"]).unwrap();
        let decorated = add(&shouted, &bang).unwrap();
        assert_eq!(decorated.as_slice(), &["HELLO!", "WORLD!"]);

        let ends_with_bang = endswith(&decorated, "!").unwrap();
        assert_eq!(ends_with_bang.as_slice().unwrap(), &[true, true]);
    }

    /// Lower-casing and then title-casing restores a title-cased input.
    #[test]
    fn case_round_trip() {
        let original = array(&["Hello World"]).unwrap();
        let lowered = lower(&original).unwrap();
        let restored = title(&lowered).unwrap();
        assert_eq!(restored.as_slice(), &["Hello World"]);
    }

    /// `center`, `ljust`, `rjust` and `zfill` pad to the requested width.
    #[test]
    fn alignment_operations() {
        let short = array(&["hi"]).unwrap();

        let centered = center(&short, 6, '-').unwrap();
        assert_eq!(centered.as_slice(), &["--hi--"]);

        let left_aligned = ljust(&short, 6).unwrap();
        assert_eq!(left_aligned.as_slice(), &["hi    "]);

        let right_aligned = rjust(&short, 6).unwrap();
        assert_eq!(right_aligned.as_slice(), &["    hi"]);

        let number = array(&["42"]).unwrap();
        let zero_padded = zfill(&number, 5).unwrap();
        assert_eq!(zero_padded.as_slice(), &["00042"]);
    }

    /// `strip`, `lstrip` and `rstrip` with no explicit character set remove
    /// surrounding spaces from both sides, the left, and the right.
    #[test]
    fn strip_operations() {
        let padded = array(&["  hello  "]).unwrap();

        let both_sides = strip(&padded, None).unwrap();
        assert_eq!(both_sides.as_slice(), &["hello"]);

        let left_only = lstrip(&padded, None).unwrap();
        assert_eq!(left_only.as_slice(), &["hello  "]);

        let right_only = rstrip(&padded, None).unwrap();
        assert_eq!(right_only.as_slice(), &["  hello"]);
    }

    /// `count` tallies occurrences of the needle in each element.
    #[test]
    fn search_operations() {
        let phrases = array(&["hello world", "foo bar"]).unwrap();
        let occurrences = count(&phrases, "o").unwrap();
        // Both phrases contain exactly two 'o' characters.
        assert_eq!(occurrences.as_slice().unwrap(), &[2_u64, 2_u64]);
    }

    /// `replace` with no count limit substitutes the matching substring.
    #[test]
    fn replace_operation() {
        let greeting = array(&["hello world"]).unwrap();
        let rewritten = replace(&greeting, "world", "rust", None).unwrap();
        assert_eq!(rewritten.as_slice(), &["hello rust"]);
    }

    /// `multiply` repeats each element the given number of times.
    #[test]
    fn multiply_operation() {
        let unit = array(&["ab"]).unwrap();
        let repeated = multiply(&unit, 3).unwrap();
        assert_eq!(repeated.as_slice(), &["ababab"]);
    }

    /// `join` glues each inner list of pieces together with the separator.
    #[test]
    fn join_operation() {
        let rows = vec![
            vec![String::from("a"), String::from("b")],
            vec![String::from("c"), String::from("d")],
        ];
        let joined = join("-", &rows).unwrap();
        assert_eq!(joined.as_slice(), &["a-b", "c-d"]);
    }

    /// `capitalize` upper-cases the first character and lower-cases the rest.
    #[test]
    fn capitalize_operation() {
        let mixed = array(&["hello world", "RUST"]).unwrap();
        let capitalized = capitalize(&mixed).unwrap();
        assert_eq!(capitalized.as_slice(), &["Hello world", "Rust"]);
    }

    /// Elementwise operations keep the shape of a 2-D string array.
    #[test]
    fn string_array_2d() {
        let grid = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        assert_eq!(grid.shape(), &[2, 2]);

        let shouted = upper(&grid).unwrap();
        assert_eq!(shouted.as_slice(), &["A", "B", "C", "D"]);
        assert_eq!(shouted.shape(), &[2, 2]);
    }
}
201}