Skip to main content

ferray_strings/
search.rs

// ferray-strings: Search operations (REQ-8, REQ-9, REQ-10)
//
// Implements find, count, startswith, endswith, replace — elementwise on StringArray.
4
// `find` returns an `i64` array following NumPy's `numpy.strings.find`
// contract (with `-1` for "not found"); converting the `usize` char count
// to `i64` is the published return type, not an accidental wrapping cast.
8#![allow(clippy::cast_possible_wrap)]
9
10use ferray_core::Array;
11use ferray_core::dimension::Dimension;
12use ferray_core::error::FerrayResult;
13
14use crate::string_array::StringArray;
15
16/// Find the lowest index of `sub` in each string element.
17///
18/// Returns an `Array<i64, D>` preserving the input shape, where each element
19/// is the index of the first occurrence of `sub`, or -1 if not found.
20///
21/// # Errors
22/// Returns an error if the internal array construction fails.
23pub fn find<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<i64, D>> {
24    let data: Vec<i64> = a.map_to_vec(|s| {
25        match s.find(sub) {
26            Some(byte_idx) => {
27                // Convert byte index to character index
28                s[..byte_idx].chars().count() as i64
29            }
30            None => -1,
31        }
32    });
33    Array::from_vec(a.dim().clone(), data)
34}
35
36/// Count non-overlapping occurrences of `sub` in each string element.
37///
38/// Returns an `Array<u64, D>` preserving the input shape.
39///
40/// # Errors
41/// Returns an error if the internal array construction fails.
42pub fn count<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<u64, D>> {
43    let data: Vec<u64> = a.map_to_vec(|s| s.matches(sub).count() as u64);
44    Array::from_vec(a.dim().clone(), data)
45}
46
47/// Test whether each string element starts with the given prefix.
48///
49/// Returns an `Array<bool, D>` preserving the input shape.
50///
51/// # Errors
52/// Returns an error if the internal array construction fails.
53pub fn startswith<D: Dimension>(a: &StringArray<D>, prefix: &str) -> FerrayResult<Array<bool, D>> {
54    let data: Vec<bool> = a.map_to_vec(|s| s.starts_with(prefix));
55    Array::from_vec(a.dim().clone(), data)
56}
57
58/// Test whether each string element ends with the given suffix.
59///
60/// Returns an `Array<bool, D>` preserving the input shape.
61///
62/// # Errors
63/// Returns an error if the internal array construction fails.
64pub fn endswith<D: Dimension>(a: &StringArray<D>, suffix: &str) -> FerrayResult<Array<bool, D>> {
65    let data: Vec<bool> = a.map_to_vec(|s| s.ends_with(suffix));
66    Array::from_vec(a.dim().clone(), data)
67}
68
69/// Replace occurrences of `old` with `new` in each string element.
70///
71/// If `max_count` is `Some(n)`, only the first `n` occurrences are replaced.
72/// If `None`, all occurrences are replaced.
73///
74/// # Errors
75/// Returns an error if the internal array construction fails.
76pub fn replace<D: Dimension>(
77    a: &StringArray<D>,
78    old: &str,
79    new: &str,
80    max_count: Option<usize>,
81) -> FerrayResult<StringArray<D>> {
82    a.map(|s| match max_count {
83        None => s.replace(old, new),
84        Some(n) => s.replacen(old, new, n),
85    })
86}
87
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    // `find` reports the index of the first match, or -1 when absent.
    #[test]
    fn test_find() {
        let words = array(&["hello", "world", "bell"]).unwrap();
        let positions = find(&words, "ll").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[2, -1, 2]);
    }

    // A match at the very beginning of an element yields index 0.
    #[test]
    fn test_find_at_start() {
        let words = array(&["abc", "def"]).unwrap();
        let positions = find(&words, "abc").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[0, -1]);
    }

    // The empty needle is found at index 0.
    #[test]
    fn test_find_empty_sub() {
        let words = array(&["hello"]).unwrap();
        let positions = find(&words, "").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[0]);
    }

    // `count` tallies non-overlapping occurrences per element.
    #[test]
    fn test_count() {
        let words = array(&["abcabc", "abc", "xyz"]).unwrap();
        let tallies = count(&words, "abc").unwrap();
        assert_eq!(tallies.as_slice().unwrap(), &[2_u64, 1, 0]);
    }

    // `startswith` flags the elements that begin with the prefix.
    #[test]
    fn test_startswith() {
        let words = array(&["hello", "world", "help"]).unwrap();
        let flags = startswith(&words, "hel").unwrap();
        assert_eq!(flags.as_slice().unwrap(), &[true, false, true]);
    }

    // `endswith` flags the elements that finish with the suffix.
    #[test]
    fn test_endswith() {
        let words = array(&["hello", "world", "bello"]).unwrap();
        let flags = endswith(&words, "llo").unwrap();
        assert_eq!(flags.as_slice().unwrap(), &[true, false, true]);
    }

    // With no limit, every occurrence is replaced.
    #[test]
    fn test_replace_all() {
        let words = array(&["aabbcc", "aabba"]).unwrap();
        let replaced = replace(&words, "aa", "XX", None).unwrap();
        assert_eq!(replaced.as_slice(), &["XXbbcc", "XXbba"]);
    }

    // With a limit of two, the third occurrence is left untouched.
    #[test]
    fn test_replace_with_count() {
        let words = array(&["ababab"]).unwrap();
        let replaced = replace(&words, "ab", "X", Some(2)).unwrap();
        assert_eq!(replaced.as_slice(), &["XXab"]);
    }

    #[test]
    fn test_find_ac3() {
        // AC-3: strings::find(&a, "ll") returns correct indices (2 for "hello", -1 for "world")
        let words = array(&["hello", "world"]).unwrap();
        let positions = find(&words, "ll").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[2, -1]);
    }
}