Skip to main content

ferray_strings/
search.rs

1// ferray-strings: Search operations (REQ-8, REQ-9, REQ-10)
2//
3// Implements find, count, startswith, endswith, replace — elementwise on StringArray.
4
5use ferray_core::Array;
6use ferray_core::dimension::{Dimension, Ix1};
7use ferray_core::error::FerrayResult;
8
9use crate::string_array::StringArray;
10
11/// Find the lowest index of `sub` in each string element.
12///
13/// Returns an `Array<i64>` where each element is the index of the first
14/// occurrence of `sub`, or -1 if not found.
15///
16/// # Errors
17/// Returns an error if the internal array construction fails.
18pub fn find<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<i64, Ix1>> {
19    let data: Vec<i64> = a.map_to_vec(|s| {
20        match s.find(sub) {
21            Some(byte_idx) => {
22                // Convert byte index to character index
23                s[..byte_idx].chars().count() as i64
24            }
25            None => -1,
26        }
27    });
28    let dim = Ix1::new([data.len()]);
29    Array::from_vec(dim, data)
30}
31
32/// Count non-overlapping occurrences of `sub` in each string element.
33///
34/// Returns an `Array<u64>` with the count for each element.
35///
36/// # Errors
37/// Returns an error if the internal array construction fails.
38pub fn count<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<u64, Ix1>> {
39    let data: Vec<u64> = a.map_to_vec(|s| s.matches(sub).count() as u64);
40    let dim = Ix1::new([data.len()]);
41    Array::from_vec(dim, data)
42}
43
44/// Test whether each string element starts with the given prefix.
45///
46/// Returns an `Array<bool>` indicating the result for each element.
47///
48/// # Errors
49/// Returns an error if the internal array construction fails.
50pub fn startswith<D: Dimension>(
51    a: &StringArray<D>,
52    prefix: &str,
53) -> FerrayResult<Array<bool, Ix1>> {
54    let data: Vec<bool> = a.map_to_vec(|s| s.starts_with(prefix));
55    let dim = Ix1::new([data.len()]);
56    Array::from_vec(dim, data)
57}
58
59/// Test whether each string element ends with the given suffix.
60///
61/// Returns an `Array<bool>` indicating the result for each element.
62///
63/// # Errors
64/// Returns an error if the internal array construction fails.
65pub fn endswith<D: Dimension>(a: &StringArray<D>, suffix: &str) -> FerrayResult<Array<bool, Ix1>> {
66    let data: Vec<bool> = a.map_to_vec(|s| s.ends_with(suffix));
67    let dim = Ix1::new([data.len()]);
68    Array::from_vec(dim, data)
69}
70
71/// Replace occurrences of `old` with `new` in each string element.
72///
73/// If `max_count` is `Some(n)`, only the first `n` occurrences are replaced.
74/// If `None`, all occurrences are replaced.
75///
76/// # Errors
77/// Returns an error if the internal array construction fails.
78pub fn replace<D: Dimension>(
79    a: &StringArray<D>,
80    old: &str,
81    new: &str,
82    max_count: Option<usize>,
83) -> FerrayResult<StringArray<D>> {
84    a.map(|s| match max_count {
85        None => s.replace(old, new),
86        Some(n) => s.replacen(old, new, n),
87    })
88}
89
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    // `find` reports character positions and -1 for elements without a match.
    #[test]
    fn test_find() {
        let input = array(&["hello", "world", "bell"]).unwrap();
        let found = find(&input, "ll").unwrap();
        let expected: [i64; 3] = [2, -1, 2];
        assert_eq!(found.as_slice().unwrap(), &expected);
    }

    // A match at the very beginning of an element yields index 0.
    #[test]
    fn test_find_at_start() {
        let input = array(&["abc", "def"]).unwrap();
        let found = find(&input, "abc").unwrap();
        let expected: [i64; 2] = [0, -1];
        assert_eq!(found.as_slice().unwrap(), &expected);
    }

    // An empty needle matches at index 0.
    #[test]
    fn test_find_empty_sub() {
        let input = array(&["hello"]).unwrap();
        let found = find(&input, "").unwrap();
        let expected: [i64; 1] = [0];
        assert_eq!(found.as_slice().unwrap(), &expected);
    }

    // `count` tallies occurrences per element.
    #[test]
    fn test_count() {
        let input = array(&["abcabc", "abc", "xyz"]).unwrap();
        let tallies = count(&input, "abc").unwrap();
        let expected: [u64; 3] = [2, 1, 0];
        assert_eq!(tallies.as_slice().unwrap(), &expected);
    }

    // `startswith` flags the elements that begin with the prefix.
    #[test]
    fn test_startswith() {
        let input = array(&["hello", "world", "help"]).unwrap();
        let flags = startswith(&input, "hel").unwrap();
        let expected = [true, false, true];
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    // `endswith` flags the elements that finish with the suffix.
    #[test]
    fn test_endswith() {
        let input = array(&["hello", "world", "bello"]).unwrap();
        let flags = endswith(&input, "llo").unwrap();
        let expected = [true, false, true];
        assert_eq!(flags.as_slice().unwrap(), &expected);
    }

    // With no limit, every occurrence is replaced.
    #[test]
    fn test_replace_all() {
        let input = array(&["aabbcc", "aabba"]).unwrap();
        let replaced = replace(&input, "aa", "XX", None).unwrap();
        assert_eq!(replaced.as_slice(), &["XXbbcc", "XXbba"]);
    }

    // With a limit of two, only the first two occurrences are replaced.
    #[test]
    fn test_replace_with_count() {
        let input = array(&["ababab"]).unwrap();
        let replaced = replace(&input, "ab", "X", Some(2)).unwrap();
        assert_eq!(replaced.as_slice(), &["XXab"]);
    }

    #[test]
    fn test_find_ac3() {
        // AC-3: strings::find(&a, "ll") returns correct indices (2 for "hello", -1 for "world")
        let input = array(&["hello", "world"]).unwrap();
        let found = find(&input, "ll").unwrap();
        let expected: [i64; 2] = [2, -1];
        assert_eq!(found.as_slice().unwrap(), &expected);
    }
}