Skip to main content

ferray_strings/
search.rs

1// ferray-strings: Search operations (REQ-8, REQ-9, REQ-10)
2//
3// Implements find, count, startswith, endswith, replace — elementwise on StringArray.
4
5use ferray_core::Array;
6use ferray_core::dimension::Dimension;
7use ferray_core::error::FerrayResult;
8
9use crate::string_array::StringArray;
10
11/// Find the lowest index of `sub` in each string element.
12///
13/// Returns an `Array<i64, D>` preserving the input shape, where each element
14/// is the index of the first occurrence of `sub`, or -1 if not found.
15///
16/// # Errors
17/// Returns an error if the internal array construction fails.
18pub fn find<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<i64, D>> {
19    let data: Vec<i64> = a.map_to_vec(|s| {
20        match s.find(sub) {
21            Some(byte_idx) => {
22                // Convert byte index to character index
23                s[..byte_idx].chars().count() as i64
24            }
25            None => -1,
26        }
27    });
28    Array::from_vec(a.dim().clone(), data)
29}
30
31/// Count non-overlapping occurrences of `sub` in each string element.
32///
33/// Returns an `Array<u64, D>` preserving the input shape.
34///
35/// # Errors
36/// Returns an error if the internal array construction fails.
37pub fn count<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<u64, D>> {
38    let data: Vec<u64> = a.map_to_vec(|s| s.matches(sub).count() as u64);
39    Array::from_vec(a.dim().clone(), data)
40}
41
42/// Test whether each string element starts with the given prefix.
43///
44/// Returns an `Array<bool, D>` preserving the input shape.
45///
46/// # Errors
47/// Returns an error if the internal array construction fails.
48pub fn startswith<D: Dimension>(
49    a: &StringArray<D>,
50    prefix: &str,
51) -> FerrayResult<Array<bool, D>> {
52    let data: Vec<bool> = a.map_to_vec(|s| s.starts_with(prefix));
53    Array::from_vec(a.dim().clone(), data)
54}
55
56/// Test whether each string element ends with the given suffix.
57///
58/// Returns an `Array<bool, D>` preserving the input shape.
59///
60/// # Errors
61/// Returns an error if the internal array construction fails.
62pub fn endswith<D: Dimension>(a: &StringArray<D>, suffix: &str) -> FerrayResult<Array<bool, D>> {
63    let data: Vec<bool> = a.map_to_vec(|s| s.ends_with(suffix));
64    Array::from_vec(a.dim().clone(), data)
65}
66
67/// Replace occurrences of `old` with `new` in each string element.
68///
69/// If `max_count` is `Some(n)`, only the first `n` occurrences are replaced.
70/// If `None`, all occurrences are replaced.
71///
72/// # Errors
73/// Returns an error if the internal array construction fails.
74pub fn replace<D: Dimension>(
75    a: &StringArray<D>,
76    old: &str,
77    new: &str,
78    max_count: Option<usize>,
79) -> FerrayResult<StringArray<D>> {
80    a.map(|s| match max_count {
81        None => s.replace(old, new),
82        Some(n) => s.replacen(old, new, n),
83    })
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// `find` reports the index of the first match and -1 when there is none.
    #[test]
    fn test_find() {
        let words = array(&["hello", "world", "bell"]).unwrap();
        let positions = find(&words, "ll").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[2, -1, 2]);
    }

    /// A match at the very beginning of an element is reported as index 0.
    #[test]
    fn test_find_at_start() {
        let words = array(&["abc", "def"]).unwrap();
        let positions = find(&words, "abc").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[0, -1]);
    }

    /// Searching for the empty string matches at index 0.
    #[test]
    fn test_find_empty_sub() {
        let words = array(&["hello"]).unwrap();
        let positions = find(&words, "").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[0]);
    }

    /// `count` tallies occurrences per element, including zero.
    #[test]
    fn test_count() {
        let words = array(&["abcabc", "abc", "xyz"]).unwrap();
        let tallies = count(&words, "abc").unwrap();
        assert_eq!(tallies.as_slice().unwrap(), &[2_u64, 1, 0]);
    }

    /// `startswith` flags exactly the elements beginning with the prefix.
    #[test]
    fn test_startswith() {
        let words = array(&["hello", "world", "help"]).unwrap();
        let flags = startswith(&words, "hel").unwrap();
        assert_eq!(flags.as_slice().unwrap(), &[true, false, true]);
    }

    /// `endswith` flags exactly the elements finishing with the suffix.
    #[test]
    fn test_endswith() {
        let words = array(&["hello", "world", "bello"]).unwrap();
        let flags = endswith(&words, "llo").unwrap();
        assert_eq!(flags.as_slice().unwrap(), &[true, false, true]);
    }

    /// With no limit, every occurrence is replaced.
    #[test]
    fn test_replace_all() {
        let words = array(&["aabbcc", "aabba"]).unwrap();
        let replaced = replace(&words, "aa", "XX", None).unwrap();
        assert_eq!(replaced.as_slice(), &["XXbbcc", "XXbba"]);
    }

    /// With a limit of 2, only the first two occurrences are replaced.
    #[test]
    fn test_replace_with_count() {
        let words = array(&["ababab"]).unwrap();
        let replaced = replace(&words, "ab", "X", Some(2)).unwrap();
        assert_eq!(replaced.as_slice(), &["XXab"]);
    }

    /// AC-3: strings::find(&a, "ll") returns correct indices
    /// (2 for "hello", -1 for "world").
    #[test]
    fn test_find_ac3() {
        let words = array(&["hello", "world"]).unwrap();
        let positions = find(&words, "ll").unwrap();
        assert_eq!(positions.as_slice().unwrap(), &[2, -1]);
    }
}