Skip to main content

ferray_strings/
search.rs

1// ferray-strings: Search operations (REQ-8, REQ-9, REQ-10)
2//
3// Implements find, count, startswith, endswith, replace — elementwise on StringArray.
4
5use ferray_core::Array;
6use ferray_core::dimension::Dimension;
7use ferray_core::error::FerrayResult;
8
9use crate::string_array::StringArray;
10
11/// Find the lowest index of `sub` in each string element.
12///
13/// Returns an `Array<i64, D>` preserving the input shape, where each element
14/// is the index of the first occurrence of `sub`, or -1 if not found.
15///
16/// # Errors
17/// Returns an error if the internal array construction fails.
18pub fn find<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<i64, D>> {
19    let data: Vec<i64> = a.map_to_vec(|s| {
20        match s.find(sub) {
21            Some(byte_idx) => {
22                // Convert byte index to character index
23                s[..byte_idx].chars().count() as i64
24            }
25            None => -1,
26        }
27    });
28    Array::from_vec(a.dim().clone(), data)
29}
30
31/// Count non-overlapping occurrences of `sub` in each string element.
32///
33/// Returns an `Array<u64, D>` preserving the input shape.
34///
35/// # Errors
36/// Returns an error if the internal array construction fails.
37pub fn count<D: Dimension>(a: &StringArray<D>, sub: &str) -> FerrayResult<Array<u64, D>> {
38    let data: Vec<u64> = a.map_to_vec(|s| s.matches(sub).count() as u64);
39    Array::from_vec(a.dim().clone(), data)
40}
41
42/// Test whether each string element starts with the given prefix.
43///
44/// Returns an `Array<bool, D>` preserving the input shape.
45///
46/// # Errors
47/// Returns an error if the internal array construction fails.
48pub fn startswith<D: Dimension>(a: &StringArray<D>, prefix: &str) -> FerrayResult<Array<bool, D>> {
49    let data: Vec<bool> = a.map_to_vec(|s| s.starts_with(prefix));
50    Array::from_vec(a.dim().clone(), data)
51}
52
53/// Test whether each string element ends with the given suffix.
54///
55/// Returns an `Array<bool, D>` preserving the input shape.
56///
57/// # Errors
58/// Returns an error if the internal array construction fails.
59pub fn endswith<D: Dimension>(a: &StringArray<D>, suffix: &str) -> FerrayResult<Array<bool, D>> {
60    let data: Vec<bool> = a.map_to_vec(|s| s.ends_with(suffix));
61    Array::from_vec(a.dim().clone(), data)
62}
63
64/// Replace occurrences of `old` with `new` in each string element.
65///
66/// If `max_count` is `Some(n)`, only the first `n` occurrences are replaced.
67/// If `None`, all occurrences are replaced.
68///
69/// # Errors
70/// Returns an error if the internal array construction fails.
71pub fn replace<D: Dimension>(
72    a: &StringArray<D>,
73    old: &str,
74    new: &str,
75    max_count: Option<usize>,
76) -> FerrayResult<StringArray<D>> {
77    a.map(|s| match max_count {
78        None => s.replace(old, new),
79        Some(n) => s.replacen(old, new, n),
80    })
81}
82
#[cfg(test)]
mod tests {
    use super::*;
    use crate::string_array::array;

    /// `find` yields the index of the first match, or -1 when absent.
    #[test]
    fn test_find() {
        let input = array(&["hello", "world", "bell"]).unwrap();
        let found = find(&input, "ll").unwrap();
        assert_eq!(found.as_slice().unwrap(), &[2, -1, 2]);
    }

    /// A match at the very beginning of an element is reported as index 0.
    #[test]
    fn test_find_at_start() {
        let input = array(&["abc", "def"]).unwrap();
        let found = find(&input, "abc").unwrap();
        assert_eq!(found.as_slice().unwrap(), &[0, -1]);
    }

    /// An empty needle matches at index 0.
    #[test]
    fn test_find_empty_sub() {
        let input = array(&["hello"]).unwrap();
        let found = find(&input, "").unwrap();
        assert_eq!(found.as_slice().unwrap(), &[0]);
    }

    /// `count` tallies occurrences per element.
    #[test]
    fn test_count() {
        let input = array(&["abcabc", "abc", "xyz"]).unwrap();
        let tallies = count(&input, "abc").unwrap();
        assert_eq!(tallies.as_slice().unwrap(), &[2_u64, 1, 0]);
    }

    /// `startswith` flags the elements that begin with the prefix.
    #[test]
    fn test_startswith() {
        let input = array(&["hello", "world", "help"]).unwrap();
        let flags = startswith(&input, "hel").unwrap();
        assert_eq!(flags.as_slice().unwrap(), &[true, false, true]);
    }

    /// `endswith` flags the elements that finish with the suffix.
    #[test]
    fn test_endswith() {
        let input = array(&["hello", "world", "bello"]).unwrap();
        let flags = endswith(&input, "llo").unwrap();
        assert_eq!(flags.as_slice().unwrap(), &[true, false, true]);
    }

    /// With no limit, every occurrence is replaced.
    #[test]
    fn test_replace_all() {
        let input = array(&["aabbcc", "aabba"]).unwrap();
        let replaced = replace(&input, "aa", "XX", None).unwrap();
        assert_eq!(replaced.as_slice(), &["XXbbcc", "XXbba"]);
    }

    /// With a limit of 2, only the first two occurrences are replaced.
    #[test]
    fn test_replace_with_count() {
        let input = array(&["ababab"]).unwrap();
        let replaced = replace(&input, "ab", "X", Some(2)).unwrap();
        assert_eq!(replaced.as_slice(), &["XXab"]);
    }

    #[test]
    fn test_find_ac3() {
        // AC-3: strings::find(&a, "ll") returns correct indices (2 for "hello", -1 for "world")
        let input = array(&["hello", "world"]).unwrap();
        let found = find(&input, "ll").unwrap();
        assert_eq!(found.as_slice().unwrap(), &[2, -1]);
    }
}