Skip to main content

ferray_strings/
string_array.rs

1// ferray-strings: StringArray<D> type definition (REQ-1, REQ-2)
2//
3// StringArray is a specialized array type backed by Vec<String>.
4// String does not implement Element, so we cannot use NdArray<String, D>.
5// Instead we store shape metadata alongside a flat Vec<String>.
6
7use ferray_core::dimension::{Dimension, Ix1, Ix2, IxDyn};
8use ferray_core::error::{FerrayError, FerrayResult};
9
10/// A specialized N-dimensional array of strings.
11///
12/// Unlike [`ferray_core::Array`], this type does not require `Element` —
13/// it stores `Vec<String>` directly with shape metadata for indexing.
14///
15/// The data is stored in row-major (C) order.
16#[derive(Debug, Clone)]
17pub struct StringArray<D: Dimension> {
18    /// Flat storage of string data in row-major order.
19    data: Vec<String>,
20    /// The shape of this array.
21    dim: D,
22}
23
24/// 1-dimensional string array.
25pub type StringArray1 = StringArray<Ix1>;
26
27/// 2-dimensional string array.
28pub type StringArray2 = StringArray<Ix2>;
29
30impl<D: Dimension> StringArray<D> {
31    /// Create a new `StringArray` from a flat vector of strings and a shape.
32    ///
33    /// # Errors
34    /// Returns `FerrayError::ShapeMismatch` if `data.len()` does not equal
35    /// the product of the shape dimensions.
36    pub fn from_vec(dim: D, data: Vec<String>) -> FerrayResult<Self> {
37        let expected = dim.size();
38        if data.len() != expected {
39            return Err(FerrayError::shape_mismatch(format!(
40                "data length {} does not match shape {:?} (expected {})",
41                data.len(),
42                dim.as_slice(),
43                expected,
44            )));
45        }
46        Ok(Self { data, dim })
47    }
48
49    /// Create a `StringArray` filled with empty strings.
50    ///
51    /// # Errors
52    /// This function is infallible for valid shapes but returns `Result`
53    /// for API consistency.
54    pub fn empty(dim: D) -> FerrayResult<Self> {
55        let size = dim.size();
56        let data = vec![String::new(); size];
57        Ok(Self { data, dim })
58    }
59
60    /// Return the shape as a slice.
61    #[inline]
62    pub fn shape(&self) -> &[usize] {
63        self.dim.as_slice()
64    }
65
66    /// Return the number of dimensions.
67    #[inline]
68    pub fn ndim(&self) -> usize {
69        self.dim.ndim()
70    }
71
72    /// Return the total number of elements.
73    #[inline]
74    pub fn len(&self) -> usize {
75        self.data.len()
76    }
77
78    /// Return `true` if the array has no elements.
79    #[inline]
80    pub fn is_empty(&self) -> bool {
81        self.data.is_empty()
82    }
83
84    /// Return a reference to the dimension descriptor.
85    #[inline]
86    pub fn dim(&self) -> &D {
87        &self.dim
88    }
89
90    /// Return a reference to the flat data.
91    #[inline]
92    pub fn as_slice(&self) -> &[String] {
93        &self.data
94    }
95
96    /// Return a mutable reference to the flat data.
97    #[inline]
98    pub fn as_slice_mut(&mut self) -> &mut [String] {
99        &mut self.data
100    }
101
102    /// Consume self and return the underlying `Vec<String>`.
103    #[inline]
104    pub fn into_vec(self) -> Vec<String> {
105        self.data
106    }
107
108    /// Apply a function to each element, producing a new `StringArray`.
109    pub fn map<F>(&self, f: F) -> FerrayResult<StringArray<D>>
110    where
111        F: Fn(&str) -> String,
112    {
113        let data: Vec<String> = self.data.iter().map(|s| f(s)).collect();
114        StringArray::from_vec(self.dim.clone(), data)
115    }
116
117    /// Apply a function to each element, producing a `Vec<T>`.
118    ///
119    /// This is a lower-level helper used by search and boolean operations
120    /// that need to produce typed arrays (e.g., `Array<bool, D>`).
121    pub fn map_to_vec<T, F>(&self, f: F) -> Vec<T>
122    where
123        F: Fn(&str) -> T,
124    {
125        self.data.iter().map(|s| f(s)).collect()
126    }
127
128    /// Iterate over all elements.
129    pub fn iter(&self) -> std::slice::Iter<'_, String> {
130        self.data.iter()
131    }
132}
133
134impl<D: Dimension> PartialEq for StringArray<D> {
135    fn eq(&self, other: &Self) -> bool {
136        self.dim == other.dim && self.data == other.data
137    }
138}
139
140impl<D: Dimension> Eq for StringArray<D> {}
141
142// ---------------------------------------------------------------------------
143// Construction from string slices (REQ-2)
144// ---------------------------------------------------------------------------
145
146impl StringArray<Ix1> {
147    /// Create a 1-D `StringArray` from a slice of string-like values.
148    ///
149    /// # Examples
150    /// ```ignore
151    /// let a = StringArray1::from_slice(&["hello", "world"]).unwrap();
152    /// ```
153    pub fn from_slice(items: &[&str]) -> FerrayResult<Self> {
154        let data: Vec<String> = items.iter().map(|s| (*s).to_string()).collect();
155        let dim = Ix1::new([data.len()]);
156        Self::from_vec(dim, data)
157    }
158}
159
160impl StringArray<Ix2> {
161    /// Create a 2-D `StringArray` from nested slices.
162    ///
163    /// # Errors
164    /// Returns `FerrayError::ShapeMismatch` if inner slices have different lengths.
165    pub fn from_rows(rows: &[&[&str]]) -> FerrayResult<Self> {
166        if rows.is_empty() {
167            return Self::from_vec(Ix2::new([0, 0]), Vec::new());
168        }
169        let ncols = rows[0].len();
170        for (i, row) in rows.iter().enumerate() {
171            if row.len() != ncols {
172                return Err(FerrayError::shape_mismatch(format!(
173                    "row {} has length {} but row 0 has length {}",
174                    i,
175                    row.len(),
176                    ncols
177                )));
178            }
179        }
180        let nrows = rows.len();
181        let data: Vec<String> = rows
182            .iter()
183            .flat_map(|row| row.iter().map(|s| (*s).to_string()))
184            .collect();
185        Self::from_vec(Ix2::new([nrows, ncols]), data)
186    }
187}
188
189impl StringArray<IxDyn> {
190    /// Create a dynamic-rank `StringArray` from a flat vec and a dynamic shape.
191    pub fn from_vec_dyn(shape: &[usize], data: Vec<String>) -> FerrayResult<Self> {
192        Self::from_vec(IxDyn::new(shape), data)
193    }
194}
195
196/// Create a 1-D `StringArray` from a slice of strings — the primary
197/// constructor matching `numpy.strings.array(...)`.
198///
199/// # Errors
200/// This function is infallible for valid inputs but returns `Result`
201/// for API consistency.
202pub fn array(items: &[&str]) -> FerrayResult<StringArray1> {
203    StringArray1::from_slice(items)
204}
205
206// ---------------------------------------------------------------------------
207// Broadcasting helpers for binary string operations
208// ---------------------------------------------------------------------------
209
210use ferray_core::dimension::broadcast::broadcast_shapes;
211
/// Output of [`broadcast_binary`]: the broadcast shape, followed by one
/// `(index_into_a, index_into_b)` pair of flat indices per output element.
pub(crate) type BroadcastResult = (Vec<usize>, Vec<(usize, usize)>);
214
215/// Compute the broadcast result of two `StringArray`s, returning paired
216/// element indices into the flat data of each array.
217///
218/// Returns `(broadcast_shape, pairs)` where each pair is `(idx_a, idx_b)`.
219pub(crate) fn broadcast_binary<Da: Dimension, Db: Dimension>(
220    a: &StringArray<Da>,
221    b: &StringArray<Db>,
222) -> FerrayResult<BroadcastResult> {
223    let shape_a = a.shape();
224    let shape_b = b.shape();
225    let out_shape = broadcast_shapes(shape_a, shape_b)?;
226    let out_size: usize = out_shape.iter().product();
227
228    let strides_a = compute_strides(shape_a);
229    let strides_b = compute_strides(shape_b);
230
231    let mut pairs = Vec::with_capacity(out_size);
232    for linear in 0..out_size {
233        let multi = linear_to_multi(linear, &out_shape);
234        let idx_a = multi_to_broadcast_linear(&multi, shape_a, &strides_a);
235        let idx_b = multi_to_broadcast_linear(&multi, shape_b, &strides_b);
236        pairs.push((idx_a, idx_b));
237    }
238
239    Ok((out_shape, pairs))
240}
241
/// Compute row-major (C-order) strides, measured in elements, for `shape`.
///
/// The last axis gets stride 1 and every earlier axis gets the stride of
/// the axis after it multiplied by that axis's extent. A rank-0 shape
/// yields an empty vector.
fn compute_strides(shape: &[usize]) -> Vec<usize> {
    let rank = shape.len();
    let mut strides = vec![1usize; rank];
    // Walk from the innermost axis outwards, deriving each stride from the
    // one to its right. The range is empty for rank 0 and rank 1.
    for axis in (1..rank).rev() {
        strides[axis - 1] = strides[axis] * shape[axis];
    }
    strides
}
254
/// Decompose a flat row-major index into one coordinate per axis of
/// `shape`.
///
/// Axes are peeled off from the innermost outwards with remainder and
/// division. An axis of extent 0 is skipped: its coordinate stays 0 and
/// `linear` is left untouched, which also avoids a division by zero.
fn linear_to_multi(mut linear: usize, shape: &[usize]) -> Vec<usize> {
    let mut indices = vec![0usize; shape.len()];
    for (slot, &extent) in indices.iter_mut().zip(shape).rev() {
        if extent == 0 {
            continue;
        }
        *slot = linear % extent;
        linear /= extent;
    }
    indices
}
267
/// Map coordinates in the broadcast output (`multi`) to a flat index into a
/// source array with shape `src_shape` and strides `src_strides`.
///
/// The source shape is aligned with the trailing axes of `multi`; any
/// leading output axes the source lacks are ignored. On a source axis of
/// extent 1 the coordinate is pinned to 0, so that single element is reused
/// along the whole output axis.
fn multi_to_broadcast_linear(multi: &[usize], src_shape: &[usize], src_strides: &[usize]) -> usize {
    // Number of leading output axes that have no counterpart in the source.
    let lead = multi.len().saturating_sub(src_shape.len());

    src_shape
        .iter()
        .enumerate()
        .fold(0usize, |offset, (axis, &extent)| {
            let coord = multi[axis + lead];
            let pinned = if extent == 1 { 0 } else { coord };
            offset + pinned * src_strides[axis]
        })
}
284
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // Construction and basic accessors
    // -----------------------------------------------------------------------

    #[test]
    fn create_from_slice() {
        let a = array(&["hello", "world"]).unwrap();
        assert_eq!(a.shape(), &[2]);
        assert_eq!(a.len(), 2);
        assert_eq!(a.as_slice()[0], "hello");
        assert_eq!(a.as_slice()[1], "world");
    }

    #[test]
    fn create_from_vec() {
        let a = StringArray1::from_vec(Ix1::new([3]), vec!["a".into(), "b".into(), "c".into()])
            .unwrap();
        assert_eq!(a.shape(), &[3]);
    }

    #[test]
    fn shape_mismatch_error() {
        // Two elements cannot fill a shape that calls for five.
        let res = StringArray1::from_vec(Ix1::new([5]), vec!["a".into(), "b".into()]);
        assert!(res.is_err());
    }

    #[test]
    fn empty_array() {
        let a = StringArray1::empty(Ix1::new([4])).unwrap();
        assert_eq!(a.len(), 4);
        assert!(a.as_slice().iter().all(|s| s.is_empty()));
    }

    #[test]
    fn map_strings() {
        let a = array(&["hello", "world"]).unwrap();
        let b = a.map(|s| s.to_uppercase()).unwrap();
        assert_eq!(b.as_slice()[0], "HELLO");
        assert_eq!(b.as_slice()[1], "WORLD");
    }

    #[test]
    fn from_rows_2d() {
        let a = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        assert_eq!(a.shape(), &[2, 2]);
        assert_eq!(a.as_slice(), &["a", "b", "c", "d"]);
    }

    #[test]
    fn from_rows_ragged_error() {
        let res = StringArray2::from_rows(&[&["a", "b"], &["c"]]);
        assert!(res.is_err());
    }

    #[test]
    fn equality() {
        let a = array(&["x", "y"]).unwrap();
        let b = array(&["x", "y"]).unwrap();
        let c = array(&["x", "z"]).unwrap();
        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    #[test]
    fn into_vec() {
        let a = array(&["a", "b"]).unwrap();
        let v = a.into_vec();
        assert_eq!(v, vec!["a".to_string(), "b".to_string()]);
    }

    // -----------------------------------------------------------------------
    // Broadcasting
    // -----------------------------------------------------------------------

    #[test]
    fn broadcast_binary_scalar() {
        let a = array(&["hello", "world"]).unwrap();
        let b = array(&["!"]).unwrap();
        let (shape, pairs) = broadcast_binary(&a, &b).unwrap();
        assert_eq!(shape, vec![2]);
        assert_eq!(pairs, vec![(0, 0), (1, 0)]);
    }

    #[test]
    fn broadcast_binary_same_shape() {
        let a = array(&["a", "b", "c"]).unwrap();
        let b = array(&["x", "y", "z"]).unwrap();
        let (shape, pairs) = broadcast_binary(&a, &b).unwrap();
        assert_eq!(shape, vec![3]);
        assert_eq!(pairs, vec![(0, 0), (1, 1), (2, 2)]);
    }

    // -----------------------------------------------------------------------
    // Private index helpers. These are tested directly because the
    // `broadcast_binary` tests above only use rank-1 operands and so never
    // reach the multi-axis, rank-padding or zero-extent paths.
    // -----------------------------------------------------------------------

    #[test]
    fn compute_strides_row_major() {
        assert_eq!(compute_strides(&[]), Vec::<usize>::new());
        assert_eq!(compute_strides(&[5]), vec![1]);
        assert_eq!(compute_strides(&[2, 3, 4]), vec![12, 4, 1]);
        // A zero extent zeroes every stride to its left.
        assert_eq!(compute_strides(&[3, 0, 2]), vec![0, 2, 1]);
    }

    #[test]
    fn linear_to_multi_decomposes_index() {
        assert_eq!(linear_to_multi(0, &[2, 3]), vec![0, 0]);
        assert_eq!(linear_to_multi(5, &[2, 3]), vec![1, 2]);
        // Zero-extent axes are skipped and keep coordinate 0.
        assert_eq!(linear_to_multi(3, &[2, 0, 2]), vec![1, 0, 1]);
        assert_eq!(linear_to_multi(0, &[]), Vec::<usize>::new());
    }

    #[test]
    fn multi_to_broadcast_linear_pads_and_pins() {
        // Same rank, no broadcasting.
        assert_eq!(multi_to_broadcast_linear(&[1, 2], &[2, 3], &[3, 1]), 5);
        // Lower-rank source is aligned with the trailing output axes.
        assert_eq!(multi_to_broadcast_linear(&[1, 2], &[3], &[1]), 2);
        // Size-1 source axes always read index 0.
        assert_eq!(multi_to_broadcast_linear(&[1, 2], &[2, 1], &[1, 1]), 1);
        assert_eq!(multi_to_broadcast_linear(&[1, 2], &[1, 3], &[3, 1]), 2);
    }
}