Skip to main content

ferray_strings/
string_array.rs

1// ferray-strings: StringArray<D> type definition (REQ-1, REQ-2)
2//
3// StringArray is a specialized array type backed by Vec<String>.
4// String does not implement Element, so we cannot use NdArray<String, D>.
5// Instead we store shape metadata alongside a flat Vec<String>.
6
7use ferray_core::dimension::{Dimension, Ix1, Ix2, IxDyn};
8use ferray_core::error::{FerrayError, FerrayResult};
9
10/// A specialized N-dimensional array of strings.
11///
12/// Unlike [`ferray_core::Array`], this type does not require `Element` —
13/// it stores `Vec<String>` directly with shape metadata for indexing.
14///
15/// The data is stored in row-major (C) order.
16#[derive(Debug, Clone)]
17pub struct StringArray<D: Dimension> {
18    /// Flat storage of string data in row-major order.
19    data: Vec<String>,
20    /// The shape of this array.
21    dim: D,
22}
23
24/// 1-dimensional string array.
25pub type StringArray1 = StringArray<Ix1>;
26
27/// 2-dimensional string array.
28pub type StringArray2 = StringArray<Ix2>;
29
30impl<D: Dimension> StringArray<D> {
31    /// Create a new `StringArray` from a flat vector of strings and a shape.
32    ///
33    /// # Errors
34    /// Returns `FerrayError::ShapeMismatch` if `data.len()` does not equal
35    /// the product of the shape dimensions.
36    pub fn from_vec(dim: D, data: Vec<String>) -> FerrayResult<Self> {
37        let expected = dim.size();
38        if data.len() != expected {
39            return Err(FerrayError::shape_mismatch(format!(
40                "data length {} does not match shape {:?} (expected {})",
41                data.len(),
42                dim.as_slice(),
43                expected,
44            )));
45        }
46        Ok(Self { data, dim })
47    }
48
49    /// Create a `StringArray` filled with empty strings.
50    ///
51    /// # Errors
52    /// This function is infallible for valid shapes but returns `Result`
53    /// for API consistency.
54    pub fn empty(dim: D) -> FerrayResult<Self> {
55        let size = dim.size();
56        let data = vec![String::new(); size];
57        Ok(Self { data, dim })
58    }
59
60    /// Return the shape as a slice.
61    #[inline]
62    pub fn shape(&self) -> &[usize] {
63        self.dim.as_slice()
64    }
65
66    /// Return the number of dimensions.
67    #[inline]
68    pub fn ndim(&self) -> usize {
69        self.dim.ndim()
70    }
71
72    /// Return the total number of elements.
73    #[inline]
74    pub fn len(&self) -> usize {
75        self.data.len()
76    }
77
78    /// Return `true` if the array has no elements.
79    #[inline]
80    pub fn is_empty(&self) -> bool {
81        self.data.is_empty()
82    }
83
84    /// Return a reference to the dimension descriptor.
85    #[inline]
86    pub fn dim(&self) -> &D {
87        &self.dim
88    }
89
90    /// Return a reference to the flat data.
91    #[inline]
92    pub fn as_slice(&self) -> &[String] {
93        &self.data
94    }
95
96    /// Return a mutable reference to the flat data.
97    #[inline]
98    pub fn as_slice_mut(&mut self) -> &mut [String] {
99        &mut self.data
100    }
101
102    /// Consume self and return the underlying `Vec<String>`.
103    #[inline]
104    pub fn into_vec(self) -> Vec<String> {
105        self.data
106    }
107
108    /// Apply a function to each element, producing a new `StringArray`.
109    pub fn map<F>(&self, f: F) -> FerrayResult<StringArray<D>>
110    where
111        F: Fn(&str) -> String,
112    {
113        let data: Vec<String> = self.data.iter().map(|s| f(s)).collect();
114        StringArray::from_vec(self.dim.clone(), data)
115    }
116
117    /// Apply a function to each element, producing a `Vec<T>`.
118    ///
119    /// This is a lower-level helper used by search and boolean operations
120    /// that need to produce typed arrays (e.g., `Array<bool, D>`).
121    pub fn map_to_vec<T, F>(&self, f: F) -> Vec<T>
122    where
123        F: Fn(&str) -> T,
124    {
125        self.data.iter().map(|s| f(s)).collect()
126    }
127
128    /// Iterate over all elements.
129    pub fn iter(&self) -> std::slice::Iter<'_, String> {
130        self.data.iter()
131    }
132
133    // -----------------------------------------------------------------
134    // Shape operations (#514) — parallel to `ferray_core::Array`
135    //
136    // StringArray can't reuse `Array<String, D>` because `String`
137    // isn't an `Element` (the trait is sealed inside ferray-core and
138    // `String` isn't `Copy`). Instead we mirror the shape API that
139    // `Array` exposes so callers can write shape-manipulation code
140    // that looks the same for string and numeric arrays.
141    // -----------------------------------------------------------------
142
143    /// Reshape this array to a new dimension type / shape. The total
144    /// element count must be unchanged.
145    ///
146    /// Since strings are cheap to move (they're owned), reshape just
147    /// rebuilds the array around the existing buffer. No data copy.
148    ///
149    /// # Errors
150    /// Returns [`FerrayError::ShapeMismatch`] if the new shape's
151    /// element count does not match `self.len()`.
152    pub fn reshape<D2: Dimension>(self, new_dim: D2) -> FerrayResult<StringArray<D2>> {
153        StringArray::<D2>::from_vec(new_dim, self.data)
154    }
155
156    /// Flatten to a 1-D `StringArray1` of length `self.len()`. The
157    /// row-major traversal order is preserved.
158    ///
159    /// This is the string analogue of `ndarray::Array::flatten` /
160    /// NumPy's `arr.flatten()`.
161    pub fn flatten(self) -> StringArray1 {
162        let n = self.data.len();
163        StringArray::<Ix1>::from_vec(Ix1::new([n]), self.data)
164            .expect("flatten: length check is trivially satisfied")
165    }
166
167    /// Convert to a dynamic-rank `StringArray<IxDyn>`. Useful when
168    /// the rank isn't known until runtime, or when interoperating
169    /// with code that only accepts `IxDyn`.
170    pub fn into_dyn(self) -> StringArray<IxDyn> {
171        let shape = self.dim.as_slice().to_vec();
172        StringArray::<IxDyn>::from_vec(IxDyn::new(&shape), self.data)
173            .expect("into_dyn: shape length check is trivially satisfied")
174    }
175
176    /// Look up an element by multi-dimensional index. Returns `None`
177    /// if the index is out of bounds.
178    ///
179    /// Indexing is row-major (C-order): for a `(rows, cols)` array,
180    /// index `[r, c]` maps to `data[r * cols + c]`.
181    pub fn get(&self, idx: &[usize]) -> Option<&String> {
182        let shape = self.dim.as_slice();
183        if idx.len() != shape.len() {
184            return None;
185        }
186        let mut flat = 0usize;
187        let mut stride = 1usize;
188        // Walk dimensions right-to-left (row-major).
189        for (i, (&dim, &k)) in shape.iter().zip(idx.iter()).enumerate().rev() {
190            if k >= dim {
191                return None;
192            }
193            if i == shape.len() - 1 {
194                flat += k;
195            } else {
196                flat += k * stride;
197            }
198            stride *= dim;
199        }
200        self.data.get(flat)
201    }
202}
203
204impl<D: Dimension> PartialEq for StringArray<D> {
205    fn eq(&self, other: &Self) -> bool {
206        self.dim == other.dim && self.data == other.data
207    }
208}
209
210impl<D: Dimension> Eq for StringArray<D> {}
211
212// Display: print like NumPy's `array(["a", "b", "c"])` (#278).
213impl<D: Dimension> std::fmt::Display for StringArray<D> {
214    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
215        write!(f, "array([")?;
216        for (i, s) in self.data.iter().enumerate() {
217            if i > 0 {
218                write!(f, ", ")?;
219            }
220            write!(f, "{s:?}")?;
221        }
222        write!(f, "])")
223    }
224}
225
226// IntoIterator for &StringArray yields &String (#278).
227impl<'a, D: Dimension> IntoIterator for &'a StringArray<D> {
228    type Item = &'a String;
229    type IntoIter = std::slice::Iter<'a, String>;
230
231    fn into_iter(self) -> Self::IntoIter {
232        self.data.iter()
233    }
234}
235
236// IntoIterator for StringArray yields owned Strings.
237impl<D: Dimension> IntoIterator for StringArray<D> {
238    type Item = String;
239    type IntoIter = std::vec::IntoIter<String>;
240
241    fn into_iter(self) -> Self::IntoIter {
242        self.data.into_iter()
243    }
244}
245
246// ---------------------------------------------------------------------------
247// Construction from string slices (REQ-2)
248// ---------------------------------------------------------------------------
249
250impl StringArray<Ix1> {
251    /// Create a 1-D `StringArray` from a slice of string-like values.
252    ///
253    /// # Examples
254    /// ```ignore
255    /// let a = StringArray1::from_slice(&["hello", "world"]).unwrap();
256    /// ```
257    pub fn from_slice(items: &[&str]) -> FerrayResult<Self> {
258        let data: Vec<String> = items.iter().map(|s| (*s).to_string()).collect();
259        let dim = Ix1::new([data.len()]);
260        Self::from_vec(dim, data)
261    }
262}
263
264impl StringArray<Ix2> {
265    /// Transpose a 2-D `StringArray`: swap rows and columns.
266    ///
267    /// Walks the elements into a new buffer — a `(r, c)` cell of the
268    /// input becomes `(c, r)` of the output. Strings are cloned to
269    /// avoid disturbing the original array.
270    pub fn transpose(&self) -> FerrayResult<StringArray<Ix2>> {
271        let shape = self.shape();
272        let (nrows, ncols) = (shape[0], shape[1]);
273        let mut data = Vec::with_capacity(nrows * ncols);
274        for c in 0..ncols {
275            for r in 0..nrows {
276                data.push(self.data[r * ncols + c].clone());
277            }
278        }
279        Self::from_vec(Ix2::new([ncols, nrows]), data)
280    }
281
282    /// Create a 2-D `StringArray` from nested slices.
283    ///
284    /// # Errors
285    /// Returns `FerrayError::ShapeMismatch` if inner slices have different lengths.
286    pub fn from_rows(rows: &[&[&str]]) -> FerrayResult<Self> {
287        if rows.is_empty() {
288            return Self::from_vec(Ix2::new([0, 0]), Vec::new());
289        }
290        let ncols = rows[0].len();
291        for (i, row) in rows.iter().enumerate() {
292            if row.len() != ncols {
293                return Err(FerrayError::shape_mismatch(format!(
294                    "row {} has length {} but row 0 has length {}",
295                    i,
296                    row.len(),
297                    ncols
298                )));
299            }
300        }
301        let nrows = rows.len();
302        let data: Vec<String> = rows
303            .iter()
304            .flat_map(|row| row.iter().map(|s| (*s).to_string()))
305            .collect();
306        Self::from_vec(Ix2::new([nrows, ncols]), data)
307    }
308}
309
310impl StringArray<IxDyn> {
311    /// Create a dynamic-rank `StringArray` from a flat vec and a dynamic shape.
312    pub fn from_vec_dyn(shape: &[usize], data: Vec<String>) -> FerrayResult<Self> {
313        Self::from_vec(IxDyn::new(shape), data)
314    }
315}
316
317/// Create a 1-D `StringArray` from a slice of strings — the primary
318/// constructor matching `numpy.strings.array(...)`.
319///
320/// # Errors
321/// This function is infallible for valid inputs but returns `Result`
322/// for API consistency.
323pub fn array(items: &[&str]) -> FerrayResult<StringArray1> {
324    StringArray1::from_slice(items)
325}
326
327// ---------------------------------------------------------------------------
328// Broadcasting helpers for binary string operations
329// ---------------------------------------------------------------------------
330
331use ferray_core::dimension::broadcast::broadcast_shapes;
332
/// Outcome of broadcasting two arrays: the broadcast output shape, plus
/// one `(index_into_a, index_into_b)` pair of flat indices per output
/// element.
pub(crate) type BroadcastResult = (Vec<usize>, Vec<(usize, usize)>);
335
336/// Compute the broadcast result of two `StringArray`s, returning paired
337/// element indices into the flat data of each array.
338///
339/// Returns `(broadcast_shape, pairs)` where each pair is `(idx_a, idx_b)`.
340pub(crate) fn broadcast_binary<Da: Dimension, Db: Dimension>(
341    a: &StringArray<Da>,
342    b: &StringArray<Db>,
343) -> FerrayResult<BroadcastResult> {
344    let shape_a = a.shape();
345    let shape_b = b.shape();
346    let out_shape = broadcast_shapes(shape_a, shape_b)?;
347    let out_size: usize = out_shape.iter().product();
348
349    let strides_a = compute_strides(shape_a);
350    let strides_b = compute_strides(shape_b);
351
352    let mut pairs = Vec::with_capacity(out_size);
353    for linear in 0..out_size {
354        let multi = linear_to_multi(linear, &out_shape);
355        let idx_a = multi_to_broadcast_linear(&multi, shape_a, &strides_a);
356        let idx_b = multi_to_broadcast_linear(&multi, shape_b, &strides_b);
357        pairs.push((idx_a, idx_b));
358    }
359
360    Ok((out_shape, pairs))
361}
362
/// Row-major (C-order) strides for `shape`, measured in elements.
///
/// The last axis has stride 1 and every earlier axis has the stride of
/// its right-hand neighbour multiplied by that neighbour's extent. An
/// empty shape yields an empty vector.
fn compute_strides(shape: &[usize]) -> Vec<usize> {
    let mut strides = vec![1usize; shape.len()];
    // Fill right-to-left; `axis - 1` takes its value from `axis`.
    for axis in (1..shape.len()).rev() {
        strides[axis - 1] = strides[axis] * shape[axis];
    }
    strides
}
375
/// Decompose a row-major linear index into per-axis coordinates for
/// `shape`.
///
/// Axes are peeled off from the last (fastest-varying) to the first.
/// A zero-length axis is skipped: its coordinate stays 0 and `linear`
/// is left unchanged for the remaining axes.
fn linear_to_multi(mut linear: usize, shape: &[usize]) -> Vec<usize> {
    let mut indices = vec![0usize; shape.len()];
    for (slot, &extent) in indices.iter_mut().zip(shape).rev() {
        if extent == 0 {
            continue;
        }
        *slot = linear % extent;
        linear /= extent;
    }
    indices
}
388
/// Map coordinates in the broadcast output (`multi`) to a flat index
/// into a source array with shape `src_shape` and strides `src_strides`.
///
/// The source shape is aligned to the trailing axes of `multi`. Any
/// source axis of extent 1 is broadcast, so its coordinate is treated
/// as 0.
fn multi_to_broadcast_linear(multi: &[usize], src_shape: &[usize], src_strides: &[usize]) -> usize {
    // Number of leading output axes that the source does not have.
    let offset = multi.len().saturating_sub(src_shape.len());

    let mut total = 0usize;
    for (axis, &extent) in src_shape.iter().enumerate() {
        let coord = multi[axis + offset];
        let stride = src_strides[axis];
        // A broadcast (extent 1) axis contributes nothing.
        total += match extent {
            1 => 0,
            _ => coord * stride,
        };
    }
    total
}
405
#[cfg(test)]
mod tests {
    use super::*;

    // ---- construction ----

    #[test]
    fn create_from_slice() {
        let arr = array(&["hello", "world"]).unwrap();
        assert_eq!(arr.shape(), &[2]);
        assert_eq!(arr.len(), 2);
        let items = arr.as_slice();
        assert_eq!(items[0], "hello");
        assert_eq!(items[1], "world");
    }

    #[test]
    fn create_from_vec() {
        let letters: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
        let arr = StringArray1::from_vec(Ix1::new([3]), letters).unwrap();
        assert_eq!(arr.shape(), &[3]);
    }

    #[test]
    fn shape_mismatch_error() {
        // Two elements cannot fill a shape of five.
        let too_short: Vec<String> = vec!["a".into(), "b".into()];
        let outcome = StringArray1::from_vec(Ix1::new([5]), too_short);
        assert!(outcome.is_err());
    }

    #[test]
    fn empty_array() {
        let arr = StringArray1::empty(Ix1::new([4])).unwrap();
        assert_eq!(arr.len(), 4);
        assert!(arr.as_slice().iter().all(|s| s.is_empty()));
    }

    #[test]
    fn map_strings() {
        let lower = array(&["hello", "world"]).unwrap();
        let upper = lower.map(|s| s.to_uppercase()).unwrap();
        assert_eq!(upper.as_slice()[0], "HELLO");
        assert_eq!(upper.as_slice()[1], "WORLD");
    }

    #[test]
    fn from_rows_2d() {
        let grid = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        assert_eq!(grid.shape(), &[2, 2]);
        assert_eq!(grid.as_slice(), &["a", "b", "c", "d"]);
    }

    #[test]
    fn from_rows_ragged_error() {
        // Second row is shorter than the first.
        let outcome = StringArray2::from_rows(&[&["a", "b"], &["c"]]);
        assert!(outcome.is_err());
    }

    #[test]
    fn equality() {
        let left = array(&["x", "y"]).unwrap();
        let same = array(&["x", "y"]).unwrap();
        let different = array(&["x", "z"]).unwrap();
        assert_eq!(left, same);
        assert_ne!(left, different);
    }

    // ---- broadcasting ----

    #[test]
    fn broadcast_binary_scalar() {
        let words = array(&["hello", "world"]).unwrap();
        let suffix = array(&["!"]).unwrap();
        let (shape, pairs) = broadcast_binary(&words, &suffix).unwrap();
        assert_eq!(shape, vec![2]);
        assert_eq!(pairs, vec![(0, 0), (1, 0)]);
    }

    #[test]
    fn broadcast_binary_same_shape() {
        let left = array(&["a", "b", "c"]).unwrap();
        let right = array(&["x", "y", "z"]).unwrap();
        let (shape, pairs) = broadcast_binary(&left, &right).unwrap();
        assert_eq!(shape, vec![3]);
        assert_eq!(pairs, vec![(0, 0), (1, 1), (2, 2)]);
    }

    #[test]
    fn into_vec() {
        let arr = array(&["a", "b"]).unwrap();
        let owned = arr.into_vec();
        assert_eq!(owned, vec!["a".to_string(), "b".to_string()]);
    }

    // ---- shape operations (#514) ----

    #[test]
    fn reshape_1d_to_2d() {
        let flat = array(&["a", "b", "c", "d", "e", "f"]).unwrap();
        let grid = flat.reshape(Ix2::new([2, 3])).unwrap();
        assert_eq!(grid.shape(), &[2, 3]);
        assert_eq!(grid.as_slice(), &["a", "b", "c", "d", "e", "f"]);
    }

    #[test]
    fn reshape_wrong_size_errors() {
        // Three elements cannot be viewed as 2 x 2.
        let flat = array(&["a", "b", "c"]).unwrap();
        assert!(flat.reshape(Ix2::new([2, 2])).is_err());
    }

    #[test]
    fn flatten_2d_to_1d() {
        let grid = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        let flat = grid.flatten();
        assert_eq!(flat.shape(), &[4]);
        assert_eq!(flat.as_slice(), &["a", "b", "c", "d"]);
    }

    #[test]
    fn into_dyn_preserves_shape() {
        let grid = StringArray2::from_rows(&[&["x", "y"], &["z", "w"]]).unwrap();
        let dynamic = grid.into_dyn();
        assert_eq!(dynamic.shape(), &[2, 2]);
        assert_eq!(dynamic.as_slice(), &["x", "y", "z", "w"]);
    }

    #[test]
    fn transpose_2x3() {
        // [["a","b","c"], ["d","e","f"]] -> [["a","d"], ["b","e"], ["c","f"]]
        let grid = StringArray2::from_rows(&[&["a", "b", "c"], &["d", "e", "f"]]).unwrap();
        let flipped = grid.transpose().unwrap();
        assert_eq!(flipped.shape(), &[3, 2]);
        assert_eq!(flipped.as_slice(), &["a", "d", "b", "e", "c", "f"]);
    }

    #[test]
    fn transpose_square_is_involution() {
        let grid = StringArray2::from_rows(&[&["1", "2"], &["3", "4"]]).unwrap();
        let once = grid.transpose().unwrap();
        let twice = once.transpose().unwrap();
        assert_eq!(twice.as_slice(), grid.as_slice());
    }

    // ---- element lookup ----

    #[test]
    fn get_1d() {
        let arr = array(&["zero", "one", "two"]).unwrap();
        assert_eq!(arr.get(&[0]).unwrap(), "zero");
        assert_eq!(arr.get(&[1]).unwrap(), "one");
        assert_eq!(arr.get(&[2]).unwrap(), "two");
        assert_eq!(arr.get(&[3]), None); // out of bounds
        assert_eq!(arr.get(&[0, 0]), None); // wrong rank
    }

    #[test]
    fn get_2d() {
        let grid = StringArray2::from_rows(&[&["a", "b", "c"], &["d", "e", "f"]]).unwrap();
        assert_eq!(grid.get(&[0, 0]).unwrap(), "a");
        assert_eq!(grid.get(&[0, 2]).unwrap(), "c");
        assert_eq!(grid.get(&[1, 1]).unwrap(), "e");
        assert_eq!(grid.get(&[2, 0]), None); // row out of bounds
        assert_eq!(grid.get(&[0, 3]), None); // col out of bounds
    }
}
562}