1use ferray_core::dimension::{Dimension, Ix1, Ix2, IxDyn};
8use ferray_core::error::{FerrayError, FerrayResult};
9
/// An N-dimensional array of owned strings.
///
/// Elements live in a single flat `Vec<String>`; the shape is carried by
/// `dim`. Multi-dimensional lookups in this file treat the last axis as the
/// fastest-varying one (row-major order).
#[derive(Debug, Clone)]
pub struct StringArray<D: Dimension> {
    /// Flat element storage; `from_vec` checks that its length equals `dim.size()`.
    data: Vec<String>,
    /// Shape descriptor of the array.
    dim: D,
}
23
/// A [`StringArray`] whose shape is described by `Ix1` (a single axis).
pub type StringArray1 = StringArray<Ix1>;
26
/// A [`StringArray`] whose shape is described by `Ix2` (rows and columns).
pub type StringArray2 = StringArray<Ix2>;
29
30impl<D: Dimension> StringArray<D> {
31 pub fn from_vec(dim: D, data: Vec<String>) -> FerrayResult<Self> {
37 let expected = dim.size();
38 if data.len() != expected {
39 return Err(FerrayError::shape_mismatch(format!(
40 "data length {} does not match shape {:?} (expected {})",
41 data.len(),
42 dim.as_slice(),
43 expected,
44 )));
45 }
46 Ok(Self { data, dim })
47 }
48
49 pub fn empty(dim: D) -> FerrayResult<Self> {
55 let size = dim.size();
56 let data = vec![String::new(); size];
57 Ok(Self { data, dim })
58 }
59
60 #[inline]
62 pub fn shape(&self) -> &[usize] {
63 self.dim.as_slice()
64 }
65
66 #[inline]
68 pub fn ndim(&self) -> usize {
69 self.dim.ndim()
70 }
71
72 #[inline]
74 pub fn len(&self) -> usize {
75 self.data.len()
76 }
77
78 #[inline]
80 pub fn is_empty(&self) -> bool {
81 self.data.is_empty()
82 }
83
84 #[inline]
86 pub fn dim(&self) -> &D {
87 &self.dim
88 }
89
90 #[inline]
92 pub fn as_slice(&self) -> &[String] {
93 &self.data
94 }
95
96 #[inline]
98 pub fn as_slice_mut(&mut self) -> &mut [String] {
99 &mut self.data
100 }
101
102 #[inline]
104 pub fn into_vec(self) -> Vec<String> {
105 self.data
106 }
107
108 pub fn map<F>(&self, f: F) -> FerrayResult<StringArray<D>>
110 where
111 F: Fn(&str) -> String,
112 {
113 let data: Vec<String> = self.data.iter().map(|s| f(s)).collect();
114 StringArray::from_vec(self.dim.clone(), data)
115 }
116
117 pub fn map_to_vec<T, F>(&self, f: F) -> Vec<T>
122 where
123 F: Fn(&str) -> T,
124 {
125 self.data.iter().map(|s| f(s)).collect()
126 }
127
128 pub fn iter(&self) -> std::slice::Iter<'_, String> {
130 self.data.iter()
131 }
132
133 pub fn reshape<D2: Dimension>(self, new_dim: D2) -> FerrayResult<StringArray<D2>> {
153 StringArray::<D2>::from_vec(new_dim, self.data)
154 }
155
156 pub fn flatten(self) -> StringArray1 {
162 let n = self.data.len();
163 StringArray::<Ix1>::from_vec(Ix1::new([n]), self.data)
164 .expect("flatten: length check is trivially satisfied")
165 }
166
167 pub fn into_dyn(self) -> StringArray<IxDyn> {
171 let shape = self.dim.as_slice().to_vec();
172 StringArray::<IxDyn>::from_vec(IxDyn::new(&shape), self.data)
173 .expect("into_dyn: shape length check is trivially satisfied")
174 }
175
176 pub fn get(&self, idx: &[usize]) -> Option<&String> {
182 let shape = self.dim.as_slice();
183 if idx.len() != shape.len() {
184 return None;
185 }
186 let mut flat = 0usize;
187 let mut stride = 1usize;
188 for (i, (&dim, &k)) in shape.iter().zip(idx.iter()).enumerate().rev() {
190 if k >= dim {
191 return None;
192 }
193 if i == shape.len() - 1 {
194 flat += k;
195 } else {
196 flat += k * stride;
197 }
198 stride *= dim;
199 }
200 self.data.get(flat)
201 }
202}
203
204impl<D: Dimension> PartialEq for StringArray<D> {
205 fn eq(&self, other: &Self) -> bool {
206 self.dim == other.dim && self.data == other.data
207 }
208}
209
/// Marker impl: declares the `PartialEq` comparison on `StringArray` to be a
/// full equivalence relation.
impl<D: Dimension> Eq for StringArray<D> {}
211
212impl<D: Dimension> std::fmt::Display for StringArray<D> {
214 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
215 write!(f, "array([")?;
216 for (i, s) in self.data.iter().enumerate() {
217 if i > 0 {
218 write!(f, ", ")?;
219 }
220 write!(f, "{s:?}")?;
221 }
222 write!(f, "])")
223 }
224}
225
226impl<'a, D: Dimension> IntoIterator for &'a StringArray<D> {
228 type Item = &'a String;
229 type IntoIter = std::slice::Iter<'a, String>;
230
231 fn into_iter(self) -> Self::IntoIter {
232 self.data.iter()
233 }
234}
235
236impl<D: Dimension> IntoIterator for StringArray<D> {
238 type Item = String;
239 type IntoIter = std::vec::IntoIter<String>;
240
241 fn into_iter(self) -> Self::IntoIter {
242 self.data.into_iter()
243 }
244}
245
246impl StringArray<Ix1> {
251 pub fn from_slice(items: &[&str]) -> FerrayResult<Self> {
258 let data: Vec<String> = items.iter().map(|s| (*s).to_string()).collect();
259 let dim = Ix1::new([data.len()]);
260 Self::from_vec(dim, data)
261 }
262}
263
264impl StringArray<Ix2> {
265 pub fn transpose(&self) -> FerrayResult<StringArray<Ix2>> {
271 let shape = self.shape();
272 let (nrows, ncols) = (shape[0], shape[1]);
273 let mut data = Vec::with_capacity(nrows * ncols);
274 for c in 0..ncols {
275 for r in 0..nrows {
276 data.push(self.data[r * ncols + c].clone());
277 }
278 }
279 Self::from_vec(Ix2::new([ncols, nrows]), data)
280 }
281
282 pub fn from_rows(rows: &[&[&str]]) -> FerrayResult<Self> {
287 if rows.is_empty() {
288 return Self::from_vec(Ix2::new([0, 0]), Vec::new());
289 }
290 let ncols = rows[0].len();
291 for (i, row) in rows.iter().enumerate() {
292 if row.len() != ncols {
293 return Err(FerrayError::shape_mismatch(format!(
294 "row {} has length {} but row 0 has length {}",
295 i,
296 row.len(),
297 ncols
298 )));
299 }
300 }
301 let nrows = rows.len();
302 let data: Vec<String> = rows
303 .iter()
304 .flat_map(|row| row.iter().map(|s| (*s).to_string()))
305 .collect();
306 Self::from_vec(Ix2::new([nrows, ncols]), data)
307 }
308}
309
310impl StringArray<IxDyn> {
311 pub fn from_vec_dyn(shape: &[usize], data: Vec<String>) -> FerrayResult<Self> {
313 Self::from_vec(IxDyn::new(shape), data)
314 }
315}
316
317pub fn array(items: &[&str]) -> FerrayResult<StringArray1> {
324 StringArray1::from_slice(items)
325}
326
327use ferray_core::dimension::broadcast::broadcast_shapes;
332
/// Result of broadcasting two arrays: the output shape, plus one
/// `(index_into_a, index_into_b)` pair of flat source indices per output
/// element, listed in output linear order.
pub(crate) type BroadcastResult = (Vec<usize>, Vec<(usize, usize)>);
335
336pub(crate) fn broadcast_binary<Da: Dimension, Db: Dimension>(
341 a: &StringArray<Da>,
342 b: &StringArray<Db>,
343) -> FerrayResult<BroadcastResult> {
344 let shape_a = a.shape();
345 let shape_b = b.shape();
346 let out_shape = broadcast_shapes(shape_a, shape_b)?;
347 let out_size: usize = out_shape.iter().product();
348
349 let strides_a = compute_strides(shape_a);
350 let strides_b = compute_strides(shape_b);
351
352 let mut pairs = Vec::with_capacity(out_size);
353 for linear in 0..out_size {
354 let multi = linear_to_multi(linear, &out_shape);
355 let idx_a = multi_to_broadcast_linear(&multi, shape_a, &strides_a);
356 let idx_b = multi_to_broadcast_linear(&multi, shape_b, &strides_b);
357 pairs.push((idx_a, idx_b));
358 }
359
360 Ok((out_shape, pairs))
361}
362
/// Computes row-major (last axis fastest) element strides for `shape`.
///
/// The last axis gets stride 1 and each earlier axis gets the product of
/// all later extents. An empty shape yields an empty vector.
fn compute_strides(shape: &[usize]) -> Vec<usize> {
    let mut strides = vec![1usize; shape.len()];
    // Fill from the second-to-last axis backwards; the last stays at 1.
    for axis in (1..shape.len()).rev() {
        strides[axis - 1] = strides[axis] * shape[axis];
    }
    strides
}
375
/// Converts a flat row-major index into per-axis indices for `shape`.
///
/// Axes with extent 0 are skipped: their index stays 0 and `linear` is left
/// unchanged, which avoids a division by zero.
fn linear_to_multi(mut linear: usize, shape: &[usize]) -> Vec<usize> {
    let mut indices = vec![0usize; shape.len()];
    // Peel off the fastest-varying (last) axis first.
    for (slot, &extent) in indices.iter_mut().zip(shape.iter()).rev() {
        if extent == 0 {
            continue;
        }
        *slot = linear % extent;
        linear /= extent;
    }
    indices
}
388
/// Maps an output multi-index to a flat index into a broadcast source.
///
/// The source shape is right-aligned against `multi`, so leading output
/// axes that the source does not have are ignored. Source axes of extent 1
/// always contribute offset 0, which is what repeats them along the
/// broadcast axis.
///
/// # Panics
///
/// Panics when `src_shape` has more axes than `multi`, or when
/// `src_strides` is shorter than `src_shape`.
fn multi_to_broadcast_linear(multi: &[usize], src_shape: &[usize], src_strides: &[usize]) -> usize {
    let src_ndim = src_shape.len();
    let pad = multi.len().saturating_sub(src_ndim);

    // Explicit slicing keeps the out-of-range panics of indexed access.
    let aligned = &multi[pad..pad + src_ndim];
    let strides = &src_strides[..src_ndim];

    aligned
        .iter()
        .zip(src_shape.iter())
        .zip(strides.iter())
        .map(|((&idx, &extent), &stride)| if extent == 1 { 0 } else { idx * stride })
        .sum()
}
405
// Unit tests for `StringArray` construction, accessors, reshaping,
// transposition, indexing, and the broadcasting helper.
#[cfg(test)]
mod tests {
    use super::*;

    // `array` builds a 1-D array that preserves element order.
    #[test]
    fn create_from_slice() {
        let a = array(&["hello", "world"]).unwrap();
        assert_eq!(a.shape(), &[2]);
        assert_eq!(a.len(), 2);
        assert_eq!(a.as_slice()[0], "hello");
        assert_eq!(a.as_slice()[1], "world");
    }

    // `from_vec` accepts data whose length matches the shape.
    #[test]
    fn create_from_vec() {
        let a = StringArray1::from_vec(Ix1::new([3]), vec!["a".into(), "b".into(), "c".into()])
            .unwrap();
        assert_eq!(a.shape(), &[3]);
    }

    // `from_vec` rejects data whose length disagrees with the shape.
    #[test]
    fn shape_mismatch_error() {
        let res = StringArray1::from_vec(Ix1::new([5]), vec!["a".into(), "b".into()]);
        assert!(res.is_err());
    }

    // `empty` fills the requested shape with empty strings.
    #[test]
    fn empty_array() {
        let a = StringArray1::empty(Ix1::new([4])).unwrap();
        assert_eq!(a.len(), 4);
        assert!(a.as_slice().iter().all(|s| s.is_empty()));
    }

    // `map` transforms every element and keeps the order.
    #[test]
    fn map_strings() {
        let a = array(&["hello", "world"]).unwrap();
        let b = a.map(|s| s.to_uppercase()).unwrap();
        assert_eq!(b.as_slice()[0], "HELLO");
        assert_eq!(b.as_slice()[1], "WORLD");
    }

    // `from_rows` stores rows back to back (row-major).
    #[test]
    fn from_rows_2d() {
        let a = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        assert_eq!(a.shape(), &[2, 2]);
        assert_eq!(a.as_slice(), &["a", "b", "c", "d"]);
    }

    // Rows of differing length are rejected.
    #[test]
    fn from_rows_ragged_error() {
        let res = StringArray2::from_rows(&[&["a", "b"], &["c"]]);
        assert!(res.is_err());
    }

    // Equality compares element contents.
    #[test]
    fn equality() {
        let a = array(&["x", "y"]).unwrap();
        let b = array(&["x", "y"]).unwrap();
        let c = array(&["x", "z"]).unwrap();
        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    // A length-1 operand is repeated against every element of the other.
    #[test]
    fn broadcast_binary_scalar() {
        let a = array(&["hello", "world"]).unwrap();
        let b = array(&["!"]).unwrap();
        let (shape, pairs) = broadcast_binary(&a, &b).unwrap();
        assert_eq!(shape, vec![2]);
        assert_eq!(pairs, vec![(0, 0), (1, 0)]);
    }

    // Equal shapes pair elements one to one.
    #[test]
    fn broadcast_binary_same_shape() {
        let a = array(&["a", "b", "c"]).unwrap();
        let b = array(&["x", "y", "z"]).unwrap();
        let (shape, pairs) = broadcast_binary(&a, &b).unwrap();
        assert_eq!(shape, vec![3]);
        assert_eq!(pairs, vec![(0, 0), (1, 1), (2, 2)]);
    }

    // `into_vec` hands back the flat storage.
    #[test]
    fn into_vec() {
        let a = array(&["a", "b"]).unwrap();
        let v = a.into_vec();
        assert_eq!(v, vec!["a".to_string(), "b".to_string()]);
    }

    // Reshaping changes the shape but not the element order.
    #[test]
    fn reshape_1d_to_2d() {
        let a = array(&["a", "b", "c", "d", "e", "f"]).unwrap();
        let b = a.reshape(Ix2::new([2, 3])).unwrap();
        assert_eq!(b.shape(), &[2, 3]);
        assert_eq!(b.as_slice(), &["a", "b", "c", "d", "e", "f"]);
    }

    // Reshaping to a shape with a different element count fails.
    #[test]
    fn reshape_wrong_size_errors() {
        let a = array(&["a", "b", "c"]).unwrap();
        assert!(a.reshape(Ix2::new([2, 2])).is_err());
    }

    // Flattening a 2-D array yields its elements in storage order.
    #[test]
    fn flatten_2d_to_1d() {
        let a = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        let f = a.flatten();
        assert_eq!(f.shape(), &[4]);
        assert_eq!(f.as_slice(), &["a", "b", "c", "d"]);
    }

    // Converting to a dynamic dimension keeps shape and data.
    #[test]
    fn into_dyn_preserves_shape() {
        let a = StringArray2::from_rows(&[&["x", "y"], &["z", "w"]]).unwrap();
        let d = a.into_dyn();
        assert_eq!(d.shape(), &[2, 2]);
        assert_eq!(d.as_slice(), &["x", "y", "z", "w"]);
    }

    // Transposing a 2x3 array gives a 3x2 array with columns turned into rows.
    #[test]
    fn transpose_2x3() {
        let a = StringArray2::from_rows(&[&["a", "b", "c"], &["d", "e", "f"]]).unwrap();
        let t = a.transpose().unwrap();
        assert_eq!(t.shape(), &[3, 2]);
        assert_eq!(t.as_slice(), &["a", "d", "b", "e", "c", "f"]);
    }

    // Transposing twice restores the original element order.
    #[test]
    fn transpose_square_is_involution() {
        let a = StringArray2::from_rows(&[&["1", "2"], &["3", "4"]]).unwrap();
        let t = a.transpose().unwrap();
        let tt = t.transpose().unwrap();
        assert_eq!(tt.as_slice(), a.as_slice());
    }

    // 1-D lookups, including an out-of-range index and a wrong-rank index.
    #[test]
    fn get_1d() {
        let a = array(&["zero", "one", "two"]).unwrap();
        assert_eq!(a.get(&[0]).unwrap(), "zero");
        assert_eq!(a.get(&[1]).unwrap(), "one");
        assert_eq!(a.get(&[2]).unwrap(), "two");
        // Index past the end of the only axis.
        assert_eq!(a.get(&[3]), None);
        // Two index components for a one-axis array.
        assert_eq!(a.get(&[0, 0]), None);
    }

    // 2-D lookups, including out-of-range row and column indices.
    #[test]
    fn get_2d() {
        let a = StringArray2::from_rows(&[&["a", "b", "c"], &["d", "e", "f"]]).unwrap();
        assert_eq!(a.get(&[0, 0]).unwrap(), "a");
        assert_eq!(a.get(&[0, 2]).unwrap(), "c");
        assert_eq!(a.get(&[1, 1]).unwrap(), "e");
        // Row index out of range.
        assert_eq!(a.get(&[2, 0]), None);
        // Column index out of range.
        assert_eq!(a.get(&[0, 3]), None);
    }
}