1use ferray_core::dimension::{Dimension, Ix1, Ix2, IxDyn};
8use ferray_core::error::{FerrayError, FerrayResult};
9
10#[derive(Debug, Clone)]
17pub struct StringArray<D: Dimension> {
18 data: Vec<String>,
20 dim: D,
22}
23
24pub type StringArray1 = StringArray<Ix1>;
26
27pub type StringArray2 = StringArray<Ix2>;
29
30impl<D: Dimension> StringArray<D> {
31 pub fn from_vec(dim: D, data: Vec<String>) -> FerrayResult<Self> {
37 let expected = dim.size();
38 if data.len() != expected {
39 return Err(FerrayError::shape_mismatch(format!(
40 "data length {} does not match shape {:?} (expected {})",
41 data.len(),
42 dim.as_slice(),
43 expected,
44 )));
45 }
46 Ok(Self { data, dim })
47 }
48
49 pub fn empty(dim: D) -> FerrayResult<Self> {
55 let size = dim.size();
56 let data = vec![String::new(); size];
57 Ok(Self { data, dim })
58 }
59
60 #[inline]
62 pub fn shape(&self) -> &[usize] {
63 self.dim.as_slice()
64 }
65
66 #[inline]
68 pub fn ndim(&self) -> usize {
69 self.dim.ndim()
70 }
71
72 #[inline]
74 pub fn len(&self) -> usize {
75 self.data.len()
76 }
77
78 #[inline]
80 pub fn is_empty(&self) -> bool {
81 self.data.is_empty()
82 }
83
84 #[inline]
86 pub fn dim(&self) -> &D {
87 &self.dim
88 }
89
90 #[inline]
92 pub fn as_slice(&self) -> &[String] {
93 &self.data
94 }
95
96 #[inline]
98 pub fn as_slice_mut(&mut self) -> &mut [String] {
99 &mut self.data
100 }
101
102 #[inline]
104 pub fn into_vec(self) -> Vec<String> {
105 self.data
106 }
107
108 pub fn map<F>(&self, f: F) -> FerrayResult<StringArray<D>>
110 where
111 F: Fn(&str) -> String,
112 {
113 let data: Vec<String> = self.data.iter().map(|s| f(s)).collect();
114 StringArray::from_vec(self.dim.clone(), data)
115 }
116
117 pub fn map_to_vec<T, F>(&self, f: F) -> Vec<T>
122 where
123 F: Fn(&str) -> T,
124 {
125 self.data.iter().map(|s| f(s)).collect()
126 }
127
128 pub fn iter(&self) -> std::slice::Iter<'_, String> {
130 self.data.iter()
131 }
132}
133
134impl<D: Dimension> PartialEq for StringArray<D> {
135 fn eq(&self, other: &Self) -> bool {
136 self.dim == other.dim && self.data == other.data
137 }
138}
139
140impl<D: Dimension> Eq for StringArray<D> {}
141
142impl StringArray<Ix1> {
147 pub fn from_slice(items: &[&str]) -> FerrayResult<Self> {
154 let data: Vec<String> = items.iter().map(|s| (*s).to_string()).collect();
155 let dim = Ix1::new([data.len()]);
156 Self::from_vec(dim, data)
157 }
158}
159
160impl StringArray<Ix2> {
161 pub fn from_rows(rows: &[&[&str]]) -> FerrayResult<Self> {
166 if rows.is_empty() {
167 return Self::from_vec(Ix2::new([0, 0]), Vec::new());
168 }
169 let ncols = rows[0].len();
170 for (i, row) in rows.iter().enumerate() {
171 if row.len() != ncols {
172 return Err(FerrayError::shape_mismatch(format!(
173 "row {} has length {} but row 0 has length {}",
174 i,
175 row.len(),
176 ncols
177 )));
178 }
179 }
180 let nrows = rows.len();
181 let data: Vec<String> = rows
182 .iter()
183 .flat_map(|row| row.iter().map(|s| (*s).to_string()))
184 .collect();
185 Self::from_vec(Ix2::new([nrows, ncols]), data)
186 }
187}
188
189impl StringArray<IxDyn> {
190 pub fn from_vec_dyn(shape: &[usize], data: Vec<String>) -> FerrayResult<Self> {
192 Self::from_vec(IxDyn::new(shape), data)
193 }
194}
195
196pub fn array(items: &[&str]) -> FerrayResult<StringArray1> {
203 StringArray1::from_slice(items)
204}
205
206use ferray_core::dimension::broadcast::broadcast_shapes;
211
/// Output of a binary broadcast: the broadcast shape, plus one
/// `(index_into_a, index_into_b)` pair of flat indices per output element.
pub(crate) type BroadcastResult = (Vec<usize>, Vec<(usize, usize)>);
214
215pub(crate) fn broadcast_binary<Da: Dimension, Db: Dimension>(
220 a: &StringArray<Da>,
221 b: &StringArray<Db>,
222) -> FerrayResult<BroadcastResult> {
223 let shape_a = a.shape();
224 let shape_b = b.shape();
225 let out_shape = broadcast_shapes(shape_a, shape_b)?;
226 let out_size: usize = out_shape.iter().product();
227
228 let strides_a = compute_strides(shape_a);
229 let strides_b = compute_strides(shape_b);
230
231 let mut pairs = Vec::with_capacity(out_size);
232 for linear in 0..out_size {
233 let multi = linear_to_multi(linear, &out_shape);
234 let idx_a = multi_to_broadcast_linear(&multi, shape_a, &strides_a);
235 let idx_b = multi_to_broadcast_linear(&multi, shape_b, &strides_b);
236 pairs.push((idx_a, idx_b));
237 }
238
239 Ok((out_shape, pairs))
240}
241
/// Computes element strides for `shape` with the last axis varying fastest.
///
/// The last axis has stride 1 and every earlier axis has the stride of the
/// following axis multiplied by that axis's extent. An empty shape yields an
/// empty vector.
fn compute_strides(shape: &[usize]) -> Vec<usize> {
    let rank = shape.len();
    let mut strides = vec![1usize; rank];
    // Walk from the last axis towards the first, filling in the stride of the
    // axis just before `axis`. The range is empty for rank 0 and rank 1.
    for axis in (1..rank).rev() {
        strides[axis - 1] = strides[axis] * shape[axis];
    }
    strides
}
254
/// Decomposes a flat index into per-axis coordinates for `shape`, with the
/// last axis varying fastest.
///
/// Axes with extent 0 are skipped: their coordinate stays 0 and they do not
/// consume any of the remaining index.
fn linear_to_multi(linear: usize, shape: &[usize]) -> Vec<usize> {
    let mut remaining = linear;
    let mut coords = vec![0usize; shape.len()];
    // Peel off one axis at a time, starting from the last one.
    for (slot, &extent) in coords.iter_mut().zip(shape).rev() {
        if extent == 0 {
            continue;
        }
        *slot = remaining % extent;
        remaining /= extent;
    }
    coords
}
267
/// Maps output coordinates `multi` to a flat index into a source array with
/// shape `src_shape` and strides `src_strides`.
///
/// The source shape is aligned to the trailing axes of `multi` (leading
/// output axes beyond the source's rank are ignored). A source axis of extent
/// 1 always contributes index 0, whatever the output coordinate is.
fn multi_to_broadcast_linear(multi: &[usize], src_shape: &[usize], src_strides: &[usize]) -> usize {
    // Number of leading output axes that have no counterpart in the source.
    let lead = multi.len().saturating_sub(src_shape.len());

    src_shape
        .iter()
        .enumerate()
        .fold(0usize, |acc, (axis, &extent)| {
            let coord = multi[axis + lead];
            let effective = if extent == 1 { 0 } else { coord };
            acc + effective * src_strides[axis]
        })
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into an owned `Vec<String>`.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| (*s).to_string()).collect()
    }

    #[test]
    fn create_from_slice() {
        let arr = array(&["hello", "world"]).unwrap();
        assert_eq!(arr.shape(), &[2]);
        assert_eq!(arr.len(), 2);
        let elems = arr.as_slice();
        assert_eq!(elems[0], "hello");
        assert_eq!(elems[1], "world");
    }

    #[test]
    fn create_from_vec() {
        let arr = StringArray1::from_vec(Ix1::new([3]), owned(&["a", "b", "c"])).unwrap();
        assert_eq!(arr.shape(), &[3]);
    }

    #[test]
    fn shape_mismatch_error() {
        // Two elements cannot fill a length-5 shape.
        let outcome = StringArray1::from_vec(Ix1::new([5]), owned(&["a", "b"]));
        assert!(outcome.is_err());
    }

    #[test]
    fn empty_array() {
        let arr = StringArray1::empty(Ix1::new([4])).unwrap();
        assert_eq!(arr.len(), 4);
        for elem in arr.as_slice() {
            assert!(elem.is_empty());
        }
    }

    #[test]
    fn map_strings() {
        let upper = array(&["hello", "world"])
            .unwrap()
            .map(|s| s.to_uppercase())
            .unwrap();
        let elems = upper.as_slice();
        assert_eq!(elems[0], "HELLO");
        assert_eq!(elems[1], "WORLD");
    }

    #[test]
    fn from_rows_2d() {
        let grid = StringArray2::from_rows(&[&["a", "b"], &["c", "d"]]).unwrap();
        assert_eq!(grid.shape(), &[2, 2]);
        assert_eq!(grid.as_slice(), &["a", "b", "c", "d"]);
    }

    #[test]
    fn from_rows_ragged_error() {
        // Second row is shorter than the first.
        let outcome = StringArray2::from_rows(&[&["a", "b"], &["c"]]);
        assert!(outcome.is_err());
    }

    #[test]
    fn equality() {
        let first = array(&["x", "y"]).unwrap();
        let same = array(&["x", "y"]).unwrap();
        let different = array(&["x", "z"]).unwrap();
        assert_eq!(first, same);
        assert_ne!(first, different);
    }

    #[test]
    fn broadcast_binary_scalar() {
        let lhs = array(&["hello", "world"]).unwrap();
        let rhs = array(&["!"]).unwrap();
        let (shape, pairs) = broadcast_binary(&lhs, &rhs).unwrap();
        assert_eq!(shape, vec![2]);
        // The single-element right-hand side is reused for every output slot.
        assert_eq!(pairs, vec![(0, 0), (1, 0)]);
    }

    #[test]
    fn broadcast_binary_same_shape() {
        let lhs = array(&["a", "b", "c"]).unwrap();
        let rhs = array(&["x", "y", "z"]).unwrap();
        let (shape, pairs) = broadcast_binary(&lhs, &rhs).unwrap();
        assert_eq!(shape, vec![3]);
        assert_eq!(pairs, vec![(0, 0), (1, 1), (2, 2)]);
    }

    #[test]
    fn into_vec() {
        let recovered = array(&["a", "b"]).unwrap().into_vec();
        assert_eq!(recovered, owned(&["a", "b"]));
    }
}