avila_arrow/
array.rs

1//! Array types for columnar storage
2
3use crate::error::{ArrowError, Result};
4use crate::datatypes::DataType;
5use std::any::Any;
6
7/// Trait for all array types
8pub trait Array: Send + Sync {
9    /// Get the data type of the array
10    fn data_type(&self) -> &DataType;
11
12    /// Get the number of elements in the array
13    fn len(&self) -> usize;
14
15    /// Check if the array is empty
16    fn is_empty(&self) -> bool {
17        self.len() == 0
18    }
19
20    /// Convert to Any for downcasting
21    fn as_any(&self) -> &dyn Any;
22}
23
24// ==================== INTEGER ARRAYS ====================
25
26/// 8-bit signed integer array
27#[derive(Debug, Clone)]
28pub struct Int8Array {
29    data: Vec<i8>,
30    data_type: DataType,
31}
32
33impl Int8Array {
34    pub fn new(data: Vec<i8>) -> Self {
35        Self { data, data_type: DataType::Int8 }
36    }
37
38    pub fn value(&self, index: usize) -> Result<i8> {
39        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
40            index, length: self.data.len(),
41        })
42    }
43
44    pub fn values(&self) -> &[i8] { &self.data }
45}
46
47impl From<Vec<i8>> for Int8Array {
48    fn from(data: Vec<i8>) -> Self { Self::new(data) }
49}
50
51impl Array for Int8Array {
52    fn data_type(&self) -> &DataType { &self.data_type }
53    fn len(&self) -> usize { self.data.len() }
54    fn as_any(&self) -> &dyn Any { self }
55}
56
57/// 16-bit signed integer array
58#[derive(Debug, Clone)]
59pub struct Int16Array {
60    data: Vec<i16>,
61    data_type: DataType,
62}
63
64impl Int16Array {
65    pub fn new(data: Vec<i16>) -> Self {
66        Self { data, data_type: DataType::Int16 }
67    }
68
69    pub fn value(&self, index: usize) -> Result<i16> {
70        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
71            index, length: self.data.len(),
72        })
73    }
74
75    pub fn values(&self) -> &[i16] { &self.data }
76}
77
78impl From<Vec<i16>> for Int16Array {
79    fn from(data: Vec<i16>) -> Self { Self::new(data) }
80}
81
82impl Array for Int16Array {
83    fn data_type(&self) -> &DataType { &self.data_type }
84    fn len(&self) -> usize { self.data.len() }
85    fn as_any(&self) -> &dyn Any { self }
86}
87
88/// 32-bit signed integer array
89#[derive(Debug, Clone)]
90pub struct Int32Array {
91    data: Vec<i32>,
92    data_type: DataType,
93}
94
95impl Int32Array {
96    pub fn new(data: Vec<i32>) -> Self {
97        Self { data, data_type: DataType::Int32 }
98    }
99
100    pub fn value(&self, index: usize) -> Result<i32> {
101        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
102            index, length: self.data.len(),
103        })
104    }
105
106    pub fn values(&self) -> &[i32] { &self.data }
107}
108
109impl From<Vec<i32>> for Int32Array {
110    fn from(data: Vec<i32>) -> Self { Self::new(data) }
111}
112
113impl Array for Int32Array {
114    fn data_type(&self) -> &DataType { &self.data_type }
115    fn len(&self) -> usize { self.data.len() }
116    fn as_any(&self) -> &dyn Any { self }
117}
118
119/// 64-bit signed integer array
120#[derive(Debug, Clone)]
121pub struct Int64Array {
122    data: Vec<i64>,
123    data_type: DataType,
124}
125
126impl Int64Array {
127    /// Create new Int64Array
128    pub fn new(data: Vec<i64>) -> Self {
129        Self {
130            data,
131            data_type: DataType::Int64,
132        }
133    }
134
135    /// Get value at index
136    pub fn value(&self, index: usize) -> Result<i64> {
137        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
138            index,
139            length: self.data.len(),
140        })
141    }
142
143    /// Get all values
144    pub fn values(&self) -> &[i64] {
145        &self.data
146    }
147}
148
149impl From<Vec<i64>> for Int64Array {
150    fn from(data: Vec<i64>) -> Self {
151        Self::new(data)
152    }
153}
154
155impl Array for Int64Array {
156    fn data_type(&self) -> &DataType {
157        &self.data_type
158    }
159
160    fn len(&self) -> usize {
161        self.data.len()
162    }
163
164    fn as_any(&self) -> &dyn Any {
165        self
166    }
167}
168
169/// 64-bit floating point array
170#[derive(Debug, Clone)]
171pub struct Float64Array {
172    data: Vec<f64>,
173    data_type: DataType,
174}
175
176impl Float64Array {
177    /// Create new Float64Array
178    pub fn new(data: Vec<f64>) -> Self {
179        Self {
180            data,
181            data_type: DataType::Float64,
182        }
183    }
184
185    /// Get value at index
186    pub fn value(&self, index: usize) -> Result<f64> {
187        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
188            index,
189            length: self.data.len(),
190        })
191    }
192
193    /// Get all values
194    pub fn values(&self) -> &[f64] {
195        &self.data
196    }
197}
198
199impl From<Vec<f64>> for Float64Array {
200    fn from(data: Vec<f64>) -> Self {
201        Self::new(data)
202    }
203}
204
205impl Array for Float64Array {
206    fn data_type(&self) -> &DataType {
207        &self.data_type
208    }
209
210    fn len(&self) -> usize {
211        self.data.len()
212    }
213
214    fn as_any(&self) -> &dyn Any {
215        self
216    }
217}
218
219// ==================== UNSIGNED INTEGER ARRAYS ====================
220
221/// 8-bit unsigned integer array
222#[derive(Debug, Clone)]
223pub struct UInt8Array {
224    data: Vec<u8>,
225    data_type: DataType,
226}
227
228impl UInt8Array {
229    pub fn new(data: Vec<u8>) -> Self {
230        Self { data, data_type: DataType::UInt8 }
231    }
232
233    pub fn value(&self, index: usize) -> Result<u8> {
234        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
235            index, length: self.data.len(),
236        })
237    }
238
239    pub fn values(&self) -> &[u8] { &self.data }
240}
241
242impl From<Vec<u8>> for UInt8Array {
243    fn from(data: Vec<u8>) -> Self { Self::new(data) }
244}
245
246impl Array for UInt8Array {
247    fn data_type(&self) -> &DataType { &self.data_type }
248    fn len(&self) -> usize { self.data.len() }
249    fn as_any(&self) -> &dyn Any { self }
250}
251
252/// 16-bit unsigned integer array
253#[derive(Debug, Clone)]
254pub struct UInt16Array {
255    data: Vec<u16>,
256    data_type: DataType,
257}
258
259impl UInt16Array {
260    pub fn new(data: Vec<u16>) -> Self {
261        Self { data, data_type: DataType::UInt16 }
262    }
263
264    pub fn value(&self, index: usize) -> Result<u16> {
265        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
266            index, length: self.data.len(),
267        })
268    }
269
270    pub fn values(&self) -> &[u16] { &self.data }
271}
272
273impl From<Vec<u16>> for UInt16Array {
274    fn from(data: Vec<u16>) -> Self { Self::new(data) }
275}
276
277impl Array for UInt16Array {
278    fn data_type(&self) -> &DataType { &self.data_type }
279    fn len(&self) -> usize { self.data.len() }
280    fn as_any(&self) -> &dyn Any { self }
281}
282
283/// 32-bit unsigned integer array
284#[derive(Debug, Clone)]
285pub struct UInt32Array {
286    data: Vec<u32>,
287    data_type: DataType,
288}
289
290impl UInt32Array {
291    pub fn new(data: Vec<u32>) -> Self {
292        Self { data, data_type: DataType::UInt32 }
293    }
294
295    pub fn value(&self, index: usize) -> Result<u32> {
296        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
297            index, length: self.data.len(),
298        })
299    }
300
301    pub fn values(&self) -> &[u32] { &self.data }
302}
303
304impl From<Vec<u32>> for UInt32Array {
305    fn from(data: Vec<u32>) -> Self { Self::new(data) }
306}
307
308impl Array for UInt32Array {
309    fn data_type(&self) -> &DataType { &self.data_type }
310    fn len(&self) -> usize { self.data.len() }
311    fn as_any(&self) -> &dyn Any { self }
312}
313
314/// 64-bit unsigned integer array
315#[derive(Debug, Clone)]
316pub struct UInt64Array {
317    data: Vec<u64>,
318    data_type: DataType,
319}
320
321impl UInt64Array {
322    pub fn new(data: Vec<u64>) -> Self {
323        Self { data, data_type: DataType::UInt64 }
324    }
325
326    pub fn value(&self, index: usize) -> Result<u64> {
327        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
328            index, length: self.data.len(),
329        })
330    }
331
332    pub fn values(&self) -> &[u64] { &self.data }
333}
334
335impl From<Vec<u64>> for UInt64Array {
336    fn from(data: Vec<u64>) -> Self { Self::new(data) }
337}
338
339impl Array for UInt64Array {
340    fn data_type(&self) -> &DataType { &self.data_type }
341    fn len(&self) -> usize { self.data.len() }
342    fn as_any(&self) -> &dyn Any { self }
343}
344
345// ==================== FLOATING POINT ARRAYS ====================
346
347/// 32-bit floating point array
348#[derive(Debug, Clone)]
349pub struct Float32Array {
350    data: Vec<f32>,
351    data_type: DataType,
352}
353
354impl Float32Array {
355    pub fn new(data: Vec<f32>) -> Self {
356        Self { data, data_type: DataType::Float32 }
357    }
358
359    pub fn value(&self, index: usize) -> Result<f32> {
360        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
361            index, length: self.data.len(),
362        })
363    }
364
365    pub fn values(&self) -> &[f32] { &self.data }
366}
367
368impl From<Vec<f32>> for Float32Array {
369    fn from(data: Vec<f32>) -> Self { Self::new(data) }
370}
371
372impl Array for Float32Array {
373    fn data_type(&self) -> &DataType { &self.data_type }
374    fn len(&self) -> usize { self.data.len() }
375    fn as_any(&self) -> &dyn Any { self }
376}
377
378// ==================== OTHER ARRAYS ====================
379
380/// UTF-8 string array
381#[derive(Debug, Clone)]
382pub struct Utf8Array {
383    data: Vec<String>,
384    data_type: DataType,
385}
386
387impl Utf8Array {
388    /// Create new Utf8Array
389    pub fn new(data: Vec<String>) -> Self {
390        Self {
391            data,
392            data_type: DataType::Utf8,
393        }
394    }
395
396    /// Get value at index
397    pub fn value(&self, index: usize) -> Result<&str> {
398        self.data.get(index).map(|s| s.as_str()).ok_or_else(|| ArrowError::OutOfBounds {
399            index,
400            length: self.data.len(),
401        })
402    }
403
404    /// Get all values
405    pub fn values(&self) -> &[String] {
406        &self.data
407    }
408}
409
410impl From<Vec<String>> for Utf8Array {
411    fn from(data: Vec<String>) -> Self {
412        Self::new(data)
413    }
414}
415
416impl From<Vec<&str>> for Utf8Array {
417    fn from(data: Vec<&str>) -> Self {
418        Self::new(data.iter().map(|s| s.to_string()).collect())
419    }
420}
421
422impl Array for Utf8Array {
423    fn data_type(&self) -> &DataType {
424        &self.data_type
425    }
426
427    fn len(&self) -> usize {
428        self.data.len()
429    }
430
431    fn as_any(&self) -> &dyn Any {
432        self
433    }
434}
435
436/// Boolean array
437#[derive(Debug, Clone)]
438pub struct BooleanArray {
439    data: Vec<bool>,
440    data_type: DataType,
441}
442
443impl BooleanArray {
444    /// Create new BooleanArray
445    pub fn new(data: Vec<bool>) -> Self {
446        Self {
447            data,
448            data_type: DataType::Boolean,
449        }
450    }
451
452    /// Get value at index
453    pub fn value(&self, index: usize) -> Result<bool> {
454        self.data.get(index).copied().ok_or_else(|| ArrowError::OutOfBounds {
455            index,
456            length: self.data.len(),
457        })
458    }
459
460    /// Get all values
461    pub fn values(&self) -> &[bool] {
462        &self.data
463    }
464}
465
466impl From<Vec<bool>> for BooleanArray {
467    fn from(data: Vec<bool>) -> Self {
468        Self::new(data)
469    }
470}
471
472impl Array for BooleanArray {
473    fn data_type(&self) -> &DataType {
474        &self.data_type
475    }
476
477    fn len(&self) -> usize {
478        self.data.len()
479    }
480
481    fn as_any(&self) -> &dyn Any {
482        self
483    }
484}
485
// Unit tests for the array types: construction through `From`, element access,
// bounds checking, and the shared `Array` trait methods.
#[cfg(test)]
mod tests {
    use super::*;

    /// Extremes of `i8` survive a round trip through the array.
    #[test]
    fn test_int8_array() {
        let array = Int8Array::from(vec![-128, 0, 127]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(0).unwrap(), -128);
        assert_eq!(array.value(2).unwrap(), 127);
    }

    /// Extremes of `i16` survive a round trip through the array.
    #[test]
    fn test_int16_array() {
        let array = Int16Array::from(vec![-32768, 0, 32767]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(0).unwrap(), -32768);
        assert_eq!(array.value(2).unwrap(), 32767);
    }

    /// Extremes of `i32` survive a round trip through the array.
    #[test]
    fn test_int32_array() {
        let array = Int32Array::from(vec![i32::MIN, 0, i32::MAX]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(0).unwrap(), i32::MIN);
        assert_eq!(array.value(2).unwrap(), i32::MAX);
    }

    /// First, last and one-past-the-end access on an `Int64Array`.
    #[test]
    fn test_int64_array() {
        let array = Int64Array::from(vec![1, 2, 3, 4, 5]);
        assert_eq!(array.len(), 5);
        assert_eq!(array.value(0).unwrap(), 1);
        assert_eq!(array.value(4).unwrap(), 5);
        assert!(array.value(5).is_err());
    }

    /// Extremes of `u8` survive a round trip through the array.
    #[test]
    fn test_uint8_array() {
        let array = UInt8Array::from(vec![0, 128, 255]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(0).unwrap(), 0);
        assert_eq!(array.value(2).unwrap(), 255);
    }

    /// Values above `i16::MAX` are stored unchanged in a `UInt16Array`.
    #[test]
    fn test_uint16_array() {
        let array = UInt16Array::from(vec![0, 32768, 65535]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(1).unwrap(), 32768);
        assert_eq!(array.value(2).unwrap(), 65535);
    }

    /// `u32::MAX` is stored unchanged.
    #[test]
    fn test_uint32_array() {
        let array = UInt32Array::from(vec![0, u32::MAX / 2, u32::MAX]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(2).unwrap(), u32::MAX);
    }

    /// `u64::MAX` is stored unchanged.
    #[test]
    fn test_uint64_array() {
        let array = UInt64Array::from(vec![0, u64::MAX / 2, u64::MAX]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(2).unwrap(), u64::MAX);
    }

    /// `f32` elements are compared with a tolerance.
    #[test]
    fn test_float32_array() {
        let array = Float32Array::from(vec![1.1, 2.2, 3.3]);
        assert_eq!(array.len(), 3);
        assert!((array.value(0).unwrap() - 1.1).abs() < 1e-6);
        assert!((array.value(2).unwrap() - 3.3).abs() < 1e-6);
    }

    /// `f64` elements come back bit-identical, so exact equality is valid here.
    #[test]
    fn test_float64_array() {
        let array = Float64Array::from(vec![1.1, 2.2, 3.3]);
        assert_eq!(array.len(), 3);
        assert_eq!(array.value(0).unwrap(), 1.1);
        assert_eq!(array.value(2).unwrap(), 3.3);
    }

    /// Construction from `Vec<&str>` copies the strings.
    #[test]
    fn test_utf8_array() {
        let array = Utf8Array::from(vec!["hello", "world"]);
        assert_eq!(array.len(), 2);
        assert_eq!(array.value(0).unwrap(), "hello");
        assert_eq!(array.value(1).unwrap(), "world");
    }

    /// Boolean elements are asserted directly rather than compared to literals.
    #[test]
    fn test_boolean_array() {
        let array = BooleanArray::from(vec![true, false, true]);
        assert_eq!(array.len(), 3);
        assert!(array.value(0).unwrap());
        assert!(!array.value(1).unwrap());
    }

    /// An out-of-range access reports the offending index and the array length.
    #[test]
    fn test_out_of_bounds() {
        let array = Int64Array::from(vec![1, 2, 3]);
        assert!(array.value(10).is_err());
        assert!(matches!(
            array.value(10),
            Err(ArrowError::OutOfBounds {
                index: 10,
                length: 3
            })
        ));
        // The first index past the end is already invalid.
        assert!(array.value(3).is_err());
    }

    /// Every index is out of bounds on an empty array.
    #[test]
    fn test_out_of_bounds_on_empty_array() {
        let array = Utf8Array::from(Vec::<String>::new());
        assert_eq!(array.len(), 0);
        assert!(matches!(
            array.value(0),
            Err(ArrowError::OutOfBounds {
                index: 0,
                length: 0
            })
        ));
    }

    /// The provided `Array::is_empty` agrees with `len`.
    #[test]
    fn test_is_empty() {
        assert!(Int32Array::from(Vec::new()).is_empty());
        assert!(BooleanArray::from(Vec::new()).is_empty());
        assert!(!UInt8Array::from(vec![1]).is_empty());
    }

    /// `values` exposes the stored elements in insertion order.
    #[test]
    fn test_values_slice() {
        let ints = Int16Array::from(vec![3, 1, 2]);
        assert_eq!(ints.values(), &[3, 1, 2]);

        let strings = Utf8Array::from(vec!["a", "b"]);
        assert_eq!(strings.values(), &["a".to_string(), "b".to_string()]);
    }

    /// A trait object can be downcast back to its concrete array type only.
    #[test]
    fn test_downcast_through_as_any() {
        let array = Int32Array::from(vec![7, 8, 9]);
        let dynamic: &dyn Array = &array;
        assert_eq!(dynamic.len(), 3);

        let concrete = dynamic
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("an Int32Array must downcast to itself");
        assert_eq!(concrete.values(), &[7, 8, 9]);
        assert!(dynamic.as_any().downcast_ref::<Int64Array>().is_none());
    }
}