Skip to main content

clickhouse_native_client/column/
array.rs

1//! Array column implementation
2//!
3//! **ClickHouse Documentation:** <https://clickhouse.com/docs/en/sql-reference/data-types/array>
4//!
5//! ## Overview
6//!
7//! Array columns store variable-length arrays of elements. All elements are
8//! stored in a single nested column (flattened), with offsets tracking where
9//! each array begins/ends.
10//!
11//! ## Important Restriction
12//!
13//! **Arrays cannot be wrapped in Nullable:**
14//! - ❌ `Nullable(Array(String))` - Error: "Nested type Array(String) cannot
15//!   be inside Nullable type" (Error code 43)
16//! - ✅ `Array(Nullable(String))` - CORRECT: Each element can be NULL
17//!
18//! If you need to represent "no array", use an empty array `[]` instead of
19//! NULL.
20//!
21//! See: <https://github.com/ClickHouse/ClickHouse/issues/1062>
22//!
23//! ## Wire Format
24//!
25//! ```text
26//! [offsets: UInt64 * num_arrays]  // Cumulative element counts
27//! [nested_column_data]             // All elements concatenated
28//! ```
29//!
30//! Example: `[[1,2], [3], [4,5,6]]`
31//! - Offsets: `[2, 3, 6]` (2 elements in first array, 3 total after second, 6
32//!   total after third)
33//! - Nested data: `[1, 2, 3, 4, 5, 6]`
34
35use super::{
36    Column,
37    ColumnRef,
38};
39use crate::{
40    types::Type,
41    Error,
42    Result,
43};
44use bytes::{
45    Buf,
46    BytesMut,
47};
48use std::{
49    marker::PhantomData,
50    sync::Arc,
51};
52
/// Column for arrays of variable length
///
/// Stores a nested column with all array elements concatenated,
/// and an offsets array that marks where each array ends.
///
/// **Reference Implementation:** See
/// `clickhouse-cpp/clickhouse/columns/array.cpp`
pub struct ColumnArray {
    /// Type returned by `column_type()`.
    type_: Type,
    /// Flattened storage holding the elements of every array back to back.
    nested: ColumnRef,
    /// Cumulative offsets: `offsets[i]` is the total number of elements up
    /// to and including array `i`.
    offsets: Vec<u64>,
}
66
67impl ColumnArray {
68    /// Create a new array column from an array type
69    pub fn new(type_: Type) -> Self {
70        // Extract nested type and create nested column
71        let nested = match &type_ {
72            Type::Array { item_type } => {
73                crate::io::block_stream::create_column(item_type)
74                    .expect("Failed to create nested column")
75            }
76            _ => panic!("ColumnArray requires Array type"),
77        };
78
79        Self { type_, nested, offsets: Vec::new() }
80    }
81
82    /// Create a new array column with an existing nested column
83    pub fn with_nested(nested: ColumnRef) -> Self {
84        let nested_type = nested.column_type().clone();
85        Self { type_: Type::array(nested_type), nested, offsets: Vec::new() }
86    }
87
88    /// Create a new array column from parts (for geo types that need custom
89    /// type names)
90    pub(crate) fn from_parts(type_: Type, nested: ColumnRef) -> Self {
91        Self { type_, nested, offsets: Vec::new() }
92    }
93
94    /// Create with reserved capacity
95    pub fn with_capacity(type_: Type, capacity: usize) -> Self {
96        let nested = match &type_ {
97            Type::Array { item_type } => {
98                crate::io::block_stream::create_column(item_type)
99                    .expect("Failed to create nested column")
100            }
101            _ => panic!("ColumnArray requires Array type"),
102        };
103
104        Self { type_, nested, offsets: Vec::with_capacity(capacity) }
105    }
106
107    /// Append an array (specified by the number of elements in the nested
108    /// column to consume) The caller must ensure that `len` elements have
109    /// been added to the nested column
110    pub fn append_len(&mut self, len: u64) {
111        let new_offset = if self.offsets.is_empty() {
112            len
113        } else {
114            self.offsets.last().unwrap() + len
115        };
116        self.offsets.push(new_offset);
117    }
118
119    /// Get the start and end indices for the array at the given index
120    pub fn get_array_range(&self, index: usize) -> Option<(usize, usize)> {
121        if index >= self.offsets.len() {
122            return None;
123        }
124
125        let end = self.offsets[index] as usize;
126        let start =
127            if index == 0 { 0 } else { self.offsets[index - 1] as usize };
128
129        Some((start, end))
130    }
131
132    /// Get the length of the array at the given index
133    pub fn get_array_len(&self, index: usize) -> Option<usize> {
134        self.get_array_range(index).map(|(start, end)| end - start)
135    }
136
137    /// Get a reference to the nested column as a specific type
138    ///
139    /// # Example
140    /// ```ignore
141    /// let col: ColumnArray = /* ... */;
142    /// let nested: &ColumnUInt32 = col.nested();
143    /// ```
144    pub fn nested<T: Column + 'static>(&self) -> &T {
145        self.nested
146            .as_any()
147            .downcast_ref::<T>()
148            .expect("Failed to downcast nested column to requested type")
149    }
150
151    /// Get mutable reference to the nested column as a specific type
152    ///
153    /// # Example
154    /// ```ignore
155    /// let mut col: ColumnArray = /* ... */;
156    /// let nested_mut: &mut ColumnUInt32 = col.nested_mut();
157    /// ```
158    pub fn nested_mut<T: Column + 'static>(&mut self) -> &mut T {
159        Arc::get_mut(&mut self.nested)
160            .expect("Cannot get mutable reference to shared nested column")
161            .as_any_mut()
162            .downcast_mut::<T>()
163            .expect("Failed to downcast nested column to requested type")
164    }
165
166    /// Get the nested column as a `ColumnRef` (`Arc<dyn Column>`)
167    pub fn nested_ref(&self) -> ColumnRef {
168        self.nested.clone()
169    }
170
171    /// Get the offsets
172    pub fn offsets(&self) -> &[u64] {
173        &self.offsets
174    }
175
176    /// Append an entire array column as a single array element
177    /// This takes all the data from the provided column and adds it as one
178    /// array
179    pub fn append_array(&mut self, array_data: ColumnRef) {
180        let len = array_data.size() as u64;
181
182        // Append the array data to nested column
183        let nested_mut = Arc::get_mut(&mut self.nested)
184            .expect("Cannot append to shared array column - column has multiple references");
185        nested_mut
186            .append_column(array_data)
187            .expect("Failed to append array data to nested column");
188
189        // Update offsets
190        self.append_len(len);
191    }
192
193    /// Get the array at the given index as a sliced column
194    pub fn at(&self, index: usize) -> ColumnRef {
195        if let Some((start, end)) = self.get_array_range(index) {
196            self.nested.slice(start, end - start).expect("Valid slice")
197        } else {
198            panic!("Array index out of bounds: {}", index);
199        }
200    }
201
202    /// Get the number of arrays (alias for size())
203    pub fn len(&self) -> usize {
204        self.offsets.len()
205    }
206
207    /// Check if the array column is empty
208    pub fn is_empty(&self) -> bool {
209        self.offsets.is_empty()
210    }
211}
212
213impl Column for ColumnArray {
214    fn column_type(&self) -> &Type {
215        &self.type_
216    }
217
218    fn size(&self) -> usize {
219        self.offsets.len()
220    }
221
222    fn clear(&mut self) {
223        self.offsets.clear();
224        // CRITICAL: Must also clear nested data to maintain consistency
225        // If we clear offsets but not nested data, the column is in a corrupt
226        // state
227        let nested_mut = Arc::get_mut(&mut self.nested)
228            .expect("Cannot clear shared array column - column has multiple references");
229        nested_mut.clear();
230    }
231
232    fn reserve(&mut self, new_cap: usize) {
233        self.offsets.reserve(new_cap);
234    }
235
236    fn append_column(&mut self, other: ColumnRef) -> Result<()> {
237        let other =
238            other.as_any().downcast_ref::<ColumnArray>().ok_or_else(|| {
239                Error::TypeMismatch {
240                    expected: self.type_.name(),
241                    actual: other.column_type().name(),
242                }
243            })?;
244
245        // Check that nested types match
246        if self.nested.column_type().name()
247            != other.nested.column_type().name()
248        {
249            return Err(Error::TypeMismatch {
250                expected: self.nested.column_type().name(),
251                actual: other.nested.column_type().name(),
252            });
253        }
254
255        // Adjust offsets from other and append
256        let offset_base = self.offsets.last().copied().unwrap_or(0);
257        for &offset in &other.offsets {
258            self.offsets.push(offset_base + offset);
259        }
260
261        // CRITICAL: Must also append the nested data!
262        // Without this, offsets point to wrong/missing data → DATA CORRUPTION
263        let nested_mut = Arc::get_mut(&mut self.nested)
264            .ok_or_else(|| Error::Protocol(
265                "Cannot append to shared array column - column has multiple references".to_string()
266            ))?;
267        nested_mut.append_column(other.nested.clone())?;
268
269        Ok(())
270    }
271
272    fn load_from_buffer(
273        &mut self,
274        buffer: &mut &[u8],
275        rows: usize,
276    ) -> Result<()> {
277        self.offsets.reserve(rows);
278
279        // Read offsets (fixed UInt64, not varint!)
280        // Wire format: UInt64 values stored as 8-byte little-endian
281        let bytes_needed = rows * 8;
282        if buffer.len() < bytes_needed {
283            return Err(Error::Protocol(format!(
284                "Buffer underflow reading array offsets: need {} bytes, have {}",
285                bytes_needed,
286                buffer.len()
287            )));
288        }
289
290        // Use bulk copy for performance
291        self.offsets.reserve(rows);
292        let current_len = self.offsets.len();
293        unsafe {
294            // Set length first to claim ownership of the memory
295            self.offsets.set_len(current_len + rows);
296            // Cast dest to bytes and use byte offset
297            let dest_ptr =
298                (self.offsets.as_mut_ptr() as *mut u8).add(current_len * 8);
299            std::ptr::copy_nonoverlapping(
300                buffer.as_ptr(),
301                dest_ptr,
302                bytes_needed,
303            );
304        }
305
306        buffer.advance(bytes_needed);
307
308        // CRITICAL: Must also load the nested column data
309        // The total number of nested elements is the last offset value
310        let total_nested_elements =
311            self.offsets.last().copied().unwrap_or(0) as usize;
312        if total_nested_elements > 0 {
313            let nested_mut = Arc::get_mut(&mut self.nested)
314                .ok_or_else(|| Error::Protocol(
315                    "Cannot load into shared array column - column has multiple references".to_string()
316                ))?;
317            nested_mut.load_from_buffer(buffer, total_nested_elements)?;
318        }
319
320        Ok(())
321    }
322
323    fn load_prefix(&mut self, buffer: &mut &[u8], rows: usize) -> Result<()> {
324        // Delegate to nested column's load_prefix
325        // Critical for Array(LowCardinality(X)) to read LowCardinality
326        // key_version before offsets
327        let nested_mut = Arc::get_mut(&mut self.nested).ok_or_else(|| {
328            Error::Protocol(
329                "Cannot load prefix for shared array column".to_string(),
330            )
331        })?;
332        nested_mut.load_prefix(buffer, rows)
333    }
334
335    fn save_prefix(&self, buffer: &mut BytesMut) -> Result<()> {
336        // Delegate to nested column's save_prefix
337        // Critical for Array(LowCardinality(X)) to write LowCardinality
338        // version before offsets
339        self.nested.save_prefix(buffer)
340    }
341
342    fn save_to_buffer(&self, buffer: &mut BytesMut) -> Result<()> {
343        // Write offsets as fixed UInt64 (not varints!)
344        // Wire format: UInt64 values stored as 8-byte little-endian
345        // This matches load_from_buffer which reads fixed UInt64
346        if !self.offsets.is_empty() {
347            let byte_slice = unsafe {
348                std::slice::from_raw_parts(
349                    self.offsets.as_ptr() as *const u8,
350                    self.offsets.len() * 8,
351                )
352            };
353            buffer.extend_from_slice(byte_slice);
354        }
355
356        // Write nested column data
357        self.nested.save_to_buffer(buffer)?;
358
359        Ok(())
360    }
361
362    fn clone_empty(&self) -> ColumnRef {
363        Arc::new(ColumnArray::with_nested(self.nested.clone_empty()))
364    }
365
366    fn slice(&self, begin: usize, len: usize) -> Result<ColumnRef> {
367        if begin + len > self.offsets.len() {
368            return Err(Error::InvalidArgument(format!(
369                "Slice out of bounds: begin={}, len={}, size={}",
370                begin,
371                len,
372                self.offsets.len()
373            )));
374        }
375
376        // Calculate the range of nested elements we need
377        let nested_start =
378            if begin == 0 { 0 } else { self.offsets[begin - 1] as usize };
379        let nested_end = self.offsets[begin + len - 1] as usize;
380        let nested_len = nested_end - nested_start;
381
382        // Slice the nested column
383        let sliced_nested = self.nested.slice(nested_start, nested_len)?;
384
385        // Adjust offsets for the slice
386        let mut sliced_offsets = Vec::with_capacity(len);
387        let offset_base = if begin == 0 { 0 } else { self.offsets[begin - 1] };
388
389        for i in begin..begin + len {
390            sliced_offsets.push(self.offsets[i] - offset_base);
391        }
392
393        let mut result = ColumnArray::with_nested(sliced_nested);
394        result.offsets = sliced_offsets;
395
396        Ok(Arc::new(result))
397    }
398
399    fn as_any(&self) -> &dyn std::any::Any {
400        self
401    }
402
403    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
404        self
405    }
406}
407
408/// Typed wrapper for ColumnArray that provides type-safe access to nested
409/// column
410///
411/// This is analogous to `ColumnArrayT<T>` in clickhouse-cpp, providing
412/// compile-time type safety for array operations.
413///
414/// **Reference Implementation:** See
415/// `clickhouse-cpp/clickhouse/columns/array.h`
416pub struct ColumnArrayT<T>
417where
418    T: Column + 'static,
419{
420    inner: ColumnArray,
421    _phantom: PhantomData<fn() -> T>,
422}
423
424impl<T> ColumnArrayT<T>
425where
426    T: Column + 'static,
427{
428    /// Create a new typed array column from a typed nested column
429    pub fn with_nested(nested: Arc<T>) -> Self {
430        let inner = ColumnArray::with_nested(nested);
431        Self { inner, _phantom: PhantomData }
432    }
433
434    /// Create a new typed array column from an array type
435    ///
436    /// Returns an error if the nested column type doesn't match T
437    pub fn new(type_: Type) -> Result<Self> {
438        let inner = ColumnArray::new(type_);
439        // Verify the nested column is of the expected type
440        let _ = inner.nested_ref().as_any().downcast_ref::<T>().ok_or_else(
441            || {
442                Error::InvalidArgument(format!(
443                    "Type mismatch: expected nested column of type {}",
444                    std::any::type_name::<T>()
445                ))
446            },
447        )?;
448        Ok(Self { inner, _phantom: PhantomData })
449    }
450
451    /// Create with reserved capacity
452    pub fn with_capacity(type_: Type, capacity: usize) -> Result<Self> {
453        let inner = ColumnArray::with_capacity(type_, capacity);
454        // Verify type
455        let _ = inner.nested_ref().as_any().downcast_ref::<T>().ok_or_else(
456            || {
457                Error::InvalidArgument(format!(
458                    "Type mismatch: expected nested column of type {}",
459                    std::any::type_name::<T>()
460                ))
461            },
462        )?;
463        Ok(Self { inner, _phantom: PhantomData })
464    }
465
466    /// Get typed reference to the nested column
467    ///
468    /// This is safe because we verify the type at construction
469    pub fn nested_typed(&self) -> &T {
470        // Use the generic nested method to get typed access directly
471        self.inner.nested::<T>()
472    }
473
474    /// Get typed mutable reference to the nested column
475    ///
476    /// Returns an error if the column has multiple Arc references
477    pub fn nested_typed_mut(&mut self) -> Result<&mut T> {
478        // Use the generic nested_mut to get typed access directly
479        Ok(self.inner.nested_mut::<T>())
480    }
481
482    /// Append an array by building it with a closure
483    ///
484    /// The closure receives a mutable reference to the nested column
485    /// and can append elements. The array length is calculated automatically.
486    ///
487    /// # Example
488    /// ```ignore
489    /// let mut arr =
490    /// ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))?;
491    /// arr.append_array(|nested| {
492    ///     nested.append(1);
493    ///     nested.append(2);
494    ///     nested.append(3);
495    /// })?;
496    /// ```
497    pub fn append_array<F>(&mut self, build_fn: F) -> Result<()>
498    where
499        F: FnOnce(&mut T),
500    {
501        let start_len = self.inner.nested_ref().size();
502        let nested = self.nested_typed_mut()?;
503        build_fn(nested);
504        let end_len = self.inner.nested_ref().size();
505        let array_len = end_len - start_len;
506        self.inner.append_len(array_len as u64);
507        Ok(())
508    }
509
510    /// Append an entire column as a single array element
511    pub fn append_array_column(&mut self, array_data: ColumnRef) {
512        self.inner.append_array(array_data)
513    }
514
515    /// Append an array specified by length
516    ///
517    /// The caller must ensure that `len` elements have been added to the
518    /// nested column
519    pub fn append_len(&mut self, len: u64) {
520        self.inner.append_len(len)
521    }
522
523    /// Get the array at the given index as a sliced column
524    pub fn at(&self, index: usize) -> ColumnRef {
525        self.inner.at(index)
526    }
527
528    /// Get the start and end indices for the array at the given index
529    pub fn get_array_range(&self, index: usize) -> Option<(usize, usize)> {
530        self.inner.get_array_range(index)
531    }
532
533    /// Get the length of the array at the given index
534    pub fn get_array_len(&self, index: usize) -> Option<usize> {
535        self.inner.get_array_len(index)
536    }
537
538    /// Get the offsets
539    pub fn offsets(&self) -> &[u64] {
540        self.inner.offsets()
541    }
542
543    /// Get the number of arrays
544    pub fn len(&self) -> usize {
545        self.inner.len()
546    }
547
548    /// Check if the array column is empty
549    pub fn is_empty(&self) -> bool {
550        self.inner.is_empty()
551    }
552
553    /// Get reference to inner ColumnArray
554    pub fn inner(&self) -> &ColumnArray {
555        &self.inner
556    }
557
558    /// Get mutable reference to inner ColumnArray
559    pub fn inner_mut(&mut self) -> &mut ColumnArray {
560        &mut self.inner
561    }
562
563    /// Convert into inner ColumnArray
564    pub fn into_inner(self) -> ColumnArray {
565        self.inner
566    }
567}
568
569impl<T> Column for ColumnArrayT<T>
570where
571    T: Column + 'static,
572{
573    fn column_type(&self) -> &Type {
574        self.inner.column_type()
575    }
576
577    fn size(&self) -> usize {
578        self.inner.size()
579    }
580
581    fn clear(&mut self) {
582        self.inner.clear()
583    }
584
585    fn reserve(&mut self, new_cap: usize) {
586        self.inner.reserve(new_cap)
587    }
588
589    fn append_column(&mut self, other: ColumnRef) -> Result<()> {
590        self.inner.append_column(other)
591    }
592
593    fn load_from_buffer(
594        &mut self,
595        buffer: &mut &[u8],
596        rows: usize,
597    ) -> Result<()> {
598        self.inner.load_from_buffer(buffer, rows)
599    }
600
601    fn load_prefix(&mut self, buffer: &mut &[u8], rows: usize) -> Result<()> {
602        self.inner.load_prefix(buffer, rows)
603    }
604
605    fn save_prefix(&self, buffer: &mut BytesMut) -> Result<()> {
606        self.inner.save_prefix(buffer)
607    }
608
609    fn save_to_buffer(&self, buffer: &mut BytesMut) -> Result<()> {
610        self.inner.save_to_buffer(buffer)
611    }
612
613    fn clone_empty(&self) -> ColumnRef {
614        Arc::new(ColumnArrayT::<T> {
615            inner: ColumnArray::with_nested(
616                self.inner.nested_ref().clone_empty(),
617            ),
618            _phantom: PhantomData,
619        })
620    }
621
622    fn slice(&self, begin: usize, len: usize) -> Result<ColumnRef> {
623        let sliced_inner = self.inner.slice(begin, len)?;
624
625        // The sliced result is a ColumnArray with proper offsets and nested
626        // data We need to extract it and wrap it in ColumnArrayT
627        let sliced_array = sliced_inner
628            .as_any()
629            .downcast_ref::<ColumnArray>()
630            .ok_or_else(|| {
631                Error::InvalidArgument(
632                    "Failed to downcast sliced column".to_string(),
633                )
634            })?;
635
636        // Clone the sliced array structure (preserves offsets and nested)
637        let cloned_inner = ColumnArray {
638            type_: sliced_array.column_type().clone(),
639            nested: sliced_array.nested_ref().clone(),
640            offsets: sliced_array.offsets().to_vec(),
641        };
642
643        Ok(Arc::new(ColumnArrayT::<T> {
644            inner: cloned_inner,
645            _phantom: PhantomData,
646        }))
647    }
648
649    fn as_any(&self) -> &dyn std::any::Any {
650        self
651    }
652
653    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
654        self
655    }
656}
657
658// Helper functions removed - using buffer_utils module
659
660#[cfg(test)]
661#[cfg_attr(coverage_nightly, coverage(off))]
662mod tests {
663    use super::*;
664    use crate::{
665        column::{
666            numeric::ColumnUInt64,
667            string::ColumnString,
668        },
669        types::Type,
670    };
671
672    #[test]
673    fn test_array_creation() {
674        let nested = Arc::new(ColumnUInt64::new());
675        let col = ColumnArray::with_nested(nested);
676        assert_eq!(col.size(), 0);
677    }
678
679    #[test]
680    fn test_array_append() {
681        let mut nested = ColumnUInt64::new();
682        // First array: [1, 2, 3]
683        nested.append(1);
684        nested.append(2);
685        nested.append(3);
686
687        let mut col = ColumnArray::with_nested(Arc::new(nested));
688        col.append_len(3); // Array of 3 elements
689
690        // Second array: [4, 5]
691        let nested_mut: &mut ColumnUInt64 = col.nested_mut();
692        nested_mut.append(4);
693        nested_mut.append(5);
694
695        col.append_len(2); // Array of 2 more elements
696
697        assert_eq!(col.size(), 2);
698        assert_eq!(col.get_array_len(0), Some(3));
699        assert_eq!(col.get_array_len(1), Some(2));
700        assert_eq!(col.get_array_range(0), Some((0, 3)));
701        assert_eq!(col.get_array_range(1), Some((3, 5)));
702    }
703
704    #[test]
705    fn test_array_offsets() {
706        let nested = Arc::new(ColumnUInt64::new());
707        let mut col = ColumnArray::with_nested(nested);
708
709        col.append_len(3); // Array with 3 elements
710        col.append_len(0); // Empty array
711        col.append_len(2); // Array with 2 elements
712
713        assert_eq!(col.offsets(), &[3, 3, 5]);
714        assert_eq!(col.get_array_len(0), Some(3));
715        assert_eq!(col.get_array_len(1), Some(0));
716        assert_eq!(col.get_array_len(2), Some(2));
717    }
718
719    #[test]
720    fn test_array_empty_arrays() {
721        let nested = Arc::new(ColumnUInt64::new());
722        let mut col = ColumnArray::with_nested(nested);
723
724        col.append_len(0);
725        col.append_len(0);
726        col.append_len(0);
727
728        assert_eq!(col.size(), 3);
729        assert_eq!(col.get_array_len(0), Some(0));
730        assert_eq!(col.get_array_len(1), Some(0));
731        assert_eq!(col.get_array_len(2), Some(0));
732    }
733
734    #[test]
735    fn test_array_save_load() {
736        let nested = Arc::new(ColumnUInt64::new());
737        let mut col = ColumnArray::with_nested(nested);
738
739        col.append_len(3);
740        col.append_len(2);
741        col.append_len(1);
742
743        let mut buffer = BytesMut::new();
744        col.save_to_buffer(&mut buffer).unwrap();
745
746        // Verify offsets are written
747        assert!(!buffer.is_empty());
748    }
749
750    #[test]
751    fn test_array_load_offsets() {
752        use bytes::BufMut;
753
754        let nested = Arc::new(ColumnUInt64::new());
755        let mut col = ColumnArray::with_nested(nested);
756
757        // Encode offsets manually as fixed UInt64: 3, 5, 8 (total 8 nested
758        // elements)
759        let mut data = BytesMut::new();
760        data.put_u64_le(3);
761        data.put_u64_le(5);
762        data.put_u64_le(8);
763
764        // Must also include nested data (8 UInt64 values)
765        for i in 0..8u64 {
766            data.put_u64_le(i);
767        }
768
769        let mut reader = &data[..];
770        col.load_from_buffer(&mut reader, 3).unwrap();
771
772        assert_eq!(col.size(), 3);
773        assert_eq!(col.offsets(), &[3, 5, 8]);
774    }
775
776    #[test]
777    fn test_array_slice() {
778        let mut nested = ColumnUInt64::new();
779        // Arrays: [1,2,3], [4,5], [6], [7,8,9,10]
780        for i in 1..=10 {
781            nested.append(i);
782        }
783
784        let mut col = ColumnArray::with_nested(Arc::new(nested));
785        col.append_len(3); // offset: 3
786        col.append_len(2); // offset: 5
787        col.append_len(1); // offset: 6
788        col.append_len(4); // offset: 10
789
790        let sliced = col.slice(1, 2).unwrap(); // Take arrays [4,5] and [6]
791        let sliced_col =
792            sliced.as_any().downcast_ref::<ColumnArray>().unwrap();
793
794        assert_eq!(sliced_col.size(), 2);
795        assert_eq!(sliced_col.offsets(), &[2, 3]); // Adjusted offsets
796    }
797
798    #[test]
799    fn test_array_with_strings() {
800        let nested = Arc::new(ColumnString::new(Type::string()));
801        let mut col = ColumnArray::with_nested(nested);
802
803        col.append_len(2); // Array with 2 strings
804        col.append_len(3); // Array with 3 strings
805
806        assert_eq!(col.size(), 2);
807        assert_eq!(col.get_array_len(0), Some(2));
808        assert_eq!(col.get_array_len(1), Some(3));
809    }
810
811    #[test]
812    fn test_array_type_mismatch() {
813        let nested1 = Arc::new(ColumnUInt64::new());
814        let mut col1 = ColumnArray::with_nested(nested1);
815
816        let nested2 = Arc::new(ColumnString::new(Type::string()));
817        let col2 = ColumnArray::with_nested(nested2);
818
819        let result = col1.append_column(Arc::new(col2));
820        assert!(result.is_err());
821    }
822
823    #[test]
824    fn test_array_out_of_bounds() {
825        let nested = Arc::new(ColumnUInt64::new());
826        let mut col = ColumnArray::with_nested(nested);
827
828        col.append_len(3);
829        col.append_len(2);
830
831        assert_eq!(col.get_array_len(100), None);
832        assert_eq!(col.get_array_range(100), None);
833    }
834
835    #[test]
836    fn test_array_append_column() {
837        // Create first array column with data: [[1, 2], [3]]
838        let mut nested1 = ColumnUInt64::new();
839        nested1.append(1);
840        nested1.append(2);
841        nested1.append(3);
842
843        let mut col1 = ColumnArray::with_nested(Arc::new(nested1));
844        col1.append_len(2); // First array: [1, 2]
845        col1.append_len(1); // Second array: [3]
846
847        // Create second array column with data: [[4, 5, 6]]
848        let mut nested2 = ColumnUInt64::new();
849        nested2.append(4);
850        nested2.append(5);
851        nested2.append(6);
852
853        let mut col2 = ColumnArray::with_nested(Arc::new(nested2));
854        col2.append_len(3); // Third array: [4, 5, 6]
855
856        // Append col2 to col1
857        col1.append_column(Arc::new(col2))
858            .expect("append_column should succeed");
859
860        // Verify we have 3 arrays total
861        assert_eq!(col1.size(), 3, "Should have 3 arrays after append");
862
863        // Verify array lengths
864        assert_eq!(
865            col1.get_array_len(0),
866            Some(2),
867            "First array should have 2 elements"
868        );
869        assert_eq!(
870            col1.get_array_len(1),
871            Some(1),
872            "Second array should have 1 element"
873        );
874        assert_eq!(
875            col1.get_array_len(2),
876            Some(3),
877            "Third array should have 3 elements"
878        );
879
880        // CRITICAL: Verify nested data was actually appended
881        // The nested column should contain [1, 2, 3, 4, 5, 6]
882        let nested: &ColumnUInt64 = col1.nested();
883        assert_eq!(
884            nested.size(),
885            6,
886            "Nested column should have 6 total elements"
887        );
888
889        // Verify offsets are correct: [2, 3, 6]
890        assert_eq!(col1.offsets(), &[2, 3, 6], "Offsets should be [2, 3, 6]");
891    }
892
893    #[test]
894    #[should_panic(
895        expected = "Cannot clear shared array column - column has multiple references"
896    )]
897    fn test_array_clear_panics_on_shared_nested() {
898        // Create an array column
899        let mut nested = ColumnUInt64::new();
900        nested.append(1);
901        nested.append(2);
902        nested.append(3);
903
904        let nested_arc = Arc::new(nested);
905        let mut col = ColumnArray::with_nested(nested_arc.clone());
906        col.append_len(3);
907
908        // Create a second reference to the nested column
909        let _shared_ref = nested_arc.clone();
910
911        // Now nested has multiple Arc references, so clear() MUST panic
912        // to prevent data corruption (clearing offsets but not nested data)
913        col.clear();
914    }
915
916    #[test]
917    fn test_array_roundtrip_nested_data() {
918        use bytes::BytesMut;
919
920        // Create array column with actual nested data: [[1, 2], [3, 4, 5]]
921        let mut nested = ColumnUInt64::new();
922        nested.append(1);
923        nested.append(2);
924        nested.append(3);
925        nested.append(4);
926        nested.append(5);
927
928        let mut col = ColumnArray::with_nested(Arc::new(nested));
929        col.append_len(2); // First array: [1, 2]
930        col.append_len(3); // Second array: [3, 4, 5]
931
932        assert_eq!(col.size(), 2, "Original should have 2 arrays");
933
934        // Save to buffer
935        let mut buffer = BytesMut::new();
936        col.save_to_buffer(&mut buffer).expect("save should succeed");
937
938        // Load into new array column
939        let nested_empty = Arc::new(ColumnUInt64::new());
940        let mut col_loaded = ColumnArray::with_nested(nested_empty);
941
942        let mut buf_slice = &buffer[..];
943        col_loaded
944            .load_from_buffer(&mut buf_slice, 2)
945            .expect("load should succeed");
946
947        // Verify arrays structure
948        assert_eq!(col_loaded.size(), 2, "Loaded should have 2 arrays");
949        assert_eq!(
950            col_loaded.get_array_len(0),
951            Some(2),
952            "First array should have 2 elements"
953        );
954        assert_eq!(
955            col_loaded.get_array_len(1),
956            Some(3),
957            "Second array should have 3 elements"
958        );
959
960        // CRITICAL: Verify nested data was actually loaded
961        let nested_loaded: &ColumnUInt64 = col_loaded.nested();
962        assert_eq!(
963            nested_loaded.size(),
964            5,
965            "Nested should have 5 total elements after load"
966        );
967
968        // Verify we can retrieve the actual arrays
969        let arr0 = col_loaded.at(0);
970        let arr0_data = arr0.as_any().downcast_ref::<ColumnUInt64>().unwrap();
971        assert_eq!(arr0_data.size(), 2, "First array should have 2 elements");
972
973        let arr1 = col_loaded.at(1);
974        let arr1_data = arr1.as_any().downcast_ref::<ColumnUInt64>().unwrap();
975        assert_eq!(arr1_data.size(), 3, "Second array should have 3 elements");
976    }
977
978    // ColumnArrayT tests
979    #[test]
980    fn test_array_t_creation() {
981        let nested = Arc::new(ColumnUInt64::new());
982        let col = ColumnArrayT::<ColumnUInt64>::with_nested(nested);
983        assert_eq!(col.size(), 0);
984        assert!(col.is_empty());
985    }
986
987    #[test]
988    fn test_array_t_new() {
989        let col =
990            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
991                .unwrap();
992        assert_eq!(col.size(), 0);
993    }
994
995    #[test]
996    fn test_array_t_append_array() {
997        let mut col =
998            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
999                .unwrap();
1000
1001        // Append first array: [1, 2, 3]
1002        col.append_array(|nested| {
1003            nested.append(1);
1004            nested.append(2);
1005            nested.append(3);
1006        })
1007        .unwrap();
1008
1009        // Append second array: [4, 5]
1010        col.append_array(|nested| {
1011            nested.append(4);
1012            nested.append(5);
1013        })
1014        .unwrap();
1015
1016        assert_eq!(col.size(), 2);
1017        assert_eq!(col.get_array_len(0), Some(3));
1018        assert_eq!(col.get_array_len(1), Some(2));
1019        assert_eq!(col.offsets(), &[3, 5]);
1020    }
1021
1022    #[test]
1023    fn test_array_t_typed_access() {
1024        let mut col =
1025            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1026                .unwrap();
1027
1028        col.append_array(|nested| {
1029            nested.append(10);
1030            nested.append(20);
1031        })
1032        .unwrap();
1033
1034        // Get typed access to nested column
1035        let nested = col.nested_typed();
1036        assert_eq!(nested.size(), 2);
1037        assert_eq!(nested.at(0), 10);
1038        assert_eq!(nested.at(1), 20);
1039    }
1040
1041    #[test]
1042    fn test_array_t_with_strings() {
1043        let mut col =
1044            ColumnArrayT::<ColumnString>::new(Type::array(Type::string()))
1045                .unwrap();
1046
1047        col.append_array(|nested| {
1048            nested.append("hello");
1049            nested.append("world");
1050        })
1051        .unwrap();
1052
1053        col.append_array(|nested| {
1054            nested.append("foo");
1055        })
1056        .unwrap();
1057
1058        assert_eq!(col.size(), 2);
1059        assert_eq!(col.get_array_len(0), Some(2));
1060        assert_eq!(col.get_array_len(1), Some(1));
1061
1062        let nested = col.nested_typed();
1063        assert_eq!(nested.at(0), "hello");
1064        assert_eq!(nested.at(1), "world");
1065        assert_eq!(nested.at(2), "foo");
1066    }
1067
1068    #[test]
1069    fn test_array_t_empty_arrays() {
1070        let mut col =
1071            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1072                .unwrap();
1073
1074        col.append_array(|_nested| {
1075            // Empty array
1076        })
1077        .unwrap();
1078
1079        col.append_array(|nested| {
1080            nested.append(42);
1081        })
1082        .unwrap();
1083
1084        col.append_array(|_nested| {
1085            // Another empty array
1086        })
1087        .unwrap();
1088
1089        assert_eq!(col.size(), 3);
1090        assert_eq!(col.get_array_len(0), Some(0));
1091        assert_eq!(col.get_array_len(1), Some(1));
1092        assert_eq!(col.get_array_len(2), Some(0));
1093        assert_eq!(col.offsets(), &[0, 1, 1]);
1094    }
1095
1096    #[test]
1097    fn test_array_t_append_column() {
1098        let mut col1 =
1099            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1100                .unwrap();
1101        col1.append_array(|nested| {
1102            nested.append(1);
1103            nested.append(2);
1104        })
1105        .unwrap();
1106
1107        let mut col2 =
1108            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1109                .unwrap();
1110        col2.append_array(|nested| {
1111            nested.append(3);
1112            nested.append(4);
1113            nested.append(5);
1114        })
1115        .unwrap();
1116
1117        col1.append_column(Arc::new(col2.into_inner()))
1118            .expect("append_column should succeed");
1119
1120        assert_eq!(col1.size(), 2);
1121        assert_eq!(col1.get_array_len(0), Some(2));
1122        assert_eq!(col1.get_array_len(1), Some(3));
1123
1124        let nested = col1.nested_typed();
1125        assert_eq!(nested.size(), 5);
1126    }
1127
1128    #[test]
1129    fn test_array_t_slice() {
1130        let mut col =
1131            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1132                .unwrap();
1133
1134        // Arrays: [1,2,3], [4,5], [6], [7,8,9,10]
1135        col.append_array(|n| {
1136            n.append(1);
1137            n.append(2);
1138            n.append(3);
1139        })
1140        .unwrap();
1141        col.append_array(|n| {
1142            n.append(4);
1143            n.append(5);
1144        })
1145        .unwrap();
1146        col.append_array(|n| {
1147            n.append(6);
1148        })
1149        .unwrap();
1150        col.append_array(|n| {
1151            n.append(7);
1152            n.append(8);
1153            n.append(9);
1154            n.append(10);
1155        })
1156        .unwrap();
1157
1158        // Slice arrays [4,5] and [6] (indices 1-2)
1159        let sliced = col.slice(1, 2).unwrap();
1160        let sliced_col = sliced
1161            .as_any()
1162            .downcast_ref::<ColumnArrayT<ColumnUInt64>>()
1163            .unwrap();
1164
1165        assert_eq!(sliced_col.size(), 2);
1166        assert_eq!(sliced_col.offsets(), &[2, 3]);
1167
1168        let nested = sliced_col.nested_typed();
1169        assert_eq!(nested.size(), 3);
1170        assert_eq!(nested.at(0), 4);
1171        assert_eq!(nested.at(1), 5);
1172        assert_eq!(nested.at(2), 6);
1173    }
1174
1175    #[test]
1176    fn test_array_t_roundtrip() {
1177        use bytes::BytesMut;
1178
1179        let mut col =
1180            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1181                .unwrap();
1182
1183        col.append_array(|n| {
1184            n.append(1);
1185            n.append(2);
1186        })
1187        .unwrap();
1188        col.append_array(|n| {
1189            n.append(3);
1190            n.append(4);
1191            n.append(5);
1192        })
1193        .unwrap();
1194
1195        // Save to buffer
1196        let mut buffer = BytesMut::new();
1197        col.save_to_buffer(&mut buffer).unwrap();
1198
1199        // Load into new column
1200        let mut col_loaded =
1201            ColumnArrayT::<ColumnUInt64>::new(Type::array(Type::uint64()))
1202                .unwrap();
1203        let mut buf_slice = &buffer[..];
1204        col_loaded.load_from_buffer(&mut buf_slice, 2).unwrap();
1205
1206        assert_eq!(col_loaded.size(), 2);
1207        assert_eq!(col_loaded.get_array_len(0), Some(2));
1208        assert_eq!(col_loaded.get_array_len(1), Some(3));
1209
1210        let nested = col_loaded.nested_typed();
1211        assert_eq!(nested.size(), 5);
1212        assert_eq!(nested.at(0), 1);
1213        assert_eq!(nested.at(1), 2);
1214        assert_eq!(nested.at(2), 3);
1215        assert_eq!(nested.at(3), 4);
1216        assert_eq!(nested.at(4), 5);
1217    }
1218}