stringtape/
stringtape.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2
3//! # StringTape
4//!
5//! Memory-efficient string and bytes storage compatible with Apache Arrow.
6//!
7//! ```rust
8//! use stringtape::{CharsTapeI32, StringTapeError};
9//!
10//! let mut tape = CharsTapeI32::new();
11//! tape.push("hello")?;
12//! tape.push("world")?;
13//!
14//! assert_eq!(tape.len(), 2);
15//! assert_eq!(&tape[0], "hello");
16//!
17//! // Iterate over strings
18//! for s in &tape {
19//!     println!("{}", s);
20//! }
21//! # Ok::<(), StringTapeError>(())
22//! ```
23//!
24//! It also supports binary data via `BytesTape`:
25//!
26//! ```rust
27//! use stringtape::{BytesTapeI32, StringTapeError};
28//!
29//! let mut tape = BytesTapeI32::new();
30//! tape.push(&[0xde, 0xad, 0xbe, 0xef])?;
31//! tape.push(b"bytes")?;
32//!
33//! assert_eq!(&tape[1], b"bytes" as &[u8]);
34//! # Ok::<(), StringTapeError>(())
35//! ```
36
37#[cfg(feature = "std")]
38extern crate std;
39
40#[cfg(not(feature = "std"))]
41extern crate alloc;
42
43use core::fmt;
44use core::marker::PhantomData;
45use core::ops::{
46    Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, Sub,
47};
48use core::ptr::{self, NonNull};
49use core::slice;
50
51#[cfg(not(feature = "std"))]
52use alloc::string::String;
53
54use allocator_api2::alloc::{Allocator, Global, Layout};
55
/// Errors that can occur when working with tape classes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringTapeError {
    /// The total data size exceeds the maximum value representable by the offset type.
    ///
    /// This can happen, for example, when using 32-bit offsets (`CharsTapeI32`) and
    /// the total data exceeds 2GB.
    OffsetOverflow,
    /// Memory allocation failed, or a requested capacity could not be computed
    /// (size arithmetic overflowed or the layout was invalid).
    AllocationError,
    /// Index is out of bounds for the current number of strings.
    IndexOutOfBounds,
    /// Invalid UTF-8 sequence encountered.
    Utf8Error(core::str::Utf8Error),
}
71
72impl fmt::Display for StringTapeError {
73    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74        match self {
75            StringTapeError::OffsetOverflow => write!(f, "offset value too large for offset type"),
76            StringTapeError::AllocationError => write!(f, "memory allocation failed"),
77            StringTapeError::IndexOutOfBounds => write!(f, "index out of bounds"),
78            StringTapeError::Utf8Error(e) => write!(f, "invalid UTF-8: {}", e),
79        }
80    }
81}
82
// Only compiled with the `std` feature, since the impl names `std::error::Error`.
// The trait's default methods are used as-is; `Debug` is derived and `Display`
// is implemented above.
#[cfg(feature = "std")]
impl std::error::Error for StringTapeError {}
85
86/// A memory-efficient string storage structure compatible with Apache Arrow.
87///
88/// `CharsTape` stores multiple strings in a contiguous memory layout using offset-based
89/// indexing, similar to Apache Arrow's String and LargeString arrays. All string data
90/// is stored in a single buffer, with a separate offset array tracking string boundaries.
91///
92/// # Type Parameters
93///
94/// * `Offset` - The offset type used for indexing (`i32` for CharsTapeI32, `i64` for CharsTapeI64)
95/// * `A` - The allocator type (must implement `Allocator`). Defaults to `Global`.
96///
97/// # Examples
98///
99/// ```rust
100/// use stringtape::{CharsTapeI32, StringTapeError};
101///
102/// // Create a new CharsTape with i32 offsets and global allocator
103/// let mut tape = CharsTapeI32::new();
104/// tape.push("hello")?;
105/// tape.push("world")?;
106///
107/// assert_eq!(tape.len(), 2);
108/// assert_eq!(&tape[0], "hello");
109/// assert_eq!(tape.get(1), Some("world"));
110/// # Ok::<(), StringTapeError>(())
111/// ```
112///
113/// # Custom Allocators
114///
115/// ```rust,ignore
116/// use stringtape::CharsTape;
117/// use allocator_api2::alloc::{Allocator, Global};
118///
119/// // Use with the global allocator explicitly
120/// let tape: CharsTape<i32, Global> = CharsTape::new_in(Global);
121/// ```
122///
123/// # Memory Layout
124///
125/// The memory layout is compatible with Apache Arrow:
126/// ```text
127/// Data buffer:    [h,e,l,l,o,w,o,r,l,d]
128/// Offset buffer:  [0, 5, 10]
129/// ```
struct RawTape<Offset: OffsetType, A: Allocator> {
    /// Data buffer, or `None` until it is first allocated. The slice length is the
    /// allocated capacity in bytes, not the number of bytes in use.
    data: Option<NonNull<[u8]>>,
    /// Offsets buffer, or `None` until it is first allocated. The slice length is the
    /// allocated capacity in offset slots; `len_items + 1` of them are in use.
    offsets: Option<NonNull<[Offset]>>,
    /// Number of bytes of `data` currently in use.
    len_bytes: usize,
    /// Number of items currently stored.
    len_items: usize,
    /// Allocator used to allocate, grow and free both buffers.
    allocator: A,
    /// Ties the struct to its `Offset` type parameter.
    _phantom: PhantomData<Offset>,
}
138
/// Named raw parts returned by `as_raw_parts` methods.
///
/// The pointers are plain raw pointers: they carry no lifetime and must not be
/// used after the tape or view they came from has been modified or dropped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RawParts<Offset: OffsetType> {
    /// Pointer to the start of the contiguous data buffer.
    ///
    /// Null when produced by a tape whose data buffer has not been allocated.
    pub data_ptr: *const u8,
    /// Pointer to the start of the offsets buffer.
    ///
    /// Null when produced by a tape whose offsets buffer has not been allocated.
    pub offsets_ptr: *const Offset,
    /// Number of bytes of valid data in `data_ptr`.
    pub data_len: usize,
    /// Number of items stored (strings/bytes entries).
    pub items_count: usize,
}
151
/// UTF-8 string view over `RawTape`.
///
/// Owns a `RawTape` with the same offset and allocator parameters; both default
/// to `i32` offsets and the `Global` allocator.
pub struct CharsTape<Offset: OffsetType = i32, A: Allocator = Global> {
    /// Underlying byte tape that stores the data and offsets.
    inner: RawTape<Offset, A>,
}
156
/// Binary bytes view over `RawTape`.
///
/// Owns a `RawTape` with the same offset and allocator parameters; both default
/// to `i32` offsets and the `Global` allocator.
pub struct BytesTape<Offset: OffsetType = i32, A: Allocator = Global> {
    /// Underlying byte tape that stores the data and offsets.
    inner: RawTape<Offset, A>,
}
161
/// A view into a continuous slice of a RawTape.
///
/// This provides a zero-copy view that implements the same read-only interface
/// as RawTape but cannot modify the underlying data.
pub struct RawTapeView<'a, Offset: OffsetType> {
    /// Data bytes the offsets index into. When built from a tape this is the tape's
    /// whole used data region, not just the bytes of the viewed items.
    data: &'a [u8],
    /// Offsets of the viewed items: one more entry than there are items. They are
    /// absolute positions into `data` and are not rebased to start at zero.
    offsets: &'a [Offset],
}
170
/// UTF-8 string view over `RawTapeView`.
///
/// Borrows its data for `'a`; the offset type defaults to `i32`.
pub struct CharsTapeView<'a, Offset: OffsetType = i32> {
    /// Underlying byte-level view.
    inner: RawTapeView<'a, Offset>,
}
175
/// Binary bytes view over `RawTapeView`.
///
/// Borrows its data for `'a`; the offset type defaults to `i32`.
pub struct BytesTapeView<'a, Offset: OffsetType = i32> {
    /// Underlying byte-level view.
    inner: RawTapeView<'a, Offset>,
}
180
/// Trait for offset types used in CharsTape.
///
/// This trait defines the interface for offset types that can be used to index
/// into the string data buffer. Implementations are provided for `i32` and `i64`
/// to match Apache Arrow's String and LargeString array types, and for `u32` and
/// `u64` when unsigned offsets are desired (note: Arrow interop is i32/i64-only).
pub trait OffsetType: Copy + Default + PartialOrd + Sub<Output = Self> {
    /// Size of the offset type in bytes.
    const SIZE: usize;

    /// Convert a usize value to this offset type.
    ///
    /// Returns `None` if the value is too large to be represented by this offset type.
    fn from_usize(value: usize) -> Option<Self>;

    /// Convert this offset value to usize.
    ///
    /// The signature is infallible. The implementations in this file use a plain
    /// `as` cast, so a negative signed offset does not map to a meaningful position.
    fn to_usize(self) -> usize;
}
199
200impl OffsetType for i32 {
201    const SIZE: usize = 4;
202
203    fn from_usize(value: usize) -> Option<Self> {
204        if value <= i32::MAX as usize {
205            Some(value as i32)
206        } else {
207            None
208        }
209    }
210
211    fn to_usize(self) -> usize {
212        self as usize
213    }
214}
215
216impl OffsetType for i64 {
217    const SIZE: usize = 8;
218
219    fn from_usize(value: usize) -> Option<Self> {
220        Some(value as i64)
221    }
222
223    fn to_usize(self) -> usize {
224        self as usize
225    }
226}
227
228impl OffsetType for u32 {
229    const SIZE: usize = 4;
230
231    fn from_usize(value: usize) -> Option<Self> {
232        if value <= u32::MAX as usize {
233            Some(value as u32)
234        } else {
235            None
236        }
237    }
238
239    fn to_usize(self) -> usize {
240        self as usize
241    }
242}
243
244impl OffsetType for u64 {
245    const SIZE: usize = 8;
246
247    fn from_usize(value: usize) -> Option<Self> {
248        Some(value as u64)
249    }
250
251    fn to_usize(self) -> usize {
252        self as usize
253    }
254}
255
256impl<Offset: OffsetType, A: Allocator> RawTape<Offset, A> {
257    /// Creates a new, empty CharsTape with the global allocator.
258    ///
259    /// This operation is O(1) and does not allocate memory until the first string is pushed.
260    ///
261    /// # Examples
262    ///
263    /// ```rust
264    /// use stringtape::CharsTapeI32;
265    ///
266    /// let tape = CharsTapeI32::new();
267    /// assert!(tape.is_empty());
268    /// assert_eq!(tape.len(), 0);
269    /// ```
270    pub fn new() -> RawTape<Offset, Global> {
271        RawTape::new_in(Global)
272    }
273
274    /// Creates a new, empty CharsTape with a custom allocator.
275    ///
276    /// This operation is O(1) and does not allocate memory until the first string is pushed.
277    ///
278    /// # Examples
279    ///
280    /// ```rust
281    /// use stringtape::CharsTape;
282    /// use allocator_api2::alloc::Global;
283    ///
284    /// let tape: CharsTape<i32, Global> = CharsTape::new_in(Global);
285    /// assert!(tape.is_empty());
286    /// assert_eq!(tape.len(), 0);
287    /// ```
288    pub fn new_in(allocator: A) -> Self {
289        Self {
290            data: None,
291            offsets: None,
292            len_bytes: 0,
293            len_items: 0,
294            allocator,
295            _phantom: PhantomData,
296        }
297    }
298
299    /// Creates a new CharsTape with pre-allocated capacity using the global allocator.
300    ///
301    /// Pre-allocating capacity can improve performance when you know approximately
302    /// how much data you'll be storing.
303    ///
304    /// # Arguments
305    ///
306    /// * `data_capacity` - Number of bytes to pre-allocate for string data
307    /// * `strings_capacity` - Number of string slots to pre-allocate
308    ///
309    /// # Examples
310    ///
311    /// ```rust
312    /// use stringtape::{CharsTapeI32, StringTapeError};
313    ///
314    /// // Pre-allocate space for ~1KB of string data and 100 strings
315    /// let tape = CharsTapeI32::with_capacity(1024, 100)?;
316    /// assert_eq!(tape.data_capacity(), 1024);
317    /// # Ok::<(), StringTapeError>(())
318    /// ```
319    pub fn with_capacity(
320        data_capacity: usize,
321        strings_capacity: usize,
322    ) -> Result<RawTape<Offset, Global>, StringTapeError> {
323        RawTape::with_capacity_in(data_capacity, strings_capacity, Global)
324    }
325
326    /// Creates a new CharsTape with pre-allocated capacity and a custom allocator.
327    ///
328    /// Pre-allocating capacity can improve performance when you know approximately
329    /// how much data you'll be storing.
330    ///
331    /// # Arguments
332    ///
333    /// * `data_capacity` - Number of bytes to pre-allocate for string data
334    /// * `strings_capacity` - Number of string slots to pre-allocate
335    /// * `allocator` - The allocator to use for memory management
336    ///
337    /// # Examples
338    ///
339    /// ```rust
340    /// use stringtape::{CharsTape, StringTapeError};
341    /// use allocator_api2::alloc::Global;
342    ///
343    /// let tape: CharsTape<i32, Global> = CharsTape::with_capacity_in(1024, 100, Global)?;
344    /// assert_eq!(tape.data_capacity(), 1024);
345    /// # Ok::<(), StringTapeError>(())
346    /// ```
347    pub fn with_capacity_in(
348        data_capacity: usize,
349        strings_capacity: usize,
350        allocator: A,
351    ) -> Result<Self, StringTapeError> {
352        let mut tape = Self::new_in(allocator);
353        tape.reserve(data_capacity, strings_capacity)?;
354        Ok(tape)
355    }
356
357    pub fn reserve(
358        &mut self,
359        additional_bytes: usize,
360        additional_strings: usize,
361    ) -> Result<(), StringTapeError> {
362        if additional_bytes > 0 {
363            let current_capacity = self.data_capacity();
364            let new_capacity = current_capacity
365                .checked_add(additional_bytes)
366                .ok_or(StringTapeError::AllocationError)?;
367            self.grow_data(new_capacity)?;
368        }
369
370        if additional_strings > 0 {
371            let current_capacity = self.offsets_capacity();
372            let new_capacity = current_capacity
373                .checked_add(additional_strings + 1)
374                .ok_or(StringTapeError::AllocationError)?;
375            self.grow_offsets(new_capacity)?;
376        }
377        Ok(())
378    }
379
380    fn grow_data(&mut self, new_capacity: usize) -> Result<(), StringTapeError> {
381        let current_capacity = self.data_capacity();
382        if new_capacity <= current_capacity {
383            return Ok(());
384        }
385
386        let new_layout =
387            Layout::array::<u8>(new_capacity).map_err(|_| StringTapeError::AllocationError)?;
388
389        let new_ptr = if let Some(old_ptr) = self.data {
390            // Grow existing allocation
391            let old_layout = Layout::array::<u8>(current_capacity).unwrap();
392            unsafe {
393                self.allocator
394                    .grow(old_ptr.cast(), old_layout, new_layout)
395                    .map_err(|_| StringTapeError::AllocationError)?
396            }
397        } else {
398            // Initial allocation
399            self.allocator
400                .allocate(new_layout)
401                .map_err(|_| StringTapeError::AllocationError)?
402        };
403
404        self.data = Some(NonNull::slice_from_raw_parts(new_ptr.cast(), new_capacity));
405        Ok(())
406    }
407
408    fn grow_offsets(&mut self, new_capacity: usize) -> Result<(), StringTapeError> {
409        let current_capacity = self.offsets_capacity();
410        if new_capacity <= current_capacity {
411            return Ok(());
412        }
413
414        let new_layout =
415            Layout::array::<Offset>(new_capacity).map_err(|_| StringTapeError::AllocationError)?;
416
417        let new_ptr = if let Some(old_ptr) = self.offsets {
418            // Grow existing allocation
419            let old_layout = Layout::array::<Offset>(current_capacity).unwrap();
420            unsafe {
421                self.allocator
422                    .grow(old_ptr.cast(), old_layout, new_layout)
423                    .map_err(|_| StringTapeError::AllocationError)?
424            }
425        } else {
426            // Initial allocation with first offset = 0
427            let ptr = self
428                .allocator
429                .allocate_zeroed(new_layout)
430                .map_err(|_| StringTapeError::AllocationError)?;
431            ptr
432        };
433
434        self.offsets = Some(NonNull::slice_from_raw_parts(new_ptr.cast(), new_capacity));
435        Ok(())
436    }
437
438    /// Adds a raw bytes slice to the end of the tape.
439    ///
440    /// # Errors
441    ///
442    /// Returns `StringTapeError::OffsetOverflow` if adding this slice would cause
443    /// the total data size to exceed the maximum value representable by the offset type.
444    ///
445    /// Returns `StringTapeError::AllocationError` if memory allocation fails.
446    ///
447    /// # Examples
448    ///
449    /// ```rust
450    /// use stringtape::{BytesTapeI32, StringTapeError};
451    ///
452    /// let mut tape = BytesTapeI32::new();
453    /// tape.push(b"hello")?;
454    /// tape.push(&[1, 2, 3])?;
455    /// assert_eq!(tape.len(), 2);
456    /// # Ok::<(), StringTapeError>(())
457    /// ```
458    pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
459        let required_capacity = self
460            .len_bytes
461            .checked_add(bytes.len())
462            .ok_or(StringTapeError::AllocationError)?;
463
464        let current_data_capacity = self.data_capacity();
465        if required_capacity > current_data_capacity {
466            let new_capacity = (current_data_capacity * 2).max(required_capacity).max(64);
467            self.grow_data(new_capacity)?;
468        }
469
470        let current_offsets_capacity = self.offsets_capacity();
471        if self.len_items + 1 >= current_offsets_capacity {
472            let new_capacity = (current_offsets_capacity * 2)
473                .max(self.len_items + 2)
474                .max(8);
475            self.grow_offsets(new_capacity)?;
476        }
477
478        // Copy string data
479        if let Some(data_ptr) = self.data {
480            unsafe {
481                ptr::copy_nonoverlapping(
482                    bytes.as_ptr(),
483                    data_ptr.as_ptr().cast::<u8>().add(self.len_bytes),
484                    bytes.len(),
485                );
486            }
487        }
488
489        self.len_bytes += bytes.len();
490        self.len_items += 1;
491
492        // Write new offset
493        let offset = Offset::from_usize(self.len_bytes).ok_or(StringTapeError::OffsetOverflow)?;
494        if let Some(offsets_ptr) = self.offsets {
495            unsafe {
496                ptr::write(
497                    offsets_ptr.as_ptr().cast::<Offset>().add(self.len_items),
498                    offset,
499                );
500            }
501        }
502
503        Ok(())
504    }
505
506    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
507    ///
508    /// This operation is O(1).
509    pub fn get(&self, index: usize) -> Option<&[u8]> {
510        if index >= self.len_items {
511            return None;
512        }
513
514        let (data_ptr, offsets_ptr) = match (self.data, self.offsets) {
515            (Some(data), Some(offsets)) => (data, offsets),
516            _ => return None,
517        };
518
519        unsafe {
520            let offsets_ptr = offsets_ptr.as_ptr().cast::<Offset>();
521            let start_offset = if index == 0 {
522                0
523            } else {
524                ptr::read(offsets_ptr.add(index)).to_usize()
525            };
526            let end_offset = ptr::read(offsets_ptr.add(index + 1)).to_usize();
527
528            Some(slice::from_raw_parts(
529                data_ptr.as_ptr().cast::<u8>().add(start_offset),
530                end_offset - start_offset,
531            ))
532        }
533    }
534
535    /// Returns the number of items in the tape.
    pub fn len(&self) -> usize {
        // Counts stored items; the offsets buffer has one more entry in use than this.
        self.len_items
    }
539
540    /// Returns `true` if the CharsTape contains no strings.
541    pub fn is_empty(&self) -> bool {
542        self.len_items == 0
543    }
544
545    /// Returns the total number of bytes used by string data.
    pub fn data_len(&self) -> usize {
        // Bytes in use, which can be less than the allocated `data_capacity()`.
        self.len_bytes
    }
549
550    /// Returns the number of items currently stored (same as `len()`).
    #[allow(dead_code)]
    pub fn capacity(&self) -> usize {
        // NOTE(review): despite the name, this reports the item count rather than an
        // allocated capacity; `data_capacity` and `offsets_capacity` report buffer sizes.
        self.len_items
    }
555
556    /// Returns the number of bytes allocated for string data.
557    pub fn data_capacity(&self) -> usize {
558        self.data.map(|ptr| ptr.len()).unwrap_or(0)
559    }
560
561    /// Returns the number of offset slots allocated.
562    pub fn offsets_capacity(&self) -> usize {
563        self.offsets.map(|ptr| ptr.len()).unwrap_or(0)
564    }
565
566    /// Removes all items from the tape, keeping allocated capacity.
567    pub fn clear(&mut self) {
568        self.len_bytes = 0;
569        self.len_items = 0;
570        if let Some(offsets_ptr) = self.offsets {
571            unsafe {
572                ptr::write(offsets_ptr.as_ptr().cast::<Offset>(), Offset::default());
573            }
574        }
575    }
576
577    /// Shortens the tape, keeping the first `len` items and dropping the rest.
578    ///
579    /// If `len` is greater than the current length, this has no effect.
580    pub fn truncate(&mut self, len: usize) {
581        if len >= self.len_items {
582            return;
583        }
584
585        self.len_items = len;
586        self.len_bytes = if len == 0 {
587            0
588        } else if let Some(offsets_ptr) = self.offsets {
589            unsafe { ptr::read(offsets_ptr.as_ptr().cast::<Offset>().add(len)).to_usize() }
590        } else {
591            0
592        };
593    }
594
595    /// Extends the tape with the contents of an iterator of byte slices.
596    ///
597    /// # Examples
598    ///
599    /// ```rust
600    /// use stringtape::{BytesTapeI32, StringTapeError};
601    ///
602    /// let mut tape = BytesTapeI32::new();
603    /// tape.extend([b"hello".as_slice(), b"world".as_slice()])?;
604    /// assert_eq!(tape.len(), 2);
605    /// # Ok::<(), StringTapeError>(())
606    /// ```
607    pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
608    where
609        I: IntoIterator,
610        I::Item: AsRef<[u8]>,
611    {
612        for s in iter {
613            self.push(s.as_ref())?;
614        }
615        Ok(())
616    }
617
618    /// Returns the raw parts of the tape for Apache Arrow compatibility.
619    ///
620    /// Returns named fields:
621    /// - `data_ptr`: Data buffer pointer
622    /// - `offsets_ptr`: Offsets buffer pointer
623    /// - `data_len`: Data length in bytes
624    /// - `items_count`: Number of items
625    ///
626    /// # Safety
627    ///
628    /// The returned pointers are valid only as long as the CharsTape is not modified.
629    pub fn as_raw_parts(&self) -> RawParts<Offset> {
630        let data_ptr = self
631            .data
632            .map(|ptr| ptr.as_ptr().cast::<u8>() as *const u8)
633            .unwrap_or(ptr::null());
634        let offsets_ptr = self
635            .offsets
636            .map(|ptr| ptr.as_ptr().cast::<Offset>() as *const Offset)
637            .unwrap_or(ptr::null());
638        RawParts {
639            data_ptr,
640            offsets_ptr,
641            data_len: self.len_bytes,
642            items_count: self.len_items,
643        }
644    }
645
646    /// Returns a slice view of the data buffer.
647    ///
648    /// This provides a cleaner interface for accessing the underlying data
649    /// without dealing with raw pointers.
650    pub fn data_slice(&self) -> &[u8] {
651        if let Some(data_ptr) = self.data {
652            unsafe { core::slice::from_raw_parts(data_ptr.as_ptr().cast::<u8>(), self.len_bytes) }
653        } else {
654            &[]
655        }
656    }
657
658    /// Returns a slice view of the offsets buffer.
659    ///
660    /// This provides a cleaner interface for accessing the underlying offsets
661    /// without dealing with raw pointers. The slice contains `len() + 1` elements.
662    pub fn offsets_slice(&self) -> &[Offset] {
663        if let Some(offsets_ptr) = self.offsets {
664            unsafe {
665                core::slice::from_raw_parts(
666                    offsets_ptr.as_ptr().cast::<Offset>(),
667                    self.len_items + 1,
668                )
669            }
670        } else {
671            &[]
672        }
673    }
674
675    /// Returns a reference to the allocator used by this tape.
    pub fn allocator(&self) -> &A {
        // Borrowed, not cloned: the tape keeps using this allocator for its buffers.
        &self.allocator
    }
679
680    /// Creates a view of the entire tape.
681    pub fn view(&self) -> RawTapeView<'_, Offset> {
682        RawTapeView::new(self, 0, self.len_items).unwrap_or(RawTapeView {
683            data: &[],
684            offsets: &[],
685        })
686    }
687
688    /// Creates a subview of a continuous slice of this tape.
    pub fn subview(
        &self,
        start: usize,
        end: usize,
    ) -> Result<RawTapeView<'_, Offset>, StringTapeError> {
        // `start..end` is a half-open range of item indices; range validation and
        // the `IndexOutOfBounds` error both come from `RawTapeView::new`.
        RawTapeView::new(self, start, end)
    }
696
697}
698
699impl<Offset: OffsetType, A: Allocator> Drop for RawTape<Offset, A> {
700    fn drop(&mut self) {
701        if let Some(data_ptr) = self.data {
702            let layout = Layout::array::<u8>(data_ptr.len()).unwrap();
703            unsafe {
704                self.allocator.deallocate(data_ptr.cast(), layout);
705            }
706        }
707        if let Some(offsets_ptr) = self.offsets {
708            let layout = Layout::array::<Offset>(offsets_ptr.len()).unwrap();
709            unsafe {
710                self.allocator.deallocate(offsets_ptr.cast(), layout);
711            }
712        }
713    }
714}
715
// The `NonNull` buffer pointers make `RawTape` neither `Send` nor `Sync` by default.
// NOTE(review): these impls rely on the buffers being owned exclusively by the tape
// and only mutated through `&mut self`; confirm this holds for every wrapper type.
unsafe impl<Offset: OffsetType + Send, A: Allocator + Send> Send for RawTape<Offset, A> {}
unsafe impl<Offset: OffsetType + Sync, A: Allocator + Sync> Sync for RawTape<Offset, A> {}
718
719// Index trait implementations for RawTape to support [i..n] syntax
720impl<Offset: OffsetType, A: Allocator> Index<Range<usize>> for RawTape<Offset, A> {
721    type Output = [u8];
722
723    fn index(&self, range: Range<usize>) -> &Self::Output {
724        let view = self
725            .subview(range.start, range.end)
726            .expect("range out of bounds");
727        // Return the underlying data slice
728        view.data
729    }
730}
731
732impl<Offset: OffsetType, A: Allocator> Index<RangeFrom<usize>> for RawTape<Offset, A> {
733    type Output = [u8];
734
735    fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
736        let view = self
737            .subview(range.start, self.len_items)
738            .expect("range out of bounds");
739        view.data
740    }
741}
742
743impl<Offset: OffsetType, A: Allocator> Index<RangeTo<usize>> for RawTape<Offset, A> {
744    type Output = [u8];
745
746    fn index(&self, range: RangeTo<usize>) -> &Self::Output {
747        let view = self.subview(0, range.end).expect("range out of bounds");
748        view.data
749    }
750}
751
752impl<Offset: OffsetType, A: Allocator> Index<RangeFull> for RawTape<Offset, A> {
753    type Output = [u8];
754
755    fn index(&self, _range: RangeFull) -> &Self::Output {
756        let view = self.view();
757        view.data
758    }
759}
760
761impl<Offset: OffsetType, A: Allocator> Index<RangeInclusive<usize>> for RawTape<Offset, A> {
762    type Output = [u8];
763
764    fn index(&self, range: RangeInclusive<usize>) -> &Self::Output {
765        let view = self
766            .subview(*range.start(), range.end() + 1)
767            .expect("range out of bounds");
768        view.data
769    }
770}
771
772impl<Offset: OffsetType, A: Allocator> Index<RangeToInclusive<usize>> for RawTape<Offset, A> {
773    type Output = [u8];
774
775    fn index(&self, range: RangeToInclusive<usize>) -> &Self::Output {
776        let view = self.subview(0, range.end + 1).expect("range out of bounds");
777        view.data
778    }
779}
780
781// ========================
782// RawTapeView implementation
783// ========================
784
785impl<'a, Offset: OffsetType> RawTapeView<'a, Offset> {
786    /// Creates a view into a slice of the RawTape from start to end (exclusive).
787    pub(crate) fn new<A: Allocator>(
788        tape: &'a RawTape<Offset, A>,
789        start: usize,
790        end: usize,
791    ) -> Result<Self, StringTapeError> {
792        if start > end || end > tape.len() {
793            return Err(StringTapeError::IndexOutOfBounds);
794        }
795
796        let (data_ptr, offsets_ptr) = match (tape.data, tape.offsets) {
797            (Some(data), Some(offsets)) => (data, offsets),
798            _ => return Err(StringTapeError::IndexOutOfBounds),
799        };
800
801        // Keep the data pointer at the beginning of the parent tape to remain Arrow-compatible.
802        // Offsets remain absolute (not normalized) and are sliced to the requested range.
803        let data = unsafe { slice::from_raw_parts(data_ptr.as_ptr().cast::<u8>(), tape.len_bytes) };
804
805        let offsets = unsafe {
806            slice::from_raw_parts(
807                offsets_ptr.as_ptr().cast::<Offset>().add(start),
808                (end - start) + 1,
809            )
810        };
811
812        Ok(Self { data, offsets })
813    }
814
815    /// Creates a zero-copy view from raw Arrow-compatible parts.
816    ///
817    /// # Safety
818    ///
819    /// The caller must ensure that:
820    /// - `data` contains valid bytes for the lifetime `'a`
821    /// - `offsets` contains valid offsets with length `items_count + 1`
822    /// - All offsets are within bounds of the data slice
823    /// - For CharsTapeView, data must be valid UTF-8
    pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
        // Nothing is validated here; both slices are stored exactly as given.
        Self { data, offsets }
    }
827
828    /// Returns a reference to the bytes at the given index within this view.
829    pub fn get(&self, index: usize) -> Option<&[u8]> {
830        if index >= self.len() {
831            return None;
832        }
833
834        let start_offset = self.offsets[index].to_usize();
835        let end_offset = self.offsets[index + 1].to_usize();
836
837        Some(&self.data[start_offset..end_offset])
838    }
839
840    /// Returns the number of items in this view.
841    pub fn len(&self) -> usize {
842        self.offsets.len().saturating_sub(1)
843    }
844
845    /// Returns `true` if the view contains no items.
846    pub fn is_empty(&self) -> bool {
847        self.len() == 0
848    }
849
850    /// Returns the total number of bytes in this view.
851    pub fn data_len(&self) -> usize {
852        // Span covered by this view
853        self.offsets[self.offsets.len() - 1].to_usize() - self.offsets[0].to_usize()
854    }
855
856    /// Creates a sub-view of this view
857    pub fn subview(
858        &self,
859        start: usize,
860        end: usize,
861    ) -> Result<RawTapeView<'a, Offset>, StringTapeError> {
862        if start > end || end > self.len() {
863            return Err(StringTapeError::IndexOutOfBounds);
864        }
865
866        Ok(RawTapeView {
867            // Keep same data pointer, only narrow the offsets slice
868            data: self.data,
869            offsets: &self.offsets[start..=end],
870        })
871    }
872
873    /// Returns the raw parts of the view for Apache Arrow compatibility.
874    pub fn as_raw_parts(&self) -> RawParts<Offset> {
875        // Expose an Arrow-compatible view: data_ptr remains at the tape base,
876        // offsets are absolute into that buffer, and data_len reaches the last used byte.
877        RawParts {
878            data_ptr: self.data.as_ptr(),
879            offsets_ptr: self.offsets.as_ptr(),
880            data_len: self.offsets[self.offsets.len() - 1].to_usize(),
881            items_count: self.len(),
882        }
883    }
884}
885
886impl<'a, Offset: OffsetType> Index<usize> for RawTapeView<'a, Offset> {
887    type Output = [u8];
888
889    fn index(&self, index: usize) -> &Self::Output {
890        self.get(index).expect("index out of bounds")
891    }
892}
893
894// Index trait implementations for RawTapeView to support [i..n] syntax
895impl<'a, Offset: OffsetType> Index<Range<usize>> for RawTapeView<'a, Offset> {
896    type Output = [u8];
897
898    fn index(&self, range: Range<usize>) -> &Self::Output {
899        let view = self
900            .subview(range.start, range.end)
901            .expect("range out of bounds");
902        let start = view.offsets[0].to_usize();
903        let end = view.offsets[view.offsets.len() - 1].to_usize();
904        &view.data[start..end]
905    }
906}
907
908impl<'a, Offset: OffsetType> Index<RangeFrom<usize>> for RawTapeView<'a, Offset> {
909    type Output = [u8];
910
911    fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
912        let view = self
913            .subview(range.start, self.len())
914            .expect("range out of bounds");
915        let start = view.offsets[0].to_usize();
916        let end = view.offsets[view.offsets.len() - 1].to_usize();
917        &view.data[start..end]
918    }
919}
920
921impl<'a, Offset: OffsetType> Index<RangeTo<usize>> for RawTapeView<'a, Offset> {
922    type Output = [u8];
923
924    fn index(&self, range: RangeTo<usize>) -> &Self::Output {
925        let view = self.subview(0, range.end).expect("range out of bounds");
926        let start = view.offsets[0].to_usize();
927        let end = view.offsets[view.offsets.len() - 1].to_usize();
928        &view.data[start..end]
929    }
930}
931
932impl<'a, Offset: OffsetType> Index<RangeFull> for RawTapeView<'a, Offset> {
933    type Output = [u8];
934
935    fn index(&self, _range: RangeFull) -> &Self::Output {
936        let start = self.offsets[0].to_usize();
937        let end = self.offsets[self.offsets.len() - 1].to_usize();
938        &self.data[start..end]
939    }
940}
941
942impl<'a, Offset: OffsetType> Index<RangeInclusive<usize>> for RawTapeView<'a, Offset> {
943    type Output = [u8];
944
945    fn index(&self, range: RangeInclusive<usize>) -> &Self::Output {
946        let view = self
947            .subview(*range.start(), range.end() + 1)
948            .expect("range out of bounds");
949        let start = view.offsets[0].to_usize();
950        let end = view.offsets[view.offsets.len() - 1].to_usize();
951        &view.data[start..end]
952    }
953}
954
955impl<'a, Offset: OffsetType> Index<RangeToInclusive<usize>> for RawTapeView<'a, Offset> {
956    type Output = [u8];
957
958    fn index(&self, range: RangeToInclusive<usize>) -> &Self::Output {
959        let view = self.subview(0, range.end + 1).expect("range out of bounds");
960        let start = view.offsets[0].to_usize();
961        let end = view.offsets[view.offsets.len() - 1].to_usize();
962        &view.data[start..end]
963    }
964}
965
966// ========================
967// CharsTapeView implementation
968// ========================
969
970impl<'a, Offset: OffsetType> CharsTapeView<'a, Offset> {
971    /// Creates a zero-copy CharsTapeView from raw Arrow StringArray parts.
972    ///
973    /// # Safety
974    ///
975    /// The caller must ensure that:
976    /// - `data` contains valid UTF-8 bytes for the lifetime `'a`
977    /// - `offsets` contains valid offsets with appropriate length
978    /// - All offsets are within bounds of the data slice
979    pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
980        Self {
981            inner: RawTapeView::from_raw_parts(data, offsets),
982        }
983    }
984
985    /// Returns a reference to the string at the given index, or `None` if out of bounds.
986    pub fn get(&self, index: usize) -> Option<&str> {
987        // Safe because CharsTapeView only comes from CharsTape which validates UTF-8
988        self.inner
989            .get(index)
990            .map(|b| unsafe { core::str::from_utf8_unchecked(b) })
991    }
992
993    /// Returns the number of strings in this view.
994    pub fn len(&self) -> usize {
995        self.inner.len()
996    }
997
998    /// Returns `true` if the view contains no strings.
999    pub fn is_empty(&self) -> bool {
1000        self.inner.is_empty()
1001    }
1002
1003    /// Returns the total number of bytes in this view.
1004    pub fn data_len(&self) -> usize {
1005        self.inner.data_len()
1006    }
1007
1008    /// Creates a sub-view of this view
1009    pub fn subview(
1010        &self,
1011        start: usize,
1012        end: usize,
1013    ) -> Result<CharsTapeView<'a, Offset>, StringTapeError> {
1014        Ok(CharsTapeView {
1015            inner: self.inner.subview(start, end)?,
1016        })
1017    }
1018
1019    /// Returns the raw parts of the view for Apache Arrow compatibility.
1020    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1021        self.inner.as_raw_parts()
1022    }
1023}
1024
1025impl<'a, Offset: OffsetType> Index<usize> for CharsTapeView<'a, Offset> {
1026    type Output = str;
1027
1028    fn index(&self, index: usize) -> &Self::Output {
1029        self.get(index).expect("index out of bounds")
1030    }
1031}
1032
1033// ========================
1034// BytesTapeView implementation
1035// ========================
1036
1037impl<'a, Offset: OffsetType> BytesTapeView<'a, Offset> {
1038    /// Creates a zero-copy BytesTapeView from raw Arrow BinaryArray parts.
1039    ///
1040    /// # Safety
1041    ///
1042    /// The caller must ensure that:
1043    /// - `data` contains valid bytes for the lifetime `'a`
1044    /// - `offsets` contains valid offsets with appropriate length
1045    /// - All offsets are within bounds of the data slice
1046    pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
1047        Self {
1048            inner: RawTapeView::from_raw_parts(data, offsets),
1049        }
1050    }
1051
1052    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
1053    pub fn get(&self, index: usize) -> Option<&[u8]> {
1054        self.inner.get(index)
1055    }
1056
1057    /// Returns the number of items in this view.
1058    pub fn len(&self) -> usize {
1059        self.inner.len()
1060    }
1061
1062    /// Returns `true` if the view contains no items.
1063    pub fn is_empty(&self) -> bool {
1064        self.inner.is_empty()
1065    }
1066
1067    /// Returns the total number of bytes in this view.
1068    pub fn data_len(&self) -> usize {
1069        self.inner.data_len()
1070    }
1071
1072    /// Creates a sub-view of this view
1073    pub fn subview(
1074        &self,
1075        start: usize,
1076        end: usize,
1077    ) -> Result<BytesTapeView<'a, Offset>, StringTapeError> {
1078        Ok(BytesTapeView {
1079            inner: self.inner.subview(start, end)?,
1080        })
1081    }
1082
1083    /// Returns the raw parts of the view for Apache Arrow compatibility.
1084    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1085        self.inner.as_raw_parts()
1086    }
1087}
1088
1089impl<'a, Offset: OffsetType> Index<usize> for BytesTapeView<'a, Offset> {
1090    type Output = [u8];
1091
1092    fn index(&self, index: usize) -> &Self::Output {
1093        self.get(index).expect("index out of bounds")
1094    }
1095}
1096
1097// ========================
1098// CharsTape (UTF-8 view)
1099// ========================
1100
1101impl<Offset: OffsetType, A: Allocator> CharsTape<Offset, A> {
1102    /// Creates a new, empty CharsTape with the global allocator.
1103    pub fn new() -> CharsTape<Offset, Global> {
1104        CharsTape {
1105            inner: RawTape::<Offset, Global>::new(),
1106        }
1107    }
1108
1109    /// Creates a new, empty CharsTape with a custom allocator.
1110    pub fn new_in(allocator: A) -> Self {
1111        Self {
1112            inner: RawTape::<Offset, A>::new_in(allocator),
1113        }
1114    }
1115
1116    /// Creates a new CharsTape with pre-allocated capacity using the global allocator.
1117    pub fn with_capacity(
1118        data_capacity: usize,
1119        strings_capacity: usize,
1120    ) -> Result<CharsTape<Offset, Global>, StringTapeError> {
1121        Ok(CharsTape {
1122            inner: RawTape::<Offset, Global>::with_capacity(data_capacity, strings_capacity)?,
1123        })
1124    }
1125
1126    /// Creates a new CharsTape with pre-allocated capacity and a custom allocator.
1127    pub fn with_capacity_in(
1128        data_capacity: usize,
1129        strings_capacity: usize,
1130        allocator: A,
1131    ) -> Result<Self, StringTapeError> {
1132        Ok(Self {
1133            inner: RawTape::<Offset, A>::with_capacity_in(
1134                data_capacity,
1135                strings_capacity,
1136                allocator,
1137            )?,
1138        })
1139    }
1140
1141    /// Adds a string to the end of the CharsTape.
1142    pub fn push(&mut self, s: &str) -> Result<(), StringTapeError> {
1143        self.inner.push(s.as_bytes())
1144    }
1145
1146    /// Returns a reference to the string at the given index, or `None` if out of bounds.
1147    pub fn get(&self, index: usize) -> Option<&str> {
1148        // Safe because CharsTape only accepts &str pushes.
1149        self.inner
1150            .get(index)
1151            .map(|b| unsafe { core::str::from_utf8_unchecked(b) })
1152    }
1153
1154    /// Returns the number of strings in the CharsTape.
1155    pub fn len(&self) -> usize {
1156        self.inner.len()
1157    }
1158
1159    /// Returns `true` if the CharsTape contains no strings.
1160    pub fn is_empty(&self) -> bool {
1161        self.inner.is_empty()
1162    }
1163
1164    /// Returns the total number of bytes used by string data.
1165    pub fn data_len(&self) -> usize {
1166        self.inner.data_len()
1167    }
1168
1169    /// Returns the number of strings currently stored (same as `len()`).
1170    pub fn capacity(&self) -> usize {
1171        self.inner.len()
1172    }
1173
1174    /// Returns the number of bytes allocated for string data.
1175    pub fn data_capacity(&self) -> usize {
1176        self.inner.data_capacity()
1177    }
1178
1179    /// Returns the number of offset slots allocated.
1180    pub fn offsets_capacity(&self) -> usize {
1181        self.inner.offsets_capacity()
1182    }
1183
1184    /// Removes all strings from the CharsTape, keeping allocated capacity.
1185    pub fn clear(&mut self) {
1186        self.inner.clear()
1187    }
1188
1189    /// Shortens the CharsTape, keeping the first `len` strings and dropping the rest.
1190    pub fn truncate(&mut self, len: usize) {
1191        self.inner.truncate(len)
1192    }
1193
1194    /// Extends the CharsTape with the contents of an iterator.
1195    pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
1196    where
1197        I: IntoIterator,
1198        I::Item: AsRef<str>,
1199    {
1200        for s in iter {
1201            self.push(s.as_ref())?;
1202        }
1203        Ok(())
1204    }
1205
1206    /// Returns the raw parts of the CharsTape for Apache Arrow compatibility.
1207    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1208        self.inner.as_raw_parts()
1209    }
1210
1211    /// Returns a slice view of the data buffer.
1212    pub fn data_slice(&self) -> &[u8] {
1213        self.inner.data_slice()
1214    }
1215
1216    /// Returns a slice view of the offsets buffer.
1217    pub fn offsets_slice(&self) -> &[Offset] {
1218        self.inner.offsets_slice()
1219    }
1220
1221    pub fn iter(&self) -> CharsTapeIter<'_, Offset, A> {
1222        CharsTapeIter {
1223            tape: self,
1224            index: 0,
1225        }
1226    }
1227
1228    /// Returns a reference to the allocator used by this CharsTape.
1229    pub fn allocator(&self) -> &A {
1230        self.inner.allocator()
1231    }
1232
1233    /// Creates a view of the entire CharsTape.
1234    pub fn view(&self) -> CharsTapeView<'_, Offset> {
1235        CharsTapeView {
1236            inner: self.inner.view(),
1237        }
1238    }
1239
1240    /// Creates a subview of a continuous slice of this CharsTape.
1241    pub fn subview(
1242        &self,
1243        start: usize,
1244        end: usize,
1245    ) -> Result<CharsTapeView<'_, Offset>, StringTapeError> {
1246        Ok(CharsTapeView {
1247            inner: self.inner.subview(start, end)?,
1248        })
1249    }
1250
1251    /// Returns data and offsets slices for zero-copy Arrow conversion.
1252    pub fn arrow_slices(&self) -> (&[u8], &[Offset]) {
1253        (self.data_slice(), self.offsets_slice())
1254    }
1255}
1256
impl<Offset: OffsetType, A: Allocator> Drop for CharsTape<Offset, A> {
    /// Intentionally empty destructor.
    ///
    /// Nothing is released here; the `inner` field is dropped automatically by the
    /// compiler after this method returns.
    fn drop(&mut self) {
        // No explicit work is performed in this body.
        // NOTE: because `CharsTape` implements `Drop`, `inner` cannot be moved out of
        // it directly; the `From<CharsTape>` conversion in this file uses
        // `ptr::read` + `mem::forget` to take ownership instead.
    }
}
1263
// SAFETY(review): `CharsTape` is only declared `Send`/`Sync` when both the offset type
// and the allocator are `Send`/`Sync` themselves. Soundness additionally depends on how
// `RawTape` owns its buffers, which is not visible in this part of the file — TODO confirm.
unsafe impl<Offset: OffsetType + Send, A: Allocator + Send> Send for CharsTape<Offset, A> {}
unsafe impl<Offset: OffsetType + Sync, A: Allocator + Sync> Sync for CharsTape<Offset, A> {}
1266
/// Borrowing iterator over the strings of a [`CharsTape`], yielding `&str` items
/// in index order.
///
/// Created by [`CharsTape::iter`] or by iterating over `&CharsTape`.
pub struct CharsTapeIter<'a, Offset: OffsetType, A: Allocator> {
    /// Tape being iterated.
    tape: &'a CharsTape<Offset, A>,
    /// Index of the next item to yield.
    index: usize,
}
1271
1272impl<'a, Offset: OffsetType, A: Allocator> Iterator for CharsTapeIter<'a, Offset, A> {
1273    type Item = &'a str;
1274
1275    fn next(&mut self) -> Option<Self::Item> {
1276        let result = self.tape.get(self.index);
1277        if result.is_some() {
1278            self.index += 1;
1279        }
1280        result
1281    }
1282
1283    fn size_hint(&self) -> (usize, Option<usize>) {
1284        let remaining = self.tape.len() - self.index;
1285        (remaining, Some(remaining))
1286    }
1287}
1288
1289impl<'a, Offset: OffsetType, A: Allocator> ExactSizeIterator for CharsTapeIter<'a, Offset, A> {}
1290
1291impl<Offset: OffsetType> FromIterator<String> for CharsTape<Offset, Global> {
1292    fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Self {
1293        let mut tape = CharsTape::<Offset, Global>::new();
1294        for s in iter {
1295            tape.push(&s)
1296                .expect("Failed to build CharsTape from iterator");
1297        }
1298        tape
1299    }
1300}
1301
1302impl<'a, Offset: OffsetType> FromIterator<&'a str> for CharsTape<Offset, Global> {
1303    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
1304        let mut tape = CharsTape::<Offset, Global>::new();
1305        for s in iter {
1306            tape.push(s)
1307                .expect("Failed to build CharsTape from iterator");
1308        }
1309        tape
1310    }
1311}
1312
1313impl<Offset: OffsetType, A: Allocator> Index<usize> for CharsTape<Offset, A> {
1314    type Output = str;
1315
1316    fn index(&self, index: usize) -> &Self::Output {
1317        self.get(index).expect("index out of bounds")
1318    }
1319}
1320
1321impl<'a, Offset: OffsetType, A: Allocator> IntoIterator for &'a CharsTape<Offset, A> {
1322    type Item = &'a str;
1323    type IntoIter = CharsTapeIter<'a, Offset, A>;
1324
1325    fn into_iter(self) -> Self::IntoIter {
1326        self.iter()
1327    }
1328}
1329
1330// ======================
1331// BytesTape (bytes view)
1332// ======================
1333
1334impl<Offset: OffsetType, A: Allocator> BytesTape<Offset, A> {
1335    /// Creates a new, empty BytesTape with the global allocator.
1336    pub fn new() -> BytesTape<Offset, Global> {
1337        BytesTape {
1338            inner: RawTape::<Offset, Global>::new(),
1339        }
1340    }
1341
1342    /// Creates a new, empty BytesTape with a custom allocator.
1343    pub fn new_in(allocator: A) -> Self {
1344        Self {
1345            inner: RawTape::<Offset, A>::new_in(allocator),
1346        }
1347    }
1348
1349    /// Creates a new BytesTape with pre-allocated capacity using the global allocator.
1350    pub fn with_capacity(
1351        data_capacity: usize,
1352        items_capacity: usize,
1353    ) -> Result<BytesTape<Offset, Global>, StringTapeError> {
1354        Ok(BytesTape {
1355            inner: RawTape::<Offset, Global>::with_capacity(data_capacity, items_capacity)?,
1356        })
1357    }
1358
1359    /// Creates a new BytesTape with pre-allocated capacity and a custom allocator.
1360    pub fn with_capacity_in(
1361        data_capacity: usize,
1362        items_capacity: usize,
1363        allocator: A,
1364    ) -> Result<Self, StringTapeError> {
1365        Ok(Self {
1366            inner: RawTape::<Offset, A>::with_capacity_in(
1367                data_capacity,
1368                items_capacity,
1369                allocator,
1370            )?,
1371        })
1372    }
1373
1374    /// Adds bytes to the end of the tape.
1375    pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
1376        self.inner.push(bytes)
1377    }
1378
1379    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
1380    pub fn get(&self, index: usize) -> Option<&[u8]> {
1381        self.inner.get(index)
1382    }
1383
1384    /// Returns the number of items in the tape.
1385    pub fn len(&self) -> usize {
1386        self.inner.len()
1387    }
1388
1389    /// Returns `true` if the tape contains no items.
1390    pub fn is_empty(&self) -> bool {
1391        self.inner.is_empty()
1392    }
1393
1394    /// Returns the total number of bytes used by data.
1395    pub fn data_len(&self) -> usize {
1396        self.inner.data_len()
1397    }
1398
1399    /// Returns the number of bytes allocated for data.
1400    pub fn data_capacity(&self) -> usize {
1401        self.inner.data_capacity()
1402    }
1403
1404    /// Returns the number of offset slots allocated.
1405    pub fn offsets_capacity(&self) -> usize {
1406        self.inner.offsets_capacity()
1407    }
1408
1409    /// Removes all items from the tape, keeping allocated capacity.
1410    pub fn clear(&mut self) {
1411        self.inner.clear()
1412    }
1413
1414    /// Shortens the tape, keeping the first `len` items and dropping the rest.
1415    pub fn truncate(&mut self, len: usize) {
1416        self.inner.truncate(len)
1417    }
1418
1419    /// Extends the tape with the contents of an iterator of bytes.
1420    pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
1421    where
1422        I: IntoIterator,
1423        I::Item: AsRef<[u8]>,
1424    {
1425        self.inner.extend(iter)
1426    }
1427
1428    /// Returns the raw parts of the tape for Apache Arrow compatibility.
1429    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1430        self.inner.as_raw_parts()
1431    }
1432
1433    /// Returns a slice view of the data buffer.
1434    pub fn data_slice(&self) -> &[u8] {
1435        self.inner.data_slice()
1436    }
1437
1438    /// Returns a slice view of the offsets buffer.
1439    pub fn offsets_slice(&self) -> &[Offset] {
1440        self.inner.offsets_slice()
1441    }
1442
1443    /// Returns a reference to the allocator used by this BytesTape.
1444    pub fn allocator(&self) -> &A {
1445        self.inner.allocator()
1446    }
1447
1448    /// Creates a view of the entire BytesTape.
1449    pub fn view(&self) -> BytesTapeView<'_, Offset> {
1450        BytesTapeView {
1451            inner: self.inner.view(),
1452        }
1453    }
1454
1455    /// Creates a subview of a continuous slice of this BytesTape.
1456    pub fn subview(
1457        &self,
1458        start: usize,
1459        end: usize,
1460    ) -> Result<BytesTapeView<'_, Offset>, StringTapeError> {
1461        Ok(BytesTapeView {
1462            inner: self.inner.subview(start, end)?,
1463        })
1464    }
1465
1466    /// Returns data and offsets slices for zero-copy Arrow conversion.
1467    pub fn arrow_slices(&self) -> (&[u8], &[Offset]) {
1468        (self.data_slice(), self.offsets_slice())
1469    }
1470}
1471
1472impl<Offset: OffsetType, A: Allocator> Index<usize> for BytesTape<Offset, A> {
1473    type Output = [u8];
1474
1475    fn index(&self, index: usize) -> &Self::Output {
1476        self.get(index).expect("index out of bounds")
1477    }
1478}
1479
// Signed (Arrow-compatible) aliases
/// UTF-8 string tape with `i32` offsets, using the global allocator.
pub type CharsTapeI32 = CharsTape<i32, Global>;
/// UTF-8 string tape with `i64` offsets, using the global allocator.
pub type CharsTapeI64 = CharsTape<i64, Global>;
/// Bytes tape with `i32` offsets, using the global allocator.
pub type BytesTapeI32 = BytesTape<i32, Global>;
/// Bytes tape with `i64` offsets, using the global allocator.
pub type BytesTapeI64 = BytesTape<i64, Global>;

/// View over UTF-8 strings addressed by `i32` offsets.
pub type CharsTapeViewI32<'a> = CharsTapeView<'a, i32>;
/// View over UTF-8 strings addressed by `i64` offsets.
pub type CharsTapeViewI64<'a> = CharsTapeView<'a, i64>;
/// View over byte items addressed by `i32` offsets.
pub type BytesTapeViewI32<'a> = BytesTapeView<'a, i32>;
/// View over byte items addressed by `i64` offsets.
pub type BytesTapeViewI64<'a> = BytesTapeView<'a, i64>;

// Unsigned aliases (not zero-copy with Arrow)
/// UTF-8 string tape with `u32` offsets, using the global allocator.
pub type CharsTapeU32 = CharsTape<u32, Global>;
/// UTF-8 string tape with `u64` offsets, using the global allocator.
pub type CharsTapeU64 = CharsTape<u64, Global>;
/// Bytes tape with `u32` offsets, using the global allocator.
pub type BytesTapeU32 = BytesTape<u32, Global>;
/// Bytes tape with `u64` offsets, using the global allocator.
pub type BytesTapeU64 = BytesTape<u64, Global>;

/// View over UTF-8 strings addressed by `u32` offsets.
pub type CharsTapeViewU32<'a> = CharsTapeView<'a, u32>;
/// View over UTF-8 strings addressed by `u64` offsets.
pub type CharsTapeViewU64<'a> = CharsTapeView<'a, u64>;
/// View over byte items addressed by `u32` offsets.
pub type BytesTapeViewU32<'a> = BytesTapeView<'a, u32>;
/// View over byte items addressed by `u64` offsets.
pub type BytesTapeViewU64<'a> = BytesTapeView<'a, u64>;
1501
1502// Conversion implementations between BytesTape and CharsTape
1503impl<Offset: OffsetType, A: Allocator> TryFrom<BytesTape<Offset, A>> for CharsTape<Offset, A> {
1504    type Error = StringTapeError;
1505
1506    fn try_from(bytes_tape: BytesTape<Offset, A>) -> Result<Self, Self::Error> {
1507        // Validate that all byte sequences are valid UTF-8
1508        for i in 0..bytes_tape.len() {
1509            if let Err(e) = core::str::from_utf8(&bytes_tape[i]) {
1510                return Err(StringTapeError::Utf8Error(e));
1511            }
1512        }
1513
1514        // Since validation passed, we can safely convert
1515        // We need to take ownership of the inner RawTape without dropping BytesTape
1516        let inner = unsafe {
1517            // Take ownership of the inner RawTape
1518            let inner = core::ptr::read(&bytes_tape.inner);
1519            // Prevent BytesTape's destructor from running
1520            core::mem::forget(bytes_tape);
1521            inner
1522        };
1523        Ok(CharsTape { inner })
1524    }
1525}
1526
1527impl<Offset: OffsetType, A: Allocator> From<CharsTape<Offset, A>> for BytesTape<Offset, A> {
1528    fn from(chars_tape: CharsTape<Offset, A>) -> Self {
1529        // CharsTape already contains valid UTF-8, so conversion to BytesTape is infallible
1530        // We need to take ownership of the inner RawTape without dropping CharsTape
1531        let inner = unsafe {
1532            // Take ownership of the inner RawTape
1533            let inner = core::ptr::read(&chars_tape.inner);
1534            // Prevent CharsTape's destructor from running
1535            core::mem::forget(chars_tape);
1536            inner
1537        };
1538        BytesTape { inner }
1539    }
1540}
1541
1542impl<Offset: OffsetType, A: Allocator> BytesTape<Offset, A> {
1543    pub fn try_into_chars_tape(self) -> Result<CharsTape<Offset, A>, StringTapeError> {
1544        self.try_into()
1545    }
1546}
1547
1548impl<Offset: OffsetType, A: Allocator> CharsTape<Offset, A> {
1549    pub fn into_bytes_tape(self) -> BytesTape<Offset, A> {
1550        self.into()
1551    }
1552}
1553
1554// Conversion implementations between BytesTapeView and CharsTapeView
1555impl<'a, Offset: OffsetType> TryFrom<BytesTapeView<'a, Offset>> for CharsTapeView<'a, Offset> {
1556    type Error = StringTapeError;
1557
1558    fn try_from(bytes_view: BytesTapeView<'a, Offset>) -> Result<Self, Self::Error> {
1559        // Validate that all byte sequences are valid UTF-8
1560        for i in 0..bytes_view.len() {
1561            let bytes = bytes_view.get(i).ok_or(StringTapeError::IndexOutOfBounds)?;
1562            if core::str::from_utf8(bytes).is_err() {
1563                return Err(StringTapeError::Utf8Error(
1564                    core::str::from_utf8(bytes).unwrap_err(),
1565                ));
1566            }
1567        }
1568
1569        // Since validation passed, construct a CharsTapeView over the same inner view
1570        Ok(CharsTapeView {
1571            inner: bytes_view.inner,
1572        })
1573    }
1574}
1575
1576impl<'a, Offset: OffsetType> From<CharsTapeView<'a, Offset>> for BytesTapeView<'a, Offset> {
1577    fn from(chars_view: CharsTapeView<'a, Offset>) -> Self {
1578        // UTF-8 bytes can always be viewed as bytes
1579        BytesTapeView {
1580            inner: chars_view.inner,
1581        }
1582    }
1583}
1584
1585impl<'a, Offset: OffsetType> BytesTapeView<'a, Offset> {
1586    pub fn try_into_chars_view(self) -> Result<CharsTapeView<'a, Offset>, StringTapeError> {
1587        self.try_into()
1588    }
1589}
1590
1591impl<'a, Offset: OffsetType> CharsTapeView<'a, Offset> {
1592    pub fn into_bytes_view(self) -> BytesTapeView<'a, Offset> {
1593        self.into()
1594    }
1595}
1596
1597impl<Offset: OffsetType> Default for CharsTape<Offset, Global> {
1598    fn default() -> Self {
1599        Self::new()
1600    }
1601}
1602
1603#[cfg(test)]
1604mod tests {
1605    use super::*;
1606
1607    #[cfg(not(feature = "std"))]
1608    use alloc::vec;
1609    #[cfg(not(feature = "std"))]
1610    use alloc::vec::Vec;
1611
1612    #[test]
1613    fn basic_operations() {
1614        let mut tape = CharsTapeI32::new();
1615        assert!(tape.is_empty());
1616
1617        tape.push("hello").unwrap();
1618        tape.push("world").unwrap();
1619        tape.push("foo").unwrap();
1620
1621        assert_eq!(tape.len(), 3);
1622        assert_eq!(tape.get(0), Some("hello"));
1623        assert_eq!(tape.get(1), Some("world"));
1624        assert_eq!(tape.get(2), Some("foo"));
1625        assert_eq!(tape.get(3), None);
1626    }
1627
1628    #[test]
1629    fn unsigned_basic_operations() {
1630        // u32
1631        let mut t32 = CharsTapeU32::new();
1632        t32.push("hello").unwrap();
1633        t32.push("world").unwrap();
1634        assert_eq!(t32.len(), 2);
1635        assert_eq!(t32.get(0), Some("hello"));
1636        assert_eq!(t32.get(1), Some("world"));
1637
1638        // u64
1639        let mut t64 = CharsTapeU64::new();
1640        t64.extend(["a", "", "bbb"]).unwrap();
1641        assert_eq!(t64.len(), 3);
1642        assert_eq!(t64.get(0), Some("a"));
1643        assert_eq!(t64.get(1), Some(""));
1644        assert_eq!(t64.get(2), Some("bbb"));
1645    }
1646
1647    #[test]
1648    fn offsets_64bit() {
1649        let mut tape = CharsTapeI64::new();
1650        tape.push("test").unwrap();
1651        assert_eq!(tape.get(0), Some("test"));
1652    }
1653
1654    #[test]
1655    fn iterator_basics() {
1656        let mut tape = CharsTapeI32::new();
1657        tape.push("a").unwrap();
1658        tape.push("b").unwrap();
1659        tape.push("c").unwrap();
1660
1661        let strings: Vec<&str> = tape.iter().collect();
1662        assert_eq!(strings, vec!["a", "b", "c"]);
1663    }
1664
1665    #[test]
1666    fn empty_strings() {
1667        let mut tape = CharsTapeI32::new();
1668        tape.push("").unwrap();
1669        tape.push("non-empty").unwrap();
1670        tape.push("").unwrap();
1671
1672        assert_eq!(tape.len(), 3);
1673        assert_eq!(tape.get(0), Some(""));
1674        assert_eq!(tape.get(1), Some("non-empty"));
1675        assert_eq!(tape.get(2), Some(""));
1676    }
1677
1678    #[test]
1679    fn index_trait() {
1680        let mut tape = CharsTapeI32::new();
1681        tape.push("hello").unwrap();
1682        tape.push("world").unwrap();
1683
1684        assert_eq!(&tape[0], "hello");
1685        assert_eq!(&tape[1], "world");
1686    }
1687
1688    #[test]
1689    fn into_iterator() {
1690        let mut tape = CharsTapeI32::new();
1691        tape.push("a").unwrap();
1692        tape.push("b").unwrap();
1693        tape.push("c").unwrap();
1694
1695        let strings: Vec<&str> = (&tape).into_iter().collect();
1696        assert_eq!(strings, vec!["a", "b", "c"]);
1697
1698        // Test for-loop syntax
1699        let mut result = Vec::new();
1700        for s in &tape {
1701            result.push(s);
1702        }
1703        assert_eq!(result, vec!["a", "b", "c"]);
1704    }
1705
1706    #[test]
1707    fn from_iterator() {
1708        let strings = vec!["hello", "world", "test"];
1709        let tape: CharsTapeI32 = strings.into_iter().collect();
1710
1711        assert_eq!(tape.len(), 3);
1712        assert_eq!(tape.get(0), Some("hello"));
1713        assert_eq!(tape.get(1), Some("world"));
1714        assert_eq!(tape.get(2), Some("test"));
1715    }
1716
1717    #[test]
1718    fn from_iterator_unsigned() {
1719        let strings = vec!["hello", "world", "test"];
1720        let tape_u32: CharsTapeU32 = strings.clone().into_iter().collect();
1721        let tape_u64: CharsTapeU64 = strings.clone().into_iter().collect();
1722        assert_eq!(tape_u32.len(), 3);
1723        assert_eq!(tape_u64.len(), 3);
1724        assert_eq!(tape_u32.get(1), Some("world"));
1725        assert_eq!(tape_u64.get(2), Some("test"));
1726    }
1727
1728    #[test]
1729    fn extend() {
1730        let mut tape = CharsTapeI32::new();
1731        tape.push("initial").unwrap();
1732
1733        let additional = vec!["hello", "world"];
1734        tape.extend(additional).unwrap();
1735
1736        assert_eq!(tape.len(), 3);
1737        assert_eq!(tape.get(0), Some("initial"));
1738        assert_eq!(tape.get(1), Some("hello"));
1739        assert_eq!(tape.get(2), Some("world"));
1740    }
1741
1742    #[test]
1743    fn clear_and_truncate() {
1744        let mut tape = CharsTapeI32::new();
1745        tape.push("a").unwrap();
1746        tape.push("b").unwrap();
1747        tape.push("c").unwrap();
1748
1749        assert_eq!(tape.len(), 3);
1750
1751        tape.truncate(2);
1752        assert_eq!(tape.len(), 2);
1753        assert_eq!(tape.get(0), Some("a"));
1754        assert_eq!(tape.get(1), Some("b"));
1755        assert_eq!(tape.get(2), None);
1756
1757        tape.clear();
1758        assert_eq!(tape.len(), 0);
1759        assert!(tape.is_empty());
1760    }
1761
1762    #[test]
1763    fn unsigned_views_and_subviews() {
1764        let mut tape = CharsTapeU32::new();
1765        tape.extend(["0", "1", "22", "333"]).unwrap();
1766        let view = tape.subview(1, 4).unwrap();
1767        assert_eq!(view.len(), 3);
1768        assert_eq!(view.get(0), Some("1"));
1769        assert_eq!(view.get(2), Some("333"));
1770        let sub = view.subview(1, 2).unwrap();
1771        assert_eq!(sub.len(), 1);
1772        assert_eq!(sub.get(0), Some("22"));
1773    }
1774
1775    #[test]
1776    fn capacity() {
1777        let tape = CharsTapeI32::with_capacity(100, 10).unwrap();
1778        assert_eq!(tape.data_capacity(), 100);
1779        assert_eq!(tape.capacity(), 0); // No strings added yet
1780    }
1781
1782    #[test]
1783    fn custom_allocator() {
1784        // Using the Global allocator explicitly
1785        let mut tape: CharsTape<i32, Global> = CharsTape::new_in(Global);
1786
1787        tape.push("hello").unwrap();
1788        tape.push("world").unwrap();
1789
1790        assert_eq!(tape.len(), 2);
1791        assert_eq!(tape.get(0), Some("hello"));
1792        assert_eq!(tape.get(1), Some("world"));
1793
1794        // Verify we can access the allocator
1795        let _allocator_ref = tape.allocator();
1796    }
1797
1798    #[test]
1799    fn custom_allocator_with_capacity() {
1800        let tape: CharsTape<i64, Global> = CharsTape::with_capacity_in(256, 50, Global).unwrap();
1801
1802        assert_eq!(tape.data_capacity(), 256);
1803        assert!(tape.is_empty());
1804    }
1805
1806    #[test]
1807    fn bytes_tape_basic() {
1808        let mut tape = BytesTapeI32::new();
1809        tape.push(&[1, 2, 3]).unwrap();
1810        tape.push(b"abc").unwrap();
1811
1812        assert_eq!(tape.len(), 2);
1813        assert_eq!(&tape[0], &[1u8, 2, 3] as &[u8]);
1814        assert_eq!(&tape[1], b"abc" as &[u8]);
1815    }
1816
1817    #[test]
1818    fn unsigned_bytes_tape_basic() {
1819        let mut tape = BytesTapeU64::new();
1820        tape.push(&[1u8, 2]).unwrap();
1821        tape.push(&[3u8, 4, 5]).unwrap();
1822        assert_eq!(tape.len(), 2);
1823        assert_eq!(&tape[0], &[1u8, 2] as &[u8]);
1824        assert_eq!(&tape[1], &[3u8, 4, 5] as &[u8]);
1825    }
1826
1827    #[test]
1828    fn chars_tape_view_basic() {
1829        let mut tape = CharsTapeI32::new();
1830        tape.push("hello").unwrap();
1831        tape.push("world").unwrap();
1832        tape.push("foo").unwrap();
1833        tape.push("bar").unwrap();
1834
1835        // Test basic subview creation
1836        let view = tape.subview(1, 3).unwrap();
1837        assert_eq!(view.len(), 2);
1838        assert_eq!(view.get(0), Some("world"));
1839        assert_eq!(view.get(1), Some("foo"));
1840        assert_eq!(view.get(2), None);
1841
1842        // Test indexing
1843        assert_eq!(&view[0], "world");
1844        assert_eq!(&view[1], "foo");
1845    }
1846
1847    #[test]
1848    fn chars_tape_range_syntax() {
1849        let mut tape = CharsTapeI32::new();
1850        tape.push("a").unwrap();
1851        tape.push("b").unwrap();
1852        tape.push("c").unwrap();
1853        tape.push("d").unwrap();
1854
1855        // Test view() method
1856        let full_view = tape.view();
1857        assert_eq!(full_view.len(), 4);
1858        assert_eq!(full_view.get(0), Some("a"));
1859        assert_eq!(full_view.get(3), Some("d"));
1860
1861        // Test subview
1862        let sub = tape.subview(1, 3).unwrap();
1863        assert_eq!(sub.len(), 2);
1864        assert_eq!(sub.get(0), Some("b"));
1865        assert_eq!(sub.get(1), Some("c"));
1866    }
1867
1868    #[test]
1869    fn chars_tape_view_subslicing() {
1870        let mut tape = CharsTapeI32::new();
1871        tape.push("0").unwrap();
1872        tape.push("1").unwrap();
1873        tape.push("2").unwrap();
1874        tape.push("3").unwrap();
1875        tape.push("4").unwrap();
1876
1877        // Create initial subview
1878        let view = tape.subview(1, 4).unwrap(); // ["1", "2", "3"]
1879        assert_eq!(view.len(), 3);
1880
1881        // Create sub-view of a view
1882        let subview = view.subview(1, 2).unwrap(); // ["2"]
1883        assert_eq!(subview.len(), 1);
1884        assert_eq!(subview.get(0), Some("2"));
1885
1886        // Test subviews with different ranges
1887        let subview_from = view.subview(1, view.len()).unwrap(); // ["2", "3"]
1888        assert_eq!(subview_from.len(), 2);
1889        assert_eq!(subview_from.get(0), Some("2"));
1890        assert_eq!(subview_from.get(1), Some("3"));
1891
1892        let subview_to = view.subview(0, 2).unwrap(); // ["1", "2"]
1893        assert_eq!(subview_to.len(), 2);
1894        assert_eq!(subview_to.get(0), Some("1"));
1895        assert_eq!(subview_to.get(1), Some("2"));
1896    }
1897
1898    #[test]
1899    fn bytes_tape_view_basic() {
1900        let mut tape = BytesTapeI32::new();
1901        tape.push(&[1u8, 2]).unwrap();
1902        tape.push(&[3u8, 4]).unwrap();
1903        tape.push(&[5u8, 6]).unwrap();
1904        tape.push(&[7u8, 8]).unwrap();
1905
1906        // Test basic subview creation
1907        let view = tape.subview(1, 3).unwrap();
1908        assert_eq!(view.len(), 2);
1909        assert_eq!(view.get(0), Some(&[3u8, 4] as &[u8]));
1910        assert_eq!(view.get(1), Some(&[5u8, 6] as &[u8]));
1911        assert_eq!(view.get(2), None);
1912
1913        // Test indexing
1914        assert_eq!(&view[0], &[3u8, 4] as &[u8]);
1915        assert_eq!(&view[1], &[5u8, 6] as &[u8]);
1916    }
1917
1918    #[test]
1919    fn view_empty_strings() {
1920        let mut tape = CharsTapeI32::new();
1921        tape.push("").unwrap();
1922        tape.push("non-empty").unwrap();
1923        tape.push("").unwrap();
1924        tape.push("another").unwrap();
1925
1926        let view = tape.subview(0, 3).unwrap();
1927        assert_eq!(view.len(), 3);
1928        assert_eq!(view.get(0), Some(""));
1929        assert_eq!(view.get(1), Some("non-empty"));
1930        assert_eq!(view.get(2), Some(""));
1931    }
1932
1933    #[test]
1934    fn view_single_item() {
1935        let mut tape = CharsTapeI32::new();
1936        tape.push("only").unwrap();
1937
1938        let view = tape.subview(0, 1).unwrap();
1939        assert_eq!(view.len(), 1);
1940        assert_eq!(view.get(0), Some("only"));
1941    }
1942
1943    #[test]
1944    fn view_bounds_checking() {
1945        let mut tape = CharsTapeI32::new();
1946        tape.push("a").unwrap();
1947        tape.push("b").unwrap();
1948
1949        // Out of bounds subview creation
1950        assert!(tape.subview(0, 3).is_err());
1951        assert!(tape.subview(2, 1).is_err());
1952        assert!(tape.subview(3, 4).is_err());
1953
1954        // Valid empty subview
1955        let empty_view = tape.subview(1, 1).unwrap();
1956        assert_eq!(empty_view.len(), 0);
1957        assert!(empty_view.is_empty());
1958    }
1959
1960    #[test]
1961    fn view_data_properties() {
1962        let mut tape = CharsTapeI32::new();
1963        tape.push("hello").unwrap(); // 5 bytes
1964        tape.push("world").unwrap(); // 5 bytes
1965        tape.push("!").unwrap(); // 1 byte
1966
1967        let view = tape.subview(0, 2).unwrap(); // "hello", "world" = 10 bytes
1968        assert_eq!(view.data_len(), 10);
1969        assert!(!view.is_empty());
1970
1971        let full_view = tape.subview(0, 3).unwrap(); // all = 11 bytes
1972        assert_eq!(full_view.data_len(), 11);
1973    }
1974
1975    #[test]
1976    fn view_raw_parts() {
1977        let mut tape = CharsTapeI32::new();
1978        tape.push("test").unwrap();
1979        tape.push("data").unwrap();
1980
1981        let view = tape.subview(0, 2).unwrap();
1982        let parts = view.as_raw_parts();
1983
1984        assert!(!parts.data_ptr.is_null());
1985        assert!(!parts.offsets_ptr.is_null());
1986        assert_eq!(parts.data_len, 8); // "test" + "data"
1987        assert_eq!(parts.items_count, 2);
1988    }
1989
1990    #[test]
1991    fn subview_raw_parts_consistency_chars() {
1992        let mut tape = CharsTapeI32::new();
1993        tape.extend(["abc", "", "xyz", "pq"]).unwrap();
1994
1995        // Subview over middle two items: ["", "xyz"]
1996        let view = tape.subview(1, 3).unwrap();
1997        let parts = view.as_raw_parts();
1998
1999        // Offsets len must be items_count + 1 and data_len equals absolute last offset
2000        unsafe {
2001            let offsets: &[i32] =
2002                core::slice::from_raw_parts(parts.offsets_ptr, parts.items_count + 1);
2003            assert_eq!(offsets.len(), parts.items_count + 1);
2004            assert!(offsets.windows(2).all(|w| w[0] <= w[1]));
2005            let last_abs = offsets[offsets.len() - 1] as usize;
2006            assert_eq!(last_abs, parts.data_len);
2007        }
2008
2009        // Also check that element boundaries are respected
2010        assert_eq!(view.len(), 2);
2011        assert_eq!(view.get(0), Some(""));
2012        assert_eq!(view.get(1), Some("xyz"));
2013    }
2014
2015    #[test]
2016    fn subview_raw_parts_consistency_bytes() {
2017        let mut tape = BytesTapeI32::new();
2018        tape.extend([
2019            b"a".as_slice(),
2020            b"".as_slice(),
2021            b"bc".as_slice(),
2022            b"def".as_slice(),
2023        ])
2024        .unwrap();
2025
2026        // Subview over last two items: ["bc", "def"]
2027        let view = tape.subview(2, 4).unwrap();
2028        let parts = view.as_raw_parts();
2029
2030        unsafe {
2031            let offsets: &[i32] =
2032                core::slice::from_raw_parts(parts.offsets_ptr, parts.items_count + 1);
2033            assert_eq!(offsets.len(), parts.items_count + 1);
2034            assert!(offsets.windows(2).all(|w| w[0] <= w[1]));
2035            let last_abs = offsets[offsets.len() - 1] as usize;
2036            assert_eq!(last_abs, parts.data_len);
2037        }
2038
2039        assert_eq!(view.len(), 2);
2040        assert_eq!(view.get(0), Some(b"bc" as &[u8]));
2041        assert_eq!(view.get(1), Some(b"def" as &[u8]));
2042    }
2043
2044    #[test]
2045    fn view_type_aliases() {
2046        let mut tape = CharsTapeI32::new();
2047        tape.push("test").unwrap();
2048
2049        let _view: CharsTapeViewI32 = tape.subview(0, 1).unwrap();
2050
2051        let mut bytes_tape = BytesTapeI64::new();
2052        bytes_tape.push(b"test").unwrap();
2053
2054        let _bytes_view: BytesTapeViewI64 = bytes_tape.subview(0, 1).unwrap();
2055    }
2056
2057    #[test]
2058    fn build_i32_from_other_offset_iterators() {
2059        let items = ["x", "yy", "", "zzz"];
2060
2061        // From u32 iterator
2062        let mut u32t = CharsTapeU32::new();
2063        u32t.extend(items).unwrap();
2064        let t_from_u32: CharsTapeI32 = u32t.iter().collect();
2065        assert_eq!(t_from_u32.len(), items.len());
2066        assert_eq!(t_from_u32.get(1), Some("yy"));
2067
2068        // From u64 iterator
2069        let mut u64t = CharsTapeU64::new();
2070        u64t.extend(items).unwrap();
2071        let t_from_u64: CharsTapeI32 = u64t.iter().collect();
2072        assert_eq!(t_from_u64.len(), items.len());
2073        assert_eq!(t_from_u64.get(3), Some("zzz"));
2074
2075        // From i64 iterator
2076        let mut i64t = CharsTapeI64::new();
2077        i64t.extend(items).unwrap();
2078        let t_from_i64: CharsTapeI32 = i64t.iter().collect();
2079        assert_eq!(t_from_i64.len(), items.len());
2080        assert_eq!(t_from_i64.get(2), Some(""));
2081    }
2082
2083    #[test]
2084    fn range_indexing_syntax() {
2085        let mut tape = CharsTapeI32::new();
2086        tape.push("a").unwrap();
2087        tape.push("b").unwrap();
2088        tape.push("c").unwrap();
2089        tape.push("d").unwrap();
2090
2091        // While we can't return views with [..] syntax due to lifetime constraints,
2092        // we can test that the view() and subview() API works correctly
2093
2094        // Get full view
2095        let full_view = tape.view();
2096        assert_eq!(full_view.len(), 4);
2097
2098        // Get subviews
2099        let sub = tape.subview(1, 3).unwrap();
2100        assert_eq!(sub.len(), 2);
2101        assert_eq!(sub.get(0), Some("b"));
2102        assert_eq!(sub.get(1), Some("c"));
2103
2104        // Test subview of subview
2105        let sub_sub = sub.subview(0, 1).unwrap();
2106        assert_eq!(sub_sub.len(), 1);
2107        assert_eq!(sub_sub.get(0), Some("b"));
2108    }
2109
2110    #[cfg(test)]
2111    use arrow::array::{Array, BinaryArray, StringArray};
2112    #[cfg(test)]
2113    use arrow::buffer::{Buffer, OffsetBuffer, ScalarBuffer};
2114
2115    #[test]
2116    fn charstape_to_arrow_string_array() {
2117        let mut tape = CharsTapeI32::new();
2118        tape.extend(["hello", "world", "", "arrow"]).unwrap();
2119
2120        let (data_slice, offsets_slice) = tape.arrow_slices();
2121        let data_buffer = Buffer::from_slice_ref(data_slice);
2122        let offsets_buffer = OffsetBuffer::new(ScalarBuffer::new(
2123            Buffer::from_slice_ref(offsets_slice),
2124            0,
2125            offsets_slice.len(),
2126        ));
2127        let arrow_array = StringArray::new(offsets_buffer, data_buffer, None);
2128
2129        assert_eq!(arrow_array.len(), 4);
2130        assert_eq!(arrow_array.value(0), "hello");
2131        assert_eq!(arrow_array.value(2), "");
2132    }
2133
2134    #[test]
2135    fn arrow_string_array_to_charstape_view() {
2136        let arrow_array = StringArray::from(vec!["foo", "bar", ""]);
2137
2138        // Zero-copy conversion to CharsTapeView
2139        let view = unsafe {
2140            CharsTapeViewI32::from_raw_parts(arrow_array.values(), arrow_array.offsets().as_ref())
2141        };
2142
2143        assert_eq!(view.len(), 3);
2144        assert_eq!(view.get(0), Some("foo"));
2145        assert_eq!(view.get(1), Some("bar"));
2146        assert_eq!(view.get(2), Some(""));
2147    }
2148
2149    #[test]
2150    fn arrow_binary_array_to_bytestape_view() {
2151        let values: Vec<Option<&[u8]>> = vec![
2152            Some(&[1u8, 2, 3] as &[u8]),
2153            Some(&[] as &[u8]),
2154            Some(&[4u8, 5] as &[u8]),
2155        ];
2156        let arrow_array = BinaryArray::from(values);
2157
2158        // Zero-copy conversion to BytesTapeView
2159        let view = unsafe {
2160            BytesTapeViewI32::from_raw_parts(arrow_array.values(), arrow_array.offsets().as_ref())
2161        };
2162
2163        assert_eq!(view.len(), 3);
2164        assert_eq!(view.get(0), Some(&[1u8, 2, 3] as &[u8]));
2165        assert_eq!(view.get(1), Some(&[] as &[u8]));
2166        assert_eq!(view.get(2), Some(&[4u8, 5] as &[u8]));
2167    }
2168
2169    #[test]
2170    fn zero_copy_roundtrip() {
2171        // Original data
2172        let mut tape = CharsTapeI32::new();
2173        tape.extend(["hello", "", "world"]).unwrap();
2174
2175        // Convert to Arrow (zero-copy)
2176        let (data_slice, offsets_slice) = tape.arrow_slices();
2177        let data_buffer = Buffer::from_slice_ref(data_slice);
2178        let offsets_buffer = OffsetBuffer::new(ScalarBuffer::new(
2179            Buffer::from_slice_ref(offsets_slice),
2180            0,
2181            offsets_slice.len(),
2182        ));
2183        let arrow_array = StringArray::new(offsets_buffer, data_buffer, None);
2184
2185        // Convert back to CharsTapeView (zero-copy)
2186        let view = unsafe {
2187            CharsTapeViewI32::from_raw_parts(arrow_array.values(), arrow_array.offsets().as_ref())
2188        };
2189
2190        // Verify data integrity without any copying
2191        assert_eq!(view.len(), 3);
2192        assert_eq!(view.get(0), Some("hello"));
2193        assert_eq!(view.get(1), Some(""));
2194        assert_eq!(view.get(2), Some("world"));
2195    }
2196
2197    #[test]
2198    fn bytes_to_string_conversion() {
2199        // Test successful conversion with valid UTF-8
2200        let mut bytes_tape = BytesTapeI32::new();
2201        bytes_tape.push(b"hello").unwrap();
2202        bytes_tape.push(b"world").unwrap();
2203        bytes_tape.push(b"").unwrap();
2204        bytes_tape.push(b"rust").unwrap();
2205
2206        let chars_tape: Result<CharsTapeI32, _> = bytes_tape.try_into();
2207        assert!(chars_tape.is_ok());
2208
2209        let chars_tape = chars_tape.unwrap();
2210        assert_eq!(chars_tape.len(), 4);
2211        assert_eq!(chars_tape.get(0), Some("hello"));
2212        assert_eq!(chars_tape.get(1), Some("world"));
2213        assert_eq!(chars_tape.get(2), Some(""));
2214        assert_eq!(chars_tape.get(3), Some("rust"));
2215    }
2216
2217    #[test]
2218    fn bytes_to_string_invalid_utf8() {
2219        // Test conversion failure with invalid UTF-8
2220        let mut bytes_tape = BytesTapeI32::new();
2221        bytes_tape.push(b"valid").unwrap();
2222        bytes_tape.push(&[0xFF, 0xFE]).unwrap(); // Invalid UTF-8 sequence
2223        bytes_tape.push(b"also valid").unwrap();
2224
2225        let chars_tape: Result<CharsTapeI32, _> = bytes_tape.try_into();
2226        assert!(chars_tape.is_err());
2227
2228        match chars_tape {
2229            Err(StringTapeError::Utf8Error(_)) => {}
2230            _ => panic!("Expected Utf8Error"),
2231        }
2232    }
2233
2234    #[test]
2235    fn string_to_bytes_conversion() {
2236        // Test infallible conversion from CharsTape to BytesTape
2237        let mut chars_tape = CharsTapeI32::new();
2238        chars_tape.push("hello").unwrap();
2239        chars_tape.push("δΈ–η•Œ").unwrap(); // Unicode characters
2240        chars_tape.push("").unwrap();
2241        chars_tape.push("πŸ¦€").unwrap(); // Emoji
2242
2243        let bytes_tape: BytesTapeI32 = chars_tape.into();
2244        assert_eq!(bytes_tape.len(), 4);
2245        assert_eq!(&bytes_tape[0], b"hello");
2246        assert_eq!(&bytes_tape[1], "δΈ–η•Œ".as_bytes());
2247        assert_eq!(&bytes_tape[2], b"");
2248        assert_eq!(&bytes_tape[3], "πŸ¦€".as_bytes());
2249    }
2250
2251    #[test]
2252    fn conversion_convenience_methods() {
2253        // Test try_into_chars_tape method
2254        let mut bytes_tape = BytesTapeI32::new();
2255        bytes_tape.push(b"test").unwrap();
2256        let string_result = bytes_tape.try_into_chars_tape();
2257        assert!(string_result.is_ok());
2258        assert_eq!(string_result.unwrap().get(0), Some("test"));
2259
2260        // Test into_bytes_tape method
2261        let mut chars_tape = CharsTapeI32::new();
2262        chars_tape.push("test").unwrap();
2263        let bytes_back = chars_tape.into_bytes_tape();
2264        assert_eq!(&bytes_back[0], b"test");
2265    }
2266
2267    #[test]
2268    fn conversion_round_trip() {
2269        // Test round-trip conversion preserves data
2270        let mut original = CharsTapeI32::new();
2271        original.push("first").unwrap();
2272        original.push("second").unwrap();
2273        original.push("third").unwrap();
2274
2275        // Store expected values before conversion
2276        let expected = vec!["first", "second", "third"];
2277
2278        // Convert to BytesTape and back
2279        let bytes: BytesTapeI32 = original.into();
2280        let recovered: CharsTapeI32 = bytes.try_into().unwrap();
2281
2282        assert_eq!(expected.len(), recovered.len());
2283        for (i, expected_str) in expected.iter().enumerate() {
2284            assert_eq!(recovered.get(i), Some(*expected_str));
2285        }
2286    }
2287
2288    #[test]
2289    fn view_to_view_conversions_valid_utf8() {
2290        // Prepare a CharsTape and obtain its view
2291        let mut ct = CharsTapeI32::new();
2292        ct.extend(["abc", "", "δΈ–η•Œ"]).unwrap();
2293        let chars_view = ct.view();
2294
2295        // Chars -> Bytes view conversion is infallible
2296        let bytes_view: BytesTapeViewI32 = chars_view.into_bytes_view();
2297        assert_eq!(bytes_view.len(), 3);
2298        assert_eq!(bytes_view.get(0), Some("abc".as_bytes()));
2299        assert_eq!(bytes_view.get(1), Some(b"" as &[u8]));
2300        assert_eq!(bytes_view.get(2), Some("δΈ–η•Œ".as_bytes()));
2301
2302        // Bytes -> Chars view conversion is fallible, but should succeed for valid UTF-8
2303        let chars_back: Result<CharsTapeViewI32, _> = bytes_view.try_into_chars_view();
2304        assert!(chars_back.is_ok());
2305        let chars_back = chars_back.unwrap();
2306        assert_eq!(chars_back.len(), 3);
2307        assert_eq!(chars_back.get(0), Some("abc"));
2308        assert_eq!(chars_back.get(1), Some(""));
2309        assert_eq!(chars_back.get(2), Some("δΈ–η•Œ"));
2310    }
2311
2312    #[test]
2313    fn view_to_view_bytes_to_chars_invalid_utf8() {
2314        // Prepare a BytesTape with invalid UTF-8 payload
2315        let mut bt = BytesTapeI32::new();
2316        bt.push(b"ok").unwrap();
2317        bt.push(&[0xFF, 0xFE]).unwrap(); // invalid UTF-8
2318        let bview = bt.view();
2319
2320        // Converting to CharsTapeView should fail
2321        let res: Result<CharsTapeViewI32, _> = bview.try_into_chars_view();
2322        assert!(res.is_err());
2323        match res {
2324            Err(StringTapeError::Utf8Error(_)) => {}
2325            _ => panic!("Expected Utf8Error"),
2326        }
2327    }
2328}