stringtape/
stringtape.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2
3//! # StringTape
4//!
5//! Memory-efficient string and bytes storage compatible with Apache Arrow.
6//!
7//! ## CharsTape - Sequential String Storage
8//!
9//! ```rust
10//! use stringtape::{CharsTapeI32, StringTapeError};
11//!
12//! let mut tape = CharsTapeI32::new();
13//! tape.push("hello")?;
14//! tape.push("world")?;
15//!
16//! assert_eq!(tape.len(), 2);
17//! assert_eq!(&tape[0], "hello");
18//!
19//! // Iterate over strings
20//! for s in &tape {
21//!     println!("{}", s);
22//! }
23//! # Ok::<(), StringTapeError>(())
24//! ```
25//!
26//! ## CharsCows - Compressed Arbitrary-Order Slices
27//!
28//! For extremely large datasets, use `CharsCows` with configurable offset/length types:
29//!
30//! ```rust
31//! use stringtape::{CharsCowsU32U16, StringTapeError};
32//! use std::borrow::Cow;
33//!
34//! let data = "hello world foo bar";
35//! // 6 bytes per entry (u32 offset + u16 length) vs 24+ bytes for Vec<String>
36//! let cows = CharsCowsU32U16::from_iter_and_data(
37//!     data.split_whitespace(),
38//!     Cow::Borrowed(data.as_bytes())
39//! )?;
40//!
41//! assert_eq!(&cows[0], "hello");
42//! assert_eq!(&cows[3], "bar");
43//! # Ok::<(), StringTapeError>(())
44//! ```
45//!
46//! ## BytesTape - Binary Data
47//!
48//! ```rust
49//! use stringtape::{BytesTapeI32, StringTapeError};
50//!
51//! let mut tape = BytesTapeI32::new();
52//! tape.push(&[0xde, 0xad, 0xbe, 0xef])?;
53//! tape.push(b"bytes")?;
54//!
55//! assert_eq!(&tape[1], b"bytes" as &[u8]);
56//! # Ok::<(), StringTapeError>(())
57//! ```
58
59#[cfg(feature = "std")]
60extern crate std;
61
62#[cfg(not(feature = "std"))]
63extern crate alloc;
64
65use core::fmt;
66use core::marker::PhantomData;
67use core::ops::{
68    Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, Sub,
69};
70use core::ptr::{self, NonNull};
71use core::slice;
72
73#[cfg(not(feature = "std"))]
74use alloc::borrow::Cow;
75#[cfg(not(feature = "std"))]
76use alloc::string::String;
77#[cfg(not(feature = "std"))]
78use alloc::vec::Vec;
79
80#[cfg(feature = "std")]
81use std::borrow::Cow;
82
83use allocator_api2::alloc::{Allocator, Global, Layout};
84
/// Errors that can occur when working with tape classes.
///
/// Every fallible tape and view operation in this module reports failures
/// through this enum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringTapeError {
    /// Data size exceeds offset type maximum (e.g., >2GB for 32-bit offsets).
    OffsetOverflow,
    /// Memory allocation failed.
    ///
    /// Also reported when a requested capacity overflows `usize` or cannot be
    /// described by a valid allocation layout.
    AllocationError,
    /// Index out of bounds.
    IndexOutOfBounds,
    /// Invalid UTF-8 sequence; carries the underlying `core::str::Utf8Error`.
    Utf8Error(core::str::Utf8Error),
}
97
98impl fmt::Display for StringTapeError {
99    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
100        match self {
101            StringTapeError::OffsetOverflow => write!(f, "offset value too large for offset type"),
102            StringTapeError::AllocationError => write!(f, "memory allocation failed"),
103            StringTapeError::IndexOutOfBounds => write!(f, "index out of bounds"),
104            StringTapeError::Utf8Error(e) => write!(f, "invalid UTF-8: {}", e),
105        }
106    }
107}
108
// `std::error::Error` is only available with the standard library, so this impl
// is gated on the `std` feature. The body is empty: all trait methods keep their
// default implementations.
#[cfg(feature = "std")]
impl std::error::Error for StringTapeError {}
111
/// Allocator-backed storage that `CharsTape` and `BytesTape` wrap in their `inner` field.
///
/// `RawTape` stores multiple items in a contiguous memory layout using offset-based
/// indexing, similar to Apache Arrow's String and LargeString arrays. All item bytes
/// are stored in a single buffer, with a separate offset array tracking item boundaries.
///
/// # Type Parameters
///
/// * `Offset` - Offset type (`i32`, `i64`, `u32`, `u64`)
/// * `A` - Allocator type
///
/// # Example
///
/// ```rust
/// use stringtape::{CharsTapeI32, StringTapeError};
///
/// let mut tape = CharsTapeI32::new();
/// tape.push("hello")?;
/// assert_eq!(&tape[0], "hello");
/// # Ok::<(), StringTapeError>(())
/// ```
///
/// Memory layout compatible with Apache Arrow:
/// ```text
/// Data:    [h,e,l,l,o,w,o,r,l,d]
/// Offsets: [0, 5, 10]
/// ```
struct RawTape<Offset: OffsetType, A: Allocator> {
    /// Data buffer, `None` until first allocated; the slice length is the
    /// allocated capacity in bytes (see `data_capacity`).
    data: Option<NonNull<[u8]>>,
    /// Offsets buffer, `None` until first allocated; the slice length is the
    /// number of allocated offset slots (see `offsets_capacity`).
    offsets: Option<NonNull<[Offset]>>,
    /// Number of bytes of `data` currently in use.
    len_bytes: usize,
    /// Number of items currently stored.
    len_items: usize,
    /// Allocator used to grow and free both buffers.
    allocator: A,
    /// Ties the `Offset` type parameter to the struct.
    _phantom: PhantomData<Offset>,
}
147
/// Named raw parts returned by `as_raw_parts` methods.
///
/// This is a plain bundle of pointers and lengths; it does not own or borrow the
/// buffers it points to. `RawTape::as_raw_parts` fills a pointer with null when
/// the corresponding buffer has not been allocated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RawParts<Offset: OffsetType> {
    /// Pointer to the start of the contiguous data buffer.
    pub data_ptr: *const u8,
    /// Pointer to the start of the offsets buffer.
    pub offsets_ptr: *const Offset,
    /// Number of bytes of valid data in `data_ptr`.
    pub data_len: usize,
    /// Number of items stored (strings/bytes entries).
    pub items_count: usize,
}
160
/// UTF-8 string view over `RawTape`.
///
/// Wraps a `RawTape` in its `inner` field. The offset type defaults to `i32`
/// and the allocator to `Global`.
pub struct CharsTape<Offset: OffsetType = i32, A: Allocator = Global> {
    /// Underlying byte storage.
    inner: RawTape<Offset, A>,
}
165
/// Binary bytes view over `RawTape`.
///
/// Wraps a `RawTape` in its `inner` field. The offset type defaults to `i32`
/// and the allocator to `Global`.
pub struct BytesTape<Offset: OffsetType = i32, A: Allocator = Global> {
    /// Underlying byte storage.
    inner: RawTape<Offset, A>,
}
170
/// Zero-copy read-only view into a RawTape slice.
///
/// The view borrows both buffers for `'a` and never copies item bytes.
pub struct RawTapeView<'a, Offset: OffsetType> {
    /// Byte buffer the offsets index into. Views built from a tape keep the
    /// parent's whole used data buffer here, not just the viewed span.
    data: &'a [u8],
    /// Absolute item boundaries into `data`; a view of `n` items built from a
    /// tape holds `n + 1` offsets.
    offsets: &'a [Offset],
}
176
/// UTF-8 string view over `RawTapeView`.
///
/// Wraps a `RawTapeView` in its `inner` field; the offset type defaults to `i32`.
pub struct CharsTapeView<'a, Offset: OffsetType = i32> {
    /// Underlying untyped byte view.
    inner: RawTapeView<'a, Offset>,
}
181
/// Binary bytes view over `RawTapeView`.
///
/// Wraps a `RawTapeView` in its `inner` field; the offset type defaults to `i32`.
pub struct BytesTapeView<'a, Offset: OffsetType = i32> {
    /// Underlying untyped byte view.
    inner: RawTapeView<'a, Offset>,
}
186
/// Trait for offset types used in CharsTape.
///
/// Implementations: `i32`/`i64` (Arrow-compatible), `u16`/`u32`/`u64` (unsigned, no Arrow interop).
pub trait OffsetType: Copy + Default + PartialOrd + Sub<Output = Self> {
    /// Size of the offset type in bytes.
    const SIZE: usize;

    /// Convert a usize value to this offset type.
    ///
    /// Returns `None` if the value is too large to be represented by this offset type.
    fn from_usize(value: usize) -> Option<Self>;

    /// Convert this offset value to usize.
    ///
    /// This is a plain `as` cast: negative signed values and values wider than
    /// `usize` are not rejected.
    fn to_usize(self) -> usize;
}

impl OffsetType for i32 {
    const SIZE: usize = 4;

    fn from_usize(value: usize) -> Option<Self> {
        if value <= i32::MAX as usize {
            Some(value as i32)
        } else {
            None
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl OffsetType for i64 {
    const SIZE: usize = 8;

    fn from_usize(value: usize) -> Option<Self> {
        // On 64-bit targets `usize` can exceed `i64::MAX`; a bare `as` cast would
        // silently wrap to a negative offset, so the range is checked explicitly.
        // Widening to `u64` keeps the comparison valid on 32-bit targets too.
        if value as u64 <= i64::MAX as u64 {
            Some(value as i64)
        } else {
            None
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl OffsetType for u16 {
    const SIZE: usize = 2;

    fn from_usize(value: usize) -> Option<Self> {
        if value <= u16::MAX as usize {
            Some(value as u16)
        } else {
            None
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl OffsetType for u32 {
    const SIZE: usize = 4;

    fn from_usize(value: usize) -> Option<Self> {
        if value <= u32::MAX as usize {
            Some(value as u32)
        } else {
            None
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl OffsetType for u64 {
    const SIZE: usize = 8;

    fn from_usize(value: usize) -> Option<Self> {
        // `usize` is never wider than 64 bits on supported targets, so this
        // widening cast cannot lose information.
        Some(value as u64)
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}
274
/// Trait for length types used in slice collections.
///
/// A length type stores the byte length of one entry in a compact unsigned
/// integer. Implementations are provided for `u8`, `u16`, `u32`, and `u64`.
pub trait LengthType: Copy + Default + PartialOrd {
    /// Size of the length type in bytes.
    const SIZE: usize;

    /// Convert a usize value to this length type.
    ///
    /// Returns `None` if the value is too large to be represented by this length type.
    fn from_usize(value: usize) -> Option<Self>;

    /// Convert this length value to usize.
    fn to_usize(self) -> usize;
}

impl LengthType for u8 {
    const SIZE: usize = 1;

    fn from_usize(value: usize) -> Option<Self> {
        match value {
            fits if fits <= u8::MAX as usize => Some(fits as u8),
            _ => None,
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl LengthType for u16 {
    const SIZE: usize = 2;

    fn from_usize(value: usize) -> Option<Self> {
        match value {
            fits if fits <= u16::MAX as usize => Some(fits as u16),
            _ => None,
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl LengthType for u32 {
    const SIZE: usize = 4;

    fn from_usize(value: usize) -> Option<Self> {
        match value {
            fits if fits <= u32::MAX as usize => Some(fits as u32),
            _ => None,
        }
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl LengthType for u64 {
    const SIZE: usize = 8;

    fn from_usize(value: usize) -> Option<Self> {
        // Widening cast; always accepted.
        Some(value as u64)
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}
351
352impl<Offset: OffsetType, A: Allocator> RawTape<Offset, A> {
353    /// Creates a new, empty CharsTape with the global allocator.
354    ///
355    /// This operation is O(1) and does not allocate memory until the first string is pushed.
356    ///
357    /// # Examples
358    ///
359    /// ```rust
360    /// use stringtape::CharsTapeI32;
361    ///
362    /// let tape = CharsTapeI32::new();
363    /// assert!(tape.is_empty());
364    /// assert_eq!(tape.len(), 0);
365    /// ```
366    pub fn new() -> RawTape<Offset, Global> {
367        RawTape::new_in(Global)
368    }
369
370    /// Creates a new, empty CharsTape with a custom allocator.
371    ///
372    /// This operation is O(1) and does not allocate memory until the first string is pushed.
373    ///
374    /// # Examples
375    ///
376    /// ```rust
377    /// use stringtape::CharsTape;
378    /// use allocator_api2::alloc::Global;
379    ///
380    /// let tape: CharsTape<i32, Global> = CharsTape::new_in(Global);
381    /// assert!(tape.is_empty());
382    /// assert_eq!(tape.len(), 0);
383    /// ```
384    pub fn new_in(allocator: A) -> Self {
385        Self {
386            data: None,
387            offsets: None,
388            len_bytes: 0,
389            len_items: 0,
390            allocator,
391            _phantom: PhantomData,
392        }
393    }
394
395    /// Creates a tape with pre-allocated capacity.
396    ///
397    /// # Arguments
398    ///
399    /// * `data_capacity` - Bytes for string data
400    /// * `strings_capacity` - Number of string slots
401    ///
402    /// # Examples
403    ///
404    /// ```rust
405    /// use stringtape::{CharsTapeI32, StringTapeError};
406    ///
407    /// // Pre-allocate space for ~1KB of string data and 100 strings
408    /// let tape = CharsTapeI32::with_capacity(1024, 100)?;
409    /// assert_eq!(tape.data_capacity(), 1024);
410    /// # Ok::<(), StringTapeError>(())
411    /// ```
412    pub fn with_capacity(
413        data_capacity: usize,
414        strings_capacity: usize,
415    ) -> Result<RawTape<Offset, Global>, StringTapeError> {
416        RawTape::with_capacity_in(data_capacity, strings_capacity, Global)
417    }
418
419    /// Creates a new CharsTape with pre-allocated capacity and a custom allocator.
420    ///
421    /// Pre-allocating capacity can improve performance when you know approximately
422    /// how much data you'll be storing.
423    ///
424    /// # Arguments
425    ///
426    /// * `data_capacity` - Number of bytes to pre-allocate for string data
427    /// * `strings_capacity` - Number of string slots to pre-allocate
428    /// * `allocator` - The allocator to use for memory management
429    ///
430    /// # Examples
431    ///
432    /// ```rust
433    /// use stringtape::{CharsTape, StringTapeError};
434    /// use allocator_api2::alloc::Global;
435    ///
436    /// let tape: CharsTape<i32, Global> = CharsTape::with_capacity_in(1024, 100, Global)?;
437    /// assert_eq!(tape.data_capacity(), 1024);
438    /// # Ok::<(), StringTapeError>(())
439    /// ```
440    pub fn with_capacity_in(
441        data_capacity: usize,
442        strings_capacity: usize,
443        allocator: A,
444    ) -> Result<Self, StringTapeError> {
445        let mut tape = Self::new_in(allocator);
446        tape.reserve(data_capacity, strings_capacity)?;
447        Ok(tape)
448    }
449
450    pub fn reserve(
451        &mut self,
452        additional_bytes: usize,
453        additional_strings: usize,
454    ) -> Result<(), StringTapeError> {
455        if additional_bytes > 0 {
456            let current_capacity = self.data_capacity();
457            let new_capacity = current_capacity
458                .checked_add(additional_bytes)
459                .ok_or(StringTapeError::AllocationError)?;
460            self.grow_data(new_capacity)?;
461        }
462
463        if additional_strings > 0 {
464            let current_capacity = self.offsets_capacity();
465            let new_capacity = current_capacity
466                .checked_add(additional_strings + 1)
467                .ok_or(StringTapeError::AllocationError)?;
468            self.grow_offsets(new_capacity)?;
469        }
470        Ok(())
471    }
472
473    fn grow_data(&mut self, new_capacity: usize) -> Result<(), StringTapeError> {
474        let current_capacity = self.data_capacity();
475        if new_capacity <= current_capacity {
476            return Ok(());
477        }
478
479        let new_layout =
480            Layout::array::<u8>(new_capacity).map_err(|_| StringTapeError::AllocationError)?;
481
482        let new_ptr = if let Some(old_ptr) = self.data {
483            // Grow existing allocation
484            let old_layout = Layout::array::<u8>(current_capacity).unwrap();
485            unsafe {
486                self.allocator
487                    .grow(old_ptr.cast(), old_layout, new_layout)
488                    .map_err(|_| StringTapeError::AllocationError)?
489            }
490        } else {
491            // Initial allocation
492            self.allocator
493                .allocate(new_layout)
494                .map_err(|_| StringTapeError::AllocationError)?
495        };
496
497        self.data = Some(NonNull::slice_from_raw_parts(new_ptr.cast(), new_capacity));
498        Ok(())
499    }
500
501    fn grow_offsets(&mut self, new_capacity: usize) -> Result<(), StringTapeError> {
502        let current_capacity = self.offsets_capacity();
503        if new_capacity <= current_capacity {
504            return Ok(());
505        }
506
507        let new_layout =
508            Layout::array::<Offset>(new_capacity).map_err(|_| StringTapeError::AllocationError)?;
509
510        let new_ptr = if let Some(old_ptr) = self.offsets {
511            // Grow existing allocation
512            let old_layout = Layout::array::<Offset>(current_capacity).unwrap();
513            unsafe {
514                self.allocator
515                    .grow(old_ptr.cast(), old_layout, new_layout)
516                    .map_err(|_| StringTapeError::AllocationError)?
517            }
518        } else {
519            // Initial allocation with first offset = 0
520            self.allocator
521                .allocate_zeroed(new_layout)
522                .map_err(|_| StringTapeError::AllocationError)?
523        };
524
525        self.offsets = Some(NonNull::slice_from_raw_parts(new_ptr.cast(), new_capacity));
526        Ok(())
527    }
528
529    /// Appends bytes to the tape.
530    ///
531    /// # Errors
532    ///
533    /// - `OffsetOverflow` if data size exceeds offset type maximum
534    /// - `AllocationError` if memory allocation fails
535    ///
536    /// # Example
537    ///
538    /// ```rust
539    /// # use stringtape::{BytesTapeI32, StringTapeError};
540    /// let mut tape = BytesTapeI32::new();
541    /// tape.push(b"hello")?;
542    /// assert_eq!(tape.len(), 1);
543    /// # Ok::<(), StringTapeError>(())
544    /// ```
545    pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
546        let required_capacity = self
547            .len_bytes
548            .checked_add(bytes.len())
549            .ok_or(StringTapeError::AllocationError)?;
550
551        let current_data_capacity = self.data_capacity();
552        if required_capacity > current_data_capacity {
553            let new_capacity = (current_data_capacity * 2).max(required_capacity).max(64);
554            self.grow_data(new_capacity)?;
555        }
556
557        let current_offsets_capacity = self.offsets_capacity();
558        if self.len_items + 1 >= current_offsets_capacity {
559            let new_capacity = (current_offsets_capacity * 2)
560                .max(self.len_items + 2)
561                .max(8);
562            self.grow_offsets(new_capacity)?;
563        }
564
565        // Copy string data
566        if let Some(data_ptr) = self.data {
567            unsafe {
568                ptr::copy_nonoverlapping(
569                    bytes.as_ptr(),
570                    data_ptr.as_ptr().cast::<u8>().add(self.len_bytes),
571                    bytes.len(),
572                );
573            }
574        }
575
576        self.len_bytes += bytes.len();
577        self.len_items += 1;
578
579        // Write new offset
580        let offset = Offset::from_usize(self.len_bytes).ok_or(StringTapeError::OffsetOverflow)?;
581        if let Some(offsets_ptr) = self.offsets {
582            unsafe {
583                ptr::write(
584                    offsets_ptr.as_ptr().cast::<Offset>().add(self.len_items),
585                    offset,
586                );
587            }
588        }
589
590        Ok(())
591    }
592
593    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
594    ///
595    /// This operation is O(1).
596    pub fn get(&self, index: usize) -> Option<&[u8]> {
597        if index >= self.len_items {
598            return None;
599        }
600
601        let (data_ptr, offsets_ptr) = match (self.data, self.offsets) {
602            (Some(data), Some(offsets)) => (data, offsets),
603            _ => return None,
604        };
605
606        unsafe {
607            let offsets_ptr = offsets_ptr.as_ptr().cast::<Offset>();
608            let start_offset = if index == 0 {
609                0
610            } else {
611                ptr::read(offsets_ptr.add(index)).to_usize()
612            };
613            let end_offset = ptr::read(offsets_ptr.add(index + 1)).to_usize();
614
615            Some(slice::from_raw_parts(
616                data_ptr.as_ptr().cast::<u8>().add(start_offset),
617                end_offset - start_offset,
618            ))
619        }
620    }
621
    /// Returns the number of items in the tape.
    ///
    /// This is the item count, not the number of bytes; see `data_len` for bytes.
    pub fn len(&self) -> usize {
        self.len_items
    }
626
627    /// Returns `true` if the CharsTape contains no strings.
628    pub fn is_empty(&self) -> bool {
629        self.len_items == 0
630    }
631
    /// Returns the total number of bytes used by string data.
    ///
    /// This counts bytes in use, not allocated capacity; see `data_capacity`.
    pub fn data_len(&self) -> usize {
        self.len_bytes
    }
636
    /// Returns the number of items currently stored (same as `len()`).
    ///
    /// Despite its name this does not report allocated capacity; use
    /// `data_capacity` and `offsets_capacity` for that.
    // NOTE(review): presumably kept for API parity with the wrapper types — confirm.
    #[allow(dead_code)]
    pub fn capacity(&self) -> usize {
        self.len_items
    }
642
643    /// Returns the number of bytes allocated for string data.
644    pub fn data_capacity(&self) -> usize {
645        self.data.map(|ptr| ptr.len()).unwrap_or(0)
646    }
647
648    /// Returns the number of offset slots allocated.
649    pub fn offsets_capacity(&self) -> usize {
650        self.offsets.map(|ptr| ptr.len()).unwrap_or(0)
651    }
652
653    /// Removes all items from the tape, keeping allocated capacity.
654    pub fn clear(&mut self) {
655        self.len_bytes = 0;
656        self.len_items = 0;
657        if let Some(offsets_ptr) = self.offsets {
658            unsafe {
659                ptr::write(offsets_ptr.as_ptr().cast::<Offset>(), Offset::default());
660            }
661        }
662    }
663
664    /// Keeps the first `len` items, drops the rest.
665    pub fn truncate(&mut self, len: usize) {
666        if len >= self.len_items {
667            return;
668        }
669
670        self.len_items = len;
671        self.len_bytes = if len == 0 {
672            0
673        } else if let Some(offsets_ptr) = self.offsets {
674            unsafe { ptr::read(offsets_ptr.as_ptr().cast::<Offset>().add(len)).to_usize() }
675        } else {
676            0
677        };
678    }
679
680    /// Appends all items from an iterator.
681    ///
682    /// # Example
683    ///
684    /// ```rust
685    /// # use stringtape::{BytesTapeI32, StringTapeError};
686    /// let mut tape = BytesTapeI32::new();
687    /// tape.extend([b"hello".as_slice(), b"world".as_slice()])?;
688    /// # Ok::<(), StringTapeError>(())
689    /// ```
690    pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
691    where
692        I: IntoIterator,
693        I::Item: AsRef<[u8]>,
694    {
695        for s in iter {
696            self.push(s.as_ref())?;
697        }
698        Ok(())
699    }
700
701    /// Returns raw pointers for Apache Arrow compatibility.
702    ///
703    /// Returns `data_ptr`, `offsets_ptr`, `data_len`, `items_count`.
704    ///
705    /// # Safety
706    ///
707    /// Pointers valid only while tape is unmodified.
708    pub fn as_raw_parts(&self) -> RawParts<Offset> {
709        let data_ptr = self
710            .data
711            .map(|ptr| ptr.as_ptr().cast::<u8>() as *const u8)
712            .unwrap_or(ptr::null());
713        let offsets_ptr = self
714            .offsets
715            .map(|ptr| ptr.as_ptr().cast::<Offset>() as *const Offset)
716            .unwrap_or(ptr::null());
717        RawParts {
718            data_ptr,
719            offsets_ptr,
720            data_len: self.len_bytes,
721            items_count: self.len_items,
722        }
723    }
724
725    /// Returns a slice view of the data buffer.
726    ///
727    /// This provides a cleaner interface for accessing the underlying data
728    /// without dealing with raw pointers.
729    pub fn data_slice(&self) -> &[u8] {
730        if let Some(data_ptr) = self.data {
731            unsafe { core::slice::from_raw_parts(data_ptr.as_ptr().cast::<u8>(), self.len_bytes) }
732        } else {
733            &[]
734        }
735    }
736
737    /// Returns a slice view of the offsets buffer.
738    ///
739    /// This provides a cleaner interface for accessing the underlying offsets
740    /// without dealing with raw pointers. The slice contains `len() + 1` elements.
741    pub fn offsets_slice(&self) -> &[Offset] {
742        if let Some(offsets_ptr) = self.offsets {
743            unsafe {
744                core::slice::from_raw_parts(
745                    offsets_ptr.as_ptr().cast::<Offset>(),
746                    self.len_items + 1,
747                )
748            }
749        } else {
750            &[]
751        }
752    }
753
    /// Returns a reference to the allocator used by this tape.
    ///
    /// This is the allocator passed to `new_in`/`with_capacity_in` (or `Global`
    /// for `new`/`with_capacity`); both buffers are grown and freed through it.
    pub fn allocator(&self) -> &A {
        &self.allocator
    }
758
759    /// Creates a view of the entire tape.
760    pub fn view(&self) -> RawTapeView<'_, Offset> {
761        RawTapeView::new(self, 0, self.len_items).unwrap_or(RawTapeView {
762            data: &[],
763            offsets: &[],
764        })
765    }
766
    /// Creates a subview of a continuous slice of this tape.
    ///
    /// The view covers items `start..end` (end exclusive) and borrows the tape.
    ///
    /// # Errors
    ///
    /// Forwards the error from `RawTapeView::new`, which reports
    /// `StringTapeError::IndexOutOfBounds` for an invalid range.
    pub fn subview(
        &self,
        start: usize,
        end: usize,
    ) -> Result<RawTapeView<'_, Offset>, StringTapeError> {
        RawTapeView::new(self, start, end)
    }
775}
776
777impl<Offset: OffsetType, A: Allocator> Drop for RawTape<Offset, A> {
778    fn drop(&mut self) {
779        if let Some(data_ptr) = self.data {
780            let layout = Layout::array::<u8>(data_ptr.len()).unwrap();
781            unsafe {
782                self.allocator.deallocate(data_ptr.cast(), layout);
783            }
784        }
785        if let Some(offsets_ptr) = self.offsets {
786            let layout = Layout::array::<Offset>(offsets_ptr.len()).unwrap();
787            unsafe {
788                self.allocator.deallocate(offsets_ptr.cast(), layout);
789            }
790        }
791    }
792}
793
// The `NonNull` buffer fields suppress the automatic `Send`/`Sync` impls, so they
// are provided manually and forward the requirement to `Offset` and `A`.
// SAFETY (as far as this file shows): the buffers are allocated in
// `grow_data`/`grow_offsets`, released in `Drop`, and only written through
// `&mut self` methods.
// NOTE(review): assumes no code elsewhere aliases these buffers mutably — confirm.
unsafe impl<Offset: OffsetType + Send, A: Allocator + Send> Send for RawTape<Offset, A> {}
unsafe impl<Offset: OffsetType + Sync, A: Allocator + Sync> Sync for RawTape<Offset, A> {}
796
797// Index trait implementations for RawTape to support [i..n] syntax
798impl<Offset: OffsetType, A: Allocator> Index<Range<usize>> for RawTape<Offset, A> {
799    type Output = [u8];
800
801    fn index(&self, range: Range<usize>) -> &Self::Output {
802        let view = self
803            .subview(range.start, range.end)
804            .expect("range out of bounds");
805        // Return the underlying data slice
806        view.data
807    }
808}
809
810impl<Offset: OffsetType, A: Allocator> Index<RangeFrom<usize>> for RawTape<Offset, A> {
811    type Output = [u8];
812
813    fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
814        let view = self
815            .subview(range.start, self.len_items)
816            .expect("range out of bounds");
817        view.data
818    }
819}
820
821impl<Offset: OffsetType, A: Allocator> Index<RangeTo<usize>> for RawTape<Offset, A> {
822    type Output = [u8];
823
824    fn index(&self, range: RangeTo<usize>) -> &Self::Output {
825        let view = self.subview(0, range.end).expect("range out of bounds");
826        view.data
827    }
828}
829
830impl<Offset: OffsetType, A: Allocator> Index<RangeFull> for RawTape<Offset, A> {
831    type Output = [u8];
832
833    fn index(&self, _range: RangeFull) -> &Self::Output {
834        let view = self.view();
835        view.data
836    }
837}
838
839impl<Offset: OffsetType, A: Allocator> Index<RangeInclusive<usize>> for RawTape<Offset, A> {
840    type Output = [u8];
841
842    fn index(&self, range: RangeInclusive<usize>) -> &Self::Output {
843        let view = self
844            .subview(*range.start(), range.end() + 1)
845            .expect("range out of bounds");
846        view.data
847    }
848}
849
850impl<Offset: OffsetType, A: Allocator> Index<RangeToInclusive<usize>> for RawTape<Offset, A> {
851    type Output = [u8];
852
853    fn index(&self, range: RangeToInclusive<usize>) -> &Self::Output {
854        let view = self.subview(0, range.end + 1).expect("range out of bounds");
855        view.data
856    }
857}
858
859// ========================
860// RawTapeView implementation
861// ========================
862
863impl<'a, Offset: OffsetType> RawTapeView<'a, Offset> {
864    /// Creates a view into a slice of the RawTape from start to end (exclusive).
865    pub(crate) fn new<A: Allocator>(
866        tape: &'a RawTape<Offset, A>,
867        start: usize,
868        end: usize,
869    ) -> Result<Self, StringTapeError> {
870        if start > end || end > tape.len() {
871            return Err(StringTapeError::IndexOutOfBounds);
872        }
873
874        let (data_ptr, offsets_ptr) = match (tape.data, tape.offsets) {
875            (Some(data), Some(offsets)) => (data, offsets),
876            _ => return Err(StringTapeError::IndexOutOfBounds),
877        };
878
879        // Keep the data pointer at the beginning of the parent tape to remain Arrow-compatible.
880        // Offsets remain absolute (not normalized) and are sliced to the requested range.
881        let data = unsafe { slice::from_raw_parts(data_ptr.as_ptr().cast::<u8>(), tape.len_bytes) };
882
883        let offsets = unsafe {
884            slice::from_raw_parts(
885                offsets_ptr.as_ptr().cast::<Offset>().add(start),
886                (end - start) + 1,
887            )
888        };
889
890        Ok(Self { data, offsets })
891    }
892
    /// Creates a zero-copy view from raw Arrow-compatible parts.
    ///
    /// The slices are stored as given; nothing is validated or copied here.
    ///
    /// # Safety
    ///
    /// The caller must ensure that:
    /// - `data` contains valid bytes for the lifetime `'a`
    /// - `offsets` contains valid offsets with length `items_count + 1`
    /// - All offsets are within bounds of the data slice
    /// - For CharsTapeView, data must be valid UTF-8
    pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
        Self { data, offsets }
    }
905
906    /// Returns a reference to the bytes at the given index within this view.
907    pub fn get(&self, index: usize) -> Option<&[u8]> {
908        if index >= self.len() {
909            return None;
910        }
911
912        let start_offset = self.offsets[index].to_usize();
913        let end_offset = self.offsets[index + 1].to_usize();
914
915        Some(&self.data[start_offset..end_offset])
916    }
917
918    /// Returns the number of items in this view.
919    pub fn len(&self) -> usize {
920        self.offsets.len().saturating_sub(1)
921    }
922
923    /// Returns `true` if the view contains no items.
924    pub fn is_empty(&self) -> bool {
925        self.len() == 0
926    }
927
928    /// Returns the total number of bytes in this view.
929    pub fn data_len(&self) -> usize {
930        // Span covered by this view
931        self.offsets[self.offsets.len() - 1].to_usize() - self.offsets[0].to_usize()
932    }
933
934    /// Creates a sub-view of this view
935    pub fn subview(
936        &self,
937        start: usize,
938        end: usize,
939    ) -> Result<RawTapeView<'a, Offset>, StringTapeError> {
940        if start > end || end > self.len() {
941            return Err(StringTapeError::IndexOutOfBounds);
942        }
943
944        Ok(RawTapeView {
945            // Keep same data pointer, only narrow the offsets slice
946            data: self.data,
947            offsets: &self.offsets[start..=end],
948        })
949    }
950
951    /// Returns the raw parts of the view for Apache Arrow compatibility.
952    pub fn as_raw_parts(&self) -> RawParts<Offset> {
953        // Expose an Arrow-compatible view: data_ptr remains at the tape base,
954        // offsets are absolute into that buffer, and data_len reaches the last used byte.
955        RawParts {
956            data_ptr: self.data.as_ptr(),
957            offsets_ptr: self.offsets.as_ptr(),
958            data_len: self.offsets[self.offsets.len() - 1].to_usize(),
959            items_count: self.len(),
960        }
961    }
962}
963
/// Returns the bytes of the item at `index` (`view[i]`).
///
/// # Panics
///
/// Panics with "index out of bounds" when `get` returns `None`.
impl<'a, Offset: OffsetType> Index<usize> for RawTapeView<'a, Offset> {
    type Output = [u8];

    fn index(&self, index: usize) -> &Self::Output {
        self.get(index).expect("index out of bounds")
    }
}
971
972// Index trait implementations for RawTapeView to support [i..n] syntax
973impl<'a, Offset: OffsetType> Index<Range<usize>> for RawTapeView<'a, Offset> {
974    type Output = [u8];
975
976    fn index(&self, range: Range<usize>) -> &Self::Output {
977        let view = self
978            .subview(range.start, range.end)
979            .expect("range out of bounds");
980        let start = view.offsets[0].to_usize();
981        let end = view.offsets[view.offsets.len() - 1].to_usize();
982        &view.data[start..end]
983    }
984}
985
986impl<'a, Offset: OffsetType> Index<RangeFrom<usize>> for RawTapeView<'a, Offset> {
987    type Output = [u8];
988
989    fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
990        let view = self
991            .subview(range.start, self.len())
992            .expect("range out of bounds");
993        let start = view.offsets[0].to_usize();
994        let end = view.offsets[view.offsets.len() - 1].to_usize();
995        &view.data[start..end]
996    }
997}
998
999impl<'a, Offset: OffsetType> Index<RangeTo<usize>> for RawTapeView<'a, Offset> {
1000    type Output = [u8];
1001
1002    fn index(&self, range: RangeTo<usize>) -> &Self::Output {
1003        let view = self.subview(0, range.end).expect("range out of bounds");
1004        let start = view.offsets[0].to_usize();
1005        let end = view.offsets[view.offsets.len() - 1].to_usize();
1006        &view.data[start..end]
1007    }
1008}
1009
1010impl<'a, Offset: OffsetType> Index<RangeFull> for RawTapeView<'a, Offset> {
1011    type Output = [u8];
1012
1013    fn index(&self, _range: RangeFull) -> &Self::Output {
1014        let start = self.offsets[0].to_usize();
1015        let end = self.offsets[self.offsets.len() - 1].to_usize();
1016        &self.data[start..end]
1017    }
1018}
1019
1020impl<'a, Offset: OffsetType> Index<RangeInclusive<usize>> for RawTapeView<'a, Offset> {
1021    type Output = [u8];
1022
1023    fn index(&self, range: RangeInclusive<usize>) -> &Self::Output {
1024        let view = self
1025            .subview(*range.start(), range.end() + 1)
1026            .expect("range out of bounds");
1027        let start = view.offsets[0].to_usize();
1028        let end = view.offsets[view.offsets.len() - 1].to_usize();
1029        &view.data[start..end]
1030    }
1031}
1032
1033impl<'a, Offset: OffsetType> Index<RangeToInclusive<usize>> for RawTapeView<'a, Offset> {
1034    type Output = [u8];
1035
1036    fn index(&self, range: RangeToInclusive<usize>) -> &Self::Output {
1037        let view = self.subview(0, range.end + 1).expect("range out of bounds");
1038        let start = view.offsets[0].to_usize();
1039        let end = view.offsets[view.offsets.len() - 1].to_usize();
1040        &view.data[start..end]
1041    }
1042}
1043
1044// ========================
1045// CharsTapeView implementation
1046// ========================
1047
1048impl<'a, Offset: OffsetType> CharsTapeView<'a, Offset> {
1049    /// Creates a zero-copy CharsTapeView from raw Arrow StringArray parts.
1050    ///
1051    /// # Safety
1052    ///
1053    /// The caller must ensure that:
1054    /// - `data` contains valid UTF-8 bytes for the lifetime `'a`
1055    /// - `offsets` contains valid offsets with appropriate length
1056    /// - All offsets are within bounds of the data slice
1057    pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
1058        Self {
1059            inner: RawTapeView::from_raw_parts(data, offsets),
1060        }
1061    }
1062
1063    /// Returns a reference to the string at the given index, or `None` if out of bounds.
1064    pub fn get(&self, index: usize) -> Option<&str> {
1065        // Safe because CharsTapeView only comes from CharsTape which validates UTF-8
1066        self.inner
1067            .get(index)
1068            .map(|b| unsafe { core::str::from_utf8_unchecked(b) })
1069    }
1070
1071    /// Returns the number of strings in this view.
1072    pub fn len(&self) -> usize {
1073        self.inner.len()
1074    }
1075
1076    /// Returns `true` if the view contains no strings.
1077    pub fn is_empty(&self) -> bool {
1078        self.inner.is_empty()
1079    }
1080
1081    /// Returns the total number of bytes in this view.
1082    pub fn data_len(&self) -> usize {
1083        self.inner.data_len()
1084    }
1085
1086    /// Creates a sub-view of this view
1087    pub fn subview(
1088        &self,
1089        start: usize,
1090        end: usize,
1091    ) -> Result<CharsTapeView<'a, Offset>, StringTapeError> {
1092        Ok(CharsTapeView {
1093            inner: self.inner.subview(start, end)?,
1094        })
1095    }
1096
1097    /// Returns the raw parts of the view for Apache Arrow compatibility.
1098    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1099        self.inner.as_raw_parts()
1100    }
1101}
1102
1103impl<'a, Offset: OffsetType> Index<usize> for CharsTapeView<'a, Offset> {
1104    type Output = str;
1105
1106    fn index(&self, index: usize) -> &Self::Output {
1107        self.get(index).expect("index out of bounds")
1108    }
1109}
1110
1111// ========================
1112// BytesTapeView implementation
1113// ========================
1114
1115impl<'a, Offset: OffsetType> BytesTapeView<'a, Offset> {
1116    /// Creates a zero-copy BytesTapeView from raw Arrow BinaryArray parts.
1117    ///
1118    /// # Safety
1119    ///
1120    /// The caller must ensure that:
1121    /// - `data` contains valid bytes for the lifetime `'a`
1122    /// - `offsets` contains valid offsets with appropriate length
1123    /// - All offsets are within bounds of the data slice
1124    pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
1125        Self {
1126            inner: RawTapeView::from_raw_parts(data, offsets),
1127        }
1128    }
1129
1130    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
1131    pub fn get(&self, index: usize) -> Option<&[u8]> {
1132        self.inner.get(index)
1133    }
1134
1135    /// Returns the number of items in this view.
1136    pub fn len(&self) -> usize {
1137        self.inner.len()
1138    }
1139
1140    /// Returns `true` if the view contains no items.
1141    pub fn is_empty(&self) -> bool {
1142        self.inner.is_empty()
1143    }
1144
1145    /// Returns the total number of bytes in this view.
1146    pub fn data_len(&self) -> usize {
1147        self.inner.data_len()
1148    }
1149
1150    /// Creates a sub-view of this view
1151    pub fn subview(
1152        &self,
1153        start: usize,
1154        end: usize,
1155    ) -> Result<BytesTapeView<'a, Offset>, StringTapeError> {
1156        Ok(BytesTapeView {
1157            inner: self.inner.subview(start, end)?,
1158        })
1159    }
1160
1161    /// Returns the raw parts of the view for Apache Arrow compatibility.
1162    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1163        self.inner.as_raw_parts()
1164    }
1165}
1166
1167impl<'a, Offset: OffsetType> Index<usize> for BytesTapeView<'a, Offset> {
1168    type Output = [u8];
1169
1170    fn index(&self, index: usize) -> &Self::Output {
1171        self.get(index).expect("index out of bounds")
1172    }
1173}
1174
1175// ========================
1176// CharsTape (UTF-8 view)
1177// ========================
1178
1179impl<Offset: OffsetType, A: Allocator> CharsTape<Offset, A> {
1180    /// Creates a new, empty CharsTape with the global allocator.
1181    pub fn new() -> CharsTape<Offset, Global> {
1182        CharsTape {
1183            inner: RawTape::<Offset, Global>::new(),
1184        }
1185    }
1186
1187    /// Creates a new, empty CharsTape with a custom allocator.
1188    pub fn new_in(allocator: A) -> Self {
1189        Self {
1190            inner: RawTape::<Offset, A>::new_in(allocator),
1191        }
1192    }
1193
1194    /// Creates a new CharsTape with pre-allocated capacity using the global allocator.
1195    pub fn with_capacity(
1196        data_capacity: usize,
1197        strings_capacity: usize,
1198    ) -> Result<CharsTape<Offset, Global>, StringTapeError> {
1199        Ok(CharsTape {
1200            inner: RawTape::<Offset, Global>::with_capacity(data_capacity, strings_capacity)?,
1201        })
1202    }
1203
1204    /// Creates a new CharsTape with pre-allocated capacity and a custom allocator.
1205    pub fn with_capacity_in(
1206        data_capacity: usize,
1207        strings_capacity: usize,
1208        allocator: A,
1209    ) -> Result<Self, StringTapeError> {
1210        Ok(Self {
1211            inner: RawTape::<Offset, A>::with_capacity_in(
1212                data_capacity,
1213                strings_capacity,
1214                allocator,
1215            )?,
1216        })
1217    }
1218
1219    /// Adds a string to the end of the CharsTape.
1220    pub fn push(&mut self, s: &str) -> Result<(), StringTapeError> {
1221        self.inner.push(s.as_bytes())
1222    }
1223
1224    /// Returns a reference to the string at the given index, or `None` if out of bounds.
1225    pub fn get(&self, index: usize) -> Option<&str> {
1226        // Safe because CharsTape only accepts &str pushes.
1227        self.inner
1228            .get(index)
1229            .map(|b| unsafe { core::str::from_utf8_unchecked(b) })
1230    }
1231
1232    /// Returns the number of strings in the CharsTape.
1233    pub fn len(&self) -> usize {
1234        self.inner.len()
1235    }
1236
1237    /// Returns `true` if the CharsTape contains no strings.
1238    pub fn is_empty(&self) -> bool {
1239        self.inner.is_empty()
1240    }
1241
1242    /// Returns the total number of bytes used by string data.
1243    pub fn data_len(&self) -> usize {
1244        self.inner.data_len()
1245    }
1246
1247    /// Returns the number of strings currently stored (same as `len()`).
1248    pub fn capacity(&self) -> usize {
1249        self.inner.len()
1250    }
1251
1252    /// Returns the number of bytes allocated for string data.
1253    pub fn data_capacity(&self) -> usize {
1254        self.inner.data_capacity()
1255    }
1256
1257    /// Returns the number of offset slots allocated.
1258    pub fn offsets_capacity(&self) -> usize {
1259        self.inner.offsets_capacity()
1260    }
1261
1262    /// Removes all strings from the CharsTape, keeping allocated capacity.
1263    pub fn clear(&mut self) {
1264        self.inner.clear()
1265    }
1266
1267    /// Shortens the CharsTape, keeping the first `len` strings and dropping the rest.
1268    pub fn truncate(&mut self, len: usize) {
1269        self.inner.truncate(len)
1270    }
1271
1272    /// Extends the CharsTape with the contents of an iterator.
1273    pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
1274    where
1275        I: IntoIterator,
1276        I::Item: AsRef<str>,
1277    {
1278        for s in iter {
1279            self.push(s.as_ref())?;
1280        }
1281        Ok(())
1282    }
1283
1284    /// Returns the raw parts of the CharsTape for Apache Arrow compatibility.
1285    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1286        self.inner.as_raw_parts()
1287    }
1288
1289    /// Returns a slice view of the data buffer.
1290    pub fn data_slice(&self) -> &[u8] {
1291        self.inner.data_slice()
1292    }
1293
1294    /// Returns a slice view of the offsets buffer.
1295    pub fn offsets_slice(&self) -> &[Offset] {
1296        self.inner.offsets_slice()
1297    }
1298
1299    pub fn iter(&self) -> CharsTapeIter<'_, Offset, A> {
1300        CharsTapeIter {
1301            tape: self,
1302            index: 0,
1303        }
1304    }
1305
1306    /// Returns a reference to the allocator used by this CharsTape.
1307    pub fn allocator(&self) -> &A {
1308        self.inner.allocator()
1309    }
1310
1311    /// Creates a view of the entire CharsTape.
1312    pub fn view(&self) -> CharsTapeView<'_, Offset> {
1313        CharsTapeView {
1314            inner: self.inner.view(),
1315        }
1316    }
1317
1318    /// Creates a subview of a continuous slice of this CharsTape.
1319    pub fn subview(
1320        &self,
1321        start: usize,
1322        end: usize,
1323    ) -> Result<CharsTapeView<'_, Offset>, StringTapeError> {
1324        Ok(CharsTapeView {
1325            inner: self.inner.subview(start, end)?,
1326        })
1327    }
1328
1329    /// Creates a CharsCows view of the tape.
1330    ///
1331    /// # Example
1332    ///
1333    /// ```rust
1334    /// # use stringtape::{CharsTapeI32, CharsCows, StringTapeError};
1335    /// # use std::borrow::Cow;
1336    /// let mut tape = CharsTapeI32::new();
1337    /// tape.extend(["apple", "banana", "cherry"])?;
1338    ///
1339    /// let cows: CharsCows<i32, u16> = tape.as_reorderable()?;
1340    /// assert_eq!(cows.get(0), Some("apple"));
1341    /// # Ok::<(), StringTapeError>(())
1342    /// ```
1343    pub fn as_reorderable<Length: LengthType>(
1344        &self,
1345    ) -> Result<CharsCows<'_, Offset, Length>, StringTapeError> {
1346        CharsCows::from_iter_and_data(self, Cow::Borrowed(self.data_slice()))
1347    }
1348
1349    /// Returns data and offsets slices for zero-copy Arrow conversion.
1350    pub fn arrow_slices(&self) -> (&[u8], &[Offset]) {
1351        (self.data_slice(), self.offsets_slice())
1352    }
1353}
1354
impl<Offset: OffsetType, A: Allocator> Drop for CharsTape<Offset, A> {
    /// Intentionally does nothing.
    ///
    /// No explicit cleanup happens here: Rust drops the `inner` field automatically
    /// after this method returns.
    fn drop(&mut self) {
        // No-op body; the `inner` field is dropped by the compiler-generated drop glue.
        // NOTE(review): because this impl exists, `inner` cannot be moved out of a
        // `CharsTape` by value, which is presumably why the conversion to `BytesTape`
        // uses `ptr::read` + `mem::forget` — confirm before removing this impl.
    }
}
1361
// NOTE(review): these manual impls assert that a `CharsTape` may be sent to / shared
// between threads whenever its offset type and allocator can be. The justification
// presumably lies in how `RawTape` owns its buffers — confirm against its definition.
unsafe impl<Offset: OffsetType + Send, A: Allocator + Send> Send for CharsTape<Offset, A> {}
unsafe impl<Offset: OffsetType + Sync, A: Allocator + Sync> Sync for CharsTape<Offset, A> {}
1364
1365pub struct CharsTapeIter<'a, Offset: OffsetType, A: Allocator> {
1366    tape: &'a CharsTape<Offset, A>,
1367    index: usize,
1368}
1369
1370impl<'a, Offset: OffsetType, A: Allocator> Iterator for CharsTapeIter<'a, Offset, A> {
1371    type Item = &'a str;
1372
1373    fn next(&mut self) -> Option<Self::Item> {
1374        let result = self.tape.get(self.index);
1375        if result.is_some() {
1376            self.index += 1;
1377        }
1378        result
1379    }
1380
1381    fn size_hint(&self) -> (usize, Option<usize>) {
1382        let remaining = self.tape.len() - self.index;
1383        (remaining, Some(remaining))
1384    }
1385}
1386
1387impl<'a, Offset: OffsetType, A: Allocator> ExactSizeIterator for CharsTapeIter<'a, Offset, A> {}
1388
1389impl<Offset: OffsetType> FromIterator<String> for CharsTape<Offset, Global> {
1390    fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Self {
1391        let mut tape = CharsTape::<Offset, Global>::new();
1392        for s in iter {
1393            tape.push(&s)
1394                .expect("Failed to build CharsTape from iterator");
1395        }
1396        tape
1397    }
1398}
1399
1400impl<'a, Offset: OffsetType> FromIterator<&'a str> for CharsTape<Offset, Global> {
1401    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
1402        let mut tape = CharsTape::<Offset, Global>::new();
1403        for s in iter {
1404            tape.push(s)
1405                .expect("Failed to build CharsTape from iterator");
1406        }
1407        tape
1408    }
1409}
1410
1411impl<Offset: OffsetType, A: Allocator> Index<usize> for CharsTape<Offset, A> {
1412    type Output = str;
1413
1414    fn index(&self, index: usize) -> &Self::Output {
1415        self.get(index).expect("index out of bounds")
1416    }
1417}
1418
1419impl<'a, Offset: OffsetType, A: Allocator> IntoIterator for &'a CharsTape<Offset, A> {
1420    type Item = &'a str;
1421    type IntoIter = CharsTapeIter<'a, Offset, A>;
1422
1423    fn into_iter(self) -> Self::IntoIter {
1424        self.iter()
1425    }
1426}
1427
1428// ======================
1429// BytesTape (bytes view)
1430// ======================
1431
1432impl<Offset: OffsetType, A: Allocator> BytesTape<Offset, A> {
1433    /// Creates a new, empty BytesTape with the global allocator.
1434    pub fn new() -> BytesTape<Offset, Global> {
1435        BytesTape {
1436            inner: RawTape::<Offset, Global>::new(),
1437        }
1438    }
1439
1440    /// Creates a new, empty BytesTape with a custom allocator.
1441    pub fn new_in(allocator: A) -> Self {
1442        Self {
1443            inner: RawTape::<Offset, A>::new_in(allocator),
1444        }
1445    }
1446
1447    /// Creates a new BytesTape with pre-allocated capacity using the global allocator.
1448    pub fn with_capacity(
1449        data_capacity: usize,
1450        items_capacity: usize,
1451    ) -> Result<BytesTape<Offset, Global>, StringTapeError> {
1452        Ok(BytesTape {
1453            inner: RawTape::<Offset, Global>::with_capacity(data_capacity, items_capacity)?,
1454        })
1455    }
1456
1457    /// Creates a new BytesTape with pre-allocated capacity and a custom allocator.
1458    pub fn with_capacity_in(
1459        data_capacity: usize,
1460        items_capacity: usize,
1461        allocator: A,
1462    ) -> Result<Self, StringTapeError> {
1463        Ok(Self {
1464            inner: RawTape::<Offset, A>::with_capacity_in(
1465                data_capacity,
1466                items_capacity,
1467                allocator,
1468            )?,
1469        })
1470    }
1471
1472    /// Adds bytes to the end of the tape.
1473    pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
1474        self.inner.push(bytes)
1475    }
1476
1477    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
1478    pub fn get(&self, index: usize) -> Option<&[u8]> {
1479        self.inner.get(index)
1480    }
1481
1482    /// Returns the number of items in the tape.
1483    pub fn len(&self) -> usize {
1484        self.inner.len()
1485    }
1486
1487    /// Returns `true` if the tape contains no items.
1488    pub fn is_empty(&self) -> bool {
1489        self.inner.is_empty()
1490    }
1491
1492    /// Returns the total number of bytes used by data.
1493    pub fn data_len(&self) -> usize {
1494        self.inner.data_len()
1495    }
1496
1497    /// Returns the number of bytes allocated for data.
1498    pub fn data_capacity(&self) -> usize {
1499        self.inner.data_capacity()
1500    }
1501
1502    /// Returns the number of offset slots allocated.
1503    pub fn offsets_capacity(&self) -> usize {
1504        self.inner.offsets_capacity()
1505    }
1506
1507    /// Removes all items from the tape, keeping allocated capacity.
1508    pub fn clear(&mut self) {
1509        self.inner.clear()
1510    }
1511
1512    /// Shortens the tape, keeping the first `len` items and dropping the rest.
1513    pub fn truncate(&mut self, len: usize) {
1514        self.inner.truncate(len)
1515    }
1516
1517    /// Extends the tape with the contents of an iterator of bytes.
1518    pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
1519    where
1520        I: IntoIterator,
1521        I::Item: AsRef<[u8]>,
1522    {
1523        self.inner.extend(iter)
1524    }
1525
1526    /// Returns the raw parts of the tape for Apache Arrow compatibility.
1527    pub fn as_raw_parts(&self) -> RawParts<Offset> {
1528        self.inner.as_raw_parts()
1529    }
1530
1531    /// Returns a slice view of the data buffer.
1532    pub fn data_slice(&self) -> &[u8] {
1533        self.inner.data_slice()
1534    }
1535
1536    /// Returns a slice view of the offsets buffer.
1537    pub fn offsets_slice(&self) -> &[Offset] {
1538        self.inner.offsets_slice()
1539    }
1540
1541    /// Returns a reference to the allocator used by this BytesTape.
1542    pub fn allocator(&self) -> &A {
1543        self.inner.allocator()
1544    }
1545
1546    /// Creates a view of the entire BytesTape.
1547    pub fn view(&self) -> BytesTapeView<'_, Offset> {
1548        BytesTapeView {
1549            inner: self.inner.view(),
1550        }
1551    }
1552
1553    /// Returns an iterator over the byte cows.
1554    pub fn iter(&self) -> BytesTapeIter<'_, Offset, A> {
1555        BytesTapeIter {
1556            tape: self,
1557            index: 0,
1558        }
1559    }
1560
1561    /// Creates a subview of a continuous slice of this BytesTape.
1562    pub fn subview(
1563        &self,
1564        start: usize,
1565        end: usize,
1566    ) -> Result<BytesTapeView<'_, Offset>, StringTapeError> {
1567        Ok(BytesTapeView {
1568            inner: self.inner.subview(start, end)?,
1569        })
1570    }
1571
1572    /// Creates a BytesCows view of the tape.
1573    ///
1574    /// # Example
1575    ///
1576    /// ```rust
1577    /// # use stringtape::{BytesTapeI32, BytesCows, StringTapeError};
1578    /// # use std::borrow::Cow;
1579    /// let mut tape = BytesTapeI32::new();
1580    /// tape.push(&[1, 2, 3])?;
1581    /// tape.push(&[4, 5, 6])?;
1582    /// tape.push(&[7, 8, 9])?;
1583    ///
1584    /// let cows: BytesCows<i32, u16> = tape.as_reorderable()?;
1585    /// assert_eq!(cows.get(0), Some(&[1, 2, 3][..]));
1586    /// # Ok::<(), StringTapeError>(())
1587    /// ```
1588    pub fn as_reorderable<Length: LengthType>(
1589        &self,
1590    ) -> Result<BytesCows<'_, Offset, Length>, StringTapeError> {
1591        BytesCows::from_iter_and_data(self, Cow::Borrowed(self.data_slice()))
1592    }
1593
1594    /// Returns data and offsets slices for zero-copy Arrow conversion.
1595    pub fn arrow_slices(&self) -> (&[u8], &[Offset]) {
1596        (self.data_slice(), self.offsets_slice())
1597    }
1598}
1599
1600impl<Offset: OffsetType, A: Allocator> Index<usize> for BytesTape<Offset, A> {
1601    type Output = [u8];
1602
1603    fn index(&self, index: usize) -> &Self::Output {
1604        self.get(index).expect("index out of bounds")
1605    }
1606}
1607
1608pub struct BytesTapeIter<'a, Offset: OffsetType, A: Allocator> {
1609    tape: &'a BytesTape<Offset, A>,
1610    index: usize,
1611}
1612
1613impl<'a, Offset: OffsetType, A: Allocator> Iterator for BytesTapeIter<'a, Offset, A> {
1614    type Item = &'a [u8];
1615
1616    fn next(&mut self) -> Option<Self::Item> {
1617        let result = self.tape.get(self.index);
1618        if result.is_some() {
1619            self.index += 1;
1620        }
1621        result
1622    }
1623
1624    fn size_hint(&self) -> (usize, Option<usize>) {
1625        let remaining = self.tape.len() - self.index;
1626        (remaining, Some(remaining))
1627    }
1628}
1629
1630impl<'a, Offset: OffsetType, A: Allocator> ExactSizeIterator for BytesTapeIter<'a, Offset, A> {}
1631
1632impl<'a, Offset: OffsetType, A: Allocator> IntoIterator for &'a BytesTape<Offset, A> {
1633    type Item = &'a [u8];
1634    type IntoIter = BytesTapeIter<'a, Offset, A>;
1635
1636    fn into_iter(self) -> Self::IntoIter {
1637        self.iter()
1638    }
1639}
1640
// Signed (Arrow-compatible) aliases
/// `CharsTape` with `i32` offsets on the global allocator.
pub type CharsTapeI32 = CharsTape<i32, Global>;
/// `CharsTape` with `i64` offsets on the global allocator.
pub type CharsTapeI64 = CharsTape<i64, Global>;
/// `BytesTape` with `i32` offsets on the global allocator.
pub type BytesTapeI32 = BytesTape<i32, Global>;
/// `BytesTape` with `i64` offsets on the global allocator.
pub type BytesTapeI64 = BytesTape<i64, Global>;

/// `CharsTapeView` with `i32` offsets.
pub type CharsTapeViewI32<'a> = CharsTapeView<'a, i32>;
/// `CharsTapeView` with `i64` offsets.
pub type CharsTapeViewI64<'a> = CharsTapeView<'a, i64>;
/// `BytesTapeView` with `i32` offsets.
pub type BytesTapeViewI32<'a> = BytesTapeView<'a, i32>;
/// `BytesTapeView` with `i64` offsets.
pub type BytesTapeViewI64<'a> = BytesTapeView<'a, i64>;

// Unsigned aliases (not zero-copy with Arrow)
/// `CharsTape` with `u32` offsets on the global allocator.
pub type CharsTapeU32 = CharsTape<u32, Global>;
/// `CharsTape` with `u64` offsets on the global allocator.
pub type CharsTapeU64 = CharsTape<u64, Global>;
/// `BytesTape` with `u16` offsets on the global allocator.
pub type BytesTapeU16 = BytesTape<u16, Global>;
/// `BytesTape` with `u32` offsets on the global allocator.
pub type BytesTapeU32 = BytesTape<u32, Global>;
/// `BytesTape` with `u64` offsets on the global allocator.
pub type BytesTapeU64 = BytesTape<u64, Global>;

/// `CharsTapeView` with `u32` offsets.
pub type CharsTapeViewU32<'a> = CharsTapeView<'a, u32>;
/// `CharsTapeView` with `u64` offsets.
pub type CharsTapeViewU64<'a> = CharsTapeView<'a, u64>;
/// `BytesTapeView` with `u16` offsets.
pub type BytesTapeViewU16<'a> = BytesTapeView<'a, u16>;
/// `BytesTapeView` with `u32` offsets.
pub type BytesTapeViewU32<'a> = BytesTapeView<'a, u32>;
/// `BytesTapeView` with `u64` offsets.
pub type BytesTapeViewU64<'a> = BytesTapeView<'a, u64>;
1664
1665// Conversion implementations between BytesTape and CharsTape
1666impl<Offset: OffsetType, A: Allocator> TryFrom<BytesTape<Offset, A>> for CharsTape<Offset, A> {
1667    type Error = StringTapeError;
1668
1669    fn try_from(bytes_tape: BytesTape<Offset, A>) -> Result<Self, Self::Error> {
1670        // Validate that all byte sequences are valid UTF-8
1671        for i in 0..bytes_tape.len() {
1672            if let Err(e) = core::str::from_utf8(&bytes_tape[i]) {
1673                return Err(StringTapeError::Utf8Error(e));
1674            }
1675        }
1676
1677        // Since validation passed, we can safely convert
1678        // We need to take ownership of the inner RawTape without dropping BytesTape
1679        let inner = unsafe {
1680            // Take ownership of the inner RawTape
1681            let inner = core::ptr::read(&bytes_tape.inner);
1682            // Prevent BytesTape's destructor from running
1683            core::mem::forget(bytes_tape);
1684            inner
1685        };
1686        Ok(CharsTape { inner })
1687    }
1688}
1689
1690impl<Offset: OffsetType, A: Allocator> From<CharsTape<Offset, A>> for BytesTape<Offset, A> {
1691    fn from(chars_tape: CharsTape<Offset, A>) -> Self {
1692        // CharsTape already contains valid UTF-8, so conversion to BytesTape is infallible
1693        // We need to take ownership of the inner RawTape without dropping CharsTape
1694        let inner = unsafe {
1695            // Take ownership of the inner RawTape
1696            let inner = core::ptr::read(&chars_tape.inner);
1697            // Prevent CharsTape's destructor from running
1698            core::mem::forget(chars_tape);
1699            inner
1700        };
1701        BytesTape { inner }
1702    }
1703}
1704
1705impl<Offset: OffsetType, A: Allocator> BytesTape<Offset, A> {
1706    pub fn try_into_chars_tape(self) -> Result<CharsTape<Offset, A>, StringTapeError> {
1707        self.try_into()
1708    }
1709}
1710
1711impl<Offset: OffsetType, A: Allocator> CharsTape<Offset, A> {
1712    pub fn into_bytes_tape(self) -> BytesTape<Offset, A> {
1713        self.into()
1714    }
1715}
1716
1717// Conversion implementations between BytesTapeView and CharsTapeView
1718impl<'a, Offset: OffsetType> TryFrom<BytesTapeView<'a, Offset>> for CharsTapeView<'a, Offset> {
1719    type Error = StringTapeError;
1720
1721    fn try_from(bytes_view: BytesTapeView<'a, Offset>) -> Result<Self, Self::Error> {
1722        // Validate that all byte sequences are valid UTF-8
1723        for i in 0..bytes_view.len() {
1724            let bytes = bytes_view.get(i).ok_or(StringTapeError::IndexOutOfBounds)?;
1725            if core::str::from_utf8(bytes).is_err() {
1726                return Err(StringTapeError::Utf8Error(
1727                    core::str::from_utf8(bytes).unwrap_err(),
1728                ));
1729            }
1730        }
1731
1732        // Since validation passed, construct a CharsTapeView over the same inner view
1733        Ok(CharsTapeView {
1734            inner: bytes_view.inner,
1735        })
1736    }
1737}
1738
1739impl<'a, Offset: OffsetType> From<CharsTapeView<'a, Offset>> for BytesTapeView<'a, Offset> {
1740    fn from(chars_view: CharsTapeView<'a, Offset>) -> Self {
1741        // UTF-8 bytes can always be viewed as bytes
1742        BytesTapeView {
1743            inner: chars_view.inner,
1744        }
1745    }
1746}
1747
1748impl<'a, Offset: OffsetType> BytesTapeView<'a, Offset> {
1749    pub fn try_into_chars_view(self) -> Result<CharsTapeView<'a, Offset>, StringTapeError> {
1750        self.try_into()
1751    }
1752}
1753
1754impl<'a, Offset: OffsetType> CharsTapeView<'a, Offset> {
1755    pub fn into_bytes_view(self) -> BytesTapeView<'a, Offset> {
1756        self.into()
1757    }
1758}
1759
1760impl<Offset: OffsetType> Default for CharsTape<Offset, Global> {
1761    fn default() -> Self {
1762        Self::new()
1763    }
1764}
1765
1766// ========================
1767// CharsCows - Compact slice collection with configurable offset/length types
1768// ========================
1769
/// Packed entry struct to eliminate padding overhead between offset and length.
///
/// For example, `(u64, u8)` tuple uses 16 bytes (8 + 8 padding), but
/// `PackedEntry<u64, u8>` uses only 9 bytes (8 + 1).
///
/// Because the layout is `packed(1)`, fields may be unaligned: read them by value
/// instead of taking references to them.
#[repr(C, packed(1))]
#[derive(Copy, Clone, Debug)]
struct PackedEntry<Offset, Length> {
    /// Start of the slice within the shared data buffer.
    offset: Offset,
    /// Length of the slice in bytes.
    length: Length,
}
1780
/// A memory-efficient collection of string slices with configurable offset and length types.
///
/// `CharsCows` stores references to string slices in a shared data buffer using compact
/// (offset, length) pairs. This is ideal for large datasets where you want to reference
/// substrings without duplicating the underlying data.
///
/// # Type Parameters
///
/// * `Offset` - The offset type (u8, u16, u32, u64) determining maximum data size
/// * `Length` - The length type (u8, u16, u32, u64) determining maximum slice size
///
/// # Memory Efficiency
///
/// For 500M words (8 bytes avg) from a 4GB file:
/// - `Vec<String>`: ~66 GB (24 bytes per String + heap overhead)
/// - `CharsCows<u32, u16>`: ~7 GB (4+2 bytes per entry + shared 4GB data)
///
/// # Examples
///
/// ```rust
/// use stringtape::{CharsCows, StringTapeError};
/// use std::borrow::Cow;
///
/// let data = "hello world foo bar";
/// let cows = CharsCows::<u32, u16>::from_iter_and_data(
///     data.split_whitespace(),
///     Cow::Borrowed(data.as_bytes())
/// )?;
///
/// assert_eq!(cows.len(), 4);
/// assert_eq!(cows.get(0), Some("hello"));
/// assert_eq!(cows.get(3), Some("bar"));
/// # Ok::<(), StringTapeError>(())
/// ```
#[derive(Debug, Clone)]
pub struct CharsCows<'a, Offset: OffsetType = u32, Length: LengthType = u16> {
    /// Shared data buffer (borrowed or owned) that every entry points into.
    data: Cow<'a, [u8]>,
    /// One packed (offset, length) pair per stored slice.
    entries: Vec<PackedEntry<Offset, Length>>,
}
1820
/// A memory-efficient collection of byte slices with configurable offset and length types.
///
/// Similar to `CharsCows` but for arbitrary binary data without UTF-8 validation.
#[derive(Debug, Clone)]
pub struct BytesCows<'a, Offset: OffsetType = u32, Length: LengthType = u16> {
    /// Shared data buffer (borrowed or owned) that every entry points into.
    data: Cow<'a, [u8]>,
    /// One packed (offset, length) pair per stored slice.
    entries: Vec<PackedEntry<Offset, Length>>,
}
1829
1830impl<'a, Offset: OffsetType, Length: LengthType> CharsCows<'a, Offset, Length> {
1831    /// Creates a CharsCows from an iterator of string slices and shared data buffer.
1832    ///
1833    /// The slices must be subslices of the data buffer. Offsets and lengths are inferred
1834    /// from the slice pointers.
1835    ///
1836    /// # Arguments
1837    ///
1838    /// * `iter` - Iterator yielding string slices that are subslices of `data`
1839    /// * `data` - Cow-wrapped data buffer (borrowed or owned)
1840    ///
1841    /// # Errors
1842    ///
1843    /// - `OffsetOverflow` if offset/length exceeds type maximum
1844    /// - `IndexOutOfBounds` if slice not within data buffer
1845    ///
1846    /// # Example
1847    ///
1848    /// ```rust
1849    /// # use stringtape::{CharsCowsU32U8, StringTapeError};
1850    /// # use std::borrow::Cow;
1851    /// let data = "hello world";
1852    /// let cows = CharsCowsU32U8::from_iter_and_data(
1853    ///     data.split_whitespace(),
1854    ///     Cow::Borrowed(data.as_bytes())
1855    /// )?;
1856    /// # Ok::<(), StringTapeError>(())
1857    /// ```
1858    pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
1859    where
1860        I: IntoIterator,
1861        I::Item: AsRef<str>,
1862    {
1863        let data_ptr = data.as_ptr() as usize;
1864        let data_end = data_ptr + data.len();
1865        let mut entries = Vec::new();
1866
1867        for s in iter {
1868            let s_ref = s.as_ref();
1869            let s_bytes = s_ref.as_bytes();
1870            let s_ptr = s_bytes.as_ptr() as usize;
1871
1872            // Calculate offset from base pointer
1873            if s_ptr < data_ptr || s_ptr > data_end {
1874                return Err(StringTapeError::IndexOutOfBounds);
1875            }
1876
1877            let offset = s_ptr - data_ptr;
1878            let length = s_bytes.len();
1879
1880            if offset + length > data.len() {
1881                return Err(StringTapeError::IndexOutOfBounds);
1882            }
1883
1884            let offset_typed = Offset::from_usize(offset).ok_or(StringTapeError::OffsetOverflow)?;
1885            let length_typed = Length::from_usize(length).ok_or(StringTapeError::OffsetOverflow)?;
1886
1887            entries.push(PackedEntry {
1888                offset: offset_typed,
1889                length: length_typed,
1890            });
1891        }
1892
1893        Ok(Self { data, entries })
1894    }
1895
1896    /// Returns a reference to the string at the given index, or `None` if out of bounds.
1897    pub fn get(&self, index: usize) -> Option<&'_ str> {
1898        self.entries.get(index).map(|entry| {
1899            // Must copy fields from packed struct (can't take references)
1900            let start = entry.offset.to_usize();
1901            let len = entry.length.to_usize();
1902            // Safety: UTF-8 validated during construction
1903            // The lifetime of the returned &str is tied to self.data, not self
1904            unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1905        })
1906    }
1907
1908    /// Returns the number of slices in the collection.
1909    pub fn len(&self) -> usize {
1910        self.entries.len()
1911    }
1912
1913    /// Returns `true` if the collection contains no cows.
1914    pub fn is_empty(&self) -> bool {
1915        self.entries.is_empty()
1916    }
1917
1918    /// Returns an iterator over the string cows.
1919    pub fn iter(&self) -> CharsCowsIter<'_, Offset, Length> {
1920        CharsCowsIter {
1921            slices: self,
1922            index: 0,
1923        }
1924    }
1925
1926    /// Returns a reference to the underlying data buffer.
1927    pub fn data(&self) -> &[u8] {
1928        &self.data
1929    }
1930
1931    /// Sorts the slices in-place using the default string comparison.
1932    ///
1933    /// This is a stable sort that preserves the order of equal elements.
1934    ///
1935    /// # Examples
1936    ///
1937    /// ```rust
1938    /// use stringtape::CharsCowsU32U8;
1939    /// use std::borrow::Cow;
1940    ///
1941    /// let data = "zebra apple banana";
1942    /// let mut cows = CharsCowsU32U8::from_iter_and_data(
1943    ///     data.split_whitespace(),
1944    ///     Cow::Borrowed(data.as_bytes())
1945    /// ).unwrap();
1946    ///
1947    /// cows.sort();
1948    /// let sorted: Vec<&str> = cows.iter().collect();
1949    /// assert_eq!(sorted, vec!["apple", "banana", "zebra"]);
1950    /// # Ok::<(), stringtape::StringTapeError>(())
1951    /// ```
1952    pub fn sort(&mut self)
1953    where
1954        Offset: OffsetType,
1955        Length: LengthType,
1956    {
1957        self.entries.sort_by(|a, b| {
1958            let str_a = {
1959                let start = a.offset.to_usize();
1960                let len = a.length.to_usize();
1961                unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1962            };
1963            let str_b = {
1964                let start = b.offset.to_usize();
1965                let len = b.length.to_usize();
1966                unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1967            };
1968            str_a.cmp(str_b)
1969        });
1970    }
1971
1972    /// Sorts the slices in-place using an unstable sorting algorithm.
1973    ///
1974    /// This is faster than stable sort but may not preserve the order of equal elements.
1975    pub fn sort_unstable(&mut self)
1976    where
1977        Offset: OffsetType,
1978        Length: LengthType,
1979    {
1980        self.entries.sort_unstable_by(|a, b| {
1981            let str_a = {
1982                let start = a.offset.to_usize();
1983                let len = a.length.to_usize();
1984                unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1985            };
1986            let str_b = {
1987                let start = b.offset.to_usize();
1988                let len = b.length.to_usize();
1989                unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1990            };
1991            str_a.cmp(str_b)
1992        });
1993    }
1994
1995    /// Sorts the slices in-place using a custom comparison function.
1996    ///
1997    /// # Examples
1998    ///
1999    /// ```rust
2000    /// use stringtape::CharsCowsU32U8;
2001    /// use std::borrow::Cow;
2002    ///
2003    /// let data = "aaa bb c";
2004    /// let mut cows = CharsCowsU32U8::from_iter_and_data(
2005    ///     data.split_whitespace(),
2006    ///     Cow::Borrowed(data.as_bytes())
2007    /// ).unwrap();
2008    ///
2009    /// // Sort by length, then alphabetically
2010    /// cows.sort_by(|a, b| a.len().cmp(&b.len()).then(a.cmp(b)));
2011    /// let sorted: Vec<&str> = cows.iter().collect();
2012    /// assert_eq!(sorted, vec!["c", "bb", "aaa"]);
2013    /// # Ok::<(), stringtape::StringTapeError>(())
2014    /// ```
2015    pub fn sort_by<F>(&mut self, mut compare: F)
2016    where
2017        F: FnMut(&str, &str) -> core::cmp::Ordering,
2018        Offset: OffsetType,
2019        Length: LengthType,
2020    {
2021        self.entries.sort_by(|a, b| {
2022            let str_a = {
2023                let start = a.offset.to_usize();
2024                let len = a.length.to_usize();
2025                unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
2026            };
2027            let str_b = {
2028                let start = b.offset.to_usize();
2029                let len = b.length.to_usize();
2030                unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
2031            };
2032            compare(str_a, str_b)
2033        });
2034    }
2035
2036    /// Sorts the slices in-place using a key extraction function.
2037    ///
2038    /// # Examples
2039    ///
2040    /// ```rust
2041    /// use stringtape::CharsCowsU32U8;
2042    /// use std::borrow::Cow;
2043    ///
2044    /// let data = "aaa bb c";
2045    /// let mut cows = CharsCowsU32U8::from_iter_and_data(
2046    ///     data.split_whitespace(),
2047    ///     Cow::Borrowed(data.as_bytes())
2048    /// ).unwrap();
2049    ///
2050    /// // Sort by string length
2051    /// cows.sort_by_key(|s| s.len());
2052    /// let sorted: Vec<&str> = cows.iter().collect();
2053    /// assert_eq!(sorted, vec!["c", "bb", "aaa"]);
2054    /// # Ok::<(), stringtape::StringTapeError>(())
2055    /// ```
2056    pub fn sort_by_key<K, F>(&mut self, mut f: F)
2057    where
2058        F: FnMut(&str) -> K,
2059        K: Ord,
2060        Offset: OffsetType,
2061        Length: LengthType,
2062    {
2063        self.entries.sort_by_key(|entry| {
2064            let start = entry.offset.to_usize();
2065            let len = entry.length.to_usize();
2066            let s = unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) };
2067            f(s)
2068        });
2069    }
2070}
2071
2072impl<'a, Offset: OffsetType, Length: LengthType> BytesCows<'a, Offset, Length> {
2073    /// Creates BytesCows from iterator of byte slices and shared data buffer.
2074    ///
2075    /// Slices must be subslices of the data buffer. Offsets and lengths are inferred
2076    /// from slice pointers.
2077    pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2078    where
2079        I: IntoIterator,
2080        I::Item: AsRef<[u8]>,
2081    {
2082        let data_ptr = data.as_ptr() as usize;
2083        let data_end = data_ptr + data.len();
2084        let mut entries = Vec::new();
2085
2086        for b in iter {
2087            let b_ref = b.as_ref();
2088            let b_ptr = b_ref.as_ptr() as usize;
2089
2090            if b_ptr < data_ptr || b_ptr > data_end {
2091                return Err(StringTapeError::IndexOutOfBounds);
2092            }
2093
2094            let offset = b_ptr - data_ptr;
2095            let length = b_ref.len();
2096
2097            if offset + length > data.len() {
2098                return Err(StringTapeError::IndexOutOfBounds);
2099            }
2100
2101            let offset_typed = Offset::from_usize(offset).ok_or(StringTapeError::OffsetOverflow)?;
2102            let length_typed = Length::from_usize(length).ok_or(StringTapeError::OffsetOverflow)?;
2103
2104            entries.push(PackedEntry {
2105                offset: offset_typed,
2106                length: length_typed,
2107            });
2108        }
2109
2110        Ok(Self { data, entries })
2111    }
2112
2113    /// Creates BytesCows from iterator of (offset, length) pairs and data buffer.
2114    pub fn from_offsets_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2115    where
2116        I: IntoIterator<Item = (usize, usize)>,
2117    {
2118        let mut entries = Vec::new();
2119
2120        for (offset, length) in iter {
2121            let offset_typed = Offset::from_usize(offset).ok_or(StringTapeError::OffsetOverflow)?;
2122            let length_typed = Length::from_usize(length).ok_or(StringTapeError::OffsetOverflow)?;
2123
2124            let end = offset
2125                .checked_add(length)
2126                .ok_or(StringTapeError::OffsetOverflow)?;
2127            if end > data.len() {
2128                return Err(StringTapeError::IndexOutOfBounds);
2129            }
2130
2131            entries.push(PackedEntry {
2132                offset: offset_typed,
2133                length: length_typed,
2134            });
2135        }
2136
2137        Ok(Self { data, entries })
2138    }
2139
2140    /// Returns a reference to the bytes at the given index, or `None` if out of bounds.
2141    pub fn get(&self, index: usize) -> Option<&[u8]> {
2142        self.entries.get(index).map(|entry| {
2143            let start = entry.offset.to_usize();
2144            let len = entry.length.to_usize();
2145            &self.data[start..start + len]
2146        })
2147    }
2148
2149    /// Returns the number of slices in the collection.
2150    pub fn len(&self) -> usize {
2151        self.entries.len()
2152    }
2153
2154    /// Returns `true` if the collection contains no cows.
2155    pub fn is_empty(&self) -> bool {
2156        self.entries.is_empty()
2157    }
2158
2159    /// Returns an iterator over the byte cows.
2160    pub fn iter(&self) -> BytesCowsIter<'_, Offset, Length> {
2161        BytesCowsIter {
2162            slices: self,
2163            index: 0,
2164        }
2165    }
2166
2167    /// Returns a reference to the underlying data buffer.
2168    pub fn data(&self) -> &[u8] {
2169        &self.data
2170    }
2171
2172    /// Returns a zero-copy view of this `BytesCows` as a `CharsCows` if all slices are valid UTF-8.
2173    ///
2174    /// This validates that all byte slices contain valid UTF-8, then reinterprets the collection
2175    /// as strings without copying or moving any data.
2176    ///
2177    /// # Errors
2178    ///
2179    /// Returns `StringTapeError::Utf8Error` if any slice contains invalid UTF-8.
2180    ///
2181    /// # Examples
2182    ///
2183    /// ```rust
2184    /// use stringtape::BytesCowsU32U8;
2185    /// use std::borrow::Cow;
2186    ///
2187    /// let data = b"hello world";
2188    /// let bytes = BytesCowsU32U8::from_iter_and_data(
2189    ///     data.split(|&b| b == b' '),
2190    ///     Cow::Borrowed(&data[..])
2191    /// ).unwrap();
2192    ///
2193    /// let chars = bytes.as_chars().unwrap();
2194    /// assert_eq!(chars.get(0), Some("hello"));
2195    /// assert_eq!(chars.get(1), Some("world"));
2196    /// # Ok::<(), stringtape::StringTapeError>(())
2197    /// ```
2198    pub fn as_chars(&self) -> Result<CharsCows<'_, Offset, Length>, StringTapeError> {
2199        // Validate that all slices contain valid UTF-8
2200        for i in 0..self.len() {
2201            let slice = self.get(i).ok_or(StringTapeError::IndexOutOfBounds)?;
2202            core::str::from_utf8(slice).map_err(StringTapeError::Utf8Error)?;
2203        }
2204
2205        // Safety: All slices validated as UTF-8
2206        Ok(CharsCows {
2207            data: Cow::Borrowed(self.data.as_ref()),
2208            entries: self.entries.clone(),
2209        })
2210    }
2211}
2212
2213pub struct CharsCowsIter<'a, Offset: OffsetType, Length: LengthType> {
2214    slices: &'a CharsCows<'a, Offset, Length>,
2215    index: usize,
2216}
2217
2218impl<'a, Offset: OffsetType, Length: LengthType> Iterator for CharsCowsIter<'a, Offset, Length> {
2219    type Item = &'a str;
2220
2221    fn next(&mut self) -> Option<Self::Item> {
2222        let result = self.slices.get(self.index);
2223        if result.is_some() {
2224            self.index += 1;
2225        }
2226        result
2227    }
2228
2229    fn size_hint(&self) -> (usize, Option<usize>) {
2230        let remaining = self.slices.len() - self.index;
2231        (remaining, Some(remaining))
2232    }
2233}
2234
2235impl<'a, Offset: OffsetType, Length: LengthType> ExactSizeIterator
2236    for CharsCowsIter<'a, Offset, Length>
2237{
2238}
2239
2240pub struct BytesCowsIter<'a, Offset: OffsetType, Length: LengthType> {
2241    slices: &'a BytesCows<'a, Offset, Length>,
2242    index: usize,
2243}
2244
2245impl<'a, Offset: OffsetType, Length: LengthType> Iterator for BytesCowsIter<'a, Offset, Length> {
2246    type Item = &'a [u8];
2247
2248    fn next(&mut self) -> Option<Self::Item> {
2249        let result = self.slices.get(self.index);
2250        if result.is_some() {
2251            self.index += 1;
2252        }
2253        result
2254    }
2255
2256    fn size_hint(&self) -> (usize, Option<usize>) {
2257        let remaining = self.slices.len() - self.index;
2258        (remaining, Some(remaining))
2259    }
2260}
2261
2262impl<'a, Offset: OffsetType, Length: LengthType> ExactSizeIterator
2263    for BytesCowsIter<'a, Offset, Length>
2264{
2265}
2266
2267impl<'a, Offset: OffsetType, Length: LengthType> Index<usize> for CharsCows<'a, Offset, Length> {
2268    type Output = str;
2269
2270    fn index(&self, index: usize) -> &Self::Output {
2271        self.get(index).expect("index out of bounds")
2272    }
2273}
2274
2275impl<'a, Offset: OffsetType, Length: LengthType> Index<usize> for BytesCows<'a, Offset, Length> {
2276    type Output = [u8];
2277
2278    fn index(&self, index: usize) -> &Self::Output {
2279        self.get(index).expect("index out of bounds")
2280    }
2281}
2282
2283impl<'a, Offset: OffsetType, Length: LengthType> IntoIterator
2284    for &'a CharsCows<'a, Offset, Length>
2285{
2286    type Item = &'a str;
2287    type IntoIter = CharsCowsIter<'a, Offset, Length>;
2288
2289    fn into_iter(self) -> Self::IntoIter {
2290        self.iter()
2291    }
2292}
2293
2294impl<'a, Offset: OffsetType, Length: LengthType> IntoIterator
2295    for &'a BytesCows<'a, Offset, Length>
2296{
2297    type Item = &'a [u8];
2298    type IntoIter = BytesCowsIter<'a, Offset, Length>;
2299
2300    fn into_iter(self) -> Self::IntoIter {
2301        self.iter()
2302    }
2303}
2304
2305// Conversion implementations between BytesCows and CharsCows
2306impl<'a, Offset: OffsetType, Length: LengthType> TryFrom<BytesCows<'a, Offset, Length>>
2307    for CharsCows<'a, Offset, Length>
2308{
2309    type Error = StringTapeError;
2310
2311    fn try_from(bytes_slices: BytesCows<'a, Offset, Length>) -> Result<Self, Self::Error> {
2312        // Validate that all slices contain valid UTF-8
2313        for i in 0..bytes_slices.len() {
2314            let slice = bytes_slices
2315                .get(i)
2316                .ok_or(StringTapeError::IndexOutOfBounds)?;
2317            core::str::from_utf8(slice).map_err(StringTapeError::Utf8Error)?;
2318        }
2319
2320        // Safety: All slices validated as UTF-8
2321        Ok(CharsCows {
2322            data: bytes_slices.data,
2323            entries: bytes_slices.entries,
2324        })
2325    }
2326}
2327
2328impl<'a, Offset: OffsetType, Length: LengthType> From<CharsCows<'a, Offset, Length>>
2329    for BytesCows<'a, Offset, Length>
2330{
2331    fn from(chars_slices: CharsCows<'a, Offset, Length>) -> Self {
2332        // CharsCows contains valid UTF-8, so conversion to BytesCows is infallible
2333        BytesCows {
2334            data: chars_slices.data,
2335            entries: chars_slices.entries,
2336        }
2337    }
2338}
2339
2340impl<'a, Offset: OffsetType, Length: LengthType> BytesCows<'a, Offset, Length> {
2341    pub fn try_into_chars_slices(self) -> Result<CharsCows<'a, Offset, Length>, StringTapeError> {
2342        self.try_into()
2343    }
2344}
2345
2346impl<'a, Offset: OffsetType, Length: LengthType> CharsCows<'a, Offset, Length> {
2347    pub fn into_bytes_slices(self) -> BytesCows<'a, Offset, Length> {
2348        self.into()
2349    }
2350
2351    /// Returns a zero-copy view of this `CharsCows` as a `BytesCows`.
2352    ///
2353    /// This is a no-cost operation that reinterprets the string collection as bytes
2354    /// without copying or moving any data.
2355    ///
2356    /// # Examples
2357    ///
2358    /// ```rust
2359    /// use stringtape::CharsCowsU32U8;
2360    /// use std::borrow::Cow;
2361    ///
2362    /// let data = "hello world";
2363    /// let cows = CharsCowsU32U8::from_iter_and_data(
2364    ///     data.split_whitespace(),
2365    ///     Cow::Borrowed(data.as_bytes())
2366    /// ).unwrap();
2367    ///
2368    /// let bytes = cows.as_bytes();
2369    /// assert_eq!(bytes.get(0), Some(&b"hello"[..]));
2370    /// assert_eq!(bytes.get(1), Some(&b"world"[..]));
2371    /// # Ok::<(), stringtape::StringTapeError>(())
2372    /// ```
2373    pub fn as_bytes(&self) -> BytesCows<'_, Offset, Length> {
2374        BytesCows {
2375            data: Cow::Borrowed(self.data.as_ref()),
2376            entries: self.entries.clone(),
2377        }
2378    }
2379}
2380
// Type aliases for common configurations.
// The suffix names the offset integer type first, then the length integer type.

/// `CharsCows` with `u32` offsets and `u16` lengths.
pub type CharsCowsU32U16<'a> = CharsCows<'a, u32, u16>;
/// `CharsCows` with `u32` offsets and `u8` lengths.
pub type CharsCowsU32U8<'a> = CharsCows<'a, u32, u8>;
/// `CharsCows` with `u16` offsets and `u8` lengths.
pub type CharsCowsU16U8<'a> = CharsCows<'a, u16, u8>;
/// `CharsCows` with `u64` offsets and `u32` lengths.
pub type CharsCowsU64U32<'a> = CharsCows<'a, u64, u32>;

/// `BytesCows` with `u32` offsets and `u16` lengths.
pub type BytesCowsU32U16<'a> = BytesCows<'a, u32, u16>;
/// `BytesCows` with `u32` offsets and `u8` lengths.
pub type BytesCowsU32U8<'a> = BytesCows<'a, u32, u8>;
/// `BytesCows` with `u16` offsets and `u8` lengths.
pub type BytesCowsU16U8<'a> = BytesCows<'a, u16, u8>;
/// `BytesCows` with `u64` offsets and `u32` lengths.
pub type BytesCowsU64U32<'a> = BytesCows<'a, u64, u32>;
2391
2392// ========================
2393// Auto-selecting CharsCows
2394// ========================
2395
/// A `CharsCows` whose offset and length integer types are chosen at construction time.
///
/// Each variant wraps a `CharsCows` with `u32` or `u64` offsets combined with `u8`, `u16`,
/// or `u32` lengths.
pub enum CharsCowsAuto<'a> {
    /// `u32` offsets, `u8` lengths.
    U32U8(CharsCows<'a, u32, u8>),
    /// `u32` offsets, `u16` lengths.
    U32U16(CharsCows<'a, u32, u16>),
    /// `u32` offsets, `u32` lengths.
    U32U32(CharsCows<'a, u32, u32>),
    /// `u64` offsets, `u8` lengths.
    U64U8(CharsCows<'a, u64, u8>),
    /// `u64` offsets, `u16` lengths.
    U64U16(CharsCows<'a, u64, u16>),
    /// `u64` offsets, `u32` lengths.
    U64U32(CharsCows<'a, u64, u32>),
}
2407
2408impl<'a> CharsCowsAuto<'a> {
2409    /// Creates the most memory-efficient CharsCows based on data size and max word length.
2410    ///
2411    /// # Examples
2412    ///
2413    /// ```rust
2414    /// use stringtape::CharsCowsAuto;
2415    /// use std::borrow::Cow;
2416    ///
2417    /// let data = "hello world";
2418    /// let cows = CharsCowsAuto::from_iter_and_data(
2419    ///     data.split_whitespace(),
2420    ///     Cow::Borrowed(data.as_bytes())
2421    /// ).unwrap();
2422    ///
2423    /// // Automatically picks CharsCows<u32, u8> for small data
2424    /// assert_eq!(cows.len(), 2);
2425    /// # Ok::<(), stringtape::StringTapeError>(())
2426    /// ```
2427    /// Creates the most memory-efficient CharsCows using a two-pass strategy.
2428    ///
2429    /// First pass scans to find the maximum word length, then second pass builds
2430    /// with optimal types. Requires `Clone` iterator for memory efficiency.
2431    ///
2432    /// # Examples
2433    ///
2434    /// ```rust
2435    /// use stringtape::CharsCowsAuto;
2436    /// use std::borrow::Cow;
2437    ///
2438    /// let data = "hello world";
2439    /// let cows = CharsCowsAuto::from_iter_and_data(
2440    ///     data.split_whitespace(),  // Clone iterator
2441    ///     Cow::Borrowed(data.as_bytes())
2442    /// ).unwrap();
2443    ///
2444    /// assert_eq!(cows.len(), 2);
2445    /// # Ok::<(), stringtape::StringTapeError>(())
2446    /// ```
2447    pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2448    where
2449        I: IntoIterator + Clone,
2450        I::Item: AsRef<str>,
2451    {
2452        let data_len = data.len();
2453
2454        // First pass: find max word length without materializing
2455        let max_word_len = iter
2456            .clone()
2457            .into_iter()
2458            .map(|s| s.as_ref().len())
2459            .max()
2460            .unwrap_or(0);
2461
2462        // Pick smallest offset type
2463        let needs_u64_offset = data_len > u32::MAX as usize;
2464
2465        // Second pass: build with optimal types
2466        if max_word_len <= u8::MAX as usize {
2467            if needs_u64_offset {
2468                Ok(Self::U64U8(CharsCows::from_iter_and_data(iter, data)?))
2469            } else {
2470                Ok(Self::U32U8(CharsCows::from_iter_and_data(iter, data)?))
2471            }
2472        } else if max_word_len <= u16::MAX as usize {
2473            if needs_u64_offset {
2474                Ok(Self::U64U16(CharsCows::from_iter_and_data(iter, data)?))
2475            } else {
2476                Ok(Self::U32U16(CharsCows::from_iter_and_data(iter, data)?))
2477            }
2478        } else if needs_u64_offset {
2479            Ok(Self::U64U32(CharsCows::from_iter_and_data(iter, data)?))
2480        } else {
2481            Ok(Self::U32U32(CharsCows::from_iter_and_data(iter, data)?))
2482        }
2483    }
2484
2485    /// Returns the number of cows.
2486    pub fn len(&self) -> usize {
2487        match self {
2488            Self::U32U8(s) => s.len(),
2489            Self::U32U16(s) => s.len(),
2490            Self::U32U32(s) => s.len(),
2491            Self::U64U8(s) => s.len(),
2492            Self::U64U16(s) => s.len(),
2493            Self::U64U32(s) => s.len(),
2494        }
2495    }
2496
2497    /// Returns `true` if the collection contains no cows.
2498    pub fn is_empty(&self) -> bool {
2499        self.len() == 0
2500    }
2501
2502    /// Returns a reference to the string at the given index.
2503    pub fn get(&self, index: usize) -> Option<&str> {
2504        match self {
2505            Self::U32U8(s) => s.get(index),
2506            Self::U32U16(s) => s.get(index),
2507            Self::U32U32(s) => s.get(index),
2508            Self::U64U8(s) => s.get(index),
2509            Self::U64U16(s) => s.get(index),
2510            Self::U64U32(s) => s.get(index),
2511        }
2512    }
2513
2514    /// Returns the byte size per entry for the selected type combination.
2515    pub fn bytes_per_entry(&self) -> usize {
2516        match self {
2517            Self::U32U8(_) => 5,   // u32(4) + u8(1)
2518            Self::U32U16(_) => 6,  // u32(4) + u16(2)
2519            Self::U32U32(_) => 8,  // u32(4) + u32(4)
2520            Self::U64U8(_) => 9,   // u64(8) + u8(1)
2521            Self::U64U16(_) => 10, // u64(8) + u16(2)
2522            Self::U64U32(_) => 12, // u64(8) + u32(4)
2523        }
2524    }
2525
2526    /// Returns a string describing the selected type combination.
2527    pub fn type_name(&self) -> &'static str {
2528        match self {
2529            Self::U32U8(_) => "CharsCows<u32, u8>",
2530            Self::U32U16(_) => "CharsCows<u32, u16>",
2531            Self::U32U32(_) => "CharsCows<u32, u32>",
2532            Self::U64U8(_) => "CharsCows<u64, u8>",
2533            Self::U64U16(_) => "CharsCows<u64, u16>",
2534            Self::U64U32(_) => "CharsCows<u64, u32>",
2535        }
2536    }
2537
2538    /// Returns an iterator over the string cows.
2539    ///
2540    /// # Examples
2541    ///
2542    /// ```rust
2543    /// use stringtape::CharsCowsAuto;
2544    /// use std::borrow::Cow;
2545    ///
2546    /// let data = "hello world foo";
2547    /// let cows = CharsCowsAuto::from_iter_and_data(
2548    ///     data.split_whitespace(),
2549    ///     Cow::Borrowed(data.as_bytes())
2550    /// ).unwrap();
2551    ///
2552    /// let words: Vec<&str> = cows.iter().collect();
2553    /// assert_eq!(words, vec!["hello", "world", "foo"]);
2554    /// # Ok::<(), stringtape::StringTapeError>(())
2555    /// ```
2556    pub fn iter(&self) -> CharsCowsAutoIter<'_> {
2557        CharsCowsAutoIter {
2558            inner: self,
2559            index: 0,
2560        }
2561    }
2562
2563    /// Sorts the slices in-place using the default string comparison.
2564    ///
2565    /// This is a stable sort that preserves the order of equal elements.
2566    ///
2567    /// # Examples
2568    ///
2569    /// ```rust
2570    /// use stringtape::CharsCowsAuto;
2571    /// use std::borrow::Cow;
2572    ///
2573    /// let data = "zebra apple banana";
2574    /// let mut cows = CharsCowsAuto::from_iter_and_data(
2575    ///     data.split_whitespace(),
2576    ///     Cow::Borrowed(data.as_bytes())
2577    /// ).unwrap();
2578    ///
2579    /// cows.sort();
2580    /// let sorted: Vec<&str> = cows.iter().collect();
2581    /// assert_eq!(sorted, vec!["apple", "banana", "zebra"]);
2582    /// # Ok::<(), stringtape::StringTapeError>(())
2583    /// ```
2584    pub fn sort(&mut self) {
2585        match self {
2586            Self::U32U8(s) => s.sort(),
2587            Self::U32U16(s) => s.sort(),
2588            Self::U32U32(s) => s.sort(),
2589            Self::U64U8(s) => s.sort(),
2590            Self::U64U16(s) => s.sort(),
2591            Self::U64U32(s) => s.sort(),
2592        }
2593    }
2594
2595    /// Sorts the slices in-place using an unstable sorting algorithm.
2596    ///
2597    /// This is faster than stable sort but may not preserve the order of equal elements.
2598    pub fn sort_unstable(&mut self) {
2599        match self {
2600            Self::U32U8(s) => s.sort_unstable(),
2601            Self::U32U16(s) => s.sort_unstable(),
2602            Self::U32U32(s) => s.sort_unstable(),
2603            Self::U64U8(s) => s.sort_unstable(),
2604            Self::U64U16(s) => s.sort_unstable(),
2605            Self::U64U32(s) => s.sort_unstable(),
2606        }
2607    }
2608
2609    /// Sorts the slices in-place using a custom comparison function.
2610    ///
2611    /// # Examples
2612    ///
2613    /// ```rust
2614    /// use stringtape::CharsCowsAuto;
2615    /// use std::borrow::Cow;
2616    ///
2617    /// let data = "aaa bb c";
2618    /// let mut cows = CharsCowsAuto::from_iter_and_data(
2619    ///     data.split_whitespace(),
2620    ///     Cow::Borrowed(data.as_bytes())
2621    /// ).unwrap();
2622    ///
2623    /// // Sort by length, then alphabetically
2624    /// cows.sort_by(|a, b| a.len().cmp(&b.len()).then(a.cmp(b)));
2625    /// let sorted: Vec<&str> = cows.iter().collect();
2626    /// assert_eq!(sorted, vec!["c", "bb", "aaa"]);
2627    /// # Ok::<(), stringtape::StringTapeError>(())
2628    /// ```
2629    pub fn sort_by<F>(&mut self, compare: F)
2630    where
2631        F: FnMut(&str, &str) -> core::cmp::Ordering,
2632    {
2633        match self {
2634            Self::U32U8(s) => s.sort_by(compare),
2635            Self::U32U16(s) => s.sort_by(compare),
2636            Self::U32U32(s) => s.sort_by(compare),
2637            Self::U64U8(s) => s.sort_by(compare),
2638            Self::U64U16(s) => s.sort_by(compare),
2639            Self::U64U32(s) => s.sort_by(compare),
2640        }
2641    }
2642
2643    /// Sorts the slices in-place using a key extraction function.
2644    ///
2645    /// # Examples
2646    ///
2647    /// ```rust
2648    /// use stringtape::CharsCowsAuto;
2649    /// use std::borrow::Cow;
2650    ///
2651    /// let data = "aaa bb c";
2652    /// let mut cows = CharsCowsAuto::from_iter_and_data(
2653    ///     data.split_whitespace(),
2654    ///     Cow::Borrowed(data.as_bytes())
2655    /// ).unwrap();
2656    ///
2657    /// // Sort by string length
2658    /// cows.sort_by_key(|s| s.len());
2659    /// let sorted: Vec<&str> = cows.iter().collect();
2660    /// assert_eq!(sorted, vec!["c", "bb", "aaa"]);
2661    /// # Ok::<(), stringtape::StringTapeError>(())
2662    /// ```
2663    pub fn sort_by_key<K, F>(&mut self, f: F)
2664    where
2665        F: FnMut(&str) -> K,
2666        K: Ord,
2667    {
2668        match self {
2669            Self::U32U8(s) => s.sort_by_key(f),
2670            Self::U32U16(s) => s.sort_by_key(f),
2671            Self::U32U32(s) => s.sort_by_key(f),
2672            Self::U64U8(s) => s.sort_by_key(f),
2673            Self::U64U16(s) => s.sort_by_key(f),
2674            Self::U64U32(s) => s.sort_by_key(f),
2675        }
2676    }
2677
2678    /// Returns a zero-copy view of this `CharsCowsAuto` as a `BytesCowsAuto`.
2679    ///
2680    /// This is a no-cost operation that reinterprets the string collection as bytes
2681    /// without copying or moving any data.
2682    ///
2683    /// # Examples
2684    ///
2685    /// ```rust
2686    /// use stringtape::CharsCowsAuto;
2687    /// use std::borrow::Cow;
2688    ///
2689    /// let data = "hello world";
2690    /// let cows = CharsCowsAuto::from_iter_and_data(
2691    ///     data.split_whitespace(),
2692    ///     Cow::Borrowed(data.as_bytes())
2693    /// ).unwrap();
2694    ///
2695    /// let bytes = cows.as_bytes();
2696    /// assert_eq!(bytes.get(0), Some(&b"hello"[..]));
2697    /// assert_eq!(bytes.get(1), Some(&b"world"[..]));
2698    /// # Ok::<(), stringtape::StringTapeError>(())
2699    /// ```
2700    pub fn as_bytes(&self) -> BytesCowsAuto<'_> {
2701        match self {
2702            Self::U32U8(s) => BytesCowsAuto::U32U8(s.as_bytes()),
2703            Self::U32U16(s) => BytesCowsAuto::U32U16(s.as_bytes()),
2704            Self::U32U32(s) => BytesCowsAuto::U32U32(s.as_bytes()),
2705            Self::U64U8(s) => BytesCowsAuto::U64U8(s.as_bytes()),
2706            Self::U64U16(s) => BytesCowsAuto::U64U16(s.as_bytes()),
2707            Self::U64U32(s) => BytesCowsAuto::U64U32(s.as_bytes()),
2708        }
2709    }
2710}
2711
2712/// Iterator over CharsCowsAuto string cows.
2713pub struct CharsCowsAutoIter<'a> {
2714    inner: &'a CharsCowsAuto<'a>,
2715    index: usize,
2716}
2717
2718impl<'a> Iterator for CharsCowsAutoIter<'a> {
2719    type Item = &'a str;
2720
2721    fn next(&mut self) -> Option<Self::Item> {
2722        let result = self.inner.get(self.index);
2723        if result.is_some() {
2724            self.index += 1;
2725        }
2726        result
2727    }
2728
2729    fn size_hint(&self) -> (usize, Option<usize>) {
2730        let remaining = self.inner.len() - self.index;
2731        (remaining, Some(remaining))
2732    }
2733}
2734
2735impl<'a> ExactSizeIterator for CharsCowsAutoIter<'a> {}
2736
2737impl<'a> IntoIterator for &'a CharsCowsAuto<'a> {
2738    type Item = &'a str;
2739    type IntoIter = CharsCowsAutoIter<'a>;
2740
2741    fn into_iter(self) -> Self::IntoIter {
2742        self.iter()
2743    }
2744}
2745
2746// ========================
2747// Auto-selecting BytesCows
2748// ========================
2749
/// A `BytesCows` whose offset and length integer types are chosen at construction time.
///
/// Each variant wraps a `BytesCows` with `u32` or `u64` offsets combined with `u8`, `u16`,
/// or `u32` lengths.
pub enum BytesCowsAuto<'a> {
    /// `u32` offsets, `u8` lengths.
    U32U8(BytesCows<'a, u32, u8>),
    /// `u32` offsets, `u16` lengths.
    U32U16(BytesCows<'a, u32, u16>),
    /// `u32` offsets, `u32` lengths.
    U32U32(BytesCows<'a, u32, u32>),
    /// `u64` offsets, `u8` lengths.
    U64U8(BytesCows<'a, u64, u8>),
    /// `u64` offsets, `u16` lengths.
    U64U16(BytesCows<'a, u64, u16>),
    /// `u64` offsets, `u32` lengths.
    U64U32(BytesCows<'a, u64, u32>),
}
2759
2760impl<'a> BytesCowsAuto<'a> {
2761    /// Creates BytesCowsAuto from iterator of byte cows.
2762    /// Auto-selects offset and length types based on data size and max slice length.
2763    pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2764    where
2765        I: IntoIterator + Clone,
2766        I::Item: AsRef<[u8]>,
2767    {
2768        let data_len = data.len();
2769
2770        // First pass: find max slice length
2771        let max_len = iter
2772            .clone()
2773            .into_iter()
2774            .map(|b| b.as_ref().len())
2775            .max()
2776            .unwrap_or(0);
2777
2778        let needs_u64_offset = data_len > u32::MAX as usize;
2779
2780        // Second pass: build with optimal types
2781        if max_len <= u8::MAX as usize {
2782            if needs_u64_offset {
2783                Ok(Self::U64U8(BytesCows::from_iter_and_data(iter, data)?))
2784            } else {
2785                Ok(Self::U32U8(BytesCows::from_iter_and_data(iter, data)?))
2786            }
2787        } else if max_len <= u16::MAX as usize {
2788            if needs_u64_offset {
2789                Ok(Self::U64U16(BytesCows::from_iter_and_data(iter, data)?))
2790            } else {
2791                Ok(Self::U32U16(BytesCows::from_iter_and_data(iter, data)?))
2792            }
2793        } else if needs_u64_offset {
2794            Ok(Self::U64U32(BytesCows::from_iter_and_data(iter, data)?))
2795        } else {
2796            Ok(Self::U32U32(BytesCows::from_iter_and_data(iter, data)?))
2797        }
2798    }
2799
2800    pub fn len(&self) -> usize {
2801        match self {
2802            Self::U32U8(s) => s.len(),
2803            Self::U32U16(s) => s.len(),
2804            Self::U32U32(s) => s.len(),
2805            Self::U64U8(s) => s.len(),
2806            Self::U64U16(s) => s.len(),
2807            Self::U64U32(s) => s.len(),
2808        }
2809    }
2810
2811    pub fn is_empty(&self) -> bool {
2812        self.len() == 0
2813    }
2814
2815    pub fn get(&self, index: usize) -> Option<&[u8]> {
2816        match self {
2817            Self::U32U8(s) => s.get(index),
2818            Self::U32U16(s) => s.get(index),
2819            Self::U32U32(s) => s.get(index),
2820            Self::U64U8(s) => s.get(index),
2821            Self::U64U16(s) => s.get(index),
2822            Self::U64U32(s) => s.get(index),
2823        }
2824    }
2825
2826    /// Returns a zero-copy view of this `BytesCowsAuto` as a `CharsCowsAuto` if all slices are valid UTF-8.
2827    ///
2828    /// This validates that all byte slices contain valid UTF-8, then reinterprets the collection
2829    /// as strings without copying or moving any data.
2830    ///
2831    /// # Errors
2832    ///
2833    /// Returns `StringTapeError::Utf8Error` if any slice contains invalid UTF-8.
2834    ///
2835    /// # Examples
2836    ///
2837    /// ```rust
2838    /// use stringtape::BytesCowsAuto;
2839    /// use std::borrow::Cow;
2840    ///
2841    /// let data = b"hello world";
2842    /// let bytes = BytesCowsAuto::from_iter_and_data(
2843    ///     data.split(|&b| b == b' '),
2844    ///     Cow::Borrowed(&data[..])
2845    /// ).unwrap();
2846    ///
2847    /// let chars = bytes.as_chars().unwrap();
2848    /// assert_eq!(chars.get(0), Some("hello"));
2849    /// assert_eq!(chars.get(1), Some("world"));
2850    /// # Ok::<(), stringtape::StringTapeError>(())
2851    /// ```
2852    pub fn as_chars(&self) -> Result<CharsCowsAuto<'_>, StringTapeError> {
2853        match self {
2854            Self::U32U8(s) => Ok(CharsCowsAuto::U32U8(s.as_chars()?)),
2855            Self::U32U16(s) => Ok(CharsCowsAuto::U32U16(s.as_chars()?)),
2856            Self::U32U32(s) => Ok(CharsCowsAuto::U32U32(s.as_chars()?)),
2857            Self::U64U8(s) => Ok(CharsCowsAuto::U64U8(s.as_chars()?)),
2858            Self::U64U16(s) => Ok(CharsCowsAuto::U64U16(s.as_chars()?)),
2859            Self::U64U32(s) => Ok(CharsCowsAuto::U64U32(s.as_chars()?)),
2860        }
2861    }
2862}
2863
2864// ========================
2865// Auto-selecting CharsTape
2866// ========================
2867
/// Automatically selects the most memory-efficient CharsTape offset type.
///
/// Each variant wraps a `CharsTape` with a different offset integer type.
pub enum CharsTapeAuto<A: Allocator = Global> {
    /// Tape with `i32` offsets.
    I32(CharsTape<i32, A>),
    /// Tape with `u32` offsets.
    U32(CharsTape<u32, A>),
    /// Tape with `u64` offsets.
    U64(CharsTape<u64, A>),
}
2874
2875impl<A: Allocator> CharsTapeAuto<A> {
2876    /// Creates CharsTapeAuto with custom allocator.
2877    pub fn new_in(allocator: A) -> Self {
2878        Self::I32(CharsTape::new_in(allocator))
2879    }
2880
2881    pub fn push(&mut self, s: &str) -> Result<(), StringTapeError> {
2882        match self {
2883            Self::I32(t) => t.push(s),
2884            Self::U32(t) => t.push(s),
2885            Self::U64(t) => t.push(s),
2886        }
2887    }
2888
2889    pub fn len(&self) -> usize {
2890        match self {
2891            Self::I32(t) => t.len(),
2892            Self::U32(t) => t.len(),
2893            Self::U64(t) => t.len(),
2894        }
2895    }
2896
2897    pub fn is_empty(&self) -> bool {
2898        self.len() == 0
2899    }
2900
2901    pub fn get(&self, index: usize) -> Option<&str> {
2902        match self {
2903            Self::I32(t) => t.get(index),
2904            Self::U32(t) => t.get(index),
2905            Self::U64(t) => t.get(index),
2906        }
2907    }
2908}
2909
2910impl Default for CharsTapeAuto<Global> {
2911    fn default() -> Self {
2912        Self::new_in(Global)
2913    }
2914}
2915
2916impl<A: Allocator + Clone> CharsTapeAuto<A> {
2917    /// Creates tape from clonable iterator, auto-selecting offset type (I32/U32/U64) based on total data size.
2918    /// Two-pass: first calculates size, second builds tape.
2919    pub fn from_iter_in<'a, I>(iter: I, allocator: A) -> Self
2920    where
2921        I: IntoIterator<Item = &'a str> + Clone,
2922    {
2923        // First pass: calculate total data size to determine offset type
2924        let total_size: usize = iter.clone().into_iter().map(|s| s.len()).sum();
2925
2926        // Choose optimal type based on data size
2927        if total_size <= i32::MAX as usize {
2928            let mut tape = CharsTape::new_in(allocator);
2929            for s in iter {
2930                tape.push(s).ok();
2931            }
2932            Self::I32(tape)
2933        } else if total_size <= u32::MAX as usize {
2934            let mut tape = CharsTape::new_in(allocator);
2935            for s in iter {
2936                tape.push(s).ok();
2937            }
2938            Self::U32(tape)
2939        } else {
2940            let mut tape = CharsTape::new_in(allocator);
2941            for s in iter {
2942                tape.push(s).ok();
2943            }
2944            Self::U64(tape)
2945        }
2946    }
2947}
2948
2949impl CharsTapeAuto<Global> {
2950    /// Creates tape from clonable iterator with global allocator.
2951    #[allow(clippy::should_implement_trait)]
2952    pub fn from_iter<'a, I>(iter: I) -> Self
2953    where
2954        I: IntoIterator<Item = &'a str> + Clone,
2955    {
2956        Self::from_iter_in(iter, Global)
2957    }
2958}
2959
2960// ========================
2961// Auto-selecting BytesTape
2962// ========================
2963
/// Automatically selects the most memory-efficient BytesTape offset type.
///
/// Each variant wraps a `BytesTape` with a different offset integer type.
pub enum BytesTapeAuto<A: Allocator = Global> {
    /// Tape with `u16` offsets.
    U16(BytesTape<u16, A>),
    /// Tape with `u32` offsets.
    U32(BytesTape<u32, A>),
    /// Tape with `u64` offsets.
    U64(BytesTape<u64, A>),
}
2970
2971impl<A: Allocator> BytesTapeAuto<A> {
2972    /// Creates BytesTapeAuto with custom allocator.
2973    pub fn new_in(allocator: A) -> Self {
2974        Self::U16(BytesTape::new_in(allocator))
2975    }
2976
2977    pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
2978        match self {
2979            Self::U16(t) => t.push(bytes),
2980            Self::U32(t) => t.push(bytes),
2981            Self::U64(t) => t.push(bytes),
2982        }
2983    }
2984
2985    pub fn len(&self) -> usize {
2986        match self {
2987            Self::U16(t) => t.len(),
2988            Self::U32(t) => t.len(),
2989            Self::U64(t) => t.len(),
2990        }
2991    }
2992
2993    pub fn is_empty(&self) -> bool {
2994        self.len() == 0
2995    }
2996
2997    pub fn get(&self, index: usize) -> Option<&[u8]> {
2998        match self {
2999            Self::U16(t) => t.get(index),
3000            Self::U32(t) => t.get(index),
3001            Self::U64(t) => t.get(index),
3002        }
3003    }
3004}
3005
3006impl Default for BytesTapeAuto<Global> {
3007    fn default() -> Self {
3008        Self::new_in(Global)
3009    }
3010}
3011
3012impl<A: Allocator + Clone> BytesTapeAuto<A> {
3013    /// Creates tape from clonable iterator, auto-selecting offset type (U16/U32/U64) based on total data size.
3014    /// Two-pass: first calculates size, second builds tape.
3015    pub fn from_iter_in<'a, I>(iter: I, allocator: A) -> Self
3016    where
3017        I: IntoIterator<Item = &'a [u8]> + Clone,
3018    {
3019        // First pass: calculate total data size to determine offset type
3020        let total_size: usize = iter.clone().into_iter().map(|b| b.len()).sum();
3021
3022        // Choose optimal type based on data size
3023        if total_size <= u16::MAX as usize {
3024            let mut tape = BytesTape::new_in(allocator);
3025            for bytes in iter {
3026                tape.push(bytes).ok();
3027            }
3028            Self::U16(tape)
3029        } else if total_size <= u32::MAX as usize {
3030            let mut tape = BytesTape::new_in(allocator);
3031            for bytes in iter {
3032                tape.push(bytes).ok();
3033            }
3034            Self::U32(tape)
3035        } else {
3036            let mut tape = BytesTape::new_in(allocator);
3037            for bytes in iter {
3038                tape.push(bytes).ok();
3039            }
3040            Self::U64(tape)
3041        }
3042    }
3043}
3044
3045impl BytesTapeAuto<Global> {
3046    /// Creates tape from clonable iterator with global allocator.
3047    #[allow(clippy::should_implement_trait)]
3048    pub fn from_iter<'a, I>(iter: I) -> Self
3049    where
3050        I: IntoIterator<Item = &'a [u8]> + Clone,
3051    {
3052        Self::from_iter_in(iter, Global)
3053    }
3054}
3055
3056#[cfg(test)]
3057mod tests {
3058    use super::*;
3059
3060    #[cfg(not(feature = "std"))]
3061    use alloc::string::ToString;
3062    #[cfg(not(feature = "std"))]
3063    use alloc::vec;
3064    #[cfg(not(feature = "std"))]
3065    use alloc::vec::Vec;
3066
3067    #[test]
3068    fn basic_operations() {
3069        let mut tape = CharsTapeI32::new();
3070        assert!(tape.is_empty());
3071
3072        tape.push("hello").unwrap();
3073        tape.push("world").unwrap();
3074        tape.push("foo").unwrap();
3075
3076        assert_eq!(tape.len(), 3);
3077        assert_eq!(tape.get(0), Some("hello"));
3078        assert_eq!(tape.get(1), Some("world"));
3079        assert_eq!(tape.get(2), Some("foo"));
3080        assert_eq!(tape.get(3), None);
3081    }
3082
3083    #[test]
3084    fn unsigned_basic_operations() {
3085        // u32
3086        let mut t32 = CharsTapeU32::new();
3087        t32.push("hello").unwrap();
3088        t32.push("world").unwrap();
3089        assert_eq!(t32.len(), 2);
3090        assert_eq!(t32.get(0), Some("hello"));
3091        assert_eq!(t32.get(1), Some("world"));
3092
3093        // u64
3094        let mut t64 = CharsTapeU64::new();
3095        t64.extend(["a", "", "bbb"]).unwrap();
3096        assert_eq!(t64.len(), 3);
3097        assert_eq!(t64.get(0), Some("a"));
3098        assert_eq!(t64.get(1), Some(""));
3099        assert_eq!(t64.get(2), Some("bbb"));
3100    }
3101
3102    #[test]
3103    fn offsets_64bit() {
3104        let mut tape = CharsTapeI64::new();
3105        tape.push("test").unwrap();
3106        assert_eq!(tape.get(0), Some("test"));
3107    }
3108
3109    #[test]
3110    fn iterator_basics() {
3111        let mut tape = CharsTapeI32::new();
3112        tape.push("a").unwrap();
3113        tape.push("b").unwrap();
3114        tape.push("c").unwrap();
3115
3116        let strings: Vec<&str> = tape.iter().collect();
3117        assert_eq!(strings, vec!["a", "b", "c"]);
3118    }
3119
3120    #[test]
3121    fn empty_strings() {
3122        let mut tape = CharsTapeI32::new();
3123        tape.push("").unwrap();
3124        tape.push("non-empty").unwrap();
3125        tape.push("").unwrap();
3126
3127        assert_eq!(tape.len(), 3);
3128        assert_eq!(tape.get(0), Some(""));
3129        assert_eq!(tape.get(1), Some("non-empty"));
3130        assert_eq!(tape.get(2), Some(""));
3131    }
3132
3133    #[test]
3134    fn index_trait() {
3135        let mut tape = CharsTapeI32::new();
3136        tape.push("hello").unwrap();
3137        tape.push("world").unwrap();
3138
3139        assert_eq!(&tape[0], "hello");
3140        assert_eq!(&tape[1], "world");
3141    }
3142
3143    #[test]
3144    fn into_iterator() {
3145        let mut tape = CharsTapeI32::new();
3146        tape.push("a").unwrap();
3147        tape.push("b").unwrap();
3148        tape.push("c").unwrap();
3149
3150        let strings: Vec<&str> = (&tape).into_iter().collect();
3151        assert_eq!(strings, vec!["a", "b", "c"]);
3152
3153        // Test for-loop syntax
3154        let mut result = Vec::new();
3155        for s in &tape {
3156            result.push(s);
3157        }
3158        assert_eq!(result, vec!["a", "b", "c"]);
3159    }
3160
3161    #[test]
3162    fn from_iterator() {
3163        let strings = vec!["hello", "world", "test"];
3164        let tape: CharsTapeI32 = strings.into_iter().collect();
3165
3166        assert_eq!(tape.len(), 3);
3167        assert_eq!(tape.get(0), Some("hello"));
3168        assert_eq!(tape.get(1), Some("world"));
3169        assert_eq!(tape.get(2), Some("test"));
3170    }
3171
3172    #[test]
3173    fn from_iterator_unsigned() {
3174        let strings = vec!["hello", "world", "test"];
3175        let tape_u32: CharsTapeU32 = strings.clone().into_iter().collect();
3176        let tape_u64: CharsTapeU64 = strings.clone().into_iter().collect();
3177        assert_eq!(tape_u32.len(), 3);
3178        assert_eq!(tape_u64.len(), 3);
3179        assert_eq!(tape_u32.get(1), Some("world"));
3180        assert_eq!(tape_u64.get(2), Some("test"));
3181    }
3182
3183    #[test]
3184    fn extend() {
3185        let mut tape = CharsTapeI32::new();
3186        tape.push("initial").unwrap();
3187
3188        let additional = vec!["hello", "world"];
3189        tape.extend(additional).unwrap();
3190
3191        assert_eq!(tape.len(), 3);
3192        assert_eq!(tape.get(0), Some("initial"));
3193        assert_eq!(tape.get(1), Some("hello"));
3194        assert_eq!(tape.get(2), Some("world"));
3195    }
3196
3197    #[test]
3198    fn clear_and_truncate() {
3199        let mut tape = CharsTapeI32::new();
3200        tape.push("a").unwrap();
3201        tape.push("b").unwrap();
3202        tape.push("c").unwrap();
3203
3204        assert_eq!(tape.len(), 3);
3205
3206        tape.truncate(2);
3207        assert_eq!(tape.len(), 2);
3208        assert_eq!(tape.get(0), Some("a"));
3209        assert_eq!(tape.get(1), Some("b"));
3210        assert_eq!(tape.get(2), None);
3211
3212        tape.clear();
3213        assert_eq!(tape.len(), 0);
3214        assert!(tape.is_empty());
3215    }
3216
3217    #[test]
3218    fn unsigned_views_and_subviews() {
3219        let mut tape = CharsTapeU32::new();
3220        tape.extend(["0", "1", "22", "333"]).unwrap();
3221        let view = tape.subview(1, 4).unwrap();
3222        assert_eq!(view.len(), 3);
3223        assert_eq!(view.get(0), Some("1"));
3224        assert_eq!(view.get(2), Some("333"));
3225        let sub = view.subview(1, 2).unwrap();
3226        assert_eq!(sub.len(), 1);
3227        assert_eq!(sub.get(0), Some("22"));
3228    }
3229
3230    #[test]
3231    fn capacity() {
3232        let tape = CharsTapeI32::with_capacity(100, 10).unwrap();
3233        assert_eq!(tape.data_capacity(), 100);
3234        assert_eq!(tape.capacity(), 0); // No strings added yet
3235    }
3236
3237    #[test]
3238    fn custom_allocator() {
3239        // Using the Global allocator explicitly
3240        let mut tape: CharsTape<i32, Global> = CharsTape::new_in(Global);
3241
3242        tape.push("hello").unwrap();
3243        tape.push("world").unwrap();
3244
3245        assert_eq!(tape.len(), 2);
3246        assert_eq!(tape.get(0), Some("hello"));
3247        assert_eq!(tape.get(1), Some("world"));
3248
3249        // Verify we can access the allocator
3250        let _allocator_ref = tape.allocator();
3251    }
3252
3253    #[test]
3254    fn custom_allocator_with_capacity() {
3255        let tape: CharsTape<i64, Global> = CharsTape::with_capacity_in(256, 50, Global).unwrap();
3256
3257        assert_eq!(tape.data_capacity(), 256);
3258        assert!(tape.is_empty());
3259    }
3260
3261    #[test]
3262    fn bytes_tape_basic() {
3263        let mut tape = BytesTapeI32::new();
3264        tape.push(&[1, 2, 3]).unwrap();
3265        tape.push(b"abc").unwrap();
3266
3267        assert_eq!(tape.len(), 2);
3268        assert_eq!(&tape[0], &[1u8, 2, 3] as &[u8]);
3269        assert_eq!(&tape[1], b"abc" as &[u8]);
3270    }
3271
3272    #[test]
3273    fn unsigned_bytes_tape_basic() {
3274        let mut tape = BytesTapeU64::new();
3275        tape.push(&[1u8, 2]).unwrap();
3276        tape.push(&[3u8, 4, 5]).unwrap();
3277        assert_eq!(tape.len(), 2);
3278        assert_eq!(&tape[0], &[1u8, 2] as &[u8]);
3279        assert_eq!(&tape[1], &[3u8, 4, 5] as &[u8]);
3280    }
3281
3282    #[test]
3283    fn chars_tape_view_basic() {
3284        let mut tape = CharsTapeI32::new();
3285        tape.push("hello").unwrap();
3286        tape.push("world").unwrap();
3287        tape.push("foo").unwrap();
3288        tape.push("bar").unwrap();
3289
3290        // Test basic subview creation
3291        let view = tape.subview(1, 3).unwrap();
3292        assert_eq!(view.len(), 2);
3293        assert_eq!(view.get(0), Some("world"));
3294        assert_eq!(view.get(1), Some("foo"));
3295        assert_eq!(view.get(2), None);
3296
3297        // Test indexing
3298        assert_eq!(&view[0], "world");
3299        assert_eq!(&view[1], "foo");
3300    }
3301
3302    #[test]
3303    fn chars_tape_range_syntax() {
3304        let mut tape = CharsTapeI32::new();
3305        tape.push("a").unwrap();
3306        tape.push("b").unwrap();
3307        tape.push("c").unwrap();
3308        tape.push("d").unwrap();
3309
3310        // Test view() method
3311        let full_view = tape.view();
3312        assert_eq!(full_view.len(), 4);
3313        assert_eq!(full_view.get(0), Some("a"));
3314        assert_eq!(full_view.get(3), Some("d"));
3315
3316        // Test subview
3317        let sub = tape.subview(1, 3).unwrap();
3318        assert_eq!(sub.len(), 2);
3319        assert_eq!(sub.get(0), Some("b"));
3320        assert_eq!(sub.get(1), Some("c"));
3321    }
3322
3323    #[test]
3324    fn chars_tape_view_subslicing() {
3325        let mut tape = CharsTapeI32::new();
3326        tape.push("0").unwrap();
3327        tape.push("1").unwrap();
3328        tape.push("2").unwrap();
3329        tape.push("3").unwrap();
3330        tape.push("4").unwrap();
3331
3332        // Create initial subview
3333        let view = tape.subview(1, 4).unwrap(); // ["1", "2", "3"]
3334        assert_eq!(view.len(), 3);
3335
3336        // Create sub-view of a view
3337        let subview = view.subview(1, 2).unwrap(); // ["2"]
3338        assert_eq!(subview.len(), 1);
3339        assert_eq!(subview.get(0), Some("2"));
3340
3341        // Test subviews with different ranges
3342        let subview_from = view.subview(1, view.len()).unwrap(); // ["2", "3"]
3343        assert_eq!(subview_from.len(), 2);
3344        assert_eq!(subview_from.get(0), Some("2"));
3345        assert_eq!(subview_from.get(1), Some("3"));
3346
3347        let subview_to = view.subview(0, 2).unwrap(); // ["1", "2"]
3348        assert_eq!(subview_to.len(), 2);
3349        assert_eq!(subview_to.get(0), Some("1"));
3350        assert_eq!(subview_to.get(1), Some("2"));
3351    }
3352
3353    #[test]
3354    fn bytes_tape_view_basic() {
3355        let mut tape = BytesTapeI32::new();
3356        tape.push(&[1u8, 2]).unwrap();
3357        tape.push(&[3u8, 4]).unwrap();
3358        tape.push(&[5u8, 6]).unwrap();
3359        tape.push(&[7u8, 8]).unwrap();
3360
3361        // Test basic subview creation
3362        let view = tape.subview(1, 3).unwrap();
3363        assert_eq!(view.len(), 2);
3364        assert_eq!(view.get(0), Some(&[3u8, 4] as &[u8]));
3365        assert_eq!(view.get(1), Some(&[5u8, 6] as &[u8]));
3366        assert_eq!(view.get(2), None);
3367
3368        // Test indexing
3369        assert_eq!(&view[0], &[3u8, 4] as &[u8]);
3370        assert_eq!(&view[1], &[5u8, 6] as &[u8]);
3371    }
3372
3373    #[test]
3374    fn view_empty_strings() {
3375        let mut tape = CharsTapeI32::new();
3376        tape.push("").unwrap();
3377        tape.push("non-empty").unwrap();
3378        tape.push("").unwrap();
3379        tape.push("another").unwrap();
3380
3381        let view = tape.subview(0, 3).unwrap();
3382        assert_eq!(view.len(), 3);
3383        assert_eq!(view.get(0), Some(""));
3384        assert_eq!(view.get(1), Some("non-empty"));
3385        assert_eq!(view.get(2), Some(""));
3386    }
3387
3388    #[test]
3389    fn view_single_item() {
3390        let mut tape = CharsTapeI32::new();
3391        tape.push("only").unwrap();
3392
3393        let view = tape.subview(0, 1).unwrap();
3394        assert_eq!(view.len(), 1);
3395        assert_eq!(view.get(0), Some("only"));
3396    }
3397
3398    #[test]
3399    fn view_bounds_checking() {
3400        let mut tape = CharsTapeI32::new();
3401        tape.push("a").unwrap();
3402        tape.push("b").unwrap();
3403
3404        // Out of bounds subview creation
3405        assert!(tape.subview(0, 3).is_err());
3406        assert!(tape.subview(2, 1).is_err());
3407        assert!(tape.subview(3, 4).is_err());
3408
3409        // Valid empty subview
3410        let empty_view = tape.subview(1, 1).unwrap();
3411        assert_eq!(empty_view.len(), 0);
3412        assert!(empty_view.is_empty());
3413    }
3414
3415    #[test]
3416    fn view_data_properties() {
3417        let mut tape = CharsTapeI32::new();
3418        tape.push("hello").unwrap(); // 5 bytes
3419        tape.push("world").unwrap(); // 5 bytes
3420        tape.push("!").unwrap(); // 1 byte
3421
3422        let view = tape.subview(0, 2).unwrap(); // "hello", "world" = 10 bytes
3423        assert_eq!(view.data_len(), 10);
3424        assert!(!view.is_empty());
3425
3426        let full_view = tape.subview(0, 3).unwrap(); // all = 11 bytes
3427        assert_eq!(full_view.data_len(), 11);
3428    }
3429
3430    #[test]
3431    fn view_raw_parts() {
3432        let mut tape = CharsTapeI32::new();
3433        tape.push("test").unwrap();
3434        tape.push("data").unwrap();
3435
3436        let view = tape.subview(0, 2).unwrap();
3437        let parts = view.as_raw_parts();
3438
3439        assert!(!parts.data_ptr.is_null());
3440        assert!(!parts.offsets_ptr.is_null());
3441        assert_eq!(parts.data_len, 8); // "test" + "data"
3442        assert_eq!(parts.items_count, 2);
3443    }
3444
3445    #[test]
3446    fn subview_raw_parts_consistency_chars() {
3447        let mut tape = CharsTapeI32::new();
3448        tape.extend(["abc", "", "xyz", "pq"]).unwrap();
3449
3450        // Subview over middle two items: ["", "xyz"]
3451        let view = tape.subview(1, 3).unwrap();
3452        let parts = view.as_raw_parts();
3453
3454        // Offsets len must be items_count + 1 and data_len equals absolute last offset
3455        unsafe {
3456            let offsets: &[i32] =
3457                core::slice::from_raw_parts(parts.offsets_ptr, parts.items_count + 1);
3458            assert_eq!(offsets.len(), parts.items_count + 1);
3459            assert!(offsets.windows(2).all(|w| w[0] <= w[1]));
3460            let last_abs = offsets[offsets.len() - 1] as usize;
3461            assert_eq!(last_abs, parts.data_len);
3462        }
3463
3464        // Also check that element boundaries are respected
3465        assert_eq!(view.len(), 2);
3466        assert_eq!(view.get(0), Some(""));
3467        assert_eq!(view.get(1), Some("xyz"));
3468    }
3469
3470    #[test]
3471    fn subview_raw_parts_consistency_bytes() {
3472        let mut tape = BytesTapeI32::new();
3473        tape.extend([
3474            b"a".as_slice(),
3475            b"".as_slice(),
3476            b"bc".as_slice(),
3477            b"def".as_slice(),
3478        ])
3479        .unwrap();
3480
3481        // Subview over last two items: ["bc", "def"]
3482        let view = tape.subview(2, 4).unwrap();
3483        let parts = view.as_raw_parts();
3484
3485        unsafe {
3486            let offsets: &[i32] =
3487                core::slice::from_raw_parts(parts.offsets_ptr, parts.items_count + 1);
3488            assert_eq!(offsets.len(), parts.items_count + 1);
3489            assert!(offsets.windows(2).all(|w| w[0] <= w[1]));
3490            let last_abs = offsets[offsets.len() - 1] as usize;
3491            assert_eq!(last_abs, parts.data_len);
3492        }
3493
3494        assert_eq!(view.len(), 2);
3495        assert_eq!(view.get(0), Some(b"bc" as &[u8]));
3496        assert_eq!(view.get(1), Some(b"def" as &[u8]));
3497    }
3498
3499    #[test]
3500    fn view_type_aliases() {
3501        let mut tape = CharsTapeI32::new();
3502        tape.push("test").unwrap();
3503
3504        let _view: CharsTapeViewI32 = tape.subview(0, 1).unwrap();
3505
3506        let mut bytes_tape = BytesTapeI64::new();
3507        bytes_tape.push(b"test").unwrap();
3508
3509        let _bytes_view: BytesTapeViewI64 = bytes_tape.subview(0, 1).unwrap();
3510    }
3511
3512    #[test]
3513    fn build_i32_from_other_offset_iterators() {
3514        let items = ["x", "yy", "", "zzz"];
3515
3516        // From u32 iterator
3517        let mut u32t = CharsTapeU32::new();
3518        u32t.extend(items).unwrap();
3519        let t_from_u32: CharsTapeI32 = u32t.iter().collect();
3520        assert_eq!(t_from_u32.len(), items.len());
3521        assert_eq!(t_from_u32.get(1), Some("yy"));
3522
3523        // From u64 iterator
3524        let mut u64t = CharsTapeU64::new();
3525        u64t.extend(items).unwrap();
3526        let t_from_u64: CharsTapeI32 = u64t.iter().collect();
3527        assert_eq!(t_from_u64.len(), items.len());
3528        assert_eq!(t_from_u64.get(3), Some("zzz"));
3529
3530        // From i64 iterator
3531        let mut i64t = CharsTapeI64::new();
3532        i64t.extend(items).unwrap();
3533        let t_from_i64: CharsTapeI32 = i64t.iter().collect();
3534        assert_eq!(t_from_i64.len(), items.len());
3535        assert_eq!(t_from_i64.get(2), Some(""));
3536    }
3537
3538    #[test]
3539    fn range_indexing_syntax() {
3540        let mut tape = CharsTapeI32::new();
3541        tape.push("a").unwrap();
3542        tape.push("b").unwrap();
3543        tape.push("c").unwrap();
3544        tape.push("d").unwrap();
3545
3546        // While we can't return views with [..] syntax due to lifetime constraints,
3547        // we can test that the view() and subview() API works correctly
3548
3549        // Get full view
3550        let full_view = tape.view();
3551        assert_eq!(full_view.len(), 4);
3552
3553        // Get subviews
3554        let sub = tape.subview(1, 3).unwrap();
3555        assert_eq!(sub.len(), 2);
3556        assert_eq!(sub.get(0), Some("b"));
3557        assert_eq!(sub.get(1), Some("c"));
3558
3559        // Test subview of subview
3560        let sub_sub = sub.subview(0, 1).unwrap();
3561        assert_eq!(sub_sub.len(), 1);
3562        assert_eq!(sub_sub.get(0), Some("b"));
3563    }
3564
3565    #[cfg(test)]
3566    use arrow::array::{Array, BinaryArray, StringArray};
3567    #[cfg(test)]
3568    use arrow::buffer::{Buffer, OffsetBuffer, ScalarBuffer};
3569
3570    #[test]
3571    fn charstape_to_arrow_string_array() {
3572        let mut tape = CharsTapeI32::new();
3573        tape.extend(["hello", "world", "", "arrow"]).unwrap();
3574
3575        let (data_slice, offsets_slice) = tape.arrow_slices();
3576        let data_buffer = Buffer::from_slice_ref(data_slice);
3577        let offsets_buffer = OffsetBuffer::new(ScalarBuffer::new(
3578            Buffer::from_slice_ref(offsets_slice),
3579            0,
3580            offsets_slice.len(),
3581        ));
3582        let arrow_array = StringArray::new(offsets_buffer, data_buffer, None);
3583
3584        assert_eq!(arrow_array.len(), 4);
3585        assert_eq!(arrow_array.value(0), "hello");
3586        assert_eq!(arrow_array.value(2), "");
3587    }
3588
3589    #[test]
3590    fn arrow_string_array_to_charstape_view() {
3591        let arrow_array = StringArray::from(vec!["foo", "bar", ""]);
3592
3593        // Zero-copy conversion to CharsTapeView
3594        let view = unsafe {
3595            CharsTapeViewI32::from_raw_parts(arrow_array.values(), arrow_array.offsets().as_ref())
3596        };
3597
3598        assert_eq!(view.len(), 3);
3599        assert_eq!(view.get(0), Some("foo"));
3600        assert_eq!(view.get(1), Some("bar"));
3601        assert_eq!(view.get(2), Some(""));
3602    }
3603
3604    #[test]
3605    fn arrow_binary_array_to_bytestape_view() {
3606        let values: Vec<Option<&[u8]>> = vec![
3607            Some(&[1u8, 2, 3] as &[u8]),
3608            Some(&[] as &[u8]),
3609            Some(&[4u8, 5] as &[u8]),
3610        ];
3611        let arrow_array = BinaryArray::from(values);
3612
3613        // Zero-copy conversion to BytesTapeView
3614        let view = unsafe {
3615            BytesTapeViewI32::from_raw_parts(arrow_array.values(), arrow_array.offsets().as_ref())
3616        };
3617
3618        assert_eq!(view.len(), 3);
3619        assert_eq!(view.get(0), Some(&[1u8, 2, 3] as &[u8]));
3620        assert_eq!(view.get(1), Some(&[] as &[u8]));
3621        assert_eq!(view.get(2), Some(&[4u8, 5] as &[u8]));
3622    }
3623
3624    #[test]
3625    fn zero_copy_roundtrip() {
3626        // Original data
3627        let mut tape = CharsTapeI32::new();
3628        tape.extend(["hello", "", "world"]).unwrap();
3629
3630        // Convert to Arrow (zero-copy)
3631        let (data_slice, offsets_slice) = tape.arrow_slices();
3632        let data_buffer = Buffer::from_slice_ref(data_slice);
3633        let offsets_buffer = OffsetBuffer::new(ScalarBuffer::new(
3634            Buffer::from_slice_ref(offsets_slice),
3635            0,
3636            offsets_slice.len(),
3637        ));
3638        let arrow_array = StringArray::new(offsets_buffer, data_buffer, None);
3639
3640        // Convert back to CharsTapeView (zero-copy)
3641        let view = unsafe {
3642            CharsTapeViewI32::from_raw_parts(arrow_array.values(), arrow_array.offsets().as_ref())
3643        };
3644
3645        // Verify data integrity without any copying
3646        assert_eq!(view.len(), 3);
3647        assert_eq!(view.get(0), Some("hello"));
3648        assert_eq!(view.get(1), Some(""));
3649        assert_eq!(view.get(2), Some("world"));
3650    }
3651
3652    #[test]
3653    fn bytes_to_string_conversion() {
3654        // Test successful conversion with valid UTF-8
3655        let mut bytes_tape = BytesTapeI32::new();
3656        bytes_tape.push(b"hello").unwrap();
3657        bytes_tape.push(b"world").unwrap();
3658        bytes_tape.push(b"").unwrap();
3659        bytes_tape.push(b"rust").unwrap();
3660
3661        let chars_tape: Result<CharsTapeI32, _> = bytes_tape.try_into();
3662        assert!(chars_tape.is_ok());
3663
3664        let chars_tape = chars_tape.unwrap();
3665        assert_eq!(chars_tape.len(), 4);
3666        assert_eq!(chars_tape.get(0), Some("hello"));
3667        assert_eq!(chars_tape.get(1), Some("world"));
3668        assert_eq!(chars_tape.get(2), Some(""));
3669        assert_eq!(chars_tape.get(3), Some("rust"));
3670    }
3671
3672    #[test]
3673    fn bytes_to_string_invalid_utf8() {
3674        // Test conversion failure with invalid UTF-8
3675        let mut bytes_tape = BytesTapeI32::new();
3676        bytes_tape.push(b"valid").unwrap();
3677        bytes_tape.push(&[0xFF, 0xFE]).unwrap(); // Invalid UTF-8 sequence
3678        bytes_tape.push(b"also valid").unwrap();
3679
3680        let chars_tape: Result<CharsTapeI32, _> = bytes_tape.try_into();
3681        assert!(chars_tape.is_err());
3682
3683        match chars_tape {
3684            Err(StringTapeError::Utf8Error(_)) => {}
3685            _ => panic!("Expected Utf8Error"),
3686        }
3687    }
3688
3689    #[test]
3690    fn string_to_bytes_conversion() {
3691        // Test infallible conversion from CharsTape to BytesTape
3692        let mut chars_tape = CharsTapeI32::new();
3693        chars_tape.push("hello").unwrap();
3694        chars_tape.push("δΈ–η•Œ").unwrap(); // Unicode characters
3695        chars_tape.push("").unwrap();
3696        chars_tape.push("πŸ¦€").unwrap(); // Emoji
3697
3698        let bytes_tape: BytesTapeI32 = chars_tape.into();
3699        assert_eq!(bytes_tape.len(), 4);
3700        assert_eq!(&bytes_tape[0], b"hello");
3701        assert_eq!(&bytes_tape[1], "δΈ–η•Œ".as_bytes());
3702        assert_eq!(&bytes_tape[2], b"");
3703        assert_eq!(&bytes_tape[3], "πŸ¦€".as_bytes());
3704    }
3705
3706    #[test]
3707    fn conversion_convenience_methods() {
3708        // Test try_into_chars_tape method
3709        let mut bytes_tape = BytesTapeI32::new();
3710        bytes_tape.push(b"test").unwrap();
3711        let string_result = bytes_tape.try_into_chars_tape();
3712        assert!(string_result.is_ok());
3713        assert_eq!(string_result.unwrap().get(0), Some("test"));
3714
3715        // Test into_bytes_tape method
3716        let mut chars_tape = CharsTapeI32::new();
3717        chars_tape.push("test").unwrap();
3718        let bytes_back = chars_tape.into_bytes_tape();
3719        assert_eq!(&bytes_back[0], b"test");
3720    }
3721
3722    #[test]
3723    fn conversion_round_trip() {
3724        // Test round-trip conversion preserves data
3725        let mut original = CharsTapeI32::new();
3726        original.push("first").unwrap();
3727        original.push("second").unwrap();
3728        original.push("third").unwrap();
3729
3730        // Store expected values before conversion
3731        let expected = vec!["first", "second", "third"];
3732
3733        // Convert to BytesTape and back
3734        let bytes: BytesTapeI32 = original.into();
3735        let recovered: CharsTapeI32 = bytes.try_into().unwrap();
3736
3737        assert_eq!(expected.len(), recovered.len());
3738        for (i, expected_str) in expected.iter().enumerate() {
3739            assert_eq!(recovered.get(i), Some(*expected_str));
3740        }
3741    }
3742
3743    #[test]
3744    fn view_to_view_conversions_valid_utf8() {
3745        // Prepare a CharsTape and obtain its view
3746        let mut ct = CharsTapeI32::new();
3747        ct.extend(["abc", "", "δΈ–η•Œ"]).unwrap();
3748        let chars_view = ct.view();
3749
3750        // Chars -> Bytes view conversion is infallible
3751        let bytes_view: BytesTapeViewI32 = chars_view.into_bytes_view();
3752        assert_eq!(bytes_view.len(), 3);
3753        assert_eq!(bytes_view.get(0), Some("abc".as_bytes()));
3754        assert_eq!(bytes_view.get(1), Some(b"" as &[u8]));
3755        assert_eq!(bytes_view.get(2), Some("δΈ–η•Œ".as_bytes()));
3756
3757        // Bytes -> Chars view conversion is fallible, but should succeed for valid UTF-8
3758        let chars_back: Result<CharsTapeViewI32, _> = bytes_view.try_into_chars_view();
3759        assert!(chars_back.is_ok());
3760        let chars_back = chars_back.unwrap();
3761        assert_eq!(chars_back.len(), 3);
3762        assert_eq!(chars_back.get(0), Some("abc"));
3763        assert_eq!(chars_back.get(1), Some(""));
3764        assert_eq!(chars_back.get(2), Some("δΈ–η•Œ"));
3765    }
3766
3767    #[test]
3768    fn view_to_view_bytes_to_chars_invalid_utf8() {
3769        // Prepare a BytesTape with invalid UTF-8 payload
3770        let mut bt = BytesTapeI32::new();
3771        bt.push(b"ok").unwrap();
3772        bt.push(&[0xFF, 0xFE]).unwrap(); // invalid UTF-8
3773        let bview = bt.view();
3774
3775        // Converting to CharsTapeView should fail
3776        let res: Result<CharsTapeViewI32, _> = bview.try_into_chars_view();
3777        assert!(res.is_err());
3778        match res {
3779            Err(StringTapeError::Utf8Error(_)) => {}
3780            _ => panic!("Expected Utf8Error"),
3781        }
3782    }
3783
3784    #[test]
3785    fn chars_slices_basic() {
3786        let data = "hello world foo bar";
3787        let cows = CharsCowsU32U16::from_iter_and_data(
3788            data.split_whitespace(),
3789            Cow::Borrowed(data.as_bytes()),
3790        )
3791        .unwrap();
3792
3793        assert_eq!(cows.len(), 4);
3794        assert_eq!(cows.get(0), Some("hello"));
3795        assert_eq!(cows.get(1), Some("world"));
3796        assert_eq!(cows.get(2), Some("foo"));
3797        assert_eq!(cows.get(3), Some("bar"));
3798        assert_eq!(cows.get(4), None);
3799    }
3800
3801    #[test]
3802    fn chars_slices_index() {
3803        let data = "abc def";
3804
3805        let cows = CharsCowsU64U32::from_iter_and_data(
3806            data.split_whitespace(),
3807            Cow::Borrowed(data.as_bytes()),
3808        )
3809        .unwrap();
3810
3811        assert_eq!(&cows[0], "abc");
3812        assert_eq!(&cows[1], "def");
3813    }
3814
3815    #[test]
3816    fn chars_slices_iterator() {
3817        let data = "a b c";
3818
3819        let cows = CharsCowsU64U32::from_iter_and_data(
3820            data.split_whitespace(),
3821            Cow::Borrowed(data.as_bytes()),
3822        )
3823        .unwrap();
3824
3825        let result: Vec<&str> = cows.iter().collect();
3826        assert_eq!(result, vec!["a", "b", "c"]);
3827
3828        // Test for-loop
3829        let mut count = 0;
3830        for s in &cows {
3831            assert_eq!(s.len(), 1);
3832            count += 1;
3833        }
3834        assert_eq!(count, 3);
3835    }
3836
3837    #[test]
3838    fn chars_slices_arbitrary_order() {
3839        let data = "0123456789";
3840        // Create slices in non-sequential order manually
3841        let s1 = &data[5..7]; // "56"
3842        let s2 = &data[0..1]; // "0"
3843        let s3 = &data[9..10]; // "9"
3844        let s4 = &data[2..5]; // "234"
3845
3846        let cows =
3847            CharsCowsU64U32::from_iter_and_data([s1, s2, s3, s4], Cow::Borrowed(data.as_bytes()))
3848                .unwrap();
3849
3850        assert_eq!(cows.get(0), Some("56"));
3851        assert_eq!(cows.get(1), Some("0"));
3852        assert_eq!(cows.get(2), Some("9"));
3853        assert_eq!(cows.get(3), Some("234"));
3854    }
3855
3856    #[test]
3857    fn chars_slices_empty_strings() {
3858        let data = "ab";
3859        let s1 = &data[0..0]; // empty
3860        let s2 = &data[1..2]; // "b"
3861        let s3 = &data[2..2]; // empty
3862
3863        let cows =
3864            CharsCowsU64U32::from_iter_and_data([s1, s2, s3], Cow::Borrowed(data.as_bytes()))
3865                .unwrap();
3866
3867        assert_eq!(cows.len(), 3);
3868        assert_eq!(cows.get(0), Some(""));
3869        assert_eq!(cows.get(1), Some("b"));
3870        assert_eq!(cows.get(2), Some(""));
3871    }
3872
3873    #[test]
3874    fn chars_slices_overflow_checks() {
3875        let data_vec = vec![b'x'; 300];
3876        let data = core::str::from_utf8(&data_vec).unwrap();
3877
3878        // u8 length overflow - 256 bytes exceeds u8::MAX
3879        let long_slice = &data[0..256];
3880        let result = CharsCowsU32U8::from_iter_and_data(
3881            core::iter::once(long_slice),
3882            Cow::Borrowed(data.as_bytes()),
3883        );
3884        assert!(result.is_err());
3885        assert_eq!(result.unwrap_err(), StringTapeError::OffsetOverflow);
3886
3887        // Valid with u16 length
3888        let result = CharsCowsU32U16::from_iter_and_data(
3889            core::iter::once(long_slice),
3890            Cow::Borrowed(data.as_bytes()),
3891        );
3892        assert!(result.is_ok());
3893    }
3894
3895    #[test]
3896    fn chars_slices_bounds_check() {
3897        let data = String::from("hello");
3898        let other_data = String::from("world");
3899
3900        // Slice from different string - should fail
3901        let result = CharsCowsU64U32::from_iter_and_data(
3902            core::iter::once(other_data.as_str()),
3903            Cow::Borrowed(data.as_bytes()),
3904        );
3905        assert!(result.is_err());
3906        assert_eq!(result.unwrap_err(), StringTapeError::IndexOutOfBounds);
3907
3908        // Valid slice from same string
3909        let result = CharsCowsU64U32::from_iter_and_data(
3910            core::iter::once(data.as_str()),
3911            Cow::Borrowed(data.as_bytes()),
3912        );
3913        assert!(result.is_ok());
3914    }
3915
3916    #[test]
3917    fn slices_conversions() {
3918        let data = "hello world";
3919        let chars = CharsCowsU32U8::from_iter_and_data(
3920            data.split_whitespace(),
3921            Cow::Borrowed(data.as_bytes()),
3922        )
3923        .unwrap();
3924
3925        // CharsCows -> BytesCows
3926        let bytes: BytesCowsU32U8 = chars.into();
3927        assert_eq!(bytes.get(0), Some(b"hello" as &[u8]));
3928        assert_eq!(bytes.get(1), Some(b"world" as &[u8]));
3929
3930        // N -> CharsCows
3931        let chars_back: CharsCowsU32U8 = bytes.try_into().unwrap();
3932        assert_eq!(chars_back.get(0), Some("hello"));
3933        assert_eq!(chars_back.get(1), Some("world"));
3934    }
3935
3936    #[test]
3937    fn slices_type_aliases() {
3938        let data = "test";
3939
3940        let _s1: CharsCowsU32U16 =
3941            CharsCows::from_iter_and_data(core::iter::once(data), Cow::Borrowed(data.as_bytes()))
3942                .unwrap();
3943        let _s2: CharsCowsU32U8 =
3944            CharsCows::from_iter_and_data(core::iter::once(data), Cow::Borrowed(data.as_bytes()))
3945                .unwrap();
3946        let _s3: CharsCowsU16U8 =
3947            CharsCows::from_iter_and_data(core::iter::once(data), Cow::Borrowed(data.as_bytes()))
3948                .unwrap();
3949        let _s4: CharsCowsU64U32 =
3950            CharsCows::from_iter_and_data(core::iter::once(data), Cow::Borrowed(data.as_bytes()))
3951                .unwrap();
3952    }
3953
3954    #[test]
3955    fn chars_slices_auto_sorted() {
3956        let data = "zebra apple banana cherry";
3957        let mut cows = CharsCowsAuto::from_iter_and_data(
3958            data.split_whitespace(),
3959            Cow::Borrowed(data.as_bytes()),
3960        )
3961        .unwrap();
3962
3963        // Sort in-place using standard Rust patterns
3964        cows.sort();
3965
3966        let sorted: Vec<&str> = cows.iter().collect();
3967        assert_eq!(sorted, vec!["apple", "banana", "cherry", "zebra"]);
3968    }
3969
3970    #[test]
3971    fn chars_slices_auto_to_vec_string() {
3972        let data = "hello world foo";
3973        let cows = CharsCowsAuto::from_iter_and_data(
3974            data.split_whitespace(),
3975            Cow::Borrowed(data.as_bytes()),
3976        )
3977        .unwrap();
3978
3979        // Convert to Vec<String> using iterator
3980        let vec_string: Vec<String> = cows.iter().map(|s| s.to_string()).collect();
3981
3982        assert_eq!(vec_string, vec!["hello", "world", "foo"]);
3983    }
3984
3985    #[test]
3986    fn chars_slices_auto_filter_map() {
3987        let data = "hello world foo bar";
3988        let cows = CharsCowsAuto::from_iter_and_data(
3989            data.split_whitespace(),
3990            Cow::Borrowed(data.as_bytes()),
3991        )
3992        .unwrap();
3993
3994        // Filter and uppercase using iterator - common Vec<String> pattern
3995        let result: Vec<String> = cows
3996            .iter()
3997            .filter_map(|word| {
3998                if word.len() > 3 {
3999                    Some(word.to_uppercase())
4000                } else {
4001                    None
4002                }
4003            })
4004            .collect();
4005
4006        assert_eq!(result, vec!["HELLO", "WORLD"]);
4007    }
4008
4009    #[test]
4010    fn chars_slices_auto_type_selection() {
4011        // Small data -> u32 offset, u8 length
4012        let small = "hi";
4013        let s1 = CharsCowsAuto::from_iter_and_data(
4014            core::iter::once(small),
4015            Cow::Borrowed(small.as_bytes()),
4016        )
4017        .unwrap();
4018        assert!(matches!(s1, CharsCowsAuto::U32U8(_)));
4019        assert_eq!(s1.bytes_per_entry(), 5);
4020
4021        // Long word -> u32 offset, u16 length
4022        let long_word = "a".repeat(300);
4023        let s2 = CharsCowsAuto::from_iter_and_data(
4024            core::iter::once(long_word.as_str()),
4025            Cow::Borrowed(long_word.as_bytes()),
4026        )
4027        .unwrap();
4028        assert!(matches!(s2, CharsCowsAuto::U32U16(_)));
4029        assert_eq!(s2.bytes_per_entry(), 6);
4030    }
4031}
4032
4033// ========================
4034// Examples
4035// ========================
4036
#[cfg(all(feature = "std", not(test)))]
pub mod examples {
    //! Benchmark drivers, one per container type.
    //!
    //! Every driver reads the file named by the first command-line argument, splits its
    //! contents on whitespace, stores the words in the container under test, reports
    //! progress on stderr, and then idles for a second with the container still alive.

    use super::*;
    use std::env;
    use std::fs;

    /// Idles for one second.
    ///
    /// Callers invoke this while their word collection is still in scope so that the
    /// process's peak usage can be sampled (presumably by an external tool).
    fn hold_for_peak_measurement() {
        std::thread::sleep(std::time::Duration::from_secs(1));
    }

    /// Baseline: stores every word as an owned `String` in a `Vec`.
    ///
    /// # Errors
    /// Returns the I/O error if the input file cannot be read.
    ///
    /// # Panics
    /// Panics with a usage message when no file argument was given.
    pub fn bench_vec_string() -> std::io::Result<()> {
        let path = env::args().nth(1).expect("Usage: bench_vec_string <file>");

        eprintln!("[Vec<String>] Loading file: {}", path);
        let content = fs::read_to_string(&path)?;
        eprintln!("[Vec<String>] File size: {} bytes", content.len());

        eprintln!("[Vec<String>] Collecting words...");
        let words: Vec<String> = content.split_whitespace().map(String::from).collect();
        eprintln!("[Vec<String>] Collected {} words", words.len());

        hold_for_peak_measurement();
        Ok(())
    }

    /// Baseline: stores every word as a borrowed byte slice in a `Vec`.
    ///
    /// # Errors
    /// Returns the I/O error if the input file cannot be read.
    ///
    /// # Panics
    /// Panics with a usage message when no file argument was given.
    pub fn bench_vec_slice() -> std::io::Result<()> {
        let path = env::args().nth(1).expect("Usage: bench_vec_slice <file>");

        eprintln!("[Vec<&[u8]>] Loading file: {}", path);
        let content = fs::read_to_string(&path)?;
        eprintln!("[Vec<&[u8]>] File size: {} bytes", content.len());

        eprintln!("[Vec<&[u8]>] Collecting words...");
        let words: Vec<&[u8]> = content.split_whitespace().map(str::as_bytes).collect();
        eprintln!("[Vec<&[u8]>] Collected {} words", words.len());

        hold_for_peak_measurement();
        Ok(())
    }

    /// Stores every word as an entry of a `CharsCowsAuto` borrowing the file contents.
    ///
    /// # Errors
    /// Returns the I/O error if the input file cannot be read, or the error reported by
    /// `CharsCowsAuto::from_iter_and_data`.
    ///
    /// # Panics
    /// Panics with a usage message when no file argument was given.
    pub fn bench_chars_slices() -> Result<(), Box<dyn std::error::Error>> {
        let path = env::args()
            .nth(1)
            .expect("Usage: bench_chars_slices <file>");

        eprintln!("[CharsCows] Loading file: {}", path);
        let content = fs::read_to_string(&path)?;
        eprintln!("[CharsCows] File size: {} bytes", content.len());

        eprintln!("[CharsCows] Building CharsCows from words...");
        // The `Auto` flavour is used so the offset/length widths are not hard-coded here.
        let backing = Cow::Borrowed(content.as_bytes());
        let cows = CharsCowsAuto::from_iter_and_data(content.split_whitespace(), backing)?;
        eprintln!("[CharsCows] Collected {} words", cows.len());

        hold_for_peak_measurement();
        Ok(())
    }

    /// Stores every word in a `CharsTapeAuto` built from the word iterator.
    ///
    /// # Errors
    /// Returns the I/O error if the input file cannot be read.
    ///
    /// # Panics
    /// Panics with a usage message when no file argument was given.
    pub fn bench_chars_tape() -> Result<(), Box<dyn std::error::Error>> {
        let path = env::args().nth(1).expect("Usage: bench_chars_tape <file>");

        eprintln!("[CharsTape] Loading file: {}", path);
        let content = fs::read_to_string(&path)?;
        eprintln!("[CharsTape] File size: {} bytes", content.len());

        eprintln!("[CharsTape] Building CharsTape from words...");
        // `from_iter` is relied on for automatic type selection based on total data size.
        let tape = CharsTapeAuto::from_iter(content.split_whitespace());
        eprintln!("[CharsTape] Collected {} words", tape.len());

        hold_for_peak_measurement();
        Ok(())
    }
}
4118
4119// ========================
4120// Binary entry points
4121// ========================
4122
/// Dispatches to one of the benchmark drivers in [`examples`] based on the name of the
/// running executable.
///
/// # Errors
/// Propagates the failure of `std::env::current_exe` and any error returned by the
/// selected driver. An unrecognised executable name prints the list of supported
/// binaries to stderr and exits the process with status 1.
#[cfg(all(feature = "std", not(test)))]
#[allow(dead_code)] // Only used when building binaries, not when checking lib
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let exe_path = std::env::current_exe()?;
    // Match on the file stem rather than the full file name so that a platform
    // executable suffix (e.g. `.exe` on Windows) does not defeat the dispatch.
    let exe_name = exe_path.file_stem().and_then(|n| n.to_str()).unwrap_or("");

    match exe_name {
        "bench_vec_string" => examples::bench_vec_string()?,
        "bench_vec_slice" => examples::bench_vec_slice()?,
        "bench_chars_slices" => examples::bench_chars_slices()?,
        "bench_chars_tape" => examples::bench_chars_tape()?,
        _ => {
            eprintln!("Unknown binary: {}", exe_name);
            eprintln!("Available: bench_vec_string, bench_vec_slice, bench_chars_slices, bench_chars_tape");
            std::process::exit(1);
        }
    }

    Ok(())
}