1#![cfg_attr(not(feature = "std"), no_std)]
2
3#[cfg(feature = "std")]
60extern crate std;
61
62#[cfg(not(feature = "std"))]
63extern crate alloc;
64
65use core::fmt;
66use core::marker::PhantomData;
67use core::ops::{
68 Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, Sub,
69};
70use core::ptr::{self, NonNull};
71use core::slice;
72
73#[cfg(not(feature = "std"))]
74use alloc::borrow::Cow;
75#[cfg(not(feature = "std"))]
76use alloc::string::String;
77#[cfg(not(feature = "std"))]
78use alloc::vec::Vec;
79
80#[cfg(feature = "std")]
81use std::borrow::Cow;
82
83use allocator_api2::alloc::{Allocator, Global, Layout};
84
/// Errors reported by tape construction, growth, indexing and conversion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringTapeError {
    /// A byte offset does not fit into the tape's offset type.
    OffsetOverflow,
    /// A buffer could not be allocated, or its size could not be computed.
    AllocationError,
    /// An item index or item range lies outside the tape or view.
    IndexOutOfBounds,
    /// Bytes that were required to be UTF-8 failed validation.
    Utf8Error(core::str::Utf8Error),
}

impl fmt::Display for StringTapeError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only the UTF-8 variant carries a payload; the others are fixed text.
        let message = match self {
            Self::Utf8Error(source) => return write!(f, "invalid UTF-8: {}", source),
            Self::OffsetOverflow => "offset value too large for offset type",
            Self::AllocationError => "memory allocation failed",
            Self::IndexOutOfBounds => "index out of bounds",
        };
        f.write_str(message)
    }
}
108
// `std::error::Error` is only available with the standard library, so this
// impl is gated on the `std` feature. The trait's default methods are used.
#[cfg(feature = "std")]
impl std::error::Error for StringTapeError {}
111
/// Untyped storage shared by the tape types: one byte buffer holding all
/// items back to back, plus one buffer of end offsets into it.
struct RawTape<Offset: OffsetType, A: Allocator> {
    /// Byte buffer; the slice length of the pointer is the allocated capacity.
    /// `None` until the first data allocation.
    data: Option<NonNull<[u8]>>,
    /// Offsets buffer; the slice length of the pointer is the allocated
    /// capacity in entries. `None` until the first offsets allocation.
    offsets: Option<NonNull<[Offset]>>,
    /// Number of bytes of `data` currently in use.
    len_bytes: usize,
    /// Number of items currently stored.
    len_items: usize,
    /// Allocator that owns both buffers.
    allocator: A,
    /// Ties the `Offset` parameter to the type without storing a value.
    _phantom: PhantomData<Offset>,
}
147
/// Raw pointers and lengths describing a tape or view, as returned by the
/// `as_raw_parts` methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RawParts<Offset: OffsetType> {
    /// Start of the byte buffer (null when a tape has no data buffer).
    pub data_ptr: *const u8,
    /// Start of the offsets (null when a tape has no offsets buffer).
    pub offsets_ptr: *const Offset,
    /// Length of the data in bytes.
    pub data_len: usize,
    /// Number of items described.
    pub items_count: usize,
}
160
/// Growable tape of strings stored contiguously; items are exposed as `&str`.
///
/// `Offset` selects the integer type used for offsets (default `i32`) and
/// `A` the allocator (default `Global`).
pub struct CharsTape<Offset: OffsetType = i32, A: Allocator = Global> {
    /// Underlying byte/offset storage.
    inner: RawTape<Offset, A>,
}
165
/// Growable tape of byte strings stored contiguously; items are exposed as
/// `&[u8]`.
///
/// `Offset` selects the integer type used for offsets (default `i32`) and
/// `A` the allocator (default `Global`).
pub struct BytesTape<Offset: OffsetType = i32, A: Allocator = Global> {
    /// Underlying byte/offset storage.
    inner: RawTape<Offset, A>,
}
170
/// Borrowed, read-only window onto a run of items.
///
/// Item `i` occupies `data[offsets[i]..offsets[i + 1]]`, so a view with
/// `n` items carries `n + 1` offsets.
pub struct RawTapeView<'a, Offset: OffsetType> {
    /// Byte buffer that the offsets index into.
    data: &'a [u8],
    /// Item boundaries, expressed as positions in `data`.
    offsets: &'a [Offset],
}
176
/// Borrowed view whose items are exposed as `&str`.
pub struct CharsTapeView<'a, Offset: OffsetType = i32> {
    /// Underlying untyped view.
    inner: RawTapeView<'a, Offset>,
}
181
/// Borrowed view whose items are exposed as `&[u8]`.
pub struct BytesTapeView<'a, Offset: OffsetType = i32> {
    /// Underlying untyped view.
    inner: RawTapeView<'a, Offset>,
}
186
/// Integer types usable as byte offsets inside a tape.
pub trait OffsetType: Copy + Default + PartialOrd + Sub<Output = Self> {
    /// Size of one offset in bytes.
    const SIZE: usize;

    /// Converts a byte position into this offset type.
    ///
    /// Returns `None` when `value` cannot be represented.
    fn from_usize(value: usize) -> Option<Self>;

    /// Converts the offset back into a `usize` with a plain `as` cast.
    ///
    /// Offsets produced by [`OffsetType::from_usize`] round-trip exactly.
    fn to_usize(self) -> usize;
}

impl OffsetType for i32 {
    const SIZE: usize = 4;

    fn from_usize(value: usize) -> Option<Self> {
        i32::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        // Offsets are created from non-negative positions, so the cast is lossless.
        self as usize
    }
}

impl OffsetType for i64 {
    const SIZE: usize = 8;

    fn from_usize(value: usize) -> Option<Self> {
        // A bare `as` cast would wrap positions above `i64::MAX` to negative
        // offsets on 64-bit targets; the checked conversion reports them instead.
        i64::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        // Offsets are created from non-negative positions, so the cast is lossless
        // for every value that `from_usize` can produce.
        self as usize
    }
}

impl OffsetType for u16 {
    const SIZE: usize = 2;

    fn from_usize(value: usize) -> Option<Self> {
        u16::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl OffsetType for u32 {
    const SIZE: usize = 4;

    fn from_usize(value: usize) -> Option<Self> {
        u32::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl OffsetType for u64 {
    const SIZE: usize = 8;

    fn from_usize(value: usize) -> Option<Self> {
        u64::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}
274
/// Unsigned integer types usable for storing item lengths.
pub trait LengthType: Copy + Default + PartialOrd {
    /// Size of one length value in bytes.
    const SIZE: usize;

    /// Converts a length into this type, or `None` if it does not fit.
    fn from_usize(value: usize) -> Option<Self>;

    /// Converts the stored length back into a `usize` with an `as` cast.
    fn to_usize(self) -> usize;
}

impl LengthType for u8 {
    const SIZE: usize = 1;

    fn from_usize(value: usize) -> Option<Self> {
        u8::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl LengthType for u16 {
    const SIZE: usize = 2;

    fn from_usize(value: usize) -> Option<Self> {
        u16::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl LengthType for u32 {
    const SIZE: usize = 4;

    fn from_usize(value: usize) -> Option<Self> {
        u32::try_from(value).ok()
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}

impl LengthType for u64 {
    const SIZE: usize = 8;

    fn from_usize(value: usize) -> Option<Self> {
        // Kept as a plain widening cast, exactly as before.
        Some(value as u64)
    }

    fn to_usize(self) -> usize {
        self as usize
    }
}
351
352impl<Offset: OffsetType, A: Allocator> RawTape<Offset, A> {
353 pub fn new() -> RawTape<Offset, Global> {
367 RawTape::new_in(Global)
368 }
369
370 pub fn new_in(allocator: A) -> Self {
385 Self {
386 data: None,
387 offsets: None,
388 len_bytes: 0,
389 len_items: 0,
390 allocator,
391 _phantom: PhantomData,
392 }
393 }
394
395 pub fn with_capacity(
413 data_capacity: usize,
414 strings_capacity: usize,
415 ) -> Result<RawTape<Offset, Global>, StringTapeError> {
416 RawTape::with_capacity_in(data_capacity, strings_capacity, Global)
417 }
418
419 pub fn with_capacity_in(
441 data_capacity: usize,
442 strings_capacity: usize,
443 allocator: A,
444 ) -> Result<Self, StringTapeError> {
445 let mut tape = Self::new_in(allocator);
446 tape.reserve(data_capacity, strings_capacity)?;
447 Ok(tape)
448 }
449
450 pub fn reserve(
451 &mut self,
452 additional_bytes: usize,
453 additional_strings: usize,
454 ) -> Result<(), StringTapeError> {
455 if additional_bytes > 0 {
456 let current_capacity = self.data_capacity();
457 let new_capacity = current_capacity
458 .checked_add(additional_bytes)
459 .ok_or(StringTapeError::AllocationError)?;
460 self.grow_data(new_capacity)?;
461 }
462
463 if additional_strings > 0 {
464 let current_capacity = self.offsets_capacity();
465 let new_capacity = current_capacity
466 .checked_add(additional_strings + 1)
467 .ok_or(StringTapeError::AllocationError)?;
468 self.grow_offsets(new_capacity)?;
469 }
470 Ok(())
471 }
472
473 fn grow_data(&mut self, new_capacity: usize) -> Result<(), StringTapeError> {
474 let current_capacity = self.data_capacity();
475 if new_capacity <= current_capacity {
476 return Ok(());
477 }
478
479 let new_layout =
480 Layout::array::<u8>(new_capacity).map_err(|_| StringTapeError::AllocationError)?;
481
482 let new_ptr = if let Some(old_ptr) = self.data {
483 let old_layout = Layout::array::<u8>(current_capacity).unwrap();
485 unsafe {
486 self.allocator
487 .grow(old_ptr.cast(), old_layout, new_layout)
488 .map_err(|_| StringTapeError::AllocationError)?
489 }
490 } else {
491 self.allocator
493 .allocate(new_layout)
494 .map_err(|_| StringTapeError::AllocationError)?
495 };
496
497 self.data = Some(NonNull::slice_from_raw_parts(new_ptr.cast(), new_capacity));
498 Ok(())
499 }
500
501 fn grow_offsets(&mut self, new_capacity: usize) -> Result<(), StringTapeError> {
502 let current_capacity = self.offsets_capacity();
503 if new_capacity <= current_capacity {
504 return Ok(());
505 }
506
507 let new_layout =
508 Layout::array::<Offset>(new_capacity).map_err(|_| StringTapeError::AllocationError)?;
509
510 let new_ptr = if let Some(old_ptr) = self.offsets {
511 let old_layout = Layout::array::<Offset>(current_capacity).unwrap();
513 unsafe {
514 self.allocator
515 .grow(old_ptr.cast(), old_layout, new_layout)
516 .map_err(|_| StringTapeError::AllocationError)?
517 }
518 } else {
519 self.allocator
521 .allocate_zeroed(new_layout)
522 .map_err(|_| StringTapeError::AllocationError)?
523 };
524
525 self.offsets = Some(NonNull::slice_from_raw_parts(new_ptr.cast(), new_capacity));
526 Ok(())
527 }
528
529 pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
546 let required_capacity = self
547 .len_bytes
548 .checked_add(bytes.len())
549 .ok_or(StringTapeError::AllocationError)?;
550
551 let current_data_capacity = self.data_capacity();
552 if required_capacity > current_data_capacity {
553 let new_capacity = (current_data_capacity * 2).max(required_capacity).max(64);
554 self.grow_data(new_capacity)?;
555 }
556
557 let current_offsets_capacity = self.offsets_capacity();
558 if self.len_items + 1 >= current_offsets_capacity {
559 let new_capacity = (current_offsets_capacity * 2)
560 .max(self.len_items + 2)
561 .max(8);
562 self.grow_offsets(new_capacity)?;
563 }
564
565 if let Some(data_ptr) = self.data {
567 unsafe {
568 ptr::copy_nonoverlapping(
569 bytes.as_ptr(),
570 data_ptr.as_ptr().cast::<u8>().add(self.len_bytes),
571 bytes.len(),
572 );
573 }
574 }
575
576 self.len_bytes += bytes.len();
577 self.len_items += 1;
578
579 let offset = Offset::from_usize(self.len_bytes).ok_or(StringTapeError::OffsetOverflow)?;
581 if let Some(offsets_ptr) = self.offsets {
582 unsafe {
583 ptr::write(
584 offsets_ptr.as_ptr().cast::<Offset>().add(self.len_items),
585 offset,
586 );
587 }
588 }
589
590 Ok(())
591 }
592
593 pub fn get(&self, index: usize) -> Option<&[u8]> {
597 if index >= self.len_items {
598 return None;
599 }
600
601 let (data_ptr, offsets_ptr) = match (self.data, self.offsets) {
602 (Some(data), Some(offsets)) => (data, offsets),
603 _ => return None,
604 };
605
606 unsafe {
607 let offsets_ptr = offsets_ptr.as_ptr().cast::<Offset>();
608 let start_offset = if index == 0 {
609 0
610 } else {
611 ptr::read(offsets_ptr.add(index)).to_usize()
612 };
613 let end_offset = ptr::read(offsets_ptr.add(index + 1)).to_usize();
614
615 Some(slice::from_raw_parts(
616 data_ptr.as_ptr().cast::<u8>().add(start_offset),
617 end_offset - start_offset,
618 ))
619 }
620 }
621
622 pub fn len(&self) -> usize {
624 self.len_items
625 }
626
627 pub fn is_empty(&self) -> bool {
629 self.len_items == 0
630 }
631
632 pub fn data_len(&self) -> usize {
634 self.len_bytes
635 }
636
637 #[allow(dead_code)]
639 pub fn capacity(&self) -> usize {
640 self.len_items
641 }
642
643 pub fn data_capacity(&self) -> usize {
645 self.data.map(|ptr| ptr.len()).unwrap_or(0)
646 }
647
648 pub fn offsets_capacity(&self) -> usize {
650 self.offsets.map(|ptr| ptr.len()).unwrap_or(0)
651 }
652
653 pub fn clear(&mut self) {
655 self.len_bytes = 0;
656 self.len_items = 0;
657 if let Some(offsets_ptr) = self.offsets {
658 unsafe {
659 ptr::write(offsets_ptr.as_ptr().cast::<Offset>(), Offset::default());
660 }
661 }
662 }
663
664 pub fn truncate(&mut self, len: usize) {
666 if len >= self.len_items {
667 return;
668 }
669
670 self.len_items = len;
671 self.len_bytes = if len == 0 {
672 0
673 } else if let Some(offsets_ptr) = self.offsets {
674 unsafe { ptr::read(offsets_ptr.as_ptr().cast::<Offset>().add(len)).to_usize() }
675 } else {
676 0
677 };
678 }
679
680 pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
691 where
692 I: IntoIterator,
693 I::Item: AsRef<[u8]>,
694 {
695 for s in iter {
696 self.push(s.as_ref())?;
697 }
698 Ok(())
699 }
700
701 pub fn as_raw_parts(&self) -> RawParts<Offset> {
709 let data_ptr = self
710 .data
711 .map(|ptr| ptr.as_ptr().cast::<u8>() as *const u8)
712 .unwrap_or(ptr::null());
713 let offsets_ptr = self
714 .offsets
715 .map(|ptr| ptr.as_ptr().cast::<Offset>() as *const Offset)
716 .unwrap_or(ptr::null());
717 RawParts {
718 data_ptr,
719 offsets_ptr,
720 data_len: self.len_bytes,
721 items_count: self.len_items,
722 }
723 }
724
725 pub fn data_slice(&self) -> &[u8] {
730 if let Some(data_ptr) = self.data {
731 unsafe { core::slice::from_raw_parts(data_ptr.as_ptr().cast::<u8>(), self.len_bytes) }
732 } else {
733 &[]
734 }
735 }
736
737 pub fn offsets_slice(&self) -> &[Offset] {
742 if let Some(offsets_ptr) = self.offsets {
743 unsafe {
744 core::slice::from_raw_parts(
745 offsets_ptr.as_ptr().cast::<Offset>(),
746 self.len_items + 1,
747 )
748 }
749 } else {
750 &[]
751 }
752 }
753
754 pub fn allocator(&self) -> &A {
756 &self.allocator
757 }
758
759 pub fn view(&self) -> RawTapeView<'_, Offset> {
761 RawTapeView::new(self, 0, self.len_items).unwrap_or(RawTapeView {
762 data: &[],
763 offsets: &[],
764 })
765 }
766
767 pub fn subview(
769 &self,
770 start: usize,
771 end: usize,
772 ) -> Result<RawTapeView<'_, Offset>, StringTapeError> {
773 RawTapeView::new(self, start, end)
774 }
775}
776
777impl<Offset: OffsetType, A: Allocator> Drop for RawTape<Offset, A> {
778 fn drop(&mut self) {
779 if let Some(data_ptr) = self.data {
780 let layout = Layout::array::<u8>(data_ptr.len()).unwrap();
781 unsafe {
782 self.allocator.deallocate(data_ptr.cast(), layout);
783 }
784 }
785 if let Some(offsets_ptr) = self.offsets {
786 let layout = Layout::array::<Offset>(offsets_ptr.len()).unwrap();
787 unsafe {
788 self.allocator.deallocate(offsets_ptr.cast(), layout);
789 }
790 }
791 }
792}
793
// SAFETY: the `NonNull` buffer pointers suppress the automatic `Send`/`Sync`
// impls. The buffers are reached only through this struct's fields and are
// released in its `Drop`, so the impls rely on the tape being their sole
// owner; they are bounded on the offset type and the allocator.
unsafe impl<Offset: OffsetType + Send, A: Allocator + Send> Send for RawTape<Offset, A> {}
unsafe impl<Offset: OffsetType + Sync, A: Allocator + Sync> Sync for RawTape<Offset, A> {}
796
797impl<Offset: OffsetType, A: Allocator> Index<Range<usize>> for RawTape<Offset, A> {
799 type Output = [u8];
800
801 fn index(&self, range: Range<usize>) -> &Self::Output {
802 let view = self
803 .subview(range.start, range.end)
804 .expect("range out of bounds");
805 view.data
807 }
808}
809
810impl<Offset: OffsetType, A: Allocator> Index<RangeFrom<usize>> for RawTape<Offset, A> {
811 type Output = [u8];
812
813 fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
814 let view = self
815 .subview(range.start, self.len_items)
816 .expect("range out of bounds");
817 view.data
818 }
819}
820
821impl<Offset: OffsetType, A: Allocator> Index<RangeTo<usize>> for RawTape<Offset, A> {
822 type Output = [u8];
823
824 fn index(&self, range: RangeTo<usize>) -> &Self::Output {
825 let view = self.subview(0, range.end).expect("range out of bounds");
826 view.data
827 }
828}
829
830impl<Offset: OffsetType, A: Allocator> Index<RangeFull> for RawTape<Offset, A> {
831 type Output = [u8];
832
833 fn index(&self, _range: RangeFull) -> &Self::Output {
834 let view = self.view();
835 view.data
836 }
837}
838
839impl<Offset: OffsetType, A: Allocator> Index<RangeInclusive<usize>> for RawTape<Offset, A> {
840 type Output = [u8];
841
842 fn index(&self, range: RangeInclusive<usize>) -> &Self::Output {
843 let view = self
844 .subview(*range.start(), range.end() + 1)
845 .expect("range out of bounds");
846 view.data
847 }
848}
849
850impl<Offset: OffsetType, A: Allocator> Index<RangeToInclusive<usize>> for RawTape<Offset, A> {
851 type Output = [u8];
852
853 fn index(&self, range: RangeToInclusive<usize>) -> &Self::Output {
854 let view = self.subview(0, range.end + 1).expect("range out of bounds");
855 view.data
856 }
857}
858
859impl<'a, Offset: OffsetType> RawTapeView<'a, Offset> {
864 pub(crate) fn new<A: Allocator>(
866 tape: &'a RawTape<Offset, A>,
867 start: usize,
868 end: usize,
869 ) -> Result<Self, StringTapeError> {
870 if start > end || end > tape.len() {
871 return Err(StringTapeError::IndexOutOfBounds);
872 }
873
874 let (data_ptr, offsets_ptr) = match (tape.data, tape.offsets) {
875 (Some(data), Some(offsets)) => (data, offsets),
876 _ => return Err(StringTapeError::IndexOutOfBounds),
877 };
878
879 let data = unsafe { slice::from_raw_parts(data_ptr.as_ptr().cast::<u8>(), tape.len_bytes) };
882
883 let offsets = unsafe {
884 slice::from_raw_parts(
885 offsets_ptr.as_ptr().cast::<Offset>().add(start),
886 (end - start) + 1,
887 )
888 };
889
890 Ok(Self { data, offsets })
891 }
892
893 pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
903 Self { data, offsets }
904 }
905
906 pub fn get(&self, index: usize) -> Option<&[u8]> {
908 if index >= self.len() {
909 return None;
910 }
911
912 let start_offset = self.offsets[index].to_usize();
913 let end_offset = self.offsets[index + 1].to_usize();
914
915 Some(&self.data[start_offset..end_offset])
916 }
917
918 pub fn len(&self) -> usize {
920 self.offsets.len().saturating_sub(1)
921 }
922
923 pub fn is_empty(&self) -> bool {
925 self.len() == 0
926 }
927
928 pub fn data_len(&self) -> usize {
930 self.offsets[self.offsets.len() - 1].to_usize() - self.offsets[0].to_usize()
932 }
933
934 pub fn subview(
936 &self,
937 start: usize,
938 end: usize,
939 ) -> Result<RawTapeView<'a, Offset>, StringTapeError> {
940 if start > end || end > self.len() {
941 return Err(StringTapeError::IndexOutOfBounds);
942 }
943
944 Ok(RawTapeView {
945 data: self.data,
947 offsets: &self.offsets[start..=end],
948 })
949 }
950
951 pub fn as_raw_parts(&self) -> RawParts<Offset> {
953 RawParts {
956 data_ptr: self.data.as_ptr(),
957 offsets_ptr: self.offsets.as_ptr(),
958 data_len: self.offsets[self.offsets.len() - 1].to_usize(),
959 items_count: self.len(),
960 }
961 }
962}
963
964impl<'a, Offset: OffsetType> Index<usize> for RawTapeView<'a, Offset> {
965 type Output = [u8];
966
967 fn index(&self, index: usize) -> &Self::Output {
968 self.get(index).expect("index out of bounds")
969 }
970}
971
972impl<'a, Offset: OffsetType> Index<Range<usize>> for RawTapeView<'a, Offset> {
974 type Output = [u8];
975
976 fn index(&self, range: Range<usize>) -> &Self::Output {
977 let view = self
978 .subview(range.start, range.end)
979 .expect("range out of bounds");
980 let start = view.offsets[0].to_usize();
981 let end = view.offsets[view.offsets.len() - 1].to_usize();
982 &view.data[start..end]
983 }
984}
985
986impl<'a, Offset: OffsetType> Index<RangeFrom<usize>> for RawTapeView<'a, Offset> {
987 type Output = [u8];
988
989 fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
990 let view = self
991 .subview(range.start, self.len())
992 .expect("range out of bounds");
993 let start = view.offsets[0].to_usize();
994 let end = view.offsets[view.offsets.len() - 1].to_usize();
995 &view.data[start..end]
996 }
997}
998
999impl<'a, Offset: OffsetType> Index<RangeTo<usize>> for RawTapeView<'a, Offset> {
1000 type Output = [u8];
1001
1002 fn index(&self, range: RangeTo<usize>) -> &Self::Output {
1003 let view = self.subview(0, range.end).expect("range out of bounds");
1004 let start = view.offsets[0].to_usize();
1005 let end = view.offsets[view.offsets.len() - 1].to_usize();
1006 &view.data[start..end]
1007 }
1008}
1009
1010impl<'a, Offset: OffsetType> Index<RangeFull> for RawTapeView<'a, Offset> {
1011 type Output = [u8];
1012
1013 fn index(&self, _range: RangeFull) -> &Self::Output {
1014 let start = self.offsets[0].to_usize();
1015 let end = self.offsets[self.offsets.len() - 1].to_usize();
1016 &self.data[start..end]
1017 }
1018}
1019
1020impl<'a, Offset: OffsetType> Index<RangeInclusive<usize>> for RawTapeView<'a, Offset> {
1021 type Output = [u8];
1022
1023 fn index(&self, range: RangeInclusive<usize>) -> &Self::Output {
1024 let view = self
1025 .subview(*range.start(), range.end() + 1)
1026 .expect("range out of bounds");
1027 let start = view.offsets[0].to_usize();
1028 let end = view.offsets[view.offsets.len() - 1].to_usize();
1029 &view.data[start..end]
1030 }
1031}
1032
1033impl<'a, Offset: OffsetType> Index<RangeToInclusive<usize>> for RawTapeView<'a, Offset> {
1034 type Output = [u8];
1035
1036 fn index(&self, range: RangeToInclusive<usize>) -> &Self::Output {
1037 let view = self.subview(0, range.end + 1).expect("range out of bounds");
1038 let start = view.offsets[0].to_usize();
1039 let end = view.offsets[view.offsets.len() - 1].to_usize();
1040 &view.data[start..end]
1041 }
1042}
1043
1044impl<'a, Offset: OffsetType> CharsTapeView<'a, Offset> {
1049 pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
1058 Self {
1059 inner: RawTapeView::from_raw_parts(data, offsets),
1060 }
1061 }
1062
1063 pub fn get(&self, index: usize) -> Option<&str> {
1065 self.inner
1067 .get(index)
1068 .map(|b| unsafe { core::str::from_utf8_unchecked(b) })
1069 }
1070
1071 pub fn len(&self) -> usize {
1073 self.inner.len()
1074 }
1075
1076 pub fn is_empty(&self) -> bool {
1078 self.inner.is_empty()
1079 }
1080
1081 pub fn data_len(&self) -> usize {
1083 self.inner.data_len()
1084 }
1085
1086 pub fn subview(
1088 &self,
1089 start: usize,
1090 end: usize,
1091 ) -> Result<CharsTapeView<'a, Offset>, StringTapeError> {
1092 Ok(CharsTapeView {
1093 inner: self.inner.subview(start, end)?,
1094 })
1095 }
1096
1097 pub fn as_raw_parts(&self) -> RawParts<Offset> {
1099 self.inner.as_raw_parts()
1100 }
1101}
1102
1103impl<'a, Offset: OffsetType> Index<usize> for CharsTapeView<'a, Offset> {
1104 type Output = str;
1105
1106 fn index(&self, index: usize) -> &Self::Output {
1107 self.get(index).expect("index out of bounds")
1108 }
1109}
1110
1111impl<'a, Offset: OffsetType> BytesTapeView<'a, Offset> {
1116 pub unsafe fn from_raw_parts(data: &'a [u8], offsets: &'a [Offset]) -> Self {
1125 Self {
1126 inner: RawTapeView::from_raw_parts(data, offsets),
1127 }
1128 }
1129
1130 pub fn get(&self, index: usize) -> Option<&[u8]> {
1132 self.inner.get(index)
1133 }
1134
1135 pub fn len(&self) -> usize {
1137 self.inner.len()
1138 }
1139
1140 pub fn is_empty(&self) -> bool {
1142 self.inner.is_empty()
1143 }
1144
1145 pub fn data_len(&self) -> usize {
1147 self.inner.data_len()
1148 }
1149
1150 pub fn subview(
1152 &self,
1153 start: usize,
1154 end: usize,
1155 ) -> Result<BytesTapeView<'a, Offset>, StringTapeError> {
1156 Ok(BytesTapeView {
1157 inner: self.inner.subview(start, end)?,
1158 })
1159 }
1160
1161 pub fn as_raw_parts(&self) -> RawParts<Offset> {
1163 self.inner.as_raw_parts()
1164 }
1165}
1166
1167impl<'a, Offset: OffsetType> Index<usize> for BytesTapeView<'a, Offset> {
1168 type Output = [u8];
1169
1170 fn index(&self, index: usize) -> &Self::Output {
1171 self.get(index).expect("index out of bounds")
1172 }
1173}
1174
1175impl<Offset: OffsetType, A: Allocator> CharsTape<Offset, A> {
1180 pub fn new() -> CharsTape<Offset, Global> {
1182 CharsTape {
1183 inner: RawTape::<Offset, Global>::new(),
1184 }
1185 }
1186
1187 pub fn new_in(allocator: A) -> Self {
1189 Self {
1190 inner: RawTape::<Offset, A>::new_in(allocator),
1191 }
1192 }
1193
1194 pub fn with_capacity(
1196 data_capacity: usize,
1197 strings_capacity: usize,
1198 ) -> Result<CharsTape<Offset, Global>, StringTapeError> {
1199 Ok(CharsTape {
1200 inner: RawTape::<Offset, Global>::with_capacity(data_capacity, strings_capacity)?,
1201 })
1202 }
1203
1204 pub fn with_capacity_in(
1206 data_capacity: usize,
1207 strings_capacity: usize,
1208 allocator: A,
1209 ) -> Result<Self, StringTapeError> {
1210 Ok(Self {
1211 inner: RawTape::<Offset, A>::with_capacity_in(
1212 data_capacity,
1213 strings_capacity,
1214 allocator,
1215 )?,
1216 })
1217 }
1218
1219 pub fn push(&mut self, s: &str) -> Result<(), StringTapeError> {
1221 self.inner.push(s.as_bytes())
1222 }
1223
1224 pub fn get(&self, index: usize) -> Option<&str> {
1226 self.inner
1228 .get(index)
1229 .map(|b| unsafe { core::str::from_utf8_unchecked(b) })
1230 }
1231
1232 pub fn len(&self) -> usize {
1234 self.inner.len()
1235 }
1236
1237 pub fn is_empty(&self) -> bool {
1239 self.inner.is_empty()
1240 }
1241
1242 pub fn data_len(&self) -> usize {
1244 self.inner.data_len()
1245 }
1246
1247 pub fn capacity(&self) -> usize {
1249 self.inner.len()
1250 }
1251
1252 pub fn data_capacity(&self) -> usize {
1254 self.inner.data_capacity()
1255 }
1256
1257 pub fn offsets_capacity(&self) -> usize {
1259 self.inner.offsets_capacity()
1260 }
1261
1262 pub fn clear(&mut self) {
1264 self.inner.clear()
1265 }
1266
1267 pub fn truncate(&mut self, len: usize) {
1269 self.inner.truncate(len)
1270 }
1271
1272 pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
1274 where
1275 I: IntoIterator,
1276 I::Item: AsRef<str>,
1277 {
1278 for s in iter {
1279 self.push(s.as_ref())?;
1280 }
1281 Ok(())
1282 }
1283
1284 pub fn as_raw_parts(&self) -> RawParts<Offset> {
1286 self.inner.as_raw_parts()
1287 }
1288
1289 pub fn data_slice(&self) -> &[u8] {
1291 self.inner.data_slice()
1292 }
1293
1294 pub fn offsets_slice(&self) -> &[Offset] {
1296 self.inner.offsets_slice()
1297 }
1298
1299 pub fn iter(&self) -> CharsTapeIter<'_, Offset, A> {
1300 CharsTapeIter {
1301 tape: self,
1302 index: 0,
1303 }
1304 }
1305
1306 pub fn allocator(&self) -> &A {
1308 self.inner.allocator()
1309 }
1310
1311 pub fn view(&self) -> CharsTapeView<'_, Offset> {
1313 CharsTapeView {
1314 inner: self.inner.view(),
1315 }
1316 }
1317
1318 pub fn subview(
1320 &self,
1321 start: usize,
1322 end: usize,
1323 ) -> Result<CharsTapeView<'_, Offset>, StringTapeError> {
1324 Ok(CharsTapeView {
1325 inner: self.inner.subview(start, end)?,
1326 })
1327 }
1328
1329 pub fn as_reorderable<Length: LengthType>(
1344 &self,
1345 ) -> Result<CharsCows<'_, Offset, Length>, StringTapeError> {
1346 CharsCows::from_iter_and_data(self, Cow::Borrowed(self.data_slice()))
1347 }
1348
1349 pub fn arrow_slices(&self) -> (&[u8], &[Offset]) {
1351 (self.data_slice(), self.offsets_slice())
1352 }
1353}
1354
impl<Offset: OffsetType, A: Allocator> Drop for CharsTape<Offset, A> {
    /// Intentionally empty: the buffers are released when the `inner`
    /// `RawTape` field is dropped. Because this impl exists, `inner` cannot
    /// be moved out of a `CharsTape` by destructuring.
    fn drop(&mut self) {
    }
}
1361
// SAFETY: `CharsTape` has a single field, `inner: RawTape<Offset, A>`, which
// carries `Send`/`Sync` impls under these same bounds.
unsafe impl<Offset: OffsetType + Send, A: Allocator + Send> Send for CharsTape<Offset, A> {}
unsafe impl<Offset: OffsetType + Sync, A: Allocator + Sync> Sync for CharsTape<Offset, A> {}
1364
1365pub struct CharsTapeIter<'a, Offset: OffsetType, A: Allocator> {
1366 tape: &'a CharsTape<Offset, A>,
1367 index: usize,
1368}
1369
1370impl<'a, Offset: OffsetType, A: Allocator> Iterator for CharsTapeIter<'a, Offset, A> {
1371 type Item = &'a str;
1372
1373 fn next(&mut self) -> Option<Self::Item> {
1374 let result = self.tape.get(self.index);
1375 if result.is_some() {
1376 self.index += 1;
1377 }
1378 result
1379 }
1380
1381 fn size_hint(&self) -> (usize, Option<usize>) {
1382 let remaining = self.tape.len() - self.index;
1383 (remaining, Some(remaining))
1384 }
1385}
1386
1387impl<'a, Offset: OffsetType, A: Allocator> ExactSizeIterator for CharsTapeIter<'a, Offset, A> {}
1388
1389impl<Offset: OffsetType> FromIterator<String> for CharsTape<Offset, Global> {
1390 fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Self {
1391 let mut tape = CharsTape::<Offset, Global>::new();
1392 for s in iter {
1393 tape.push(&s)
1394 .expect("Failed to build CharsTape from iterator");
1395 }
1396 tape
1397 }
1398}
1399
1400impl<'a, Offset: OffsetType> FromIterator<&'a str> for CharsTape<Offset, Global> {
1401 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
1402 let mut tape = CharsTape::<Offset, Global>::new();
1403 for s in iter {
1404 tape.push(s)
1405 .expect("Failed to build CharsTape from iterator");
1406 }
1407 tape
1408 }
1409}
1410
1411impl<Offset: OffsetType, A: Allocator> Index<usize> for CharsTape<Offset, A> {
1412 type Output = str;
1413
1414 fn index(&self, index: usize) -> &Self::Output {
1415 self.get(index).expect("index out of bounds")
1416 }
1417}
1418
1419impl<'a, Offset: OffsetType, A: Allocator> IntoIterator for &'a CharsTape<Offset, A> {
1420 type Item = &'a str;
1421 type IntoIter = CharsTapeIter<'a, Offset, A>;
1422
1423 fn into_iter(self) -> Self::IntoIter {
1424 self.iter()
1425 }
1426}
1427
1428impl<Offset: OffsetType, A: Allocator> BytesTape<Offset, A> {
1433 pub fn new() -> BytesTape<Offset, Global> {
1435 BytesTape {
1436 inner: RawTape::<Offset, Global>::new(),
1437 }
1438 }
1439
1440 pub fn new_in(allocator: A) -> Self {
1442 Self {
1443 inner: RawTape::<Offset, A>::new_in(allocator),
1444 }
1445 }
1446
1447 pub fn with_capacity(
1449 data_capacity: usize,
1450 items_capacity: usize,
1451 ) -> Result<BytesTape<Offset, Global>, StringTapeError> {
1452 Ok(BytesTape {
1453 inner: RawTape::<Offset, Global>::with_capacity(data_capacity, items_capacity)?,
1454 })
1455 }
1456
1457 pub fn with_capacity_in(
1459 data_capacity: usize,
1460 items_capacity: usize,
1461 allocator: A,
1462 ) -> Result<Self, StringTapeError> {
1463 Ok(Self {
1464 inner: RawTape::<Offset, A>::with_capacity_in(
1465 data_capacity,
1466 items_capacity,
1467 allocator,
1468 )?,
1469 })
1470 }
1471
1472 pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
1474 self.inner.push(bytes)
1475 }
1476
1477 pub fn get(&self, index: usize) -> Option<&[u8]> {
1479 self.inner.get(index)
1480 }
1481
1482 pub fn len(&self) -> usize {
1484 self.inner.len()
1485 }
1486
1487 pub fn is_empty(&self) -> bool {
1489 self.inner.is_empty()
1490 }
1491
1492 pub fn data_len(&self) -> usize {
1494 self.inner.data_len()
1495 }
1496
1497 pub fn data_capacity(&self) -> usize {
1499 self.inner.data_capacity()
1500 }
1501
1502 pub fn offsets_capacity(&self) -> usize {
1504 self.inner.offsets_capacity()
1505 }
1506
1507 pub fn clear(&mut self) {
1509 self.inner.clear()
1510 }
1511
1512 pub fn truncate(&mut self, len: usize) {
1514 self.inner.truncate(len)
1515 }
1516
1517 pub fn extend<I>(&mut self, iter: I) -> Result<(), StringTapeError>
1519 where
1520 I: IntoIterator,
1521 I::Item: AsRef<[u8]>,
1522 {
1523 self.inner.extend(iter)
1524 }
1525
1526 pub fn as_raw_parts(&self) -> RawParts<Offset> {
1528 self.inner.as_raw_parts()
1529 }
1530
1531 pub fn data_slice(&self) -> &[u8] {
1533 self.inner.data_slice()
1534 }
1535
1536 pub fn offsets_slice(&self) -> &[Offset] {
1538 self.inner.offsets_slice()
1539 }
1540
1541 pub fn allocator(&self) -> &A {
1543 self.inner.allocator()
1544 }
1545
1546 pub fn view(&self) -> BytesTapeView<'_, Offset> {
1548 BytesTapeView {
1549 inner: self.inner.view(),
1550 }
1551 }
1552
1553 pub fn iter(&self) -> BytesTapeIter<'_, Offset, A> {
1555 BytesTapeIter {
1556 tape: self,
1557 index: 0,
1558 }
1559 }
1560
1561 pub fn subview(
1563 &self,
1564 start: usize,
1565 end: usize,
1566 ) -> Result<BytesTapeView<'_, Offset>, StringTapeError> {
1567 Ok(BytesTapeView {
1568 inner: self.inner.subview(start, end)?,
1569 })
1570 }
1571
1572 pub fn as_reorderable<Length: LengthType>(
1589 &self,
1590 ) -> Result<BytesCows<'_, Offset, Length>, StringTapeError> {
1591 BytesCows::from_iter_and_data(self, Cow::Borrowed(self.data_slice()))
1592 }
1593
1594 pub fn arrow_slices(&self) -> (&[u8], &[Offset]) {
1596 (self.data_slice(), self.offsets_slice())
1597 }
1598}
1599
1600impl<Offset: OffsetType, A: Allocator> Index<usize> for BytesTape<Offset, A> {
1601 type Output = [u8];
1602
1603 fn index(&self, index: usize) -> &Self::Output {
1604 self.get(index).expect("index out of bounds")
1605 }
1606}
1607
1608pub struct BytesTapeIter<'a, Offset: OffsetType, A: Allocator> {
1609 tape: &'a BytesTape<Offset, A>,
1610 index: usize,
1611}
1612
1613impl<'a, Offset: OffsetType, A: Allocator> Iterator for BytesTapeIter<'a, Offset, A> {
1614 type Item = &'a [u8];
1615
1616 fn next(&mut self) -> Option<Self::Item> {
1617 let result = self.tape.get(self.index);
1618 if result.is_some() {
1619 self.index += 1;
1620 }
1621 result
1622 }
1623
1624 fn size_hint(&self) -> (usize, Option<usize>) {
1625 let remaining = self.tape.len() - self.index;
1626 (remaining, Some(remaining))
1627 }
1628}
1629
1630impl<'a, Offset: OffsetType, A: Allocator> ExactSizeIterator for BytesTapeIter<'a, Offset, A> {}
1631
1632impl<'a, Offset: OffsetType, A: Allocator> IntoIterator for &'a BytesTape<Offset, A> {
1633 type Item = &'a [u8];
1634 type IntoIter = BytesTapeIter<'a, Offset, A>;
1635
1636 fn into_iter(self) -> Self::IntoIter {
1637 self.iter()
1638 }
1639}
1640
// Owned tapes with signed offsets, using the `Global` allocator.
pub type CharsTapeI32 = CharsTape<i32, Global>;
pub type CharsTapeI64 = CharsTape<i64, Global>;
pub type BytesTapeI32 = BytesTape<i32, Global>;
pub type BytesTapeI64 = BytesTape<i64, Global>;

// Borrowed views with signed offsets.
pub type CharsTapeViewI32<'a> = CharsTapeView<'a, i32>;
pub type CharsTapeViewI64<'a> = CharsTapeView<'a, i64>;
pub type BytesTapeViewI32<'a> = BytesTapeView<'a, i32>;
pub type BytesTapeViewI64<'a> = BytesTapeView<'a, i64>;

// Owned tapes with unsigned offsets, using the `Global` allocator.
pub type CharsTapeU32 = CharsTape<u32, Global>;
pub type CharsTapeU64 = CharsTape<u64, Global>;
pub type BytesTapeU16 = BytesTape<u16, Global>;
pub type BytesTapeU32 = BytesTape<u32, Global>;
pub type BytesTapeU64 = BytesTape<u64, Global>;

// Borrowed views with unsigned offsets.
pub type CharsTapeViewU32<'a> = CharsTapeView<'a, u32>;
pub type CharsTapeViewU64<'a> = CharsTapeView<'a, u64>;
pub type BytesTapeViewU16<'a> = BytesTapeView<'a, u16>;
pub type BytesTapeViewU32<'a> = BytesTapeView<'a, u32>;
pub type BytesTapeViewU64<'a> = BytesTapeView<'a, u64>;
1664
1665impl<Offset: OffsetType, A: Allocator> TryFrom<BytesTape<Offset, A>> for CharsTape<Offset, A> {
1667 type Error = StringTapeError;
1668
1669 fn try_from(bytes_tape: BytesTape<Offset, A>) -> Result<Self, Self::Error> {
1670 for i in 0..bytes_tape.len() {
1672 if let Err(e) = core::str::from_utf8(&bytes_tape[i]) {
1673 return Err(StringTapeError::Utf8Error(e));
1674 }
1675 }
1676
1677 let inner = unsafe {
1680 let inner = core::ptr::read(&bytes_tape.inner);
1682 core::mem::forget(bytes_tape);
1684 inner
1685 };
1686 Ok(CharsTape { inner })
1687 }
1688}
1689
1690impl<Offset: OffsetType, A: Allocator> From<CharsTape<Offset, A>> for BytesTape<Offset, A> {
1691 fn from(chars_tape: CharsTape<Offset, A>) -> Self {
1692 let inner = unsafe {
1695 let inner = core::ptr::read(&chars_tape.inner);
1697 core::mem::forget(chars_tape);
1699 inner
1700 };
1701 BytesTape { inner }
1702 }
1703}
1704
1705impl<Offset: OffsetType, A: Allocator> BytesTape<Offset, A> {
1706 pub fn try_into_chars_tape(self) -> Result<CharsTape<Offset, A>, StringTapeError> {
1707 self.try_into()
1708 }
1709}
1710
1711impl<Offset: OffsetType, A: Allocator> CharsTape<Offset, A> {
1712 pub fn into_bytes_tape(self) -> BytesTape<Offset, A> {
1713 self.into()
1714 }
1715}
1716
1717impl<'a, Offset: OffsetType> TryFrom<BytesTapeView<'a, Offset>> for CharsTapeView<'a, Offset> {
1719 type Error = StringTapeError;
1720
1721 fn try_from(bytes_view: BytesTapeView<'a, Offset>) -> Result<Self, Self::Error> {
1722 for i in 0..bytes_view.len() {
1724 let bytes = bytes_view.get(i).ok_or(StringTapeError::IndexOutOfBounds)?;
1725 if core::str::from_utf8(bytes).is_err() {
1726 return Err(StringTapeError::Utf8Error(
1727 core::str::from_utf8(bytes).unwrap_err(),
1728 ));
1729 }
1730 }
1731
1732 Ok(CharsTapeView {
1734 inner: bytes_view.inner,
1735 })
1736 }
1737}
1738
1739impl<'a, Offset: OffsetType> From<CharsTapeView<'a, Offset>> for BytesTapeView<'a, Offset> {
1740 fn from(chars_view: CharsTapeView<'a, Offset>) -> Self {
1741 BytesTapeView {
1743 inner: chars_view.inner,
1744 }
1745 }
1746}
1747
1748impl<'a, Offset: OffsetType> BytesTapeView<'a, Offset> {
1749 pub fn try_into_chars_view(self) -> Result<CharsTapeView<'a, Offset>, StringTapeError> {
1750 self.try_into()
1751 }
1752}
1753
1754impl<'a, Offset: OffsetType> CharsTapeView<'a, Offset> {
1755 pub fn into_bytes_view(self) -> BytesTapeView<'a, Offset> {
1756 self.into()
1757 }
1758}
1759
1760impl<Offset: OffsetType> Default for CharsTape<Offset, Global> {
1761 fn default() -> Self {
1762 Self::new()
1763 }
1764}
1765
/// Offset/length pair locating one slice inside a shared data buffer.
///
/// `packed(1)` removes all padding, so an entry occupies exactly
/// `size_of::<Offset>() + size_of::<Length>()` bytes. The fields may therefore
/// be unaligned: read them by value and never take a reference to one.
#[repr(C, packed(1))]
#[derive(Copy, Clone, Debug)]
struct PackedEntry<Offset, Length> {
    /// Byte offset of the slice from the start of the data buffer.
    offset: Offset,
    /// Length of the slice in bytes.
    length: Length,
}
1780
/// Collection of string slices stored as packed offset/length entries into a
/// single, possibly borrowed, byte buffer.
///
/// Reordering the collection only permutes `entries`; the text bytes in `data`
/// are left where they are.
#[derive(Debug, Clone)]
pub struct CharsCows<'a, Offset: OffsetType = u32, Length: LengthType = u16> {
    /// Buffer holding the bytes of every string, borrowed or owned.
    data: Cow<'a, [u8]>,
    /// One packed offset/length entry per string, in collection order.
    entries: Vec<PackedEntry<Offset, Length>>,
}
1820
/// Collection of byte slices stored as packed offset/length entries into a
/// single, possibly borrowed, byte buffer.
#[derive(Debug, Clone)]
pub struct BytesCows<'a, Offset: OffsetType = u32, Length: LengthType = u16> {
    /// Buffer holding the bytes of every slice, borrowed or owned.
    data: Cow<'a, [u8]>,
    /// One packed offset/length entry per slice, in collection order.
    entries: Vec<PackedEntry<Offset, Length>>,
}
1829
1830impl<'a, Offset: OffsetType, Length: LengthType> CharsCows<'a, Offset, Length> {
1831 pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
1859 where
1860 I: IntoIterator,
1861 I::Item: AsRef<str>,
1862 {
1863 let data_ptr = data.as_ptr() as usize;
1864 let data_end = data_ptr + data.len();
1865 let mut entries = Vec::new();
1866
1867 for s in iter {
1868 let s_ref = s.as_ref();
1869 let s_bytes = s_ref.as_bytes();
1870 let s_ptr = s_bytes.as_ptr() as usize;
1871
1872 if s_ptr < data_ptr || s_ptr > data_end {
1874 return Err(StringTapeError::IndexOutOfBounds);
1875 }
1876
1877 let offset = s_ptr - data_ptr;
1878 let length = s_bytes.len();
1879
1880 if offset + length > data.len() {
1881 return Err(StringTapeError::IndexOutOfBounds);
1882 }
1883
1884 let offset_typed = Offset::from_usize(offset).ok_or(StringTapeError::OffsetOverflow)?;
1885 let length_typed = Length::from_usize(length).ok_or(StringTapeError::OffsetOverflow)?;
1886
1887 entries.push(PackedEntry {
1888 offset: offset_typed,
1889 length: length_typed,
1890 });
1891 }
1892
1893 Ok(Self { data, entries })
1894 }
1895
1896 pub fn get(&self, index: usize) -> Option<&'_ str> {
1898 self.entries.get(index).map(|entry| {
1899 let start = entry.offset.to_usize();
1901 let len = entry.length.to_usize();
1902 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1905 })
1906 }
1907
1908 pub fn len(&self) -> usize {
1910 self.entries.len()
1911 }
1912
1913 pub fn is_empty(&self) -> bool {
1915 self.entries.is_empty()
1916 }
1917
1918 pub fn iter(&self) -> CharsCowsIter<'_, Offset, Length> {
1920 CharsCowsIter {
1921 slices: self,
1922 index: 0,
1923 }
1924 }
1925
1926 pub fn data(&self) -> &[u8] {
1928 &self.data
1929 }
1930
1931 pub fn sort(&mut self)
1953 where
1954 Offset: OffsetType,
1955 Length: LengthType,
1956 {
1957 self.entries.sort_by(|a, b| {
1958 let str_a = {
1959 let start = a.offset.to_usize();
1960 let len = a.length.to_usize();
1961 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1962 };
1963 let str_b = {
1964 let start = b.offset.to_usize();
1965 let len = b.length.to_usize();
1966 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1967 };
1968 str_a.cmp(str_b)
1969 });
1970 }
1971
1972 pub fn sort_unstable(&mut self)
1976 where
1977 Offset: OffsetType,
1978 Length: LengthType,
1979 {
1980 self.entries.sort_unstable_by(|a, b| {
1981 let str_a = {
1982 let start = a.offset.to_usize();
1983 let len = a.length.to_usize();
1984 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1985 };
1986 let str_b = {
1987 let start = b.offset.to_usize();
1988 let len = b.length.to_usize();
1989 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
1990 };
1991 str_a.cmp(str_b)
1992 });
1993 }
1994
1995 pub fn sort_by<F>(&mut self, mut compare: F)
2016 where
2017 F: FnMut(&str, &str) -> core::cmp::Ordering,
2018 Offset: OffsetType,
2019 Length: LengthType,
2020 {
2021 self.entries.sort_by(|a, b| {
2022 let str_a = {
2023 let start = a.offset.to_usize();
2024 let len = a.length.to_usize();
2025 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
2026 };
2027 let str_b = {
2028 let start = b.offset.to_usize();
2029 let len = b.length.to_usize();
2030 unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) }
2031 };
2032 compare(str_a, str_b)
2033 });
2034 }
2035
2036 pub fn sort_by_key<K, F>(&mut self, mut f: F)
2057 where
2058 F: FnMut(&str) -> K,
2059 K: Ord,
2060 Offset: OffsetType,
2061 Length: LengthType,
2062 {
2063 self.entries.sort_by_key(|entry| {
2064 let start = entry.offset.to_usize();
2065 let len = entry.length.to_usize();
2066 let s = unsafe { core::str::from_utf8_unchecked(&self.data[start..start + len]) };
2067 f(s)
2068 });
2069 }
2070}
2071
2072impl<'a, Offset: OffsetType, Length: LengthType> BytesCows<'a, Offset, Length> {
2073 pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2078 where
2079 I: IntoIterator,
2080 I::Item: AsRef<[u8]>,
2081 {
2082 let data_ptr = data.as_ptr() as usize;
2083 let data_end = data_ptr + data.len();
2084 let mut entries = Vec::new();
2085
2086 for b in iter {
2087 let b_ref = b.as_ref();
2088 let b_ptr = b_ref.as_ptr() as usize;
2089
2090 if b_ptr < data_ptr || b_ptr > data_end {
2091 return Err(StringTapeError::IndexOutOfBounds);
2092 }
2093
2094 let offset = b_ptr - data_ptr;
2095 let length = b_ref.len();
2096
2097 if offset + length > data.len() {
2098 return Err(StringTapeError::IndexOutOfBounds);
2099 }
2100
2101 let offset_typed = Offset::from_usize(offset).ok_or(StringTapeError::OffsetOverflow)?;
2102 let length_typed = Length::from_usize(length).ok_or(StringTapeError::OffsetOverflow)?;
2103
2104 entries.push(PackedEntry {
2105 offset: offset_typed,
2106 length: length_typed,
2107 });
2108 }
2109
2110 Ok(Self { data, entries })
2111 }
2112
2113 pub fn from_offsets_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2115 where
2116 I: IntoIterator<Item = (usize, usize)>,
2117 {
2118 let mut entries = Vec::new();
2119
2120 for (offset, length) in iter {
2121 let offset_typed = Offset::from_usize(offset).ok_or(StringTapeError::OffsetOverflow)?;
2122 let length_typed = Length::from_usize(length).ok_or(StringTapeError::OffsetOverflow)?;
2123
2124 let end = offset
2125 .checked_add(length)
2126 .ok_or(StringTapeError::OffsetOverflow)?;
2127 if end > data.len() {
2128 return Err(StringTapeError::IndexOutOfBounds);
2129 }
2130
2131 entries.push(PackedEntry {
2132 offset: offset_typed,
2133 length: length_typed,
2134 });
2135 }
2136
2137 Ok(Self { data, entries })
2138 }
2139
2140 pub fn get(&self, index: usize) -> Option<&[u8]> {
2142 self.entries.get(index).map(|entry| {
2143 let start = entry.offset.to_usize();
2144 let len = entry.length.to_usize();
2145 &self.data[start..start + len]
2146 })
2147 }
2148
2149 pub fn len(&self) -> usize {
2151 self.entries.len()
2152 }
2153
2154 pub fn is_empty(&self) -> bool {
2156 self.entries.is_empty()
2157 }
2158
2159 pub fn iter(&self) -> BytesCowsIter<'_, Offset, Length> {
2161 BytesCowsIter {
2162 slices: self,
2163 index: 0,
2164 }
2165 }
2166
2167 pub fn data(&self) -> &[u8] {
2169 &self.data
2170 }
2171
2172 pub fn as_chars(&self) -> Result<CharsCows<'_, Offset, Length>, StringTapeError> {
2199 for i in 0..self.len() {
2201 let slice = self.get(i).ok_or(StringTapeError::IndexOutOfBounds)?;
2202 core::str::from_utf8(slice).map_err(StringTapeError::Utf8Error)?;
2203 }
2204
2205 Ok(CharsCows {
2207 data: Cow::Borrowed(self.data.as_ref()),
2208 entries: self.entries.clone(),
2209 })
2210 }
2211}
2212
/// Borrowing iterator over the strings of a [`CharsCows`].
pub struct CharsCowsIter<'a, Offset: OffsetType, Length: LengthType> {
    /// Collection being iterated.
    slices: &'a CharsCows<'a, Offset, Length>,
    /// Index of the next entry to yield.
    index: usize,
}
2217
2218impl<'a, Offset: OffsetType, Length: LengthType> Iterator for CharsCowsIter<'a, Offset, Length> {
2219 type Item = &'a str;
2220
2221 fn next(&mut self) -> Option<Self::Item> {
2222 let result = self.slices.get(self.index);
2223 if result.is_some() {
2224 self.index += 1;
2225 }
2226 result
2227 }
2228
2229 fn size_hint(&self) -> (usize, Option<usize>) {
2230 let remaining = self.slices.len() - self.index;
2231 (remaining, Some(remaining))
2232 }
2233}
2234
// `size_hint` returns identical lower and upper bounds, so the length is exact.
impl<'a, Offset: OffsetType, Length: LengthType> ExactSizeIterator
    for CharsCowsIter<'a, Offset, Length>
{
}
2239
/// Borrowing iterator over the byte slices of a [`BytesCows`].
pub struct BytesCowsIter<'a, Offset: OffsetType, Length: LengthType> {
    /// Collection being iterated.
    slices: &'a BytesCows<'a, Offset, Length>,
    /// Index of the next entry to yield.
    index: usize,
}
2244
2245impl<'a, Offset: OffsetType, Length: LengthType> Iterator for BytesCowsIter<'a, Offset, Length> {
2246 type Item = &'a [u8];
2247
2248 fn next(&mut self) -> Option<Self::Item> {
2249 let result = self.slices.get(self.index);
2250 if result.is_some() {
2251 self.index += 1;
2252 }
2253 result
2254 }
2255
2256 fn size_hint(&self) -> (usize, Option<usize>) {
2257 let remaining = self.slices.len() - self.index;
2258 (remaining, Some(remaining))
2259 }
2260}
2261
// `size_hint` returns identical lower and upper bounds, so the length is exact.
impl<'a, Offset: OffsetType, Length: LengthType> ExactSizeIterator
    for BytesCowsIter<'a, Offset, Length>
{
}
2266
2267impl<'a, Offset: OffsetType, Length: LengthType> Index<usize> for CharsCows<'a, Offset, Length> {
2268 type Output = str;
2269
2270 fn index(&self, index: usize) -> &Self::Output {
2271 self.get(index).expect("index out of bounds")
2272 }
2273}
2274
2275impl<'a, Offset: OffsetType, Length: LengthType> Index<usize> for BytesCows<'a, Offset, Length> {
2276 type Output = [u8];
2277
2278 fn index(&self, index: usize) -> &Self::Output {
2279 self.get(index).expect("index out of bounds")
2280 }
2281}
2282
2283impl<'a, Offset: OffsetType, Length: LengthType> IntoIterator
2284 for &'a CharsCows<'a, Offset, Length>
2285{
2286 type Item = &'a str;
2287 type IntoIter = CharsCowsIter<'a, Offset, Length>;
2288
2289 fn into_iter(self) -> Self::IntoIter {
2290 self.iter()
2291 }
2292}
2293
2294impl<'a, Offset: OffsetType, Length: LengthType> IntoIterator
2295 for &'a BytesCows<'a, Offset, Length>
2296{
2297 type Item = &'a [u8];
2298 type IntoIter = BytesCowsIter<'a, Offset, Length>;
2299
2300 fn into_iter(self) -> Self::IntoIter {
2301 self.iter()
2302 }
2303}
2304
2305impl<'a, Offset: OffsetType, Length: LengthType> TryFrom<BytesCows<'a, Offset, Length>>
2307 for CharsCows<'a, Offset, Length>
2308{
2309 type Error = StringTapeError;
2310
2311 fn try_from(bytes_slices: BytesCows<'a, Offset, Length>) -> Result<Self, Self::Error> {
2312 for i in 0..bytes_slices.len() {
2314 let slice = bytes_slices
2315 .get(i)
2316 .ok_or(StringTapeError::IndexOutOfBounds)?;
2317 core::str::from_utf8(slice).map_err(StringTapeError::Utf8Error)?;
2318 }
2319
2320 Ok(CharsCows {
2322 data: bytes_slices.data,
2323 entries: bytes_slices.entries,
2324 })
2325 }
2326}
2327
2328impl<'a, Offset: OffsetType, Length: LengthType> From<CharsCows<'a, Offset, Length>>
2329 for BytesCows<'a, Offset, Length>
2330{
2331 fn from(chars_slices: CharsCows<'a, Offset, Length>) -> Self {
2332 BytesCows {
2334 data: chars_slices.data,
2335 entries: chars_slices.entries,
2336 }
2337 }
2338}
2339
2340impl<'a, Offset: OffsetType, Length: LengthType> BytesCows<'a, Offset, Length> {
2341 pub fn try_into_chars_slices(self) -> Result<CharsCows<'a, Offset, Length>, StringTapeError> {
2342 self.try_into()
2343 }
2344}
2345
2346impl<'a, Offset: OffsetType, Length: LengthType> CharsCows<'a, Offset, Length> {
2347 pub fn into_bytes_slices(self) -> BytesCows<'a, Offset, Length> {
2348 self.into()
2349 }
2350
2351 pub fn as_bytes(&self) -> BytesCows<'_, Offset, Length> {
2374 BytesCows {
2375 data: Cow::Borrowed(self.data.as_ref()),
2376 entries: self.entries.clone(),
2377 }
2378 }
2379}
2380
/// [`CharsCows`] with `u32` offsets and `u16` lengths.
pub type CharsCowsU32U16<'a> = CharsCows<'a, u32, u16>;
/// [`CharsCows`] with `u32` offsets and `u8` lengths.
pub type CharsCowsU32U8<'a> = CharsCows<'a, u32, u8>;
/// [`CharsCows`] with `u16` offsets and `u8` lengths.
pub type CharsCowsU16U8<'a> = CharsCows<'a, u16, u8>;
/// [`CharsCows`] with `u64` offsets and `u32` lengths.
pub type CharsCowsU64U32<'a> = CharsCows<'a, u64, u32>;

/// [`BytesCows`] with `u32` offsets and `u16` lengths.
pub type BytesCowsU32U16<'a> = BytesCows<'a, u32, u16>;
/// [`BytesCows`] with `u32` offsets and `u8` lengths.
pub type BytesCowsU32U8<'a> = BytesCows<'a, u32, u8>;
/// [`BytesCows`] with `u16` offsets and `u8` lengths.
pub type BytesCowsU16U8<'a> = BytesCows<'a, u16, u8>;
/// [`BytesCows`] with `u64` offsets and `u32` lengths.
pub type BytesCowsU64U32<'a> = BytesCows<'a, u64, u32>;
2391
/// A [`CharsCows`] whose offset and length widths are chosen at runtime.
///
/// Each variant is named `<offset width><length width>` and wraps the
/// matching concrete `CharsCows` instantiation.
pub enum CharsCowsAuto<'a> {
    /// `u32` offsets, `u8` lengths.
    U32U8(CharsCows<'a, u32, u8>),
    /// `u32` offsets, `u16` lengths.
    U32U16(CharsCows<'a, u32, u16>),
    /// `u32` offsets, `u32` lengths.
    U32U32(CharsCows<'a, u32, u32>),
    /// `u64` offsets, `u8` lengths.
    U64U8(CharsCows<'a, u64, u8>),
    /// `u64` offsets, `u16` lengths.
    U64U16(CharsCows<'a, u64, u16>),
    /// `u64` offsets, `u32` lengths.
    U64U32(CharsCows<'a, u64, u32>),
}
2407
2408impl<'a> CharsCowsAuto<'a> {
2409 pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2448 where
2449 I: IntoIterator + Clone,
2450 I::Item: AsRef<str>,
2451 {
2452 let data_len = data.len();
2453
2454 let max_word_len = iter
2456 .clone()
2457 .into_iter()
2458 .map(|s| s.as_ref().len())
2459 .max()
2460 .unwrap_or(0);
2461
2462 let needs_u64_offset = data_len > u32::MAX as usize;
2464
2465 if max_word_len <= u8::MAX as usize {
2467 if needs_u64_offset {
2468 Ok(Self::U64U8(CharsCows::from_iter_and_data(iter, data)?))
2469 } else {
2470 Ok(Self::U32U8(CharsCows::from_iter_and_data(iter, data)?))
2471 }
2472 } else if max_word_len <= u16::MAX as usize {
2473 if needs_u64_offset {
2474 Ok(Self::U64U16(CharsCows::from_iter_and_data(iter, data)?))
2475 } else {
2476 Ok(Self::U32U16(CharsCows::from_iter_and_data(iter, data)?))
2477 }
2478 } else if needs_u64_offset {
2479 Ok(Self::U64U32(CharsCows::from_iter_and_data(iter, data)?))
2480 } else {
2481 Ok(Self::U32U32(CharsCows::from_iter_and_data(iter, data)?))
2482 }
2483 }
2484
2485 pub fn len(&self) -> usize {
2487 match self {
2488 Self::U32U8(s) => s.len(),
2489 Self::U32U16(s) => s.len(),
2490 Self::U32U32(s) => s.len(),
2491 Self::U64U8(s) => s.len(),
2492 Self::U64U16(s) => s.len(),
2493 Self::U64U32(s) => s.len(),
2494 }
2495 }
2496
2497 pub fn is_empty(&self) -> bool {
2499 self.len() == 0
2500 }
2501
2502 pub fn get(&self, index: usize) -> Option<&str> {
2504 match self {
2505 Self::U32U8(s) => s.get(index),
2506 Self::U32U16(s) => s.get(index),
2507 Self::U32U32(s) => s.get(index),
2508 Self::U64U8(s) => s.get(index),
2509 Self::U64U16(s) => s.get(index),
2510 Self::U64U32(s) => s.get(index),
2511 }
2512 }
2513
2514 pub fn bytes_per_entry(&self) -> usize {
2516 match self {
2517 Self::U32U8(_) => 5, Self::U32U16(_) => 6, Self::U32U32(_) => 8, Self::U64U8(_) => 9, Self::U64U16(_) => 10, Self::U64U32(_) => 12, }
2524 }
2525
2526 pub fn type_name(&self) -> &'static str {
2528 match self {
2529 Self::U32U8(_) => "CharsCows<u32, u8>",
2530 Self::U32U16(_) => "CharsCows<u32, u16>",
2531 Self::U32U32(_) => "CharsCows<u32, u32>",
2532 Self::U64U8(_) => "CharsCows<u64, u8>",
2533 Self::U64U16(_) => "CharsCows<u64, u16>",
2534 Self::U64U32(_) => "CharsCows<u64, u32>",
2535 }
2536 }
2537
2538 pub fn iter(&self) -> CharsCowsAutoIter<'_> {
2557 CharsCowsAutoIter {
2558 inner: self,
2559 index: 0,
2560 }
2561 }
2562
2563 pub fn sort(&mut self) {
2585 match self {
2586 Self::U32U8(s) => s.sort(),
2587 Self::U32U16(s) => s.sort(),
2588 Self::U32U32(s) => s.sort(),
2589 Self::U64U8(s) => s.sort(),
2590 Self::U64U16(s) => s.sort(),
2591 Self::U64U32(s) => s.sort(),
2592 }
2593 }
2594
2595 pub fn sort_unstable(&mut self) {
2599 match self {
2600 Self::U32U8(s) => s.sort_unstable(),
2601 Self::U32U16(s) => s.sort_unstable(),
2602 Self::U32U32(s) => s.sort_unstable(),
2603 Self::U64U8(s) => s.sort_unstable(),
2604 Self::U64U16(s) => s.sort_unstable(),
2605 Self::U64U32(s) => s.sort_unstable(),
2606 }
2607 }
2608
2609 pub fn sort_by<F>(&mut self, compare: F)
2630 where
2631 F: FnMut(&str, &str) -> core::cmp::Ordering,
2632 {
2633 match self {
2634 Self::U32U8(s) => s.sort_by(compare),
2635 Self::U32U16(s) => s.sort_by(compare),
2636 Self::U32U32(s) => s.sort_by(compare),
2637 Self::U64U8(s) => s.sort_by(compare),
2638 Self::U64U16(s) => s.sort_by(compare),
2639 Self::U64U32(s) => s.sort_by(compare),
2640 }
2641 }
2642
2643 pub fn sort_by_key<K, F>(&mut self, f: F)
2664 where
2665 F: FnMut(&str) -> K,
2666 K: Ord,
2667 {
2668 match self {
2669 Self::U32U8(s) => s.sort_by_key(f),
2670 Self::U32U16(s) => s.sort_by_key(f),
2671 Self::U32U32(s) => s.sort_by_key(f),
2672 Self::U64U8(s) => s.sort_by_key(f),
2673 Self::U64U16(s) => s.sort_by_key(f),
2674 Self::U64U32(s) => s.sort_by_key(f),
2675 }
2676 }
2677
2678 pub fn as_bytes(&self) -> BytesCowsAuto<'_> {
2701 match self {
2702 Self::U32U8(s) => BytesCowsAuto::U32U8(s.as_bytes()),
2703 Self::U32U16(s) => BytesCowsAuto::U32U16(s.as_bytes()),
2704 Self::U32U32(s) => BytesCowsAuto::U32U32(s.as_bytes()),
2705 Self::U64U8(s) => BytesCowsAuto::U64U8(s.as_bytes()),
2706 Self::U64U16(s) => BytesCowsAuto::U64U16(s.as_bytes()),
2707 Self::U64U32(s) => BytesCowsAuto::U64U32(s.as_bytes()),
2708 }
2709 }
2710}
2711
/// Borrowing iterator over the strings of a [`CharsCowsAuto`].
pub struct CharsCowsAutoIter<'a> {
    /// Collection being iterated.
    inner: &'a CharsCowsAuto<'a>,
    /// Index of the next entry to yield.
    index: usize,
}
2717
2718impl<'a> Iterator for CharsCowsAutoIter<'a> {
2719 type Item = &'a str;
2720
2721 fn next(&mut self) -> Option<Self::Item> {
2722 let result = self.inner.get(self.index);
2723 if result.is_some() {
2724 self.index += 1;
2725 }
2726 result
2727 }
2728
2729 fn size_hint(&self) -> (usize, Option<usize>) {
2730 let remaining = self.inner.len() - self.index;
2731 (remaining, Some(remaining))
2732 }
2733}
2734
// `size_hint` returns identical lower and upper bounds, so the length is exact.
impl<'a> ExactSizeIterator for CharsCowsAutoIter<'a> {}
2736
2737impl<'a> IntoIterator for &'a CharsCowsAuto<'a> {
2738 type Item = &'a str;
2739 type IntoIter = CharsCowsAutoIter<'a>;
2740
2741 fn into_iter(self) -> Self::IntoIter {
2742 self.iter()
2743 }
2744}
2745
/// A [`BytesCows`] whose offset and length widths are chosen at runtime.
///
/// Each variant is named `<offset width><length width>` and wraps the
/// matching concrete `BytesCows` instantiation.
pub enum BytesCowsAuto<'a> {
    /// `u32` offsets, `u8` lengths.
    U32U8(BytesCows<'a, u32, u8>),
    /// `u32` offsets, `u16` lengths.
    U32U16(BytesCows<'a, u32, u16>),
    /// `u32` offsets, `u32` lengths.
    U32U32(BytesCows<'a, u32, u32>),
    /// `u64` offsets, `u8` lengths.
    U64U8(BytesCows<'a, u64, u8>),
    /// `u64` offsets, `u16` lengths.
    U64U16(BytesCows<'a, u64, u16>),
    /// `u64` offsets, `u32` lengths.
    U64U32(BytesCows<'a, u64, u32>),
}
2759
2760impl<'a> BytesCowsAuto<'a> {
2761 pub fn from_iter_and_data<I>(iter: I, data: Cow<'a, [u8]>) -> Result<Self, StringTapeError>
2764 where
2765 I: IntoIterator + Clone,
2766 I::Item: AsRef<[u8]>,
2767 {
2768 let data_len = data.len();
2769
2770 let max_len = iter
2772 .clone()
2773 .into_iter()
2774 .map(|b| b.as_ref().len())
2775 .max()
2776 .unwrap_or(0);
2777
2778 let needs_u64_offset = data_len > u32::MAX as usize;
2779
2780 if max_len <= u8::MAX as usize {
2782 if needs_u64_offset {
2783 Ok(Self::U64U8(BytesCows::from_iter_and_data(iter, data)?))
2784 } else {
2785 Ok(Self::U32U8(BytesCows::from_iter_and_data(iter, data)?))
2786 }
2787 } else if max_len <= u16::MAX as usize {
2788 if needs_u64_offset {
2789 Ok(Self::U64U16(BytesCows::from_iter_and_data(iter, data)?))
2790 } else {
2791 Ok(Self::U32U16(BytesCows::from_iter_and_data(iter, data)?))
2792 }
2793 } else if needs_u64_offset {
2794 Ok(Self::U64U32(BytesCows::from_iter_and_data(iter, data)?))
2795 } else {
2796 Ok(Self::U32U32(BytesCows::from_iter_and_data(iter, data)?))
2797 }
2798 }
2799
2800 pub fn len(&self) -> usize {
2801 match self {
2802 Self::U32U8(s) => s.len(),
2803 Self::U32U16(s) => s.len(),
2804 Self::U32U32(s) => s.len(),
2805 Self::U64U8(s) => s.len(),
2806 Self::U64U16(s) => s.len(),
2807 Self::U64U32(s) => s.len(),
2808 }
2809 }
2810
2811 pub fn is_empty(&self) -> bool {
2812 self.len() == 0
2813 }
2814
2815 pub fn get(&self, index: usize) -> Option<&[u8]> {
2816 match self {
2817 Self::U32U8(s) => s.get(index),
2818 Self::U32U16(s) => s.get(index),
2819 Self::U32U32(s) => s.get(index),
2820 Self::U64U8(s) => s.get(index),
2821 Self::U64U16(s) => s.get(index),
2822 Self::U64U32(s) => s.get(index),
2823 }
2824 }
2825
2826 pub fn as_chars(&self) -> Result<CharsCowsAuto<'_>, StringTapeError> {
2853 match self {
2854 Self::U32U8(s) => Ok(CharsCowsAuto::U32U8(s.as_chars()?)),
2855 Self::U32U16(s) => Ok(CharsCowsAuto::U32U16(s.as_chars()?)),
2856 Self::U32U32(s) => Ok(CharsCowsAuto::U32U32(s.as_chars()?)),
2857 Self::U64U8(s) => Ok(CharsCowsAuto::U64U8(s.as_chars()?)),
2858 Self::U64U16(s) => Ok(CharsCowsAuto::U64U16(s.as_chars()?)),
2859 Self::U64U32(s) => Ok(CharsCowsAuto::U64U32(s.as_chars()?)),
2860 }
2861 }
2862}
2863
/// A [`CharsTape`] whose offset type is chosen at runtime.
pub enum CharsTapeAuto<A: Allocator = Global> {
    /// Tape with `i32` offsets.
    I32(CharsTape<i32, A>),
    /// Tape with `u32` offsets.
    U32(CharsTape<u32, A>),
    /// Tape with `u64` offsets.
    U64(CharsTape<u64, A>),
}
2874
2875impl<A: Allocator> CharsTapeAuto<A> {
2876 pub fn new_in(allocator: A) -> Self {
2878 Self::I32(CharsTape::new_in(allocator))
2879 }
2880
2881 pub fn push(&mut self, s: &str) -> Result<(), StringTapeError> {
2882 match self {
2883 Self::I32(t) => t.push(s),
2884 Self::U32(t) => t.push(s),
2885 Self::U64(t) => t.push(s),
2886 }
2887 }
2888
2889 pub fn len(&self) -> usize {
2890 match self {
2891 Self::I32(t) => t.len(),
2892 Self::U32(t) => t.len(),
2893 Self::U64(t) => t.len(),
2894 }
2895 }
2896
2897 pub fn is_empty(&self) -> bool {
2898 self.len() == 0
2899 }
2900
2901 pub fn get(&self, index: usize) -> Option<&str> {
2902 match self {
2903 Self::I32(t) => t.get(index),
2904 Self::U32(t) => t.get(index),
2905 Self::U64(t) => t.get(index),
2906 }
2907 }
2908}
2909
2910impl Default for CharsTapeAuto<Global> {
2911 fn default() -> Self {
2912 Self::new_in(Global)
2913 }
2914}
2915
2916impl<A: Allocator + Clone> CharsTapeAuto<A> {
2917 pub fn from_iter_in<'a, I>(iter: I, allocator: A) -> Self
2920 where
2921 I: IntoIterator<Item = &'a str> + Clone,
2922 {
2923 let total_size: usize = iter.clone().into_iter().map(|s| s.len()).sum();
2925
2926 if total_size <= i32::MAX as usize {
2928 let mut tape = CharsTape::new_in(allocator);
2929 for s in iter {
2930 tape.push(s).ok();
2931 }
2932 Self::I32(tape)
2933 } else if total_size <= u32::MAX as usize {
2934 let mut tape = CharsTape::new_in(allocator);
2935 for s in iter {
2936 tape.push(s).ok();
2937 }
2938 Self::U32(tape)
2939 } else {
2940 let mut tape = CharsTape::new_in(allocator);
2941 for s in iter {
2942 tape.push(s).ok();
2943 }
2944 Self::U64(tape)
2945 }
2946 }
2947}
2948
2949impl CharsTapeAuto<Global> {
2950 #[allow(clippy::should_implement_trait)]
2952 pub fn from_iter<'a, I>(iter: I) -> Self
2953 where
2954 I: IntoIterator<Item = &'a str> + Clone,
2955 {
2956 Self::from_iter_in(iter, Global)
2957 }
2958}
2959
/// A [`BytesTape`] whose offset type is chosen at runtime.
pub enum BytesTapeAuto<A: Allocator = Global> {
    /// Tape with `u16` offsets.
    U16(BytesTape<u16, A>),
    /// Tape with `u32` offsets.
    U32(BytesTape<u32, A>),
    /// Tape with `u64` offsets.
    U64(BytesTape<u64, A>),
}
2970
2971impl<A: Allocator> BytesTapeAuto<A> {
2972 pub fn new_in(allocator: A) -> Self {
2974 Self::U16(BytesTape::new_in(allocator))
2975 }
2976
2977 pub fn push(&mut self, bytes: &[u8]) -> Result<(), StringTapeError> {
2978 match self {
2979 Self::U16(t) => t.push(bytes),
2980 Self::U32(t) => t.push(bytes),
2981 Self::U64(t) => t.push(bytes),
2982 }
2983 }
2984
2985 pub fn len(&self) -> usize {
2986 match self {
2987 Self::U16(t) => t.len(),
2988 Self::U32(t) => t.len(),
2989 Self::U64(t) => t.len(),
2990 }
2991 }
2992
2993 pub fn is_empty(&self) -> bool {
2994 self.len() == 0
2995 }
2996
2997 pub fn get(&self, index: usize) -> Option<&[u8]> {
2998 match self {
2999 Self::U16(t) => t.get(index),
3000 Self::U32(t) => t.get(index),
3001 Self::U64(t) => t.get(index),
3002 }
3003 }
3004}
3005
3006impl Default for BytesTapeAuto<Global> {
3007 fn default() -> Self {
3008 Self::new_in(Global)
3009 }
3010}
3011
3012impl<A: Allocator + Clone> BytesTapeAuto<A> {
3013 pub fn from_iter_in<'a, I>(iter: I, allocator: A) -> Self
3016 where
3017 I: IntoIterator<Item = &'a [u8]> + Clone,
3018 {
3019 let total_size: usize = iter.clone().into_iter().map(|b| b.len()).sum();
3021
3022 if total_size <= u16::MAX as usize {
3024 let mut tape = BytesTape::new_in(allocator);
3025 for bytes in iter {
3026 tape.push(bytes).ok();
3027 }
3028 Self::U16(tape)
3029 } else if total_size <= u32::MAX as usize {
3030 let mut tape = BytesTape::new_in(allocator);
3031 for bytes in iter {
3032 tape.push(bytes).ok();
3033 }
3034 Self::U32(tape)
3035 } else {
3036 let mut tape = BytesTape::new_in(allocator);
3037 for bytes in iter {
3038 tape.push(bytes).ok();
3039 }
3040 Self::U64(tape)
3041 }
3042 }
3043}
3044
3045impl BytesTapeAuto<Global> {
3046 #[allow(clippy::should_implement_trait)]
3048 pub fn from_iter<'a, I>(iter: I) -> Self
3049 where
3050 I: IntoIterator<Item = &'a [u8]> + Clone,
3051 {
3052 Self::from_iter_in(iter, Global)
3053 }
3054}
3055
3056#[cfg(test)]
3057mod tests {
3058 use super::*;
3059
3060 #[cfg(not(feature = "std"))]
3061 use alloc::string::ToString;
3062 #[cfg(not(feature = "std"))]
3063 use alloc::vec;
3064 #[cfg(not(feature = "std"))]
3065 use alloc::vec::Vec;
3066
3067 #[test]
3068 fn basic_operations() {
3069 let mut tape = CharsTapeI32::new();
3070 assert!(tape.is_empty());
3071
3072 tape.push("hello").unwrap();
3073 tape.push("world").unwrap();
3074 tape.push("foo").unwrap();
3075
3076 assert_eq!(tape.len(), 3);
3077 assert_eq!(tape.get(0), Some("hello"));
3078 assert_eq!(tape.get(1), Some("world"));
3079 assert_eq!(tape.get(2), Some("foo"));
3080 assert_eq!(tape.get(3), None);
3081 }
3082
3083 #[test]
3084 fn unsigned_basic_operations() {
3085 let mut t32 = CharsTapeU32::new();
3087 t32.push("hello").unwrap();
3088 t32.push("world").unwrap();
3089 assert_eq!(t32.len(), 2);
3090 assert_eq!(t32.get(0), Some("hello"));
3091 assert_eq!(t32.get(1), Some("world"));
3092
3093 let mut t64 = CharsTapeU64::new();
3095 t64.extend(["a", "", "bbb"]).unwrap();
3096 assert_eq!(t64.len(), 3);
3097 assert_eq!(t64.get(0), Some("a"));
3098 assert_eq!(t64.get(1), Some(""));
3099 assert_eq!(t64.get(2), Some("bbb"));
3100 }
3101
3102 #[test]
3103 fn offsets_64bit() {
3104 let mut tape = CharsTapeI64::new();
3105 tape.push("test").unwrap();
3106 assert_eq!(tape.get(0), Some("test"));
3107 }
3108
3109 #[test]
3110 fn iterator_basics() {
3111 let mut tape = CharsTapeI32::new();
3112 tape.push("a").unwrap();
3113 tape.push("b").unwrap();
3114 tape.push("c").unwrap();
3115
3116 let strings: Vec<&str> = tape.iter().collect();
3117 assert_eq!(strings, vec!["a", "b", "c"]);
3118 }
3119
3120 #[test]
3121 fn empty_strings() {
3122 let mut tape = CharsTapeI32::new();
3123 tape.push("").unwrap();
3124 tape.push("non-empty").unwrap();
3125 tape.push("").unwrap();
3126
3127 assert_eq!(tape.len(), 3);
3128 assert_eq!(tape.get(0), Some(""));
3129 assert_eq!(tape.get(1), Some("non-empty"));
3130 assert_eq!(tape.get(2), Some(""));
3131 }
3132
3133 #[test]
3134 fn index_trait() {
3135 let mut tape = CharsTapeI32::new();
3136 tape.push("hello").unwrap();
3137 tape.push("world").unwrap();
3138
3139 assert_eq!(&tape[0], "hello");
3140 assert_eq!(&tape[1], "world");
3141 }
3142
3143 #[test]
3144 fn into_iterator() {
3145 let mut tape = CharsTapeI32::new();
3146 tape.push("a").unwrap();
3147 tape.push("b").unwrap();
3148 tape.push("c").unwrap();
3149
3150 let strings: Vec<&str> = (&tape).into_iter().collect();
3151 assert_eq!(strings, vec!["a", "b", "c"]);
3152
3153 let mut result = Vec::new();
3155 for s in &tape {
3156 result.push(s);
3157 }
3158 assert_eq!(result, vec!["a", "b", "c"]);
3159 }
3160
3161 #[test]
3162 fn from_iterator() {
3163 let strings = vec!["hello", "world", "test"];
3164 let tape: CharsTapeI32 = strings.into_iter().collect();
3165
3166 assert_eq!(tape.len(), 3);
3167 assert_eq!(tape.get(0), Some("hello"));
3168 assert_eq!(tape.get(1), Some("world"));
3169 assert_eq!(tape.get(2), Some("test"));
3170 }
3171
3172 #[test]
3173 fn from_iterator_unsigned() {
3174 let strings = vec!["hello", "world", "test"];
3175 let tape_u32: CharsTapeU32 = strings.clone().into_iter().collect();
3176 let tape_u64: CharsTapeU64 = strings.clone().into_iter().collect();
3177 assert_eq!(tape_u32.len(), 3);
3178 assert_eq!(tape_u64.len(), 3);
3179 assert_eq!(tape_u32.get(1), Some("world"));
3180 assert_eq!(tape_u64.get(2), Some("test"));
3181 }
3182
3183 #[test]
3184 fn extend() {
3185 let mut tape = CharsTapeI32::new();
3186 tape.push("initial").unwrap();
3187
3188 let additional = vec!["hello", "world"];
3189 tape.extend(additional).unwrap();
3190
3191 assert_eq!(tape.len(), 3);
3192 assert_eq!(tape.get(0), Some("initial"));
3193 assert_eq!(tape.get(1), Some("hello"));
3194 assert_eq!(tape.get(2), Some("world"));
3195 }
3196
3197 #[test]
3198 fn clear_and_truncate() {
3199 let mut tape = CharsTapeI32::new();
3200 tape.push("a").unwrap();
3201 tape.push("b").unwrap();
3202 tape.push("c").unwrap();
3203
3204 assert_eq!(tape.len(), 3);
3205
3206 tape.truncate(2);
3207 assert_eq!(tape.len(), 2);
3208 assert_eq!(tape.get(0), Some("a"));
3209 assert_eq!(tape.get(1), Some("b"));
3210 assert_eq!(tape.get(2), None);
3211
3212 tape.clear();
3213 assert_eq!(tape.len(), 0);
3214 assert!(tape.is_empty());
3215 }
3216
3217 #[test]
3218 fn unsigned_views_and_subviews() {
3219 let mut tape = CharsTapeU32::new();
3220 tape.extend(["0", "1", "22", "333"]).unwrap();
3221 let view = tape.subview(1, 4).unwrap();
3222 assert_eq!(view.len(), 3);
3223 assert_eq!(view.get(0), Some("1"));
3224 assert_eq!(view.get(2), Some("333"));
3225 let sub = view.subview(1, 2).unwrap();
3226 assert_eq!(sub.len(), 1);
3227 assert_eq!(sub.get(0), Some("22"));
3228 }
3229
3230 #[test]
3231 fn capacity() {
3232 let tape = CharsTapeI32::with_capacity(100, 10).unwrap();
3233 assert_eq!(tape.data_capacity(), 100);
3234 assert_eq!(tape.capacity(), 0); }
3236
/// A tape created through `new_in(Global)` behaves like a default one and
/// exposes its allocator.
#[test]
fn custom_allocator() {
    let words = ["hello", "world"];

    let mut tape: CharsTape<i32, Global> = CharsTape::new_in(Global);
    for word in words {
        tape.push(word).unwrap();
    }

    assert_eq!(tape.len(), 2);
    for (idx, want) in words.iter().enumerate() {
        assert_eq!(tape.get(idx), Some(*want));
    }

    // Only checks that the accessor can be called; the value itself is unused.
    let _allocator_ref = tape.allocator();
}
3252
/// `with_capacity_in` yields an empty tape whose data capacity matches the request.
#[test]
fn custom_allocator_with_capacity() {
    let reserved = CharsTape::<i64, Global>::with_capacity_in(256, 50, Global).unwrap();

    assert_eq!(reserved.data_capacity(), 256);
    assert!(reserved.is_empty());
}
3260
/// Pushed byte slices can be read back through indexing.
#[test]
fn bytes_tape_basic() {
    let numbers: &[u8] = &[1, 2, 3];
    let letters: &[u8] = b"abc";

    let mut tape = BytesTapeI32::new();
    for chunk in [numbers, letters] {
        tape.push(chunk).unwrap();
    }

    assert_eq!(tape.len(), 2);
    assert_eq!(&tape[0], numbers);
    assert_eq!(&tape[1], letters);
}
3271
/// Same push/index round trip as `bytes_tape_basic`, on a `BytesTapeU64`.
#[test]
fn unsigned_bytes_tape_basic() {
    let short: &[u8] = &[1, 2];
    let long: &[u8] = &[3, 4, 5];

    let mut tape = BytesTapeU64::new();
    for chunk in [short, long] {
        tape.push(chunk).unwrap();
    }

    assert_eq!(tape.len(), 2);
    assert_eq!(&tape[0], short);
    assert_eq!(&tape[1], long);
}
3281
/// A subview over items 1..3 exposes exactly those items through `get` and indexing.
#[test]
fn chars_tape_view_basic() {
    let mut tape = CharsTapeI32::new();
    for word in ["hello", "world", "foo", "bar"] {
        tape.push(word).unwrap();
    }

    let window = tape.subview(1, 3).unwrap();
    let visible = ["world", "foo"];

    assert_eq!(window.len(), 2);
    for (idx, want) in visible.iter().enumerate() {
        assert_eq!(window.get(idx), Some(*want));
    }
    // One past the end of the view is `None`, even though the tape has more items.
    assert_eq!(window.get(2), None);

    for (idx, want) in visible.iter().enumerate() {
        assert_eq!(&window[idx], *want);
    }
}
3301
/// `view()` covers the whole tape while `subview(1, 3)` covers the middle two items.
#[test]
fn chars_tape_range_syntax() {
    let mut tape = CharsTapeI32::new();
    for letter in ["a", "b", "c", "d"] {
        tape.push(letter).unwrap();
    }

    let everything = tape.view();
    assert_eq!(everything.len(), 4);
    assert_eq!(everything.get(0), Some("a"));
    assert_eq!(everything.get(3), Some("d"));

    let middle = tape.subview(1, 3).unwrap();
    assert_eq!(middle.len(), 2);
    for (idx, want) in ["b", "c"].iter().enumerate() {
        assert_eq!(middle.get(idx), Some(*want));
    }
}
3322
/// Subviews taken from a view use indices relative to that view.
#[test]
fn chars_tape_view_subslicing() {
    let mut tape = CharsTapeI32::new();
    for digit in ["0", "1", "2", "3", "4"] {
        tape.push(digit).unwrap();
    }

    // `window` holds "1", "2", "3".
    let window = tape.subview(1, 4).unwrap();
    assert_eq!(window.len(), 3);

    let single = window.subview(1, 2).unwrap();
    assert_eq!(single.len(), 1);
    assert_eq!(single.get(0), Some("2"));

    let tail = window.subview(1, window.len()).unwrap();
    assert_eq!(tail.len(), 2);
    for (idx, want) in ["2", "3"].iter().enumerate() {
        assert_eq!(tail.get(idx), Some(*want));
    }

    let head = window.subview(0, 2).unwrap();
    assert_eq!(head.len(), 2);
    for (idx, want) in ["1", "2"].iter().enumerate() {
        assert_eq!(head.get(idx), Some(*want));
    }
}
3352
/// A subview over a `BytesTapeI32` exposes the selected byte slices.
#[test]
fn bytes_tape_view_basic() {
    let pairs: [&[u8]; 4] = [&[1, 2], &[3, 4], &[5, 6], &[7, 8]];

    let mut tape = BytesTapeI32::new();
    for pair in pairs {
        tape.push(pair).unwrap();
    }

    let window = tape.subview(1, 3).unwrap();
    assert_eq!(window.len(), 2);
    assert_eq!(window.get(0), Some(pairs[1]));
    assert_eq!(window.get(1), Some(pairs[2]));
    assert_eq!(window.get(2), None);

    assert_eq!(&window[0], pairs[1]);
    assert_eq!(&window[1], pairs[2]);
}
3372
/// Empty strings inside a view are reported as `Some("")`, not `None`.
#[test]
fn view_empty_strings() {
    let mut tape = CharsTapeI32::new();
    for item in ["", "non-empty", "", "another"] {
        tape.push(item).unwrap();
    }

    let leading = tape.subview(0, 3).unwrap();
    let expected = ["", "non-empty", ""];

    assert_eq!(leading.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(leading.get(idx), Some(*want));
    }
}
3387
/// A view over a one-item tape contains exactly that item.
#[test]
fn view_single_item() {
    let mut tape = CharsTapeI32::new();
    tape.push("only").unwrap();

    let lone = tape.subview(0, 1).unwrap();

    assert_eq!(lone.len(), 1);
    assert_eq!(lone.get(0), Some("only"));
}
3397
/// `subview` rejects out-of-range or inverted bounds and accepts an empty range.
#[test]
fn view_bounds_checking() {
    let mut tape = CharsTapeI32::new();
    for letter in ["a", "b"] {
        tape.push(letter).unwrap();
    }

    // End past the tape, start after end, and both bounds past the tape.
    for (start, end) in [(0, 3), (2, 1), (3, 4)] {
        assert!(tape.subview(start, end).is_err());
    }

    let nothing = tape.subview(1, 1).unwrap();
    assert_eq!(nothing.len(), 0);
    assert!(nothing.is_empty());
}
3414
/// `data_len` of a view is the total byte length of the items it covers.
#[test]
fn view_data_properties() {
    let mut tape = CharsTapeI32::new();
    for item in ["hello", "world", "!"] {
        tape.push(item).unwrap();
    }

    // "hello" + "world" = 10 bytes.
    let first_two = tape.subview(0, 2).unwrap();
    assert_eq!(first_two.data_len(), 10);
    assert!(!first_two.is_empty());

    // Adding "!" brings the total to 11 bytes.
    let all_three = tape.subview(0, 3).unwrap();
    assert_eq!(all_three.data_len(), 11);
}
3429
/// `as_raw_parts` on a full view returns non-null pointers and matching lengths.
#[test]
fn view_raw_parts() {
    let mut tape = CharsTapeI32::new();
    for word in ["test", "data"] {
        tape.push(word).unwrap();
    }

    let raw = tape.subview(0, 2).unwrap().as_raw_parts();

    assert!(!raw.data_ptr.is_null());
    assert!(!raw.offsets_ptr.is_null());
    // "test" + "data" = 8 bytes over 2 items.
    assert_eq!(raw.data_len, 8);
    assert_eq!(raw.items_count, 2);
}
3444
/// The raw parts of a `CharsTapeI32` subview expose non-decreasing offsets
/// whose last entry equals `data_len`.
#[test]
fn subview_raw_parts_consistency_chars() {
    let mut tape = CharsTapeI32::new();
    tape.extend(["abc", "", "xyz", "pq"]).unwrap();

    let view = tape.subview(1, 3).unwrap();
    let parts = view.as_raw_parts();
    let slot_count = parts.items_count + 1;

    // SAFETY: NOTE(review) — assumes `offsets_ptr` addresses `items_count + 1`
    // readable `i32` values that live as long as `tape`; confirm against
    // `as_raw_parts`.
    let offsets: &[i32] = unsafe { core::slice::from_raw_parts(parts.offsets_ptr, slot_count) };

    assert_eq!(offsets.len(), slot_count);
    assert!(offsets.iter().zip(&offsets[1..]).all(|(lo, hi)| lo <= hi));
    let final_offset = *offsets.last().unwrap() as usize;
    assert_eq!(final_offset, parts.data_len);

    assert_eq!(view.len(), 2);
    assert_eq!(view.get(0), Some(""));
    assert_eq!(view.get(1), Some("xyz"));
}
3469
/// The raw parts of a `BytesTapeI32` subview expose non-decreasing offsets
/// whose last entry equals `data_len`.
#[test]
fn subview_raw_parts_consistency_bytes() {
    let items: [&[u8]; 4] = [b"a", b"", b"bc", b"def"];

    let mut tape = BytesTapeI32::new();
    tape.extend(items).unwrap();

    let view = tape.subview(2, 4).unwrap();
    let parts = view.as_raw_parts();
    let slot_count = parts.items_count + 1;

    // SAFETY: NOTE(review) — assumes `offsets_ptr` addresses `items_count + 1`
    // readable `i32` values that live as long as `tape`; confirm against
    // `as_raw_parts`.
    let offsets: &[i32] = unsafe { core::slice::from_raw_parts(parts.offsets_ptr, slot_count) };

    assert_eq!(offsets.len(), slot_count);
    assert!(offsets.iter().zip(&offsets[1..]).all(|(lo, hi)| lo <= hi));
    let final_offset = *offsets.last().unwrap() as usize;
    assert_eq!(final_offset, parts.data_len);

    assert_eq!(view.len(), 2);
    assert_eq!(view.get(0), Some(items[2]));
    assert_eq!(view.get(1), Some(items[3]));
}
3498
/// Compile-time check: subviews can be bound to the `CharsTapeViewI32` and
/// `BytesTapeViewI64` aliases.
#[test]
fn view_type_aliases() {
    let mut text_tape = CharsTapeI32::new();
    text_tape.push("test").unwrap();
    let _text_view: CharsTapeViewI32 = text_tape.subview(0, 1).unwrap();

    let mut binary_tape = BytesTapeI64::new();
    binary_tape.push(b"test").unwrap();
    let _binary_view: BytesTapeViewI64 = binary_tape.subview(0, 1).unwrap();
}
3511
/// A `CharsTapeI32` can be collected from the iterators of tapes built with
/// the `U32`, `U64` and `I64` aliases.
#[test]
fn build_i32_from_other_offset_iterators() {
    let items = ["x", "yy", "", "zzz"];

    let mut source_u32 = CharsTapeU32::new();
    source_u32.extend(items).unwrap();
    let via_u32: CharsTapeI32 = source_u32.iter().collect();
    assert_eq!(via_u32.len(), items.len());
    assert_eq!(via_u32.get(1), Some("yy"));

    let mut source_u64 = CharsTapeU64::new();
    source_u64.extend(items).unwrap();
    let via_u64: CharsTapeI32 = source_u64.iter().collect();
    assert_eq!(via_u64.len(), items.len());
    assert_eq!(via_u64.get(3), Some("zzz"));

    let mut source_i64 = CharsTapeI64::new();
    source_i64.extend(items).unwrap();
    let via_i64: CharsTapeI32 = source_i64.iter().collect();
    assert_eq!(via_i64.len(), items.len());
    assert_eq!(via_i64.get(2), Some(""));
}
3537
/// A full view, a subview of the tape, and a subview of that subview all
/// report the expected lengths and items.
#[test]
fn range_indexing_syntax() {
    let mut tape = CharsTapeI32::new();
    for letter in ["a", "b", "c", "d"] {
        tape.push(letter).unwrap();
    }

    let everything = tape.view();
    assert_eq!(everything.len(), 4);

    let middle = tape.subview(1, 3).unwrap();
    assert_eq!(middle.len(), 2);
    for (idx, want) in ["b", "c"].iter().enumerate() {
        assert_eq!(middle.get(idx), Some(*want));
    }

    let first_of_middle = middle.subview(0, 1).unwrap();
    assert_eq!(first_of_middle.len(), 1);
    assert_eq!(first_of_middle.get(0), Some("b"));
}
3564
3565 #[cfg(test)]
3566 use arrow::array::{Array, BinaryArray, StringArray};
3567 #[cfg(test)]
3568 use arrow::buffer::{Buffer, OffsetBuffer, ScalarBuffer};
3569
/// The slices returned by `arrow_slices` can be wrapped into an Arrow `StringArray`.
#[test]
fn charstape_to_arrow_string_array() {
    let mut tape = CharsTapeI32::new();
    tape.extend(["hello", "world", "", "arrow"]).unwrap();

    let (bytes, offsets) = tape.arrow_slices();

    let offset_count = offsets.len();
    let scalar_offsets = ScalarBuffer::new(Buffer::from_slice_ref(offsets), 0, offset_count);
    let offsets_buffer = OffsetBuffer::new(scalar_offsets);
    let values_buffer = Buffer::from_slice_ref(bytes);

    let array = StringArray::new(offsets_buffer, values_buffer, None);

    assert_eq!(array.len(), 4);
    assert_eq!(array.value(0), "hello");
    assert_eq!(array.value(2), "");
}
3588
/// A `CharsTapeViewI32` can be built over the buffers of an Arrow `StringArray`.
#[test]
fn arrow_string_array_to_charstape_view() {
    let expected = ["foo", "bar", ""];
    let arrow_array = StringArray::from(expected.to_vec());

    let values = arrow_array.values();
    let offsets = arrow_array.offsets().as_ref();
    // SAFETY: both slices are borrowed from `arrow_array`, which outlives the
    // view. NOTE(review): any further preconditions of `from_raw_parts` are not
    // visible from this test — confirm against its documentation.
    let view = unsafe { CharsTapeViewI32::from_raw_parts(values, offsets) };

    assert_eq!(view.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(view.get(idx), Some(*want));
    }
}
3603
/// A `BytesTapeViewI32` can be built over the buffers of an Arrow `BinaryArray`.
#[test]
fn arrow_binary_array_to_bytestape_view() {
    let payloads: [&[u8]; 3] = [&[1, 2, 3], &[], &[4, 5]];
    let wrapped: Vec<Option<&[u8]>> = payloads.iter().copied().map(Some).collect();
    let arrow_array = BinaryArray::from(wrapped);

    let values = arrow_array.values();
    let offsets = arrow_array.offsets().as_ref();
    // SAFETY: both slices are borrowed from `arrow_array`, which outlives the
    // view. NOTE(review): any further preconditions of `from_raw_parts` are not
    // visible from this test — confirm against its documentation.
    let view = unsafe { BytesTapeViewI32::from_raw_parts(values, offsets) };

    assert_eq!(view.len(), 3);
    for (idx, want) in payloads.iter().enumerate() {
        assert_eq!(view.get(idx), Some(*want));
    }
}
3623
/// Tape -> Arrow `StringArray` -> `CharsTapeViewI32` preserves every item.
#[test]
fn zero_copy_roundtrip() {
    let expected = ["hello", "", "world"];

    let mut tape = CharsTapeI32::new();
    tape.extend(expected).unwrap();

    let (bytes, offsets) = tape.arrow_slices();
    let offset_count = offsets.len();
    let scalar_offsets = ScalarBuffer::new(Buffer::from_slice_ref(offsets), 0, offset_count);
    let offsets_buffer = OffsetBuffer::new(scalar_offsets);
    let values_buffer = Buffer::from_slice_ref(bytes);
    let arrow_array = StringArray::new(offsets_buffer, values_buffer, None);

    let values = arrow_array.values();
    let arrow_offsets = arrow_array.offsets().as_ref();
    // SAFETY: both slices are borrowed from `arrow_array`, which outlives the
    // view. NOTE(review): any further preconditions of `from_raw_parts` are not
    // visible from this test — confirm against its documentation.
    let view = unsafe { CharsTapeViewI32::from_raw_parts(values, arrow_offsets) };

    assert_eq!(view.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(view.get(idx), Some(*want));
    }
}
3651
/// A `BytesTapeI32` holding valid UTF-8 converts into a `CharsTapeI32` via `try_into`.
#[test]
fn bytes_to_string_conversion() {
    let raw_items: [&[u8]; 4] = [b"hello", b"world", b"", b"rust"];

    let mut bytes_tape = BytesTapeI32::new();
    for raw in raw_items {
        bytes_tape.push(raw).unwrap();
    }

    let converted: Result<CharsTapeI32, _> = bytes_tape.try_into();
    assert!(converted.is_ok());
    let chars_tape = converted.unwrap();

    assert_eq!(chars_tape.len(), 4);
    for (idx, want) in ["hello", "world", "", "rust"].iter().enumerate() {
        assert_eq!(chars_tape.get(idx), Some(*want));
    }
}
3671
/// Converting a `BytesTapeI32` that contains invalid UTF-8 fails with `Utf8Error`.
#[test]
fn bytes_to_string_invalid_utf8() {
    let mut bytes_tape = BytesTapeI32::new();
    bytes_tape.push(b"valid").unwrap();
    // 0xFF 0xFE is not a valid UTF-8 sequence.
    bytes_tape.push(&[0xFF, 0xFE]).unwrap();
    bytes_tape.push(b"also valid").unwrap();

    let converted: Result<CharsTapeI32, _> = bytes_tape.try_into();

    assert!(converted.is_err());
    assert!(
        matches!(converted, Err(StringTapeError::Utf8Error(_))),
        "Expected Utf8Error"
    );
}
3688
/// Converting a `CharsTapeI32` into a `BytesTapeI32` preserves the UTF-8 bytes
/// of every item, including multi-byte characters.
#[test]
fn string_to_bytes_conversion() {
    // The non-ASCII literals were stored as mojibake (their UTF-8 bytes decoded
    // as ISO-8859-7); they are restored here to the intended CJK text and emoji
    // so the test covers 3-byte and 4-byte UTF-8 sequences.
    let words = ["hello", "世界", "", "🦀"];

    let mut chars_tape = CharsTapeI32::new();
    for word in words {
        chars_tape.push(word).unwrap();
    }

    let bytes_tape: BytesTapeI32 = chars_tape.into();

    assert_eq!(bytes_tape.len(), 4);
    for (idx, word) in words.iter().enumerate() {
        assert_eq!(&bytes_tape[idx], word.as_bytes());
    }
}
3705
/// `try_into_chars_tape` and `into_bytes_tape` convert between the two tape kinds.
#[test]
fn conversion_convenience_methods() {
    let mut raw = BytesTapeI32::new();
    raw.push(b"test").unwrap();
    let as_chars = raw.try_into_chars_tape();
    assert!(as_chars.is_ok());
    let as_chars = as_chars.unwrap();
    assert_eq!(as_chars.get(0), Some("test"));

    let mut text = CharsTapeI32::new();
    text.push("test").unwrap();
    let as_bytes = text.into_bytes_tape();
    assert_eq!(&as_bytes[0], b"test");
}
3721
/// chars -> bytes -> chars returns the same items in the same order.
#[test]
fn conversion_round_trip() {
    let expected = vec!["first", "second", "third"];

    let mut original = CharsTapeI32::new();
    for item in &expected {
        original.push(item).unwrap();
    }

    let as_bytes: BytesTapeI32 = original.into();
    let recovered: CharsTapeI32 = as_bytes.try_into().unwrap();

    assert_eq!(expected.len(), recovered.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(recovered.get(idx), Some(*want));
    }
}
3742
/// A chars view converts to a bytes view and back without losing any item
/// when the data is valid UTF-8.
#[test]
fn view_to_view_conversions_valid_utf8() {
    // The non-ASCII literal was stored as mojibake (its UTF-8 bytes decoded as
    // ISO-8859-7); it is restored here to the intended CJK text so the test
    // covers multi-byte UTF-8 sequences.
    let items = ["abc", "", "世界"];

    let mut ct = CharsTapeI32::new();
    ct.extend(items).unwrap();
    let chars_view = ct.view();

    let bytes_view: BytesTapeViewI32 = chars_view.into_bytes_view();
    assert_eq!(bytes_view.len(), 3);
    for (idx, item) in items.iter().enumerate() {
        assert_eq!(bytes_view.get(idx), Some(item.as_bytes()));
    }

    let chars_back: Result<CharsTapeViewI32, _> = bytes_view.try_into_chars_view();
    assert!(chars_back.is_ok());
    let chars_back = chars_back.unwrap();
    assert_eq!(chars_back.len(), 3);
    for (idx, item) in items.iter().enumerate() {
        assert_eq!(chars_back.get(idx), Some(*item));
    }
}
3766
/// `try_into_chars_view` fails with `Utf8Error` when the bytes view holds invalid UTF-8.
#[test]
fn view_to_view_bytes_to_chars_invalid_utf8() {
    let mut tape = BytesTapeI32::new();
    tape.push(b"ok").unwrap();
    // 0xFF 0xFE is not a valid UTF-8 sequence.
    tape.push(&[0xFF, 0xFE]).unwrap();

    let outcome: Result<CharsTapeViewI32, _> = tape.view().try_into_chars_view();

    assert!(outcome.is_err());
    assert!(
        matches!(outcome, Err(StringTapeError::Utf8Error(_))),
        "Expected Utf8Error"
    );
}
3783
/// `CharsCowsU32U16` built from whitespace-split words returns each word by index.
#[test]
fn chars_slices_basic() {
    let text = "hello world foo bar";
    let backing = Cow::Borrowed(text.as_bytes());

    let cows = CharsCowsU32U16::from_iter_and_data(text.split_whitespace(), backing).unwrap();

    let expected = ["hello", "world", "foo", "bar"];
    assert_eq!(cows.len(), 4);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(cows.get(idx), Some(*want));
    }
    assert_eq!(cows.get(4), None);
}
3800
/// `CharsCowsU64U32` supports `[]` indexing.
#[test]
fn chars_slices_index() {
    let text = "abc def";
    let backing = Cow::Borrowed(text.as_bytes());

    let cows = CharsCowsU64U32::from_iter_and_data(text.split_whitespace(), backing).unwrap();

    for (idx, want) in ["abc", "def"].iter().enumerate() {
        assert_eq!(&cows[idx], *want);
    }
}
3814
/// Both `iter()` and `IntoIterator for &CharsCows…` visit every entry in order.
#[test]
fn chars_slices_iterator() {
    let text = "a b c";
    let backing = Cow::Borrowed(text.as_bytes());

    let cows = CharsCowsU64U32::from_iter_and_data(text.split_whitespace(), backing).unwrap();

    let collected: Vec<&str> = cows.iter().collect();
    assert_eq!(collected, vec!["a", "b", "c"]);

    // Iterate through the by-reference `IntoIterator` impl, checking each entry.
    let visited = (&cows)
        .into_iter()
        .inspect(|entry| assert_eq!(entry.len(), 1))
        .count();
    assert_eq!(visited, 3);
}
3836
/// Slices may be supplied in any order relative to their position in the data.
#[test]
fn chars_slices_arbitrary_order() {
    let digits = "0123456789";
    // Deliberately not sorted by position within `digits`.
    let picks = [&digits[5..7], &digits[0..1], &digits[9..10], &digits[2..5]];

    let cows =
        CharsCowsU64U32::from_iter_and_data(picks, Cow::Borrowed(digits.as_bytes())).unwrap();

    for (idx, want) in ["56", "0", "9", "234"].iter().enumerate() {
        assert_eq!(cows.get(idx), Some(*want));
    }
}
3855
/// Zero-length slices at the start and at the end of the data are accepted.
#[test]
fn chars_slices_empty_strings() {
    let text = "ab";
    // Empty at offset 0, "b", and empty at the very end of `text`.
    let picks = [&text[0..0], &text[1..2], &text[2..2]];

    let cows =
        CharsCowsU64U32::from_iter_and_data(picks, Cow::Borrowed(text.as_bytes())).unwrap();

    assert_eq!(cows.len(), 3);
    for (idx, want) in ["", "b", ""].iter().enumerate() {
        assert_eq!(cows.get(idx), Some(*want));
    }
}
3872
/// A 256-byte slice is rejected by `CharsCowsU32U8` with `OffsetOverflow` but
/// accepted by `CharsCowsU32U16`.
#[test]
fn chars_slices_overflow_checks() {
    let raw = vec![b'x'; 300];
    let text = core::str::from_utf8(&raw).unwrap();
    let oversized = &text[0..256];

    let narrow = CharsCowsU32U8::from_iter_and_data(
        core::iter::once(oversized),
        Cow::Borrowed(text.as_bytes()),
    );
    assert!(narrow.is_err());
    assert_eq!(narrow.unwrap_err(), StringTapeError::OffsetOverflow);

    let wider = CharsCowsU32U16::from_iter_and_data(
        core::iter::once(oversized),
        Cow::Borrowed(text.as_bytes()),
    );
    assert!(wider.is_ok());
}
3894
/// A slice that does not point into the supplied data is rejected with
/// `IndexOutOfBounds`; a slice of the data itself is accepted.
#[test]
fn chars_slices_bounds_check() {
    let owner = String::from("hello");
    let stranger = String::from("world");

    let foreign = CharsCowsU64U32::from_iter_and_data(
        core::iter::once(stranger.as_str()),
        Cow::Borrowed(owner.as_bytes()),
    );
    assert!(foreign.is_err());
    assert_eq!(foreign.unwrap_err(), StringTapeError::IndexOutOfBounds);

    let native = CharsCowsU64U32::from_iter_and_data(
        core::iter::once(owner.as_str()),
        Cow::Borrowed(owner.as_bytes()),
    );
    assert!(native.is_ok());
}
3915
/// `CharsCowsU32U8` converts into `BytesCowsU32U8` and back via `into` / `try_into`.
#[test]
fn slices_conversions() {
    let text = "hello world";
    let words = ["hello", "world"];

    let chars =
        CharsCowsU32U8::from_iter_and_data(text.split_whitespace(), Cow::Borrowed(text.as_bytes()))
            .unwrap();

    let bytes: BytesCowsU32U8 = chars.into();
    for (idx, word) in words.iter().enumerate() {
        assert_eq!(bytes.get(idx), Some(word.as_bytes()));
    }

    let chars_back: CharsCowsU32U8 = bytes.try_into().unwrap();
    for (idx, word) in words.iter().enumerate() {
        assert_eq!(chars_back.get(idx), Some(*word));
    }
}
3935
/// Compile-time check: `CharsCows::from_iter_and_data` can be bound to each of
/// the `CharsCowsU32U16`, `CharsCowsU32U8`, `CharsCowsU16U8` and
/// `CharsCowsU64U32` aliases.
#[test]
fn slices_type_aliases() {
    let text = "test";
    // Fresh borrowed backing data for every construction below.
    let backing = || Cow::Borrowed(text.as_bytes());

    let _u32_u16: CharsCowsU32U16 =
        CharsCows::from_iter_and_data(core::iter::once(text), backing()).unwrap();
    let _u32_u8: CharsCowsU32U8 =
        CharsCows::from_iter_and_data(core::iter::once(text), backing()).unwrap();
    let _u16_u8: CharsCowsU16U8 =
        CharsCows::from_iter_and_data(core::iter::once(text), backing()).unwrap();
    let _u64_u32: CharsCowsU64U32 =
        CharsCows::from_iter_and_data(core::iter::once(text), backing()).unwrap();
}
3953
/// `CharsCowsAuto::sort` orders the entries lexicographically.
#[test]
fn chars_slices_auto_sorted() {
    let text = "zebra apple banana cherry";
    let backing = Cow::Borrowed(text.as_bytes());

    let mut cows = CharsCowsAuto::from_iter_and_data(text.split_whitespace(), backing).unwrap();
    cows.sort();

    let in_order: Vec<&str> = cows.iter().collect();
    assert_eq!(in_order, vec!["apple", "banana", "cherry", "zebra"]);
}
3969
/// Entries of a `CharsCowsAuto` can be copied out into a `Vec<String>`.
#[test]
fn chars_slices_auto_to_vec_string() {
    let text = "hello world foo";
    let backing = Cow::Borrowed(text.as_bytes());

    let cows = CharsCowsAuto::from_iter_and_data(text.split_whitespace(), backing).unwrap();

    let owned: Vec<String> = cows.iter().map(String::from).collect();

    assert_eq!(owned, vec!["hello", "world", "foo"]);
}
3984
/// The iterator of a `CharsCowsAuto` composes with `filter_map`.
#[test]
fn chars_slices_auto_filter_map() {
    let text = "hello world foo bar";
    let backing = Cow::Borrowed(text.as_bytes());

    let cows = CharsCowsAuto::from_iter_and_data(text.split_whitespace(), backing).unwrap();

    // Keep only words longer than three bytes, upper-cased.
    let shouted: Vec<String> = cows
        .iter()
        .filter_map(|word| (word.len() > 3).then(|| word.to_uppercase()))
        .collect();

    assert_eq!(shouted, vec!["HELLO", "WORLD"]);
}
4008
/// `CharsCowsAuto` picks the `U32U8` variant for a 2-byte input and the
/// `U32U16` variant for a 300-byte word, with 5 and 6 bytes per entry.
#[test]
fn chars_slices_auto_type_selection() {
    let tiny = "hi";
    let tiny_cows =
        CharsCowsAuto::from_iter_and_data(core::iter::once(tiny), Cow::Borrowed(tiny.as_bytes()))
            .unwrap();
    assert!(matches!(tiny_cows, CharsCowsAuto::U32U8(_)));
    assert_eq!(tiny_cows.bytes_per_entry(), 5);

    let big = "a".repeat(300);
    let big_cows = CharsCowsAuto::from_iter_and_data(
        core::iter::once(big.as_str()),
        Cow::Borrowed(big.as_bytes()),
    )
    .unwrap();
    assert!(matches!(big_cows, CharsCowsAuto::U32U16(_)));
    assert_eq!(big_cows.bytes_per_entry(), 6);
}
4031}
4032
/// Small benchmark entry points that load a text file, split it on whitespace
/// and hold the words in one of several containers.
///
/// Every function takes the input path from the first command-line argument.
#[cfg(all(feature = "std", not(test)))]
pub mod examples {
    use super::*;
    use std::env;
    use std::fs;

    /// Blocks the current thread for one second.
    ///
    /// NOTE(review): presumably this keeps the freshly built collection alive
    /// long enough for an external tool to sample the process — confirm.
    fn pause_before_exit() {
        std::thread::sleep(std::time::Duration::from_millis(1000));
    }

    /// Collects the words of the input file into a `Vec<String>`.
    ///
    /// # Panics
    /// Panics if no file argument is given.
    ///
    /// # Errors
    /// Returns the I/O error if the file cannot be read as UTF-8 text.
    pub fn bench_vec_string() -> std::io::Result<()> {
        let input_path = env::args().nth(1).expect("Usage: bench_vec_string <file>");

        eprintln!("[Vec<String>] Loading file: {}", input_path);
        let text = fs::read_to_string(&input_path)?;
        eprintln!("[Vec<String>] File size: {} bytes", text.len());

        eprintln!("[Vec<String>] Collecting words...");
        let owned_words: Vec<String> = text.split_whitespace().map(String::from).collect();

        eprintln!("[Vec<String>] Collected {} words", owned_words.len());

        pause_before_exit();
        Ok(())
    }

    /// Collects the words of the input file into a `Vec<&[u8]>` borrowing from
    /// the loaded text.
    ///
    /// # Panics
    /// Panics if no file argument is given.
    ///
    /// # Errors
    /// Returns the I/O error if the file cannot be read as UTF-8 text.
    pub fn bench_vec_slice() -> std::io::Result<()> {
        let input_path = env::args().nth(1).expect("Usage: bench_vec_slice <file>");

        eprintln!("[Vec<&[u8]>] Loading file: {}", input_path);
        let text = fs::read_to_string(&input_path)?;
        eprintln!("[Vec<&[u8]>] File size: {} bytes", text.len());

        eprintln!("[Vec<&[u8]>] Collecting words...");
        let borrowed_words: Vec<&[u8]> = text.split_whitespace().map(str::as_bytes).collect();

        eprintln!("[Vec<&[u8]>] Collected {} words", borrowed_words.len());

        pause_before_exit();
        Ok(())
    }

    /// Builds a `CharsCowsAuto` over the words of the input file, borrowing
    /// the loaded text as backing data.
    ///
    /// # Panics
    /// Panics if no file argument is given.
    ///
    /// # Errors
    /// Returns the I/O error from reading the file, or the error reported by
    /// `CharsCowsAuto::from_iter_and_data`.
    pub fn bench_chars_slices() -> Result<(), Box<dyn std::error::Error>> {
        let input_path = env::args()
            .nth(1)
            .expect("Usage: bench_chars_slices <file>");

        eprintln!("[CharsCows] Loading file: {}", input_path);
        let text = fs::read_to_string(&input_path)?;
        eprintln!("[CharsCows] File size: {} bytes", text.len());

        eprintln!("[CharsCows] Building CharsCows from words...");
        let backing = Cow::Borrowed(text.as_bytes());
        let cows = CharsCowsAuto::from_iter_and_data(text.split_whitespace(), backing)?;

        eprintln!("[CharsCows] Collected {} words", cows.len());

        pause_before_exit();
        Ok(())
    }

    /// Builds a `CharsTapeAuto` from the words of the input file.
    ///
    /// # Panics
    /// Panics if no file argument is given.
    ///
    /// # Errors
    /// Returns the I/O error if the file cannot be read as UTF-8 text.
    pub fn bench_chars_tape() -> Result<(), Box<dyn std::error::Error>> {
        let input_path = env::args().nth(1).expect("Usage: bench_chars_tape <file>");

        eprintln!("[CharsTape] Loading file: {}", input_path);
        let text = fs::read_to_string(&input_path)?;
        eprintln!("[CharsTape] File size: {} bytes", text.len());

        eprintln!("[CharsTape] Building CharsTape from words...");
        let tape = CharsTapeAuto::from_iter(text.split_whitespace());

        eprintln!("[CharsTape] Collected {} words", tape.len());

        pause_before_exit();
        Ok(())
    }
}
4118
/// Dispatches to one of the `examples` benchmarks based on the name of the
/// running executable.
///
/// Exits the process with status 1 when the executable name is not one of the
/// known benchmark names.
///
/// # Errors
/// Returns the error from `std::env::current_exe` or from the selected benchmark.
#[cfg(all(feature = "std", not(test)))]
#[allow(dead_code)] // NOTE(review): presumably unused when this file is built as a library — confirm
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let exe_path = std::env::current_exe()?;
    let exe_name = exe_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
    // Match on the stem rather than the full file name so that a platform
    // executable suffix (e.g. `.exe`) does not hide a known benchmark name.
    let bench_name = exe_path.file_stem().and_then(|n| n.to_str()).unwrap_or("");

    match bench_name {
        "bench_vec_string" => examples::bench_vec_string()?,
        "bench_vec_slice" => examples::bench_vec_slice()?,
        "bench_chars_slices" => examples::bench_chars_slices()?,
        "bench_chars_tape" => examples::bench_chars_tape()?,
        _ => {
            eprintln!("Unknown binary: {}", exe_name);
            eprintln!("Available: bench_vec_string, bench_vec_slice, bench_chars_slices, bench_chars_tape");
            std::process::exit(1);
        }
    }

    Ok(())
}