pub struct Utf8Array<O>
where
    O: Offset,
{ /* private fields */ }
Expand description
A Utf8Array is arrow’s semantic equivalent of an immutable Vec<Option<String>>.
Cloning and slicing this struct is O(1).
Example
use arrow2::bitmap::Bitmap;
use arrow2::buffer::Buffer;
use arrow2::array::Utf8Array;
let array = Utf8Array::<i32>::from([Some("hi"), None, Some("there")]);
assert_eq!(array.value(0), "hi");
assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some("hi"), None, Some("there")]);
assert_eq!(array.values_iter().collect::<Vec<_>>(), vec!["hi", "", "there"]);
// the underlying representation
assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
assert_eq!(array.values(), &Buffer::from(b"hithere".to_vec()));
assert_eq!(array.offsets().buffer(), &Buffer::from(vec![0, 2, 2, 2 + 5]));
Generic parameter
The generic parameter Offset can only be i32 or i64 and trades off maximum array length against
memory usage:
- the sum of lengths of all elements cannot exceed Offset::MAX
- the total size of the underlying data is array.len() * size_of::<Offset>() + sum of lengths of all elements
Safety
The following invariants hold:
- Two consecutive offsets cast (as) to usize are valid slices of values.
- A slice of values taken from two consecutive offsets is valid utf8.
- len is equal to validity.len(), when defined.
Implementations§
§impl<O> Utf8Array<O>where
O: Offset,
impl<O> Utf8Array<O>where O: Offset,
pub fn try_new(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Result<Utf8Array<O>, Error>
pub fn try_new( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Result<Utf8Array<O>, Error>
Returns a Utf8Array created from its internal representation.
Errors
This function returns an error iff:
- The last offset is not equal to the values’ length.
- the validity’s length is not equal to offsets.len().
- The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
- The values between two consecutive offsets are not valid utf8
Implementation
This function is O(N) - checking utf8 is O(N)
pub fn from_slice<T, P>(slice: P) -> Utf8Array<O>where
T: AsRef<str>,
P: AsRef<[T]>,
pub fn from_slice<T, P>(slice: P) -> Utf8Array<O>where T: AsRef<str>, P: AsRef<[T]>,
Returns a Utf8Array from a slice of &str.
A convenience method that uses Self::from_trusted_len_values_iter.
pub fn from<T, P>(slice: P) -> Utf8Array<O>where
T: AsRef<str>,
P: AsRef<[Option<T>]>,
pub fn from<T, P>(slice: P) -> Utf8Array<O>where T: AsRef<str>, P: AsRef<[Option<T>]>,
Returns a new Utf8Array from a slice of Option<&str>.
A convenience method that uses Self::from_trusted_len_iter.
pub fn iter(
&self
) -> ZipValidity<&str, ArrayValuesIter<'_, Utf8Array<O>>, BitmapIter<'_>> ⓘ
pub fn iter( &self ) -> ZipValidity<&str, ArrayValuesIter<'_, Utf8Array<O>>, BitmapIter<'_>> ⓘ
Returns an iterator of Option<&str>
pub fn values_iter(&self) -> ArrayValuesIter<'_, Utf8Array<O>> ⓘ
pub fn values_iter(&self) -> ArrayValuesIter<'_, Utf8Array<O>> ⓘ
Returns an iterator of &str
pub fn value(&self, i: usize) -> &str
pub fn value(&self, i: usize) -> &str
Returns the value of the element at index i, ignoring the array’s validity.
Panic
This function panics iff i >= self.len.
pub unsafe fn value_unchecked(&self, i: usize) -> &str
pub unsafe fn value_unchecked(&self, i: usize) -> &str
Returns the value of the element at index i, ignoring the array’s validity.
Safety
This function is safe iff i < self.len.
pub fn offsets(&self) -> &OffsetsBuffer<O>
pub fn offsets(&self) -> &OffsetsBuffer<O>
Returns the offsets of this Utf8Array.
pub fn slice(&self, offset: usize, length: usize) -> Utf8Array<O>
pub fn slice(&self, offset: usize, length: usize) -> Utf8Array<O>
Returns a slice of this Utf8Array.
Implementation
This operation is O(1), as it essentially amounts to increasing two ref counts.
Panic
This function panics iff offset + length > self.len().
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Utf8Array<O>
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Utf8Array<O>
Returns a slice of this Utf8Array.
Implementation
This operation is O(1), as it essentially amounts to increasing two ref counts.
Safety
The caller must ensure that offset + length <= self.len().
pub fn boxed(self) -> Box<dyn Array + 'static, Global>
pub fn boxed(self) -> Box<dyn Array + 'static, Global>
Boxes self into a Box<dyn Array>.
pub fn arced(self) -> Arc<dyn Array + 'static>
pub fn arced(self) -> Arc<dyn Array + 'static>
Wraps self into a std::sync::Arc<dyn Array>.
pub fn with_validity(self, validity: Option<Bitmap>) -> Utf8Array<O>
pub fn with_validity(self, validity: Option<Bitmap>) -> Utf8Array<O>
pub fn set_validity(&mut self, validity: Option<Bitmap>)
pub fn set_validity(&mut self, validity: Option<Bitmap>)
pub fn into_mut(self) -> Either<Utf8Array<O>, MutableUtf8Array<O>> ⓘ
pub fn into_mut(self) -> Either<Utf8Array<O>, MutableUtf8Array<O>> ⓘ
Try to convert this Utf8Array to a MutableUtf8Array
pub fn new_empty(data_type: DataType) -> Utf8Array<O>
pub fn new_empty(data_type: DataType) -> Utf8Array<O>
Returns a new empty Utf8Array.
The array is guaranteed to have no elements nor validity.
pub fn new_null(data_type: DataType, length: usize) -> Utf8Array<O>
pub fn new_null(data_type: DataType, length: usize) -> Utf8Array<O>
Returns a new Utf8Array in which all slots are null / None.
pub fn default_data_type() -> DataType
pub fn default_data_type() -> DataType
Returns a default DataType of this array, which depends on the generic parameter O: DataType::Utf8 or DataType::LargeUtf8
pub unsafe fn try_new_unchecked(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Result<Utf8Array<O>, Error>
pub unsafe fn try_new_unchecked( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Result<Utf8Array<O>, Error>
Creates a new Utf8Array without checking for offsets monotonicity nor utf8-validity
Errors
This function returns an error iff:
- The last offset is not equal to the values’ length.
- the validity’s length is not equal to offsets.len().
- The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
Safety
This function is unsound iff:
- The values between two consecutive offsets are not valid utf8
Implementation
This function is O(1)
pub fn new(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Utf8Array<O>
pub fn new( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Utf8Array<O>
Creates a new Utf8Array.
Panics
This function panics iff:
- The last offset is not equal to the values’ length.
- the validity’s length is not equal to offsets.len().
- The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
- The values between two consecutive offsets are not valid utf8
Implementation
This function is O(N) - checking utf8 is O(N)
pub unsafe fn new_unchecked(
data_type: DataType,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>
) -> Utf8Array<O>
pub unsafe fn new_unchecked( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Utf8Array<O>
Creates a new Utf8Array without checking for offsets monotonicity.
Panics
This function panics iff:
- The last offset is not equal to the values’ length.
- the validity’s length is not equal to offsets.len().
- The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
Safety
This function is unsound iff:
- the offsets are not monotonically increasing
- The values between two consecutive offsets are not valid utf8
Implementation
This function is O(1)
pub fn from_trusted_len_values_iter<T, I>(iterator: I) -> Utf8Array<O>where
T: AsRef<str>,
I: TrustedLen<Item = T>,
pub fn from_trusted_len_values_iter<T, I>(iterator: I) -> Utf8Array<O>where T: AsRef<str>, I: TrustedLen<Item = T>,
Returns a (non-null) Utf8Array created from a TrustedLen of &str.
Implementation
This function is O(N)
pub fn from_iter_values<T, I>(iterator: I) -> Utf8Array<O>where
T: AsRef<str>,
I: Iterator<Item = T>,
pub fn from_iter_values<T, I>(iterator: I) -> Utf8Array<O>where T: AsRef<str>, I: Iterator<Item = T>,
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Utf8Array<O>where
P: AsRef<str>,
I: Iterator<Item = Option<P>>,
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Utf8Array<O>where P: AsRef<str>, I: Iterator<Item = Option<P>>,
Creates a Utf8Array from an iterator of trusted length.
Safety
The iterator must be TrustedLen.
I.e. that size_hint().1 correctly reports its length.
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Utf8Array<O>where
P: AsRef<str>,
I: TrustedLen<Item = Option<P>>,
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Utf8Array<O>where P: AsRef<str>, I: TrustedLen<Item = Option<P>>,
Creates a Utf8Array from an iterator of trusted length.
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
iterator: I
) -> Result<Utf8Array<O>, E>where
P: AsRef<str>,
I: IntoIterator<Item = Result<Option<P>, E>>,
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>( iterator: I ) -> Result<Utf8Array<O>, E>where P: AsRef<str>, I: IntoIterator<Item = Result<Option<P>, E>>,
Creates a Utf8Array from a fallible iterator of trusted length.
Safety
The iterator must be TrustedLen.
I.e. that size_hint().1 correctly reports its length.
pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Utf8Array<O>, E>where
P: AsRef<str>,
I: TrustedLen<Item = Result<Option<P>, E>>,
pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Utf8Array<O>, E>where P: AsRef<str>, I: TrustedLen<Item = Result<Option<P>, E>>,
Creates a Utf8Array from a fallible iterator of trusted length.
Trait Implementations§
§impl<O> Array for Utf8Array<O>where
O: Offset,
impl<O> Array for Utf8Array<O>where O: Offset,
§fn as_any(&self) -> &(dyn Any + 'static)
fn as_any(&self) -> &(dyn Any + 'static)
Converts itself to a reference of Any, which enables downcasting to concrete types.
§fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
Converts itself to a mutable reference of Any, which enables mutable downcasting to concrete types.
§fn len(&self) -> usize
fn len(&self) -> usize
The length of the Array. Every array has a length corresponding to the number of
elements (slots).
§fn data_type(&self) -> &DataType
fn data_type(&self) -> &DataType
The DataType of the Array. In combination with Array::as_any, this can be
used to downcast trait objects (dyn Array) to concrete arrays.
§unsafe fn slice_unchecked(
&self,
offset: usize,
length: usize
) -> Box<dyn Array + 'static, Global>
unsafe fn slice_unchecked( &self, offset: usize, length: usize ) -> Box<dyn Array + 'static, Global>
§fn to_boxed(&self) -> Box<dyn Array + 'static, Global>
fn to_boxed(&self) -> Box<dyn Array + 'static, Global>
Clones a &dyn Array to an owned Box<dyn Array>.