pub struct Utf8Array<O>where
    O: Offset,{ /* private fields */ }
Expand description

A Utf8Array is arrow’s semantic equivalent of an immutable Vec<Option<String>>. Cloning and slicing this struct is O(1).

Example

use arrow2::bitmap::Bitmap;
use arrow2::buffer::Buffer;
use arrow2::array::Utf8Array;
let array = Utf8Array::<i32>::from([Some("hi"), None, Some("there")]);
assert_eq!(array.value(0), "hi");
assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some("hi"), None, Some("there")]);
assert_eq!(array.values_iter().collect::<Vec<_>>(), vec!["hi", "", "there"]);
// the underlying representation
assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));
assert_eq!(array.values(), &Buffer::from(b"hithere".to_vec()));
assert_eq!(array.offsets().buffer(), &Buffer::from(vec![0, 2, 2, 2 + 5]));

Generic parameter

The generic parameter Offset can only be i32 or i64 and trades off maximum array length against memory usage:

  • the sum of lengths of all elements cannot exceed Offset::MAX
  • the total size of the underlying data is array.len() * size_of::<Offset>() + sum of lengths of all elements

Safety

The following invariants hold:

  • Two consecutive offsets, cast (as) to usize, delimit a valid slice of values.
  • A slice of values taken between two consecutive offsets is valid utf8.
  • len is equal to validity.len(), when defined.

Implementations§

§

impl<O> Utf8Array<O>where O: Offset,

pub fn try_new( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Result<Utf8Array<O>, Error>

Returns a Utf8Array created from its internal representation.

Errors

This function returns an error iff:

  • The last offset is not equal to the values’ length.
  • the validity’s length is not equal to offsets.len().
  • The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
  • The values between two consecutive offsets are not valid utf8
Implementation

This function is O(N) - checking utf8 is O(N)

pub fn from_slice<T, P>(slice: P) -> Utf8Array<O>where T: AsRef<str>, P: AsRef<[T]>,

Returns a Utf8Array from a slice of &str.

A convenience method that uses Self::from_trusted_len_values_iter.

pub fn from<T, P>(slice: P) -> Utf8Array<O>where T: AsRef<str>, P: AsRef<[Option<T>]>,

Returns a new Utf8Array from a slice of Option<&str>.

A convenience method that uses Self::from_trusted_len_iter.

pub fn iter( &self ) -> ZipValidity<&str, ArrayValuesIter<'_, Utf8Array<O>>, BitmapIter<'_>>

Returns an iterator of Option<&str>

pub fn values_iter(&self) -> ArrayValuesIter<'_, Utf8Array<O>>

Returns an iterator of &str

pub fn len(&self) -> usize

Returns the length of this array

pub fn value(&self, i: usize) -> &str

Returns the value of the element at index i, ignoring the array’s validity.

Panic

This function panics iff i >= self.len.

pub unsafe fn value_unchecked(&self, i: usize) -> &str

Returns the value of the element at index i, ignoring the array’s validity.

Safety

This function is safe iff i < self.len.

pub fn data_type(&self) -> &DataType

Returns the DataType of this array.

pub fn values(&self) -> &Buffer<u8>

Returns the values of this Utf8Array.

pub fn offsets(&self) -> &OffsetsBuffer<O>

Returns the offsets of this Utf8Array.

pub fn validity(&self) -> Option<&Bitmap>

The optional validity.

pub fn slice(&self, offset: usize, length: usize) -> Utf8Array<O>

Returns a slice of this Utf8Array.

Implementation

This operation is O(1), as it essentially amounts to increasing two ref counts.

Panic

This function panics iff offset + length > self.len().

pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Utf8Array<O>

Returns a slice of this Utf8Array.

Implementation

This operation is O(1), as it essentially amounts to increasing two ref counts.

Safety

The caller must ensure that offset + length <= self.len().

pub fn boxed(self) -> Box<dyn Array + 'static, Global>

Boxes self into a Box<dyn Array>.

pub fn arced(self) -> Arc<dyn Array + 'static>

Wraps self into a std::sync::Arc<dyn Array>.

pub fn with_validity(self, validity: Option<Bitmap>) -> Utf8Array<O>

Returns this Utf8Array with a new validity.

Panics

This function panics iff validity.len() != self.len().

pub fn set_validity(&mut self, validity: Option<Bitmap>)

Sets the validity of this Utf8Array.

Panics

This function panics iff validity.len() != self.len().

pub fn into_mut(self) -> Either<Utf8Array<O>, MutableUtf8Array<O>>

Try to convert this Utf8Array to a MutableUtf8Array

pub fn new_empty(data_type: DataType) -> Utf8Array<O>

Returns a new empty Utf8Array.

The array is guaranteed to have no elements nor validity.

pub fn new_null(data_type: DataType, length: usize) -> Utf8Array<O>

Returns a new Utf8Array whose slots are all null / None.

pub fn default_data_type() -> DataType

Returns a default DataType of this array, which depends on the generic parameter O: DataType::Utf8 or DataType::LargeUtf8

pub unsafe fn try_new_unchecked( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Result<Utf8Array<O>, Error>

Creates a new Utf8Array without checking for offsets monotonicity or utf8-validity.

Errors

This function returns an error iff:

  • The last offset is not equal to the values’ length.
  • the validity’s length is not equal to offsets.len().
  • The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
Safety

This function is unsound iff:

  • The values between two consecutive offsets are not valid utf8
Implementation

This function is O(1)

pub fn new( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Utf8Array<O>

Creates a new Utf8Array.

Panics

This function panics iff:

  • The last offset is not equal to the values’ length.
  • the validity’s length is not equal to offsets.len().
  • The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
  • The values between two consecutive offsets are not valid utf8
Implementation

This function is O(N) - checking utf8 is O(N)

pub unsafe fn new_unchecked( data_type: DataType, offsets: OffsetsBuffer<O>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Utf8Array<O>

Creates a new Utf8Array without checking for offsets monotonicity.

Panics

This function panics iff:

  • The last offset is not equal to the values’ length.
  • the validity’s length is not equal to offsets.len().
  • The data_type’s crate::datatypes::PhysicalType is not equal to either Utf8 or LargeUtf8.
Safety

This function is unsound iff:

  • the offsets are not monotonically increasing
  • The values between two consecutive offsets are not valid utf8
Implementation

This function is O(1)

pub fn from_trusted_len_values_iter<T, I>(iterator: I) -> Utf8Array<O>where T: AsRef<str>, I: TrustedLen<Item = T>,

Returns a (non-null) Utf8Array created from a TrustedLen of &str.

Implementation

This function is O(N)

pub fn from_iter_values<T, I>(iterator: I) -> Utf8Array<O>where T: AsRef<str>, I: Iterator<Item = T>,

Creates a new Utf8Array from an Iterator of &str.

pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Utf8Array<O>where P: AsRef<str>, I: Iterator<Item = Option<P>>,

Creates a Utf8Array from an iterator of trusted length.

Safety

The iterator must be TrustedLen, i.e. its size_hint().1 must correctly report its length.

pub fn from_trusted_len_iter<I, P>(iterator: I) -> Utf8Array<O>where P: AsRef<str>, I: TrustedLen<Item = Option<P>>,

Creates a Utf8Array from an iterator of trusted length.

pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>( iterator: I ) -> Result<Utf8Array<O>, E>where P: AsRef<str>, I: IntoIterator<Item = Result<Option<P>, E>>,

Creates a Utf8Array from a fallible iterator of trusted length.

Safety

The iterator must be TrustedLen, i.e. its size_hint().1 must correctly report its length.

pub fn try_from_trusted_len_iter<E, I, P>(iter: I) -> Result<Utf8Array<O>, E>where P: AsRef<str>, I: TrustedLen<Item = Result<Option<P>, E>>,

Creates a Utf8Array from a fallible iterator of trusted length.

pub fn apply_validity<F>(&mut self, f: F)where F: FnOnce(Bitmap) -> Bitmap,

Applies a function f to the validity of this array.

This is an API to leverage clone-on-write

Panics

This function panics if the function f modifies the length of the Bitmap.

Trait Implementations§

§

impl<O> Array for Utf8Array<O>where O: Offset,

§

fn as_any(&self) -> &(dyn Any + 'static)

Converts itself to a reference of Any, which enables downcasting to concrete types.
§

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

Converts itself to a mutable reference of Any, which enables mutable downcasting to concrete types.
§

fn len(&self) -> usize

The length of the Array. Every array has a length corresponding to the number of elements (slots).
§

fn data_type(&self) -> &DataType

The DataType of the Array. In combination with Array::as_any, this can be used to downcast trait objects (dyn Array) to concrete arrays.
§

fn validity(&self) -> Option<&Bitmap>

The validity of the Array: every array has an optional Bitmap that, when available, specifies whether the array slot is valid or not (null). When the validity is None, all slots are valid.
§

fn slice(&self, offset: usize, length: usize) -> Box<dyn Array + 'static, Global>

Slices the Array, returning a new Box<dyn Array>. Read more
§

unsafe fn slice_unchecked( &self, offset: usize, length: usize ) -> Box<dyn Array + 'static, Global>

Slices the Array, returning a new Box<dyn Array>. Read more
§

fn with_validity( &self, validity: Option<Bitmap> ) -> Box<dyn Array + 'static, Global>

Clones this Array with a newly assigned bitmap. Read more
§

fn to_boxed(&self) -> Box<dyn Array + 'static, Global>

Clone a &dyn Array to an owned Box<dyn Array>.
§

fn is_empty(&self) -> bool

whether the array is empty
§

fn null_count(&self) -> usize

The number of null slots on this Array. Read more
§

fn is_null(&self, i: usize) -> bool

Returns whether slot i is null. Read more
§

fn is_valid(&self, i: usize) -> bool

Returns whether slot i is valid. Read more
§

impl<'a> ArrowGetItem for &'a Utf8Array<i64>

§

type Item = &'a str

§

fn get(&self, item: usize) -> Option<<&'a Utf8Array<i64> as ArrowGetItem>::Item>

§

unsafe fn get_unchecked( &self, item: usize ) -> Option<<&'a Utf8Array<i64> as ArrowGetItem>::Item>

Safety Read more
source§

impl ChunkApplyKernel<Utf8Array<i64>> for ChunkedArray<Utf8Type>

source§

fn apply_kernel( &self, f: &dyn Fn(&Utf8Array<i64>) -> Box<dyn Array + 'static, Global> ) -> ChunkedArray<Utf8Type>

Apply kernel and return result as a new ChunkedArray.
source§

fn apply_kernel_cast<S>( &self, f: &dyn Fn(&Utf8Array<i64>) -> Box<dyn Array + 'static, Global> ) -> ChunkedArray<S>where S: PolarsDataType,

Apply a kernel that outputs an array of different type.
§

impl<O> Clone for Utf8Array<O>where O: Clone + Offset,

§

fn clone(&self) -> Utf8Array<O>

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
§

impl<O> Debug for Utf8Array<O>where O: Offset,

§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
§

impl<O> Default for Utf8Array<O>where O: Offset,

§

fn default() -> Utf8Array<O>

Returns the “default value” for a type. Read more
§

impl<'a, O> From<GrowableUtf8<'a, O>> for Utf8Array<O>where O: Offset,

§

fn from(val: GrowableUtf8<'a, O>) -> Utf8Array<O>

Converts to this type from the input type.
§

impl<O> From<MutableUtf8Array<O>> for Utf8Array<O>where O: Offset,

§

fn from(other: MutableUtf8Array<O>) -> Utf8Array<O>

Converts to this type from the input type.
§

impl<O> From<MutableUtf8ValuesArray<O>> for Utf8Array<O>where O: Offset,

§

fn from(other: MutableUtf8ValuesArray<O>) -> Utf8Array<O>

Converts to this type from the input type.
§

impl FromDataUtf8 for Utf8Array<i64>

§

unsafe fn from_data_unchecked_default( offsets: Buffer<i64>, values: Buffer<u8>, validity: Option<Bitmap> ) -> Utf8Array<i64>

Safety Read more
§

impl<O, P> FromIterator<Option<P>> for Utf8Array<O>where O: Offset, P: AsRef<str>,

§

fn from_iter<I>(iter: I) -> Utf8Array<O>where I: IntoIterator<Item = Option<P>>,

Creates a value from an iterator. Read more
§

impl<O> GenericBinaryArray<O> for Utf8Array<O>where O: Offset,

§

fn values(&self) -> &[u8]

The values of the array
§

fn offsets(&self) -> &[O]

The offsets of the array
§

impl<'a, O> IntoIterator for &'a Utf8Array<O>where O: Offset,

§

type Item = Option<&'a str>

The type of the elements being iterated over.
§

type IntoIter = ZipValidity<&'a str, ArrayValuesIter<'a, Utf8Array<O>>, BitmapIter<'a>>

Which kind of iterator are we turning this into?
§

fn into_iter(self) -> <&'a Utf8Array<O> as IntoIterator>::IntoIter

Creates an iterator from a value. Read more
§

impl<O> PartialEq<&(dyn Array + 'static)> for Utf8Array<O>where O: Offset,

§

fn eq(&self, other: &&(dyn Array + 'static)) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
§

impl<O> PartialEq<Utf8Array<O>> for &(dyn Array + 'static)where O: Offset,

§

fn eq(&self, other: &Utf8Array<O>) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
§

impl<O> PartialEq<Utf8Array<O>> for Utf8Array<O>where O: Offset,

§

fn eq(&self, other: &Utf8Array<O>) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
§

impl ValueSize for Utf8Array<i64>

§

fn get_values_size(&self) -> usize

Useful for a Utf8 or a List to get underlying value size. During a rechunk this is handy
§

impl ArrowArray for Utf8Array<i64>

Auto Trait Implementations§

§

impl<O> RefUnwindSafe for Utf8Array<O>where O: RefUnwindSafe,

§

impl<O> Send for Utf8Array<O>

§

impl<O> Sync for Utf8Array<O>

§

impl<O> Unpin for Utf8Array<O>

§

impl<O> UnwindSafe for Utf8Array<O>where O: RefUnwindSafe,

Blanket Implementations§

source§

impl<T> Any for Twhere T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for Twhere T: ?Sized,

const: unstable · source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for Twhere T: ?Sized,

const: unstable · source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> DynClone for Twhere T: Clone,

source§

fn __clone_box(&self, _: Private) -> *mut ()

source§

impl<T> From<T> for T

const: unstable · source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for Twhere U: From<T>,

const: unstable · source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

§

impl<A> IsValid for Awhere A: ArrowArray,

§

unsafe fn is_valid_unchecked(&self, i: usize) -> bool

Safety Read more
§

unsafe fn is_null_unchecked(&self, i: usize) -> bool

Safety Read more
§

impl<T> Pointable for T

§

const ALIGN: usize = mem::align_of::<T>()

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
§

impl<A> PolarsArray for Awhere A: Array + ?Sized,

§

fn has_validity(&self) -> bool

source§

impl<T> ToOwned for Twhere T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for Twhere U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
const: unstable · source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for Twhere U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
const: unstable · source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for Twhere V: MultiLane<T>,

§

fn vzip(self) -> V