Struct arrow::array::DictionaryArray
source · pub struct DictionaryArray<K>where
K: ArrowPrimitiveType,{ /* private fields */ }
Expand description
A dictionary array where each element is a single value indexed by an integer key. This is mostly used to represent strings or a limited set of primitive types as integers, for example when doing NLP analysis or representing chromosomes by name.
A DictionaryArray is represented using a keys array and a values array, which may be of different lengths. The keys array stores indexes into the values array, which holds the corresponding logical values, as shown here:
┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
  ┌─────────────────┐  ┌─────────┐ │     ┌─────────────────┐
│ │        A        │  │    0    │       │        A        │     values[keys[0]]
  ├─────────────────┤  ├─────────┤ │     ├─────────────────┤
│ │        D        │  │    2    │       │        B        │     values[keys[1]]
  ├─────────────────┤  ├─────────┤ │     ├─────────────────┤
│ │        B        │  │    2    │       │        B        │     values[keys[2]]
  └─────────────────┘  ├─────────┤ │     ├─────────────────┤
│                      │    1    │       │        D        │     values[keys[3]]
                       ├─────────┤ │     ├─────────────────┤
│                      │    1    │       │        D        │     values[keys[4]]
                       ├─────────┤ │     ├─────────────────┤
│                      │    0    │       │        A        │     values[keys[5]]
                       └─────────┘ │     └─────────────────┘
│       values            keys
 ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
                                             Logical array
                                                Contents
          DictionaryArray
             length = 6
Example with nullable data:
use arrow_array::{DictionaryArray, Int8Array, types::Int8Type};
let test = vec!["a", "a", "b", "c"];
let array : DictionaryArray<Int8Type> = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect();
assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)]));
Example without nullable data:
use arrow_array::{DictionaryArray, Int8Array, types::Int8Type};
let test = vec!["a", "a", "b", "c"];
let array : DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
Example from existing arrays:
use arrow_array::{DictionaryArray, Int8Array, StringArray, types::Int8Type};
// You can form your own DictionaryArray by providing the
// values (dictionary) and keys (indexes into the dictionary):
let values = StringArray::from_iter_values(["a", "b", "c"]);
let keys = Int8Array::from_iter_values([0, 0, 1, 2]);
let array = DictionaryArray::<Int8Type>::try_new(&keys, &values).unwrap();
let expected: DictionaryArray::<Int8Type> = vec!["a", "a", "b", "c"]
.into_iter()
.collect();
assert_eq!(&array, &expected);
Implementations
sourceimpl<K> DictionaryArray<K>where
K: ArrowPrimitiveType,
impl<K> DictionaryArray<K>where
K: ArrowPrimitiveType,
sourcepub fn try_new(
keys: &PrimitiveArray<K>,
values: &dyn Array
) -> Result<DictionaryArray<K>, ArrowError>
pub fn try_new(
keys: &PrimitiveArray<K>,
values: &dyn Array
) -> Result<DictionaryArray<K>, ArrowError>
Attempt to create a new DictionaryArray from the specified keys array (indexes into the dictionary) and values array (the dictionary). Returns an error if any key is outside the bounds of the dictionary array.
sourcepub fn keys(&self) -> &PrimitiveArray<K>
pub fn keys(&self) -> &PrimitiveArray<K>
Return an array view of the keys of this dictionary as a PrimitiveArray.
sourcepub fn lookup_key(
&self,
value: &str
) -> Option<<K as ArrowPrimitiveType>::Native>
pub fn lookup_key(
&self,
value: &str
) -> Option<<K as ArrowPrimitiveType>::Native>
If value is present in values (aka the dictionary), returns the corresponding key (index into the values array). Otherwise returns None.
Panics if values is not a StringArray.
sourcepub fn values(&self) -> &Arc<dyn Array + 'static>
pub fn values(&self) -> &Arc<dyn Array + 'static>
Returns a reference to the dictionary values array
sourcepub fn value_type(&self) -> DataType
pub fn value_type(&self) -> DataType
Returns a clone of the value type of this dictionary.
sourcepub fn is_ordered(&self) -> bool
pub fn is_ordered(&self) -> bool
Currently exists for compatibility purposes with Arrow IPC.
sourcepub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>>
pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>>
Return an iterator over the keys (indexes into the dictionary)
sourcepub fn key(&self, i: usize) -> Option<usize>
pub fn key(&self, i: usize) -> Option<usize>
Return the value of keys (the dictionary key) at index i, cast to usize, or None if the value at i is NULL.
sourcepub fn downcast_dict<V>(&self) -> Option<TypedDictionaryArray<'_, K, V>>where
V: 'static,
pub fn downcast_dict<V>(&self) -> Option<TypedDictionaryArray<'_, K, V>>where
V: 'static,
Downcast this dictionary to a TypedDictionaryArray
use arrow_array::{Array, ArrayAccessor, DictionaryArray, StringArray, types::Int32Type};
let orig = [Some("a"), Some("b"), None];
let dictionary = DictionaryArray::<Int32Type>::from_iter(orig);
let typed = dictionary.downcast_dict::<StringArray>().unwrap();
assert_eq!(typed.value(0), "a");
assert_eq!(typed.value(1), "b");
assert!(typed.is_null(2));
sourcepub fn with_values(&self, values: &dyn Array) -> DictionaryArray<K>
pub fn with_values(&self, values: &dyn Array) -> DictionaryArray<K>
Returns a new dictionary with the same keys as the current instance but with a different set of dictionary values.
This can be used to perform an operation on the values of a dictionary.
Panics
Panics if values has a length less than that of the current dictionary values array.
use arrow_array::builder::PrimitiveDictionaryBuilder;
use arrow_array::{Int8Array, Int64Array, ArrayAccessor};
use arrow_array::types::{Int32Type, Int8Type};
// Construct a Dict(Int32, Int8)
let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int8Type>::with_capacity(2, 200);
for i in 0..100 {
builder.append(i % 2).unwrap();
}
let dictionary = builder.finish();
// Perform a widening cast of dictionary values
let typed_dictionary = dictionary.downcast_dict::<Int8Array>().unwrap();
let values: Int64Array = typed_dictionary.values().unary(|x| x as i64);
// Create a Dict(Int32, Int64) that reuses the original keys with the widened values
let new = dictionary.with_values(&values);
// Verify values are as expected
let new_typed = new.downcast_dict::<Int64Array>().unwrap();
for i in 0..100 {
assert_eq!(new_typed.value(i), (i % 2) as i64)
}
Trait Implementations
sourceimpl<T> Array for DictionaryArray<T>where
T: ArrowPrimitiveType,
impl<T> Array for DictionaryArray<T>where
T: ArrowPrimitiveType,
sourcefn data_ref(&self) -> &ArrayData
fn data_ref(&self) -> &ArrayData
sourcefn slice(&self, offset: usize, length: usize) -> Arc<dyn Array + 'static>
fn slice(&self, offset: usize, length: usize) -> Arc<dyn Array + 'static>
sourcefn offset(&self) -> usize
fn offset(&self) -> usize
Returns the offset into the underlying data used by this array(-slice). This defaults to 0. Read more
sourcefn is_null(&self, index: usize) -> bool
fn is_null(&self, index: usize) -> bool
Returns whether the element at index is null. When using this function on a slice, the index is relative to the slice. Read more
sourcefn is_valid(&self, index: usize) -> bool
fn is_valid(&self, index: usize) -> bool
Returns whether the element at index is not null. When using this function on a slice, the index is relative to the slice. Read more
sourcefn null_count(&self) -> usize
fn null_count(&self) -> usize
sourcefn get_buffer_memory_size(&self) -> usize
fn get_buffer_memory_size(&self) -> usize
sourcefn get_array_memory_size(&self) -> usize
fn get_array_memory_size(&self) -> usize
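Returns the total number of bytes of memory occupied physically by this array. This value will always be greater than that returned by get_buffer_memory_size() and includes the overhead of the data structures that contain the pointers to the various buffers. Read more
sourceimpl<T> Debug for DictionaryArray<T>where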
T: ArrowPrimitiveType,
impl<T> Debug for DictionaryArray<T>where
T: ArrowPrimitiveType,
sourceimpl<T> From<ArrayData> for DictionaryArray<T>where
T: ArrowPrimitiveType,
impl<T> From<ArrayData> for DictionaryArray<T>where
T: ArrowPrimitiveType,
Constructs a DictionaryArray from an array data reference.
sourcefn from(data: ArrayData) -> DictionaryArray<T>
fn from(data: ArrayData) -> DictionaryArray<T>
sourceimpl<T> From<DictionaryArray<T>> for ArrayDatawhere
T: ArrowPrimitiveType,
impl<T> From<DictionaryArray<T>> for ArrayDatawhere
T: ArrowPrimitiveType,
sourcefn from(array: DictionaryArray<T>) -> ArrayData
fn from(array: DictionaryArray<T>) -> ArrayData
sourceimpl<'a, T> FromIterator<&'a str> for DictionaryArray<T>where
T: ArrowDictionaryKeyType,
impl<'a, T> FromIterator<&'a str> for DictionaryArray<T>where
T: ArrowDictionaryKeyType,
Constructs a DictionaryArray from an iterator of strings.
Example:
use arrow_array::{DictionaryArray, PrimitiveArray, StringArray, types::Int8Type};
let test = vec!["a", "a", "b", "c"];
let array: DictionaryArray<Int8Type> = test.into_iter().collect();
assert_eq!(
"DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n 1,\n 2,\n] values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
format!("{:?}", array)
);
sourcefn from_iter<I>(iter: I) -> DictionaryArray<T>where
I: IntoIterator<Item = &'a str>,
fn from_iter<I>(iter: I) -> DictionaryArray<T>where
I: IntoIterator<Item = &'a str>,
sourceimpl<'a, T> FromIterator<Option<&'a str>> for DictionaryArray<T>where
T: ArrowDictionaryKeyType,
impl<'a, T> FromIterator<Option<&'a str>> for DictionaryArray<T>where
T: ArrowDictionaryKeyType,
Constructs a DictionaryArray from an iterator of optional strings.
Example:
use arrow_array::{DictionaryArray, PrimitiveArray, StringArray, types::Int8Type};
let test = vec!["a", "a", "b", "c"];
let array: DictionaryArray<Int8Type> = test
.iter()
.map(|&x| if x == "b" { None } else { Some(x) })
.collect();
assert_eq!(
"DictionaryArray {keys: PrimitiveArray<Int8>\n[\n 0,\n 0,\n null,\n 1,\n] values: StringArray\n[\n \"a\",\n \"c\",\n]}\n",
format!("{:?}", array)
);