use arrow::array::ArrayRef;
use arrow::datatypes::DataType;
use crate::datatype::{InfallibleBuild, PrimitiveDatatype, RefDatatype};
use crate::typed_array::TypedArray;
use crate::{ColumnError, Datatype};
/// A column of values of logical datatype `L`.
///
/// Pairs a [`TypedArray`] with a sorted map of string key/value metadata.
pub struct Column<L: Datatype> {
/// The typed view over the underlying Arrow array.
array: TypedArray<L>,
/// Free-form string metadata attached to this column.
metadata: std::collections::BTreeMap<String, String>,
}
/// Core API available for every logical datatype `L`.
impl<L: Datatype> Column<L> {
    /// Mirrors `L::NULLABLE` for convenient access through the column type.
    pub const NULLABLE: bool = L::NULLABLE;

    /// Wraps an Arrow array in a typed column that starts with empty metadata.
    ///
    /// # Errors
    ///
    /// Propagates the error reported by `TypedArray::try_new` for `array`.
    pub fn try_new(array: ArrayRef) -> Result<Self, ColumnError> {
        let typed = TypedArray::try_new(array)?;
        let metadata = std::collections::BTreeMap::new();
        Ok(Self {
            array: typed,
            metadata,
        })
    }

    /// Read-only access to the column's metadata map.
    #[must_use]
    pub fn metadata(&self) -> &std::collections::BTreeMap<String, String> {
        let Self { metadata, .. } = self;
        metadata
    }

    /// Mutable access to the column's metadata map.
    pub fn metadata_mut(&mut self) -> &mut std::collections::BTreeMap<String, String> {
        let Self { metadata, .. } = self;
        metadata
    }

    /// Consumes the column and returns it with its metadata replaced by `metadata`.
    #[must_use]
    pub fn with_metadata(self, metadata: std::collections::BTreeMap<String, String>) -> Self {
        let mut column = self;
        column.metadata = metadata;
        column
    }

    /// Builds a column from a sequence of values.
    ///
    /// Each item is converted with `Into`, wrapped in `Some`, and the resulting
    /// sequence is handed to `L::build`; the built array then goes through
    /// [`Column::try_new`].
    ///
    /// # Errors
    ///
    /// Returns the error produced by `L::build` or by [`Column::try_new`].
    pub fn try_from_values(
        values: impl IntoIterator<Item = impl Into<L::Owned>>,
    ) -> Result<Self, ColumnError> {
        let present_values = values.into_iter().map(|value| Some(value.into()));
        let built = L::build(present_values)?;
        Self::try_new(built)
    }

    /// The Arrow [`DataType`] reported by `L::datatype()`.
    #[must_use]
    pub fn datatype() -> DataType {
        L::datatype()
    }

    /// Number of elements, as reported by the underlying typed array.
    #[must_use]
    pub fn len(&self) -> usize {
        let Self { array, .. } = self;
        array.len()
    }

    /// Whether the underlying typed array reports itself as empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let Self { array, .. } = self;
        array.is_empty()
    }

    /// Looks up the element at `index` via `TypedArray::get`.
    ///
    /// NOTE(review): presumably `None` means `index` is out of bounds — confirm
    /// against `TypedArray::get`.
    #[must_use]
    pub fn get(&self, index: usize) -> Option<L::Value<'_>> {
        let Self { array, .. } = self;
        array.get(index)
    }

    /// Like [`Column::get`], but converts the value with `L::to_owned_value`.
    #[must_use]
    pub fn get_owned(&self, index: usize) -> Option<L::Owned> {
        let borrowed = self.get(index)?;
        Some(L::to_owned_value(borrowed))
    }

    /// Returns the element at `index` via `TypedArray::value`.
    ///
    /// NOTE(review): presumably panics when `index` is out of bounds — confirm
    /// against `TypedArray::value`.
    #[must_use]
    pub fn value(&self, index: usize) -> L::Value<'_> {
        let Self { array, .. } = self;
        array.value(index)
    }

    /// Like [`Column::value`], but converts the value with `L::to_owned_value`.
    #[must_use]
    pub fn value_owned(&self, index: usize) -> L::Owned {
        let borrowed = self.value(index);
        L::to_owned_value(borrowed)
    }

    /// Borrowing iterator over the column, starting at index 0.
    #[must_use]
    pub fn iter(&self) -> ColumnIter<'_, L> {
        ColumnIter {
            index: 0,
            column: self,
        }
    }

    /// Iterator yielding each element converted with `L::to_owned_value`.
    pub fn iter_owned(&self) -> impl Iterator<Item = L::Owned> + '_ {
        let borrowed_values = self.iter();
        borrowed_values.map(L::to_owned_value)
    }

    /// Collects every element of [`Column::iter_owned`] into a `Vec`.
    #[must_use]
    pub fn to_vec(&self) -> Vec<L::Owned> {
        self.iter().map(L::to_owned_value).collect()
    }

    /// Returns a new column over the Arrow slice `[offset, offset + length)`,
    /// carrying a clone of this column's metadata.
    ///
    /// # Panics
    ///
    /// Panics if re-wrapping the sliced array fails, which is not expected to
    /// happen. NOTE(review): the Arrow `slice` call itself presumably panics on
    /// an out-of-range `offset`/`length` — confirm against the Arrow docs.
    #[must_use]
    pub fn slice(&self, offset: usize, length: usize) -> Self {
        let sliced = self.array.as_arrow().slice(offset, length);
        let column = Self::try_new(sliced)
            .expect("Cannot fail: slicing preserves datatype and validity");
        column.with_metadata(self.metadata.clone())
    }

    /// Borrows the underlying Arrow array.
    #[must_use]
    pub fn as_arrow(&self) -> &ArrayRef {
        let Self { array, .. } = self;
        array.as_arrow()
    }

    /// Consumes the column and returns the underlying Arrow array.
    ///
    /// The metadata is dropped along with the column.
    #[must_use]
    pub fn into_arrow(self) -> ArrayRef {
        let Self { array, .. } = self;
        array.into_arrow()
    }
}
impl<L: RefDatatype> std::ops::Index<usize> for Column<L> {
type Output = L::Ref;
fn index(&self, index: usize) -> &Self::Output {
self.array.value_ref(index)
}
}
/// Extra accessors for primitive datatypes.
impl<L> Column<L>
where
    L: PrimitiveDatatype,
{
    /// Exposes the values of the underlying typed array as a slice of
    /// `L::Native`, as returned by `TypedArray::values`.
    #[must_use]
    pub fn as_slice(&self) -> &[L::Native] {
        let Self { array, .. } = self;
        array.values()
    }
}
/// Infallible construction for datatypes that implement `InfallibleBuild`.
impl<L> Column<L>
where
    L: Datatype + InfallibleBuild,
{
    /// Builds a column from `values` through [`Column::try_from_values`].
    ///
    /// # Panics
    ///
    /// Panics if `try_from_values` returns an error; the `InfallibleBuild`
    /// bound is expected to rule that out.
    pub fn from_values(values: impl IntoIterator<Item = impl Into<L::Owned>>) -> Self {
        let outcome = Self::try_from_values(values);
        outcome.expect("Cannot fail: the logical type is InfallibleBuild")
    }
}
/// Fallible construction of nullable columns from optional values.
impl<L> Column<Option<L>>
where
    L: Datatype,
{
    /// Builds a nullable column from `Option` items.
    ///
    /// The payload of each `Some` item is converted with `Into`; the resulting
    /// options are forwarded to `try_from_values`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `try_from_values` reports.
    pub fn try_from_nullable_values(
        values: impl IntoIterator<Item = Option<impl Into<L::Owned>>>,
    ) -> Result<Self, ColumnError> {
        let converted = values
            .into_iter()
            .map(|value| value.map(|inner| inner.into()));
        Self::try_from_values(converted)
    }
}
/// Infallible construction of nullable columns from optional values.
impl<L> Column<Option<L>>
where
    L: InfallibleBuild,
{
    /// Builds a nullable column from `Option` items.
    ///
    /// The payload of each `Some` item is converted with `Into`; the resulting
    /// options are forwarded to `from_values`, which carries its own panic
    /// contract.
    pub fn from_nullable_values(
        values: impl IntoIterator<Item = Option<impl Into<L::Owned>>>,
    ) -> Self {
        let converted = values
            .into_iter()
            .map(|value| value.map(|inner| inner.into()));
        Self::from_values(converted)
    }
}
impl<L: InfallibleBuild, T: Into<L::Owned>> From<Vec<T>> for Column<L> {
fn from(values: Vec<T>) -> Self {
Self::from_values(values)
}
}
impl<L: InfallibleBuild, T: Into<L::Owned>> FromIterator<T> for Column<L> {
fn from_iter<I: IntoIterator<Item = T>>(values: I) -> Self {
Self::from_values(values)
}
}
/// The default column is an empty Arrow array of `L::datatype()` with empty metadata.
impl<L> Default for Column<L>
where
    L: Datatype,
{
    /// # Panics
    ///
    /// Panics if wrapping the freshly created empty array fails, which is not
    /// expected to happen.
    fn default() -> Self {
        let datatype = L::datatype();
        let empty = arrow::array::new_empty_array(&datatype);
        let wrapped = Self::try_new(empty);
        wrapped.expect("An empty array of the right datatype is always valid")
    }
}
/// Two columns are equal when both their metadata and their arrays compare equal.
impl<L> PartialEq for Column<L>
where
    L: Datatype,
{
    /// Compares metadata first and only compares the arrays when it matches.
    fn eq(&self, other: &Self) -> bool {
        if self.metadata != other.metadata {
            return false;
        }
        self.array == other.array
    }
}
/// Manual `Clone` that clones the typed array and the metadata map field by field.
impl<L> Clone for Column<L>
where
    L: Datatype,
{
    fn clone(&self) -> Self {
        let array = self.array.clone();
        let metadata = self.metadata.clone();
        Self { array, metadata }
    }
}
/// Debug output shows the underlying Arrow array and the metadata map.
impl<L> std::fmt::Debug for Column<L>
where
    L: Datatype,
{
    /// Formats as a `Column` struct with `array` and `metadata` fields; the
    /// output is marked non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Column");
        builder.field("array", self.array.as_arrow());
        builder.field("metadata", &self.metadata);
        builder.finish_non_exhaustive()
    }
}
impl<L: Datatype> TryFrom<ArrayRef> for Column<L> {
type Error = ColumnError;
fn try_from(array: ArrayRef) -> Result<Self, Self::Error> {
Self::try_new(array)
}
}
/// Borrowing iterator over the elements of a [`Column`].
pub struct ColumnIter<'a, L: Datatype> {
/// The column being iterated.
column: &'a Column<L>,
/// Index of the next element to yield.
index: usize,
}
/// Yields the column's elements in index order, borrowing from the column.
impl<'a, L> Iterator for ColumnIter<'a, L>
where
    L: Datatype + 'a,
{
    type Item = L::Value<'a>;

    /// Fetches the element at the cursor with `Column::get`.
    ///
    /// The cursor advances only when an element was returned, so once `get`
    /// yields `None` the iterator keeps returning `None`.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.column.get(self.index);
        if item.is_some() {
            self.index += 1;
        }
        item
    }

    /// Reports `len - index` as both the lower and the upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let total = self.column.len();
        let left = total - self.index;
        (left, Some(left))
    }
}
/// Marker impl; relies on `size_hint` returning the same lower and upper bound.
impl<'a, L> ExactSizeIterator for ColumnIter<'a, L> where L: Datatype + 'a {}
impl<'a, L: Datatype + 'a> IntoIterator for &'a Column<L> {
type Item = L::Value<'a>;
type IntoIter = ColumnIter<'a, L>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// Allows `for value in column`, consuming the column and yielding owned values.
impl<L> IntoIterator for Column<L>
where
    L: Datatype,
{
    type Item = L::Owned;
    type IntoIter = ColumnIntoIter<L>;

    /// Moves the column into an owning iterator positioned at index 0.
    fn into_iter(self) -> Self::IntoIter {
        let column = self;
        ColumnIntoIter { index: 0, column }
    }
}
/// Owning iterator over the elements of a [`Column`], yielding `L::Owned` values.
pub struct ColumnIntoIter<L: Datatype> {
/// The column being consumed; it stays alive for the iterator's lifetime.
column: Column<L>,
/// Index of the next element to yield.
index: usize,
}
/// Yields the column's elements in index order as owned values.
impl<L> Iterator for ColumnIntoIter<L>
where
    L: Datatype,
{
    type Item = L::Owned;

    /// Fetches the element at the cursor with `Column::get` and converts it
    /// with `L::to_owned_value`.
    ///
    /// The cursor advances only when an element was returned.
    fn next(&mut self) -> Option<Self::Item> {
        match self.column.get(self.index) {
            Some(borrowed) => {
                let owned = L::to_owned_value(borrowed);
                self.index += 1;
                Some(owned)
            }
            None => None,
        }
    }

    /// Reports `len - index` as both the lower and the upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let total = self.column.len();
        let left = total - self.index;
        (left, Some(left))
    }
}
/// Marker impl; relies on `size_hint` returning the same lower and upper bound.
impl<L> ExactSizeIterator for ColumnIntoIter<L> where L: Datatype {}