quiver_types 0.3.0

Core types for the quiver crate
Documentation
//! Column descriptors: extracting single, named columns from a record batch.

use std::marker::PhantomData;

use arrow::array::ArrayRef;

use crate::{Column, DynColumn, Error, ErrorKind, LogicalType};

// Column descriptors

/// Identifies a strongly-typed column by name.
///
/// Generated by `#[derive(Quiver)]` as `COLUMN_*` constants, e.g.
/// `Measurements::COLUMN_TEMPERATURE`, for extracting a single column
/// from a record batch without hard-coding its name.
///
/// The type parameter `C` is the column type this descriptor stands for.
/// It exists only at the type level: a descriptor holds no column data,
/// just the static strings needed to locate and describe the column.
pub struct ColumnDesc<C> {
    /// The name of the `#[derive(Quiver)]` struct, for error messages.
    pub record_type: &'static str,

    /// The name of the column in the record batch.
    pub name: &'static str,

    /// The metadata declared with `#[quiver(metadata("key" = "value", …))]`.
    ///
    /// Stored as `(key, value)` pairs.
    pub metadata: &'static [(&'static str, &'static str)],

    /// Ties the descriptor to the column type `C` without storing a `C`.
    ///
    /// Being private, this field also means a descriptor can only be
    /// constructed from within this module (e.g. through `new`).
    _marker: PhantomData<fn() -> C>,
}

impl<L: LogicalType> ColumnDesc<Column<L>> {
    /// Builds the descriptor for column `name` of the `#[derive(Quiver)]`
    /// struct `record_type`, carrying the given static `metadata` pairs.
    ///
    /// This is rarely called by hand: the derive emits one `COLUMN_*`
    /// constant per column using this constructor.
    pub const fn new(
        record_type: &'static str,
        name: &'static str,
        metadata: &'static [(&'static str, &'static str)],
    ) -> Self {
        Self {
            record_type,
            name,
            metadata,
            // Type-level tag only; carries no data.
            _marker: PhantomData,
        }
    }

    /// Pulls just this column out of `batch` and validates it.
    ///
    /// The lookup is by the descriptor's `name`; `record_type` is passed
    /// along so that errors can name the record the column belongs to.
    ///
    /// # Errors
    /// Errors if the column is missing, has the wrong datatype, or unexpected nulls.
    pub fn extract(&self, batch: &arrow::record_batch::RecordBatch) -> Result<Column<L>, Error> {
        Column::extract_named(batch, self.name, self.record_type)
    }
}

impl<L: crate::ConcreteType> ColumnDesc<Column<L>> {
    /// Builds the arrow field describing this column.
    ///
    /// The field takes its name from the descriptor, its datatype and
    /// nullability from `L`, and carries an owned copy of every declared
    /// metadata pair.
    #[must_use]
    pub fn arrow_field(&self) -> arrow::datatypes::Field {
        let field = arrow::datatypes::Field::new(self.name, L::datatype(), L::NULLABLE);

        // The descriptor stores static string pairs; arrow wants owned strings.
        let owned_metadata = self
            .metadata
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect();

        field.with_metadata(owned_metadata)
    }
}

/// Identifies a dynamically-typed column by name.
///
/// Generated by `#[derive(Quiver)]` as `COLUMN_*` constants
/// for raw arrow array fields.
///
/// Unlike `ColumnDesc`, this descriptor has no type parameter and no
/// declared metadata: it holds only the two names below.
pub struct DynColumnDesc {
    /// The name of the `#[derive(Quiver)]` struct, for error messages.
    pub record_type: &'static str,

    /// The name of the column in the record batch.
    pub name: &'static str,
}

impl DynColumnDesc {
    /// Describes the column `name` of the `#[derive(Quiver)]` struct `record_type`.
    ///
    /// Usually not called directly: the derive generates these as `COLUMN_*` constants.
    pub const fn new(record_type: &'static str, name: &'static str) -> Self {
        Self { record_type, name }
    }

    /// Extracts this single column of a record batch.
    ///
    /// # Errors
    /// Errors if the column is missing.
    pub fn extract(&self, batch: &arrow::record_batch::RecordBatch) -> Result<DynColumn, Error> {
        let Self { record_type, name } = *self;

        let index = batch
            .schema_ref()
            .index_of(name)
            .map_err(|_not_found| Error {
                record_type,
                kind: ErrorKind::MissingColumn {
                    column: name.to_owned(),
                },
            })?;

        Ok(DynColumn {
            field: std::sync::Arc::clone(&batch.schema_ref().fields()[index]),
            array: ArrayRef::clone(batch.column(index)),
        })
    }
}