polars-arrow 0.53.0

Minimal implementation of the Arrow specification forked from arrow2
Documentation
use std::sync::Arc;

use polars_utils::pl_str::PlSmallStr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use super::{ArrowDataType, Metadata};

// These two have the same encoding, but because older versions of Polars
// were unable to read non-u32-key arrow dictionaries while _PL_ENUM_VALUES
// is set we switched to a new version.
pub static DTYPE_ENUM_VALUES_LEGACY: &str = "_PL_ENUM_VALUES";
pub static DTYPE_ENUM_VALUES_NEW: &str = "_PL_ENUM_VALUES2";

// These have different encodings.
pub static DTYPE_CATEGORICAL_LEGACY: &str = "_PL_CATEGORICAL";
pub static DTYPE_CATEGORICAL_NEW: &str = "_PL_CATEGORICAL2";

pub static PARQUET_EMPTY_STRUCT: &str = "_PL_EMPTY_STRUCT";

pub static MAINTAIN_PL_TYPE: &str = "maintain_type";
pub static PL_KEY: &str = "pl";

/// Represents Arrow's metadata of a "column".
///
/// A [`Field`] is the closest representation of the traditional "column": a logical type
/// ([`ArrowDataType`]) with a name and nullability.
/// A Field has optional [`Metadata`] that can be used to annotate the field with custom metadata.
///
/// Almost all IO in this crate uses [`Field`] to represent logical information about the data
/// to be serialized.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Field {
    /// Its name
    pub name: PlSmallStr,
    /// Its logical [`ArrowDataType`]
    pub dtype: ArrowDataType,
    /// Its nullability
    pub is_nullable: bool,
    /// Additional custom (opaque) metadata.
    pub metadata: Option<Arc<Metadata>>,
}

/// Support for `ArrowSchema::from_iter([field, ..])`
impl From<Field> for (PlSmallStr, Field) {
    fn from(value: Field) -> Self {
        (value.name.clone(), value)
    }
}

impl Field {
    /// Creates a new [`Field`].
    pub fn new(name: PlSmallStr, dtype: ArrowDataType, is_nullable: bool) -> Self {
        Field {
            name,
            dtype,
            is_nullable,
            metadata: Default::default(),
        }
    }

    /// Creates a new [`Field`] with metadata.
    #[inline]
    pub fn with_metadata(self, metadata: Metadata) -> Self {
        if metadata.is_empty() {
            return self;
        }
        Self {
            name: self.name,
            dtype: self.dtype,
            is_nullable: self.is_nullable,
            metadata: Some(Arc::new(metadata)),
        }
    }

    pub fn name(&self) -> &PlSmallStr {
        &self.name
    }

    /// Returns the [`Field`]'s [`ArrowDataType`].
    #[inline]
    pub fn dtype(&self) -> &ArrowDataType {
        &self.dtype
    }

    pub fn is_enum(&self) -> bool {
        if let Some(md) = &self.metadata {
            md.get(DTYPE_ENUM_VALUES_LEGACY).is_some() || md.get(DTYPE_ENUM_VALUES_NEW).is_some()
        } else {
            false
        }
    }

    pub fn is_categorical(&self) -> bool {
        if let Some(md) = &self.metadata {
            md.get(DTYPE_CATEGORICAL_LEGACY).is_some() || md.get(DTYPE_CATEGORICAL_NEW).is_some()
        } else {
            false
        }
    }

    pub fn is_pl_pq_empty_struct(&self) -> bool {
        self.metadata
            .as_ref()
            .is_some_and(|md| md.contains_key(PARQUET_EMPTY_STRUCT))
    }

    pub fn with_dtype(&self, dtype: ArrowDataType) -> Self {
        Self {
            name: self.name.clone(),
            dtype,
            is_nullable: self.is_nullable,
            metadata: self.metadata.clone(),
        }
    }
}