Skip to main content

polars_arrow/datatypes/
field.rs

1use std::sync::Arc;
2
3use polars_utils::pl_str::PlSmallStr;
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7use super::{ArrowDataType, Metadata};
8
// These two have the same encoding, but because older versions of Polars
// were unable to read non-u32-key arrow dictionaries while _PL_ENUM_VALUES
// is set we switched to a new version.
/// Legacy metadata key for enum values; its presence in a field's metadata makes
/// [`Field::is_enum`] return `true`.
pub static DTYPE_ENUM_VALUES_LEGACY: &str = "_PL_ENUM_VALUES";
/// Newer metadata key for enum values; its presence in a field's metadata makes
/// [`Field::is_enum`] return `true`.
pub static DTYPE_ENUM_VALUES_NEW: &str = "_PL_ENUM_VALUES2";

// These have different encodings.
/// Legacy metadata key for categoricals; its presence in a field's metadata makes
/// [`Field::is_categorical`] return `true`.
pub static DTYPE_CATEGORICAL_LEGACY: &str = "_PL_CATEGORICAL";
/// Newer metadata key for categoricals; its presence in a field's metadata makes
/// [`Field::is_categorical`] return `true`.
pub static DTYPE_CATEGORICAL_NEW: &str = "_PL_CATEGORICAL2";

/// Metadata key whose presence makes [`Field::is_pl_pq_empty_struct`] return `true`.
pub static PARQUET_EMPTY_STRUCT: &str = "_PL_EMPTY_STRUCT";

// NOTE(review): the two statics below are not referenced anywhere in this file;
// presumably they are a metadata value/key pair used by other modules — confirm
// against their callers before documenting further.
pub static MAINTAIN_PL_TYPE: &str = "maintain_type";
pub static PL_KEY: &str = "pl";
23
/// Represents Arrow's metadata of a "column".
///
/// A [`Field`] is the closest representation of the traditional "column": a logical type
/// ([`ArrowDataType`]) with a name and nullability.
/// A Field has optional [`Metadata`] that can be used to annotate the field with custom metadata.
///
/// Almost all IO in this crate uses [`Field`] to represent logical information about the data
/// to be serialized.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct Field {
    /// The name of the field.
    pub name: PlSmallStr,
    /// The logical [`ArrowDataType`] of the field.
    pub dtype: ArrowDataType,
    /// The nullability flag of the field.
    pub is_nullable: bool,
    /// Additional custom (opaque) metadata.
    ///
    /// `None` when no metadata is attached. The map is held behind an [`Arc`], so cloning a
    /// [`Field`] clones the pointer rather than the map.
    pub metadata: Option<Arc<Metadata>>,
}
45
46/// Support for `ArrowSchema::from_iter([field, ..])`
47impl From<Field> for (PlSmallStr, Field) {
48    fn from(value: Field) -> Self {
49        (value.name.clone(), value)
50    }
51}
52
53impl Field {
54    /// Creates a new [`Field`].
55    pub fn new(name: PlSmallStr, dtype: ArrowDataType, is_nullable: bool) -> Self {
56        Field {
57            name,
58            dtype,
59            is_nullable,
60            metadata: Default::default(),
61        }
62    }
63
64    /// Creates a new [`Field`] with metadata.
65    #[inline]
66    pub fn with_metadata(self, metadata: Metadata) -> Self {
67        if metadata.is_empty() {
68            return self;
69        }
70        Self {
71            name: self.name,
72            dtype: self.dtype,
73            is_nullable: self.is_nullable,
74            metadata: Some(Arc::new(metadata)),
75        }
76    }
77
78    pub fn name(&self) -> &PlSmallStr {
79        &self.name
80    }
81
82    /// Returns the [`Field`]'s [`ArrowDataType`].
83    #[inline]
84    pub fn dtype(&self) -> &ArrowDataType {
85        &self.dtype
86    }
87
88    pub fn is_enum(&self) -> bool {
89        if let Some(md) = &self.metadata {
90            md.get(DTYPE_ENUM_VALUES_LEGACY).is_some() || md.get(DTYPE_ENUM_VALUES_NEW).is_some()
91        } else {
92            false
93        }
94    }
95
96    pub fn is_categorical(&self) -> bool {
97        if let Some(md) = &self.metadata {
98            md.get(DTYPE_CATEGORICAL_LEGACY).is_some() || md.get(DTYPE_CATEGORICAL_NEW).is_some()
99        } else {
100            false
101        }
102    }
103
104    pub fn is_pl_pq_empty_struct(&self) -> bool {
105        self.metadata
106            .as_ref()
107            .is_some_and(|md| md.contains_key(PARQUET_EMPTY_STRUCT))
108    }
109
110    pub fn with_dtype(&self, dtype: ArrowDataType) -> Self {
111        Self {
112            name: self.name.clone(),
113            dtype,
114            is_nullable: self.is_nullable,
115            metadata: self.metadata.clone(),
116        }
117    }
118}