polars_arrow/datatypes/
field.rs

1use std::sync::Arc;
2
3use polars_utils::pl_str::PlSmallStr;
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7use super::{ArrowDataType, Metadata};
8
/// Legacy metadata key for enum values; its presence is checked by `Field::is_enum`.
///
/// The legacy and new enum keys have the same encoding, but because older versions of
/// Polars were unable to read non-u32-key arrow dictionaries while `_PL_ENUM_VALUES`
/// is set, a new key was introduced.
pub static DTYPE_ENUM_VALUES_LEGACY: &str = "_PL_ENUM_VALUES";
/// Current metadata key for enum values; same encoding as the legacy key.
pub static DTYPE_ENUM_VALUES_NEW: &str = "_PL_ENUM_VALUES2";

/// Legacy metadata key for categoricals; its presence is checked by
/// `Field::is_categorical`.
///
/// Unlike the enum keys, the legacy and new categorical keys have different encodings.
pub static DTYPE_CATEGORICAL_LEGACY: &str = "_PL_CATEGORICAL";
/// Current metadata key for categoricals; encoded differently from the legacy key.
pub static DTYPE_CATEGORICAL_NEW: &str = "_PL_CATEGORICAL2";

/// Metadata key whose presence is checked by `Field::is_pl_pq_empty_struct`.
pub static PARQUET_EMPTY_STRUCT: &str = "_PL_EMPTY_STRUCT";

// NOTE(review): the two items below are not referenced in this file's visible code;
// presumably they are metadata keys consumed elsewhere in the crate -- confirm at the
// call sites before relying on that.
pub static MAINTAIN_PL_TYPE: &str = "maintain_type";
pub static PL_KEY: &str = "pl";
23
24/// Represents Arrow's metadata of a "column".
25///
26/// A [`Field`] is the closest representation of the traditional "column": a logical type
27/// ([`ArrowDataType`]) with a name and nullability.
28/// A Field has optional [`Metadata`] that can be used to annotate the field with custom metadata.
29///
30/// Almost all IO in this crate uses [`Field`] to represent logical information about the data
31/// to be serialized.
32#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
33#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
34#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
35pub struct Field {
36    /// Its name
37    pub name: PlSmallStr,
38    /// Its logical [`ArrowDataType`]
39    pub dtype: ArrowDataType,
40    /// Its nullability
41    pub is_nullable: bool,
42    /// Additional custom (opaque) metadata.
43    pub metadata: Option<Arc<Metadata>>,
44}
45
46/// Support for `ArrowSchema::from_iter([field, ..])`
47impl From<Field> for (PlSmallStr, Field) {
48    fn from(value: Field) -> Self {
49        (value.name.clone(), value)
50    }
51}
52
53impl Field {
54    /// Creates a new [`Field`].
55    pub fn new(name: PlSmallStr, dtype: ArrowDataType, is_nullable: bool) -> Self {
56        Field {
57            name,
58            dtype,
59            is_nullable,
60            metadata: Default::default(),
61        }
62    }
63
64    /// Creates a new [`Field`] with metadata.
65    #[inline]
66    pub fn with_metadata(self, metadata: Metadata) -> Self {
67        if metadata.is_empty() {
68            return self;
69        }
70        Self {
71            name: self.name,
72            dtype: self.dtype,
73            is_nullable: self.is_nullable,
74            metadata: Some(Arc::new(metadata)),
75        }
76    }
77
78    /// Returns the [`Field`]'s [`ArrowDataType`].
79    #[inline]
80    pub fn dtype(&self) -> &ArrowDataType {
81        &self.dtype
82    }
83
84    pub fn is_enum(&self) -> bool {
85        if let Some(md) = &self.metadata {
86            md.get(DTYPE_ENUM_VALUES_LEGACY).is_some() || md.get(DTYPE_ENUM_VALUES_NEW).is_some()
87        } else {
88            false
89        }
90    }
91
92    pub fn is_categorical(&self) -> bool {
93        if let Some(md) = &self.metadata {
94            md.get(DTYPE_CATEGORICAL_LEGACY).is_some() || md.get(DTYPE_CATEGORICAL_NEW).is_some()
95        } else {
96            false
97        }
98    }
99
100    pub fn is_pl_pq_empty_struct(&self) -> bool {
101        self.metadata
102            .as_ref()
103            .is_some_and(|md| md.contains_key(PARQUET_EMPTY_STRUCT))
104    }
105
106    pub fn map_dtype(mut self, f: impl FnOnce(ArrowDataType) -> ArrowDataType) -> Self {
107        self.dtype = f(self.dtype);
108        self
109    }
110
111    pub fn map_dtype_mut(&mut self, f: impl FnOnce(&mut ArrowDataType)) {
112        f(&mut self.dtype);
113    }
114
115    pub fn with_dtype(&self, dtype: ArrowDataType) -> Self {
116        let mut field = self.clone();
117        field.dtype = dtype;
118        field
119    }
120}