lance_arrow/
schema.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Extension to arrow schema
5
6use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
7
8use crate::BLOB_META_KEY;
9
/// Controls the padding emitted by the `to_compact_string` helpers.
///
/// The padding string (see `value`) is written after the opening brace and
/// after every entry of a struct or schema. Note that the padding consists of
/// space characters only; no newline is produced by this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Indentation {
    /// Emit no padding at all.
    OneLine,
    /// Emit the given number of space characters as padding.
    MultiLine(u8),
}

impl Indentation {
    /// Returns the padding string for this indentation level.
    ///
    /// `OneLine` yields an empty string; `MultiLine(n)` yields `n` spaces.
    fn value(&self) -> String {
        match self {
            Self::OneLine => String::new(),
            Self::MultiLine(spaces) => " ".repeat(usize::from(*spaces)),
        }
    }

    /// Returns the indentation to use for a nested (child) level.
    ///
    /// `OneLine` stays `OneLine`; `MultiLine` grows by two spaces. The width
    /// saturates at `u8::MAX` so that very deeply nested types cannot cause
    /// an arithmetic overflow.
    fn deepen(&self) -> Self {
        match self {
            Self::OneLine => Self::OneLine,
            Self::MultiLine(spaces) => Self::MultiLine(spaces.saturating_add(2)),
        }
    }
}
30
/// Extends the functionality of [arrow_schema::Field].
pub trait FieldExt {
    /// Create a compact string representation of the field.
    ///
    /// This is intended for display purposes and not for serialization.
    ///
    /// `indent` selects the padding that is placed around the children of
    /// nested types; child fields are rendered with a deepened indentation.
    fn to_compact_string(&self, indent: Indentation) -> String;

    /// Check if the field is marked as a packed struct.
    ///
    /// The implementation for [`Field`] looks at the `packed` metadata entry
    /// and compares its value with `true`, ignoring case.
    fn is_packed_struct(&self) -> bool;

    /// Check if the field is marked as a blob.
    ///
    /// The implementation for [`Field`] reports `true` whenever the field
    /// metadata contains the `BLOB_META_KEY` entry, whatever its value.
    fn is_blob(&self) -> bool;
}
44
45impl FieldExt for Field {
46    fn to_compact_string(&self, indent: Indentation) -> String {
47        let mut result = format!("{}: ", self.name().clone());
48        match self.data_type() {
49            DataType::Struct(fields) => {
50                result += "{";
51                result += &indent.value();
52                for (field_idx, field) in fields.iter().enumerate() {
53                    result += field.to_compact_string(indent.deepen()).as_str();
54                    if field_idx < fields.len() - 1 {
55                        result += ",";
56                    }
57                    result += indent.value().as_str();
58                }
59                result += "}";
60            }
61            DataType::List(field)
62            | DataType::LargeList(field)
63            | DataType::ListView(field)
64            | DataType::LargeListView(field) => {
65                result += "[";
66                result += field.to_compact_string(indent.deepen()).as_str();
67                result += "]";
68            }
69            DataType::FixedSizeList(child, dimension) => {
70                result += &format!(
71                    "[{}; {}]",
72                    child.to_compact_string(indent.deepen()),
73                    dimension
74                );
75            }
76            DataType::Dictionary(key_type, value_type) => {
77                result += &value_type.to_string();
78                result += "@";
79                result += &key_type.to_string();
80            }
81            _ => {
82                result += &self.data_type().to_string();
83            }
84        }
85        if self.is_nullable() {
86            result += "?";
87        }
88        result
89    }
90
91    // Check if field has metadata `packed` set to true, this check is case insensitive.
92    fn is_packed_struct(&self) -> bool {
93        let field_metadata = self.metadata();
94        field_metadata
95            .get("packed")
96            .map(|v| v.to_lowercase() == "true")
97            .unwrap_or(false)
98    }
99
100    fn is_blob(&self) -> bool {
101        let field_metadata = self.metadata();
102        field_metadata.get(BLOB_META_KEY).is_some()
103    }
104}
105
/// Extends the functionality of [arrow_schema::Schema].
pub trait SchemaExt {
    /// Create a new [`Schema`] with one extra field.
    ///
    /// The implementation for [`Schema`] appends the field after the existing
    /// ones and returns an [`ArrowError::SchemaError`] if a column with the
    /// same name already exists.
    fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError>;

    /// Create a new [`Schema`] with one extra field inserted at position `index`.
    ///
    /// `index` must not exceed the number of fields in the schema. The
    /// implementation for [`Schema`] returns an [`ArrowError::SchemaError`]
    /// if a column with the same name already exists.
    fn try_with_column_at(
        &self,
        index: usize,
        field: Field,
    ) -> std::result::Result<Schema, ArrowError>;

    /// Return the names of the fields of the schema.
    ///
    /// The implementation for [`Schema`] lists the top-level fields in
    /// schema order.
    fn field_names(&self) -> Vec<&String>;

    /// Create a new [`Schema`] that does not contain the column `column_name`.
    ///
    /// The implementation for [`Schema`] only considers top-level fields and
    /// returns an equivalent schema if no field has that name.
    fn without_column(&self, column_name: &str) -> Schema;

    /// Create a compact string representation of the schema.
    ///
    /// This is intended for display purposes and not for serialization.
    fn to_compact_string(&self, indent: Indentation) -> String;
}
126
127impl SchemaExt for Schema {
128    fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError> {
129        if self.column_with_name(field.name()).is_some() {
130            return Err(ArrowError::SchemaError(format!(
131                "Can not append column {} on schema: {:?}",
132                field.name(),
133                self
134            )));
135        };
136        let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
137        fields.push(FieldRef::new(field));
138        Ok(Self::new_with_metadata(fields, self.metadata.clone()))
139    }
140
141    fn try_with_column_at(
142        &self,
143        index: usize,
144        field: Field,
145    ) -> std::result::Result<Schema, ArrowError> {
146        if self.column_with_name(field.name()).is_some() {
147            return Err(ArrowError::SchemaError(format!(
148                "Failed to modify schema: Inserting column {} would create a duplicate column in schema: {:?}",
149                field.name(),
150                self
151            )));
152        };
153        let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
154        fields.insert(index, FieldRef::new(field));
155        Ok(Self::new_with_metadata(fields, self.metadata.clone()))
156    }
157
158    /// Project the schema to remove the given column.
159    ///
160    /// This only works on top-level fields right now. If a field does not exist,
161    /// the schema will be returned as is.
162    fn without_column(&self, column_name: &str) -> Schema {
163        let fields: Vec<FieldRef> = self
164            .fields()
165            .iter()
166            .filter(|f| f.name() != column_name)
167            .cloned()
168            .collect();
169        Self::new_with_metadata(fields, self.metadata.clone())
170    }
171
172    fn field_names(&self) -> Vec<&String> {
173        self.fields().iter().map(|f| f.name()).collect()
174    }
175
176    fn to_compact_string(&self, indent: Indentation) -> String {
177        let mut result = "{".to_string();
178        result += &indent.value();
179        for (field_idx, field) in self.fields.iter().enumerate() {
180            result += field.to_compact_string(indent.deepen()).as_str();
181            if field_idx < self.fields.len() - 1 {
182                result += ",";
183            }
184            result += indent.value().as_str();
185        }
186        result += "}";
187        result
188    }
189}