1use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
7
8use crate::{ARROW_EXT_NAME_KEY, BLOB_META_KEY, BLOB_V2_EXT_NAME};
9
/// Controls the padding inserted between elements when rendering a field or
/// schema as a compact string.
pub enum Indentation {
    /// Emit no padding at all.
    OneLine,
    /// Emit the given number of space characters as padding.
    ///
    /// Note that only spaces are produced by [`Indentation::value`]; no line
    /// break is inserted.
    MultiLine(u8),
}

impl Indentation {
    /// Returns the padding text for this level: an empty string for
    /// `OneLine`, or `spaces` space characters for `MultiLine(spaces)`.
    fn value(&self) -> String {
        match self {
            Self::OneLine => String::new(),
            Self::MultiLine(spaces) => " ".repeat(usize::from(*spaces)),
        }
    }

    /// Returns the indentation to use one nesting level deeper.
    ///
    /// `OneLine` stays `OneLine`; `MultiLine` grows by two spaces. The width
    /// saturates at `u8::MAX` so that very deep nesting cannot overflow the
    /// counter (which would panic in debug builds and wrap around to a tiny
    /// indent in release builds).
    fn deepen(&self) -> Self {
        match self {
            Self::OneLine => Self::OneLine,
            Self::MultiLine(spaces) => Self::MultiLine(spaces.saturating_add(2)),
        }
    }
}
30
/// Extension methods for an Arrow field.
pub trait FieldExt {
    /// Renders the field as a compact string of the form `name: type`.
    ///
    /// Nested types are rendered recursively and a trailing `?` marks a
    /// nullable field. `indent` supplies the padding placed around the
    /// children of struct-typed fields.
    fn to_compact_string(&self, indent: Indentation) -> String;

    /// Returns `true` if the field's metadata marks it as a packed struct,
    /// i.e. the `packed` or `lance-encoding:packed` key is present with a
    /// value equal to `true` (compared ASCII case-insensitively).
    fn is_packed_struct(&self) -> bool;

    /// Returns `true` if the field's metadata identifies it as a blob: either
    /// the `BLOB_META_KEY` entry is present, or the `ARROW_EXT_NAME_KEY`
    /// entry equals `BLOB_V2_EXT_NAME`.
    fn is_blob(&self) -> bool;
}
44
45impl FieldExt for Field {
46 fn to_compact_string(&self, indent: Indentation) -> String {
47 let mut result = format!("{}: ", self.name().clone());
48 match self.data_type() {
49 DataType::Struct(fields) => {
50 result += "{";
51 result += &indent.value();
52 for (field_idx, field) in fields.iter().enumerate() {
53 result += field.to_compact_string(indent.deepen()).as_str();
54 if field_idx < fields.len() - 1 {
55 result += ",";
56 }
57 result += indent.value().as_str();
58 }
59 result += "}";
60 }
61 DataType::List(field)
62 | DataType::LargeList(field)
63 | DataType::ListView(field)
64 | DataType::LargeListView(field) => {
65 result += "[";
66 result += field.to_compact_string(indent.deepen()).as_str();
67 result += "]";
68 }
69 DataType::FixedSizeList(child, dimension) => {
70 result += &format!(
71 "[{}; {}]",
72 child.to_compact_string(indent.deepen()),
73 dimension
74 );
75 }
76 DataType::Dictionary(key_type, value_type) => {
77 result += &value_type.to_string();
78 result += "@";
79 result += &key_type.to_string();
80 }
81 _ => {
82 result += &self.data_type().to_string();
83 }
84 }
85 if self.is_nullable() {
86 result += "?";
87 }
88 result
89 }
90
91 fn is_packed_struct(&self) -> bool {
93 let field_metadata = self.metadata();
94 const PACKED_KEYS: [&str; 2] = ["packed", "lance-encoding:packed"];
95 PACKED_KEYS.iter().any(|key| {
96 field_metadata
97 .get(*key)
98 .map(|value| value.eq_ignore_ascii_case("true"))
99 .unwrap_or(false)
100 })
101 }
102
103 fn is_blob(&self) -> bool {
104 let field_metadata = self.metadata();
105 field_metadata.get(BLOB_META_KEY).is_some()
106 || field_metadata
107 .get(ARROW_EXT_NAME_KEY)
108 .map(|value| value == BLOB_V2_EXT_NAME)
109 .unwrap_or(false)
110 }
111}
112
/// Extension methods for an Arrow schema.
pub trait SchemaExt {
    /// Returns a new schema with `field` appended as the last column.
    ///
    /// # Errors
    ///
    /// Returns an error if a column with the same name already exists.
    fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError>;

    /// Returns a new schema with `field` inserted at position `index`.
    ///
    /// # Errors
    ///
    /// Returns an error if a column with the same name already exists.
    fn try_with_column_at(
        &self,
        index: usize,
        field: Field,
    ) -> std::result::Result<Schema, ArrowError>;

    /// Returns the names of the schema's top-level fields, in schema order.
    fn field_names(&self) -> Vec<&String>;

    /// Returns a new schema that omits every top-level field named
    /// `column_name`. If no such field exists the result has the same fields
    /// as `self`.
    fn without_column(&self, column_name: &str) -> Schema;

    /// Renders the schema as a compact string: the compact form of each
    /// top-level field, comma-separated, wrapped in braces. `indent` supplies
    /// the padding placed around the fields.
    fn to_compact_string(&self, indent: Indentation) -> String;
}
133
134impl SchemaExt for Schema {
135 fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError> {
136 if self.column_with_name(field.name()).is_some() {
137 return Err(ArrowError::SchemaError(format!(
138 "Can not append column {} on schema: {:?}",
139 field.name(),
140 self
141 )));
142 };
143 let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
144 fields.push(FieldRef::new(field));
145 Ok(Self::new_with_metadata(fields, self.metadata.clone()))
146 }
147
148 fn try_with_column_at(
149 &self,
150 index: usize,
151 field: Field,
152 ) -> std::result::Result<Schema, ArrowError> {
153 if self.column_with_name(field.name()).is_some() {
154 return Err(ArrowError::SchemaError(format!(
155 "Failed to modify schema: Inserting column {} would create a duplicate column in schema: {:?}",
156 field.name(),
157 self
158 )));
159 };
160 let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
161 fields.insert(index, FieldRef::new(field));
162 Ok(Self::new_with_metadata(fields, self.metadata.clone()))
163 }
164
165 fn without_column(&self, column_name: &str) -> Schema {
170 let fields: Vec<FieldRef> = self
171 .fields()
172 .iter()
173 .filter(|f| f.name() != column_name)
174 .cloned()
175 .collect();
176 Self::new_with_metadata(fields, self.metadata.clone())
177 }
178
179 fn field_names(&self) -> Vec<&String> {
180 self.fields().iter().map(|f| f.name()).collect()
181 }
182
183 fn to_compact_string(&self, indent: Indentation) -> String {
184 let mut result = "{".to_string();
185 result += &indent.value();
186 for (field_idx, field) in self.fields.iter().enumerate() {
187 result += field.to_compact_string(indent.deepen()).as_str();
188 if field_idx < self.fields.len() - 1 {
189 result += ",";
190 }
191 result += indent.value().as_str();
192 }
193 result += "}";
194 result
195 }
196}