1use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
7
8use crate::{ARROW_EXT_NAME_KEY, BLOB_META_KEY, BLOB_V2_EXT_NAME};
9
/// Controls the padding inserted around nested fields by the
/// `to_compact_string` helpers in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Indentation {
    /// No padding is inserted between fields.
    OneLine,
    /// Pad with the given number of spaces. Each nesting level produced by
    /// `deepen` adds two more spaces.
    MultiLine(u8),
}

impl Indentation {
    /// Returns the padding string for this level: an empty string for
    /// `OneLine`, or `n` space characters for `MultiLine(n)`.
    ///
    /// Only spaces are produced; no newline is emitted here.
    fn value(&self) -> String {
        match self {
            Self::OneLine => String::new(),
            Self::MultiLine(spaces) => " ".repeat(usize::from(*spaces)),
        }
    }

    /// Returns the indentation to use one nesting level deeper.
    ///
    /// `OneLine` stays `OneLine`. `MultiLine(n)` becomes `MultiLine(n + 2)`,
    /// saturating at `u8::MAX` so that very deep nesting neither panics
    /// (debug builds) nor wraps around to a small indent (release builds).
    fn deepen(&self) -> Self {
        match self {
            Self::OneLine => Self::OneLine,
            Self::MultiLine(spaces) => Self::MultiLine(spaces.saturating_add(2)),
        }
    }
}
30
/// Extension methods for arrow [`Field`].
pub trait FieldExt {
    /// Renders the field as a compact, human-readable string.
    ///
    /// The implementation for [`Field`] in this file writes `name: type`,
    /// recursing into struct and list children with a deepened `indent`,
    /// and appends `?` when the field is nullable.
    fn to_compact_string(&self, indent: Indentation) -> String;

    /// Returns true if the field is marked as a packed struct.
    ///
    /// The implementation for [`Field`] in this file checks whether the
    /// metadata value under `packed` or `lance-encoding:packed` equals
    /// `true`, ignoring ASCII case.
    fn is_packed_struct(&self) -> bool;

    /// Returns true if the field is marked as a blob.
    ///
    /// The implementation for [`Field`] in this file accepts either the
    /// presence of the `BLOB_META_KEY` metadata entry or an
    /// `ARROW_EXT_NAME_KEY` metadata value equal to `BLOB_V2_EXT_NAME`.
    fn is_blob(&self) -> bool;

    /// Returns true if the field's `ARROW_EXT_NAME_KEY` metadata value equals
    /// `BLOB_V2_EXT_NAME` (as implemented for [`Field`] in this file).
    fn is_blob_v2(&self) -> bool;
}
47
48impl FieldExt for Field {
49 fn to_compact_string(&self, indent: Indentation) -> String {
50 let mut result = format!("{}: ", self.name().clone());
51 match self.data_type() {
52 DataType::Struct(fields) => {
53 result += "{";
54 result += &indent.value();
55 for (field_idx, field) in fields.iter().enumerate() {
56 result += field.to_compact_string(indent.deepen()).as_str();
57 if field_idx < fields.len() - 1 {
58 result += ",";
59 }
60 result += indent.value().as_str();
61 }
62 result += "}";
63 }
64 DataType::List(field)
65 | DataType::LargeList(field)
66 | DataType::ListView(field)
67 | DataType::LargeListView(field) => {
68 result += "[";
69 result += field.to_compact_string(indent.deepen()).as_str();
70 result += "]";
71 }
72 DataType::FixedSizeList(child, dimension) => {
73 result += &format!(
74 "[{}; {}]",
75 child.to_compact_string(indent.deepen()),
76 dimension
77 );
78 }
79 DataType::Dictionary(key_type, value_type) => {
80 result += &value_type.to_string();
81 result += "@";
82 result += &key_type.to_string();
83 }
84 _ => {
85 result += &self.data_type().to_string();
86 }
87 }
88 if self.is_nullable() {
89 result += "?";
90 }
91 result
92 }
93
94 fn is_packed_struct(&self) -> bool {
96 let field_metadata = self.metadata();
97 const PACKED_KEYS: [&str; 2] = ["packed", "lance-encoding:packed"];
98 PACKED_KEYS.iter().any(|key| {
99 field_metadata
100 .get(*key)
101 .map(|value| value.eq_ignore_ascii_case("true"))
102 .unwrap_or(false)
103 })
104 }
105
106 fn is_blob(&self) -> bool {
107 let field_metadata = self.metadata();
108 field_metadata.get(BLOB_META_KEY).is_some()
109 || field_metadata
110 .get(ARROW_EXT_NAME_KEY)
111 .map(|value| value == BLOB_V2_EXT_NAME)
112 .unwrap_or(false)
113 }
114
115 fn is_blob_v2(&self) -> bool {
116 let field_metadata = self.metadata();
117 field_metadata
118 .get(ARROW_EXT_NAME_KEY)
119 .map(|value| value == BLOB_V2_EXT_NAME)
120 .unwrap_or(false)
121 }
122}
123
/// Extension methods for arrow [`Schema`].
pub trait SchemaExt {
    /// Returns a new schema with `field` appended after the existing columns.
    ///
    /// # Errors
    ///
    /// The implementation for [`Schema`] in this file returns
    /// [`ArrowError::SchemaError`] when a column with the same name already
    /// exists.
    fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError>;

    /// Returns a new schema with `field` inserted at position `index`.
    ///
    /// # Errors
    ///
    /// The implementation for [`Schema`] in this file returns
    /// [`ArrowError::SchemaError`] when a column with the same name already
    /// exists.
    fn try_with_column_at(
        &self,
        index: usize,
        field: Field,
    ) -> std::result::Result<Schema, ArrowError>;

    /// Returns the names of the top-level fields, in schema order.
    fn field_names(&self) -> Vec<&String>;

    /// Returns a new schema without any top-level field named `column_name`.
    ///
    /// As implemented for [`Schema`] in this file, the schema metadata is
    /// carried over, and a name that matches no field leaves the field list
    /// unchanged.
    fn without_column(&self, column_name: &str) -> Schema;

    /// Renders the schema as a compact string: the top-level fields, comma
    /// separated and wrapped in braces, with `indent` controlling the padding.
    fn to_compact_string(&self, indent: Indentation) -> String;
}
144
145impl SchemaExt for Schema {
146 fn try_with_column(&self, field: Field) -> std::result::Result<Schema, ArrowError> {
147 if self.column_with_name(field.name()).is_some() {
148 return Err(ArrowError::SchemaError(format!(
149 "Can not append column {} on schema: {:?}",
150 field.name(),
151 self
152 )));
153 };
154 let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
155 fields.push(FieldRef::new(field));
156 Ok(Self::new_with_metadata(fields, self.metadata.clone()))
157 }
158
159 fn try_with_column_at(
160 &self,
161 index: usize,
162 field: Field,
163 ) -> std::result::Result<Schema, ArrowError> {
164 if self.column_with_name(field.name()).is_some() {
165 return Err(ArrowError::SchemaError(format!(
166 "Failed to modify schema: Inserting column {} would create a duplicate column in schema: {:?}",
167 field.name(),
168 self
169 )));
170 };
171 let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
172 fields.insert(index, FieldRef::new(field));
173 Ok(Self::new_with_metadata(fields, self.metadata.clone()))
174 }
175
176 fn without_column(&self, column_name: &str) -> Schema {
181 let fields: Vec<FieldRef> = self
182 .fields()
183 .iter()
184 .filter(|f| f.name() != column_name)
185 .cloned()
186 .collect();
187 Self::new_with_metadata(fields, self.metadata.clone())
188 }
189
190 fn field_names(&self) -> Vec<&String> {
191 self.fields().iter().map(|f| f.name()).collect()
192 }
193
194 fn to_compact_string(&self, indent: Indentation) -> String {
195 let mut result = "{".to_string();
196 result += &indent.value();
197 for (field_idx, field) in self.fields.iter().enumerate() {
198 result += field.to_compact_string(indent.deepen()).as_str();
199 if field_idx < self.fields.len() - 1 {
200 result += ",";
201 }
202 result += indent.value().as_str();
203 }
204 result += "}";
205 result
206 }
207}