peppi_arrow/
lib.rs

1use std::mem::MaybeUninit;
2
3use arrow::{
4	array::{
5		ArrayRef,
6		ArrayBuilder,
7		BooleanArray,
8		BooleanBuilder,
9		ListArray,
10		ListBuilder,
11		PrimitiveArray,
12		PrimitiveBuilder,
13		StructArray,
14		StructBuilder,
15	},
16	datatypes::{
17		ArrowPrimitiveType,
18		DataType,
19		Field,
20		Int8Type,
21		UInt8Type,
22		Int16Type,
23		UInt16Type,
24		Int32Type,
25		UInt32Type,
26		Int64Type,
27		UInt64Type,
28		Float32Type,
29	},
30};
31
/// A Slippi version made of three `u8` components (presumably major, minor
/// and revision, in that order — confirm against the Slippi spec).
///
/// Comparisons are derived, so two versions compare component by component,
/// left to right. Since every component is an integer the ordering is total;
/// `Eq`, `Ord` and `Hash` are derived alongside the partial variants so that
/// versions can be sorted, `min`/`max`'d and used as map or set keys.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SlippiVersion(pub u8, pub u8, pub u8);
34
35pub trait Context: Copy {
36	fn slippi_version(&self) -> SlippiVersion;
37	fn avro_compatible_field_names(&self) -> bool { false }
38}
39
40pub trait Arrow {
41	type Builder: ArrayBuilder;
42	fn default() -> Self; // workaround for Default not working with const generics yet
43	fn fields<C: Context>(_context: C) -> Vec<Field> { unimplemented!() }
44	fn data_type<C: Context>(context: C) -> DataType;
45	fn is_nullable() -> bool { false }
46	fn builder<C: Context>(len: usize, context: C) -> Self::Builder;
47	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, context: C);
48	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, context: C);
49	fn read(&mut self, array: ArrayRef, idx: usize);
50}
51
/// Implements [`Arrow`] for native numeric types.
///
/// Takes a comma-separated list of `native: ArrowType` pairs (a trailing comma
/// is accepted) and emits one `impl Arrow for native` per pair, backed by
/// `PrimitiveBuilder<ArrowType>` / `PrimitiveArray<ArrowType>`.
macro_rules! primitives {
	( $($native: ty : $arrow: ty),* $(,)? ) => { $(
		impl Arrow for $native {
			type Builder = PrimitiveBuilder::<$arrow>;

			fn default() -> Self {
				0 as $native
			}

			fn data_type<C: Context>(_context: C) -> DataType {
				<$arrow as ArrowPrimitiveType>::DATA_TYPE
			}

			fn builder<C: Context>(len: usize, _context: C) -> Self::Builder {
				Self::Builder::new(len)
			}

			fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, _context: C) {
				// Panics if `builder` is not the primitive builder for this type.
				let typed = builder
					.as_any_mut()
					.downcast_mut::<PrimitiveBuilder::<$arrow>>()
					.unwrap();
				typed.append_value(*self).unwrap()
			}

			fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, _context: C) {
				let typed = builder
					.as_any_mut()
					.downcast_mut::<PrimitiveBuilder::<$arrow>>()
					.unwrap();
				typed.append_null().unwrap()
			}

			fn read(&mut self, array: ArrayRef, idx: usize) {
				// Panics if `array` is not a primitive array of the matching Arrow type.
				let typed = array
					.as_any()
					.downcast_ref::<PrimitiveArray::<$arrow>>()
					.unwrap();
				*self = typed.value(idx);
			}
		}
	)* }
}
83
84primitives!(
85	i8: Int8Type,
86	u8: UInt8Type,
87	i16: Int16Type,
88	u16: UInt16Type,
89	i32: Int32Type,
90	u32: UInt32Type,
91	i64: Int64Type,
92	u64: UInt64Type,
93	f32: Float32Type,
94);
95
96impl Arrow for bool {
97	type Builder = BooleanBuilder;
98
99	fn default() -> Self {
100		false
101	}
102
103	fn data_type<C: Context>(_context: C) -> DataType {
104		DataType::Boolean
105	}
106
107	fn builder<C: Context>(len: usize, _context: C) -> Self::Builder {
108		BooleanBuilder::new(len)
109	}
110
111	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, _context: C) {
112		builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap().append_value(*self).unwrap()
113	}
114
115	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, _context: C) {
116		builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap().append_null().unwrap()
117	}
118
119	fn read(&mut self, array: ArrayRef, idx: usize) {
120		*self = array.as_any().downcast_ref::<BooleanArray>().unwrap().value(idx)
121	}
122}
123
124impl<T> Arrow for Option<T> where T: Arrow {
125	type Builder = T::Builder;
126
127	fn default() -> Self {
128		None
129	}
130
131	fn data_type<C: Context>(context: C) -> DataType {
132		T::data_type(context)
133	}
134
135	fn is_nullable() -> bool {
136		true
137	}
138
139	fn builder<C: Context>(len: usize, context: C) -> Self::Builder {
140		T::builder(len, context)
141	}
142
143	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, context: C) {
144		if let Some(value) = self {
145			value.write(builder, context)
146		} else {
147			T::write_null(builder, context)
148		}
149	}
150
151	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, context: C) {
152		T::write_null(builder, context)
153	}
154
155	fn read(&mut self, array: ArrayRef, idx: usize) {
156		*self = match array.is_valid(idx) {
157			true => {
158				let mut value = T::default();
159				value.read(array, idx);
160				Some(value)
161			},
162			_ => None,
163		};
164	}
165}
166
167impl<T> Arrow for Box<T> where T: Arrow {
168	type Builder = T::Builder;
169
170	fn default() -> Self {
171		Box::new(T::default())
172	}
173
174	fn data_type<C: Context>(context: C) -> DataType {
175		T::data_type(context)
176	}
177
178	fn builder<C: Context>(len: usize, context: C) -> Self::Builder {
179		T::builder(len, context)
180	}
181
182	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, context: C) {
183		(**self).write(builder, context)
184	}
185
186	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, context: C) {
187		T::write_null(builder, context)
188	}
189
190	fn read(&mut self, array: ArrayRef, idx: usize) {
191		(*self).read(array, idx);
192	}
193}
194
195impl<T> Arrow for Vec<T> where T: Arrow {
196	type Builder = ListBuilder<T::Builder>;
197
198	fn default() -> Self {
199		Vec::new()
200	}
201
202	fn data_type<C: Context>(context: C) -> DataType {
203		DataType::List(Box::new(Field::new("list", T::data_type(context), T::is_nullable())))
204	}
205
206	fn builder<C: Context>(len: usize, context: C) -> Self::Builder {
207		Self::Builder::new(T::builder(len, context))
208	}
209
210	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, context: C) {
211		let builder = builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap();
212		for x in self {
213			x.write(builder.values(), context);
214		}
215		builder.append(true).unwrap();
216	}
217
218	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, _context: C) {
219		let builder = builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap();
220		builder.append(false).unwrap();
221	}
222
223	fn read(&mut self, array: ArrayRef, idx: usize) {
224		let array = array.as_any().downcast_ref::<ListArray>().unwrap();
225		for i in 0 .. array.value_length(idx) {
226			let mut value = T::default();
227			value.read(array.value(idx), i as usize);
228			self.push(value);
229		}
230	}
231}
232
233/// TODO: replace with FixedSizeListArray once Parquet supports those
234impl<T, const N: usize> Arrow for [T; N] where T: Arrow {
235	type Builder = StructBuilder;
236
237	fn default() -> Self {
238		let mut data: [MaybeUninit<T>; N] = unsafe {
239			MaybeUninit::uninit().assume_init()
240		};
241		for elem in &mut data[..] {
242			*elem = MaybeUninit::new(T::default())
243		}
244		//unsafe { mem::transmute::<_, [T; N]>(data) }
245		unsafe { data.as_ptr().cast::<[T; N]>().read() }
246	}
247
248	fn fields<C: Context>(context: C) -> Vec<Field> {
249		let mut fields = vec![];
250		for i in 0 .. N {
251			let name = match context.avro_compatible_field_names() {
252				true => format!("_{}", i),
253				_ => format!("{}", i),
254			};
255			fields.push(Field::new(&name, T::data_type(context), T::is_nullable()));
256		}
257		fields
258	}
259
260	fn data_type<C: Context>(context: C) -> DataType {
261		DataType::Struct(Self::fields(context))
262	}
263
264	fn builder<C: Context>(len: usize, context: C) -> Self::Builder {
265		let fields = Self::fields(context);
266		let mut builders = vec![];
267		for _ in 0 .. N {
268			builders.push(Box::new(T::builder(len, context)) as Box<dyn ArrayBuilder>);
269		}
270		StructBuilder::new(fields, builders)
271	}
272
273	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, context: C) {
274		let builder = builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap();
275		for (i, x) in self.iter().enumerate() {
276			x.write(builder.field_builder::<T::Builder>(i).unwrap(), context);
277		}
278		builder.append(true).unwrap();
279	}
280
281	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, context: C) {
282		let builder = builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap();
283		for i in 0 .. N {
284			T::write_null(builder.field_builder::<T::Builder>(i).unwrap(), context);
285		}
286		builder.append(false).unwrap();
287	}
288
289	fn read(&mut self, array: ArrayRef, idx: usize) {
290		let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
291		for (i, x) in self.iter_mut().enumerate() {
292			x.read(struct_array.column(i).clone(), idx);
293		}
294	}
295}
296
297/* For use when Parquet supports fixed-size lists
298impl<T, const N: usize> Arrow for [T; N] where T: Arrow {
299	type Builder = FixedSizeListBuilder<T::Builder>;
300
301	fn default() -> Self {
302		let mut data: [MaybeUninit<T>; N] = unsafe {
303			MaybeUninit::uninit().assume_init()
304		};
305		for elem in &mut data[..] {
306			*elem = MaybeUninit::new(T::default())
307		}
308		//unsafe { mem::transmute::<_, [T; N]>(data) }
309		unsafe { data.as_ptr().cast::<[T; N]>().read() }
310	}
311
312	fn data_type<C: Context>(context: C) -> DataType {
313		let field = Field::new("values", T::data_type(context), T::is_nullable());
314		DataType::FixedSizeList(Box::new(field), i32::try_from(N).unwrap())
315	}
316
317	fn builder<C: Context>(len: usize, context: C) -> Self::Builder {
318		Self::Builder::new(T::builder(len, context), i32::try_from(N).unwrap())
319	}
320
321	fn write<C: Context>(&self, builder: &mut dyn ArrayBuilder, context: C) {
322		let builder = builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap();
323		for i in 0 .. N {
324			self[i].write(builder.values(), context);
325		}
326		builder.append(true).unwrap();
327	}
328
329	fn write_null<C: Context>(builder: &mut dyn ArrayBuilder, context: C) {
330		let builder = builder.as_any_mut().downcast_mut::<Self::Builder>().unwrap();
331		builder.append(false).unwrap();
332	}
333
334	fn read(&mut self, array: ArrayRef, idx: usize) {
335		let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
336		for i in 0 .. N {
337			self[i].read(struct_array.column(i).clone(), idx);
338		}
339	}
340}
341*/