vortex_array/pipeline/view.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5
6use vortex_buffer::ByteBuffer;
7use vortex_error::VortexExpect;
8
9use crate::pipeline::N;
10use crate::pipeline::bits::{BitView, BitViewMut};
11use crate::pipeline::types::{Element, VType};
12
/// A read-only view over the elements of a pipeline vector.
///
/// The elements are referenced through a raw pointer; the `'a` lifetime carried by `_marker`
/// (and by the validity `BitView`) is what ties the view to the data it points at.
pub struct View<'a> {
    /// The physical type of the vector, which defines how the elements are stored.
    pub(super) vtype: VType,
    /// A pointer to the allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    pub(super) elements: *const u8,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    /// This value can be `None` if the expected DType is `NonNullable`.
    // TODO: support validity
    #[allow(dead_code)]
    pub(super) validity: Option<BitView<'a>>,
    /// The number of leading elements of the buffer exposed by `len()` and `as_slice()`.
    pub(super) len: usize,

    /// Additional buffers of data used by the vector, such as string data.
    #[allow(dead_code)]
    pub(super) data: Vec<ByteBuffer>,

    /// Marker defining the lifetime of the contents of the vector.
    pub(super) _marker: std::marker::PhantomData<&'a ()>,
}
35
36impl<'a> View<'a> {
37 #[inline(always)]
38 pub fn len(&self) -> usize {
39 self.len
40 }
41
42 pub fn is_empty(&self) -> bool {
43 self.len == 0
44 }
45
46 // FIXME(ngates): we should return &[T; N]
47 pub fn as_slice<T>(&self) -> &'a [T]
48 where
49 T: Element,
50 {
51 debug_assert_eq!(self.vtype, T::vtype(), "Invalid type for canonical view");
52 // SAFETY: We assume that the elements are of type T and that the view is valid.
53 unsafe { std::slice::from_raw_parts(self.elements.cast(), self.len) }
54 }
55
56 /// Re-interpret cast the vector into a new type where the element has the same width.
57 #[inline(always)]
58 pub fn reinterpret_as<E: Element>(&mut self) {
59 assert_eq!(
60 self.vtype.byte_width(),
61 size_of::<E>(),
62 "Cannot reinterpret {} as {}",
63 self.vtype,
64 E::vtype()
65 );
66 self.vtype = E::vtype();
67 }
68}
69
/// A mutable view over the elements of a pipeline vector.
///
/// The elements are referenced through a raw pointer; the `'a` lifetime carried by `_marker`
/// (and by the validity `BitViewMut`) is what ties the view to the data it points at.
pub struct ViewMut<'a> {
    /// The physical type of the vector, which defines how the elements are stored.
    pub(super) vtype: VType,
    /// A pointer to the allocated elements buffer.
    /// Alignment is at least the size of the element type.
    /// The capacity of the elements buffer is N * `size_of::<T>()` where T is the element type.
    // TODO(ngates): it would be nice to guarantee _wider_ alignment, ideally 128 bytes, so that
    //  we can use aligned load/store instructions for wide SIMD lanes.
    pub(super) elements: *mut u8,
    /// The validity mask for the vector, indicating which elements in the buffer are valid.
    /// This value can be `None` if the expected DType is `NonNullable`.
    pub(super) validity: Option<BitViewMut<'a>>,

    /// Additional buffers of data used by the vector, such as string data.
    /// New buffers are appended through `add_buffer`.
    // TODO(ngates): ideally these buffers are compressed somehow? E.g. using FSST?
    #[allow(dead_code)]
    pub(super) data: Vec<ByteBuffer>,

    /// The length of the prefix slice containing valid values.
    /// Never exceeds `N` when set through `set_len`.
    pub(super) len: usize,

    /// Marker defining the lifetime of the contents of the vector.
    pub(super) _marker: std::marker::PhantomData<&'a mut ()>,
}
94
95impl<'a> ViewMut<'a> {
96 pub fn new<E: Element>(elements: &'a mut [E], validity: Option<BitViewMut<'a>>) -> Self {
97 assert_eq!(elements.len(), N);
98 Self {
99 vtype: E::vtype(),
100 elements: elements.as_mut_ptr().cast(),
101 validity,
102 data: vec![],
103 len: N,
104 _marker: Default::default(),
105 }
106 }
107
108 /// Re-interpret cast the vector into a new type where the element has the same width.
109 #[inline(always)]
110 pub fn reinterpret_as<E: Element>(&mut self) {
111 assert_eq!(
112 self.vtype.byte_width(),
113 size_of::<E>(),
114 "Cannot reinterpret {} as {}",
115 self.vtype,
116 E::vtype()
117 );
118 self.vtype = E::vtype();
119 }
120
121 /// Returns an immutable slice of the elements in the vector.
122 #[inline(always)]
123 pub fn as_slice<E: Element>(&self) -> &'a [E] {
124 debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
125 unsafe { std::slice::from_raw_parts(self.elements.cast::<E>(), self.len) }
126 }
127
128 /// Returns a mutable slice of the elements in the vector, allowing for modification.
129 #[inline(always)]
130 pub fn as_slice_mut<E: Element>(&mut self) -> &'a mut [E] {
131 debug_assert_eq!(self.vtype, E::vtype(), "Invalid type for canonical view");
132 unsafe { std::slice::from_raw_parts_mut(self.elements.cast::<E>(), self.len) }
133 }
134
135 /// Access the validity mask of the vector.
136 ///
137 /// ## Panics
138 ///
139 /// Panics if the vector does not support validity, i.e. if the DType was non-nullable when
140 /// it was created.
141 pub fn validity(&mut self) -> &mut BitViewMut<'a> {
142 self.validity
143 .as_mut()
144 .vortex_expect("Vector does not support validity")
145 }
146
147 pub fn add_buffer(&mut self, buffer: ByteBuffer) {
148 self.data.push(buffer);
149 }
150
151 pub fn set_len(&mut self, len: usize) {
152 assert!(len <= N, "Length cannot exceed the capacity of the vector");
153 self.len = len;
154 }
155
156 /// Flatten the view by bringing the selected elements of the mask to the beginning of
157 /// the elements buffer.
158 ///
159 /// FIXME(ngates): also need to select validity bits.
160 pub fn select_mask<E: Element + Display>(&mut self, mask: &BitView) {
161 assert_eq!(
162 self.vtype,
163 E::vtype(),
164 "ViewMut::flatten_mask: type mismatch"
165 );
166
167 match mask.true_count() {
168 0 => {
169 // If the mask has no true bits, we set the length to 0.
170 }
171 N => {
172 // If the mask has N true bits, we copy all elements.
173 }
174 n if n > 3 * N / 4 => {
175 // High density: use iter_zeros to compact by removing gaps
176 let slice = self.as_slice_mut::<E>();
177 let mut write_idx = 0;
178 let mut read_idx = 0;
179
180 mask.iter_zeros(|zero_idx| {
181 // Copy elements from read_idx to zero_idx (exclusive) to write_idx
182 let count = zero_idx - read_idx;
183 unsafe {
184 // SAFETY: We assume that the elements are of type E and that the view is valid.
185 // Using memmove for potentially overlapping regions
186 std::ptr::copy(
187 slice.as_ptr().add(read_idx),
188 slice.as_mut_ptr().add(write_idx),
189 count,
190 );
191 write_idx += count;
192 }
193 read_idx = zero_idx + 1;
194 });
195
196 // Copy any remaining elements after the last zero
197 unsafe {
198 std::ptr::copy(
199 slice.as_ptr().add(read_idx),
200 slice.as_mut_ptr().add(write_idx),
201 N - read_idx,
202 );
203 }
204 }
205 _ => {
206 let mut offset = 0;
207 let slice = self.as_slice_mut::<E>();
208 mask.iter_ones(|idx| {
209 unsafe {
210 // SAFETY: We assume that the elements are of type E and that the view is valid.
211 let value = *slice.get_unchecked(idx);
212 // TODO(joe): use ptr increment (not offset).
213 *slice.get_unchecked_mut(offset) = value;
214
215 offset += 1;
216 }
217 });
218 }
219 }
220
221 self.len = mask.true_count();
222 }
223}