1use std::sync::Arc;
5
6use flatbuffers::root;
7use vortex_array::ArrayRegistry;
8use vortex_dtype::DType;
9use vortex_error::{VortexResult, vortex_bail, vortex_err};
10use vortex_flatbuffers::{FlatBuffer, ReadFlatBuffer, dtype as fbd};
11use vortex_layout::{LayoutRegistry, LayoutRegistryExt};
12use vortex_metrics::VortexMetrics;
13
14use crate::footer::{FileStatistics, Footer, Postscript, PostscriptSegment};
15use crate::{DEFAULT_REGISTRY, EOF_SIZE, MAGIC_BYTES, VERSION};
16
/// Implemented by the types used as the `F` parameter of [`VortexOpenOptions`].
///
/// The associated `Options` type is the type of the `options` value that
/// [`VortexOpenOptions`] stores for that `F`.
pub trait FileType
where
    Self: Sized,
{
    /// Type of the options value carried alongside the shared open options.
    type Options;
}
20
21pub struct VortexOpenOptions<F: FileType> {
23 pub(crate) options: F::Options,
25 pub(crate) registry: Arc<ArrayRegistry>,
27 pub(crate) layout_registry: Arc<LayoutRegistry>,
29 pub(crate) file_size: Option<u64>,
31 pub(crate) dtype: Option<DType>,
33 pub(crate) footer: Option<Footer>,
36 pub(crate) metrics: VortexMetrics,
38}
39
40impl<F: FileType> VortexOpenOptions<F> {
41 pub(crate) fn new(options: F::Options) -> Self {
46 Self {
47 options,
48 registry: DEFAULT_REGISTRY.clone(),
49 layout_registry: Arc::new(LayoutRegistry::default()),
50 file_size: None,
51 dtype: None,
52 footer: None,
53 metrics: VortexMetrics::default(),
54 }
55 }
56
57 pub fn with_array_registry(mut self, registry: Arc<ArrayRegistry>) -> Self {
59 self.registry = registry;
60 self
61 }
62
63 pub fn with_layout_registry(mut self, registry: Arc<LayoutRegistry>) -> Self {
65 self.layout_registry = registry;
66 self
67 }
68
69 pub fn with_file_size(mut self, file_size: u64) -> Self {
74 self.file_size = Some(file_size);
75 self
76 }
77
78 pub fn with_dtype(mut self, dtype: DType) -> Self {
84 self.dtype = Some(dtype);
85 self
86 }
87
88 pub fn with_footer(mut self, footer: Footer) -> Self {
93 self.dtype = Some(footer.layout().dtype().clone());
94 self.footer = Some(footer);
95 self
96 }
97
98 pub fn with_metrics(mut self, metrics: VortexMetrics) -> Self {
100 self.metrics = metrics;
101 self
102 }
103}
104
105impl<F: FileType> VortexOpenOptions<F> {
106 pub(crate) fn parse_postscript(&self, initial_read: &[u8]) -> VortexResult<Postscript> {
108 if initial_read.len() < EOF_SIZE {
109 vortex_bail!(
110 "Initial read must be at least EOF_SIZE ({}) bytes",
111 EOF_SIZE
112 );
113 }
114 let eof_loc = initial_read.len() - EOF_SIZE;
115 let magic_bytes_loc = eof_loc + (EOF_SIZE - MAGIC_BYTES.len());
116
117 let magic_number = &initial_read[magic_bytes_loc..];
118 if magic_number != MAGIC_BYTES {
119 vortex_bail!("Malformed file, invalid magic bytes, got {magic_number:?}")
120 }
121
122 let version = u16::from_le_bytes(
123 initial_read[eof_loc..eof_loc + 2]
124 .try_into()
125 .map_err(|e| vortex_err!("Version was not a u16 {e}"))?,
126 );
127 if version != VERSION {
128 vortex_bail!("Malformed file, unsupported version {version}")
129 }
130
131 let ps_size = u16::from_le_bytes(
132 initial_read[eof_loc + 2..eof_loc + 4]
133 .try_into()
134 .map_err(|e| vortex_err!("Postscript size was not a u16 {e}"))?,
135 ) as usize;
136
137 if initial_read.len() < ps_size + EOF_SIZE {
138 vortex_bail!(
139 "Initial read must be at least {} bytes to include the Postscript",
140 ps_size + EOF_SIZE
141 );
142 }
143
144 Postscript::read_flatbuffer_bytes(&initial_read[eof_loc - ps_size..eof_loc])
145 }
146
147 pub(crate) fn parse_dtype(
149 &self,
150 initial_offset: u64,
151 initial_read: &[u8],
152 segment: &PostscriptSegment,
153 ) -> VortexResult<DType> {
154 let offset = usize::try_from(segment.offset - initial_offset)?;
155 let sliced_buffer =
156 FlatBuffer::copy_from(&initial_read[offset..offset + (segment.length as usize)]);
157 let fbd_dtype = root::<fbd::DType>(&sliced_buffer)?;
158
159 DType::try_from_view(fbd_dtype, sliced_buffer.clone())
160 }
161
162 pub(crate) fn parse_file_statistics(
164 &self,
165 initial_offset: u64,
166 initial_read: &[u8],
167 segment: &PostscriptSegment,
168 ) -> VortexResult<FileStatistics> {
169 let offset = usize::try_from(segment.offset - initial_offset)?;
170 let sliced_buffer =
171 FlatBuffer::copy_from(&initial_read[offset..offset + (segment.length as usize)]);
172 FileStatistics::read_flatbuffer_bytes(&sliced_buffer)
173 }
174
175 pub(crate) fn parse_footer(
177 &self,
178 initial_offset: u64,
179 initial_read: &[u8],
180 footer_segment: &PostscriptSegment,
181 layout_segment: &PostscriptSegment,
182 dtype: DType,
183 file_stats: Option<FileStatistics>,
184 ) -> VortexResult<Footer> {
185 let footer_offset = usize::try_from(footer_segment.offset - initial_offset)?;
186 let footer_bytes = FlatBuffer::copy_from(
187 &initial_read[footer_offset..footer_offset + (footer_segment.length as usize)],
188 );
189
190 let layout_offset = usize::try_from(layout_segment.offset - initial_offset)?;
191 let layout_bytes = FlatBuffer::copy_from(
192 &initial_read[layout_offset..layout_offset + (layout_segment.length as usize)],
193 );
194
195 Footer::from_flatbuffer(
196 footer_bytes,
197 layout_bytes,
198 dtype,
199 file_stats,
200 &self.registry,
201 &self.layout_registry,
202 )
203 }
204}