1use std::sync::Arc;
2
3use flatbuffers::root;
4use vortex_array::ArrayRegistry;
5use vortex_dtype::DType;
6use vortex_error::{VortexResult, vortex_bail, vortex_err};
7use vortex_flatbuffers::{FlatBuffer, ReadFlatBuffer, dtype as fbd};
8use vortex_layout::{LayoutRegistry, LayoutRegistryExt};
9use vortex_metrics::VortexMetrics;
10
11use crate::footer::{FileStatistics, Footer, Postscript, PostscriptSegment};
12use crate::{DEFAULT_REGISTRY, EOF_SIZE, MAGIC_BYTES, VERSION};
13
/// A kind of file that [`VortexOpenOptions`] can be parameterised over.
///
/// Implementors choose the options payload that is carried in the
/// `options` field of [`VortexOpenOptions`].
pub trait FileType
where
    Self: Sized,
{
    /// Options payload specific to this file type.
    type Options;
}
17
18pub struct VortexOpenOptions<F: FileType> {
20 pub(crate) options: F::Options,
22 pub(crate) registry: Arc<ArrayRegistry>,
24 pub(crate) layout_registry: Arc<LayoutRegistry>,
26 pub(crate) file_size: Option<u64>,
28 pub(crate) dtype: Option<DType>,
30 pub(crate) footer: Option<Footer>,
33 pub(crate) metrics: VortexMetrics,
35}
36
37impl<F: FileType> VortexOpenOptions<F> {
38 pub(crate) fn new(options: F::Options) -> Self {
39 Self {
40 options,
41 registry: DEFAULT_REGISTRY.clone(),
42 layout_registry: Arc::new(LayoutRegistry::default()),
43 file_size: None,
44 dtype: None,
45 footer: None,
46 metrics: VortexMetrics::default(),
47 }
48 }
49
50 pub fn with_array_registry(mut self, registry: Arc<ArrayRegistry>) -> Self {
52 self.registry = registry;
53 self
54 }
55
56 pub fn with_layout_registry(mut self, registry: Arc<LayoutRegistry>) -> Self {
58 self.layout_registry = registry;
59 self
60 }
61
62 pub fn with_file_size(mut self, file_size: u64) -> Self {
67 self.file_size = Some(file_size);
68 self
69 }
70
71 pub fn with_dtype(mut self, dtype: DType) -> Self {
77 self.dtype = Some(dtype);
78 self
79 }
80
81 pub fn with_footer(mut self, footer: Footer) -> Self {
86 self.dtype = Some(footer.layout().dtype().clone());
87 self.footer = Some(footer);
88 self
89 }
90
91 pub fn with_metrics(mut self, metrics: VortexMetrics) -> Self {
93 self.metrics = metrics;
94 self
95 }
96}
97
98impl<F: FileType> VortexOpenOptions<F> {
99 pub(crate) fn parse_postscript(&self, initial_read: &[u8]) -> VortexResult<Postscript> {
101 if initial_read.len() < EOF_SIZE {
102 vortex_bail!(
103 "Initial read must be at least EOF_SIZE ({}) bytes",
104 EOF_SIZE
105 );
106 }
107 let eof_loc = initial_read.len() - EOF_SIZE;
108 let magic_bytes_loc = eof_loc + (EOF_SIZE - MAGIC_BYTES.len());
109
110 let magic_number = &initial_read[magic_bytes_loc..];
111 if magic_number != MAGIC_BYTES {
112 vortex_bail!("Malformed file, invalid magic bytes, got {magic_number:?}")
113 }
114
115 let version = u16::from_le_bytes(
116 initial_read[eof_loc..eof_loc + 2]
117 .try_into()
118 .map_err(|e| vortex_err!("Version was not a u16 {e}"))?,
119 );
120 if version != VERSION {
121 vortex_bail!("Malformed file, unsupported version {version}")
122 }
123
124 let ps_size = u16::from_le_bytes(
125 initial_read[eof_loc + 2..eof_loc + 4]
126 .try_into()
127 .map_err(|e| vortex_err!("Postscript size was not a u16 {e}"))?,
128 ) as usize;
129
130 if initial_read.len() < ps_size + EOF_SIZE {
131 vortex_bail!(
132 "Initial read must be at least {} bytes to include the Postscript",
133 ps_size + EOF_SIZE
134 );
135 }
136
137 Postscript::read_flatbuffer_bytes(&initial_read[eof_loc - ps_size..eof_loc])
138 }
139
140 pub(crate) fn parse_dtype(
142 &self,
143 initial_offset: u64,
144 initial_read: &[u8],
145 segment: &PostscriptSegment,
146 ) -> VortexResult<DType> {
147 let offset = usize::try_from(segment.offset - initial_offset)?;
148 let sliced_buffer =
149 FlatBuffer::copy_from(&initial_read[offset..offset + (segment.length as usize)]);
150 let fbd_dtype = root::<fbd::DType>(&sliced_buffer)?;
151
152 DType::try_from_view(fbd_dtype, sliced_buffer.clone())
153 }
154
155 pub(crate) fn parse_file_statistics(
157 &self,
158 initial_offset: u64,
159 initial_read: &[u8],
160 segment: &PostscriptSegment,
161 ) -> VortexResult<FileStatistics> {
162 let offset = usize::try_from(segment.offset - initial_offset)?;
163 let sliced_buffer =
164 FlatBuffer::copy_from(&initial_read[offset..offset + (segment.length as usize)]);
165 FileStatistics::read_flatbuffer_bytes(&sliced_buffer)
166 }
167
168 pub(crate) fn parse_footer(
170 &self,
171 initial_offset: u64,
172 initial_read: &[u8],
173 footer_segment: &PostscriptSegment,
174 layout_segment: &PostscriptSegment,
175 dtype: DType,
176 file_stats: Option<FileStatistics>,
177 ) -> VortexResult<Footer> {
178 let footer_offset = usize::try_from(footer_segment.offset - initial_offset)?;
179 let footer_bytes = FlatBuffer::copy_from(
180 &initial_read[footer_offset..footer_offset + (footer_segment.length as usize)],
181 );
182
183 let layout_offset = usize::try_from(layout_segment.offset - initial_offset)?;
184 let layout_bytes = FlatBuffer::copy_from(
185 &initial_read[layout_offset..layout_offset + (layout_segment.length as usize)],
186 );
187
188 Footer::from_flatbuffer(
189 footer_bytes,
190 layout_bytes,
191 dtype,
192 file_stats,
193 &self.registry,
194 &self.layout_registry,
195 )
196 }
197}