Skip to main content

vortex_layout/layouts/flat/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod reader;
5pub mod writer;
6
7use std::env;
8use std::sync::Arc;
9use std::sync::LazyLock;
10
11use vortex_array::DeserializeMetadata;
12use vortex_array::ProstMetadata;
13use vortex_array::dtype::DType;
14use vortex_buffer::ByteBuffer;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_panic;
18use vortex_session::VortexSession;
19use vortex_session::registry::ReadContext;
20
21use crate::LayoutChildType;
22use crate::LayoutEncodingRef;
23use crate::LayoutId;
24use crate::LayoutReaderRef;
25use crate::LayoutRef;
26use crate::VTable;
27use crate::children::LayoutChildren;
28use crate::layouts::flat::reader::FlatReader;
29use crate::segments::SegmentId;
30use crate::segments::SegmentSource;
31use crate::vtable;
32
33/// Check if inline array node is enabled.
34pub(super) fn flat_layout_inline_array_node() -> bool {
35    static FLAT_LAYOUT_INLINE_ARRAY_NODE: LazyLock<bool> =
36        LazyLock::new(|| env::var("FLAT_LAYOUT_INLINE_ARRAY_NODE").is_ok_and(|v| v == "1"));
37    *FLAT_LAYOUT_INLINE_ARRAY_NODE
38}
39
// NOTE(review): presumably generates the `Flat` marker type and the glue connecting
// `FlatLayout` / `FlatLayoutEncoding` to the `VTable` impl that follows — confirm against the
// definition of `crate::vtable!`.
vtable!(Flat);
41
42impl VTable for Flat {
43    type Layout = FlatLayout;
44    type Encoding = FlatLayoutEncoding;
45    type Metadata = ProstMetadata<FlatLayoutMetadata>;
46
47    fn id(_encoding: &Self::Encoding) -> LayoutId {
48        LayoutId::new("vortex.flat")
49    }
50
51    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
52        LayoutEncodingRef::new_ref(FlatLayoutEncoding.as_ref())
53    }
54
55    fn row_count(layout: &Self::Layout) -> u64 {
56        layout.row_count
57    }
58
59    fn dtype(layout: &Self::Layout) -> &DType {
60        &layout.dtype
61    }
62
63    fn metadata(layout: &Self::Layout) -> Self::Metadata {
64        ProstMetadata(FlatLayoutMetadata {
65            array_encoding_tree: layout.array_tree.as_ref().map(|bytes| bytes.to_vec()),
66        })
67    }
68
69    fn segment_ids(layout: &Self::Layout) -> Vec<SegmentId> {
70        vec![layout.segment_id]
71    }
72
73    fn nchildren(_layout: &Self::Layout) -> usize {
74        0
75    }
76
77    fn child(_layout: &Self::Layout, _idx: usize) -> VortexResult<LayoutRef> {
78        vortex_bail!("Flat layout has no children");
79    }
80
81    fn child_type(_layout: &Self::Layout, _idx: usize) -> LayoutChildType {
82        vortex_panic!("Flat layout has no children");
83    }
84
85    fn new_reader(
86        layout: &Self::Layout,
87        name: Arc<str>,
88        segment_source: Arc<dyn SegmentSource>,
89        session: &VortexSession,
90    ) -> VortexResult<LayoutReaderRef> {
91        Ok(Arc::new(FlatReader::new(
92            layout.clone(),
93            name,
94            segment_source,
95            session.clone(),
96        )))
97    }
98
99    fn build(
100        _encoding: &Self::Encoding,
101        dtype: &DType,
102        row_count: u64,
103        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
104        segment_ids: Vec<SegmentId>,
105        _children: &dyn LayoutChildren,
106        ctx: &ReadContext,
107    ) -> VortexResult<Self::Layout> {
108        if segment_ids.len() != 1 {
109            vortex_bail!("Flat layout must have exactly one segment ID");
110        }
111        Ok(FlatLayout::new_with_metadata(
112            row_count,
113            dtype.clone(),
114            segment_ids[0],
115            ctx.clone(),
116            metadata
117                .array_encoding_tree
118                .as_ref()
119                .map(|v| ByteBuffer::from(v.clone())),
120        ))
121    }
122
123    fn with_children(_layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
124        if !children.is_empty() {
125            vortex_bail!("Flat layout has no children, got {}", children.len());
126        }
127        Ok(())
128    }
129}
130
/// Stateless encoding type for the flat layout; it is the `Encoding` associated type of the
/// `VTable` implementation for `Flat`.
#[derive(Debug)]
pub struct FlatLayoutEncoding;
133
/// The terminal node of a layout tree. Stores a single chunk of array data as one serialized
/// segment on disk.
#[derive(Clone, Debug)]
pub struct FlatLayout {
    /// Number of rows, as reported through the layout vtable's `row_count`.
    row_count: u64,
    /// Data type, as reported through the layout vtable's `dtype`.
    dtype: DType,
    /// The single segment this layout refers to.
    segment_id: SegmentId,
    /// Read context exposed by `array_ctx`.
    // NOTE(review): presumably used to resolve array encodings when the segment is decoded —
    // confirm in the reader.
    ctx: ReadContext,
    /// Optional bytes of the array encoding tree; written to and restored from
    /// `FlatLayoutMetadata::array_encoding_tree`.
    array_tree: Option<ByteBuffer>,
}
144
145impl FlatLayout {
146    pub fn new(row_count: u64, dtype: DType, segment_id: SegmentId, ctx: ReadContext) -> Self {
147        Self {
148            row_count,
149            dtype,
150            segment_id,
151            ctx,
152            array_tree: None,
153        }
154    }
155
156    pub fn new_with_metadata(
157        row_count: u64,
158        dtype: DType,
159        segment_id: SegmentId,
160        ctx: ReadContext,
161        metadata: Option<ByteBuffer>,
162    ) -> Self {
163        Self {
164            row_count,
165            dtype,
166            segment_id,
167            ctx,
168            array_tree: metadata,
169        }
170    }
171
172    #[inline]
173    pub fn segment_id(&self) -> SegmentId {
174        self.segment_id
175    }
176
177    #[inline]
178    pub fn array_ctx(&self) -> &ReadContext {
179        &self.ctx
180    }
181
182    #[inline]
183    pub fn array_tree(&self) -> Option<&ByteBuffer> {
184        self.array_tree.as_ref()
185    }
186}
187
/// Protobuf-encoded metadata stored alongside a flat layout.
#[derive(prost::Message)]
pub struct FlatLayoutMetadata {
    /// Optional copy of the array encoding tree, stored here so that array deserialization can
    /// be planned without first fetching the segment.
    ///
    /// When present, this will be an `ArrayNode`.
    #[prost(optional, bytes, tag = "1")]
    pub array_encoding_tree: Option<Vec<u8>>,
}