vortex_layout/layouts/flat/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod reader;
5pub mod writer;
6
7use std::env;
8use std::sync::Arc;
9use std::sync::LazyLock;
10
11use vortex_array::ArrayContext;
12use vortex_array::DeserializeMetadata;
13use vortex_array::ProstMetadata;
14use vortex_buffer::ByteBuffer;
15use vortex_dtype::DType;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18use vortex_error::vortex_panic;
19use vortex_session::VortexSession;
20
21use crate::LayoutChildType;
22use crate::LayoutEncodingRef;
23use crate::LayoutId;
24use crate::LayoutReaderRef;
25use crate::LayoutRef;
26use crate::VTable;
27use crate::children::LayoutChildren;
28use crate::layouts::flat::reader::FlatReader;
29use crate::segments::SegmentId;
30use crate::segments::SegmentSource;
31use crate::vtable;
32
/// Process-wide switch read from the `FLAT_LAYOUT_INLINE_ARRAY_NODE` environment variable.
///
/// Evaluates to `true` when the variable is present in the environment, whatever its value
/// (including an empty or non-Unicode one). The lookup runs once, on first dereference, and
/// the result is cached for the rest of the process.
///
/// NOTE(review): nothing in this module reads the flag; presumably the `writer` submodule
/// consults it to decide whether to inline the array node into the layout metadata — confirm
/// against that module.
static FLAT_LAYOUT_INLINE_ARRAY_NODE: LazyLock<bool> =
    // `var_os` rather than `var`: only presence matters, and `var` reports a non-Unicode
    // value as an error, which would silently turn the flag off.
    LazyLock::new(|| env::var_os("FLAT_LAYOUT_INLINE_ARRAY_NODE").is_some());
35
// NOTE(review): presumably this crate macro declares `FlatVTable` (implemented below) and the
// glue that lets `FlatLayoutEncoding` / `FlatLayout` be used as layout references — confirm
// against the `vtable!` definition.
vtable!(Flat);
37
38impl VTable for FlatVTable {
39    type Layout = FlatLayout;
40    type Encoding = FlatLayoutEncoding;
41    type Metadata = ProstMetadata<FlatLayoutMetadata>;
42
43    fn id(_encoding: &Self::Encoding) -> LayoutId {
44        LayoutId::new_ref("vortex.flat")
45    }
46
47    fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
48        LayoutEncodingRef::new_ref(FlatLayoutEncoding.as_ref())
49    }
50
51    fn row_count(layout: &Self::Layout) -> u64 {
52        layout.row_count
53    }
54
55    fn dtype(layout: &Self::Layout) -> &DType {
56        &layout.dtype
57    }
58
59    fn metadata(layout: &Self::Layout) -> Self::Metadata {
60        ProstMetadata(FlatLayoutMetadata {
61            array_encoding_tree: layout.array_tree.as_ref().map(|bytes| bytes.to_vec()),
62        })
63    }
64
65    fn segment_ids(layout: &Self::Layout) -> Vec<SegmentId> {
66        vec![layout.segment_id]
67    }
68
69    fn nchildren(_layout: &Self::Layout) -> usize {
70        0
71    }
72
73    fn child(_layout: &Self::Layout, _idx: usize) -> VortexResult<LayoutRef> {
74        vortex_bail!("Flat layout has no children");
75    }
76
77    fn child_type(_layout: &Self::Layout, _idx: usize) -> LayoutChildType {
78        vortex_panic!("Flat layout has no children");
79    }
80
81    fn new_reader(
82        layout: &Self::Layout,
83        name: Arc<str>,
84        segment_source: Arc<dyn SegmentSource>,
85        _session: &VortexSession,
86    ) -> VortexResult<LayoutReaderRef> {
87        Ok(Arc::new(FlatReader::new(
88            layout.clone(),
89            name,
90            segment_source,
91        )))
92    }
93
94    #[cfg(gpu_unstable)]
95    fn new_gpu_reader(
96        layout: &Self::Layout,
97        name: Arc<str>,
98        segment_source: Arc<dyn SegmentSource>,
99        ctx: Arc<cudarc::driver::CudaContext>,
100    ) -> VortexResult<crate::gpu::GpuLayoutReaderRef> {
101        Ok(Arc::new(crate::gpu::layouts::flat::GpuFlatReader::new(
102            layout.clone(),
103            name,
104            segment_source,
105            ctx,
106        )))
107    }
108
109    fn build(
110        _encoding: &Self::Encoding,
111        dtype: &DType,
112        row_count: u64,
113        metadata: &<Self::Metadata as DeserializeMetadata>::Output,
114        segment_ids: Vec<SegmentId>,
115        _children: &dyn LayoutChildren,
116        ctx: ArrayContext,
117    ) -> VortexResult<Self::Layout> {
118        if segment_ids.len() != 1 {
119            vortex_bail!("Flat layout must have exactly one segment ID");
120        }
121        Ok(FlatLayout::new_with_metadata(
122            row_count,
123            dtype.clone(),
124            segment_ids[0],
125            ctx,
126            metadata
127                .array_encoding_tree
128                .as_ref()
129                .map(|v| ByteBuffer::from(v.clone())),
130        ))
131    }
132}
133
/// Stateless marker type naming the flat layout encoding.
///
/// It is the `Encoding` associated type of the flat layout's `VTable` implementation.
#[derive(Debug)]
pub struct FlatLayoutEncoding;
136
137#[derive(Clone, Debug)]
138pub struct FlatLayout {
139    row_count: u64,
140    dtype: DType,
141    segment_id: SegmentId,
142    ctx: ArrayContext,
143    array_tree: Option<ByteBuffer>,
144}
145
146impl FlatLayout {
147    pub fn new(row_count: u64, dtype: DType, segment_id: SegmentId, ctx: ArrayContext) -> Self {
148        Self {
149            row_count,
150            dtype,
151            segment_id,
152            ctx,
153            array_tree: None,
154        }
155    }
156
157    pub fn new_with_metadata(
158        row_count: u64,
159        dtype: DType,
160        segment_id: SegmentId,
161        ctx: ArrayContext,
162        metadata: Option<ByteBuffer>,
163    ) -> Self {
164        Self {
165            row_count,
166            dtype,
167            segment_id,
168            ctx,
169            array_tree: metadata,
170        }
171    }
172
173    #[inline]
174    pub fn segment_id(&self) -> SegmentId {
175        self.segment_id
176    }
177
178    #[inline]
179    pub fn array_ctx(&self) -> &ArrayContext {
180        &self.ctx
181    }
182
183    #[inline]
184    pub fn array_tree(&self) -> Option<&ByteBuffer> {
185        self.array_tree.as_ref()
186    }
187}
188
/// Metadata message serialized alongside a flat layout.
#[derive(prost::Message)]
pub struct FlatLayoutMetadata {
    /// Optional serialized array encoding tree.
    ///
    /// Storing it here avoids having to fetch the segment in order to plan array
    /// deserialization. When present, the bytes are an `ArrayNode`.
    #[prost(optional, bytes, tag = "1")]
    pub array_encoding_tree: Option<Vec<u8>>,
}