vortex_layout/
vtable.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use std::collections::BTreeSet;
use std::fmt::Debug;
use std::ops::Deref;
use std::sync::Arc;

use vortex_array::ContextRef;
use vortex_dtype::FieldMask;
use vortex_error::VortexResult;

use crate::segments::AsyncSegmentReader;
use crate::{Layout, LayoutId, LayoutReader};

/// A reference to a layout VTable, either static or arc'd.
///
/// The handle is cheap to pass around: it wraps either a `&'static dyn LayoutVTable`
/// or an `Arc<dyn LayoutVTable>`, and dereferences to `dyn LayoutVTable` so trait
/// methods can be called on it directly.
///
/// Construct one with [`LayoutVTableRef::from_static`] or [`LayoutVTableRef::from_arc`].
#[derive(Debug, Clone)]
pub struct LayoutVTableRef(Inner);

/// Private storage behind [`LayoutVTableRef`]: how the vtable is held.
#[derive(Debug, Clone)]
enum Inner {
    /// A vtable borrowed for the `'static` lifetime.
    Static(&'static dyn LayoutVTable),
    /// A reference-counted, shared vtable.
    Arc(Arc<dyn LayoutVTable>),
}

impl LayoutVTableRef {
    /// Wraps a reference-counted vtable, taking ownership of the given `Arc`.
    pub fn from_arc(vtable: Arc<dyn LayoutVTable>) -> Self {
        Self(Inner::Arc(vtable))
    }

    /// Wraps a vtable that is borrowed for the `'static` lifetime.
    ///
    /// This is a `const fn`, so it can be used to initialise constants and statics.
    pub const fn from_static(vtable: &'static dyn LayoutVTable) -> Self {
        Self(Inner::Static(vtable))
    }
}

/// Lets a [`LayoutVTableRef`] be used wherever a `&dyn LayoutVTable` is expected.
impl Deref for LayoutVTableRef {
    type Target = dyn LayoutVTable;

    /// Borrows the underlying vtable, regardless of how it is stored.
    fn deref(&self) -> &Self::Target {
        let Self(inner) = self;
        match inner {
            // Reborrow through the `Arc` to reach the trait object it owns.
            Inner::Arc(shared) => &**shared,
            // Copy the `'static` reference out from behind the borrow.
            Inner::Static(table) => *table,
        }
    }
}

/// The set of operations that every type of layout provides.
///
/// Implementations must be [`Debug`], [`Send`] and [`Sync`]. They are typically accessed
/// through a [`LayoutVTableRef`], which dereferences to `dyn LayoutVTable`.
pub trait LayoutVTable: Debug + Send + Sync {
    /// Returns the globally unique ID for this type of layout.
    fn id(&self) -> LayoutId;

    /// Construct a [`LayoutReader`] for the provided [`Layout`].
    ///
    /// The reader is given the `ctx` and the `segments` reader to work with.
    ///
    /// # Panics
    ///
    /// May panic if the provided `Layout` does not match this vtable's layout type.
    fn reader(
        &self,
        layout: Layout,
        ctx: ContextRef,
        segments: Arc<dyn AsyncSegmentReader>,
    ) -> VortexResult<Arc<dyn LayoutReader>>;

    /// Register the row splits for this layout. Splits represent natural boundaries at
    /// which a reader can split the layout for independent processing.
    ///
    /// For example, a ChunkedLayout would register a boundary at the end of every chunk.
    ///
    /// The layout is passed a `row_offset` that identifies the starting row of the layout within
    /// the file. Splits are recorded into `splits`; since it is a set, registering the same
    /// split more than once leaves a single entry.
    // NOTE(review): `field_mask` presumably restricts which fields are considered when
    //  registering splits -- confirm against the implementations.
    // TODO(ngates): we should check whether this is actually performant enough since we visit
    //  all nodes of the layout tree, often registering the same splits many times.
    fn register_splits(
        &self,
        layout: &Layout,
        field_mask: &[FieldMask],
        row_offset: u64,
        splits: &mut BTreeSet<u64>,
    ) -> VortexResult<()>;
}