vortex_layout/
strategy.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use async_trait::async_trait;
5use vortex_array::ArrayContext;
6use vortex_error::VortexResult;
7use vortex_io::runtime::Handle;
8
9use crate::LayoutRef;
10use crate::segments::SegmentSinkRef;
11use crate::sequence::{SendableSequentialStream, SequencePointer};
12
13// [layout writer]
/// A strategy that consumes a stream of array chunks and produces the layout describing them.
14#[async_trait]
15pub trait LayoutStrategy: 'static + Send + Sync {
16    /// Asynchronously process an ordered stream of array chunks, emitting them into a sink and
17    /// returning the [`Layout`][crate::Layout] instance that can be parsed to retrieve the data
18    /// once it is at rest.
19    ///
20    /// This trait uses the `#[async_trait]` attribute so that trait objects of this type can be
21    /// `Box`ed or `Arc`ed and shared around. Commonly, these strategies are composed to form a
22    /// pipeline of operations, each of which modifies the chunk stream in some way before
23    /// passing the data on to a downstream writer.
24    ///
25    /// # Sequencing and EOF
26    ///
27    /// The `stream` parameter is a stream of ordered array chunks, each of which is associated
28    /// with a sequence pointer that indicates its position in the overall array. By passing
29    /// around these pointers (essentially vector clocks), the writer can support concurrent
30    /// and parallel processing while maintaining a deterministic order of data in the file.
31    ///
32    /// The `eof` parameter is guaranteed to be greater than all sequence pointers in the stream.
33    ///
34    /// Because child strategies can write to the end-of-file pointer, it is very important that
35    /// **every strategy awaits all of its children concurrently**. Otherwise it is possible to
36    /// deadlock if one child is waiting to write to EOF while your strategy is preventing the
37    /// stream from progressing to completion.
38    ///
39    /// # Blocking operations
40    ///
41    /// This is an async trait method, which will return a `BoxFuture` that you can await from
42    /// any runtime. Implementations should avoid performing blocking work directly within
43    /// `write_stream`, and should instead spawn it onto an appropriate runtime or threadpool
44    /// dedicated to such work.
45    ///
46    /// Such operations are common, and include things like compression, parsing large blobs
47    /// of data, and serializing very large messages to flatbuffers.
48    async fn write_stream(
49        &self,
50        ctx: ArrayContext,
51        segment_sink: SegmentSinkRef,
52        stream: SendableSequentialStream,
53        eof: SequencePointer,
54        handle: Handle,
55    ) -> VortexResult<LayoutRef>;
56
57    /// Returns the number of bytes currently buffered by this strategy and any child strategies.
58    ///
59    /// This method allows tracking of data that has been processed by the strategy but not yet
60    /// written to the underlying sink, providing more accurate estimates of final file size
61    /// during write operations. The default implementation reports `0` buffered bytes.
62    fn buffered_bytes(&self) -> u64 {
63        0
64    }
65}
66// [layout writer]