vortex_layout/
strategy.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use async_trait::async_trait;
5use vortex_array::ArrayContext;
6use vortex_error::VortexResult;
7use vortex_io::runtime::Handle;
8
9use crate::LayoutRef;
10use crate::segments::SegmentSinkRef;
11use crate::sequence::SendableSequentialStream;
12use crate::sequence::SequencePointer;
13
14// [layout writer]
15#[async_trait]
16pub trait LayoutStrategy: 'static + Send + Sync {
17    /// Asynchronously process an ordered stream of array chunks, emitting them into a sink and
18    /// returning the [`Layout`][crate::Layout] instance that can be parsed to retrieve the data
19    /// from rest.
20    ///
21    /// This trait uses the `#[async_trait]` attribute to denote that trait objects of this type
22    /// can be `Box`ed or `Arc`ed and shared around. Commonly, these strategies are composed to
23    /// form a pipeline of operations, each of which modifies the chunk stream in some way before
24    /// passing the data on to a downstream writer.
25    ///
26    /// # Sequencing and EOF
27    ///
28    /// The `stream` parameter is a stream of ordered array chunks, each of which is associated
29    /// with a sequence pointer that indicates its position in the overall array. By passing
30    /// around these pointers (essentially vector clocks), the writer can support concurrent
31    /// and parallel processing while maintaining a deterministic order of data in the file.
32    ///
33    /// The `eof` parameter is guaranteed to be greater than all sequence pointers in the stream.
34    ///
35    /// Because child strategies can write to the end-of-file pointer, it is very important that
36    /// **all strategies must await all children concurrently**. Otherwise it is possible to
37    /// deadlock if one child is waiting to write to EOF while your strategy is preventing the
38    /// stream from progressing to completion.
39    ///
40    /// # Blocking operations
41    ///
42    /// This is an async trait method, which will return a `BoxFuture` that you can await from
43    /// any runtime. Implementations should avoid directly performing blocking work within
44    /// `write_stream`, and should instead spawn it onto an appropriate runtime or threadpool
45    /// dedicated to such work.
46    ///
47    /// Such operations are common, and include things like compression and parsing large blobs
48    /// of data, or serializing very large messages to flatbuffers.
49    async fn write_stream(
50        &self,
51        ctx: ArrayContext,
52        segment_sink: SegmentSinkRef,
53        stream: SendableSequentialStream,
54        eof: SequencePointer,
55        handle: Handle,
56    ) -> VortexResult<LayoutRef>;
57
58    /// Returns the number of bytes currently buffered by this strategy and any child strategies.
59    ///
60    /// This method allows tracking of data that has been processed by the strategy but not yet
61    /// written to the underlying sink, providing more accurate estimates of final file size
62    /// during write operations.
63    fn buffered_bytes(&self) -> u64 {
64        0
65    }
66}
67// [layout writer]