vortex_layout/strategy.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use async_trait::async_trait;
5use vortex_array::ArrayContext;
6use vortex_error::VortexResult;
7use vortex_io::runtime::Handle;
8
9use crate::LayoutRef;
10use crate::segments::SegmentSinkRef;
11use crate::sequence::{SendableSequentialStream, SequencePointer};
12
13// [layout writer]
14#[async_trait]
15pub trait LayoutStrategy: 'static + Send + Sync {
16 /// Asynchronously process an ordered stream of array chunks, emitting them into a sink and
17 /// returning the [`Layout`][crate::Layout] instance that can be parsed to retrieve the data
18 /// from rest.
19 ///
20 /// This trait uses the `#[async_trait]` attribute to denote that trait objects of this type
21 /// can be `Box`ed or `Arc`ed and shared around. Commonly, these strategies are composed to
22 /// form a operator of operations, each of which modifies the chunk stream in some way before
23 /// passing the data on to a downstream writer.
24 ///
25 /// # Sequencing and EOF
26 ///
27 /// The `stream` parameter is a stream of ordered array chunks, each of which is associated
28 /// with a sequence pointer that indicates its position in the overall array. By passing
29 /// around these pointers (essentially vector clocks), the writer can support concurrent
30 /// and parallel processing while maintaining a deterministic order of data in the file.
31 ///
32 /// The `eof` parameter is a guaranteed to be greater than all sequence pointers in the stream.
33 ///
34 /// Because child strategies can write to the end-of-file pointer, it is very important that
35 /// **all strategies must await all children concurrently**. Otherwise it is possible to
36 /// deadlock if one child is waiting to write to EOF while your strategy is preventing the
37 /// stream from progressing to completion.
38 ///
39 /// # Blocking operations
40 ///
41 /// This is an async trait method, which will return a `BoxFuture` that you can await from
42 /// any runtime. Implementations should avoid directly performing blocking work within the
43 /// `write_stream`, and should instead spawn it onto an appropriate runtime or threadpool
44 /// dedicated to such work.
45 ///
46 /// Such operations are common, and include things like compression and parsing large blobs
47 /// of data, or serializing very large messages to flatbuffers.
48 async fn write_stream(
49 &self,
50 ctx: ArrayContext,
51 segment_sink: SegmentSinkRef,
52 stream: SendableSequentialStream,
53 eof: SequencePointer,
54 handle: Handle,
55 ) -> VortexResult<LayoutRef>;
56
57 /// Returns the number of bytes currently buffered by this strategy and any child strategies.
58 ///
59 /// This method allows tracking of data that has been processed by the strategy but not yet
60 /// written to the underlying sink, providing more accurate estimates of final file size
61 /// during write operations.
62 fn buffered_bytes(&self) -> u64 {
63 0
64 }
65}
66// [layout writer]