vortex_layout/strategy.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use async_trait::async_trait;
5use vortex_array::ArrayContext;
6use vortex_error::VortexResult;
7use vortex_io::runtime::Handle;
8
9use crate::LayoutRef;
10use crate::segments::SegmentSinkRef;
11use crate::sequence::SendableSequentialStream;
12use crate::sequence::SequencePointer;
13
14// [layout writer]
15#[async_trait]
16pub trait LayoutStrategy: 'static + Send + Sync {
17 /// Asynchronously process an ordered stream of array chunks, emitting them into a sink and
18 /// returning the [`Layout`][crate::Layout] instance that can be parsed to retrieve the data
19 /// from rest.
20 ///
21 /// This trait uses the `#[async_trait]` attribute to denote that trait objects of this type
22 /// can be `Box`ed or `Arc`ed and shared around. Commonly, these strategies are composed to
23 /// form a operator of operations, each of which modifies the chunk stream in some way before
24 /// passing the data on to a downstream writer.
25 ///
26 /// # Sequencing and EOF
27 ///
28 /// The `stream` parameter is a stream of ordered array chunks, each of which is associated
29 /// with a sequence pointer that indicates its position in the overall array. By passing
30 /// around these pointers (essentially vector clocks), the writer can support concurrent
31 /// and parallel processing while maintaining a deterministic order of data in the file.
32 ///
33 /// The `eof` parameter is a guaranteed to be greater than all sequence pointers in the stream.
34 ///
35 /// Because child strategies can write to the end-of-file pointer, it is very important that
36 /// **all strategies must await all children concurrently**. Otherwise it is possible to
37 /// deadlock if one child is waiting to write to EOF while your strategy is preventing the
38 /// stream from progressing to completion.
39 ///
40 /// # Blocking operations
41 ///
42 /// This is an async trait method, which will return a `BoxFuture` that you can await from
43 /// any runtime. Implementations should avoid directly performing blocking work within the
44 /// `write_stream`, and should instead spawn it onto an appropriate runtime or threadpool
45 /// dedicated to such work.
46 ///
47 /// Such operations are common, and include things like compression and parsing large blobs
48 /// of data, or serializing very large messages to flatbuffers.
49 async fn write_stream(
50 &self,
51 ctx: ArrayContext,
52 segment_sink: SegmentSinkRef,
53 stream: SendableSequentialStream,
54 eof: SequencePointer,
55 handle: Handle,
56 ) -> VortexResult<LayoutRef>;
57
58 /// Returns the number of bytes currently buffered by this strategy and any child strategies.
59 ///
60 /// This method allows tracking of data that has been processed by the strategy but not yet
61 /// written to the underlying sink, providing more accurate estimates of final file size
62 /// during write operations.
63 fn buffered_bytes(&self) -> u64 {
64 0
65 }
66}
67// [layout writer]