// datafusion_execution/stream.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
use datafusion_common::Result;
use futures::Stream;
use std::pin::Pin;

23/// Trait for types that stream [RecordBatch]
24///
25/// See [`SendableRecordBatchStream`] for more details.
26pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
27    /// Returns the schema of this `RecordBatchStream`.
28    ///
29    /// Implementation of this trait should guarantee that all `RecordBatch`'s returned by this
30    /// stream should have the same schema as returned from this method.
31    fn schema(&self) -> SchemaRef;
32}
33
34/// Trait for a [`Stream`] of [`RecordBatch`]es that can be passed between threads
35///
36/// This trait is used to retrieve the results of DataFusion execution plan nodes.
37///
38/// The trait is a specialized Rust Async [`Stream`] that also knows the schema
39/// of the data it will return (even if the stream has no data). Every
40/// `RecordBatch` returned by the stream should have the same schema as returned
41/// by [`schema`](`RecordBatchStream::schema`).
42///
43/// # See Also
44///
45/// * [`RecordBatchStreamAdapter`] to convert an existing [`Stream`]
46///   to [`SendableRecordBatchStream`]
47///
48/// [`RecordBatchStreamAdapter`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/stream/struct.RecordBatchStreamAdapter.html
49///
50/// # Error Handling
51///
52/// Once a stream returns an error, it should not be polled again (the caller
53/// should stop calling `next`) and handle the error.
54///
55/// However, returning `Ready(None)` (end of stream) is likely the safest
56/// behavior after an error. Like [`Stream`]s, `RecordBatchStream`s should not
57/// be polled after end of stream or returning an error. However, also like
58/// [`Stream`]s there is no mechanism to prevent callers polling  so returning
59/// `Ready(None)` is recommended.
60pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send>>;