1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
use std::{
    fmt::Debug,
    pin::Pin,
    task::{Context, Poll},
};

use super::{RowBatchBuilder, RowFormat};
use datafusion::arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
use futures::{Sink, SinkExt};

/// A [`Sink`] adapter that accepts individual items of type `R`, accumulates
/// them in an `R::Builder`, and forwards them to an inner sink as
/// [`RecordBatch`]es.
///
/// Items are buffered by `start_send`; a batch is built and handed to the
/// inner sink when the buffer has reached `capacity` (checked on readiness
/// polls) or when the sink is flushed or closed.
pub struct RowSink<R: RowFormat> {
    /// Type-erased downstream sink that receives the built batches.
    inner: Pin<Box<dyn Sink<RecordBatch, Error = crate::Error> + Send + 'static>>,
    /// Accumulates pushed items until they are built into a batch.
    builder: R::Builder,
    /// Number of buffered items at which a readiness poll triggers a flush.
    capacity: usize,
    /// Schema handed to the builder every time a batch is built.
    schema: SchemaRef,
}

/// Hand-written `Debug` output for [`RowSink`].
///
/// Only `builder`, `capacity` and `schema` are printed; the boxed `inner`
/// sink is omitted and the output is marked non-exhaustive to signal that.
impl<R: RowFormat> Debug for RowSink<R> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pull out exactly the fields that get printed; `..` skips `inner`.
        let Self {
            builder,
            capacity,
            schema,
            ..
        } = self;

        let mut out = f.debug_struct("RowSink");
        out.field("builder", builder);
        out.field("capacity", capacity);
        out.field("schema", schema);
        out.finish_non_exhaustive()
    }
}

impl<R> RowSink<R>
where
    R: RowFormat,
{
    /// Creates a sink that batches items of type `R` and forwards the
    /// resulting [`RecordBatch`]es to `inner`.
    ///
    /// * `inner` - downstream sink; it is boxed and pinned internally.
    /// * `schema` - schema whose fields are used to create the builder and
    ///   which is passed along whenever a batch is built.
    /// * `capacity` - number of buffered items at which a readiness poll
    ///   flushes the buffer to `inner`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `R::builder` if the builder cannot be
    /// created for the given schema fields.
    pub fn try_new<S>(inner: S, schema: SchemaRef, capacity: usize) -> crate::Result<Self>
    where
        S: Sink<RecordBatch, Error = crate::Error> + Send + 'static,
    {
        let inner = Box::pin(inner);
        let builder = R::builder(&schema.fields)?;

        Ok(Self {
            inner,
            builder,
            capacity,
            schema,
        })
    }

    /// Returns the number of items currently held by the builder, i.e. not
    /// yet handed to the inner sink.
    pub fn len(&self) -> usize {
        self.builder.len()
    }

    /// Returns `true` when no items are currently buffered.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the configured flush threshold.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Builds a batch from the buffered items and hands it to the inner sink
    /// if a flush is due.
    ///
    /// A flush is due when the builder is non-empty and either `force` is set
    /// or the number of buffered items has reached `capacity`. An empty
    /// builder is never flushed, so a `capacity` of zero cannot cause empty
    /// batches to be emitted on every poll, nor cause items to be sent to an
    /// inner sink that is already in the middle of flushing or closing.
    ///
    /// Returns `Poll::Pending` while the inner sink is not ready to accept a
    /// batch, and `Poll::Ready(Err(_))` if the inner sink or the builder
    /// reports an error.
    fn maybe_flush(&mut self, force: bool, cx: &mut Context<'_>) -> Poll<crate::Result<()>>
    where
        Self: Unpin,
    {
        // Nothing buffered: there is nothing to send, regardless of `force`
        // or of how small `capacity` is.
        if self.builder.is_empty() {
            return Poll::Ready(Ok(()));
        }

        // Below the threshold and not forced: keep accumulating.
        if !force && self.builder.len() < self.capacity {
            return Poll::Ready(Ok(()));
        }

        // Only build the batch once the inner sink can take it, so that a
        // `Pending` result leaves the buffered items untouched.
        futures::ready!(self.inner.poll_ready_unpin(cx))?;
        let batch = self.builder.build(self.schema.clone())?;
        Poll::Ready(self.inner.start_send_unpin(batch))
    }
}

impl<R> Sink<R> for RowSink<R>
where
    R: RowFormat,
    Self: Unpin,
{
    type Error = crate::Error;

    #[inline]
    fn poll_ready(
        mut self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Result<(), Self::Error>> {
        self.maybe_flush(false, cx)
    }

    #[inline]
    fn start_send(mut self: std::pin::Pin<&mut Self>, item: R) -> Result<(), Self::Error> {
        self.builder.push(item);
        Ok(())
    }

    fn poll_flush(
        mut self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Result<(), Self::Error>> {
        futures::ready!(self.maybe_flush(true, cx))?;
        self.inner.poll_flush_unpin(cx)
    }

    fn poll_close(
        mut self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Result<(), Self::Error>> {
        futures::ready!(self.maybe_flush(true, cx))?;
        self.inner.poll_close_unpin(cx)
    }
}