1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
use std::fmt::Debug;
use std::time::Duration;

use crate::api::checkpoint::CheckpointFunction;
use crate::api::element::{Record, StreamStatus};
use crate::api::function::NamedFunction;
use crate::utils::date_time::timestamp_str;

/// Upper bound for watermarks in this module: timestamp `253402185600000`.
///
/// NOTE(review): read as epoch milliseconds this is 9999-12-30T16:00:00Z, presumably an
/// "end of time" sentinel — confirm the intended instant and time zone.
pub const MAX_WATERMARK: Watermark = Watermark {
    timestamp: 253_402_185_600_000,
};

/// Lower bound for watermarks in this module: timestamp `0`.
pub const MIN_WATERMARK: Watermark = Watermark { timestamp: 0 };

/// A watermark: a thin wrapper around a single `u64` timestamp.
///
/// Two watermarks compare equal exactly when their timestamps are equal.
#[derive(Clone, Debug, PartialEq)]
pub struct Watermark {
    /// The timestamp this watermark carries.
    // NOTE(review): the extractor in this file treats timestamps as milliseconds; confirm
    // that holds for every producer of `Watermark`.
    pub(crate) timestamp: u64,
}

impl Watermark {
    /// Builds a watermark carrying `timestamp`.
    pub fn new(timestamp: u64) -> Self {
        Self { timestamp }
    }
}

/// A function object that derives a `u64` timestamp from a [`Record`].
///
/// Implementors must also be [`NamedFunction`], [`CheckpointFunction`] and [`Debug`].
pub trait TimestampAssigner: NamedFunction + CheckpointFunction + Debug {
    /// Returns the timestamp for `row`.
    ///
    /// Both the assigner and the record are borrowed mutably, so an implementation may
    /// update its own state or the record while extracting.
    ///
    /// `previous_element_timestamp` is supplied by the caller.
    // NOTE(review): judging by its name this is the timestamp previously attached to the
    // element — confirm against the call sites.
    fn extract_timestamp(&mut self, row: &mut Record, previous_element_timestamp: u64) -> u64;
}

/// A [`TimestampAssigner`] that can additionally produce [`Watermark`]s.
///
/// Implementors must also be [`NamedFunction`] and [`Debug`].
pub trait WatermarkAssigner: TimestampAssigner + NamedFunction + Debug {
    /// Asks the assigner for a watermark given the current `stream_status`.
    ///
    /// Returns `Some(watermark)` when one should be emitted and `None` otherwise. The
    /// receiver is `&mut self`, so implementations may update their bookkeeping here.
    fn watermark(&mut self, stream_status: &StreamStatus) -> Option<Watermark>;

    /// Returns the assigner's current watermark without modifying it, or `None` when it
    /// has none to report.
    fn current_watermark(&self) -> Option<Watermark>;
}

/// Wraps a [`TimestampAssigner`] and tracks the state needed to emit watermarks that trail
/// the largest timestamp seen by a fixed bound (`max_out_of_orderness`).
#[derive(Debug)]
pub struct BoundedOutOfOrdernessTimestampExtractor<E: TimestampAssigner> {
    /// Largest timestamp returned by the wrapped assigner so far; seeded with
    /// `max_out_of_orderness` by the constructor.
    current_max_timestamp: u64,
    /// Value `last_emitted_watermark` held just before the most recent emission. It is
    /// written when a watermark is emitted and not read anywhere in this file.
    previous_emitted_watermark: u64,
    /// Timestamp of the most recently emitted watermark; `0` means none has been emitted.
    last_emitted_watermark: u64,
    /// How far, in milliseconds, the watermark trails `current_max_timestamp`.
    max_out_of_orderness: u64,
    /// The wrapped assigner that actually extracts timestamps from records.
    extract_timestamp: E,
}

impl<E> BoundedOutOfOrdernessTimestampExtractor<E>
where
    E: TimestampAssigner,
{
    /// Creates an extractor that wraps `extract_timestamp` and lets watermarks trail the
    /// largest seen timestamp by `max_out_of_orderness`.
    ///
    /// The bound is stored as whole milliseconds. A duration whose millisecond count does
    /// not fit in a `u64` is clamped to `u64::MAX` rather than silently truncated, so an
    /// absurdly large bound behaves as "never emit" instead of wrapping to an arbitrary
    /// small value.
    pub fn new(max_out_of_orderness: Duration, extract_timestamp: E) -> Self {
        // `as_millis` yields a `u128`; clamp first so the narrowing cast below is lossless.
        let max_out_of_orderness =
            max_out_of_orderness.as_millis().min(u128::from(u64::MAX)) as u64;
        BoundedOutOfOrdernessTimestampExtractor {
            // Unsigned stand-in for the `Long.MIN_VALUE + maxOutOfOrderness` seed noted in
            // the original comment: it makes the first candidate watermark
            // (`current_max_timestamp - max_out_of_orderness`) equal to 0.
            current_max_timestamp: max_out_of_orderness,
            previous_emitted_watermark: 0,
            // 0 plays the role of `Long.MIN_VALUE`: "nothing emitted yet".
            last_emitted_watermark: 0,
            max_out_of_orderness,
            extract_timestamp,
        }
    }
}

impl<E: TimestampAssigner> WatermarkAssigner for BoundedOutOfOrdernessTimestampExtractor<E> {
    /// Emits a new watermark when the candidate (`current_max_timestamp` minus
    /// `max_out_of_orderness`) is strictly greater than the last emitted one.
    ///
    /// On emission the old value is kept in `previous_emitted_watermark` and the candidate
    /// becomes `last_emitted_watermark`. Returns `None` when the candidate has not
    /// advanced. `_stream_status` is not consulted.
    fn watermark(&mut self, _stream_status: &StreamStatus) -> Option<Watermark> {
        // Cannot underflow: `new` seeds `current_max_timestamp` with `max_out_of_orderness`
        // and `extract_timestamp` only ever raises it.
        let candidate = self.current_max_timestamp - self.max_out_of_orderness;
        debug!(
            "potential_wm={}, current_max_timestamp={}, max_out_of_orderness={}",
            timestamp_str(candidate),
            timestamp_str(self.current_max_timestamp),
            self.max_out_of_orderness,
        );

        // Watermarks only move forward; an equal or smaller candidate is not re-emitted.
        if candidate <= self.last_emitted_watermark {
            return None;
        }

        self.previous_emitted_watermark = self.last_emitted_watermark;
        self.last_emitted_watermark = candidate;
        debug!(
            "Create Watermark: {}",
            timestamp_str(self.last_emitted_watermark)
        );

        Some(Watermark::new(self.last_emitted_watermark))
    }

    /// Returns the last emitted watermark, or `None` while `last_emitted_watermark` is
    /// still `0` (its initial "nothing emitted" value).
    fn current_watermark(&self) -> Option<Watermark> {
        match self.last_emitted_watermark {
            0 => None,
            emitted => Some(Watermark::new(emitted)),
        }
    }
}

impl<E: TimestampAssigner> TimestampAssigner for BoundedOutOfOrdernessTimestampExtractor<E> {
    /// Delegates to the wrapped assigner and returns its timestamp unchanged, raising
    /// `current_max_timestamp` whenever the extracted value exceeds it.
    fn extract_timestamp(&mut self, row: &mut Record, previous_element_timestamp: u64) -> u64 {
        let extracted = self
            .extract_timestamp
            .extract_timestamp(row, previous_element_timestamp);
        // Keep the running maximum; it is what the watermark is derived from.
        self.current_max_timestamp = self.current_max_timestamp.max(extracted);
        extracted
    }
}

impl<E: TimestampAssigner> NamedFunction for BoundedOutOfOrdernessTimestampExtractor<E> {
    /// Returns the fixed name of this function; it does not depend on the wrapped assigner.
    fn name(&self) -> &str {
        const NAME: &str = "BoundedOutOfOrdernessTimestampExtractor";
        NAME
    }
}

/// Marks the extractor as a `CheckpointFunction` without overriding anything: the impl body
/// is empty, so any behaviour comes from the trait's own defaults.
impl<E: TimestampAssigner> CheckpointFunction for BoundedOutOfOrdernessTimestampExtractor<E> {}