1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
//! Persistence: storage configuration, record storage traits, and an in-memory record storage

use std::{
    collections::VecDeque,
    io::Error as IoError,
    num::{NonZeroU32, NonZeroU64, NonZeroUsize},
    path::PathBuf,
    result::Result as StdResult,
    time::{Duration, SystemTime},
};

use thiserror::Error;

use crate::{
    fs::WriteResult,
    time::{Interval, SystemInstant},
};

// TODO: Currently unused
pub mod field;

#[cfg(feature = "csv-storage")]
pub mod csv;

/// Errors reported by the persistence layer.
///
/// Every variant is transparent, i.e. its message and source are those of the wrapped error.
#[derive(Error, Debug)]
pub enum Error {
    /// An I/O error from the standard library.
    #[error(transparent)]
    Io(#[from] IoError),

    /// An error from the `csv` crate (only with the `csv-storage` feature).
    #[cfg(feature = "csv-storage")]
    #[error(transparent)]
    Csv(#[from] ::csv::Error),

    /// Any other error, carried as an [`anyhow::Error`].
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}

/// Maps each variant of the file-system CSV error onto the variant of the
/// same name in this module's [`Error`].
#[cfg(feature = "csv-storage")]
impl From<crate::fs::csv::Error> for Error {
    fn from(err: crate::fs::csv::Error) -> Self {
        use crate::fs::csv::Error as CsvFsError;
        match err {
            CsvFsError::Io(io_err) => Self::Io(io_err),
            CsvFsError::Csv(csv_err) => Self::Csv(csv_err),
        }
    }
}

/// Result type whose error is this module's [`Error`].
pub type Result<T> = StdResult<T, Error>;

/// Maximum pre-allocated capacity.
///
/// Caps up-front allocations to avoid allocation errors caused by
/// excessively high capacity or limit parameters.
pub const MAX_PREALLOCATED_CAPACITY_LIMIT: usize = 16_384; // 2^14

/// A storage's descriptor together with its statistics, if any.
#[derive(Debug, Clone)]
pub struct StorageStatus {
    /// Static description of the storage.
    pub descriptor: StorageDescriptor,

    /// Statistics of the storage, or `None` if not provided.
    pub statistics: Option<StorageStatistics>,
}

/// A non-empty span of time, currently expressible only in whole days.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum TimeInterval {
    /// A non-zero number of days.
    Days(NonZeroU32),
}

/// Number of seconds in a day of exactly 24 hours.
const SECONDS_PER_DAY: u64 = 24 * 3_600;

/// Converts the interval into a fixed duration, counting each day as 24 hours.
impl From<TimeInterval> for Duration {
    fn from(from: TimeInterval) -> Self {
        match from {
            TimeInterval::Days(days) => {
                // Widen before multiplying: `u32::MAX` days in seconds fits into `u64`.
                let day_count = u64::from(days.get());
                Duration::from_secs(day_count * SECONDS_PER_DAY)
            }
        }
    }
}

impl From<TimeInterval> for Interval {
    fn from(from: TimeInterval) -> Self {
        use TimeInterval::*;
        match from {
            Days(days) => Interval::Days(days.get()),
        }
    }
}

/// A non-zero amount of memory, currently expressible only in bytes.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum MemorySize {
    /// A non-zero number of bytes.
    Bytes(NonZeroU64),
}

/// Configuration of a record storage.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct StorageConfig {
    /// NOTE(review): presumably how long records are kept before they may be
    /// dropped - confirm against the storage implementations.
    pub retention_time: TimeInterval,

    /// Configuration of the storage's segments.
    pub segmentation: StorageSegmentConfig,
}

/// Configuration of the segments of a record storage.
///
/// NOTE(review): how both limits are applied is decided by the storage
/// implementations, which are not visible here.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct StorageSegmentConfig {
    /// Time interval of a segment (presumably the time span covered by a single segment).
    pub time_interval: TimeInterval,

    /// Size limit of a segment (presumably the maximum size of a single segment).
    pub size_limit: MemorySize,
}

/// Static description of a record storage.
#[derive(Debug, Clone)]
pub struct StorageDescriptor {
    /// Name of the kind of storage, e.g. `"in-memory"` for [`InMemoryRecordStorage`].
    pub kind: String,

    /// Base path of the storage, or `None` if it has none (as for [`InMemoryRecordStorage`]).
    pub base_path: Option<PathBuf>,

    /// Format of the custom, binary data of records.
    pub binary_data_format: BinaryDataFormat,
}

/// Statistics of a record storage as a whole.
#[derive(Debug, Clone)]
pub struct StorageStatistics {
    /// The total number of records (if known)
    pub total_records: Option<usize>,

    /// The total size in bytes (if known)
    pub total_bytes: Option<u64>,

    /// Segment statistics (if applicable and available)
    pub segments: Option<Vec<StorageSegmentStatistics>>,
}

/// Statistics of a single segment of a record storage.
#[derive(Debug, Clone)]
pub struct StorageSegmentStatistics {
    /// The time when the segment was created
    pub created_at: SystemTime,

    /// The total number of records in the segment
    pub total_records: usize,

    /// The total size in bytes (if known)
    pub total_bytes: Option<u64>,
}

/// Read access to the prelude of a record, i.e. its creation time offset.
pub trait ReadableRecordPrelude {
    /// Returns the creation time offset stored in the record.
    fn created_at_offset(&self) -> CreatedAtOffset;
}

/// Write access to the prelude of a record, i.e. its creation time offset.
pub trait WritableRecordPrelude {
    /// Stores the given creation time offset in the record.
    fn set_created_at_offset(&mut self, created_at_offset: CreatedAtOffset);
}

/// Raw representation of a [`CreatedAtOffset`]: a number of nanoseconds.
pub type CreatedAtOffsetNanos = u64;

/// Offset of a creation time from some origin, in nanoseconds.
///
/// Offsets are ordered by their number of nanoseconds. The default is a zero offset.
#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
pub struct CreatedAtOffset {
    nanos: CreatedAtOffsetNanos,
}

impl CreatedAtOffset {
    /// Returns the point in time that lies this offset after `origin`.
    ///
    /// # Panics
    ///
    /// Panics if the sum is not representable as a `SystemTime`
    /// (see the `Add<Duration>` implementation of `SystemTime`).
    #[must_use]
    pub fn system_time_from_origin(self, origin: SystemTime) -> SystemTime {
        origin + self.to_duration()
    }

    /// Returns the offset as a `Duration`. This conversion is lossless.
    #[must_use]
    pub const fn to_duration(self) -> Duration {
        Duration::from_nanos(self.nanos)
    }
}

/// Wraps a raw number of nanoseconds.
impl From<CreatedAtOffsetNanos> for CreatedAtOffset {
    fn from(nanos: CreatedAtOffsetNanos) -> Self {
        Self { nanos }
    }
}

/// Unwraps the raw number of nanoseconds.
impl From<CreatedAtOffset> for CreatedAtOffsetNanos {
    fn from(from: CreatedAtOffset) -> Self {
        from.nanos
    }
}

/// Converts a duration into an offset, saturating at the maximum offset.
///
/// `Duration::as_nanos()` yields a `u128`. Durations with more nanoseconds
/// than fit into [`CreatedAtOffsetNanos`] (more than 584 years) are clamped
/// to the largest representable offset instead of being silently truncated
/// to an unrelated, much smaller value.
impl From<Duration> for CreatedAtOffset {
    fn from(from: Duration) -> Self {
        let max_nanos = u128::from(CreatedAtOffsetNanos::MAX);
        // The cast is lossless because the value has been clamped beforehand.
        let nanos = from.as_nanos().min(max_nanos) as CreatedAtOffsetNanos;
        Self { nanos }
    }
}

/// Converts the offset into a `Duration`. This conversion is lossless.
impl From<CreatedAtOffset> for Duration {
    fn from(from: CreatedAtOffset) -> Self {
        from.to_duration()
    }
}

/// Filter for records by their creation time.
///
/// Both bounds are optional; the default filter has no bounds.
/// NOTE(review): whether the bounds are inclusive is not visible here -
/// confirm against the code that evaluates the filter.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct RecordPreludeFilter {
    /// Lower bound of the creation time (if any)
    pub since_created_at: Option<SystemTime>,

    /// Upper bound of the creation time (if any)
    pub until_created_at: Option<SystemTime>,
}

/// Operations common to all record storages, independent of the record type.
pub trait RecordStorageBase {
    /// Returns the static description of this storage.
    fn descriptor(&self) -> &StorageDescriptor;

    /// Returns the current configuration.
    fn config(&self) -> &StorageConfig;

    /// Replaces the configuration with `new_config`.
    ///
    /// The in-memory implementation returns the previous configuration;
    /// other implementations are presumably expected to do the same.
    fn replace_config(&mut self, new_config: StorageConfig) -> StorageConfig;

    /// Performs implementation-specific maintenance work.
    ///
    /// This is a no-op for the in-memory implementation.
    fn perform_housekeeping(&mut self) -> Result<()>;

    /// Try to drop records that have been created before the given time
    fn retain_all_records_created_since(&mut self, created_since: SystemTime) -> Result<()>;

    /// Collects statistics about the stored records.
    fn report_statistics(&mut self) -> Result<StorageStatistics>;
}

/// Format of custom, binary data
///
/// Determines how binary data is converted into a string and back by
/// [`encode_binary_data_into_string`] and [`decode_binary_data_from_string`].
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq)]
pub enum BinaryDataFormat {
    /// Arbitrary binary data
    ///
    /// Serialized as Base64 with standard alphabet and no padding.
    #[default]
    Bytes,

    /// Serialized UTF-8 data
    ///
    /// A typical use case is the tunneling of UTF-8 JSON data.
    /// Encoding fails if the binary data is not valid UTF-8.
    Utf8,
}

/// Encodes arbitrary bytes as Base64 with the standard alphabet and no padding.
fn encode_binary_data_bytes(input: impl AsRef<[u8]>) -> String {
    let bytes: &[u8] = input.as_ref();
    base64::encode_config(bytes, base64::STANDARD_NO_PAD)
}

/// Reinterprets the bytes as a UTF-8 string without copying.
///
/// # Errors
///
/// Fails if `input` is not valid UTF-8.
fn encode_binary_data_utf8(input: Vec<u8>) -> anyhow::Result<String> {
    let text = String::from_utf8(input)?;
    Ok(text)
}

/// Encodes binary data as a string according to `format`.
///
/// # Errors
///
/// Fails for [`BinaryDataFormat::Utf8`] if `input` is not valid UTF-8.
/// Encoding as [`BinaryDataFormat::Bytes`] never fails.
pub fn encode_binary_data_into_string(
    input: Vec<u8>,
    format: BinaryDataFormat,
) -> anyhow::Result<String> {
    let encoded = match format {
        BinaryDataFormat::Utf8 => encode_binary_data_utf8(input)?,
        BinaryDataFormat::Bytes => encode_binary_data_bytes(input),
    };
    Ok(encoded)
}

/// Decodes Base64 with the standard alphabet and no padding into bytes.
///
/// # Errors
///
/// Fails if `input` cannot be decoded with this Base64 configuration.
fn decode_binary_data_bytes(input: impl AsRef<[u8]>) -> anyhow::Result<Vec<u8>> {
    let decoded = base64::decode_config(input, base64::STANDARD_NO_PAD)?;
    Ok(decoded)
}

/// Returns the UTF-8 bytes of the string, reusing its buffer.
fn decode_binary_data_utf8(input: String) -> Vec<u8> {
    Vec::from(input)
}

/// Decodes a string into binary data according to `format`.
///
/// # Errors
///
/// Fails for [`BinaryDataFormat::Bytes`] if `input` is not valid Base64
/// (standard alphabet, no padding). Decoding [`BinaryDataFormat::Utf8`]
/// never fails.
pub fn decode_binary_data_from_string(
    input: String,
    format: BinaryDataFormat,
) -> anyhow::Result<Vec<u8>> {
    let decoded = match format {
        BinaryDataFormat::Utf8 => decode_binary_data_utf8(input),
        BinaryDataFormat::Bytes => decode_binary_data_bytes(input)?,
    };
    Ok(decoded)
}

/// A record storage that records of type `R` can be appended to.
pub trait RecordStorageWrite<R>: RecordStorageBase
where
    R: WritableRecordPrelude,
{
    /// Appends `record`, which has been created at `created_at`.
    ///
    /// On success, returns the result of the write operation together with
    /// a creation time offset. The in-memory implementation stores the
    /// offset of `created_at` from the storage's origin in the record and
    /// returns that same offset.
    fn append_record(
        &mut self,
        created_at: &SystemInstant,
        record: R,
    ) -> Result<(WriteResult, CreatedAtOffset)>;
}

/// A record storage that records of type `R` can be read from.
pub trait RecordStorageRead<R>: RecordStorageBase {
    /// Returns records together with a `SystemTime` for each of them.
    ///
    /// NOTE(review): presumably at most `limit` of the most recently
    /// created records, paired with their creation time - confirm against
    /// the implementations.
    fn recent_records(&mut self, limit: NonZeroUsize) -> Result<Vec<(SystemTime, R)>>;
}

/// A record storage that keeps all records in memory.
///
/// Records are kept in a queue in the order in which they have been appended.
// NOTE(review): the `allow` below has no stated reason; `Debug` could
// probably be derived if `SystemInstant` implements it - confirm.
#[allow(missing_debug_implementations)]
pub struct InMemoryRecordStorage<R> {
    config: StorageConfig,
    descriptor: StorageDescriptor,
    // Origin that the creation time offsets of all records are relative to.
    created_at_origin: SystemInstant,
    // Records, appended at the back and dropped from the front.
    records: VecDeque<R>,
    // NOTE(review): looks redundant, `records` already makes use of `R`.
    _record_phantom: std::marker::PhantomData<R>,
}

impl<R> InMemoryRecordStorage<R>
where
    R: Clone,
{
    #[must_use]
    pub fn new(config: StorageConfig) -> Self {
        let descriptor = StorageDescriptor {
            kind: "in-memory".to_string(),
            base_path: None,
            binary_data_format: Default::default(), // no serialization
        };
        Self {
            config,
            descriptor,
            created_at_origin: SystemInstant::now(),
            records: VecDeque::with_capacity(MAX_PREALLOCATED_CAPACITY_LIMIT),
            _record_phantom: Default::default(),
        }
    }

    pub fn recent_records(&mut self, limit: NonZeroUsize) -> Result<Vec<R>> {
        let total_count = self.records.len();
        let limited_count = limit.get().min(total_count);
        Ok(self
            .records
            .iter()
            .skip(total_count - limited_count)
            .cloned()
            .collect())
    }
}

impl<R> RecordStorageBase for InMemoryRecordStorage<R>
where
    R: ReadableRecordPrelude,
{
    fn descriptor(&self) -> &StorageDescriptor {
        &self.descriptor
    }

    fn config(&self) -> &StorageConfig {
        &self.config
    }

    fn replace_config(&mut self, new_config: StorageConfig) -> StorageConfig {
        std::mem::replace(&mut self.config, new_config)
    }

    fn perform_housekeeping(&mut self) -> Result<()> {
        Ok(())
    }

    fn retain_all_records_created_since(&mut self, created_since: SystemTime) -> Result<()> {
        let created_since_offset = created_since
            .duration_since(self.created_at_origin.system_time())
            .unwrap_or_default()
            .into();
        while let Some(first) = self.records.front() {
            if first.created_at_offset() >= created_since_offset {
                break;
            }
            self.records.pop_front();
        }
        Ok(())
    }

    fn report_statistics(&mut self) -> Result<StorageStatistics> {
        Ok(StorageStatistics {
            total_records: Some(self.records.len()),
            total_bytes: None,
            segments: None,
        })
    }
}

impl<R> RecordStorageWrite<R> for InMemoryRecordStorage<R>
where
    R: ReadableRecordPrelude + WritableRecordPrelude,
{
    /// Stamps `record` with the offset of `created_at` from the origin of
    /// this storage and appends it at the back of the queue.
    ///
    /// Returns a successful write result together with that offset.
    /// In debug builds, `created_at` must not be before the origin and
    /// the offset of `record` must still be the default.
    fn append_record(
        &mut self,
        created_at: &SystemInstant,
        mut record: R,
    ) -> Result<(WriteResult, CreatedAtOffset)> {
        let origin = self.created_at_origin.instant();
        debug_assert!(created_at.instant() >= origin);
        let elapsed_since_origin = created_at.instant() - origin;
        let created_at_offset = CreatedAtOffset::from(elapsed_since_origin);
        // The storage is responsible for initializing the offset.
        debug_assert_eq!(record.created_at_offset(), CreatedAtOffset::default());
        record.set_created_at_offset(created_at_offset);
        self.records.push_back(record);
        let write_result = Ok(());
        Ok((write_result, created_at_offset))
    }
}