Skip to main content

ingest/
config.rs

1use std::time::Duration;
2
3use common::ObjectStoreConfig;
4use serde::{Deserialize, Serialize};
5use serde_with::{DurationMilliSeconds, serde_as};
6
7use crate::model::CompressionType;
8
9/// Configuration for an [`Ingestor`](crate::Ingestor).
10///
11/// Controls where data batches and the queue manifest are stored, how often
12/// batches are flushed, and when backpressure is applied.
13#[serde_as]
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct IngestorConfig {
16    /// Determines where and how ingest data is persisted. See [`ObjectStoreConfig`].
17    pub object_store: ObjectStoreConfig,
18
19    /// Path prefix for data batch objects in object storage.
20    ///
21    /// Defaults to `"ingest"`.
22    #[serde(default = "default_data_path_prefix")]
23    pub data_path_prefix: String,
24
25    /// Path to the queue manifest in object storage.
26    ///
27    /// Defaults to `"ingest/manifest"`.
28    #[serde(default = "default_manifest_path")]
29    pub manifest_path: String,
30
31    /// Time interval that triggers the flush of the current batch to object storage when elapsed.
32    ///
33    /// Defaults to 100 ms.
34    #[serde_as(as = "DurationMilliSeconds<u64>")]
35    #[serde(default = "default_flush_interval")]
36    pub flush_interval: Duration,
37
38    /// Batch size in bytes (entries and metadata) that triggers a flush when exceeded.
39    ///
40    /// Defaults to 64 MiB.
41    #[serde(default = "default_flush_size_bytes")]
42    pub flush_size_bytes: usize,
43
44    /// Maximum number of input entries vectors that can be buffered for the background
45    /// batch writer before backpressure is applied.
46    ///
47    /// Defaults to 1000.
48    #[serde(default = "default_max_buffered_inputs")]
49    pub max_buffered_inputs: usize,
50
51    /// Compression algorithm applied to the record block in data batches.
52    ///
53    /// Defaults to `None` (uncompressed).
54    #[serde(default)]
55    pub batch_compression: CompressionType,
56}
57
58/// Configuration for a [`Collector`](crate::Collector).
59///
60/// Controls where the queue manifest and data batches are read from.
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct CollectorConfig {
63    /// Determines where and how ingest data is read. See [`ObjectStoreConfig`].
64    pub object_store: ObjectStoreConfig,
65
66    /// Path to the queue manifest in object storage.
67    ///
68    /// Defaults to `"ingest/manifest"`.
69    #[serde(default = "default_manifest_path")]
70    pub manifest_path: String,
71}
72
/// Serde default for [`IngestorConfig::data_path_prefix`]: `"ingest"`.
fn default_data_path_prefix() -> String {
    "ingest".to_string()
}
76
/// Serde default for the `manifest_path` field of both [`IngestorConfig`] and
/// [`CollectorConfig`]: `"ingest/manifest"`.
fn default_manifest_path() -> String {
    "ingest/manifest".to_string()
}
80
/// Serde default for [`IngestorConfig::flush_interval`]: 100 milliseconds.
fn default_flush_interval() -> Duration {
    Duration::from_millis(100)
}
84
/// Serde default for [`IngestorConfig::flush_size_bytes`]: 64 MiB, in bytes.
fn default_flush_size_bytes() -> usize {
    // Named factor so the value reads the same as the documented "64 MiB".
    const MIB: usize = 1024 * 1024;
    64 * MIB
}
88
/// Serde default for [`IngestorConfig::max_buffered_inputs`]: 1000.
fn default_max_buffered_inputs() -> usize {
    1000
}