// ingest/config.rs
1use std::time::Duration;
2
3use common::ObjectStoreConfig;
4use serde::{Deserialize, Serialize};
5use serde_with::{DurationMilliSeconds, serde_as};
6
7use crate::model::CompressionType;
8
9/// Configuration for an [`Ingestor`](crate::Ingestor).
10///
11/// Controls where data batches and the queue manifest are stored, how often
12/// batches are flushed, and when backpressure is applied.
13#[serde_as]
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct IngestorConfig {
16 /// Determines where and how ingest data is persisted. See [`ObjectStoreConfig`].
17 pub object_store: ObjectStoreConfig,
18
19 /// Path prefix for data batch objects in object storage.
20 ///
21 /// Defaults to `"ingest"`.
22 #[serde(default = "default_data_path_prefix")]
23 pub data_path_prefix: String,
24
25 /// Path to the queue manifest in object storage.
26 ///
27 /// Defaults to `"ingest/manifest"`.
28 #[serde(default = "default_manifest_path")]
29 pub manifest_path: String,
30
31 /// Time interval that triggers the flush of the current batch to object storage when elapsed.
32 ///
33 /// Defaults to 100 ms.
34 #[serde_as(as = "DurationMilliSeconds<u64>")]
35 #[serde(default = "default_flush_interval")]
36 pub flush_interval: Duration,
37
38 /// Batch size in bytes (entries and metadata) that triggers a flush when exceeded.
39 ///
40 /// Defaults to 64 MiB.
41 #[serde(default = "default_flush_size_bytes")]
42 pub flush_size_bytes: usize,
43
44 /// Maximum number of input entries vectors that can be buffered for the background
45 /// batch writer before backpressure is applied.
46 ///
47 /// Defaults to 1000.
48 #[serde(default = "default_max_buffered_inputs")]
49 pub max_buffered_inputs: usize,
50
51 /// Compression algorithm applied to the record block in data batches.
52 ///
53 /// Defaults to `None` (uncompressed).
54 #[serde(default)]
55 pub batch_compression: CompressionType,
56}
57
58/// Configuration for a [`Collector`](crate::Collector).
59///
60/// Controls where the queue manifest and data batches are read from.
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct CollectorConfig {
63 /// Determines where and how ingest data is read. See [`ObjectStoreConfig`].
64 pub object_store: ObjectStoreConfig,
65
66 /// Path to the queue manifest in object storage.
67 ///
68 /// Defaults to `"ingest/manifest"`.
69 #[serde(default = "default_manifest_path")]
70 pub manifest_path: String,
71}
72
/// Serde default for [`IngestorConfig::data_path_prefix`]: `"ingest"`.
fn default_data_path_prefix() -> String {
    String::from("ingest")
}
76
/// Serde default for the `manifest_path` field of both [`IngestorConfig`] and
/// [`CollectorConfig`]: `"ingest/manifest"`.
fn default_manifest_path() -> String {
    String::from("ingest/manifest")
}
80
/// Serde default for [`IngestorConfig::flush_interval`]: 100 milliseconds.
fn default_flush_interval() -> Duration {
    Duration::from_millis(100)
}
84
/// Serde default for [`IngestorConfig::flush_size_bytes`]: 64 MiB, in bytes.
fn default_flush_size_bytes() -> usize {
    64 * 1024 * 1024
}
88
/// Serde default for [`IngestorConfig::max_buffered_inputs`]: 1000.
fn default_max_buffered_inputs() -> usize {
    1_000
}