// es_disk_planner — crate root (src/lib.rs)

1#![doc = include_str!("../README.md")]
2
3pub mod planner {
4    use std::fmt::{Display, Formatter, Result as FmtResult};
5
6    /// Represents the computed capacity plan for an Elasticsearch cluster.
7    ///
8    /// All values are expressed in **gigabytes (GB, base-10)**.
9    /// This struct is returned by the capacity calculation function and
10    /// provides both cluster-level and per-node estimates.
11    #[derive(Debug, Clone, Copy)]
12    pub struct Plan {
13        /// Total data size for all primary and replica shards combined.
14        ///
15        /// Formula: `primaries * shard_size_gb * (1 + replicas)`
16        pub base: f64,
17
18        /// Base size plus Lucene merge overhead.
19        ///
20        /// Formula: `base * (1 + overhead_merge)`
21        pub with_merge: f64,
22
23        /// Size after applying headroom for watermarks and ingestion bursts.
24        ///
25        /// Formula: `with_merge * (1 + headroom)`
26        pub with_headroom: f64,
27
28        /// Total relocation/rebalancing buffer for all nodes combined.
29        ///
30        /// Formula: `buffer_per_node_gb * nodes`
31        pub buffer_total: f64,
32
33        /// Total cluster disk requirement, including overhead, headroom, and buffer.
34        ///
35        /// Formula: `with_headroom + buffer_total`
36        pub total_cluster: f64,
37
38        /// Recommended data size per node, averaged across the cluster.
39        ///
40        /// Formula: `total_cluster / nodes`
41        pub per_node: f64,
42
43        /// Recommended physical disk size per node to stay below the target utilization.
44        ///
45        /// Formula: `per_node / target_utilization`
46        pub disk_per_node: f64,
47
48        // --- Inputs echoed for reporting ---
49        /// Target maximum disk utilization ratio (e.g. 0.75 = 75%).
50        pub target_utilization: f64,
51        /// Number of data nodes in the cluster.
52        pub nodes: u32,
53        /// Total number of primary shards.
54        pub primaries: u32,
55        /// Number of replica shards per primary.
56        pub replicas: u32,
57        /// Average shard size in GB (base-10).
58        pub shard_size_gb: f64,
59        /// Merge overhead fraction (e.g. 0.2 = 20%).
60        pub overhead_merge: f64,
61        /// Headroom fraction (e.g. 0.3 = 30%).
62        pub headroom: f64,
63        /// Optional relocation buffer per node in GB (defaults to shard size if `None`).
64        pub buffer_per_node_gb: Option<f64>,
65    }
66
67    /// Computes an estimated disk capacity plan for an Elasticsearch cluster.
68    ///
69    /// This function applies a simplified model to estimate how much **disk space**
70    /// (in gigabytes, base-10) is required across the entire cluster and per node.
71    ///
72    /// # Parameters
73    ///
74    /// - `nodes` — Number of data nodes in the cluster. Must be greater than zero.
75    /// - `primaries` — Total number of primary shards across all indices.
76    /// - `replicas` — Number of replicas for each primary shard.
77    /// - `shard_size_gb` — Average size of a single shard, in gigabytes (GB).
78    /// - `overhead_merge` — Fractional overhead for Lucene segment merges (e.g. `0.2` = 20%).
79    /// - `headroom` — Fractional safety margin for disk watermarks and ingestion bursts (e.g. `0.3` = 30%).
80    /// - `buffer_per_node_gb` — Optional relocation/rebalancing buffer per node.  
81    ///   If `None`, defaults to `shard_size_gb`.
82    /// - `target_utilization` — Desired maximum disk utilization ratio (e.g. `0.75` = 75%).  
83    ///   Must be within the range `(0, 1]`.
84    ///
85    /// # Returns
86    ///
87    /// On success, returns a [`Plan`] struct containing all intermediate and final
88    /// capacity estimates (cluster-level and per-node).
89    ///
90    /// # Errors
91    ///
92    /// Returns an [`Err`] string if:
93    ///
94    /// - `nodes` is `0`
95    /// - `target_utilization` ≤ `0.0` or > `1.0`
96    /// - `overhead_merge` or `headroom` < `0.0`
97    /// - `shard_size_gb` ≤ `0.0`
98    ///
99    /// # Formulas
100    ///
101    /// ```text
102    /// base = primaries * shard_size_gb * (1 + replicas)
103    /// with_merge = base * (1 + overhead_merge)
104    /// with_headroom = with_merge * (1 + headroom)
105    /// buffer_total = buffer_per_node_gb * nodes
106    /// total_cluster = with_headroom + buffer_total
107    /// per_node = total_cluster / nodes
108    /// disk_per_node = per_node / target_utilization
109    /// ```
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// use es_disk_planner::{plan_capacity, Plan};
115    ///
116    /// let plan = plan_capacity(5, 10, 1, 50.0, 0.20, 0.30, None, 0.75).unwrap();
117    ///
118    /// assert!((plan.total_cluster - 1810.0).abs() < 1e-6);
119    /// assert!((plan.disk_per_node - 482.7).abs() < 0.1);
120    /// ```
121    ///
122    /// # Notes
123    ///
124    /// - All calculations use **decimal gigabytes (GB)**, not GiB (1024-based).
125    /// - The model follows Elastic’s general sizing guidelines
126    ///   (20–50 GB per shard, ≤ 30 GB JVM heap, ≥ 64 GB node RAM).
127    ///
128    /// # See Also
129    ///
130    /// [`Plan`] — The struct containing the computed results.
131    #[allow(clippy::too_many_arguments)]
132    pub fn plan_capacity(
133        nodes: u32,
134        primaries: u32,
135        replicas: u32,
136        shard_size_gb: f64,
137        overhead_merge: f64,
138        headroom: f64,
139        buffer_per_node_gb: Option<f64>,
140        target_utilization: f64,
141    ) -> Result<Plan, String> {
142        if nodes == 0 {
143            return Err("nodes must be > 0".into());
144        }
145        if target_utilization <= 0.0 || target_utilization > 1.0 {
146            return Err("target_utilization must be in (0, 1]".into());
147        }
148        if overhead_merge < 0.0 || headroom < 0.0 {
149            return Err("overhead_merge/headroom must be >= 0".into());
150        }
151        if shard_size_gb <= 0.0 {
152            return Err("shard_size_gb must be > 0".into());
153        }
154
155        let nodes_f = nodes as f64;
156        let primaries_f = primaries as f64;
157        let replicas_f = replicas as f64;
158
159        let buf = buffer_per_node_gb.unwrap_or(shard_size_gb);
160
161        let base = primaries_f * shard_size_gb * (1.0 + replicas_f);
162
163        let with_merge = base * (1.0 + overhead_merge);
164
165        let with_headroom = with_merge * (1.0 + headroom);
166
167        let buffer_total = buf * nodes_f;
168
169        let total_cluster = with_headroom + buffer_total;
170
171        let per_node = total_cluster / nodes_f;
172
173        let disk_per_node = per_node / target_utilization;
174
175        Ok(Plan {
176            base,
177            with_merge,
178            with_headroom,
179            buffer_total,
180            total_cluster,
181            per_node,
182            disk_per_node,
183            target_utilization,
184            nodes,
185            primaries,
186            replicas,
187            shard_size_gb,
188            overhead_merge,
189            headroom,
190            buffer_per_node_gb,
191        })
192    }
193
194    fn fmt_gb(x: f64) -> String {
195        format!("{:.1} GB", x)
196    }
197    fn fmt_tb(x: f64) -> String {
198        format!("{:.2} TB", x / 1000.0)
199    }
200
    /// Human-readable multi-line report of the plan: echoes the inputs, then the
    /// cluster-level sizing steps, then the per-node recommendation.
    ///
    /// NOTE: the trailing spaces inside the labels below are deliberate — they
    /// align the GB/TB columns across lines. Do not "clean them up".
    impl Display for Plan {
        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
            writeln!(f, "=== Elasticsearch Disk Capacity Planner ===")?;
            // --- Echo of the inputs the plan was computed from ---
            writeln!(f, "Nodes: {}", self.nodes)?;
            writeln!(f, "Primary shards: {}", self.primaries)?;
            writeln!(f, "Replicas per shard: {}", self.replicas)?;
            writeln!(
                f,
                "Shard size: {} | Overhead merge: {:.0}% | Headroom: {:.0}%",
                fmt_gb(self.shard_size_gb),
                self.overhead_merge * 100.0,
                self.headroom * 100.0
            )?;
            writeln!(
                f,
                "Relocation buffer per node: {}",
                // Mirrors plan_capacity's default: buffer falls back to shard size.
                fmt_gb(self.buffer_per_node_gb.unwrap_or(self.shard_size_gb))
            )?;
            writeln!(
                f,
                "Target disk utilization: {:.0}%",
                self.target_utilization * 100.0
            )?;
            writeln!(f)?;

            // --- Cluster-level sizing, one line per step of the model ---
            writeln!(
                f,
                "Base (primaries+replicas): {} ({})",
                fmt_gb(self.base),
                fmt_tb(self.base)
            )?;
            writeln!(
                f,
                "+ Merge overhead:         {} ({})",
                fmt_gb(self.with_merge),
                fmt_tb(self.with_merge)
            )?;
            writeln!(
                f,
                "+ Headroom:               {} ({})",
                fmt_gb(self.with_headroom),
                fmt_tb(self.with_headroom)
            )?;
            writeln!(
                f,
                "+ Total buffer:           {} ({})",
                fmt_gb(self.buffer_total),
                fmt_tb(self.buffer_total)
            )?;
            writeln!(
                f,
                "= Cluster total:          {} ({})",
                fmt_gb(self.total_cluster),
                fmt_tb(self.total_cluster)
            )?;
            writeln!(f)?;
            // --- Per-node recommendation ---
            writeln!(
                f,
                "Per node (recommended):   {} ({})",
                fmt_gb(self.per_node),
                fmt_tb(self.per_node)
            )?;
            writeln!(
                f,
                "Disk per node (<~{:.0}%): {} ({})",
                self.target_utilization * 100.0,
                fmt_gb(self.disk_per_node),
                fmt_tb(self.disk_per_node)
            )?;

            Ok(())
        }
    }
274}
275
#[cfg(test)]
mod tests {
    use crate::*;

    // Scenario: 5 nodes, 10 primaries, 1 replica, shard = 50 GB, overhead = 20%,
    // headroom = 30%, buffer = default (= shard size, 50 GB), target utilization = 0.75.
    // Expected: base 1000, +merge 1200, +headroom 1560, buffer 250, total 1810.
    #[test]
    fn example_numbers_match() {
        let p = plan_capacity(5, 10, 1, 50.0, 0.20, 0.30, None, 0.75).unwrap();
        assert!((p.base - 1000.0).abs() < 1e-6);
        assert!((p.with_merge - 1200.0).abs() < 1e-6);
        assert!((p.with_headroom - 1560.0).abs() < 1e-6);
        assert!((p.buffer_total - 250.0).abs() < 1e-6);
        assert!((p.total_cluster - 1810.0).abs() < 1e-6);
        assert!((p.per_node - 362.0).abs() < 1e-6);
        assert!((p.disk_per_node - 482.6666667).abs() < 1e-3);
    }

    // target_utilization must lie in (0, 1]; both boundary violations must error.
    #[test]
    fn rejects_bad_utilization() {
        assert!(plan_capacity(5, 10, 1, 50.0, 0.2, 0.3, None, 0.0).is_err());
        assert!(plan_capacity(5, 10, 1, 50.0, 0.2, 0.3, None, 1.01).is_err());
    }

    // An explicit Some(buffer) overrides the shard-size default.
    #[test]
    fn custom_buffer() {
        let p = plan_capacity(3, 6, 1, 40.0, 0.1, 0.2, Some(80.0), 0.8).unwrap();
        // base = 6*40*(1+1)=480; with_merge=528; with_headroom=633.6; buffer_total=80*3=240; total=873.6
        assert!((p.total_cluster - 873.6).abs() < 1e-6);
        // per_node = 291.2; disk_per_node = 291.2/0.8 = 364
        assert!((p.disk_per_node - 364.0).abs() < 1e-6);
    }
}
308
309pub use planner::{plan_capacity, Plan};