// es_disk_planner/lib.rs
1#![doc = include_str!("../README.md")]
2
3pub mod planner {
4 use std::fmt::{Display, Formatter, Result as FmtResult};
5
6 /// Represents the computed capacity plan for an Elasticsearch cluster.
7 ///
8 /// All values are expressed in **gigabytes (GB, base-10)**.
9 /// This struct is returned by the capacity calculation function and
10 /// provides both cluster-level and per-node estimates.
11 #[derive(Debug, Clone, Copy)]
12 pub struct Plan {
13 /// Total data size for all primary and replica shards combined.
14 ///
15 /// Formula: `primaries * shard_size_gb * (1 + replicas)`
16 pub base: f64,
17
18 /// Base size plus Lucene merge overhead.
19 ///
20 /// Formula: `base * (1 + overhead_merge)`
21 pub with_merge: f64,
22
23 /// Size after applying headroom for watermarks and ingestion bursts.
24 ///
25 /// Formula: `with_merge * (1 + headroom)`
26 pub with_headroom: f64,
27
28 /// Total relocation/rebalancing buffer for all nodes combined.
29 ///
30 /// Formula: `buffer_per_node_gb * nodes`
31 pub buffer_total: f64,
32
33 /// Total cluster disk requirement, including overhead, headroom, and buffer.
34 ///
35 /// Formula: `with_headroom + buffer_total`
36 pub total_cluster: f64,
37
38 /// Recommended data size per node, averaged across the cluster.
39 ///
40 /// Formula: `total_cluster / nodes`
41 pub per_node: f64,
42
43 /// Recommended physical disk size per node to stay below the target utilization.
44 ///
45 /// Formula: `per_node / target_utilization`
46 pub disk_per_node: f64,
47
48 // --- Inputs echoed for reporting ---
49 /// Target maximum disk utilization ratio (e.g. 0.75 = 75%).
50 pub target_utilization: f64,
51 /// Number of data nodes in the cluster.
52 pub nodes: u32,
53 /// Total number of primary shards.
54 pub primaries: u32,
55 /// Number of replica shards per primary.
56 pub replicas: u32,
57 /// Average shard size in GB (base-10).
58 pub shard_size_gb: f64,
59 /// Merge overhead fraction (e.g. 0.2 = 20%).
60 pub overhead_merge: f64,
61 /// Headroom fraction (e.g. 0.3 = 30%).
62 pub headroom: f64,
63 /// Optional relocation buffer per node in GB (defaults to shard size if `None`).
64 pub buffer_per_node_gb: Option<f64>,
65 }
66
67 /// Computes an estimated disk capacity plan for an Elasticsearch cluster.
68 ///
69 /// This function applies a simplified model to estimate how much **disk space**
70 /// (in gigabytes, base-10) is required across the entire cluster and per node.
71 ///
72 /// # Parameters
73 ///
74 /// - `nodes` — Number of data nodes in the cluster. Must be greater than zero.
75 /// - `primaries` — Total number of primary shards across all indices.
76 /// - `replicas` — Number of replicas for each primary shard.
77 /// - `shard_size_gb` — Average size of a single shard, in gigabytes (GB).
78 /// - `overhead_merge` — Fractional overhead for Lucene segment merges (e.g. `0.2` = 20%).
79 /// - `headroom` — Fractional safety margin for disk watermarks and ingestion bursts (e.g. `0.3` = 30%).
80 /// - `buffer_per_node_gb` — Optional relocation/rebalancing buffer per node.
81 /// If `None`, defaults to `shard_size_gb`.
82 /// - `target_utilization` — Desired maximum disk utilization ratio (e.g. `0.75` = 75%).
83 /// Must be within the range `(0, 1]`.
84 ///
85 /// # Returns
86 ///
87 /// On success, returns a [`Plan`] struct containing all intermediate and final
88 /// capacity estimates (cluster-level and per-node).
89 ///
90 /// # Errors
91 ///
92 /// Returns an [`Err`] string if:
93 ///
94 /// - `nodes` is `0`
95 /// - `target_utilization` ≤ `0.0` or > `1.0`
96 /// - `overhead_merge` or `headroom` < `0.0`
97 /// - `shard_size_gb` ≤ `0.0`
98 ///
99 /// # Formulas
100 ///
101 /// ```text
102 /// base = primaries * shard_size_gb * (1 + replicas)
103 /// with_merge = base * (1 + overhead_merge)
104 /// with_headroom = with_merge * (1 + headroom)
105 /// buffer_total = buffer_per_node_gb * nodes
106 /// total_cluster = with_headroom + buffer_total
107 /// per_node = total_cluster / nodes
108 /// disk_per_node = per_node / target_utilization
109 /// ```
110 ///
111 /// # Examples
112 ///
113 /// ```
114 /// use es_disk_planner::{plan_capacity, Plan};
115 ///
116 /// let plan = plan_capacity(5, 10, 1, 50.0, 0.20, 0.30, None, 0.75).unwrap();
117 ///
118 /// assert!((plan.total_cluster - 1810.0).abs() < 1e-6);
119 /// assert!((plan.disk_per_node - 482.7).abs() < 0.1);
120 /// ```
121 ///
122 /// # Notes
123 ///
124 /// - All calculations use **decimal gigabytes (GB)**, not GiB (1024-based).
125 /// - The model follows Elastic’s general sizing guidelines
126 /// (20–50 GB per shard, ≤ 30 GB JVM heap, ≥ 64 GB node RAM).
127 ///
128 /// # See Also
129 ///
130 /// [`Plan`] — The struct containing the computed results.
131 #[allow(clippy::too_many_arguments)]
132 pub fn plan_capacity(
133 nodes: u32,
134 primaries: u32,
135 replicas: u32,
136 shard_size_gb: f64,
137 overhead_merge: f64,
138 headroom: f64,
139 buffer_per_node_gb: Option<f64>,
140 target_utilization: f64,
141 ) -> Result<Plan, String> {
142 if nodes == 0 {
143 return Err("nodes must be > 0".into());
144 }
145 if target_utilization <= 0.0 || target_utilization > 1.0 {
146 return Err("target_utilization must be in (0, 1]".into());
147 }
148 if overhead_merge < 0.0 || headroom < 0.0 {
149 return Err("overhead_merge/headroom must be >= 0".into());
150 }
151 if shard_size_gb <= 0.0 {
152 return Err("shard_size_gb must be > 0".into());
153 }
154
155 let nodes_f = nodes as f64;
156 let primaries_f = primaries as f64;
157 let replicas_f = replicas as f64;
158
159 let buf = buffer_per_node_gb.unwrap_or(shard_size_gb);
160
161 let base = primaries_f * shard_size_gb * (1.0 + replicas_f);
162
163 let with_merge = base * (1.0 + overhead_merge);
164
165 let with_headroom = with_merge * (1.0 + headroom);
166
167 let buffer_total = buf * nodes_f;
168
169 let total_cluster = with_headroom + buffer_total;
170
171 let per_node = total_cluster / nodes_f;
172
173 let disk_per_node = per_node / target_utilization;
174
175 Ok(Plan {
176 base,
177 with_merge,
178 with_headroom,
179 buffer_total,
180 total_cluster,
181 per_node,
182 disk_per_node,
183 target_utilization,
184 nodes,
185 primaries,
186 replicas,
187 shard_size_gb,
188 overhead_merge,
189 headroom,
190 buffer_per_node_gb,
191 })
192 }
193
194 fn fmt_gb(x: f64) -> String {
195 format!("{:.1} GB", x)
196 }
197 fn fmt_tb(x: f64) -> String {
198 format!("{:.2} TB", x / 1000.0)
199 }
200
    impl Display for Plan {
        /// Renders the plan as a multi-line, human-readable report:
        /// first the echoed inputs, then the cluster-level sizing steps,
        /// and finally the per-node recommendations. Sizes are printed in
        /// both GB and TB (base-10) via `fmt_gb`/`fmt_tb`.
        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
            // --- Echoed inputs ---
            writeln!(f, "=== Elasticsearch Disk Capacity Planner ===")?;
            writeln!(f, "Nodes: {}", self.nodes)?;
            writeln!(f, "Primary shards: {}", self.primaries)?;
            writeln!(f, "Replicas per shard: {}", self.replicas)?;
            writeln!(
                f,
                "Shard size: {} | Overhead merge: {:.0}% | Headroom: {:.0}%",
                fmt_gb(self.shard_size_gb),
                // Fractions are stored as ratios (0.2 = 20%); scale for display.
                self.overhead_merge * 100.0,
                self.headroom * 100.0
            )?;
            writeln!(
                f,
                "Relocation buffer per node: {}",
                // Mirrors the default used by `plan_capacity`: shard size when unset.
                fmt_gb(self.buffer_per_node_gb.unwrap_or(self.shard_size_gb))
            )?;
            writeln!(
                f,
                "Target disk utilization: {:.0}%",
                self.target_utilization * 100.0
            )?;
            // Blank separator line between inputs and results.
            writeln!(f)?;

            // --- Cluster-level sizing steps, in computation order ---
            writeln!(
                f,
                "Base (primaries+replicas): {} ({})",
                fmt_gb(self.base),
                fmt_tb(self.base)
            )?;
            writeln!(
                f,
                "+ Merge overhead: {} ({})",
                fmt_gb(self.with_merge),
                fmt_tb(self.with_merge)
            )?;
            writeln!(
                f,
                "+ Headroom: {} ({})",
                fmt_gb(self.with_headroom),
                fmt_tb(self.with_headroom)
            )?;
            writeln!(
                f,
                "+ Total buffer: {} ({})",
                fmt_gb(self.buffer_total),
                fmt_tb(self.buffer_total)
            )?;
            writeln!(
                f,
                "= Cluster total: {} ({})",
                fmt_gb(self.total_cluster),
                fmt_tb(self.total_cluster)
            )?;
            writeln!(f)?;
            // --- Per-node recommendations ---
            writeln!(
                f,
                "Per node (recommended): {} ({})",
                fmt_gb(self.per_node),
                fmt_tb(self.per_node)
            )?;
            writeln!(
                f,
                "Disk per node (<~{:.0}%): {} ({})",
                self.target_utilization * 100.0,
                fmt_gb(self.disk_per_node),
                fmt_tb(self.disk_per_node)
            )?;

            Ok(())
        }
    }
274}
275
#[cfg(test)]
mod tests {
    use crate::*;

    /// Returns `true` when `actual` is within `eps` of `expected`.
    fn near(actual: f64, expected: f64, eps: f64) -> bool {
        (actual - expected).abs() < eps
    }

    // Scenario: 5 nodes, 10 primaries, 1 replica, shard = 50 GB,
    // merge overhead = 20%, headroom = 30%, buffer = default (= 50 GB),
    // target utilization = 0.75.
    #[test]
    fn example_numbers_match() {
        let plan = plan_capacity(5, 10, 1, 50.0, 0.20, 0.30, None, 0.75).unwrap();
        assert!(near(plan.base, 1000.0, 1e-6));
        assert!(near(plan.with_merge, 1200.0, 1e-6));
        assert!(near(plan.with_headroom, 1560.0, 1e-6));
        assert!(near(plan.buffer_total, 250.0, 1e-6));
        assert!(near(plan.total_cluster, 1810.0, 1e-6));
        assert!(near(plan.per_node, 362.0, 1e-6));
        assert!(near(plan.disk_per_node, 482.6666667, 1e-3));
    }

    #[test]
    fn rejects_bad_utilization() {
        // Values on either side of the valid (0, 1] range must be rejected.
        for bad in [0.0, 1.01] {
            assert!(plan_capacity(5, 10, 1, 50.0, 0.2, 0.3, None, bad).is_err());
        }
    }

    #[test]
    fn custom_buffer() {
        // base = 6*40*(1+1) = 480; with_merge = 528; with_headroom = 633.6;
        // buffer_total = 80*3 = 240; total = 873.6;
        // per_node = 291.2; disk_per_node = 291.2 / 0.8 = 364.
        let plan = plan_capacity(3, 6, 1, 40.0, 0.1, 0.2, Some(80.0), 0.8).unwrap();
        assert!(near(plan.total_cluster, 873.6, 1e-6));
        assert!(near(plan.disk_per_node, 364.0, 1e-6));
    }
}
308
309pub use planner::{plan_capacity, Plan};