Skip to main content

reifydb_routine/function/text/
format_bytes.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use reifydb_core::value::column::{Column, columns::Columns, data::ColumnData};
5use reifydb_type::value::{constraint::bytes::MaxBytes, container::utf8::Utf8Container, r#type::Type};
6
7use crate::function::{Function, FunctionCapability, FunctionContext, FunctionInfo, error::FunctionError};
8
/// IEC binary unit suffixes, ordered from smallest to largest. Each step up
/// the table corresponds to one division by the 1024 base used by `FormatBytes`.
const IEC_UNITS: [&str; 6] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"];
10
11pub(super) fn format_bytes_internal(bytes: i64, base: f64, units: &[&str]) -> String {
12	if bytes == 0 {
13		return "0 B".to_string();
14	}
15
16	let bytes_abs = bytes.unsigned_abs() as f64;
17	let sign = if bytes < 0 {
18		"-"
19	} else {
20		""
21	};
22
23	let mut unit_index = 0;
24	let mut value = bytes_abs;
25
26	while value >= base && unit_index < units.len() - 1 {
27		value /= base;
28		unit_index += 1;
29	}
30
31	if unit_index == 0 {
32		format!("{}{} {}", sign, bytes_abs as i64, units[0])
33	} else if value == value.floor() {
34		format!("{}{} {}", sign, value as i64, units[unit_index])
35	} else {
36		let formatted = format!("{:.2}", value);
37		let trimmed = formatted.trim_end_matches('0').trim_end_matches('.');
38		format!("{}{} {}", sign, trimmed, units[unit_index])
39	}
40}
41
/// Formats every element of an integer container as a byte-size string and
/// returns the result as a `ColumnData::Utf8` column.
///
/// * `$container` - container whose `get(i)` yields an optional reference to
///   a primitive numeric element.
/// * `$row_count` - number of rows to produce.
/// * `$base` / `$units` - forwarded unchanged to `format_bytes_internal`.
///
/// Rows for which `get(i)` returns `None` become empty strings.
/// `format_bytes_internal`, `ColumnData`, `Utf8Container` and `MaxBytes` must
/// be in scope where the macro is invoked.
#[macro_export]
macro_rules! process_int_column {
	($container:expr, $row_count:expr, $base:expr, $units:expr) => {{
		let mut result_data = Vec::with_capacity($row_count);

		for i in 0..$row_count {
			if let Some(&value) = $container.get(i) {
				// A plain `as i64` wraps unsigned 64-bit values above
				// `i64::MAX` into negative numbers. Widening to `i128` first
				// and clamping saturates them at `i64::MAX` instead, so the
				// sign of the rendered size is always correct.
				let bytes = (value as i128).clamp(i64::MIN as i128, i64::MAX as i128) as i64;
				result_data.push(format_bytes_internal(bytes, $base, $units));
			} else {
				result_data.push(String::new());
			}
		}

		ColumnData::Utf8 {
			container: Utf8Container::new(result_data),
			max_bytes: MaxBytes::MAX,
		}
	}};
}
61
/// Formats every element of a floating-point container as a byte-size string
/// and returns the result as a `ColumnData::Utf8` column.
///
/// Each element is converted with `as i64` before formatting. For
/// floating-point values Rust defines that cast as truncating toward zero,
/// saturating at the `i64` bounds and mapping NaN to `0`. Rows for which
/// `get(i)` returns `None` become empty strings.
///
/// `format_bytes_internal`, `ColumnData`, `Utf8Container` and `MaxBytes` must
/// be in scope where the macro is invoked.
#[macro_export]
macro_rules! process_float_column {
	($container:expr, $row_count:expr, $base:expr, $units:expr) => {{
		let rendered: Vec<String> = (0..$row_count)
			.map(|row| match $container.get(row) {
				Some(&raw) => format_bytes_internal(raw as i64, $base, $units),
				None => String::new(),
			})
			.collect();

		ColumnData::Utf8 {
			container: Utf8Container::new(rendered),
			max_bytes: MaxBytes::MAX,
		}
	}};
}
81
/// Formats every element of a decimal container as a byte-size string and
/// returns the result as a `ColumnData::Utf8` column.
///
/// Each element is rendered with `to_string()`, the text before the first `.`
/// is parsed as an `i64`, and that integer is passed to
/// `format_bytes_internal`. Rows for which `get(i)` returns `None` become
/// empty strings.
///
/// `format_bytes_internal`, `ColumnData`, `Utf8Container` and `MaxBytes` must
/// be in scope where the macro is invoked.
#[macro_export]
macro_rules! process_decimal_column {
	($container:expr, $row_count:expr, $base:expr, $units:expr) => {{
		let mut result_data = Vec::with_capacity($row_count);

		for i in 0..$row_count {
			if let Some(value) = $container.get(i) {
				// Truncate decimal to integer by parsing the integer part
				let s = value.to_string();
				let int_part = s.split('.').next().unwrap_or("0");
				let bytes = match int_part.parse::<i64>() {
					Ok(parsed) => parsed,
					// An integer part outside the `i64` range used to fall
					// back to 0 and render as "0 B"; saturate instead so huge
					// values stay huge and keep their sign.
					Err(err) => match err.kind() {
						::std::num::IntErrorKind::PosOverflow => i64::MAX,
						::std::num::IntErrorKind::NegOverflow => i64::MIN,
						// Any other unparsable text keeps the previous
						// fallback of 0.
						_ => 0,
					},
				};
				result_data.push(format_bytes_internal(bytes, $base, $units));
			} else {
				result_data.push(String::new());
			}
		}

		ColumnData::Utf8 {
			container: Utf8Container::new(result_data),
			max_bytes: MaxBytes::MAX,
		}
	}};
}
105
106/// Formats bytes using binary units (1024-based: B, KiB, MiB, GiB, TiB, PiB)
107pub struct FormatBytes {
108	info: FunctionInfo,
109}
110
111impl Default for FormatBytes {
112	fn default() -> Self {
113		Self::new()
114	}
115}
116
117impl FormatBytes {
118	pub fn new() -> Self {
119		Self {
120			info: FunctionInfo::new("text::format_bytes"),
121		}
122	}
123}
124
125impl Function for FormatBytes {
126	fn info(&self) -> &FunctionInfo {
127		&self.info
128	}
129
130	fn capabilities(&self) -> &[FunctionCapability] {
131		&[FunctionCapability::Scalar]
132	}
133
134	fn return_type(&self, _input_types: &[Type]) -> Type {
135		Type::Utf8
136	}
137
138	fn execute(&self, ctx: &FunctionContext, args: &Columns) -> Result<Columns, FunctionError> {
139		if args.len() != 1 {
140			return Err(FunctionError::ArityMismatch {
141				function: ctx.fragment.clone(),
142				expected: 1,
143				actual: args.len(),
144			});
145		}
146
147		let column = &args[0];
148		let (data, bitvec) = column.data().unwrap_option();
149		let row_count = data.len();
150
151		let result_data = match data {
152			ColumnData::Int1(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
153			ColumnData::Int2(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
154			ColumnData::Int4(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
155			ColumnData::Int8(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
156			ColumnData::Uint1(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
157			ColumnData::Uint2(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
158			ColumnData::Uint4(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
159			ColumnData::Uint8(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
160			ColumnData::Float4(container) => {
161				process_float_column!(container, row_count, 1024.0, &IEC_UNITS)
162			}
163			ColumnData::Float8(container) => {
164				process_float_column!(container, row_count, 1024.0, &IEC_UNITS)
165			}
166			ColumnData::Decimal {
167				container,
168				..
169			} => {
170				process_decimal_column!(container, row_count, 1024.0, &IEC_UNITS)
171			}
172			other => {
173				return Err(FunctionError::InvalidArgumentType {
174					function: ctx.fragment.clone(),
175					argument_index: 0,
176					expected: vec![
177						Type::Int1,
178						Type::Int2,
179						Type::Int4,
180						Type::Int8,
181						Type::Uint1,
182						Type::Uint2,
183						Type::Uint4,
184						Type::Uint8,
185						Type::Float4,
186						Type::Float8,
187						Type::Decimal,
188					],
189					actual: other.get_type(),
190				});
191			}
192		};
193
194		let final_data = match bitvec {
195			Some(bv) => ColumnData::Option {
196				inner: Box::new(result_data),
197				bitvec: bv.clone(),
198			},
199			None => result_data,
200		};
201		Ok(Columns::new(vec![Column::new(ctx.fragment.clone(), final_data)]))
202	}
203}
204
205pub(super) use process_decimal_column;
206pub(super) use process_float_column;
207pub(super) use process_int_column;