Skip to main content

reifydb_routine/function/text/
format_bytes.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use reifydb_core::value::column::{ColumnWithName, buffer::ColumnBuffer, columns::Columns};
5use reifydb_type::value::{constraint::bytes::MaxBytes, container::utf8::Utf8Container, r#type::Type};
6
7use crate::routine::{Function, FunctionKind, Routine, RoutineInfo, context::FunctionContext, error::RoutineError};
8
/// Binary (IEC) unit suffixes ordered from smallest to largest; `FormatBytes`
/// passes them to the formatter together with a base of 1024.
const IEC_UNITS: [&str; 6] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"];
10
11pub(super) fn format_bytes_internal(bytes: i64, base: f64, units: &[&str]) -> String {
12	if bytes == 0 {
13		return "0 B".to_string();
14	}
15
16	let bytes_abs = bytes.unsigned_abs() as f64;
17	let sign = if bytes < 0 {
18		"-"
19	} else {
20		""
21	};
22
23	let mut unit_index = 0;
24	let mut value = bytes_abs;
25
26	while value >= base && unit_index < units.len() - 1 {
27		value /= base;
28		unit_index += 1;
29	}
30
31	if unit_index == 0 {
32		format!("{}{} {}", sign, bytes_abs as i64, units[0])
33	} else if value == value.floor() {
34		format!("{}{} {}", sign, value as i64, units[unit_index])
35	} else {
36		let formatted = format!("{:.2}", value);
37		let trimmed = formatted.trim_end_matches('0').trim_end_matches('.');
38		format!("{}{} {}", sign, trimmed, units[unit_index])
39	}
40}
41
/// Formats every row of an integer column as human-readable byte text.
///
/// * `$container` - container whose `get(i)` yields an optional reference to an integer.
/// * `$row_count` - number of rows to produce.
/// * `$base`, `$units` - forwarded unchanged to `format_bytes_internal`.
///
/// Evaluates to a `ColumnBuffer::Utf8` with one string per row; rows for which
/// `get` returns `None` produce an empty string. Values that do not fit in
/// `i64` (large unsigned values) saturate at `i64::MAX` instead of wrapping
/// around to a negative byte count.
#[macro_export]
macro_rules! process_int_column {
	($container:expr, $row_count:expr, $base:expr, $units:expr) => {{
		let mut result_data = Vec::with_capacity($row_count);

		for i in 0..$row_count {
			if let Some(&value) = $container.get(i) {
				// A plain `as i64` cast would wrap out-of-range unsigned values to negatives.
				let bytes = <i64 as ::std::convert::TryFrom<_>>::try_from(value).unwrap_or(i64::MAX);
				result_data.push(format_bytes_internal(bytes, $base, $units));
			} else {
				result_data.push(String::new());
			}
		}

		ColumnBuffer::Utf8 {
			container: Utf8Container::new(result_data),
			max_bytes: MaxBytes::MAX,
		}
	}};
}
61
/// Formats every row of a floating-point column as human-readable byte text.
///
/// * `$container` - container whose `get(i)` yields an optional reference to a float.
/// * `$row_count` - number of rows to produce.
/// * `$base`, `$units` - forwarded unchanged to `format_bytes_internal`.
///
/// Evaluates to a `ColumnBuffer::Utf8` with one string per row; rows for which
/// `get` returns `None` produce an empty string.
#[macro_export]
macro_rules! process_float_column {
	($container:expr, $row_count:expr, $base:expr, $units:expr) => {{
		let result_data: Vec<String> = (0..$row_count)
			.map(|row| match $container.get(row) {
				// The `as i64` cast converts the value to a whole number of bytes.
				Some(&value) => format_bytes_internal(value as i64, $base, $units),
				None => String::new(),
			})
			.collect();

		ColumnBuffer::Utf8 {
			container: Utf8Container::new(result_data),
			max_bytes: MaxBytes::MAX,
		}
	}};
}
81
/// Formats every row of a decimal column as human-readable byte text.
///
/// * `$container` - container whose `get(i)` yields an optional value that can be rendered with
///   `to_string()`.
/// * `$row_count` - number of rows to produce.
/// * `$base`, `$units` - forwarded unchanged to `format_bytes_internal`.
///
/// Each value is truncated to its integer part by taking the text before the
/// first `.` of its string form (assumes plain `[-]digits[.digits]` notation —
/// TODO confirm against the decimal type's `Display` impl). Integer parts
/// outside the `i64` range saturate at `i64::MAX` / `i64::MIN`; text that is
/// not an integer at all falls back to `0`.
///
/// Evaluates to a `ColumnBuffer::Utf8` with one string per row; rows for which
/// `get` returns `None` produce an empty string.
#[macro_export]
macro_rules! process_decimal_column {
	($container:expr, $row_count:expr, $base:expr, $units:expr) => {{
		let mut result_data = Vec::with_capacity($row_count);

		for i in 0..$row_count {
			if let Some(value) = $container.get(i) {
				// Truncate decimal to integer by parsing the integer part
				let s = value.to_string();
				let int_part = s.split('.').next().unwrap_or("0");
				let bytes = match int_part.parse::<i64>() {
					Ok(parsed) => parsed,
					// Saturate on overflow rather than silently reporting "0 B".
					Err(err) => match err.kind() {
						::std::num::IntErrorKind::PosOverflow => i64::MAX,
						::std::num::IntErrorKind::NegOverflow => i64::MIN,
						_ => 0,
					},
				};
				result_data.push(format_bytes_internal(bytes, $base, $units));
			} else {
				result_data.push(String::new());
			}
		}

		ColumnBuffer::Utf8 {
			container: Utf8Container::new(result_data),
			max_bytes: MaxBytes::MAX,
		}
	}};
}
105
/// Formats bytes using binary units (1024-based: B, KiB, MiB, GiB, TiB, PiB)
///
/// Scalar function that takes one numeric column and returns a text column.
pub struct FormatBytes {
	/// Routine metadata handed out by `Routine::info`.
	info: RoutineInfo,
}
110
111impl Default for FormatBytes {
112	fn default() -> Self {
113		Self::new()
114	}
115}
116
117impl FormatBytes {
118	pub fn new() -> Self {
119		Self {
120			info: RoutineInfo::new("text::format_bytes"),
121		}
122	}
123}
124
125impl<'a> Routine<FunctionContext<'a>> for FormatBytes {
126	fn info(&self) -> &RoutineInfo {
127		&self.info
128	}
129
130	fn return_type(&self, _input_types: &[Type]) -> Type {
131		Type::Utf8
132	}
133
134	fn execute(&self, ctx: &mut FunctionContext<'a>, args: &Columns) -> Result<Columns, RoutineError> {
135		if args.len() != 1 {
136			return Err(RoutineError::FunctionArityMismatch {
137				function: ctx.fragment.clone(),
138				expected: 1,
139				actual: args.len(),
140			});
141		}
142
143		let column = &args[0];
144		let (data, bitvec) = column.unwrap_option();
145		let row_count = data.len();
146
147		let result_data = match data {
148			ColumnBuffer::Int1(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
149			ColumnBuffer::Int2(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
150			ColumnBuffer::Int4(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
151			ColumnBuffer::Int8(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
152			ColumnBuffer::Uint1(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
153			ColumnBuffer::Uint2(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
154			ColumnBuffer::Uint4(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
155			ColumnBuffer::Uint8(container) => process_int_column!(container, row_count, 1024.0, &IEC_UNITS),
156			ColumnBuffer::Float4(container) => {
157				process_float_column!(container, row_count, 1024.0, &IEC_UNITS)
158			}
159			ColumnBuffer::Float8(container) => {
160				process_float_column!(container, row_count, 1024.0, &IEC_UNITS)
161			}
162			ColumnBuffer::Decimal {
163				container,
164				..
165			} => {
166				process_decimal_column!(container, row_count, 1024.0, &IEC_UNITS)
167			}
168			other => {
169				return Err(RoutineError::FunctionInvalidArgumentType {
170					function: ctx.fragment.clone(),
171					argument_index: 0,
172					expected: vec![
173						Type::Int1,
174						Type::Int2,
175						Type::Int4,
176						Type::Int8,
177						Type::Uint1,
178						Type::Uint2,
179						Type::Uint4,
180						Type::Uint8,
181						Type::Float4,
182						Type::Float8,
183						Type::Decimal,
184					],
185					actual: other.get_type(),
186				});
187			}
188		};
189
190		let final_data = match bitvec {
191			Some(bv) => ColumnBuffer::Option {
192				inner: Box::new(result_data),
193				bitvec: bv.clone(),
194			},
195			None => result_data,
196		};
197		Ok(Columns::new(vec![ColumnWithName::new(ctx.fragment.clone(), final_data)]))
198	}
199}
200
201impl Function for FormatBytes {
202	fn kinds(&self) -> &[FunctionKind] {
203		&[FunctionKind::Scalar]
204	}
205}
206
207pub(super) use process_decimal_column;
208pub(super) use process_float_column;
209pub(super) use process_int_column;