use crate::compiler::function::EnumVariant;
use crate::compiler::prelude::*;
use crate::value;
use core::convert::AsRef;
use parse_size::Config;
use rust_decimal::{Decimal, prelude::FromPrimitive, prelude::ToPrimitive};
use std::collections::HashMap;
use std::sync::LazyLock;
/// Default for the optional `base` parameter: the byte string `"2"`.
static DEFAULT_BASE: LazyLock<Value> = LazyLock::new(|| {
    let two = Bytes::from("2");
    Value::Bytes(two)
});
/// Variants advertised for the `unit` parameter, each paired with a short
/// human-readable description.
static UNIT_ENUM: &[EnumVariant] = &[
    // Plain bytes.
    EnumVariant {
        value: "B",
        description: "Bytes",
    },
    // Binary (power-of-1024) units.
    EnumVariant {
        value: "kiB",
        description: "Kilobytes (1024 bytes)",
    },
    EnumVariant {
        value: "MiB",
        description: "Megabytes (1024 ** 2 bytes)",
    },
    EnumVariant {
        value: "GiB",
        description: "Gigabytes (1024 ** 3 bytes)",
    },
    EnumVariant {
        value: "TiB",
        description: "Terabytes (1024 gigabytes)",
    },
    EnumVariant {
        value: "PiB",
        description: "Petabytes (1024 ** 2 gigabytes)",
    },
    EnumVariant {
        value: "EiB",
        description: "Exabytes (1024 ** 3 gigabytes)",
    },
    // SI (power-of-1000) units.
    EnumVariant {
        value: "kB",
        description: "Kilobytes (1 thousand bytes in SI)",
    },
    EnumVariant {
        value: "MB",
        description: "Megabytes (1 million bytes in SI)",
    },
    EnumVariant {
        value: "GB",
        description: "Gigabytes (1 billion bytes in SI)",
    },
    EnumVariant {
        value: "TB",
        description: "Terabytes (1 thousand gigabytes in SI)",
    },
    EnumVariant {
        value: "PB",
        description: "Petabytes (1 million gigabytes in SI)",
    },
    EnumVariant {
        value: "EB",
        description: "Exabytes (1 billion gigabytes in SI)",
    },
];
/// Parameter list for `parse_bytes`: required `value` and `unit`, plus an
/// optional `base` that defaults to `DEFAULT_BASE`.
static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
    vec![
        // The input is a byte size, not a duration; the description says so.
        Parameter::required(
            "value",
            kind::BYTES,
            "The string of the byte size with either binary or SI unit.",
        ),
        Parameter::required("unit", kind::BYTES, "The output units for the byte.")
            .enum_variants(UNIT_ENUM),
        Parameter::optional(
            "base",
            kind::BYTES,
            "The base for the byte, either 2 or 10.",
        )
        .default(&DEFAULT_BASE),
    ]
});
/// Parses a human-readable byte quantity and expresses it as a float in `unit`.
///
/// * `bytes` - the textual size to parse (for example `"1KiB"`); it is read as
///   lossy UTF-8.
/// * `unit` - name of the output unit, looked up in the table selected by `base`.
/// * `base` - `"2"` selects `BIN_UNITS` with binary parsing, `"10"` selects
///   `DEC_UNITS` with decimal parsing.
///
/// # Errors
///
/// Returns an error when `bytes` or `unit` is not a bytes value, when `unit` is
/// missing from the selected table, when the size string cannot be parsed, or
/// when a numeric conversion fails.
///
/// # Panics
///
/// Panics if `base` is neither `"2"` nor `"10"`.
fn parse_bytes(bytes: &Value, unit: Value, base: &Bytes) -> Resolved {
    let (units, parse_config) = match base.as_ref() {
        b"2" => (&*BIN_UNITS, Config::new().with_binary()),
        b"10" => (&*DEC_UNITS, Config::new().with_decimal()),
        _ => unreachable!("enum invariant"),
    };

    let input = bytes.try_bytes_utf8_lossy()?;
    let input: &str = input.as_ref();

    // The unit is resolved before the size is parsed so that error precedence
    // stays the same as before.
    let conversion_factor = {
        let raw_unit = unit.try_bytes()?;
        let unit_name = String::from_utf8_lossy(&raw_unit);
        units
            .get(unit_name.as_ref())
            // Built lazily: the message is only formatted on the failure path.
            .ok_or_else(|| format!("unknown unit format: '{unit_name}'"))?
    };

    let size = parse_config
        .parse_size(input)
        .map_err(|e| format!("unable to parse bytes: '{e}'"))?;
    let size_decimal =
        Decimal::from_u64(size).ok_or_else(|| format!("unable to parse number: {size}"))?;

    let quotient = size_decimal
        .checked_div(*conversion_factor)
        .ok_or("division by >1 divisor overflowed")?;
    let number = quotient
        .to_f64()
        .ok_or_else(|| format!("unable to parse number: '{quotient}'"))?;

    Ok(Value::from_f64_or_zero(number))
}
/// Conversion factors (in bytes) used when `base` is `"2"`.
///
/// Both the `iB` names and the ambiguous SI-looking names map to powers of
/// 1024 here. The kilo entries are present in two spellings: `KiB`/`KB`
/// (the existing keys) and `kiB`/`kB`, which are the spellings advertised for
/// the `unit` parameter in `UNIT_ENUM`. Without the lowercase aliases the
/// documented units would be rejected with "unknown unit format".
static BIN_UNITS: LazyLock<HashMap<String, Decimal>> = LazyLock::new(|| {
    vec![
        ("B", Decimal::new(1, 0)),
        ("KiB", Decimal::new(1_024, 0)),
        ("kiB", Decimal::new(1_024, 0)),
        ("MiB", Decimal::new(1_048_576, 0)),
        ("GiB", Decimal::new(1_073_741_824, 0)),
        ("TiB", Decimal::new(1_099_511_627_776, 0)),
        ("PiB", Decimal::new(1_125_899_906_842_624, 0)),
        ("EiB", Decimal::new(1_152_921_504_606_846_976, 0)),
        ("KB", Decimal::new(1_024, 0)),
        ("kB", Decimal::new(1_024, 0)),
        ("MB", Decimal::new(1_048_576, 0)),
        ("GB", Decimal::new(1_073_741_824, 0)),
        ("TB", Decimal::new(1_099_511_627_776, 0)),
        ("PB", Decimal::new(1_125_899_906_842_624, 0)),
        ("EB", Decimal::new(1_152_921_504_606_846_976, 0)),
    ]
    .into_iter()
    .map(|(k, v)| (k.to_owned(), v))
    .collect()
});
/// Conversion factors (in bytes) used when `base` is `"10"`: each unit name
/// maps to a power of 1000.
static DEC_UNITS: LazyLock<HashMap<String, Decimal>> = LazyLock::new(|| {
    let factors: [(&str, i64); 7] = [
        ("B", 1),
        ("kB", 1_000),
        ("MB", 1_000_000),
        ("GB", 1_000_000_000),
        ("TB", 1_000_000_000_000),
        ("PB", 1_000_000_000_000_000),
        ("EB", 1_000_000_000_000_000_000),
    ];

    let mut table = HashMap::with_capacity(factors.len());
    for (name, bytes_per_unit) in factors {
        // Scale 0: the factor is a whole number of bytes.
        table.insert(name.to_owned(), Decimal::new(bytes_per_unit, 0));
    }
    table
});
/// Unit struct that carries the `Function` implementation for `parse_bytes`.
#[derive(Debug, Clone, Copy)]
pub struct ParseBytes;
/// Returns the two values permitted for the `base` argument: `"2"` and `"10"`.
fn base_sets() -> Vec<Value> {
    let binary = value!("2");
    let decimal = value!("10");
    vec![binary, decimal]
}
impl Function for ParseBytes {
    /// Name under which the function is exposed.
    fn identifier(&self) -> &'static str {
        "parse_bytes"
    }

    /// One-line usage summary.
    fn usage(&self) -> &'static str {
        "Parses the `value` into a human-readable bytes format specified by `unit` and `base`."
    }

    /// Category label, taken from `Category::Parse`.
    fn category(&self) -> &'static str {
        Category::Parse.as_ref()
    }

    /// The function yields a float.
    fn return_kind(&self) -> u16 {
        kind::FLOAT
    }

    /// Reasons listed for a runtime failure of the function.
    fn internal_failure_reasons(&self) -> &'static [&'static str] {
        &["`value` is not a properly formatted bytes."]
    }

    /// Parameter descriptors, backed by the lazily built `PARAMETERS` list.
    fn parameters(&self) -> &'static [Parameter] {
        PARAMETERS.as_slice()
    }

    /// Usage examples covering binary units, SI units, and ambiguous unit
    /// names interpreted under an explicit `base`.
    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Parse bytes (kilobytes)",
                source: r#"parse_bytes!("1024KiB", unit: "MiB")"#,
                result: Ok("1.0"),
            },
            example! {
                title: "Parse kilobytes in default binary units",
                source: r#"parse_bytes!("1KiB", unit: "B")"#,
                result: Ok("1024.0"),
            },
            example! {
                title: "Parse bytes in SI unit (terabytes)",
                source: r#"parse_bytes!("4TB", unit: "MB", base: "10")"#,
                result: Ok("4000000.0"),
            },
            example! {
                title: "Parse gigabytes in decimal units",
                source: r#"parse_bytes!("1GB", unit: "B", base: "10")"#,
                result: Ok("1000000000.0"),
            },
            example! {
                title: "Parse bytes in ambiguous unit (gigabytes)",
                source: r#"parse_bytes!("1GB", unit: "B", base: "2")"#,
                result: Ok("1073741824.0"),
            },
            example! {
                title: "Parse gigabytes in ambiguous decimal units",
                source: r#"parse_bytes!("1GB", unit: "MB", base: "2")"#,
                result: Ok("1024.0"),
            },
        ]
    }

    /// Builds the runtime expression from the call arguments.
    ///
    /// `value` and `unit` are kept as expressions to be resolved later. `base`
    /// is read through `optional_enum` against the values from `base_sets()`
    /// and falls back to `DEFAULT_BASE` when the argument is absent.
    fn compile(
        &self,
        state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");
        let unit = arguments.required("unit");

        let chosen_base = match arguments.optional_enum("base", &base_sets(), state)? {
            Some(explicit) => explicit,
            None => DEFAULT_BASE.clone(),
        };
        let base = chosen_base.try_bytes().expect("base not bytes");

        let expression = ParseBytesFn { value, unit, base };
        Ok(expression.as_expr())
    }
}
/// Compiled form of a `parse_bytes` call.
#[derive(Clone, Debug)]
struct ParseBytesFn {
    /// Expression producing the size string to parse.
    value: Box<dyn Expression>,
    /// Expression producing the output unit name.
    unit: Box<dyn Expression>,
    /// Base fixed when the call is compiled.
    base: Bytes,
}
impl FunctionExpression for ParseBytesFn {
    /// Resolves the `value` expression, then the `unit` expression, and hands
    /// both to `parse_bytes` together with the stored base.
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        let input = self.value.resolve(ctx)?;
        let target_unit = self.unit.resolve(ctx)?;
        parse_bytes(&input, target_unit, &self.base)
    }

    /// The expression is typed as a fallible float.
    fn type_def(&self, _: &state::TypeState) -> TypeDef {
        let float = TypeDef::float();
        float.fallible()
    }
}
// Table-driven tests for `parse_bytes`, expressed through the `test_function!`
// macro. Each case supplies the call arguments (`args`), the expected result
// (`want`) and the expected type definition (`tdef`).
#[cfg(test)]
mod tests {
use super::*;
use crate::value;
test_function![
parse_bytes => ParseBytes;
// --- Default base: no `base` argument is passed in these cases. ---
mib_b {
args: func_args![value: "1MiB",
unit: "B"],
want: Ok(value!(1_048_576.0)),
tdef: TypeDef::float().fallible(),
}
// A result below one unit comes back as a fraction.
b_kib {
args: func_args![value: "512B",
unit: "KiB"],
want: Ok(0.5),
tdef: TypeDef::float().fallible(),
}
// Fractional input; note the output unit here is KiB despite the case name.
gib_mib {
args: func_args![value: "3.5GiB",
unit: "KiB"],
want: Ok(3_670_016.0),
tdef: TypeDef::float().fallible(),
}
// Whitespace between the number and the unit is accepted.
tib_gib {
args: func_args![value: "12 TiB",
unit: "GiB"],
want: Ok(12_288.0),
tdef: TypeDef::float().fallible(),
}
// Note the input here is TiB despite the case name.
mib_pib {
args: func_args![value: "256TiB",
unit: "PiB"],
want: Ok(0.25),
tdef: TypeDef::float().fallible(),
}
eib_tib {
args: func_args![value: "1EiB",
unit: "TiB"],
want: Ok(value!(1_048_576.0)),
tdef: TypeDef::float().fallible(),
}
// "MB" without an explicit base is expected to be treated as 1024 ** 2 bytes.
mib_b_ambiguous {
args: func_args![value: "1MB",
unit: "B",],
want: Ok(value!(1_048_576.0)),
tdef: TypeDef::float().fallible(),
}
// --- Explicit base "10". ---
mb_b {
args: func_args![value: "1MB",
unit: "B",
base: "10"],
want: Ok(value!(1_000_000.0)),
tdef: TypeDef::float().fallible(),
}
b_kb {
args: func_args![value: "3B",
unit: "kB",
base: "10"],
want: Ok(0.003),
tdef: TypeDef::float().fallible(),
}
// Fractional input; note the output unit here is kB despite the case name.
gb_mb {
args: func_args![value: "3.007GB",
unit: "kB",
base: "10"],
want: Ok(3_007_000.0),
tdef: TypeDef::float().fallible(),
}
tb_gb {
args: func_args![value: "12 TB",
unit: "GB",
base: "10"],
want: Ok(12_000.0),
tdef: TypeDef::float().fallible(),
}
mb_pb {
args: func_args![value: "768MB",
unit: "PB",
base: "10"],
want: Ok(0.000_000_768),
tdef: TypeDef::float().fallible(),
}
eb_tb {
args: func_args![value: "1EB",
unit: "TB",
base: "10"],
want: Ok(value!(1_000_000.0)),
tdef: TypeDef::float().fallible(),
}
// --- Error cases. ---
// Input with no numeric part fails while parsing the value.
error_invalid {
args: func_args![value: "foo",
unit: "KiB"],
want: Err("unable to parse bytes: 'invalid digit found in string'"),
tdef: TypeDef::float().fallible(),
}
// An unrecognized unit inside the value string ("YiB") is expected to fail
// with the same value-parsing error.
error_unit {
args: func_args![value: "1YiB",
unit: "MiB"],
want: Err("unable to parse bytes: 'invalid digit found in string'"),
tdef: TypeDef::float().fallible(),
}
// An output unit that is missing from the selected table is rejected.
error_format {
args: func_args![value: "100KB",
unit: "ZB",
base: "10"],
want: Err("unknown unit format: 'ZB'"),
tdef: TypeDef::float().fallible(),
}
];
}