// bashkit 0.1.18 - Docs.rs

//! numfmt builtin - convert numbers to/from human-readable format
//!
//! Supports --to=none/si/iec/iec-i, --from=none/si/iec/iec-i/auto, --suffix,
//! --padding, --round, --format, --field, --delimiter.

use async_trait::async_trait;

use super::{Builtin, Context};
use crate::error::Result;
use crate::interpreter::ExecResult;

/// Maximum output size (1 MiB) to prevent memory exhaustion.
///
/// The limit is compared against the accumulated output before each input is
/// converted, so the final output may exceed it by one converted line.
/// THREAT[TM-DOS-059]: Bound numfmt output
const MAX_OUTPUT_BYTES: usize = 1_048_576;

/// The `numfmt` builtin: reformats numbers given as operands or read from
/// standard input.
pub struct Numfmt;

/// Unit system selected by `--from` (input) or `--to` (output).
#[derive(Debug, Clone, Copy, PartialEq)]
enum Scale {
    /// No unit suffixes: numbers are read and written as plain values.
    None,
    /// Powers of 1000 (`K`, `M`, `G`, ...).
    Si,
    /// Powers of 1024 written with a single letter (`K`, `M`, `G`, ...).
    Iec,
    /// Powers of 1024 written with a trailing `i` on output (`Ki`, `Mi`, ...).
    IecI,
    /// Input only: the suffix decides between the 1024 and 1000 tables.
    /// When used as an output scale it is formatted without suffixes.
    Auto,
}

/// Rounding method selected by `--round`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum RoundMode {
    /// Round away from zero (ceil for non-negative values, floor for negative).
    FromZero,
    /// Round towards zero (floor for non-negative values, ceil for negative).
    TowardsZero,
    /// Round towards positive infinity.
    Up,
    /// Round towards negative infinity.
    Down,
    /// Round to the nearest integer.
    Nearest,
}

/// Parsed command-line options for one `numfmt` invocation.
struct Options {
    /// Unit system used to interpret suffixes on input numbers (`--from`).
    from: Scale,
    /// Unit system used to render output numbers (`--to`).
    to: Scale,
    /// Text appended to every output number (`--suffix`); empty when unset.
    suffix: String,
    /// Output width (`--padding`): positive right-aligns, negative left-aligns,
    /// zero disables padding.
    padding: i32,
    /// Rounding method (`--round`).
    round: RoundMode,
    /// printf-style format string (`--format`), if given.
    format: Option<String>,
    /// 1-based index of the field to convert (`--field`).
    field: usize,
    /// Field separator (`--delimiter` / `-d`); `None` means whitespace.
    delimiter: Option<String>,
}

// Values in effect when the corresponding flag is absent: no scaling in either
// direction, no suffix, no padding, rounding away from zero, no printf format,
// first field, whitespace-delimited.
impl Default for Options {
    fn default() -> Self {
        Self {
            from: Scale::None,
            to: Scale::None,
            suffix: String::new(),
            padding: 0,
            round: RoundMode::FromZero,
            format: None,
            field: 1,
            delimiter: None,
        }
    }
}

/// Maps a `--from` / `--to` unit name onto its [`Scale`] variant.
///
/// # Errors
///
/// Returns a ready-to-print `numfmt: invalid unit size` message when `s` is not
/// one of `none`, `si`, `iec`, `iec-i` or `auto`.
fn parse_scale(s: &str) -> std::result::Result<Scale, String> {
    let scale = match s {
        "auto" => Scale::Auto,
        "iec-i" => Scale::IecI,
        "iec" => Scale::Iec,
        "si" => Scale::Si,
        "none" => Scale::None,
        unknown => return Err(format!("numfmt: invalid unit size: '{}'\n", unknown)),
    };
    Ok(scale)
}

/// Maps a `--round` method name onto its [`RoundMode`] variant.
///
/// # Errors
///
/// Returns a ready-to-print `numfmt: invalid rounding mode` message when `s` is
/// not one of `up`, `down`, `from-zero`, `towards-zero` or `nearest`.
fn parse_round(s: &str) -> std::result::Result<RoundMode, String> {
    let mode = match s {
        "nearest" => RoundMode::Nearest,
        "towards-zero" => RoundMode::TowardsZero,
        "from-zero" => RoundMode::FromZero,
        "down" => RoundMode::Down,
        "up" => RoundMode::Up,
        unknown => return Err(format!("numfmt: invalid rounding mode: '{}'\n", unknown)),
    };
    Ok(mode)
}

/// SI suffixes: K=1000, M=1e6, G=1e9, T=1e12, P=1e15, E=1e18
///
/// Entries are `(suffix letter, multiplier)` in ascending multiplier order.
const SI_SUFFIXES: &[(char, f64)] = &[
    ('K', 1e3),
    ('M', 1e6),
    ('G', 1e9),
    ('T', 1e12),
    ('P', 1e15),
    ('E', 1e18),
];

/// IEC suffixes: K=1024, M=1024^2, G=1024^3, ...
///
/// Entries are `(suffix letter, multiplier)` in ascending multiplier order,
/// up to E=1024^6.
const IEC_SUFFIXES: &[(char, f64)] = &[
    ('K', 1024.0),
    ('M', 1_048_576.0),
    ('G', 1_073_741_824.0),
    ('T', 1_099_511_627_776.0),
    ('P', 1_125_899_906_842_624.0),
    ('E', 1_152_921_504_606_846_976.0),
];

/// Rounds `val` to an integral value according to `mode`.
///
/// `FromZero` and `TowardsZero` pick between `ceil` and `floor` based on the
/// sign of `val`; the other modes map directly onto `ceil`, `floor` and `round`.
fn round_value(val: f64, mode: RoundMode) -> f64 {
    let non_negative = val >= 0.0;
    match mode {
        RoundMode::Nearest => val.round(),
        RoundMode::Down => val.floor(),
        RoundMode::Up => val.ceil(),
        RoundMode::TowardsZero if non_negative => val.floor(),
        RoundMode::TowardsZero => val.ceil(),
        RoundMode::FromZero if non_negative => val.ceil(),
        RoundMode::FromZero => val.floor(),
    }
}

/// Parse an input number, possibly with a unit suffix (from `--from` mode).
///
/// With `Scale::None` the trimmed input must be a plain finite number. With
/// any other scale the number may be followed by one unit letter (`K`, `M`,
/// `G`, `T`, `P`, `E`, case-insensitive) and, where the IEC table applies, an
/// optional `i`:
///
/// * `Si` multiplies by powers of 1000 and rejects the `i` form.
/// * `Iec` / `IecI` multiply by powers of 1024 and accept both `K` and `Ki`.
/// * `Auto` uses powers of 1024 for `Ki`-style suffixes and 1000 otherwise.
///
/// # Errors
///
/// Returns a ready-to-print message when the input is empty, is not a finite
/// number, overflows after scaling, or carries a suffix that is not exactly
/// one of the accepted forms (for example trailing text after the unit).
fn parse_number(input: &str, from: Scale) -> std::result::Result<f64, String> {
    let s = input.trim();
    if s.is_empty() {
        return Err(format!("numfmt: invalid number: '{}'\n", input.trim_end()));
    }

    let invalid_number = || format!("numfmt: invalid number: '{}'\n", s);
    let invalid_suffix = || format!("numfmt: invalid suffix in input: '{}'\n", s);

    // `f64::from_str` accepts "inf" and "NaN"; neither is a number numfmt can
    // format, so only finite values are let through.
    let parse_finite =
        |text: &str| -> Option<f64> { text.parse::<f64>().ok().filter(|v| v.is_finite()) };

    if from == Scale::None {
        return parse_finite(s).ok_or_else(invalid_number);
    }

    let (num_part, suffix) = split_suffix(s);
    let base = parse_finite(num_part).ok_or_else(invalid_number)?;

    let mut suffix_chars = suffix.chars();
    let Some(unit) = suffix_chars.next() else {
        // No suffix at all: the value is taken as-is.
        return Ok(base);
    };
    let unit = unit.to_ascii_uppercase();

    // Whatever follows the unit letter must be nothing or a single `i`.
    let has_i = match suffix_chars.as_str() {
        "" => false,
        "i" | "I" => true,
        _ => return Err(invalid_suffix()),
    };

    let use_iec = match from {
        Scale::Iec | Scale::IecI => true,
        Scale::Auto => has_i,
        Scale::Si | Scale::None => false,
    };
    if has_i && !use_iec {
        return Err(invalid_suffix());
    }

    let table = if use_iec { IEC_SUFFIXES } else { SI_SUFFIXES };
    let factor = table
        .iter()
        .find(|&&(letter, _)| letter == unit)
        .map(|&(_, factor)| factor)
        .ok_or_else(invalid_suffix)?;

    let scaled = base * factor;
    if scaled.is_finite() {
        Ok(scaled)
    } else {
        // The multiplication overflowed the f64 range.
        Err(invalid_number())
    }
}

/// Splits `s` into `(numeric part, suffix)`.
///
/// The numeric part runs from the start of `s` through the last ASCII digit or
/// `.`; everything after that is the suffix. When `s` contains no digit or
/// `.`, the numeric part is empty and the whole string is the suffix.
fn split_suffix(s: &str) -> (&str, &str) {
    let is_numeric = |c: char| c.is_ascii_digit() || c == '.';
    match s.char_indices().rev().find(|&(_, c)| is_numeric(c)) {
        Some((idx, c)) => s.split_at(idx + c.len_utf8()),
        None => ("", s),
    }
}

/// Format a number for output with `--to` mode.
///
/// * `Scale::None` and `Scale::Auto` round `val` to an integer and print it
///   without a unit (`auto` is not a valid output scale, so it is handled
///   exactly like `none`).
/// * `Scale::Si`, `Scale::Iec` and `Scale::IecI` delegate to
///   `format_with_scale` with the matching suffix table.
///
/// `suffix` is appended to the number and the result is padded to `padding`
/// columns by `apply_padding`.
fn format_number(val: f64, to: Scale, round: RoundMode, suffix: &str, padding: i32) -> String {
    let formatted = match to {
        Scale::None | Scale::Auto => {
            let rounded = round_value(val, round);
            // Integral values inside the i64 range are printed through i64;
            // anything else (huge or non-finite) falls back to f64's Display.
            if rounded.fract() == 0.0 && rounded.abs() < i64::MAX as f64 {
                format!("{}{}", rounded as i64, suffix)
            } else {
                format!("{}{}", rounded, suffix)
            }
        }
        Scale::Si => format_with_scale(val, SI_SUFFIXES, false, round, suffix),
        Scale::Iec => format_with_scale(val, IEC_SUFFIXES, false, round, suffix),
        Scale::IecI => format_with_scale(val, IEC_SUFFIXES, true, round, suffix),
    };

    apply_padding(&formatted, padding)
}

/// Formats `val` using the largest unit in `table` that it reaches.
///
/// `table` holds `(unit letter, multiplier)` pairs and is assumed to be sorted
/// by ascending multiplier, as `SI_SUFFIXES` and `IEC_SUFFIXES` are. When
/// `iec_i_suffix` is set an `i` is written after the unit letter. `suffix` is
/// appended last.
///
/// Rounding can carry the scaled value up to a full step of the next unit
/// (999 999 rounds to 1000K with SI units). In that case the next unit is used
/// instead, so the result reads `1.0M` rather than `1000K`; likewise a value
/// below the first unit that rounds up to it reads `1.0K` rather than `1000`.
fn format_with_scale(
    val: f64,
    table: &[(char, f64)],
    iec_i_suffix: bool,
    round: RoundMode,
    suffix: &str,
) -> String {
    let abs_val = val.abs();

    // Index of the largest unit whose multiplier does not exceed |val|.
    let mut unit = table.iter().rposition(|&(_, factor)| abs_val >= factor);
    let mut scaled = match unit {
        Some(idx) => val / table[idx].1,
        None => val,
    };

    // Promote to the next unit when integer rounding reaches a full step.
    let next = unit.map_or(0, |idx| idx + 1);
    if let Some(&(_, next_factor)) = table.get(next) {
        let current_factor = unit.map_or(1.0, |idx| table[idx].1);
        let step = next_factor / current_factor;
        let rounded = round_value(scaled, round);
        if rounded.abs() >= step {
            unit = Some(next);
            scaled = rounded / step;
        }
    }

    match unit {
        Some(idx) => {
            let display = format_scaled_value(scaled, round);
            let marker = if iec_i_suffix { "i" } else { "" };
            format!("{}{}{}{}", display, table[idx].0, marker, suffix)
        }
        None => {
            // Value too small for any suffix
            let rounded = round_value(val, round);
            if rounded.fract() == 0.0 && rounded.abs() < i64::MAX as f64 {
                format!("{}{}", rounded as i64, suffix)
            } else {
                format!("{}{}", rounded, suffix)
            }
        }
    }
}

/// Format a scaled value like "1.0", "1.5", "10", "512".
///
/// GNU numfmt shows one decimal place when the value is < 10. Values below 10
/// in magnitude are therefore rounded at the tenths digit; everything else is
/// rounded to an integer. If rounding at the tenths digit carries the value up
/// to 10 (for example 9.96 rounded away from zero), it is printed as `10`
/// without a decimal, not `10.0`.
fn format_scaled_value(val: f64, round: RoundMode) -> String {
    if val.abs() < 10.0 {
        // Shift so the rounding mode acts on the tenths digit.
        let tenths = round_value(val * 10.0, round) / 10.0;
        if tenths.abs() < 10.0 {
            return format!("{:.1}", tenths);
        }
        // Rounding carried into the tens: drop the decimal place.
        return format!("{}", tenths as i64);
    }

    let rounded = round_value(val, round);
    format!("{}", rounded as i64)
}

/// Pads `s` with spaces to `|padding|` columns.
///
/// A positive `padding` right-aligns (spaces on the left), a negative one
/// left-aligns (spaces on the right), and zero leaves `s` untouched. Width is
/// measured in characters rather than bytes, so text containing multi-byte
/// characters (for example a `µ` suffix) is padded to the requested width.
/// `s` is never truncated.
fn apply_padding(s: &str, padding: i32) -> String {
    let width = padding.unsigned_abs() as usize;
    let len = s.chars().count();
    if width <= len {
        return s.to_string();
    }

    // Build the fill by hand so the result does not depend on the formatter's
    // handling of large dynamic widths.
    let fill = " ".repeat(width - len);
    if padding > 0 {
        // Right-align (pad with spaces on left)
        format!("{}{}", fill, s)
    } else {
        // Left-align (pad with spaces on right)
        format!("{}{}", s, fill)
    }
}

/// Parses `numfmt` arguments into [`Options`] plus the remaining operands.
///
/// Every long option is accepted as `--name=value` or `--name value`. The
/// delimiter can also be given as `-d value` or `-dvalue`. A bare `--` ends
/// option parsing; everything after it is an operand. Arguments that do not
/// start with `-` (and a lone `-`) are operands.
///
/// # Errors
///
/// Returns a ready-to-print message for an unknown option, a missing option
/// argument, an invalid unit/rounding name, a non-numeric padding or field
/// value, or a field value of `0`.
fn parse_options(args: &[String]) -> std::result::Result<(Options, Vec<String>), String> {
    /// Returns the value of long option `name` if `args[*i]` is that option.
    ///
    /// Handles both `name=value` and `name value`; in the latter case `*i` is
    /// advanced onto the value. Yields `Ok(None)` when `args[*i]` is some
    /// other argument.
    fn long_value<'a>(
        args: &'a [String],
        i: &mut usize,
        name: &str,
    ) -> std::result::Result<Option<&'a str>, String> {
        let Some(rest) = args[*i].strip_prefix(name) else {
            return Ok(None);
        };
        if let Some(inline) = rest.strip_prefix('=') {
            return Ok(Some(inline));
        }
        if !rest.is_empty() {
            // A longer option that merely shares the prefix.
            return Ok(None);
        }
        *i += 1;
        match args.get(*i) {
            Some(next) => Ok(Some(next.as_str())),
            None => Err(format!("numfmt: missing argument for {}\n", name)),
        }
    }

    let mut opts = Options::default();
    let mut operands = Vec::new();
    let mut i = 0;

    while i < args.len() {
        let arg = args[i].as_str();
        if arg == "--" {
            operands.extend(args[i + 1..].iter().cloned());
            break;
        } else if let Some(val) = long_value(args, &mut i, "--to")? {
            opts.to = parse_scale(val)?;
        } else if let Some(val) = long_value(args, &mut i, "--from")? {
            opts.from = parse_scale(val)?;
        } else if let Some(val) = long_value(args, &mut i, "--suffix")? {
            opts.suffix = val.to_string();
        } else if let Some(val) = long_value(args, &mut i, "--padding")? {
            opts.padding = val
                .parse()
                .map_err(|_| format!("numfmt: invalid padding value: '{}'\n", val))?;
        } else if let Some(val) = long_value(args, &mut i, "--round")? {
            opts.round = parse_round(val)?;
        } else if let Some(val) = long_value(args, &mut i, "--format")? {
            opts.format = Some(val.to_string());
        } else if let Some(val) = long_value(args, &mut i, "--field")? {
            opts.field = val
                .parse()
                .map_err(|_| format!("numfmt: invalid field value: '{}'\n", val))?;
            // Fields are 1-based; 0 can never name a field.
            if opts.field == 0 {
                return Err("numfmt: invalid field value: '0'\n".to_string());
            }
        } else if let Some(val) = long_value(args, &mut i, "--delimiter")? {
            opts.delimiter = Some(val.to_string());
        } else if let Some(attached) = arg.strip_prefix("-d") {
            // Short form: `-d X` or `-dX`.
            let value = if attached.is_empty() {
                i += 1;
                match args.get(i) {
                    Some(next) => next.as_str(),
                    None => return Err("numfmt: missing argument for --delimiter\n".to_string()),
                }
            } else {
                attached
            };
            opts.delimiter = Some(value.to_string());
        } else if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") {
            // Unknown short option
            return Err(format!("numfmt: invalid option -- '{}'\n", &arg[1..]));
        } else if arg.starts_with("--") {
            return Err(format!("numfmt: unrecognized option '{}'\n", arg));
        } else {
            operands.push(arg.to_string());
        }
        i += 1;
    }

    Ok((opts, operands))
}

fn convert_line(line: &str, opts: &Options) -> std::result::Result<String, String> {
    if let Some(ref delim) = opts.delimiter {
        // Split by delimiter, convert the specified field
        let parts: Vec<&str> = line.split(delim.as_str()).collect();
        let field_idx = opts.field - 1;
        if field_idx >= parts.len() {
            return Ok(line.to_string());
        }
        let val = parse_number(parts[field_idx], opts.from)?;
        let converted = format_number(val, opts.to, opts.round, &opts.suffix, opts.padding);
        let mut result_parts: Vec<String> = parts.iter().map(|s| s.to_string()).collect();
        result_parts[field_idx] = converted;
        Ok(result_parts.join(delim))
    } else if opts.field > 1 {
        // Split by whitespace, convert the specified field
        let parts: Vec<&str> = line.split_whitespace().collect();
        let field_idx = opts.field - 1;
        if field_idx >= parts.len() {
            return Ok(line.to_string());
        }
        let val = parse_number(parts[field_idx], opts.from)?;
        let converted = format_number(val, opts.to, opts.round, &opts.suffix, opts.padding);
        let mut result_parts: Vec<String> = parts.iter().map(|s| s.to_string()).collect();
        result_parts[field_idx] = converted;
        Ok(result_parts.join(" "))
    } else {
        // Convert the whole line (trimmed)
        let trimmed = line.trim();
        // Strip user suffix before parsing if present
        let to_parse = if !opts.suffix.is_empty() {
            trimmed.strip_suffix(&opts.suffix).unwrap_or(trimmed)
        } else {
            trimmed
        };
        let val = parse_number(to_parse, opts.from)?;
        format_with_printf(val, opts)
    }
}

/// Renders `val` according to `opts`.
///
/// When `opts.format` is set the value goes through `apply_printf_format`
/// (basic printf-style directive); otherwise it is formatted by
/// `format_number` using the `--to` scale and rounding mode. Suffix and
/// padding are forwarded in both cases.
fn format_with_printf(val: f64, opts: &Options) -> std::result::Result<String, String> {
    match opts.format.as_deref() {
        Some(fmt) => apply_printf_format(val, fmt, &opts.suffix, opts.padding),
        None => {
            let plain = format_number(val, opts.to, opts.round, &opts.suffix, opts.padding);
            Ok(plain)
        }
    }
}

/// Renders `val` through a basic printf-style format string.
///
/// `fmt` may contain literal text around a single directive of the form
/// `%[flags][width][.precision]conv`:
///
/// * `conv` is one of `f`, `g`, `e`, `d`, `i`.
/// * `flags`: `-` left-aligns within the width, `0` pads the number with
///   zeros after its sign, `+` prefixes non-negative numbers with `+`. A space
///   or `'` flag is accepted and has no effect.
/// * `width` is the minimum width of the number plus `suffix`.
/// * `precision` sets the digits after the decimal point for `f` (default 6,
///   an empty precision means 0).
///
/// Width and precision are limited to `u16::MAX` so a format string cannot
/// request an arbitrarily large allocation. `suffix` is appended directly to
/// the number, and `padding` is applied to the complete result (literal text
/// included) by `apply_padding`.
///
/// A format string without `%` is returned as-is with `suffix` appended.
///
/// # Errors
///
/// Returns `numfmt: invalid format` when the directive has no supported
/// conversion character or contains anything other than flags, width and
/// precision.
fn apply_printf_format(
    val: f64,
    fmt: &str,
    suffix: &str,
    padding: i32,
) -> std::result::Result<String, String> {
    let invalid = || format!("numfmt: invalid format '{}'\n", fmt);

    // Find the % format specifier
    let Some(pct_pos) = fmt.find('%') else {
        return Ok(format!("{}{}", fmt, suffix));
    };

    let before = &fmt[..pct_pos];
    let rest = &fmt[pct_pos + 1..];

    // Find the conversion character (f, g, e, d, i)
    let conv_pos = rest.find(['f', 'g', 'e', 'd', 'i']).ok_or_else(invalid)?;

    let spec = &rest[..conv_pos];
    let conv = rest.as_bytes()[conv_pos] as char;
    let after = &rest[conv_pos + 1..];

    // Split the directive into flags, width and optional precision.
    let flags_end = spec
        .find(|c: char| !matches!(c, '-' | '0' | '+' | ' ' | '\''))
        .unwrap_or(spec.len());
    let (flags, sized) = spec.split_at(flags_end);
    let (width_text, precision_text) = match sized.split_once('.') {
        Some((w, p)) => (w, Some(p)),
        None => (sized, None),
    };

    // Digits only, bounded to u16; an empty field counts as 0.
    let parse_count = |text: &str| -> std::result::Result<usize, String> {
        if text.is_empty() {
            return Ok(0);
        }
        if !text.bytes().all(|b| b.is_ascii_digit()) {
            return Err(invalid());
        }
        text.parse::<u16>().map(usize::from).map_err(|_| invalid())
    };
    let width = parse_count(width_text)?;
    let precision = match precision_text {
        Some(text) => Some(parse_count(text)?),
        None => None,
    };

    let mut number = match conv {
        'f' => format!("{:.prec$}", val, prec = precision.unwrap_or(6)),
        'g' => format!("{}", val),
        'e' => format!("{:e}", val),
        'd' | 'i' => format!("{}", val as i64),
        _ => unreachable!(),
    };

    let left_align = flags.contains('-');
    if flags.contains('+') && !number.starts_with('-') {
        number.insert(0, '+');
    }
    // Zero padding applies to the number only and goes after the sign; it is
    // ignored when left-aligning, as in printf.
    if flags.contains('0') && !left_align && val.is_finite() {
        let len = number.chars().count();
        if width > len {
            let sign_len = usize::from(number.starts_with(['+', '-']));
            number.insert_str(sign_len, &"0".repeat(width - len));
        }
    }

    let mut body = format!("{}{}", number, suffix);
    let body_len = body.chars().count();
    if width > body_len {
        let fill = " ".repeat(width - body_len);
        if left_align {
            body.push_str(&fill);
        } else {
            body.insert_str(0, &fill);
        }
    }

    let result = format!("{}{}{}", before, body, after);
    Ok(apply_padding(&result, padding))
}

#[async_trait]
impl Builtin for Numfmt {
    /// Runs `numfmt`.
    ///
    /// `--help` / `--version` are answered first via `check_help_version`.
    /// Option errors yield exit status 1. Each operand is then converted with
    /// `convert_line`; when there are no operands, each line of standard input
    /// (if any) is converted instead. The first conversion error ends the run
    /// with exit status 2. Conversion stops quietly once the accumulated
    /// output has grown past `MAX_OUTPUT_BYTES`.
    async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
        const USAGE: &str = "Usage: numfmt [OPTION]... [NUMBER]...\nReformat NUMBER(s), or the numbers from standard input.\n\n  --from=UNIT\tauto-scale input numbers to UNITs (none, si, iec, iec-i, auto)\n  --to=UNIT\tauto-scale output numbers to UNITs (none, si, iec, iec-i)\n  --suffix=SUFFIX\tadd SUFFIX to output numbers\n  --padding=N\tpad the output to N characters\n  --round=METHOD\tuse METHOD for rounding (up, down, from-zero, towards-zero, nearest)\n  --format=FORMAT\tuse printf-style FORMAT\n  --field=N\treplace the number in input field N (default 1)\n  -d, --delimiter=X\tuse X instead of whitespace for field delimiter\n  --help\t\t\tdisplay this help and exit\n  --version\t\toutput version information and exit\n";

        if let Some(r) = super::check_help_version(ctx.args, USAGE, Some("numfmt (bashkit) 0.1"))
        {
            return Ok(r);
        }

        let (opts, operands) = match parse_options(ctx.args) {
            Err(e) => return Ok(ExecResult::err(e, 1)),
            Ok(parsed) => parsed,
        };

        let mut output = String::new();

        if !operands.is_empty() {
            // Process each operand
            for operand in &operands {
                if output.len() > MAX_OUTPUT_BYTES {
                    break;
                }
                let converted = match convert_line(operand, &opts) {
                    Ok(text) => text,
                    Err(e) => return Ok(ExecResult::err(e, 2)),
                };
                output.push_str(&converted);
                output.push('\n');
            }
        } else if let Some(stdin) = ctx.stdin {
            // Read from stdin
            for line in stdin.lines() {
                if output.len() > MAX_OUTPUT_BYTES {
                    break;
                }
                let converted = match convert_line(line, &opts) {
                    Ok(text) => text,
                    Err(e) => return Ok(ExecResult::err(e, 2)),
                };
                output.push_str(&converted);
                output.push('\n');
            }
        }

        Ok(ExecResult::ok(output))
    }
}

// Unit tests for the parsing, rounding, scaling and padding helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // Plain numbers parse to their value when no input scale is set.
    #[test]
    fn test_parse_number_plain() {
        assert_eq!(parse_number("1024", Scale::None).unwrap(), 1024.0);
        assert_eq!(parse_number("1048576", Scale::None).unwrap(), 1048576.0);
    }

    // IEC input suffixes multiply by powers of 1024.
    #[test]
    fn test_parse_number_iec() {
        assert_eq!(parse_number("1K", Scale::Iec).unwrap(), 1024.0);
        assert_eq!(parse_number("1M", Scale::Iec).unwrap(), 1_048_576.0);
    }

    // SI input suffixes multiply by powers of 1000.
    #[test]
    fn test_parse_number_si() {
        assert_eq!(parse_number("1K", Scale::Si).unwrap(), 1000.0);
        assert_eq!(parse_number("1M", Scale::Si).unwrap(), 1_000_000.0);
    }

    // 1024^2 is exactly one IEC mega, shown with one decimal place.
    #[test]
    fn test_format_to_iec() {
        let s = format_number(1_048_576.0, Scale::Iec, RoundMode::FromZero, "", 0);
        assert_eq!(s, "1.0M");
    }

    // 1.048576M rounded away from zero at the tenths digit is 1.1M.
    #[test]
    fn test_format_to_si() {
        let s = format_number(1_048_576.0, Scale::Si, RoundMode::FromZero, "", 0);
        assert_eq!(s, "1.1M");
    }

    // iec-i output appends "i" after the unit letter.
    #[test]
    fn test_format_to_iec_i() {
        let s = format_number(1_048_576.0, Scale::IecI, RoundMode::FromZero, "", 0);
        assert_eq!(s, "1.0Mi");
    }

    // The user suffix follows the unit letter.
    #[test]
    fn test_format_with_suffix() {
        let s = format_number(1_048_576.0, Scale::Iec, RoundMode::FromZero, "B", 0);
        assert_eq!(s, "1.0MB");
    }

    // One representative value per rounding mode.
    #[test]
    fn test_round_modes() {
        assert_eq!(round_value(1.1, RoundMode::Up), 2.0);
        assert_eq!(round_value(1.9, RoundMode::Down), 1.0);
        assert_eq!(round_value(1.5, RoundMode::Nearest), 2.0);
        assert_eq!(round_value(-1.5, RoundMode::FromZero), -2.0);
        assert_eq!(round_value(-1.5, RoundMode::TowardsZero), -1.0);
    }

    // Positive padding right-aligns the result to the requested width.
    #[test]
    fn test_padding() {
        let s = format_number(1024.0, Scale::Iec, RoundMode::FromZero, "", 10);
        assert_eq!(s, "      1.0K");
    }

    // Non-numeric and empty inputs are rejected.
    #[test]
    fn test_invalid_number() {
        assert!(parse_number("abc", Scale::None).is_err());
        assert!(parse_number("", Scale::None).is_err());
    }
}