coreutils_rs/numfmt/
core.rs

1use std::io::Write;
2
/// Scaling system used to interpret input suffixes or to pick output suffixes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ScaleUnit {
    /// Plain numbers: no suffix is parsed or produced.
    None,
    /// Decimal prefixes, each step a factor of 1000 (k, M, G, T, P, E, Z, Y).
    Si,
    /// Binary prefixes written as a single letter, each step a factor of 1024
    /// (K, M, G, T, P, E, Z, Y).
    Iec,
    /// Binary prefixes written with a trailing `i` (Ki, Mi, Gi, ...).
    IecI,
    /// Decide per token from the suffix it carries (used for `--from=auto`).
    Auto,
}
17
/// Direction in which a value is rounded before it is printed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RoundMethod {
    /// Toward positive infinity (ceiling).
    Up,
    /// Toward negative infinity (floor).
    Down,
    /// Away from zero: ceiling for non-negative values, floor for negative ones.
    FromZero,
    /// Toward zero: floor for non-negative values, ceiling for negative ones.
    TowardsZero,
    /// To the nearest value, ties going away from zero (the default).
    Nearest,
}
32
/// Policy applied when an input field cannot be converted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InvalidMode {
    /// Report the error and stop at the first invalid field.
    Abort,
    /// Report the error, keep the field unchanged and keep processing.
    Fail,
    /// Report a warning, keep the field unchanged and keep processing.
    Warn,
    /// Keep the field unchanged without reporting anything.
    Ignore,
}
45
46/// Configuration for the numfmt command.
47pub struct NumfmtConfig {
48    pub from: ScaleUnit,
49    pub to: ScaleUnit,
50    pub from_unit: f64,
51    pub to_unit: f64,
52    pub padding: Option<i32>,
53    pub round: RoundMethod,
54    pub suffix: Option<String>,
55    pub format: Option<String>,
56    pub field: Vec<usize>,
57    pub delimiter: Option<char>,
58    pub header: usize,
59    pub invalid: InvalidMode,
60    pub grouping: bool,
61    pub zero_terminated: bool,
62}
63
64impl Default for NumfmtConfig {
65    fn default() -> Self {
66        Self {
67            from: ScaleUnit::None,
68            to: ScaleUnit::None,
69            from_unit: 1.0,
70            to_unit: 1.0,
71            padding: None,
72            round: RoundMethod::Nearest,
73            suffix: None,
74            format: None,
75            field: vec![1],
76            delimiter: None,
77            header: 0,
78            invalid: InvalidMode::Abort,
79            grouping: false,
80            zero_terminated: false,
81        }
82    }
83}
84
/// Decimal (SI) suffix table, ordered from smallest to largest multiplier.
///
/// Each entry pairs a suffix letter with its power-of-1000 factor. The kilo
/// entry is stored as lowercase `k`, whereas the IEC table uses uppercase `K`.
const SI_SUFFIXES: &[(char, f64)] = &[
    ('k', 1e3),  // kilo
    ('M', 1e6),  // mega
    ('G', 1e9),  // giga
    ('T', 1e12), // tera
    ('P', 1e15), // peta
    ('E', 1e18), // exa
    ('Z', 1e21), // zetta
    ('Y', 1e24), // yotta
];
97
/// Binary (IEC) suffix table, ordered from smallest to largest multiplier.
///
/// Each entry pairs a suffix letter with its power-of-1024 factor. Powers of
/// two are exactly representable in `f64`, so the shifts below produce the
/// exact multipliers.
const IEC_SUFFIXES: &[(char, f64)] = &[
    ('K', (1u128 << 10) as f64),
    ('M', (1u128 << 20) as f64),
    ('G', (1u128 << 30) as f64),
    ('T', (1u128 << 40) as f64),
    ('P', (1u128 << 50) as f64),
    ('E', (1u128 << 60) as f64),
    ('Z', (1u128 << 70) as f64),
    ('Y', (1u128 << 80) as f64),
];
109
110/// Parse a scale unit string.
111pub fn parse_scale_unit(s: &str) -> Result<ScaleUnit, String> {
112    match s {
113        "none" => Ok(ScaleUnit::None),
114        "si" => Ok(ScaleUnit::Si),
115        "iec" => Ok(ScaleUnit::Iec),
116        "iec-i" => Ok(ScaleUnit::IecI),
117        "auto" => Ok(ScaleUnit::Auto),
118        _ => Err(format!("invalid unit: '{}'", s)),
119    }
120}
121
122/// Parse a round method string.
123pub fn parse_round_method(s: &str) -> Result<RoundMethod, String> {
124    match s {
125        "up" => Ok(RoundMethod::Up),
126        "down" => Ok(RoundMethod::Down),
127        "from-zero" => Ok(RoundMethod::FromZero),
128        "towards-zero" => Ok(RoundMethod::TowardsZero),
129        "nearest" => Ok(RoundMethod::Nearest),
130        _ => Err(format!("invalid rounding method: '{}'", s)),
131    }
132}
133
134/// Parse an invalid mode string.
135pub fn parse_invalid_mode(s: &str) -> Result<InvalidMode, String> {
136    match s {
137        "abort" => Ok(InvalidMode::Abort),
138        "fail" => Ok(InvalidMode::Fail),
139        "warn" => Ok(InvalidMode::Warn),
140        "ignore" => Ok(InvalidMode::Ignore),
141        _ => Err(format!("invalid mode: '{}'", s)),
142    }
143}
144
/// Parse a `--field` specification such as `"1"`, `"1,3"`, `"2-5"`, `"-5"`,
/// `"3-"` or `"-"`.
///
/// Returns the selected 1-based field numbers, sorted ascending and without
/// duplicates. An empty vector means "every field"; it is returned when the
/// specification is (or contains) a bare `-`.
///
/// An open-ended range such as `"3-"` is expanded up to field 9999.
///
/// # Errors
/// Returns an error when a component is not a number, when a field number is
/// zero, or when a range is decreasing (for example `"5-3"`). A decreasing
/// range used to produce an empty list, which callers interpret as "all
/// fields".
pub fn parse_fields(s: &str) -> Result<Vec<usize>, String> {
    /// Upper bound substituted for the missing end of an open-ended range.
    const OPEN_RANGE_END: usize = 9999;

    if s == "-" {
        // All fields: represented by an empty list.
        return Ok(Vec::new());
    }

    let mut fields = Vec::new();
    for part in s.split(',') {
        let part = part.trim();
        let invalid = || format!("invalid field value '{}'", part);

        match part.split_once('-') {
            // A bare "-" inside a list also selects every field.
            Some(("", "")) => return Ok(Vec::new()),
            Some((start_str, end_str)) => {
                let start: usize = if start_str.is_empty() {
                    1
                } else {
                    start_str.parse().map_err(|_| invalid())?
                };
                let end: usize = if end_str.is_empty() {
                    OPEN_RANGE_END
                } else {
                    end_str.parse().map_err(|_| invalid())?
                };
                if start == 0 || end == 0 {
                    return Err(format!("fields are numbered from 1: '{}'", part));
                }
                if end < start {
                    return Err(format!("invalid decreasing range: '{}'", part));
                }
                fields.extend(start..=end);
            }
            None => {
                let n: usize = part.parse().map_err(|_| invalid())?;
                if n == 0 {
                    return Err("fields are numbered from 1".to_string());
                }
                fields.push(n);
            }
        }
    }

    // Sorting once and dropping adjacent duplicates replaces a per-element
    // `contains` scan, which was quadratic for large ranges.
    fields.sort_unstable();
    fields.dedup();
    Ok(fields)
}
201
202/// Parse a number with optional suffix, returning the raw numeric value.
203/// Handles suffixes like K, M, G, T, P, E, Z, Y (and Ki, Mi, etc. for iec-i).
204fn parse_number_with_suffix(s: &str, unit: ScaleUnit) -> Result<f64, String> {
205    let s = s.trim();
206    if s.is_empty() {
207        return Err("invalid number: ''".to_string());
208    }
209
210    // Find where the numeric part ends and the suffix begins.
211    let mut num_end = s.len();
212    let bytes = s.as_bytes();
213    let len = s.len();
214
215    // Check for trailing scale suffix characters.
216    if len > 0 {
217        let last_char = bytes[len - 1] as char;
218
219        match unit {
220            ScaleUnit::Auto | ScaleUnit::IecI => {
221                // Check for 'i' suffix (e.g., Ki, Mi).
222                if last_char == 'i' && len >= 2 {
223                    let prefix_char = (bytes[len - 2] as char).to_ascii_uppercase();
224                    if is_scale_suffix(prefix_char) {
225                        num_end = len - 2;
226                    }
227                } else {
228                    let upper = last_char.to_ascii_uppercase();
229                    if is_scale_suffix(upper) {
230                        num_end = len - 1;
231                    }
232                }
233            }
234            ScaleUnit::Si | ScaleUnit::Iec => {
235                let upper = last_char.to_ascii_uppercase();
236                if is_scale_suffix(upper) {
237                    num_end = len - 1;
238                }
239            }
240            ScaleUnit::None => {}
241        }
242    }
243
244    let num_str = &s[..num_end];
245    let suffix_str = &s[num_end..];
246
247    // Parse the numeric part.
248    let value: f64 = num_str
249        .parse()
250        .map_err(|_| format!("invalid number: '{}'", s))?;
251
252    // Apply suffix multiplier.
253    let multiplier = if suffix_str.is_empty() {
254        1.0
255    } else {
256        let suffix_upper = suffix_str.chars().next().unwrap().to_ascii_uppercase();
257        match unit {
258            ScaleUnit::Auto => {
259                // Auto-detect: if suffix ends with 'i', use IEC; otherwise SI.
260                if suffix_str.len() >= 2 && suffix_str.ends_with('i') {
261                    find_iec_multiplier(suffix_upper)?
262                } else {
263                    find_si_multiplier(suffix_upper)?
264                }
265            }
266            ScaleUnit::Si => find_si_multiplier(suffix_upper)?,
267            ScaleUnit::Iec | ScaleUnit::IecI => find_iec_multiplier(suffix_upper)?,
268            ScaleUnit::None => {
269                return Err(format!("invalid number: '{}'", s));
270            }
271        }
272    };
273
274    Ok(value * multiplier)
275}
276
/// Whether `c` is one of the uppercase scale letters `K M G T P E Z Y`.
fn is_scale_suffix(c: char) -> bool {
    "KMGTPEZY".contains(c)
}
280
281fn find_si_multiplier(c: char) -> Result<f64, String> {
282    for &(suffix, mult) in SI_SUFFIXES {
283        if suffix.eq_ignore_ascii_case(&c) {
284            return Ok(mult);
285        }
286    }
287    Err(format!("invalid suffix: '{}'", c))
288}
289
290fn find_iec_multiplier(c: char) -> Result<f64, String> {
291    for &(suffix, mult) in IEC_SUFFIXES {
292        if suffix == c {
293            return Ok(mult);
294        }
295    }
296    Err(format!("invalid suffix: '{}'", c))
297}
298
299/// Apply rounding according to the specified method.
300fn apply_round(value: f64, method: RoundMethod) -> f64 {
301    match method {
302        RoundMethod::Up => value.ceil(),
303        RoundMethod::Down => value.floor(),
304        RoundMethod::FromZero => {
305            if value >= 0.0 {
306                value.ceil()
307            } else {
308                value.floor()
309            }
310        }
311        RoundMethod::TowardsZero => {
312            if value >= 0.0 {
313                value.floor()
314            } else {
315                value.ceil()
316            }
317        }
318        RoundMethod::Nearest => value.round(),
319    }
320}
321
322/// Format a number with scale suffix for output.
323fn format_scaled(value: f64, unit: ScaleUnit, round: RoundMethod) -> String {
324    match unit {
325        ScaleUnit::None => {
326            // Output as plain number.
327            format_plain_number(value)
328        }
329        ScaleUnit::Si => format_with_scale(value, SI_SUFFIXES, "", round),
330        ScaleUnit::Iec => format_with_scale(value, IEC_SUFFIXES, "", round),
331        ScaleUnit::IecI => format_with_scale(value, IEC_SUFFIXES, "i", round),
332        ScaleUnit::Auto => {
333            // For --to=auto, behave like SI.
334            format_with_scale(value, SI_SUFFIXES, "", round)
335        }
336    }
337}
338
/// Format a number without a scale suffix.
///
/// Integral values are printed with no decimal point; any other value is
/// printed with exactly one decimal place.
///
/// Integrality is tested with `fract()` rather than a round trip through
/// `i64`. The cast saturates, so 2^63 printed as `9223372036854775807` and
/// larger integral values picked up a spurious `.0`.
fn format_plain_number(value: f64) -> String {
    if value == 0.0 {
        // Also covers -0.0, which would otherwise be rendered as "-0".
        "0".to_string()
    } else if value.fract() == 0.0 {
        format!("{:.0}", value)
    } else {
        // Non-integral (or non-finite) values: one decimal place.
        format!("{:.1}", value)
    }
}
349
350/// Format a number with appropriate scale suffix.
351fn format_with_scale(
352    value: f64,
353    suffixes: &[(char, f64)],
354    i_suffix: &str,
355    round: RoundMethod,
356) -> String {
357    let abs_value = value.abs();
358    let sign = if value < 0.0 { "-" } else { "" };
359
360    // Find the largest suffix that applies.
361    let mut chosen_suffix = None;
362    let mut chosen_mult = 1.0;
363
364    for &(suffix, mult) in suffixes.iter().rev() {
365        if abs_value >= mult {
366            chosen_suffix = Some(suffix);
367            chosen_mult = mult;
368            break;
369        }
370    }
371
372    if let Some(suffix) = chosen_suffix {
373        let scaled = value / chosen_mult;
374        let scaled = apply_round_for_display(scaled, round);
375
376        // Check if rounding pushed the value to the next suffix level.
377        // E.g., 999.999k rounds to 1000.0k -> should become 1.0M.
378        let base = suffixes[0].1; // the base multiplier (e.g., 1000 for SI, 1024 for IEC)
379        if scaled.abs() >= base {
380            // Find the next suffix.
381            let mut found_current = false;
382            for &(next_suffix, next_mult) in suffixes.iter() {
383                if found_current {
384                    let re_scaled = value / next_mult;
385                    let re_scaled = apply_round_for_display(re_scaled, round);
386                    return format!("{sign}{:.1}{}{}", re_scaled.abs(), next_suffix, i_suffix);
387                }
388                if next_suffix == suffix {
389                    found_current = true;
390                }
391            }
392            // No next suffix available, just use what we have.
393        }
394
395        format!("{sign}{:.1}{}{}", scaled.abs(), suffix, i_suffix)
396    } else {
397        // Value is smaller than the smallest suffix, output as-is.
398        format_plain_number(value)
399    }
400}
401
402/// Apply rounding for display purposes (when formatting scaled output).
403fn apply_round_for_display(value: f64, method: RoundMethod) -> f64 {
404    // For display, we round to 1 decimal place.
405    let factor = 10.0;
406    let shifted = value * factor;
407    let rounded = match method {
408        RoundMethod::Up => shifted.ceil(),
409        RoundMethod::Down => shifted.floor(),
410        RoundMethod::FromZero => {
411            if shifted >= 0.0 {
412                shifted.ceil()
413            } else {
414                shifted.floor()
415            }
416        }
417        RoundMethod::TowardsZero => {
418            if shifted >= 0.0 {
419                shifted.floor()
420            } else {
421                shifted.ceil()
422            }
423        }
424        RoundMethod::Nearest => shifted.round(),
425    };
426    rounded / factor
427}
428
/// Insert `,` thousands separators into the integer part of a formatted number.
///
/// Only the first run of ASCII digits in `s` is grouped. Everything before it
/// (sign, padding, literal text from a format string) and everything after it
/// (fraction, exponent, suffix) is copied unchanged. A string without digits
/// is returned as is.
///
/// The previous version treated everything before the `.` as digits. That
/// misplaced separators when the text carried padding or a `+` sign, and
/// panicked on a non-ASCII prefix because it sliced in the middle of a
/// character.
fn group_thousands(s: &str) -> String {
    let bytes = s.as_bytes();
    let start = match bytes.iter().position(|b| b.is_ascii_digit()) {
        Some(pos) => pos,
        None => return s.to_string(),
    };
    let run_len = bytes[start..]
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    let end = start + run_len;

    // `start` and `end` delimit ASCII bytes, so both are character boundaries.
    let (head, digits, tail) = (&s[..start], &s[start..end], &s[end..]);

    let mut grouped = String::with_capacity(s.len() + run_len / 3);
    grouped.push_str(head);
    for (i, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that starts a group of three
        // counted from the right, except the very first digit.
        if i > 0 && (run_len - i) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }
    grouped.push_str(tail);
    grouped
}
463
/// Place an already-formatted number into a printf-style format string,
/// honouring only the literal text, the `-` flag and the field width.
///
/// Used when both `--to` and `--format` are given: the number has already
/// been scaled, so the directive's precision and conversion character are
/// skipped. If `fmt` contains no `%`, it is used as a literal prefix.
///
/// The conversion character is skipped as a whole character. Skipping one
/// byte, as before, made the suffix slice panic on a multi-byte character.
fn apply_format_padding(scaled: &str, fmt: &str) -> String {
    let percent = match fmt.find('%') {
        Some(pos) => pos,
        None => return format!("{}{}", fmt, scaled),
    };
    let prefix = &fmt[..percent];
    let bytes = fmt.as_bytes();
    let mut i = percent + 1;

    // Flags: only '-' (left alignment) affects the result.
    let mut left_align = false;
    while i < bytes.len() {
        match bytes[i] {
            b'0' | b'+' | b' ' | b'#' | b'\'' => {}
            b'-' => left_align = true,
            _ => break,
        }
        i += 1;
    }

    // Field width.
    let mut width: usize = 0;
    while i < bytes.len() && bytes[i].is_ascii_digit() {
        width = width
            .saturating_mul(10)
            .saturating_add((bytes[i] - b'0') as usize);
        i += 1;
    }

    // Precision is ignored.
    while i < bytes.len() && (bytes[i] == b'.' || bytes[i].is_ascii_digit()) {
        i += 1;
    }

    // Only ASCII bytes have been consumed so far, so `i` is on a character
    // boundary; step over the conversion character by its full length.
    if let Some(conv) = fmt[i..].chars().next() {
        i += conv.len_utf8();
    }
    let suffix = &fmt[i..];

    let padded = if width > 0 && scaled.len() < width {
        let fill = " ".repeat(width - scaled.len());
        if left_align {
            format!("{}{}", scaled, fill)
        } else {
            format!("{}{}", fill, scaled)
        }
    } else {
        scaled.to_string()
    };

    format!("{}{}{}", prefix, padded, suffix)
}
522
523/// Apply printf-style format to a number.
524fn apply_format(value: f64, fmt: &str) -> Result<String, String> {
525    // Parse format: %[flags][width][.precision]f
526    let bytes = fmt.as_bytes();
527    let mut i = 0;
528
529    // Find '%'.
530    while i < bytes.len() && bytes[i] != b'%' {
531        i += 1;
532    }
533    let prefix = &fmt[..i];
534    if i >= bytes.len() {
535        return Err(format!("invalid format: '{}'", fmt));
536    }
537    i += 1; // skip '%'
538
539    if i >= bytes.len() {
540        return Err(format!("invalid format: '{}'", fmt));
541    }
542
543    // Handle %%
544    if bytes[i] == b'%' {
545        return Ok(format!("{}%", prefix));
546    }
547
548    // Parse flags.
549    let mut zero_pad = false;
550    let mut left_align = false;
551    let mut plus_sign = false;
552    let mut space_sign = false;
553    while i < bytes.len() {
554        match bytes[i] {
555            b'0' => zero_pad = true,
556            b'-' => left_align = true,
557            b'+' => plus_sign = true,
558            b' ' => space_sign = true,
559            b'#' => {}
560            b'\'' => {} // grouping flag, handled separately
561            _ => break,
562        }
563        i += 1;
564    }
565
566    // Parse width.
567    let mut width: usize = 0;
568    while i < bytes.len() && bytes[i].is_ascii_digit() {
569        width = width
570            .saturating_mul(10)
571            .saturating_add((bytes[i] - b'0') as usize);
572        i += 1;
573    }
574
575    // Parse precision.
576    let mut precision: Option<usize> = None;
577    if i < bytes.len() && bytes[i] == b'.' {
578        i += 1;
579        let mut prec: usize = 0;
580        while i < bytes.len() && bytes[i].is_ascii_digit() {
581            prec = prec
582                .saturating_mul(10)
583                .saturating_add((bytes[i] - b'0') as usize);
584            i += 1;
585        }
586        precision = Some(prec);
587    }
588
589    // Parse conversion type.
590    if i >= bytes.len() {
591        return Err(format!("invalid format: '{}'", fmt));
592    }
593    let conv = bytes[i] as char;
594    i += 1;
595    let suffix = &fmt[i..];
596
597    let prec = precision.unwrap_or(6);
598    let formatted = match conv {
599        'f' => format!("{:.prec$}", value, prec = prec),
600        'e' => format_scientific(value, prec, 'e'),
601        'E' => format_scientific(value, prec, 'E'),
602        'g' => format_g(value, prec, false),
603        'G' => format_g(value, prec, true),
604        _ => return Err(format!("invalid format character: '{}'", conv)),
605    };
606
607    // Apply sign prefix.
608    let sign_str = if value < 0.0 {
609        ""
610    } else if plus_sign {
611        "+"
612    } else if space_sign {
613        " "
614    } else {
615        ""
616    };
617
618    let num_str = if !sign_str.is_empty() && !formatted.starts_with('-') {
619        format!("{}{}", sign_str, formatted)
620    } else {
621        formatted
622    };
623
624    // Apply width and padding.
625    let padded = if width > 0 && num_str.len() < width {
626        let pad_len = width - num_str.len();
627        if left_align {
628            format!("{}{}", num_str, " ".repeat(pad_len))
629        } else if zero_pad {
630            if num_str.starts_with('-') || num_str.starts_with('+') || num_str.starts_with(' ') {
631                let (sign, rest) = num_str.split_at(1);
632                format!("{}{}{}", sign, "0".repeat(pad_len), rest)
633            } else {
634                format!("{}{}", "0".repeat(pad_len), num_str)
635            }
636        } else {
637            format!("{}{}", " ".repeat(pad_len), num_str)
638        }
639    } else {
640        num_str
641    };
642
643    Ok(format!("{}{}{}", prefix, padded, suffix))
644}
645
/// Format `value` in C-style scientific notation: a mantissa with `prec`
/// fractional digits, `e_char`, an exponent sign and at least two exponent
/// digits (for example `1.23e+03`).
///
/// The mantissa and exponent come from the standard library's exponential
/// formatter, which rounds the decimal expansion directly. The earlier
/// hand-rolled version derived the exponent from `log10().floor()` before
/// rounding and multiplied by `10^prec`, which overflows to infinity for large
/// precisions and produced a `NaN` mantissa.
///
/// Non-finite values have no exponent; they are returned as the standard
/// library renders them (`inf`, `-inf`, `NaN`).
fn format_scientific(value: f64, prec: usize, e_char: char) -> String {
    // Rendered as "<mantissa>e<exponent>", e.g. "1.23e3" or "-4.0e-7".
    let rendered = format!("{value:.prec$e}");

    let (mantissa, exponent) = match rendered.split_once('e') {
        Some(parts) => parts,
        None => return rendered,
    };
    let exp: i32 = match exponent.parse() {
        Ok(exp) => exp,
        Err(_) => return rendered,
    };

    let exp_sign = if exp < 0 { '-' } else { '+' };
    format!("{mantissa}{e_char}{exp_sign}{:02}", exp.unsigned_abs())
}
682
683/// Format using %g - shortest representation.
684fn format_g(value: f64, prec: usize, upper: bool) -> String {
685    let prec = if prec == 0 { 1 } else { prec };
686
687    if value == 0.0 {
688        let sign = if value.is_sign_negative() { "-" } else { "" };
689        return format!("{sign}0");
690    }
691
692    let abs = value.abs();
693    let exp = abs.log10().floor() as i32;
694    let e_char = if upper { 'E' } else { 'e' };
695
696    if exp < -4 || exp >= prec as i32 {
697        let sig_prec = prec.saturating_sub(1);
698        let s = format_scientific(value, sig_prec, e_char);
699        trim_g_zeros(&s)
700    } else {
701        let decimal_prec = if prec as i32 > exp + 1 {
702            (prec as i32 - exp - 1) as usize
703        } else {
704            0
705        };
706        let s = format!("{value:.decimal_prec$}");
707        trim_g_zeros(&s)
708    }
709}
710
/// Remove trailing fractional zeros (and a then-dangling `.`) from a
/// formatted number, keeping any exponent part intact.
///
/// Zeros are only removed when the mantissa contains a decimal point. Without
/// that check an integer such as `100000` was reduced to `1`.
fn trim_g_zeros(s: &str) -> String {
    let (mantissa, exponent) = match s.find(['e', 'E']) {
        Some(e_pos) => s.split_at(e_pos),
        None => (s, ""),
    };

    if !mantissa.contains('.') {
        // No fractional part: all trailing zeros are significant.
        return s.to_string();
    }

    let trimmed = mantissa.trim_end_matches('0').trim_end_matches('.');
    format!("{trimmed}{exponent}")
}
720
721/// Convert a single numeric token according to the config.
722fn convert_number(token: &str, config: &NumfmtConfig) -> Result<String, String> {
723    // Parse the input number (with optional suffix).
724    let raw_value = parse_number_with_suffix(token, config.from)?;
725
726    // Apply from-unit scaling.
727    let value = raw_value * config.from_unit;
728
729    // Apply to-unit scaling.
730    let value = value / config.to_unit;
731
732    // Format the output.
733    let mut result = if let Some(ref fmt) = config.format {
734        // If --to is also specified, first scale, then apply format padding.
735        if config.to != ScaleUnit::None {
736            let scaled = format_scaled(value, config.to, config.round);
737            // Extract width from the format string and apply padding.
738            apply_format_padding(&scaled, fmt)
739        } else {
740            let rounded = apply_round(value, config.round);
741            apply_format(rounded, fmt)?
742        }
743    } else if config.to != ScaleUnit::None {
744        format_scaled(value, config.to, config.round)
745    } else {
746        let rounded = apply_round(value, config.round);
747        format_plain_number(rounded)
748    };
749
750    // Apply grouping.
751    if config.grouping {
752        result = group_thousands(&result);
753    }
754
755    // Apply suffix.
756    if let Some(ref suffix) = config.suffix {
757        result.push_str(suffix);
758    }
759
760    // Apply padding.
761    if let Some(pad) = config.padding {
762        let pad_width = pad.unsigned_abs() as usize;
763        if result.len() < pad_width {
764            let deficit = pad_width - result.len();
765            if pad < 0 {
766                // Left-align (pad on right).
767                result = format!("{}{}", result, " ".repeat(deficit));
768            } else {
769                // Right-align (pad on left).
770                result = format!("{}{}", " ".repeat(deficit), result);
771            }
772        }
773    }
774
775    Ok(result)
776}
777
/// Split a line into fields.
///
/// With a delimiter, the line is split at every occurrence of it (empty
/// fields are kept). Without one, fields are the maximal runs of
/// non-whitespace characters; leading and trailing whitespace produce no
/// fields. A line with no fields at all (empty or blank) is returned as a
/// single field containing the whole line.
///
/// Whitespace here means ASCII whitespace, the same definition
/// `reassemble_fields` uses when it walks the original line. Splitting on
/// Unicode whitespace (as before) made the two disagree on lines containing
/// e.g. a no-break space, so converted values were written back into the
/// wrong positions.
fn split_fields<'a>(line: &'a str, delimiter: Option<char>) -> Vec<&'a str> {
    match delimiter {
        Some(delim) => line.split(delim).collect(),
        None => {
            let fields: Vec<&str> = line.split_ascii_whitespace().collect();
            if fields.is_empty() {
                vec![line]
            } else {
                fields
            }
        }
    }
}
819
/// Rebuild an output line from converted fields.
///
/// With a delimiter, the converted fields are joined with it. Without one,
/// the original line is walked and every run of non-whitespace bytes is
/// replaced by the next converted field, so the original ASCII whitespace is
/// kept exactly. If `converted` runs out, the corresponding entry of `fields`
/// is used instead; if both run out, the token is dropped.
fn reassemble_fields(
    original: &str,
    fields: &[&str],
    converted: &[String],
    delimiter: Option<char>,
) -> String {
    if let Some(delim) = delimiter {
        let mut utf8 = [0u8; 4];
        let separator: &str = delim.encode_utf8(&mut utf8);
        return converted.join(separator);
    }

    let mut rebuilt = String::with_capacity(original.len());
    let mut remaining = original;
    let mut slot = 0usize;

    while !remaining.is_empty() {
        // Copy the whitespace run in front of the next token verbatim.
        let gap = remaining
            .bytes()
            .take_while(|b| b.is_ascii_whitespace())
            .count();
        rebuilt.push_str(&remaining[..gap]);
        remaining = &remaining[gap..];
        if remaining.is_empty() {
            break;
        }

        // Replace the token itself.
        if let Some(replacement) = converted.get(slot) {
            rebuilt.push_str(replacement);
        } else if let Some(untouched) = fields.get(slot) {
            rebuilt.push_str(untouched);
        }
        slot += 1;

        // Skip the original token. It ends at an ASCII byte or at the end of
        // the line, so the cut is always on a character boundary.
        let token_len = remaining
            .bytes()
            .take_while(|b| !b.is_ascii_whitespace())
            .count();
        remaining = &remaining[token_len..];
    }

    rebuilt
}
870
871/// Process a single line according to the numfmt configuration.
872pub fn process_line(line: &str, config: &NumfmtConfig) -> Result<String, String> {
873    let fields = split_fields(line, config.delimiter);
874
875    if fields.is_empty() {
876        return Ok(line.to_string());
877    }
878
879    let all_fields = config.field.is_empty();
880
881    let mut converted: Vec<String> = Vec::with_capacity(fields.len());
882    for (i, field) in fields.iter().enumerate() {
883        let field_num = i + 1; // 1-based
884        let should_convert = all_fields || config.field.contains(&field_num);
885
886        if should_convert {
887            match convert_number(field, config) {
888                Ok(s) => converted.push(s),
889                Err(e) => match config.invalid {
890                    InvalidMode::Abort => return Err(e),
891                    InvalidMode::Fail => {
892                        eprintln!("numfmt: {}", e);
893                        converted.push(field.to_string());
894                    }
895                    InvalidMode::Warn => {
896                        eprintln!("numfmt: {}", e);
897                        converted.push(field.to_string());
898                    }
899                    InvalidMode::Ignore => {
900                        converted.push(field.to_string());
901                    }
902                },
903            }
904        } else {
905            converted.push(field.to_string());
906        }
907    }
908
909    Ok(reassemble_fields(
910        line,
911        &fields,
912        &converted,
913        config.delimiter,
914    ))
915}
916
917/// Run the numfmt command with the given configuration and input.
918pub fn run_numfmt<R: std::io::BufRead, W: Write>(
919    input: R,
920    mut output: W,
921    config: &NumfmtConfig,
922) -> Result<(), String> {
923    let terminator = if config.zero_terminated { b'\0' } else { b'\n' };
924    let mut header_remaining = config.header;
925    let mut buf = Vec::new();
926    let mut reader = input;
927    let mut had_error = false;
928
929    loop {
930        buf.clear();
931        let bytes_read = reader
932            .read_until(terminator, &mut buf)
933            .map_err(|e| format!("read error: {}", e))?;
934        if bytes_read == 0 {
935            break;
936        }
937
938        // Remove the terminator for processing.
939        let line = if buf.last() == Some(&terminator) {
940            &buf[..buf.len() - 1]
941        } else {
942            &buf[..]
943        };
944        let line_str = String::from_utf8_lossy(line);
945
946        if header_remaining > 0 {
947            header_remaining -= 1;
948            output
949                .write_all(line_str.as_bytes())
950                .map_err(|e| format!("write error: {}", e))?;
951            output
952                .write_all(&[terminator])
953                .map_err(|e| format!("write error: {}", e))?;
954            continue;
955        }
956
957        match process_line(&line_str, config) {
958            Ok(result) => {
959                output
960                    .write_all(result.as_bytes())
961                    .map_err(|e| format!("write error: {}", e))?;
962                output
963                    .write_all(&[terminator])
964                    .map_err(|e| format!("write error: {}", e))?;
965            }
966            Err(e) => {
967                match config.invalid {
968                    InvalidMode::Abort => {
969                        eprintln!("numfmt: {}", e);
970                        return Err(e);
971                    }
972                    InvalidMode::Fail => {
973                        eprintln!("numfmt: {}", e);
974                        // Output original line.
975                        output
976                            .write_all(line_str.as_bytes())
977                            .map_err(|e| format!("write error: {}", e))?;
978                        output
979                            .write_all(&[terminator])
980                            .map_err(|e| format!("write error: {}", e))?;
981                        had_error = true;
982                    }
983                    InvalidMode::Warn => {
984                        eprintln!("numfmt: {}", e);
985                        output
986                            .write_all(line_str.as_bytes())
987                            .map_err(|e| format!("write error: {}", e))?;
988                        output
989                            .write_all(&[terminator])
990                            .map_err(|e| format!("write error: {}", e))?;
991                    }
992                    InvalidMode::Ignore => {
993                        output
994                            .write_all(line_str.as_bytes())
995                            .map_err(|e| format!("write error: {}", e))?;
996                        output
997                            .write_all(&[terminator])
998                            .map_err(|e| format!("write error: {}", e))?;
999                    }
1000                }
1001            }
1002        }
1003    }
1004
1005    output.flush().map_err(|e| format!("flush error: {}", e))?;
1006
1007    if had_error {
1008        Err("conversion errors occurred".to_string())
1009    } else {
1010        Ok(())
1011    }
1012}
coreutils_rs/numfmt/core.rs

coreutils_rs/numfmt/
core.rs