coreutils_rs/numfmt/
core.rs

1use std::io::Write;
2
/// Unit scale for input/output conversion.
///
/// Selects which suffix table (`SI_SUFFIXES` or `IEC_SUFFIXES`) is consulted
/// when a number is parsed or formatted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScaleUnit {
    /// No scaling.
    None,
    /// SI (powers of 1000): K=1000, M=10^6, G=10^9, T=10^12, P=10^15, E=10^18,
    /// Z=10^21, Y=10^24, R=10^27, Q=10^30.
    Si,
    /// IEC (powers of 1024): K=1024, M=1048576, G=2^30, T=2^40, P=2^50, E=2^60,
    /// Z=2^70, Y=2^80, R=2^90, Q=2^100.
    Iec,
    /// IEC with 'i' suffix: Ki=1024, Mi=1048576, etc.
    IecI,
    /// Auto-detect from suffix (for --from=auto): a suffix ending in 'i' is
    /// read as IEC, any other suffix as SI. When used as an output scale it
    /// is formatted like `Si`.
    Auto,
}
17
/// Rounding method.
///
/// `NumfmtConfig::default()` selects `FromZero`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RoundMethod {
    /// Round up (toward +infinity).
    Up,
    /// Round down (toward -infinity).
    Down,
    /// Round away from zero (the default used by `NumfmtConfig::default()`).
    FromZero,
    /// Round toward zero.
    TowardsZero,
    /// Round to nearest, half away from zero.
    Nearest,
}
32
/// How to handle invalid input.
///
/// `process_line` consults this whenever a selected field fails to convert.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InvalidMode {
    /// Stop at the first invalid field: `process_line` returns the error to
    /// its caller.
    Abort,
    /// Print the error to stderr, keep the field unchanged and continue.
    Fail,
    /// Print the error to stderr, keep the field unchanged and continue.
    /// Inside `process_line` this is handled exactly like `Fail`.
    Warn,
    /// Silently ignore invalid input: the field is kept unchanged.
    Ignore,
}
45
/// Configuration for the numfmt command.
pub struct NumfmtConfig {
    /// Scale used to interpret suffixes on input numbers.
    pub from: ScaleUnit,
    /// Scale used to format output numbers (`ScaleUnit::None` = plain number).
    pub to: ScaleUnit,
    /// Factor the parsed input value is multiplied by.
    pub from_unit: f64,
    /// Factor the value is divided by before formatting.
    pub to_unit: f64,
    /// Minimum output width; a negative value left-aligns (pads on the right),
    /// a positive value right-aligns (pads on the left).
    pub padding: Option<i32>,
    /// Rounding method applied when formatting.
    pub round: RoundMethod,
    /// Text appended to every converted number.
    pub suffix: Option<String>,
    /// printf-style format string applied to converted numbers.
    pub format: Option<String>,
    /// 1-based indices of the fields to convert; an empty vector means
    /// "convert every field".
    pub field: Vec<usize>,
    /// Field delimiter; `None` splits on runs of whitespace.
    pub delimiter: Option<char>,
    /// Number of leading input lines copied through without conversion.
    pub header: usize,
    /// What to do when a field cannot be converted.
    pub invalid: InvalidMode,
    /// Whether to insert thousands separators into the output.
    pub grouping: bool,
    /// Use NUL instead of newline as the line terminator.
    pub zero_terminated: bool,
}
63
/// Default configuration: no scaling in either direction, unit factors of 1,
/// round away from zero, convert only the first field, split on whitespace,
/// abort on invalid input, newline-terminated lines.
impl Default for NumfmtConfig {
    fn default() -> Self {
        Self {
            from: ScaleUnit::None,
            to: ScaleUnit::None,
            from_unit: 1.0,
            to_unit: 1.0,
            padding: None,
            round: RoundMethod::FromZero,
            suffix: None,
            format: None,
            // Only the first field is converted unless told otherwise.
            field: vec![1],
            delimiter: None,
            header: 0,
            invalid: InvalidMode::Abort,
            grouping: false,
            zero_terminated: false,
        }
    }
}
84
/// SI suffix table: suffix char -> multiplier (powers of 1000), ordered from
/// smallest (K) to largest (Q). `format_with_scale` relies on this ascending
/// order when it picks and promotes suffixes.
/// GNU coreutils 9.4 uses uppercase 'K' for SI kilo (same suffix letter as IEC, but 1e3 not 1024).
const SI_SUFFIXES: &[(char, f64)] = &[
    ('K', 1e3),
    ('M', 1e6),
    ('G', 1e9),
    ('T', 1e12),
    ('P', 1e15),
    ('E', 1e18),
    ('Z', 1e21),
    ('Y', 1e24),
    ('R', 1e27),
    ('Q', 1e30),
];
99
/// IEC suffix table: suffix char -> multiplier (powers of 1024), ordered from
/// smallest (K) to largest (Q). The same table serves both the plain IEC
/// suffixes and the "i"-suffixed variants (Ki, Mi, ...).
const IEC_SUFFIXES: &[(char, f64)] = &[
    ('K', 1024.0),                                        // 2^10
    ('M', 1_048_576.0),                                   // 2^20
    ('G', 1_073_741_824.0),                               // 2^30
    ('T', 1_099_511_627_776.0),                           // 2^40
    ('P', 1_125_899_906_842_624.0),                       // 2^50
    ('E', 1_152_921_504_606_846_976.0),                   // 2^60
    ('Z', 1_180_591_620_717_411_303_424.0),               // 2^70
    ('Y', 1_208_925_819_614_629_174_706_176.0),           // 2^80
    ('R', 1_237_940_039_285_380_274_899_124_224.0),       // 2^90
    ('Q', 1_267_650_600_228_229_401_496_703_205_376.0),   // 2^100
];
113
114/// Parse a scale unit string.
115pub fn parse_scale_unit(s: &str) -> Result<ScaleUnit, String> {
116    match s {
117        "none" => Ok(ScaleUnit::None),
118        "si" => Ok(ScaleUnit::Si),
119        "iec" => Ok(ScaleUnit::Iec),
120        "iec-i" => Ok(ScaleUnit::IecI),
121        "auto" => Ok(ScaleUnit::Auto),
122        _ => Err(format!("invalid unit: '{}'", s)),
123    }
124}
125
126/// Parse a round method string.
127pub fn parse_round_method(s: &str) -> Result<RoundMethod, String> {
128    match s {
129        "up" => Ok(RoundMethod::Up),
130        "down" => Ok(RoundMethod::Down),
131        "from-zero" => Ok(RoundMethod::FromZero),
132        "towards-zero" => Ok(RoundMethod::TowardsZero),
133        "nearest" => Ok(RoundMethod::Nearest),
134        _ => Err(format!("invalid rounding method: '{}'", s)),
135    }
136}
137
138/// Parse an invalid mode string.
139pub fn parse_invalid_mode(s: &str) -> Result<InvalidMode, String> {
140    match s {
141        "abort" => Ok(InvalidMode::Abort),
142        "fail" => Ok(InvalidMode::Fail),
143        "warn" => Ok(InvalidMode::Warn),
144        "ignore" => Ok(InvalidMode::Ignore),
145        _ => Err(format!("invalid mode: '{}'", s)),
146    }
147}
148
/// Parse a field specification string like "1", "1,3", "1-5", or "-".
///
/// Returns the selected 1-based field indices, sorted ascending and without
/// duplicates. The special result of an empty vector means "all fields"; it is
/// produced by the spec "-" (alone or as one of the comma-separated parts).
///
/// Ranges may be open on either side: "-5" is 1..=5, and "3-" runs from 3 up
/// to a fixed upper bound of 9999 (or just the start itself if the start is
/// already beyond that bound).
///
/// # Errors
///
/// Returns an error message when a part is not a number, when a field index
/// is 0, or when a range is decreasing (e.g. "5-3").
pub fn parse_fields(s: &str) -> Result<Vec<usize>, String> {
    /// Upper bound substituted for the missing end of an open range ("3-").
    const OPEN_RANGE_END: usize = 9999;

    if s == "-" {
        // All fields - we represent this as an empty vec and handle it specially.
        return Ok(vec![]);
    }

    let mut fields: Vec<usize> = Vec::new();
    for part in s.split(',') {
        let part = part.trim();
        let invalid = || format!("invalid field value '{}'", part);

        match part.split_once('-') {
            // A bare "-" inside a list still selects every field.
            Some(("", "")) => return Ok(vec![]),
            Some((start_str, end_str)) => {
                let start: usize = if start_str.is_empty() {
                    1
                } else {
                    start_str.parse().map_err(|_| invalid())?
                };
                if start == 0 {
                    return Err(format!("fields are numbered from 1: '{}'", part));
                }
                let end: usize = if end_str.is_empty() {
                    // Never let the sentinel fall below the requested start.
                    start.max(OPEN_RANGE_END)
                } else {
                    end_str.parse().map_err(|_| invalid())?
                };
                if end < start {
                    return Err(format!("invalid decreasing range: '{}'", part));
                }
                fields.extend(start..=end);
            }
            None => {
                let n: usize = part.parse().map_err(|_| invalid())?;
                if n == 0 {
                    return Err("fields are numbered from 1".to_string());
                }
                fields.push(n);
            }
        }
    }

    // Sort once and drop duplicates instead of a linear `contains` per insert.
    fields.sort_unstable();
    fields.dedup();
    Ok(fields)
}
205
206/// Parse a number with optional suffix, returning the raw numeric value.
207/// Handles suffixes like K, M, G, T, P, E, Z, Y (and Ki, Mi, etc. for iec-i).
208fn parse_number_with_suffix(s: &str, unit: ScaleUnit) -> Result<f64, String> {
209    let s = s.trim();
210    if s.is_empty() {
211        return Err("invalid number: ''".to_string());
212    }
213
214    // Find where the numeric part ends and the suffix begins.
215    let mut num_end = s.len();
216    let bytes = s.as_bytes();
217    let len = s.len();
218
219    // Check for trailing scale suffix characters.
220    if len > 0 {
221        let last_char = bytes[len - 1] as char;
222
223        match unit {
224            ScaleUnit::Auto | ScaleUnit::IecI => {
225                // Check for 'i' suffix (e.g., Ki, Mi).
226                if last_char == 'i' && len >= 2 {
227                    let prefix_char = (bytes[len - 2] as char).to_ascii_uppercase();
228                    if is_scale_suffix(prefix_char) {
229                        num_end = len - 2;
230                    }
231                } else {
232                    let upper = last_char.to_ascii_uppercase();
233                    if is_scale_suffix(upper) {
234                        num_end = len - 1;
235                    }
236                }
237            }
238            ScaleUnit::Si | ScaleUnit::Iec => {
239                let upper = last_char.to_ascii_uppercase();
240                if is_scale_suffix(upper) {
241                    num_end = len - 1;
242                }
243            }
244            ScaleUnit::None => {}
245        }
246    }
247
248    let num_str = &s[..num_end];
249    let suffix_str = &s[num_end..];
250
251    // Parse the numeric part.
252    let value: f64 = num_str
253        .parse()
254        .map_err(|_| format!("invalid number: '{}'", s))?;
255
256    // Apply suffix multiplier.
257    let multiplier = if suffix_str.is_empty() {
258        1.0
259    } else {
260        let suffix_upper = suffix_str.chars().next().unwrap().to_ascii_uppercase();
261        match unit {
262            ScaleUnit::Auto => {
263                // Auto-detect: if suffix ends with 'i', use IEC; otherwise SI.
264                if suffix_str.len() >= 2 && suffix_str.ends_with('i') {
265                    find_iec_multiplier(suffix_upper)?
266                } else {
267                    find_si_multiplier(suffix_upper)?
268                }
269            }
270            ScaleUnit::Si => find_si_multiplier(suffix_upper)?,
271            ScaleUnit::Iec | ScaleUnit::IecI => find_iec_multiplier(suffix_upper)?,
272            ScaleUnit::None => {
273                return Err(format!("invalid number: '{}'", s));
274            }
275        }
276    };
277
278    Ok(value * multiplier)
279}
280
/// Report whether `c` is one of the upper-case scale letters
/// K, M, G, T, P, E, Z, Y, R or Q.
fn is_scale_suffix(c: char) -> bool {
    "KMGTPEZYRQ".contains(c)
}
284
285fn find_si_multiplier(c: char) -> Result<f64, String> {
286    for &(suffix, mult) in SI_SUFFIXES {
287        if suffix.eq_ignore_ascii_case(&c) {
288            return Ok(mult);
289        }
290    }
291    Err(format!("invalid suffix: '{}'", c))
292}
293
294fn find_iec_multiplier(c: char) -> Result<f64, String> {
295    for &(suffix, mult) in IEC_SUFFIXES {
296        if suffix == c {
297            return Ok(mult);
298        }
299    }
300    Err(format!("invalid suffix: '{}'", c))
301}
302
303/// Apply rounding according to the specified method.
304fn apply_round(value: f64, method: RoundMethod) -> f64 {
305    match method {
306        RoundMethod::Up => value.ceil(),
307        RoundMethod::Down => value.floor(),
308        RoundMethod::FromZero => {
309            if value >= 0.0 {
310                value.ceil()
311            } else {
312                value.floor()
313            }
314        }
315        RoundMethod::TowardsZero => {
316            if value >= 0.0 {
317                value.floor()
318            } else {
319                value.ceil()
320            }
321        }
322        RoundMethod::Nearest => value.round(),
323    }
324}
325
326/// Format a number with scale suffix for output.
327fn format_scaled(value: f64, unit: ScaleUnit, round: RoundMethod) -> String {
328    match unit {
329        ScaleUnit::None => {
330            // Output as plain number.
331            format_plain_number(value)
332        }
333        ScaleUnit::Si => format_with_scale(value, SI_SUFFIXES, "", round),
334        ScaleUnit::Iec => format_with_scale(value, IEC_SUFFIXES, "", round),
335        ScaleUnit::IecI => format_with_scale(value, IEC_SUFFIXES, "i", round),
336        ScaleUnit::Auto => {
337            // For --to=auto, behave like SI.
338            format_with_scale(value, SI_SUFFIXES, "", round)
339        }
340    }
341}
342
/// Format a number without any scale suffix.
///
/// Integral values are printed without a decimal point, including values
/// whose magnitude exceeds the `i64` range (these used to fall through to the
/// fractional branch and gain a spurious ".0" because the `as i64` cast
/// saturates). Non-integral and non-finite values are printed with one
/// decimal place via `{:.1}`.
fn format_plain_number(value: f64) -> String {
    /// 2^63, the first magnitude that no longer fits in an `i64`.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    // `fract()` is NaN for NaN/infinity, so those also take this branch.
    if value.fract() != 0.0 {
        return format!("{:.1}", value);
    }

    if value.abs() < I64_LIMIT {
        // Going through i64 also normalises -0.0 to "0".
        format!("{}", value as i64)
    } else {
        // Too large for i64: let the float formatter print every digit.
        format!("{:.0}", value)
    }
}
353
354/// Format a number with appropriate scale suffix.
355/// Matches GNU numfmt behavior:
356/// - If scaled value < 10: display with 1 decimal place ("N.Nk")
357/// - If scaled value >= 10: display as integer ("NNk")
358/// - If integer would be >= 1000: promote to next suffix
359fn format_with_scale(
360    value: f64,
361    suffixes: &[(char, f64)],
362    i_suffix: &str,
363    round: RoundMethod,
364) -> String {
365    let abs_value = value.abs();
366    let sign = if value < 0.0 { "-" } else { "" };
367
368    // Find the largest suffix that applies.
369    let mut chosen_idx: Option<usize> = None;
370
371    for (idx, &(_suffix, mult)) in suffixes.iter().enumerate().rev() {
372        if abs_value >= mult {
373            chosen_idx = Some(idx);
374            break;
375        }
376    }
377
378    let Some(mut idx) = chosen_idx else {
379        // Value is smaller than the smallest suffix, output as-is.
380        return format_plain_number(value);
381    };
382
383    loop {
384        let (suffix, mult) = suffixes[idx];
385        let scaled = value / mult;
386        let abs_scaled = scaled.abs();
387
388        if abs_scaled < 10.0 {
389            // Display with 1 decimal place: "N.Nk"
390            let rounded = apply_round_for_display(scaled, round);
391            if rounded.abs() >= 10.0 {
392                // Rounding pushed it past 10, switch to integer display.
393                // apply_round_for_display rounds to 1 decimal, so the only
394                // value crossing this boundary is exactly 10.0 — truncation
395                // and rounding agree. Use `as i64` (truncation) which is safe.
396                let int_val = rounded as i64;
397                if int_val.unsigned_abs() >= 1000 && idx + 1 < suffixes.len() {
398                    idx += 1;
399                    continue;
400                }
401                return format!("{sign}{}{}{}", int_val.unsigned_abs(), suffix, i_suffix);
402            }
403            return format!("{sign}{:.1}{}{}", rounded.abs(), suffix, i_suffix);
404        } else {
405            // Display as integer: "NNk"
406            let int_val = apply_round_int(scaled, round);
407            if int_val.unsigned_abs() >= 1000 {
408                if idx + 1 < suffixes.len() {
409                    idx += 1;
410                    continue;
411                }
412                // No next suffix, just output what we have.
413            }
414            return format!("{sign}{}{}{}", int_val.unsigned_abs(), suffix, i_suffix);
415        }
416    }
417}
418
419/// Apply rounding for display purposes (when formatting scaled output).
420/// Rounds to 1 decimal place.
421fn apply_round_for_display(value: f64, method: RoundMethod) -> f64 {
422    let factor = 10.0;
423    let shifted = value * factor;
424    let rounded = match method {
425        RoundMethod::Up => shifted.ceil(),
426        RoundMethod::Down => shifted.floor(),
427        RoundMethod::FromZero => {
428            if shifted >= 0.0 {
429                shifted.ceil()
430            } else {
431                shifted.floor()
432            }
433        }
434        RoundMethod::TowardsZero => {
435            if shifted >= 0.0 {
436                shifted.floor()
437            } else {
438                shifted.ceil()
439            }
440        }
441        RoundMethod::Nearest => shifted.round(),
442    };
443    rounded / factor
444}
445
446/// Apply rounding to get an integer value for display.
447fn apply_round_int(value: f64, method: RoundMethod) -> i64 {
448    match method {
449        RoundMethod::Up => value.ceil() as i64,
450        RoundMethod::Down => value.floor() as i64,
451        RoundMethod::FromZero => {
452            if value >= 0.0 {
453                value.ceil() as i64
454            } else {
455                value.floor() as i64
456            }
457        }
458        RoundMethod::TowardsZero => {
459            if value >= 0.0 {
460                value.floor() as i64
461            } else {
462                value.ceil() as i64
463            }
464        }
465        RoundMethod::Nearest => value.round() as i64,
466    }
467}
468
/// Insert thousands grouping separators (`,`) into the integer part of `s`.
///
/// The integer part is the first run of ASCII digits in `s`. Everything before
/// it (sign, padding, a format prefix) and everything after it (fraction,
/// exponent, scale suffix, unit suffix) is copied through untouched, so
/// "123K" stays "123K" and "  1234.5" becomes "  1,234.5". Strings with no
/// integer part (e.g. ".5", "inf") or with three or fewer integer digits are
/// returned unchanged.
fn group_thousands(s: &str) -> String {
    let Some(start) = s.find(|c: char| c.is_ascii_digit()) else {
        return s.to_string();
    };
    // Digits that directly follow a '.' are a fraction, not an integer part.
    if s[..start].ends_with('.') {
        return s.to_string();
    }

    let run = s[start..].bytes().take_while(u8::is_ascii_digit).count();
    if run <= 3 {
        return s.to_string();
    }

    // Both split points sit next to ASCII digits, so they are char boundaries.
    let (head, tail) = s.split_at(start);
    let (digits, rest) = tail.split_at(run);

    let mut grouped = String::with_capacity(s.len() + run / 3);
    grouped.push_str(head);
    for (i, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that starts a full group of three.
        if i > 0 && (run - i) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }
    grouped.push_str(rest);
    grouped
}
503
/// Apply width/padding from a printf-style format string to an already-scaled string.
/// Used when both --to and --format are specified.
///
/// Only the layout parts of the first `%` directive are honoured: the text
/// before `%` and after the conversion character is kept as prefix/suffix,
/// the `-` flag selects left alignment, and the width gives the minimum field
/// size (space padded). Other flags and any precision are parsed and ignored.
/// If `fmt` contains no `%`, the whole of `fmt` is used as a prefix.
///
/// The conversion character is skipped as a whole `char`, so a non-ASCII
/// character in that position no longer causes a slicing panic.
fn apply_format_padding(scaled: &str, fmt: &str) -> String {
    let Some(percent) = fmt.find('%') else {
        return format!("{}{}", fmt, scaled);
    };
    let prefix = &fmt[..percent];
    let spec = &fmt[percent + 1..];

    // Flags: only '-' influences the output here.
    let after_flags = spec.trim_start_matches(['0', '+', ' ', '#', '\'', '-']);
    let flags = &spec[..spec.len() - after_flags.len()];
    let left_align = flags.contains('-');

    // Width: decimal digits, saturating instead of overflowing.
    let after_width = after_flags.trim_start_matches(|c: char| c.is_ascii_digit());
    let width = after_flags[..after_flags.len() - after_width.len()]
        .bytes()
        .fold(0usize, |acc, digit| {
            acc.saturating_mul(10)
                .saturating_add(usize::from(digit - b'0'))
        });

    // Skip the precision, then exactly one conversion character (if present).
    let after_precision =
        after_width.trim_start_matches(|c: char| c == '.' || c.is_ascii_digit());
    let mut tail = after_precision.chars();
    tail.next();
    let suffix = tail.as_str();

    let padded = if scaled.len() < width {
        let fill = " ".repeat(width - scaled.len());
        if left_align {
            format!("{}{}", scaled, fill)
        } else {
            format!("{}{}", fill, scaled)
        }
    } else {
        scaled.to_string()
    };

    format!("{}{}{}", prefix, padded, suffix)
}
562
563/// Apply printf-style format to a number.
564fn apply_format(value: f64, fmt: &str) -> Result<String, String> {
565    // Parse format: %[flags][width][.precision]f
566    let bytes = fmt.as_bytes();
567    let mut i = 0;
568
569    // Find '%'.
570    while i < bytes.len() && bytes[i] != b'%' {
571        i += 1;
572    }
573    let prefix = &fmt[..i];
574    if i >= bytes.len() {
575        return Err(format!("invalid format: '{}'", fmt));
576    }
577    i += 1; // skip '%'
578
579    if i >= bytes.len() {
580        return Err(format!("invalid format: '{}'", fmt));
581    }
582
583    // Handle %%
584    if bytes[i] == b'%' {
585        return Ok(format!("{}%", prefix));
586    }
587
588    // Parse flags.
589    let mut zero_pad = false;
590    let mut left_align = false;
591    let mut plus_sign = false;
592    let mut space_sign = false;
593    while i < bytes.len() {
594        match bytes[i] {
595            b'0' => zero_pad = true,
596            b'-' => left_align = true,
597            b'+' => plus_sign = true,
598            b' ' => space_sign = true,
599            b'#' => {}
600            b'\'' => {} // grouping flag, handled separately
601            _ => break,
602        }
603        i += 1;
604    }
605
606    // Parse width.
607    let mut width: usize = 0;
608    while i < bytes.len() && bytes[i].is_ascii_digit() {
609        width = width
610            .saturating_mul(10)
611            .saturating_add((bytes[i] - b'0') as usize);
612        i += 1;
613    }
614
615    // Parse precision.
616    let mut precision: Option<usize> = None;
617    if i < bytes.len() && bytes[i] == b'.' {
618        i += 1;
619        let mut prec: usize = 0;
620        while i < bytes.len() && bytes[i].is_ascii_digit() {
621            prec = prec
622                .saturating_mul(10)
623                .saturating_add((bytes[i] - b'0') as usize);
624            i += 1;
625        }
626        precision = Some(prec);
627    }
628
629    // Parse conversion type.
630    if i >= bytes.len() {
631        return Err(format!("invalid format: '{}'", fmt));
632    }
633    let conv = bytes[i] as char;
634    i += 1;
635    let suffix = &fmt[i..];
636
637    let prec = precision.unwrap_or(6);
638    let formatted = match conv {
639        'f' => format!("{:.prec$}", value, prec = prec),
640        'e' => format_scientific(value, prec, 'e'),
641        'E' => format_scientific(value, prec, 'E'),
642        'g' => format_g(value, prec, false),
643        'G' => format_g(value, prec, true),
644        _ => return Err(format!("invalid format character: '{}'", conv)),
645    };
646
647    // Apply sign prefix.
648    let sign_str = if value < 0.0 {
649        ""
650    } else if plus_sign {
651        "+"
652    } else if space_sign {
653        " "
654    } else {
655        ""
656    };
657
658    let num_str = if !sign_str.is_empty() && !formatted.starts_with('-') {
659        format!("{}{}", sign_str, formatted)
660    } else {
661        formatted
662    };
663
664    // Apply width and padding.
665    let padded = if width > 0 && num_str.len() < width {
666        let pad_len = width - num_str.len();
667        if left_align {
668            format!("{}{}", num_str, " ".repeat(pad_len))
669        } else if zero_pad {
670            if num_str.starts_with('-') || num_str.starts_with('+') || num_str.starts_with(' ') {
671                let (sign, rest) = num_str.split_at(1);
672                format!("{}{}{}", sign, "0".repeat(pad_len), rest)
673            } else {
674                format!("{}{}", "0".repeat(pad_len), num_str)
675            }
676        } else {
677            format!("{}{}", " ".repeat(pad_len), num_str)
678        }
679    } else {
680        num_str
681    };
682
683    Ok(format!("{}{}{}", prefix, padded, suffix))
684}
685
/// Format in scientific notation, printf `%e` style.
///
/// Produces `[-]d.ddd` with `prec` fractional digits, followed by `e_char`,
/// an explicit exponent sign and at least two exponent digits
/// (e.g. `1.23e+03`, `-1.2E-04`).
///
/// Digit generation and rounding are delegated to the standard library's
/// `{:e}` float formatter instead of being derived from `log10` and a manual
/// rounding step, which was imprecise near powers of ten and broke down for
/// very large precisions. Non-finite values are rendered as `inf`, `-inf` or
/// `nan` (upper-cased when `e_char` is upper-case).
fn format_scientific(value: f64, prec: usize, e_char: char) -> String {
    if !value.is_finite() {
        let text = if value.is_nan() {
            "nan"
        } else if value > 0.0 {
            "inf"
        } else {
            "-inf"
        };
        return if e_char.is_ascii_uppercase() {
            text.to_ascii_uppercase()
        } else {
            text.to_string()
        };
    }

    // Rust renders e.g. "1.23e3" / "1.2e-4": same mantissa as printf, but the
    // exponent lacks the '+' sign and the two-digit minimum.
    let rendered = format!("{:.prec$e}", value, prec = prec);
    let Some((mantissa, exponent)) = rendered.split_once('e') else {
        return rendered;
    };
    let (exp_sign, exp_digits) = match exponent.strip_prefix('-') {
        Some(digits) => ('-', digits),
        None => ('+', exponent),
    };

    format!("{mantissa}{e_char}{exp_sign}{exp_digits:0>2}")
}
722
723/// Format using %g - shortest representation.
724fn format_g(value: f64, prec: usize, upper: bool) -> String {
725    let prec = if prec == 0 { 1 } else { prec };
726
727    if value == 0.0 {
728        let sign = if value.is_sign_negative() { "-" } else { "" };
729        return format!("{sign}0");
730    }
731
732    let abs = value.abs();
733    let exp = abs.log10().floor() as i32;
734    let e_char = if upper { 'E' } else { 'e' };
735
736    if exp < -4 || exp >= prec as i32 {
737        let sig_prec = prec.saturating_sub(1);
738        let s = format_scientific(value, sig_prec, e_char);
739        trim_g_zeros(&s)
740    } else {
741        let decimal_prec = if prec as i32 > exp + 1 {
742            (prec as i32 - exp - 1) as usize
743        } else {
744            0
745        };
746        let s = format!("{value:.decimal_prec$}");
747        trim_g_zeros(&s)
748    }
749}
750
/// Strip insignificant trailing zeros from a `%g`-style rendering.
///
/// Only the fractional part of the mantissa is trimmed: zeros after the
/// decimal point are removed, and the decimal point itself is dropped when
/// nothing remains behind it. An exponent part (`e...`/`E...`) is preserved
/// verbatim. A mantissa without a decimal point is returned untouched, so
/// integers such as "100" keep their significant zeros.
fn trim_g_zeros(s: &str) -> String {
    let (mantissa, exponent) = match s.find(['e', 'E']) {
        Some(e_pos) => s.split_at(e_pos),
        None => (s, ""),
    };

    // Without a '.', every trailing zero is a significant digit.
    if !mantissa.contains('.') {
        return s.to_string();
    }

    let trimmed = mantissa.trim_end_matches('0').trim_end_matches('.');
    format!("{trimmed}{exponent}")
}
760
761/// Convert a single numeric token according to the config.
762fn convert_number(token: &str, config: &NumfmtConfig) -> Result<String, String> {
763    // Parse the input number (with optional suffix).
764    let raw_value = parse_number_with_suffix(token, config.from)?;
765
766    // Apply from-unit scaling.
767    let value = raw_value * config.from_unit;
768
769    // Apply to-unit scaling.
770    let value = value / config.to_unit;
771
772    // Format the output.
773    let mut result = if let Some(ref fmt) = config.format {
774        // If --to is also specified, first scale, then apply format padding.
775        if config.to != ScaleUnit::None {
776            let scaled = format_scaled(value, config.to, config.round);
777            // Extract width from the format string and apply padding.
778            apply_format_padding(&scaled, fmt)
779        } else {
780            let rounded = apply_round(value, config.round);
781            apply_format(rounded, fmt)?
782        }
783    } else if config.to != ScaleUnit::None {
784        format_scaled(value, config.to, config.round)
785    } else {
786        let rounded = apply_round(value, config.round);
787        format_plain_number(rounded)
788    };
789
790    // Apply grouping.
791    if config.grouping {
792        result = group_thousands(&result);
793    }
794
795    // Apply suffix.
796    if let Some(ref suffix) = config.suffix {
797        result.push_str(suffix);
798    }
799
800    // Apply padding.
801    if let Some(pad) = config.padding {
802        let pad_width = pad.unsigned_abs() as usize;
803        if result.len() < pad_width {
804            let deficit = pad_width - result.len();
805            if pad < 0 {
806                // Left-align (pad on right).
807                result = format!("{}{}", result, " ".repeat(deficit));
808            } else {
809                // Right-align (pad on left).
810                result = format!("{}{}", " ".repeat(deficit), result);
811            }
812        }
813    }
814
815    Ok(result)
816}
817
/// Split a line into fields based on the delimiter.
///
/// With a delimiter, the line is split at every occurrence of it, so empty
/// fields are kept. Without one, fields are the maximal runs of
/// non-whitespace characters; leading and trailing whitespace produce no
/// fields. A line without any such run (empty or all whitespace) is returned
/// as a single field holding the whole line.
fn split_fields<'a>(line: &'a str, delimiter: Option<char>) -> Vec<&'a str> {
    let Some(delim) = delimiter else {
        let tokens: Vec<&'a str> = line.split_whitespace().collect();
        return if tokens.is_empty() { vec![line] } else { tokens };
    };
    line.split(delim).collect()
}
859
/// Reassemble fields into a line with proper spacing.
///
/// With a delimiter, the converted fields are simply joined by it. Without
/// one, `original` is walked and every run of non-whitespace characters is
/// replaced by the next converted field while all whitespace is copied
/// through verbatim, which preserves the input's spacing.
///
/// If `converted` holds fewer entries than there are tokens, the remaining
/// tokens are taken from `fields`; tokens beyond both slices are dropped.
///
/// Whitespace is classified with `char::is_whitespace`, the same rule
/// `split_fields` uses, so the tokens found here line up with the fields that
/// were split (an ASCII-only check disagreed on characters such as U+2003 or
/// vertical tab and lost fields).
fn reassemble_fields(
    original: &str,
    fields: &[&str],
    converted: &[String],
    delimiter: Option<char>,
) -> String {
    if let Some(delim) = delimiter {
        return converted.join(&delim.to_string());
    }

    // Replacement for the n-th token: converted[n], falling back to fields[n].
    let mut replacements = converted
        .iter()
        .map(String::as_str)
        .chain(fields.iter().skip(converted.len()).copied());

    let mut result = String::with_capacity(original.len());
    let mut rest = original;
    while !rest.is_empty() {
        // Copy the whitespace run in front of the next token.
        let gap = rest
            .find(|c: char| !c.is_whitespace())
            .unwrap_or(rest.len());
        result.push_str(&rest[..gap]);
        rest = &rest[gap..];
        if rest.is_empty() {
            break;
        }

        // Swap the token itself for its replacement.
        let token_len = rest.find(char::is_whitespace).unwrap_or(rest.len());
        if let Some(replacement) = replacements.next() {
            result.push_str(replacement);
        }
        rest = &rest[token_len..];
    }

    result
}
910
911/// Process a single line according to the numfmt configuration.
912pub fn process_line(line: &str, config: &NumfmtConfig) -> Result<String, String> {
913    let fields = split_fields(line, config.delimiter);
914
915    if fields.is_empty() {
916        return Ok(line.to_string());
917    }
918
919    let all_fields = config.field.is_empty();
920
921    let mut converted: Vec<String> = Vec::with_capacity(fields.len());
922    for (i, field) in fields.iter().enumerate() {
923        let field_num = i + 1; // 1-based
924        let should_convert = all_fields || config.field.contains(&field_num);
925
926        if should_convert {
927            match convert_number(field, config) {
928                Ok(s) => converted.push(s),
929                Err(e) => match config.invalid {
930                    InvalidMode::Abort => return Err(e),
931                    InvalidMode::Fail => {
932                        eprintln!("numfmt: {}", e);
933                        converted.push(field.to_string());
934                    }
935                    InvalidMode::Warn => {
936                        eprintln!("numfmt: {}", e);
937                        converted.push(field.to_string());
938                    }
939                    InvalidMode::Ignore => {
940                        converted.push(field.to_string());
941                    }
942                },
943            }
944        } else {
945            converted.push(field.to_string());
946        }
947    }
948
949    Ok(reassemble_fields(
950        line,
951        &fields,
952        &converted,
953        config.delimiter,
954    ))
955}
956
957/// Run the numfmt command with the given configuration and input.
958pub fn run_numfmt<R: std::io::BufRead, W: Write>(
959    input: R,
960    mut output: W,
961    config: &NumfmtConfig,
962) -> Result<(), String> {
963    let terminator = if config.zero_terminated { b'\0' } else { b'\n' };
964    let mut header_remaining = config.header;
965    let mut buf = Vec::new();
966    let mut reader = input;
967    let mut had_error = false;
968
969    loop {
970        buf.clear();
971        let bytes_read = reader
972            .read_until(terminator, &mut buf)
973            .map_err(|e| format!("read error: {}", e))?;
974        if bytes_read == 0 {
975            break;
976        }
977
978        // Remove the terminator for processing.
979        let line = if buf.last() == Some(&terminator) {
980            &buf[..buf.len() - 1]
981        } else {
982            &buf[..]
983        };
984        let line_str = String::from_utf8_lossy(line);
985
986        if header_remaining > 0 {
987            header_remaining -= 1;
988            output
989                .write_all(line_str.as_bytes())
990                .map_err(|e| format!("write error: {}", e))?;
991            output
992                .write_all(&[terminator])
993                .map_err(|e| format!("write error: {}", e))?;
994            continue;
995        }
996
997        match process_line(&line_str, config) {
998            Ok(result) => {
999                output
1000                    .write_all(result.as_bytes())
1001                    .map_err(|e| format!("write error: {}", e))?;
1002                output
1003                    .write_all(&[terminator])
1004                    .map_err(|e| format!("write error: {}", e))?;
1005            }
1006            Err(e) => {
1007                match config.invalid {
1008                    InvalidMode::Abort => {
1009                        eprintln!("numfmt: {}", e);
1010                        return Err(e);
1011                    }
1012                    InvalidMode::Fail => {
1013                        eprintln!("numfmt: {}", e);
1014                        // Output original line.
1015                        output
1016                            .write_all(line_str.as_bytes())
1017                            .map_err(|e| format!("write error: {}", e))?;
1018                        output
1019                            .write_all(&[terminator])
1020                            .map_err(|e| format!("write error: {}", e))?;
1021                        had_error = true;
1022                    }
1023                    InvalidMode::Warn => {
1024                        eprintln!("numfmt: {}", e);
1025                        output
1026                            .write_all(line_str.as_bytes())
1027                            .map_err(|e| format!("write error: {}", e))?;
1028                        output
1029                            .write_all(&[terminator])
1030                            .map_err(|e| format!("write error: {}", e))?;
1031                    }
1032                    InvalidMode::Ignore => {
1033                        output
1034                            .write_all(line_str.as_bytes())
1035                            .map_err(|e| format!("write error: {}", e))?;
1036                        output
1037                            .write_all(&[terminator])
1038                            .map_err(|e| format!("write error: {}", e))?;
1039                    }
1040                }
1041            }
1042        }
1043    }
1044
1045    output.flush().map_err(|e| format!("flush error: {}", e))?;
1046
1047    if had_error {
1048        Err("conversion errors occurred".to_string())
1049    } else {
1050        Ok(())
1051    }
1052}
coreutils_rs/numfmt/core.rs

coreutils_rs/numfmt/
core.rs