coreutils_rs/numfmt/
core.rs

1use std::io::Write;
2
/// Suffix convention used when reading (`--from`) or writing (`--to`) numbers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ScaleUnit {
    /// Numbers carry no unit suffix and are not scaled.
    None,
    /// Decimal prefixes: K=1000, M=10^6, G=10^9, T=10^12, P=10^15, E=10^18,
    /// Z=10^21, Y=10^24.
    Si,
    /// Binary prefixes written as a single letter: K=1024, M=1024^2, G=1024^3, ...
    Iec,
    /// Binary prefixes written with a trailing `i`: Ki=1024, Mi=1024^2, ...
    IecI,
    /// Decide per number from its suffix (used for `--from=auto`).
    Auto,
}
17
/// Direction in which a value is rounded before it is printed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RoundMethod {
    /// Always round toward positive infinity (ceiling).
    Up,
    /// Always round toward negative infinity (floor).
    Down,
    /// Round away from zero: positives go up, negatives go down.
    FromZero,
    /// Round toward zero, i.e. truncate.
    TowardsZero,
    /// Round to the nearest value, ties going away from zero. This is the default.
    Nearest,
}
32
/// Policy applied when an input token cannot be converted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InvalidMode {
    /// Stop at the first invalid input and report the error.
    Abort,
    /// Report the error, keep the original text, and carry on.
    Fail,
    /// Emit a warning, keep the original text, and carry on.
    Warn,
    /// Keep the original text without reporting anything.
    Ignore,
}
45
46/// Configuration for the numfmt command.
47pub struct NumfmtConfig {
48    pub from: ScaleUnit,
49    pub to: ScaleUnit,
50    pub from_unit: f64,
51    pub to_unit: f64,
52    pub padding: Option<i32>,
53    pub round: RoundMethod,
54    pub suffix: Option<String>,
55    pub format: Option<String>,
56    pub field: Vec<usize>,
57    pub delimiter: Option<char>,
58    pub header: usize,
59    pub invalid: InvalidMode,
60    pub grouping: bool,
61    pub zero_terminated: bool,
62}
63
64impl Default for NumfmtConfig {
65    fn default() -> Self {
66        Self {
67            from: ScaleUnit::None,
68            to: ScaleUnit::None,
69            from_unit: 1.0,
70            to_unit: 1.0,
71            padding: None,
72            round: RoundMethod::Nearest,
73            suffix: None,
74            format: None,
75            field: vec![1],
76            delimiter: None,
77            header: 0,
78            invalid: InvalidMode::Abort,
79            grouping: false,
80            zero_terminated: false,
81        }
82    }
83}
84
/// Decimal (SI) prefix letters paired with their multipliers, smallest first.
const SI_SUFFIXES: &[(char, f64)] = &[
    ('K', 1.0e3),  // kilo
    ('M', 1.0e6),  // mega
    ('G', 1.0e9),  // giga
    ('T', 1.0e12), // tera
    ('P', 1.0e15), // peta
    ('E', 1.0e18), // exa
    ('Z', 1.0e21), // zetta
    ('Y', 1.0e24), // yotta
];
96
/// Binary (IEC) prefix letters paired with their multipliers, smallest first.
/// Each multiplier is a power of 1024, written here as a power of two.
const IEC_SUFFIXES: &[(char, f64)] = &[
    ('K', (1u128 << 10) as f64),
    ('M', (1u128 << 20) as f64),
    ('G', (1u128 << 30) as f64),
    ('T', (1u128 << 40) as f64),
    ('P', (1u128 << 50) as f64),
    ('E', (1u128 << 60) as f64),
    ('Z', (1u128 << 70) as f64),
    ('Y', (1u128 << 80) as f64),
];
108
109/// Parse a scale unit string.
110pub fn parse_scale_unit(s: &str) -> Result<ScaleUnit, String> {
111    match s {
112        "none" => Ok(ScaleUnit::None),
113        "si" => Ok(ScaleUnit::Si),
114        "iec" => Ok(ScaleUnit::Iec),
115        "iec-i" => Ok(ScaleUnit::IecI),
116        "auto" => Ok(ScaleUnit::Auto),
117        _ => Err(format!("invalid unit: '{}'", s)),
118    }
119}
120
121/// Parse a round method string.
122pub fn parse_round_method(s: &str) -> Result<RoundMethod, String> {
123    match s {
124        "up" => Ok(RoundMethod::Up),
125        "down" => Ok(RoundMethod::Down),
126        "from-zero" => Ok(RoundMethod::FromZero),
127        "towards-zero" => Ok(RoundMethod::TowardsZero),
128        "nearest" => Ok(RoundMethod::Nearest),
129        _ => Err(format!("invalid rounding method: '{}'", s)),
130    }
131}
132
133/// Parse an invalid mode string.
134pub fn parse_invalid_mode(s: &str) -> Result<InvalidMode, String> {
135    match s {
136        "abort" => Ok(InvalidMode::Abort),
137        "fail" => Ok(InvalidMode::Fail),
138        "warn" => Ok(InvalidMode::Warn),
139        "ignore" => Ok(InvalidMode::Ignore),
140        _ => Err(format!("invalid mode: '{}'", s)),
141    }
142}
143
/// Parse a field specification such as `"1"`, `"1,3"`, `"1-5"`, `"-3"`, `"3-"` or `"-"`.
///
/// Returns the selected 1-based field indices, sorted ascending without
/// duplicates. An empty vector means "all fields"; it is returned for `"-"`
/// (either as the whole spec or as one comma-separated part).
///
/// An open-ended range (`"3-"`) is expanded up to field 9999, or just to its
/// own start when the start lies beyond that bound.
///
/// # Errors
/// Returns an error when a part is not a number, when a field index is 0, or
/// when a range is decreasing (e.g. `"5-3"`).
pub fn parse_fields(s: &str) -> Result<Vec<usize>, String> {
    /// Upper bound substituted for the missing end of an open range.
    const OPEN_RANGE_END: usize = 9999;

    if s == "-" {
        // All fields: represented as an empty vec and handled by the caller.
        return Ok(vec![]);
    }

    // `part` is the whole comma-separated item, used for the error message.
    let parse_index = |text: &str, part: &str| -> Result<usize, String> {
        text.parse::<usize>()
            .map_err(|_| format!("invalid field value '{}'", part))
    };

    let mut fields: Vec<usize> = Vec::new();
    for part in s.split(',') {
        let part = part.trim();
        match part.split_once('-') {
            // A bare "-" inside a list still selects every field.
            Some(("", "")) => return Ok(vec![]),
            Some((start_str, end_str)) => {
                let start = if start_str.is_empty() {
                    1
                } else {
                    parse_index(start_str, part)?
                };
                let end = if end_str.is_empty() {
                    None
                } else {
                    Some(parse_index(end_str, part)?)
                };
                if start == 0 {
                    return Err(format!("fields are numbered from 1: '{}'", part));
                }
                let end = match end {
                    Some(end) if end < start => {
                        return Err(format!("invalid decreasing range: '{}'", part));
                    }
                    Some(end) => end,
                    // Open range: never let the bound fall below the start.
                    None => OPEN_RANGE_END.max(start),
                };
                fields.extend(start..=end);
            }
            None => {
                let n = parse_index(part, part)?;
                if n == 0 {
                    return Err("fields are numbered from 1".to_string());
                }
                fields.push(n);
            }
        }
    }

    // Sort once and drop duplicates, instead of a linear `contains` per push.
    fields.sort_unstable();
    fields.dedup();
    Ok(fields)
}
200
201/// Parse a number with optional suffix, returning the raw numeric value.
202/// Handles suffixes like K, M, G, T, P, E, Z, Y (and Ki, Mi, etc. for iec-i).
203fn parse_number_with_suffix(s: &str, unit: ScaleUnit) -> Result<f64, String> {
204    let s = s.trim();
205    if s.is_empty() {
206        return Err("invalid number: ''".to_string());
207    }
208
209    // Find where the numeric part ends and the suffix begins.
210    let mut num_end = s.len();
211    let bytes = s.as_bytes();
212    let len = s.len();
213
214    // Check for trailing scale suffix characters.
215    if len > 0 {
216        let last_char = bytes[len - 1] as char;
217
218        match unit {
219            ScaleUnit::Auto | ScaleUnit::IecI => {
220                // Check for 'i' suffix (e.g., Ki, Mi).
221                if last_char == 'i' && len >= 2 {
222                    let prefix_char = (bytes[len - 2] as char).to_ascii_uppercase();
223                    if is_scale_suffix(prefix_char) {
224                        num_end = len - 2;
225                    }
226                } else {
227                    let upper = last_char.to_ascii_uppercase();
228                    if is_scale_suffix(upper) {
229                        num_end = len - 1;
230                    }
231                }
232            }
233            ScaleUnit::Si | ScaleUnit::Iec => {
234                let upper = last_char.to_ascii_uppercase();
235                if is_scale_suffix(upper) {
236                    num_end = len - 1;
237                }
238            }
239            ScaleUnit::None => {}
240        }
241    }
242
243    let num_str = &s[..num_end];
244    let suffix_str = &s[num_end..];
245
246    // Parse the numeric part.
247    let value: f64 = num_str
248        .parse()
249        .map_err(|_| format!("invalid number: '{}'", s))?;
250
251    // Apply suffix multiplier.
252    let multiplier = if suffix_str.is_empty() {
253        1.0
254    } else {
255        let suffix_upper = suffix_str.chars().next().unwrap().to_ascii_uppercase();
256        match unit {
257            ScaleUnit::Auto => {
258                // Auto-detect: if suffix ends with 'i', use IEC; otherwise SI.
259                if suffix_str.len() >= 2 && suffix_str.ends_with('i') {
260                    find_iec_multiplier(suffix_upper)?
261                } else {
262                    find_si_multiplier(suffix_upper)?
263                }
264            }
265            ScaleUnit::Si => find_si_multiplier(suffix_upper)?,
266            ScaleUnit::Iec | ScaleUnit::IecI => find_iec_multiplier(suffix_upper)?,
267            ScaleUnit::None => {
268                return Err(format!("invalid number: '{}'", s));
269            }
270        }
271    };
272
273    Ok(value * multiplier)
274}
275
/// Returns `true` when `c` is one of the upper-case scale letters
/// K, M, G, T, P, E, Z or Y.
fn is_scale_suffix(c: char) -> bool {
    "KMGTPEZY".contains(c)
}
279
280fn find_si_multiplier(c: char) -> Result<f64, String> {
281    for &(suffix, mult) in SI_SUFFIXES {
282        if suffix == c {
283            return Ok(mult);
284        }
285    }
286    Err(format!("invalid suffix: '{}'", c))
287}
288
289fn find_iec_multiplier(c: char) -> Result<f64, String> {
290    for &(suffix, mult) in IEC_SUFFIXES {
291        if suffix == c {
292            return Ok(mult);
293        }
294    }
295    Err(format!("invalid suffix: '{}'", c))
296}
297
298/// Apply rounding according to the specified method.
299fn apply_round(value: f64, method: RoundMethod) -> f64 {
300    match method {
301        RoundMethod::Up => value.ceil(),
302        RoundMethod::Down => value.floor(),
303        RoundMethod::FromZero => {
304            if value >= 0.0 {
305                value.ceil()
306            } else {
307                value.floor()
308            }
309        }
310        RoundMethod::TowardsZero => {
311            if value >= 0.0 {
312                value.floor()
313            } else {
314                value.ceil()
315            }
316        }
317        RoundMethod::Nearest => value.round(),
318    }
319}
320
321/// Format a number with scale suffix for output.
322fn format_scaled(value: f64, unit: ScaleUnit, round: RoundMethod) -> String {
323    match unit {
324        ScaleUnit::None => {
325            // Output as plain number.
326            format_plain_number(value)
327        }
328        ScaleUnit::Si => format_with_scale(value, SI_SUFFIXES, "", round),
329        ScaleUnit::Iec => format_with_scale(value, IEC_SUFFIXES, "", round),
330        ScaleUnit::IecI => format_with_scale(value, IEC_SUFFIXES, "i", round),
331        ScaleUnit::Auto => {
332            // For --to=auto, behave like SI.
333            format_with_scale(value, SI_SUFFIXES, "", round)
334        }
335    }
336}
337
/// Format a plain number without a scale suffix.
///
/// Whole numbers are printed without a decimal point, whatever their
/// magnitude (zero, including negative zero, prints as `0`). Any other value
/// is printed with exactly one decimal place.
fn format_plain_number(value: f64) -> String {
    // Test integrality on the float itself: going through `i64` saturates for
    // large magnitudes and made whole numbers above 2^63 print with ".0".
    if value.is_finite() && value.fract() == 0.0 {
        if value == 0.0 {
            // Covers -0.0, which would otherwise print as "-0".
            "0".to_string()
        } else {
            format!("{:.0}", value)
        }
    } else {
        format!("{:.1}", value)
    }
}
348
349/// Format a number with appropriate scale suffix.
350fn format_with_scale(
351    value: f64,
352    suffixes: &[(char, f64)],
353    i_suffix: &str,
354    round: RoundMethod,
355) -> String {
356    let abs_value = value.abs();
357    let sign = if value < 0.0 { "-" } else { "" };
358
359    // Find the largest suffix that applies.
360    let mut chosen_suffix = None;
361    let mut chosen_mult = 1.0;
362
363    for &(suffix, mult) in suffixes.iter().rev() {
364        if abs_value >= mult {
365            chosen_suffix = Some(suffix);
366            chosen_mult = mult;
367            break;
368        }
369    }
370
371    if let Some(suffix) = chosen_suffix {
372        let scaled = value / chosen_mult;
373        let scaled = apply_round_for_display(scaled, round);
374
375        format!("{sign}{:.1}{}{}", scaled.abs(), suffix, i_suffix)
376    } else {
377        // Value is smaller than the smallest suffix, output as-is.
378        format_plain_number(value)
379    }
380}
381
382/// Apply rounding for display purposes (when formatting scaled output).
383fn apply_round_for_display(value: f64, method: RoundMethod) -> f64 {
384    // For display, we round to 1 decimal place.
385    let factor = 10.0;
386    let shifted = value * factor;
387    let rounded = match method {
388        RoundMethod::Up => shifted.ceil(),
389        RoundMethod::Down => shifted.floor(),
390        RoundMethod::FromZero => {
391            if shifted >= 0.0 {
392                shifted.ceil()
393            } else {
394                shifted.floor()
395            }
396        }
397        RoundMethod::TowardsZero => {
398            if shifted >= 0.0 {
399                shifted.floor()
400            } else {
401                shifted.ceil()
402            }
403        }
404        RoundMethod::Nearest => shifted.round(),
405    };
406    rounded / factor
407}
408
/// Insert `,` thousands separators into the integer part of a formatted number.
///
/// The integer part is the first run of ASCII digits that appears before the
/// first `.` (or anywhere, if there is no `.`). Everything around that run —
/// sign, prefix text, fraction, unit suffix — is copied through unchanged, so
/// text that is not part of the number is never grouped and non-ASCII text
/// cannot cause a slicing panic.
fn group_thousands(s: &str) -> String {
    // Only look for the integer digits ahead of the decimal point.
    let int_end = s.find('.').unwrap_or(s.len());
    let head = &s[..int_end];

    let start = match head.find(|c: char| c.is_ascii_digit()) {
        Some(pos) => pos,
        None => return s.to_string(),
    };
    let run_len = head[start..]
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    let end = start + run_len;
    let digits = &s[start..end];

    if digits.len() <= 3 {
        return s.to_string();
    }

    let mut result = String::with_capacity(s.len() + digits.len() / 3);
    result.push_str(&s[..start]);
    for (i, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that starts a group of three.
        if i > 0 && (digits.len() - i) % 3 == 0 {
            result.push(',');
        }
        result.push(digit);
    }
    result.push_str(&s[end..]);
    result
}
443
/// Pads an already-scaled number to the width given in a printf-style format.
///
/// Only the literal text around the directive, the `-` flag (left-align) and
/// the width are honoured; other flags, the precision and the conversion
/// character are skipped. Padding is always done with spaces. If `fmt`
/// contains no `%`, the result is `fmt` followed by `scaled`.
fn apply_format_padding(scaled: &str, fmt: &str) -> String {
    let raw = fmt.as_bytes();

    let percent = match raw.iter().position(|&b| b == b'%') {
        Some(pos) => pos,
        None => return format!("{}{}", fmt, scaled),
    };
    let prefix = &fmt[..percent];
    let mut pos = percent + 1;

    // Flags: only '-' has an effect here.
    let mut left_align = false;
    while let Some(&flag) = raw.get(pos) {
        match flag {
            b'-' => left_align = true,
            b'0' | b'+' | b' ' | b'#' | b'\'' => {}
            _ => break,
        }
        pos += 1;
    }

    // Width.
    let mut width = 0usize;
    while let Some(digit) = raw.get(pos).filter(|b| b.is_ascii_digit()) {
        width = width
            .saturating_mul(10)
            .saturating_add(usize::from(digit - b'0'));
        pos += 1;
    }

    // Precision is ignored, then a single conversion character is consumed.
    pos += raw[pos..]
        .iter()
        .take_while(|&&b| b == b'.' || b.is_ascii_digit())
        .count();
    if pos < raw.len() {
        pos += 1;
    }
    let suffix = &fmt[pos..];

    // Empty when the number already fills (or exceeds) the requested width.
    let fill = " ".repeat(width.saturating_sub(scaled.len()));
    let padded = if left_align {
        format!("{}{}", scaled, fill)
    } else {
        format!("{}{}", fill, scaled)
    };

    format!("{}{}{}", prefix, padded, suffix)
}
502
503/// Apply printf-style format to a number.
504fn apply_format(value: f64, fmt: &str) -> Result<String, String> {
505    // Parse format: %[flags][width][.precision]f
506    let bytes = fmt.as_bytes();
507    let mut i = 0;
508
509    // Find '%'.
510    while i < bytes.len() && bytes[i] != b'%' {
511        i += 1;
512    }
513    let prefix = &fmt[..i];
514    if i >= bytes.len() {
515        return Err(format!("invalid format: '{}'", fmt));
516    }
517    i += 1; // skip '%'
518
519    if i >= bytes.len() {
520        return Err(format!("invalid format: '{}'", fmt));
521    }
522
523    // Handle %%
524    if bytes[i] == b'%' {
525        return Ok(format!("{}%", prefix));
526    }
527
528    // Parse flags.
529    let mut zero_pad = false;
530    let mut left_align = false;
531    let mut plus_sign = false;
532    let mut space_sign = false;
533    while i < bytes.len() {
534        match bytes[i] {
535            b'0' => zero_pad = true,
536            b'-' => left_align = true,
537            b'+' => plus_sign = true,
538            b' ' => space_sign = true,
539            b'#' => {}
540            b'\'' => {} // grouping flag, handled separately
541            _ => break,
542        }
543        i += 1;
544    }
545
546    // Parse width.
547    let mut width: usize = 0;
548    while i < bytes.len() && bytes[i].is_ascii_digit() {
549        width = width
550            .saturating_mul(10)
551            .saturating_add((bytes[i] - b'0') as usize);
552        i += 1;
553    }
554
555    // Parse precision.
556    let mut precision: Option<usize> = None;
557    if i < bytes.len() && bytes[i] == b'.' {
558        i += 1;
559        let mut prec: usize = 0;
560        while i < bytes.len() && bytes[i].is_ascii_digit() {
561            prec = prec
562                .saturating_mul(10)
563                .saturating_add((bytes[i] - b'0') as usize);
564            i += 1;
565        }
566        precision = Some(prec);
567    }
568
569    // Parse conversion type.
570    if i >= bytes.len() {
571        return Err(format!("invalid format: '{}'", fmt));
572    }
573    let conv = bytes[i] as char;
574    i += 1;
575    let suffix = &fmt[i..];
576
577    let prec = precision.unwrap_or(6);
578    let formatted = match conv {
579        'f' => format!("{:.prec$}", value, prec = prec),
580        'e' => format_scientific(value, prec, 'e'),
581        'E' => format_scientific(value, prec, 'E'),
582        'g' => format_g(value, prec, false),
583        'G' => format_g(value, prec, true),
584        _ => return Err(format!("invalid format character: '{}'", conv)),
585    };
586
587    // Apply sign prefix.
588    let sign_str = if value < 0.0 {
589        ""
590    } else if plus_sign {
591        "+"
592    } else if space_sign {
593        " "
594    } else {
595        ""
596    };
597
598    let num_str = if !sign_str.is_empty() && !formatted.starts_with('-') {
599        format!("{}{}", sign_str, formatted)
600    } else {
601        formatted
602    };
603
604    // Apply width and padding.
605    let padded = if width > 0 && num_str.len() < width {
606        let pad_len = width - num_str.len();
607        if left_align {
608            format!("{}{}", num_str, " ".repeat(pad_len))
609        } else if zero_pad {
610            if num_str.starts_with('-') || num_str.starts_with('+') || num_str.starts_with(' ') {
611                let (sign, rest) = num_str.split_at(1);
612                format!("{}{}{}", sign, "0".repeat(pad_len), rest)
613            } else {
614                format!("{}{}", "0".repeat(pad_len), num_str)
615            }
616        } else {
617            format!("{}{}", " ".repeat(pad_len), num_str)
618        }
619    } else {
620        num_str
621    };
622
623    Ok(format!("{}{}{}", prefix, padded, suffix))
624}
625
/// Renders `value` in scientific notation with `prec` digits after the decimal
/// point, e.g. `1.23e+04`.
///
/// `e_char` is the exponent marker to print. The exponent always carries a
/// sign and at least two digits. With `prec == 0` no decimal point is printed.
fn format_scientific(value: f64, prec: usize, e_char: char) -> String {
    if value == 0.0 {
        // Zero has no logarithm; emit it directly (keeping the sign of -0.0).
        let sign = if value.is_sign_negative() { "-" } else { "" };
        return if prec == 0 {
            format!("{sign}0{e_char}+00")
        } else {
            format!("{sign}0.{}{e_char}+00", "0".repeat(prec))
        };
    }

    let sign = if value < 0.0 { "-" } else { "" };
    let magnitude = value.abs();

    let mut exponent = magnitude.log10().floor() as i32;
    let scale = 10f64.powi(prec as i32);
    let mut mantissa = (magnitude / 10f64.powi(exponent) * scale).round() / scale;

    // Rounding may push the mantissa to 10; renormalise into the next decade.
    if mantissa >= 10.0 {
        mantissa /= 10.0;
        exponent += 1;
    }

    let exp_sign = if exponent < 0 { '-' } else { '+' };
    let exp_digits = exponent.unsigned_abs();

    format!("{sign}{mantissa:.prec$}{e_char}{exp_sign}{exp_digits:02}")
}
662
663/// Format using %g - shortest representation.
664fn format_g(value: f64, prec: usize, upper: bool) -> String {
665    let prec = if prec == 0 { 1 } else { prec };
666
667    if value == 0.0 {
668        let sign = if value.is_sign_negative() { "-" } else { "" };
669        return format!("{sign}0");
670    }
671
672    let abs = value.abs();
673    let exp = abs.log10().floor() as i32;
674    let e_char = if upper { 'E' } else { 'e' };
675
676    if exp < -4 || exp >= prec as i32 {
677        let sig_prec = prec.saturating_sub(1);
678        let s = format_scientific(value, sig_prec, e_char);
679        trim_g_zeros(&s)
680    } else {
681        let decimal_prec = if prec as i32 > exp + 1 {
682            (prec as i32 - exp - 1) as usize
683        } else {
684            0
685        };
686        let s = format!("{value:.decimal_prec$}");
687        trim_g_zeros(&s)
688    }
689}
690
/// Removes insignificant trailing zeros from a `%g`-style rendering.
///
/// Works on the mantissa when `s` contains an exponent (`e`/`E`) and on the
/// whole string otherwise. Zeros are only removed from a fractional part: a
/// number without a decimal point (such as `100`) is returned unchanged, since
/// its trailing zeros are significant. A decimal point left dangling after
/// trimming is removed as well.
fn trim_g_zeros(s: &str) -> String {
    /// Strips trailing fraction zeros; integers are left untouched.
    fn strip_fraction(number: &str) -> &str {
        if number.contains('.') {
            number.trim_end_matches('0').trim_end_matches('.')
        } else {
            number
        }
    }

    match s.find(['e', 'E']) {
        Some(e_pos) => {
            let (mantissa, exponent) = s.split_at(e_pos);
            format!("{}{}", strip_fraction(mantissa), exponent)
        }
        None => strip_fraction(s).to_string(),
    }
}
700
701/// Convert a single numeric token according to the config.
702fn convert_number(token: &str, config: &NumfmtConfig) -> Result<String, String> {
703    // Parse the input number (with optional suffix).
704    let raw_value = parse_number_with_suffix(token, config.from)?;
705
706    // Apply from-unit scaling.
707    let value = raw_value * config.from_unit;
708
709    // Apply to-unit scaling.
710    let value = value / config.to_unit;
711
712    // Format the output.
713    let mut result = if let Some(ref fmt) = config.format {
714        // If --to is also specified, first scale, then apply format padding.
715        if config.to != ScaleUnit::None {
716            let scaled = format_scaled(value, config.to, config.round);
717            // Extract width from the format string and apply padding.
718            apply_format_padding(&scaled, fmt)
719        } else {
720            let rounded = apply_round(value, config.round);
721            apply_format(rounded, fmt)?
722        }
723    } else if config.to != ScaleUnit::None {
724        format_scaled(value, config.to, config.round)
725    } else {
726        let rounded = apply_round(value, config.round);
727        format_plain_number(rounded)
728    };
729
730    // Apply grouping.
731    if config.grouping {
732        result = group_thousands(&result);
733    }
734
735    // Apply suffix.
736    if let Some(ref suffix) = config.suffix {
737        result.push_str(suffix);
738    }
739
740    // Apply padding.
741    if let Some(pad) = config.padding {
742        let pad_width = pad.unsigned_abs() as usize;
743        if result.len() < pad_width {
744            let deficit = pad_width - result.len();
745            if pad < 0 {
746                // Left-align (pad on right).
747                result = format!("{}{}", result, " ".repeat(deficit));
748            } else {
749                // Right-align (pad on left).
750                result = format!("{}{}", " ".repeat(deficit), result);
751            }
752        }
753    }
754
755    Ok(result)
756}
757
/// Split a line into fields.
///
/// With a delimiter, the line is split on every occurrence of it, so empty
/// fields are kept. Without one, the line is split on runs of ASCII
/// whitespace; leading and trailing whitespace produce no fields. ASCII
/// whitespace is used (rather than Unicode whitespace) so the fields found
/// here line up with the tokens `reassemble_fields` walks over when it
/// rebuilds the line.
///
/// The result is never empty: a line without any field (empty or all
/// whitespace) is returned as a single field holding the whole line.
fn split_fields<'a>(line: &'a str, delimiter: Option<char>) -> Vec<&'a str> {
    match delimiter {
        Some(delim) => line.split(delim).collect(),
        None => {
            let fields: Vec<&str> = line.split_ascii_whitespace().collect();
            if fields.is_empty() {
                vec![line]
            } else {
                fields
            }
        }
    }
}
799
/// Rebuilds an output line from converted fields.
///
/// With a delimiter, the converted fields are joined with it. Without one, the
/// original line is walked byte by byte: ASCII whitespace is copied through
/// unchanged and each run of non-whitespace is replaced by the next converted
/// field (falling back to the original field when `converted` has run out).
fn reassemble_fields(
    original: &str,
    fields: &[&str],
    converted: &[String],
    delimiter: Option<char>,
) -> String {
    let delim = match delimiter {
        Some(delim) => delim,
        None => {
            let raw = original.as_bytes();
            let mut out = String::with_capacity(original.len());
            let mut next_field = 0usize;
            let mut pos = 0usize;

            while pos < raw.len() {
                if raw[pos].is_ascii_whitespace() {
                    // Preserve the original spacing exactly.
                    out.push(raw[pos] as char);
                    pos += 1;
                    continue;
                }

                // Start of a token: emit its replacement ...
                if let Some(replacement) = converted.get(next_field) {
                    out.push_str(replacement);
                } else if let Some(untouched) = fields.get(next_field) {
                    out.push_str(untouched);
                }
                next_field += 1;

                // ... and skip over the original token text.
                while pos < raw.len() && !raw[pos].is_ascii_whitespace() {
                    pos += 1;
                }
            }

            return out;
        }
    };

    converted.join(&delim.to_string())
}
850
851/// Process a single line according to the numfmt configuration.
852pub fn process_line(line: &str, config: &NumfmtConfig) -> Result<String, String> {
853    let fields = split_fields(line, config.delimiter);
854
855    if fields.is_empty() {
856        return Ok(line.to_string());
857    }
858
859    let all_fields = config.field.is_empty();
860
861    let mut converted: Vec<String> = Vec::with_capacity(fields.len());
862    for (i, field) in fields.iter().enumerate() {
863        let field_num = i + 1; // 1-based
864        let should_convert = all_fields || config.field.contains(&field_num);
865
866        if should_convert {
867            match convert_number(field, config) {
868                Ok(s) => converted.push(s),
869                Err(e) => match config.invalid {
870                    InvalidMode::Abort => return Err(e),
871                    InvalidMode::Fail => {
872                        eprintln!("numfmt: {}", e);
873                        converted.push(field.to_string());
874                    }
875                    InvalidMode::Warn => {
876                        eprintln!("numfmt: {}", e);
877                        converted.push(field.to_string());
878                    }
879                    InvalidMode::Ignore => {
880                        converted.push(field.to_string());
881                    }
882                },
883            }
884        } else {
885            converted.push(field.to_string());
886        }
887    }
888
889    Ok(reassemble_fields(
890        line,
891        &fields,
892        &converted,
893        config.delimiter,
894    ))
895}
896
897/// Run the numfmt command with the given configuration and input.
898pub fn run_numfmt<R: std::io::BufRead, W: Write>(
899    input: R,
900    mut output: W,
901    config: &NumfmtConfig,
902) -> Result<(), String> {
903    let terminator = if config.zero_terminated { b'\0' } else { b'\n' };
904    let mut header_remaining = config.header;
905    let mut buf = Vec::new();
906    let mut reader = input;
907    let mut had_error = false;
908
909    loop {
910        buf.clear();
911        let bytes_read = reader
912            .read_until(terminator, &mut buf)
913            .map_err(|e| format!("read error: {}", e))?;
914        if bytes_read == 0 {
915            break;
916        }
917
918        // Remove the terminator for processing.
919        let line = if buf.last() == Some(&terminator) {
920            &buf[..buf.len() - 1]
921        } else {
922            &buf[..]
923        };
924        let line_str = String::from_utf8_lossy(line);
925
926        if header_remaining > 0 {
927            header_remaining -= 1;
928            output
929                .write_all(line_str.as_bytes())
930                .map_err(|e| format!("write error: {}", e))?;
931            output
932                .write_all(&[terminator])
933                .map_err(|e| format!("write error: {}", e))?;
934            continue;
935        }
936
937        match process_line(&line_str, config) {
938            Ok(result) => {
939                output
940                    .write_all(result.as_bytes())
941                    .map_err(|e| format!("write error: {}", e))?;
942                output
943                    .write_all(&[terminator])
944                    .map_err(|e| format!("write error: {}", e))?;
945            }
946            Err(e) => {
947                match config.invalid {
948                    InvalidMode::Abort => {
949                        eprintln!("numfmt: {}", e);
950                        return Err(e);
951                    }
952                    InvalidMode::Fail => {
953                        eprintln!("numfmt: {}", e);
954                        // Output original line.
955                        output
956                            .write_all(line_str.as_bytes())
957                            .map_err(|e| format!("write error: {}", e))?;
958                        output
959                            .write_all(&[terminator])
960                            .map_err(|e| format!("write error: {}", e))?;
961                        had_error = true;
962                    }
963                    InvalidMode::Warn => {
964                        eprintln!("numfmt: {}", e);
965                        output
966                            .write_all(line_str.as_bytes())
967                            .map_err(|e| format!("write error: {}", e))?;
968                        output
969                            .write_all(&[terminator])
970                            .map_err(|e| format!("write error: {}", e))?;
971                    }
972                    InvalidMode::Ignore => {
973                        output
974                            .write_all(line_str.as_bytes())
975                            .map_err(|e| format!("write error: {}", e))?;
976                        output
977                            .write_all(&[terminator])
978                            .map_err(|e| format!("write error: {}", e))?;
979                    }
980                }
981            }
982        }
983    }
984
985    output.flush().map_err(|e| format!("flush error: {}", e))?;
986
987    if had_error {
988        Err("conversion errors occurred".to_string())
989    } else {
990        Ok(())
991    }
992}
coreutils_rs/numfmt/core.rs

coreutils_rs/numfmt/
core.rs