fish_printf/
printf_impl.rs

1/** Rust printf implementation, based on musl. */
2use super::arg::Arg;
3use super::fmt_fp::format_float;
4use super::locale::Locale;
5use std::fmt::{self, Write};
6use std::mem;
7use std::result::Result;
8
9#[cfg(feature = "widestring")]
10use widestring::Utf32Str as wstr;
11
/// The ways in which formatting can fail.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// The format string is malformed.
    BadFormatString,
    /// The format string asks for more arguments than were supplied.
    MissingArg,
    /// Arguments were left over once the format string was exhausted.
    ExtraArg,
    /// An argument's type does not match its format specifier.
    BadArgType,
    /// A numeric quantity (such as a width or precision) is too large to represent.
    Overflow,
    /// The output stream reported an error.
    Fmt(fmt::Error),
}

// Allows `?` to lift an output-stream failure into our own error type.
impl From<fmt::Error> for Error {
    fn from(source: fmt::Error) -> Self {
        Self::Fmt(source)
    }
}
35
36#[derive(Debug, Copy, Clone, Default)]
37pub(super) struct ModifierFlags {
38    pub alt_form: bool, // #
39    pub zero_pad: bool, // 0
40    pub left_adj: bool, // negative field width
41    pub pad_pos: bool,  // space: blank before positive numbers
42    pub mark_pos: bool, // +: sign before positive numbers
43    pub grouped: bool,  // ': group indicator
44}
45
46impl ModifierFlags {
47    // If c is a modifier character, set the flag and return true.
48    // Otherwise return false. Note we allow repeated modifier flags.
49    fn try_set(&mut self, c: char) -> bool {
50        match c {
51            '#' => self.alt_form = true,
52            '0' => self.zero_pad = true,
53            '-' => self.left_adj = true,
54            ' ' => self.pad_pos = true,
55            '+' => self.mark_pos = true,
56            '\'' => self.grouped = true,
57            _ => return false,
58        };
59        true
60    }
61}
62
// Prefixes that may precede a conversion specifier, named after their spelling.
// These are mostly ignored: the size of a value is taken from the argument itself.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(non_camel_case_types)]
enum ConversionPrefix {
    // No prefix was given.
    Empty,
    hh,
    h,
    l,
    ll,
    j,
    t,
    z,
    L,
}
78
79#[derive(Debug, Copy, Clone, PartialEq, Eq)]
80#[allow(non_camel_case_types)]
81#[rustfmt::skip]
82pub(super) enum ConversionSpec {
83    // Integers, with prefixes "hh", "h", "l", "ll", "j", "t", "z"
84    // Note that we treat '%i' as '%d'.
85    d, o, u, x, X,
86
87    // USizeRef receiver, with same prefixes as ints
88    n,
89
90    // Float, with prefixes "l" and "L"
91    a, A, e, E, f, F, g, G,
92
93    // Pointer, no prefixes
94    p,
95
96    // Character or String, with supported prefixes "l"
97    // Note that we treat '%C' as '%c' and '%S' as '%s'.
98    c, s,
99}
100
101impl ConversionSpec {
102    // Returns true if the given prefix is supported by this conversion specifier.
103    fn supports_prefix(self, prefix: ConversionPrefix) -> bool {
104        use ConversionPrefix::*;
105        use ConversionSpec::*;
106        if matches!(prefix, Empty) {
107            // No prefix is always supported.
108            return true;
109        }
110        match self {
111            d | o | u | x | X | n => matches!(prefix, hh | h | l | ll | j | t | z),
112            a | A | e | E | f | F | g | G => matches!(prefix, l | L),
113            p => false,
114            c | s => matches!(prefix, l),
115        }
116    }
117
118    // Returns true if the conversion specifier is lowercase,
119    // which affects certain rendering.
120    #[inline]
121    pub(super) fn is_lower(self) -> bool {
122        use ConversionSpec::*;
123        match self {
124            d | o | u | x | n | a | e | f | g | p | c | s => true,
125            X | A | E | F | G => false,
126        }
127    }
128
129    // Returns a ConversionSpec from a character, or None if none.
130    fn from_char(cc: char) -> Option<Self> {
131        use ConversionSpec::*;
132        let res = match cc {
133            'd' | 'i' => d,
134            'o' => o,
135            'u' => u,
136            'x' => x,
137            'X' => X,
138            'n' => n,
139            'a' => a,
140            'A' => A,
141            'e' => e,
142            'E' => E,
143            'f' => f,
144            'F' => F,
145            'g' => g,
146            'G' => G,
147            'p' => p,
148            'c' | 'C' => c,
149            's' | 'S' => s,
150            _ => return None,
151        };
152        Some(res)
153    }
154}
155
/// A cursor over a format string, offering the operations the parser needs.
pub trait FormatString {
    /// Returns true if no characters remain.
    fn is_empty(&self) -> bool;

    /// Returns the character `index` positions ahead, or `None` if that is out of bounds.
    /// Note `index` is a count of characters, not bytes.
    fn at(&self, index: usize) -> Option<char>;

    /// Moves the cursor forward by `n` characters.
    fn advance_by(&mut self, n: usize);

    /// Reads the leading run of characters that should be output literally,
    /// moving the cursor past them, and returns that text.
    /// Implementations may optionally store the characters in `buffer`.
    /// This handles a tail of `%%`.
    fn take_literal<'a: 'b, 'b>(&'a mut self, buffer: &'b mut String) -> &'b str;
}
173
174impl FormatString for &str {
175    fn is_empty(&self) -> bool {
176        (*self).is_empty()
177    }
178
179    fn at(&self, index: usize) -> Option<char> {
180        self.chars().nth(index)
181    }
182
183    fn advance_by(&mut self, n: usize) {
184        let mut chars = self.chars();
185        for _ in 0..n {
186            let c = chars.next();
187            assert!(c.is_some(), "FormatString::advance(): index out of bounds");
188        }
189        *self = chars.as_str();
190    }
191
192    fn take_literal<'a: 'b, 'b>(&'a mut self, _buffer: &'b mut String) -> &'b str {
193        // Count length of non-percent characters.
194        let non_percents: usize = self
195            .chars()
196            .take_while(|&c| c != '%')
197            .map(|c| c.len_utf8())
198            .sum();
199        // Take only an even number of percents. Note we know these have byte length 1.
200        let percent_pairs = self[non_percents..]
201            .chars()
202            .take_while(|&c| c == '%')
203            .count()
204            / 2;
205        let (prefix, rest) = self.split_at(non_percents + percent_pairs * 2);
206        *self = rest;
207        // Trim half of the trailing percent characters from the prefix.
208        &prefix[..prefix.len() - percent_pairs]
209    }
210}
211
#[cfg(feature = "widestring")]
impl FormatString for &wstr {
    fn is_empty(&self) -> bool {
        self.as_char_slice().is_empty()
    }

    fn at(&self, index: usize) -> Option<char> {
        let chars = self.as_char_slice();
        chars.get(index).copied()
    }

    fn advance_by(&mut self, n: usize) {
        let rest = &self[n..];
        *self = rest;
    }

    fn take_literal<'a: 'b, 'b>(&'a mut self, buffer: &'b mut String) -> &'b str {
        let chars = self.as_char_slice();
        // Number of characters before the first percent (or all of them, if none).
        let literal_len = chars
            .iter()
            .position(|&ch| ch == '%')
            .unwrap_or(chars.len());
        // Only whole "%%" pairs are literal; an odd percent out begins a conversion.
        let percent_run = chars[literal_len..]
            .iter()
            .take_while(|&&ch| ch == '%')
            .count();
        let escapes = percent_run / 2;
        // Each pair is output as a single percent, so copy just one per pair.
        buffer.clear();
        buffer.extend(chars[..literal_len + escapes].iter());
        *self = &self[literal_len + escapes * 2..];
        buffer.as_str()
    }
}
237
238// Read an int from a format string, stopping at the first non-digit.
239// Negative values are not supported.
240// If there are no digits, return 0.
241// Adjust the format string to point to the char after the int.
242fn get_int(fmt: &mut impl FormatString) -> Result<usize, Error> {
243    use Error::Overflow;
244    let mut i: usize = 0;
245    while let Some(digit) = fmt.at(0).and_then(|c| c.to_digit(10)) {
246        i = i.checked_mul(10).ok_or(Overflow)?;
247        i = i.checked_add(digit as usize).ok_or(Overflow)?;
248        fmt.advance_by(1);
249    }
250    Ok(i)
251}
252
253// Read a conversion prefix from a format string, advancing it.
254fn get_prefix(fmt: &mut impl FormatString) -> ConversionPrefix {
255    use ConversionPrefix as CP;
256    let prefix = match fmt.at(0).unwrap_or('\0') {
257        'h' if fmt.at(1) == Some('h') => CP::hh,
258        'h' => CP::h,
259        'l' if fmt.at(1) == Some('l') => CP::ll,
260        'l' => CP::l,
261        'j' => CP::j,
262        't' => CP::t,
263        'z' => CP::z,
264        'L' => CP::L,
265        _ => CP::Empty,
266    };
267    fmt.advance_by(match prefix {
268        CP::Empty => 0,
269        CP::hh | CP::ll => 2,
270        _ => 1,
271    });
272    prefix
273}
274
275// Read an (optionally prefixed) format specifier, such as d, Lf, etc.
276// Adjust the cursor to point to the char after the specifier.
277fn get_specifier(fmt: &mut impl FormatString) -> Result<ConversionSpec, Error> {
278    let prefix = get_prefix(fmt);
279    // Awkwardly placed hack to disallow %lC and %lS, since we otherwise treat
280    // them as the same.
281    if prefix != ConversionPrefix::Empty && matches!(fmt.at(0), Some('C' | 'S')) {
282        return Err(Error::BadFormatString);
283    }
284    let spec = fmt
285        .at(0)
286        .and_then(ConversionSpec::from_char)
287        .ok_or(Error::BadFormatString)?;
288    if !spec.supports_prefix(prefix) {
289        return Err(Error::BadFormatString);
290    }
291    fmt.advance_by(1);
292    Ok(spec)
293}
294
295// Pad output by emitting `c` until `min_width` is reached.
296pub(super) fn pad(
297    f: &mut impl Write,
298    c: char,
299    min_width: usize,
300    current_width: usize,
301) -> fmt::Result {
302    assert!(c == '0' || c == ' ');
303    if current_width >= min_width {
304        return Ok(());
305    }
306    const ZEROS: &str = "0000000000000000";
307    const SPACES: &str = "                ";
308    let buff = if c == '0' { ZEROS } else { SPACES };
309    let mut remaining = min_width - current_width;
310    while remaining > 0 {
311        let n = remaining.min(buff.len());
312        f.write_str(&buff[..n])?;
313        remaining -= n;
314    }
315    Ok(())
316}
317
318/// Formats a string using the provided format specifiers, arguments, and locale,
319/// and writes the output to the given `Write` implementation.
320///
321/// # Parameters
322/// - `f`: The receiver of formatted output.
323/// - `fmt`: The format string being parsed.
324/// - `locale`: The locale to use for number formatting.
325/// - `args`: Iterator over the arguments to format.
326///
327/// # Returns
328/// A `Result` which is `Ok` containing the number of bytes written on success, or an `Error`.
329///
330/// # Example
331///
332/// ```
333/// use fish_printf::{sprintf_locale, ToArg, FormatString, locale};
334/// use std::fmt::Write;
335///
336/// let mut output = String::new();
337/// let fmt: &str = "%'0.2f";
338/// let mut args = [1234567.89.to_arg()];
339///
340/// let result = sprintf_locale(&mut output, fmt, &locale::EN_US_LOCALE, &mut args);
341///
342/// assert!(result == Ok(12));
343/// assert_eq!(output, "1,234,567.89");
344/// ```
345pub fn sprintf_locale(
346    f: &mut impl Write,
347    fmt: impl FormatString,
348    locale: &Locale,
349    args: &mut [Arg],
350) -> Result<usize, Error> {
351    use ConversionSpec as CS;
352    let mut s = fmt;
353    let mut args = args.iter_mut();
354    let mut out_len: usize = 0;
355
356    // Shared storage for the output of the conversion specifier.
357    let buf = &mut String::new();
358    'main: while !s.is_empty() {
359        buf.clear();
360
361        // Handle literal text and %% format specifiers.
362        let lit = s.take_literal(buf);
363        if !lit.is_empty() {
364            f.write_str(lit)?;
365            out_len = out_len
366                .checked_add(lit.chars().count())
367                .ok_or(Error::Overflow)?;
368            continue 'main;
369        }
370
371        // Consume the % at the start of the format specifier.
372        debug_assert!(s.at(0) == Some('%'));
373        s.advance_by(1);
374
375        // Read modifier flags. '-' and '0' flags are mutually exclusive.
376        let mut flags = ModifierFlags::default();
377        while flags.try_set(s.at(0).unwrap_or('\0')) {
378            s.advance_by(1);
379        }
380        if flags.left_adj {
381            flags.zero_pad = false;
382        }
383
384        // Read field width. We do not support $.
385        let width = if s.at(0) == Some('*') {
386            let arg_width = args.next().ok_or(Error::MissingArg)?.as_sint()?;
387            s.advance_by(1);
388            if arg_width < 0 {
389                flags.left_adj = true;
390            }
391            arg_width
392                .unsigned_abs()
393                .try_into()
394                .map_err(|_| Error::Overflow)?
395        } else {
396            get_int(&mut s)?
397        };
398
399        // Optionally read precision. We do not support $.
400        let mut prec: Option<usize> = if s.at(0) == Some('.') && s.at(1) == Some('*') {
401            // "A negative precision is treated as though it were missing."
402            // Here we assume the precision is always signed.
403            s.advance_by(2);
404            let p = args.next().ok_or(Error::MissingArg)?.as_sint()?;
405            p.try_into().ok()
406        } else if s.at(0) == Some('.') {
407            s.advance_by(1);
408            Some(get_int(&mut s)?)
409        } else {
410            None
411        };
412        // Disallow precisions larger than i32::MAX, in keeping with C.
413        if prec.unwrap_or(0) > i32::MAX as usize {
414            return Err(Error::Overflow);
415        }
416
417        // Read out the format specifier and arg.
418        let conv_spec = get_specifier(&mut s)?;
419        let arg = args.next().ok_or(Error::MissingArg)?;
420        let mut prefix = "";
421
422        // Thousands grouping only works for d,u,i,f,F.
423        // 'i' is mapped to 'd'.
424        if flags.grouped && !matches!(conv_spec, CS::d | CS::u | CS::f | CS::F) {
425            return Err(Error::BadFormatString);
426        }
427
428        // Disable zero-pad if we have an explicit precision.
429        // "If a precision is given with a numeric conversion (d, i, o, u, i, x, and X),
430        // the 0 flag is ignored." p is included here.
431        let spec_is_numeric = matches!(conv_spec, CS::d | CS::u | CS::o | CS::p | CS::x | CS::X);
432        if spec_is_numeric && prec.is_some() {
433            flags.zero_pad = false;
434        }
435
436        // Apply the formatting. Some cases continue the main loop.
437        // Note that numeric conversions must leave 'body' empty if the value is 0.
438        let body: &str = match conv_spec {
439            CS::n => {
440                arg.set_count(out_len)?;
441                continue 'main;
442            }
443            CS::e | CS::f | CS::g | CS::a | CS::E | CS::F | CS::G | CS::A => {
444                // Floating point types handle output on their own.
445                let float = arg.as_float()?;
446                let len = format_float(f, float, width, prec, flags, locale, conv_spec, buf)?;
447                out_len = out_len.checked_add(len).ok_or(Error::Overflow)?;
448                continue 'main;
449            }
450            CS::p => {
451                const PTR_HEX_DIGITS: usize = 2 * mem::size_of::<*const u8>();
452                prec = prec.map(|p| p.max(PTR_HEX_DIGITS));
453                let uint = arg.as_uint()?;
454                if uint != 0 {
455                    prefix = "0x";
456                    write!(buf, "{:x}", uint)?;
457                }
458                buf
459            }
460            CS::x | CS::X => {
461                // If someone passes us a negative value, format it with the width
462                // we were given.
463                let lower = conv_spec.is_lower();
464                let (_, uint) = arg.as_wrapping_sint()?;
465                if uint != 0 {
466                    if flags.alt_form {
467                        prefix = if lower { "0x" } else { "0X" };
468                    }
469                    if lower {
470                        write!(buf, "{:x}", uint)?;
471                    } else {
472                        write!(buf, "{:X}", uint)?;
473                    }
474                }
475                buf
476            }
477            CS::o => {
478                let uint = arg.as_uint()?;
479                if uint != 0 {
480                    write!(buf, "{:o}", uint)?;
481                }
482                if flags.alt_form && prec.unwrap_or(0) <= buf.len() + 1 {
483                    prec = Some(buf.len() + 1);
484                }
485                buf
486            }
487            CS::u => {
488                let uint = arg.as_uint()?;
489                if uint != 0 {
490                    write!(buf, "{}", uint)?;
491                }
492                buf
493            }
494            CS::d => {
495                let arg_i = arg.as_sint()?;
496                if arg_i < 0 {
497                    prefix = "-";
498                } else if flags.mark_pos {
499                    prefix = "+";
500                } else if flags.pad_pos {
501                    prefix = " ";
502                }
503                if arg_i != 0 {
504                    write!(buf, "{}", arg_i.unsigned_abs())?;
505                }
506                buf
507            }
508            CS::c => {
509                // also 'C'
510                flags.zero_pad = false;
511                buf.push(arg.as_char()?);
512                buf
513            }
514            CS::s => {
515                // also 'S'
516                let s = arg.as_str(buf)?;
517                let p = prec.unwrap_or(s.len()).min(s.len());
518                prec = Some(p);
519                flags.zero_pad = false;
520                &s[..p]
521            }
522        };
523        // Numeric output should be empty iff the value is 0.
524        if spec_is_numeric && body.is_empty() {
525            debug_assert!(arg.as_uint().unwrap() == 0);
526        }
527
528        // Decide if we want to apply thousands grouping to the body, and compute its size.
529        // Note we have already errored out if grouped is set and this is non-numeric.
530        let wants_grouping = flags.grouped && locale.thousands_sep.is_some();
531        let body_len = match wants_grouping {
532            true => body.len() + locale.separator_count(body.len()),
533            false => body.len(),
534        };
535
536        // Resolve the precision.
537        // In the case of a non-numeric conversion, update the precision to at least the
538        // length of the string.
539        let prec = if !spec_is_numeric {
540            prec.unwrap_or(body_len)
541        } else {
542            prec.unwrap_or(1).max(body_len)
543        };
544
545        let prefix_len = prefix.len();
546        let unpadded_width = prefix_len.checked_add(prec).ok_or(Error::Overflow)?;
547        let width = width.max(unpadded_width);
548
549        // Pad on the left with spaces to the desired width?
550        if !flags.left_adj && !flags.zero_pad {
551            pad(f, ' ', width, unpadded_width)?;
552        }
553
554        // Output any prefix.
555        f.write_str(prefix)?;
556
557        // Pad after the prefix with zeros to the desired width?
558        if !flags.left_adj && flags.zero_pad {
559            pad(f, '0', width, unpadded_width)?;
560        }
561
562        // Pad on the left to the given precision?
563        pad(f, '0', prec, body_len)?;
564
565        // Output the actual value, perhaps with grouping.
566        if wants_grouping {
567            f.write_str(&locale.apply_grouping(body))?;
568        } else {
569            f.write_str(body)?;
570        }
571
572        // Pad on the right with spaces if we are left adjusted?
573        if flags.left_adj {
574            pad(f, ' ', width, unpadded_width)?;
575        }
576
577        out_len = out_len.checked_add(width).ok_or(Error::Overflow)?;
578    }
579
580    // Too many args?
581    if args.next().is_some() {
582        return Err(Error::ExtraArg);
583    }
584    Ok(out_len)
585}