printf_compat/
parser.rs

1use core::ffi::*;
2
3use crate::{Argument, DoubleFormat, Flags, SignedInt, Specifier, UnsignedInt};
4use itertools::Itertools;
5
/// Drop the first byte of `sub` and return whatever follows it.
///
/// An empty input yields an empty slice instead of panicking, so callers can
/// advance unconditionally.
fn next_char(sub: &[u8]) -> &[u8] {
    match sub {
        [_, rest @ ..] => rest,
        [] => &[],
    }
}
9
10/// Parse the [Flags field](https://en.wikipedia.org/wiki/Printf_format_string#Flags_field).
11fn parse_flags(mut sub: &[u8]) -> (Flags, &[u8]) {
12    let mut flags: Flags = Flags::empty();
13    while let Some(&ch) = sub.first() {
14        flags.insert(match ch {
15            b'-' => Flags::LEFT_ALIGN,
16            b'+' => Flags::PREPEND_PLUS,
17            b' ' => Flags::PREPEND_SPACE,
18            b'0' => Flags::PREPEND_ZERO,
19            b'\'' => Flags::THOUSANDS_GROUPING,
20            b'#' => Flags::ALTERNATE_FORM,
21            _ => break,
22        });
23        sub = next_char(sub)
24    }
25    (flags, sub)
26}
27
28/// Parse the [Width field](https://en.wikipedia.org/wiki/Printf_format_string#Width_field).
29unsafe fn parse_width<'a>(mut sub: &'a [u8], args: &mut VaList) -> (c_int, &'a [u8]) {
30    let mut width: c_int = 0;
31    if sub.first() == Some(&b'*') {
32        return (args.arg(), next_char(sub));
33    }
34    while let Some(&ch) = sub.first() {
35        match ch {
36            // https://rust-malaysia.github.io/code/2020/07/11/faster-integer-parsing.html#the-bytes-solution
37            b'0'..=b'9' => width = width * 10 + (ch & 0x0f) as c_int,
38            _ => break,
39        }
40        sub = next_char(sub);
41    }
42    (width, sub)
43}
44
45/// Parse the [Precision field](https://en.wikipedia.org/wiki/Printf_format_string#Precision_field).
46unsafe fn parse_precision<'a>(sub: &'a [u8], args: &mut VaList) -> (Option<c_int>, &'a [u8]) {
47    match sub.first() {
48        Some(&b'.') => {
49            let (prec, sub) = parse_width(next_char(sub), args);
50            (Some(prec), sub)
51        }
52        _ => (None, sub),
53    }
54}
55
/// The length modifier of a conversion, i.e. how wide the integer operand is.
#[derive(Clone, Copy, Debug)]
enum Length {
    /// No modifier was present.
    Int,
    /// The `hh` modifier.
    Char,
    /// The `h` modifier.
    Short,
    /// The `l` modifier.
    Long,
    /// The `ll` modifier.
    LongLong,
    /// The `z` modifier.
    Usize,
    /// The `t` modifier.
    Isize,
}
72
73impl Length {
74    unsafe fn parse_signed(self, args: &mut VaList) -> SignedInt {
75        match self {
76            Length::Int => SignedInt::Int(args.arg()),
77            Length::Char => SignedInt::Char(args.arg::<i32>() as i8),
78            Length::Short => SignedInt::Short(args.arg::<i32>() as i16),
79            Length::Long => SignedInt::Long(args.arg()),
80            Length::LongLong => SignedInt::LongLong(args.arg()),
81            // for some reason, these exist as different options, yet produce the same output
82            Length::Usize | Length::Isize => SignedInt::Isize(args.arg()),
83        }
84    }
85    unsafe fn parse_unsigned(self, args: &mut VaList) -> UnsignedInt {
86        match self {
87            Length::Int => UnsignedInt::Int(args.arg()),
88            Length::Char => UnsignedInt::Char(args.arg::<u32>() as u8),
89            Length::Short => UnsignedInt::Short(args.arg::<u32>() as u16),
90            Length::Long => UnsignedInt::Long(args.arg()),
91            Length::LongLong => UnsignedInt::LongLong(args.arg()),
92            // for some reason, these exist as different options, yet produce the same output
93            Length::Usize | Length::Isize => UnsignedInt::Isize(args.arg()),
94        }
95    }
96}
97
98/// Parse the [Length field](https://en.wikipedia.org/wiki/Printf_format_string#Length_field).
99fn parse_length(sub: &[u8]) -> (Length, &[u8]) {
100    match sub.first().copied() {
101        Some(b'h') => match sub.get(1).copied() {
102            Some(b'h') => (Length::Char, sub.get(2..).unwrap_or(&[])),
103            _ => (Length::Short, next_char(sub)),
104        },
105        Some(b'l') => match sub.get(1).copied() {
106            Some(b'l') => (Length::LongLong, sub.get(2..).unwrap_or(&[])),
107            _ => (Length::Long, next_char(sub)),
108        },
109        Some(b'z') => (Length::Usize, next_char(sub)),
110        Some(b't') => (Length::Isize, next_char(sub)),
111        _ => (Length::Int, sub),
112    }
113}
114
115/// Parse a format parameter and write it somewhere.
116///
117/// # Safety
118///
119/// [`VaList`]s are *very* unsafe. The passed `format` and `args` parameter must be a valid [`printf` format string](http://www.cplusplus.com/reference/cstdio/printf/).
120pub unsafe fn format(
121    format: *const c_char,
122    mut args: VaList,
123    mut handler: impl FnMut(Argument) -> c_int,
124) -> c_int {
125    let str = CStr::from_ptr(format).to_bytes();
126    let mut iter = str.split(|&c| c == b'%');
127    let mut written = 0;
128
129    macro_rules! err {
130        ($ex: expr) => {{
131            let res = $ex;
132            if res < 0 {
133                return -1;
134            } else {
135                written += res;
136            }
137        }};
138    }
139    if let Some(begin) = iter.next() {
140        err!(handler(Specifier::Bytes(begin).into()));
141    }
142    let mut last_was_percent = false;
143    for (sub, next) in iter.map(Some).chain(core::iter::once(None)).tuple_windows() {
144        let sub = match sub {
145            Some(sub) => sub,
146            None => break,
147        };
148        if last_was_percent {
149            err!(handler(Specifier::Bytes(sub).into()));
150            last_was_percent = false;
151            continue;
152        }
153        let (flags, sub) = parse_flags(sub);
154        let (width, sub) = parse_width(sub, &mut args);
155        let (precision, sub) = parse_precision(sub, &mut args);
156        let (length, sub) = parse_length(sub);
157        let ch = sub
158            .first()
159            .unwrap_or(if next.is_some() { &b'%' } else { &0 });
160        err!(handler(Argument {
161            flags,
162            width,
163            precision,
164            specifier: match ch {
165                b'%' => {
166                    last_was_percent = true;
167                    Specifier::Percent
168                }
169                b'd' | b'i' => Specifier::Int(length.parse_signed(&mut args)),
170                b'x' => Specifier::Hex(length.parse_unsigned(&mut args)),
171                b'X' => Specifier::UpperHex(length.parse_unsigned(&mut args)),
172                b'u' => Specifier::Uint(length.parse_unsigned(&mut args)),
173                b'o' => Specifier::Octal(length.parse_unsigned(&mut args)),
174                b'f' | b'F' => Specifier::Double {
175                    value: args.arg(),
176                    format: DoubleFormat::Normal.set_upper(ch.is_ascii_uppercase()),
177                },
178                b'e' | b'E' => Specifier::Double {
179                    value: args.arg(),
180                    format: DoubleFormat::Scientific.set_upper(ch.is_ascii_uppercase()),
181                },
182                b'g' | b'G' => Specifier::Double {
183                    value: args.arg(),
184                    format: DoubleFormat::Auto.set_upper(ch.is_ascii_uppercase()),
185                },
186                b'a' | b'A' => Specifier::Double {
187                    value: args.arg(),
188                    format: DoubleFormat::Hex.set_upper(ch.is_ascii_uppercase()),
189                },
190                b's' => {
191                    let arg: *mut c_char = args.arg();
192                    // As a common extension supported by glibc, musl, and
193                    // others, format a NULL pointer as "(null)".
194                    if arg.is_null() {
195                        Specifier::Bytes(b"(null)")
196                    } else {
197                        Specifier::String(CStr::from_ptr(arg))
198                    }
199                }
200                b'c' => Specifier::Char(args.arg::<u32>() as u8),
201                b'p' => Specifier::Pointer(args.arg()),
202                b'n' => Specifier::WriteBytesWritten(written, args.arg()),
203                _ => return -1,
204            },
205        }));
206        err!(handler(Specifier::Bytes(next_char(sub)).into()));
207    }
208    written
209}