printf_compat/
parser.rs

1use core::ffi::VaList;
2
3use cstr_core::CStr;
4use cty::*;
5
6use crate::{Argument, DoubleFormat, Flags, SignedInt, Specifier, UnsignedInt};
7use itertools::Itertools;
8
/// Drop the first byte of `sub`, yielding an empty slice when `sub` is already empty.
fn next_char(sub: &[u8]) -> &[u8] {
    match sub {
        [_, rest @ ..] => rest,
        [] => &[],
    }
}
12
13/// Parse the [Flags field](https://en.wikipedia.org/wiki/Printf_format_string#Flags_field).
14fn parse_flags(mut sub: &[u8]) -> (Flags, &[u8]) {
15    let mut flags: Flags = Flags::empty();
16    while let Some(&ch) = sub.get(0) {
17        flags.insert(match ch {
18            b'-' => Flags::LEFT_ALIGN,
19            b'+' => Flags::PREPEND_PLUS,
20            b' ' => Flags::PREPEND_SPACE,
21            b'0' => Flags::PREPEND_ZERO,
22            b'\'' => Flags::THOUSANDS_GROUPING,
23            b'#' => Flags::ALTERNATE_FORM,
24            _ => break,
25        });
26        sub = next_char(sub)
27    }
28    (flags, sub)
29}
30
31/// Parse the [Width field](https://en.wikipedia.org/wiki/Printf_format_string#Width_field).
32unsafe fn parse_width<'a>(mut sub: &'a [u8], args: &mut VaList) -> (c_int, &'a [u8]) {
33    let mut width: c_int = 0;
34    if sub.get(0) == Some(&b'*') {
35        return (args.arg(), next_char(sub));
36    }
37    while let Some(&ch) = sub.get(0) {
38        match ch {
39            // https://rust-malaysia.github.io/code/2020/07/11/faster-integer-parsing.html#the-bytes-solution
40            b'0'..=b'9' => width = width * 10 + (ch & 0x0f) as c_int,
41            _ => break,
42        }
43        sub = next_char(sub);
44    }
45    (width, sub)
46}
47
48/// Parse the [Precision field](https://en.wikipedia.org/wiki/Printf_format_string#Precision_field).
49unsafe fn parse_precision<'a>(sub: &'a [u8], args: &mut VaList) -> (Option<c_int>, &'a [u8]) {
50    match sub.get(0) {
51        Some(&b'.') => {
52            let (prec, sub) = parse_width(next_char(sub), args);
53            (Some(prec), sub)
54        }
55        _ => (None, sub),
56    }
57}
58
/// Length modifier of a conversion specification.
///
/// It selects which C integer type an integer conversion reads from the argument list.
#[derive(Clone, Copy, Debug)]
enum Length {
    /// No length modifier.
    Int,
    /// `hh`
    Char,
    /// `h`
    Short,
    /// `l`
    Long,
    /// `ll`
    LongLong,
    /// `z`
    Usize,
    /// `t`
    Isize,
}
75
76impl Length {
77    unsafe fn parse_signed(self, args: &mut VaList) -> SignedInt {
78        match self {
79            Length::Int => SignedInt::Int(args.arg()),
80            Length::Char => SignedInt::Char(args.arg()),
81            Length::Short => SignedInt::Short(args.arg()),
82            Length::Long => SignedInt::Long(args.arg()),
83            Length::LongLong => SignedInt::LongLong(args.arg()),
84            // for some reason, these exist as different options, yet produce the same output
85            Length::Usize | Length::Isize => SignedInt::Isize(args.arg()),
86        }
87    }
88    unsafe fn parse_unsigned(self, args: &mut VaList) -> UnsignedInt {
89        match self {
90            Length::Int => UnsignedInt::Int(args.arg()),
91            Length::Char => UnsignedInt::Char(args.arg()),
92            Length::Short => UnsignedInt::Short(args.arg()),
93            Length::Long => UnsignedInt::Long(args.arg()),
94            Length::LongLong => UnsignedInt::LongLong(args.arg()),
95            // for some reason, these exist as different options, yet produce the same output
96            Length::Usize | Length::Isize => UnsignedInt::Isize(args.arg()),
97        }
98    }
99}
100
101/// Parse the [Length field](https://en.wikipedia.org/wiki/Printf_format_string#Length_field).
102fn parse_length(sub: &[u8]) -> (Length, &[u8]) {
103    match sub.get(0).copied() {
104        Some(b'h') => match sub.get(1).copied() {
105            Some(b'h') => (Length::Char, sub.get(2..).unwrap_or(&[])),
106            _ => (Length::Short, next_char(sub)),
107        },
108        Some(b'l') => match sub.get(1).copied() {
109            Some(b'l') => (Length::LongLong, sub.get(2..).unwrap_or(&[])),
110            _ => (Length::Long, next_char(sub)),
111        },
112        Some(b'z') => (Length::Usize, next_char(sub)),
113        Some(b't') => (Length::Isize, next_char(sub)),
114        _ => (Length::Int, sub),
115    }
116}
117
118/// Parse a format parameter and write it somewhere.
119///
120/// # Safety
121///
122/// [`VaList`]s are *very* unsafe. The passed `format` and `args` parameter must be a valid [`printf` format string](http://www.cplusplus.com/reference/cstdio/printf/).
123pub unsafe fn format(
124    format: *const c_char,
125    mut args: VaList,
126    mut handler: impl FnMut(Argument) -> c_int,
127) -> c_int {
128    let str = CStr::from_ptr(format).to_bytes();
129    let mut iter = str.split(|&c| c == b'%');
130    let mut written = 0;
131
132    macro_rules! err {
133        ($ex: expr) => {{
134            let res = $ex;
135            if res < 0 {
136                return -1;
137            } else {
138                written += res;
139            }
140        }};
141    }
142    if let Some(begin) = iter.next() {
143        err!(handler(Specifier::Bytes(begin).into()));
144    }
145    let mut last_was_percent = false;
146    for (sub, next) in iter.map(Some).chain(core::iter::once(None)).tuple_windows() {
147        let sub = match sub {
148            Some(sub) => sub,
149            None => break,
150        };
151        if last_was_percent {
152            err!(handler(Specifier::Bytes(sub).into()));
153            last_was_percent = false;
154            continue;
155        }
156        let (flags, sub) = parse_flags(sub);
157        let (width, sub) = parse_width(sub, &mut args);
158        let (precision, sub) = parse_precision(sub, &mut args);
159        let (length, sub) = parse_length(sub);
160        let ch = sub
161            .get(0)
162            .unwrap_or(if next.is_some() { &b'%' } else { &0 });
163        err!(handler(Argument {
164            flags,
165            width,
166            precision,
167            specifier: match ch {
168                b'%' => {
169                    last_was_percent = true;
170                    Specifier::Percent
171                }
172                b'd' | b'i' => Specifier::Int(length.parse_signed(&mut args)),
173                b'x' => Specifier::Hex(length.parse_unsigned(&mut args)),
174                b'X' => Specifier::UpperHex(length.parse_unsigned(&mut args)),
175                b'u' => Specifier::Uint(length.parse_unsigned(&mut args)),
176                b'o' => Specifier::Octal(length.parse_unsigned(&mut args)),
177                b'f' | b'F' => Specifier::Double {
178                    value: args.arg(),
179                    format: DoubleFormat::Normal.set_upper(ch.is_ascii_uppercase()),
180                },
181                b'e' | b'E' => Specifier::Double {
182                    value: args.arg(),
183                    format: DoubleFormat::Scientific.set_upper(ch.is_ascii_uppercase()),
184                },
185                b'g' | b'G' => Specifier::Double {
186                    value: args.arg(),
187                    format: DoubleFormat::Auto.set_upper(ch.is_ascii_uppercase()),
188                },
189                b'a' | b'A' => Specifier::Double {
190                    value: args.arg(),
191                    format: DoubleFormat::Hex.set_upper(ch.is_ascii_uppercase()),
192                },
193                b's' => Specifier::String(CStr::from_ptr(args.arg())),
194                b'c' => Specifier::Char(args.arg()),
195                b'p' => Specifier::Pointer(args.arg()),
196                b'n' => Specifier::WriteBytesWritten(written, args.arg()),
197                _ => return -1,
198            },
199        }));
200        err!(handler(Specifier::Bytes(next_char(sub)).into()));
201    }
202    written
203}