printf_compat/lib.rs
1//! `printf` reimplemented in Rust
2//!
3//! This is a complete reimplementation of `printf` in Rust, using the unstable
4//! (i.e. **requires a Nightly compiler**) `c_variadic` feature.
5//!
//! - [Many C][sigrok-log] [libraries][libusb-log] let you register a
7//! custom log callback. With this crate, you can provide a pure Rust option,
8//! and do whatever you want with it. Log it to the console, store it in a
9//! string, or do anything else.
10//! - If you're writing a Rust-first program for a microcontroller and need to
11//! interface with a C library, you might not *have* a libc and have to
12//! reimplement it yourself. If it uses `printf`, use this crate to easily add
13//! your own output. [`core::fmt`] too big? No problem! Write your own
14//! formatting code, or use a minimal formatting library like [`ufmt`] or
15//! [`defmt`]. Don't need *every* single option given by `printf` format
16//! strings? No problem! Just don't implement it.
17//! - Likewise, if you're using `wasm32-unknown-unknown` instead of emscripten
18//! (as wasm-bindgen is only compatible with the former), you have no libc. If
19//! you want to interface with a C library, you'll have to do it all yourself.
20//! With this crate, that turns into 5 lines instead of hundreds for `printf`.
21//!
22//! # Benefits
23//!
24//! ## ⚒ Modular
25//!
26//! printf-compat lets you pick how you want to output a message. Use
27//! pre-written adapters for [`fmt::Write`][output::fmt_write] (like a
28//! [`String`]) or [`io::Write`][output::io_write] (like
29//! [`io::stdout()`][std::io::stdout]), or implement your own.
30//!
31//! ## 🔬 Small
32//!
33//! This crate is `no_std` compatible (with `default-features = false`).
34//! The main machinery doesn't require the use of [`core::fmt`], and it can't panic.
35//!
36//! ## 🔒 Safe (as can be)
37//!
38//! Of course, `printf` is *completely* unsafe, as it requires the use of
39//! `va_list`. However, outside of that, all of the actual string parsing is
40//! written in completely safe Rust. No buffer overflow attacks!
41//!
42//! The `n` format specifier, which writes to a user-provided pointer, is
43//! considered a serious security vulnerability if a user-provided string is
44//! ever passed to `printf`. It *is* supported by this crate; however, it
45//! doesn't do anything by default, and you'll have to explicitly do the writing
46//! yourself.
47//!
48//! ## 🧹 Tested
49//!
50//! A wide [test suite] is used to ensure that many different possibilities are
51//! identical to glibc's `printf`. [Differences are
52//! documented][output::fmt_write#differences].
53//!
54//! # Getting Started
55//!
56//! Start by adding the unstable feature:
57//!
58//! ```rust
59//! #![feature(c_variadic)]
60//! ```
61//!
62//! Now, add your function signature:
63//!
64//! ```rust
65//! # #![feature(c_variadic)]
66//! use core::ffi::{c_char, c_int};
67//!
68//! #[no_mangle]
69//! unsafe extern "C" fn c_library_print(str: *const c_char, mut args: ...) -> c_int {
70//! todo!()
71//! }
72//! ```
73//!
74//! Think about what you're doing:
75//!
//! - If you're implementing `printf` *because you don't have one*, you'll want to
77//! call it `printf` and add `#[no_mangle]`.
78//! - Likewise, if you're creating a custom log function for a C library and it
79//! expects to call a globally-defined function, keep `#[no_mangle]` and
80//! rename the function to what it expects.
81//! - On the other hand, if your C library expects you to call a function to
82//! register a callback ([example 1][sigrok-log], [example 2][libusb-log]),
83//! remove `#[no_mangle]`.
84//!
85//! Now, add your logic:
86//!
87//! ```rust
88//! # #![feature(c_variadic)]
89//! # use core::ffi::{c_char, c_int};
90//! # #[no_mangle]
91//! # unsafe extern "C" fn c_library_print(str: *const c_char, mut args: ...) -> c_int {
92//! use printf_compat::{format, output};
93//! let mut s = String::new();
94//! let bytes_written = format(str, args.as_va_list(), output::fmt_write(&mut s));
95//! println!("{}", s);
96//! bytes_written
97//! # }
98//! ```
99//!
100//! Of course, replace [`output::fmt_write`] with whatever you like—some are
101//! provided for you in [`output`]. If you'd like to write your own, follow
102//! their function signature: you need to provide a function to [`format()`]
103//! that takes an [`Argument`] and returns the number of bytes written (although
104//! you don't *need* to if your C library doesn't use it) or -1 if there was an
105//! error.
106//!
107//! [sigrok-log]: https://sigrok.org/api/libsigrok/unstable/a00074.html#ga4240b8fe79be72ef758f40f9acbd4316
108//! [libusb-log]: http://libusb.sourceforge.net/api-1.0/group__libusb__lib.html#ga2efb66b8f16ffb0851f3907794c06e20
109//! [test suite]: https://github.com/lights0123/printf-compat/blob/master/src/tests.rs
110//! [`ufmt`]: https://docs.rs/ufmt/
111//! [`defmt`]: https://defmt.ferrous-systems.com/
112
113#![cfg_attr(not(any(test, feature = "std")), no_std)]
114#![feature(c_variadic)]
115
116use core::{ffi::*, fmt};
117
118pub mod output;
119mod parser;
120use argument::*;
121pub use parser::format;
122pub mod argument {
123 use super::*;
124
125 bitflags::bitflags! {
126 /// Flags field.
127 ///
128 /// Definitions from
129 /// [Wikipedia](https://en.wikipedia.org/wiki/Printf_format_string#Flags_field).
130 #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
131 pub struct Flags: u8 {
132 /// Left-align the output of this placeholder. (The default is to
133 /// right-align the output.)
134 const LEFT_ALIGN = 0b00000001;
135 /// Prepends a plus for positive signed-numeric types. positive =
136 /// `+`, negative = `-`.
137 ///
138 /// (The default doesn't prepend anything in front of positive
139 /// numbers.)
140 const PREPEND_PLUS = 0b00000010;
141 /// Prepends a space for positive signed-numeric types. positive = `
142 /// `, negative = `-`. This flag is ignored if the
143 /// [`PREPEND_PLUS`][Flags::PREPEND_PLUS] flag exists.
144 ///
145 /// (The default doesn't prepend anything in front of positive
146 /// numbers.)
147 const PREPEND_SPACE = 0b00000100;
148 /// When the 'width' option is specified, prepends zeros for numeric
149 /// types. (The default prepends spaces.)
150 ///
151 /// For example, `printf("%4X",3)` produces ` 3`, while
152 /// `printf("%04X",3)` produces `0003`.
153 const PREPEND_ZERO = 0b00001000;
154 /// The integer or exponent of a decimal has the thousands grouping
155 /// separator applied.
156 const THOUSANDS_GROUPING = 0b00010000;
157 /// Alternate form:
158 ///
159 /// For `g` and `G` types, trailing zeros are not removed. \
160 /// For `f`, `F`, `e`, `E`, `g`, `G` types, the output always
161 /// contains a decimal point. \ For `o`, `x`, `X` types,
162 /// the text `0`, `0x`, `0X`, respectively, is prepended
163 /// to non-zero numbers.
164 const ALTERNATE_FORM = 0b00100000;
165 }
166 }
167
168 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
169 pub enum DoubleFormat {
170 /// `f`
171 Normal,
172 /// `F`
173 UpperNormal,
174 /// `e`
175 Scientific,
176 /// `E`
177 UpperScientific,
178 /// `g`
179 Auto,
180 /// `G`
181 UpperAuto,
182 /// `a`
183 Hex,
184 /// `A`
185 UpperHex,
186 }
187
188 impl DoubleFormat {
189 /// If the format is uppercase.
190 pub fn is_upper(self) -> bool {
191 use DoubleFormat::*;
192 matches!(self, UpperNormal | UpperScientific | UpperAuto | UpperHex)
193 }
194
195 pub fn set_upper(self, upper: bool) -> Self {
196 use DoubleFormat::*;
197 match self {
198 Normal | UpperNormal => {
199 if upper {
200 UpperNormal
201 } else {
202 Normal
203 }
204 }
205 Scientific | UpperScientific => {
206 if upper {
207 UpperScientific
208 } else {
209 Scientific
210 }
211 }
212 Auto | UpperAuto => {
213 if upper {
214 UpperAuto
215 } else {
216 Auto
217 }
218 }
219 Hex | UpperHex => {
220 if upper {
221 UpperHex
222 } else {
223 Hex
224 }
225 }
226 }
227 }
228 }
229
230 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
231 #[non_exhaustive]
232 pub enum SignedInt {
233 Int(c_int),
234 Char(c_schar),
235 Short(c_short),
236 Long(c_long),
237 LongLong(c_longlong),
238 Isize(isize),
239 }
240
241 impl From<SignedInt> for i64 {
242 fn from(num: SignedInt) -> Self {
243 // Some casts are only needed on some platforms.
244 #[allow(clippy::unnecessary_cast)]
245 match num {
246 SignedInt::Int(x) => x as i64,
247 SignedInt::Char(x) => x as i64,
248 SignedInt::Short(x) => x as i64,
249 SignedInt::Long(x) => x as i64,
250 SignedInt::LongLong(x) => x as i64,
251 SignedInt::Isize(x) => x as i64,
252 }
253 }
254 }
255
256 impl SignedInt {
257 pub fn is_sign_negative(self) -> bool {
258 match self {
259 SignedInt::Int(x) => x < 0,
260 SignedInt::Char(x) => x < 0,
261 SignedInt::Short(x) => x < 0,
262 SignedInt::Long(x) => x < 0,
263 SignedInt::LongLong(x) => x < 0,
264 SignedInt::Isize(x) => x < 0,
265 }
266 }
267 }
268
269 impl fmt::Display for SignedInt {
270 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271 match self {
272 SignedInt::Int(x) => fmt::Display::fmt(x, f),
273 SignedInt::Char(x) => fmt::Display::fmt(x, f),
274 SignedInt::Short(x) => fmt::Display::fmt(x, f),
275 SignedInt::Long(x) => fmt::Display::fmt(x, f),
276 SignedInt::LongLong(x) => fmt::Display::fmt(x, f),
277 SignedInt::Isize(x) => fmt::Display::fmt(x, f),
278 }
279 }
280 }
281
282 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
283 #[non_exhaustive]
284 pub enum UnsignedInt {
285 Int(c_uint),
286 Char(c_uchar),
287 Short(c_ushort),
288 Long(c_ulong),
289 LongLong(c_ulonglong),
290 Isize(usize),
291 }
292
293 impl From<UnsignedInt> for u64 {
294 fn from(num: UnsignedInt) -> Self {
295 // Some casts are only needed on some platforms.
296 #[allow(clippy::unnecessary_cast)]
297 match num {
298 UnsignedInt::Int(x) => x as u64,
299 UnsignedInt::Char(x) => x as u64,
300 UnsignedInt::Short(x) => x as u64,
301 UnsignedInt::Long(x) => x as u64,
302 UnsignedInt::LongLong(x) => x as u64,
303 UnsignedInt::Isize(x) => x as u64,
304 }
305 }
306 }
307
308 impl fmt::Display for UnsignedInt {
309 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310 match self {
311 UnsignedInt::Int(x) => fmt::Display::fmt(x, f),
312 UnsignedInt::Char(x) => fmt::Display::fmt(x, f),
313 UnsignedInt::Short(x) => fmt::Display::fmt(x, f),
314 UnsignedInt::Long(x) => fmt::Display::fmt(x, f),
315 UnsignedInt::LongLong(x) => fmt::Display::fmt(x, f),
316 UnsignedInt::Isize(x) => fmt::Display::fmt(x, f),
317 }
318 }
319 }
320
321 impl fmt::LowerHex for UnsignedInt {
322 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323 match self {
324 UnsignedInt::Int(x) => fmt::LowerHex::fmt(x, f),
325 UnsignedInt::Char(x) => fmt::LowerHex::fmt(x, f),
326 UnsignedInt::Short(x) => fmt::LowerHex::fmt(x, f),
327 UnsignedInt::Long(x) => fmt::LowerHex::fmt(x, f),
328 UnsignedInt::LongLong(x) => fmt::LowerHex::fmt(x, f),
329 UnsignedInt::Isize(x) => fmt::LowerHex::fmt(x, f),
330 }
331 }
332 }
333
334 impl fmt::UpperHex for UnsignedInt {
335 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
336 match self {
337 UnsignedInt::Int(x) => fmt::UpperHex::fmt(x, f),
338 UnsignedInt::Char(x) => fmt::UpperHex::fmt(x, f),
339 UnsignedInt::Short(x) => fmt::UpperHex::fmt(x, f),
340 UnsignedInt::Long(x) => fmt::UpperHex::fmt(x, f),
341 UnsignedInt::LongLong(x) => fmt::UpperHex::fmt(x, f),
342 UnsignedInt::Isize(x) => fmt::UpperHex::fmt(x, f),
343 }
344 }
345 }
346
347 impl fmt::Octal for UnsignedInt {
348 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
349 match self {
350 UnsignedInt::Int(x) => fmt::Octal::fmt(x, f),
351 UnsignedInt::Char(x) => fmt::Octal::fmt(x, f),
352 UnsignedInt::Short(x) => fmt::Octal::fmt(x, f),
353 UnsignedInt::Long(x) => fmt::Octal::fmt(x, f),
354 UnsignedInt::LongLong(x) => fmt::Octal::fmt(x, f),
355 UnsignedInt::Isize(x) => fmt::Octal::fmt(x, f),
356 }
357 }
358 }
359
360 /// An argument as passed to [`format()`].
361 #[derive(Debug, Copy, Clone, PartialEq)]
362 pub struct Argument<'a> {
363 pub flags: Flags,
364 pub width: c_int,
365 pub precision: Option<c_int>,
366 pub specifier: Specifier<'a>,
367 }
368
369 impl<'a> From<Specifier<'a>> for Argument<'a> {
370 fn from(specifier: Specifier<'a>) -> Self {
371 Self {
372 flags: Flags::empty(),
373 width: 0,
374 precision: None,
375 specifier,
376 }
377 }
378 }
379
    /// A [format specifier](https://en.wikipedia.org/wiki/Printf_format_string#Type_field).
    ///
    /// Each variant carries the value belonging to it; the doc comment on a
    /// variant names the conversion character(s) it corresponds to. The enum
    /// is `#[non_exhaustive]`, so matches outside this crate need a wildcard
    /// arm.
    #[derive(Debug, Copy, Clone, PartialEq)]
    #[non_exhaustive]
    pub enum Specifier<'a> {
        /// `%`
        Percent,
        /// `d`, `i`
        Int(SignedInt),
        /// `u`
        Uint(UnsignedInt),
        /// `o`
        Octal(UnsignedInt),
        /// `f`, `F`, `e`, `E`, `g`, `G`, `a`, `A`
        ///
        /// `format` records which of these notations was requested.
        Double { value: f64, format: DoubleFormat },
        /// string outside of formatting
        Bytes(&'a [u8]),
        /// `s`
        ///
        /// The same as [`Bytes`][Specifier::Bytes] but guaranteed to be
        /// null-terminated. This can be used for optimizations, where if you
        /// need to null terminate a string to print it, you can skip that step.
        String(&'a CStr),
        /// `c`
        Char(u8),
        /// `x`
        Hex(UnsignedInt),
        /// `X`
        UpperHex(UnsignedInt),
        /// `p`
        Pointer(*const ()),
        /// `n`
        ///
        /// NOTE(review): presumably the `c_int` is the number of bytes written
        /// so far and the pointer is the user-provided destination for that
        /// count — confirm against the parser.
        ///
        /// # Safety
        ///
        /// This can be a serious security vulnerability if the format specifier
        /// of `printf` is allowed to be user-specified. This shouldn't ever
        /// happen, but poorly-written software may do so.
        WriteBytesWritten(c_int, *const c_int),
    }
419}