printf_compat/lib.rs
1//! `printf` reimplemented in Rust
2//!
3//! This is a complete reimplementation of `printf` in Rust, using the unstable
4//! (i.e. **requires a Nightly compiler**) `c_variadic` feature.
5//!
6//! - [Many C][sigrok-log] [libraries][libusb-log] provide a way to provide a
7//! custom log callback. With this crate, you can provide a pure Rust option,
8//! and do whatever you want with it. Log it to the console, store it in a
9//! string, or do anything else.
10//! - If you're writing a Rust-first program for a microcontroller and need to
11//! interface with a C library, you might not *have* a libc and have to
12//! reimplement it yourself. If it uses `printf`, use this crate to easily add
13//! your own output. [`core::fmt`] too big? No problem! Write your own
14//! formatting code, or use a minimal formatting library like [`ufmt`] or
15//! [`defmt`]. Don't need *every* single option given by `printf` format
16//! strings? No problem! Just don't implement it.
17//! - Likewise, if you're using `wasm32-unknown-unknown` instead of emscripten
18//! (as wasm-bindgen is only compatible with the former), you have no libc. If
19//! you want to interface with a C library, you'll have to do it all yourself.
20//! With this crate, that turns into 5 lines instead of hundreds for `printf`.
21//!
22//! # Benefits
23//!
24//! ## ⚒ Modular
25//!
26//! printf-compat lets you pick how you want to output a message. Use
27//! pre-written adapters for [`fmt::Write`][output::fmt_write] (like a
28//! [`String`]) or [`io::Write`][output::io_write] (like
29//! [`io::stdout()`][std::io::stdout]), or implement your own.
30//!
31//! ## 🔬 Small
32//!
33//! This crate is `no_std` compatible (`printf-compat = { version = "0.1",
34//! default-features = false }` in your Cargo.toml). The main machinery doesn't
35//! require the use of [`core::fmt`], and it can't panic.
36//!
37//! ## 🔒 Safe (as can be)
38//!
39//! Of course, `printf` is *completely* unsafe, as it requires the use of
40//! `va_list`. However, outside of that, all of the actual string parsing is
41//! written in completely safe Rust. No buffer overflow attacks!
42//!
43//! The `n` format specifier, which writes to a user-provided pointer, is
44//! considered a serious security vulnerability if a user-provided string is
45//! ever passed to `printf`. It *is* supported by this crate; however, it
46//! doesn't do anything by default, and you'll have to explicitly do the writing
47//! yourself.
48//!
49//! ## 🧹 Tested
50//!
51//! A wide [test suite] is used to ensure that many different possibilities are
52//! identical to glibc's `printf`. [Differences are
53//! documented][output::fmt_write#differences].
54//!
55//! # Getting Started
56//!
57//! Start by adding the unstable feature:
58//!
59//! ```rust
60//! #![feature(c_variadic)]
61//! ```
62//!
63//! Now, add your function signature:
64//!
65//! ```rust
66//! # #![feature(c_variadic)]
67//! use cty::{c_char, c_int};
68//!
69//! #[no_mangle]
70//! unsafe extern "C" fn c_library_print(str: *const c_char, mut args: ...) -> c_int {
71//! todo!()
72//! }
73//! ```
74//!
75//! If you have access to [`std`], i.e. not an embedded platform, you can use
76//! [`std::os::raw`] instead of [`cty`]. Also, think about what you're doing:
77//!
78//! - If you're implementing `printf` *because you don't have one*, you'll want to
79//! call it `printf` and add `#[no_mangle]`.
80//! - Likewise, if you're creating a custom log function for a C library and it
81//! expects to call a globally-defined function, keep `#[no_mangle]` and
82//! rename the function to what it expects.
83//! - On the other hand, if your C library expects you to call a function to
84//! register a callback ([example 1][sigrok-log], [example 2][libusb-log]),
85//! remove `#[no_mangle]`.
86//!
87//! Now, add your logic:
88//!
89//! ```rust
90//! # #![feature(c_variadic)]
91//! # use cty::{c_char, c_int};
92//! # #[no_mangle]
93//! # unsafe extern "C" fn c_library_print(str: *const c_char, mut args: ...) -> c_int {
94//! use printf_compat::{format, output};
95//! let mut s = String::new();
96//! let bytes_written = format(str, args.as_va_list(), output::fmt_write(&mut s));
97//! println!("{}", s);
98//! bytes_written
99//! # }
100//! ```
101//!
102//! Of course, replace [`output::fmt_write`] with whatever you like—some are
103//! provided for you in [`output`]. If you'd like to write your own, follow
104//! their function signature: you need to provide a function to [`format()`]
105//! that takes an [`Argument`] and returns the number of bytes written (although
106//! you don't *need* to if your C library doesn't use it) or -1 if there was an
107//! error.
108//!
109//! [sigrok-log]: https://sigrok.org/api/libsigrok/unstable/a00074.html#ga4240b8fe79be72ef758f40f9acbd4316
110//! [libusb-log]: http://libusb.sourceforge.net/api-1.0/group__libusb__lib.html#ga2efb66b8f16ffb0851f3907794c06e20
111//! [test suite]: https://github.com/lights0123/printf-compat/blob/master/src/tests.rs
112//! [`ufmt`]: https://docs.rs/ufmt/
113//! [`defmt`]: https://defmt.ferrous-systems.com/
114
115#![cfg_attr(not(feature = "std"), no_std)]
116#![feature(c_variadic)]
117
118use core::fmt;
119use cstr_core::CStr;
120use cty::*;
121
122pub mod output;
123mod parser;
124#[cfg(test)]
125mod tests;
126use argument::*;
127pub use parser::format;
128pub mod argument {
129 use super::*;
130
131 bitflags::bitflags! {
132 /// Flags field.
133 ///
134 /// Definitions from
135 /// [Wikipedia](https://en.wikipedia.org/wiki/Printf_format_string#Flags_field).
136 pub struct Flags: u8 {
137 /// Left-align the output of this placeholder. (The default is to
138 /// right-align the output.)
139 const LEFT_ALIGN = 0b00000001;
140 /// Prepends a plus for positive signed-numeric types. positive =
141 /// `+`, negative = `-`.
142 ///
143 /// (The default doesn't prepend anything in front of positive
144 /// numbers.)
145 const PREPEND_PLUS = 0b00000010;
146 /// Prepends a space for positive signed-numeric types. positive = `
147 /// `, negative = `-`. This flag is ignored if the
148 /// [`PREPEND_PLUS`][Flags::PREPEND_PLUS] flag exists.
149 ///
150 /// (The default doesn't prepend anything in front of positive
151 /// numbers.)
152 const PREPEND_SPACE = 0b00000100;
153 /// When the 'width' option is specified, prepends zeros for numeric
154 /// types. (The default prepends spaces.)
155 ///
156 /// For example, `printf("%4X",3)` produces ` 3`, while
157 /// `printf("%04X",3)` produces `0003`.
158 const PREPEND_ZERO = 0b00001000;
159 /// The integer or exponent of a decimal has the thousands grouping
160 /// separator applied.
161 const THOUSANDS_GROUPING = 0b00010000;
162 /// Alternate form:
163 ///
164 /// For `g` and `G` types, trailing zeros are not removed. \
165 /// For `f`, `F`, `e`, `E`, `g`, `G` types, the output always
166 /// contains a decimal point. \ For `o`, `x`, `X` types,
167 /// the text `0`, `0x`, `0X`, respectively, is prepended
168 /// to non-zero numbers.
169 const ALTERNATE_FORM = 0b00100000;
170 }
171 }
172
/// Notation selected by a floating-point conversion character.
///
/// Each variant documents the conversion character it stands for; the
/// `Upper*` variants are the uppercase spellings of the same notation.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum DoubleFormat {
    /// `f`
    Normal,
    /// `F`
    UpperNormal,
    /// `e`
    Scientific,
    /// `E`
    UpperScientific,
    /// `g`
    Auto,
    /// `G`
    UpperAuto,
    /// `a`
    Hex,
    /// `A`
    UpperHex,
}

impl DoubleFormat {
    /// Reports whether this is one of the uppercase (`Upper*`) variants.
    pub fn is_upper(self) -> bool {
        match self {
            Self::UpperNormal | Self::UpperScientific | Self::UpperAuto | Self::UpperHex => true,
            Self::Normal | Self::Scientific | Self::Auto | Self::Hex => false,
        }
    }

    /// Returns the same notation in the requested case.
    ///
    /// The notation (normal, scientific, auto, hex) is kept; only the case is
    /// chosen by `upper`, regardless of the case `self` currently has.
    pub fn set_upper(self, upper: bool) -> Self {
        // Identify the notation family first, then choose the member.
        let (lowercase, uppercase) = match self {
            Self::Normal | Self::UpperNormal => (Self::Normal, Self::UpperNormal),
            Self::Scientific | Self::UpperScientific => (Self::Scientific, Self::UpperScientific),
            Self::Auto | Self::UpperAuto => (Self::Auto, Self::UpperAuto),
            Self::Hex | Self::UpperHex => (Self::Hex, Self::UpperHex),
        };
        if upper {
            uppercase
        } else {
            lowercase
        }
    }
}
234
235 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
236 #[non_exhaustive]
237 pub enum SignedInt {
238 Int(c_int),
239 Char(c_schar),
240 Short(c_short),
241 Long(c_long),
242 LongLong(c_longlong),
243 Isize(isize),
244 }
245
246 impl From<SignedInt> for i64 {
247 fn from(num: SignedInt) -> Self {
248 match num {
249 SignedInt::Int(x) => x as i64,
250 SignedInt::Char(x) => x as i64,
251 SignedInt::Short(x) => x as i64,
252 SignedInt::Long(x) => x as i64,
253 SignedInt::LongLong(x) => x as i64,
254 SignedInt::Isize(x) => x as i64,
255 }
256 }
257 }
258
259 impl SignedInt {
260 pub fn is_sign_negative(self) -> bool {
261 match self {
262 SignedInt::Int(x) => x < 0,
263 SignedInt::Char(x) => x < 0,
264 SignedInt::Short(x) => x < 0,
265 SignedInt::Long(x) => x < 0,
266 SignedInt::LongLong(x) => x < 0,
267 SignedInt::Isize(x) => x < 0,
268 }
269 }
270 }
271
272 impl fmt::Display for SignedInt {
273 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
274 match self {
275 SignedInt::Int(x) => fmt::Display::fmt(x, f),
276 SignedInt::Char(x) => fmt::Display::fmt(x, f),
277 SignedInt::Short(x) => fmt::Display::fmt(x, f),
278 SignedInt::Long(x) => fmt::Display::fmt(x, f),
279 SignedInt::LongLong(x) => fmt::Display::fmt(x, f),
280 SignedInt::Isize(x) => fmt::Display::fmt(x, f),
281 }
282 }
283 }
284
285 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
286 #[non_exhaustive]
287 pub enum UnsignedInt {
288 Int(c_uint),
289 Char(c_uchar),
290 Short(c_ushort),
291 Long(c_ulong),
292 LongLong(c_ulonglong),
293 Isize(usize),
294 }
295
296 impl From<UnsignedInt> for u64 {
297 fn from(num: UnsignedInt) -> Self {
298 match num {
299 UnsignedInt::Int(x) => x as u64,
300 UnsignedInt::Char(x) => x as u64,
301 UnsignedInt::Short(x) => x as u64,
302 UnsignedInt::Long(x) => x as u64,
303 UnsignedInt::LongLong(x) => x as u64,
304 UnsignedInt::Isize(x) => x as u64,
305 }
306 }
307 }
308
309 impl fmt::Display for UnsignedInt {
310 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
311 match self {
312 UnsignedInt::Int(x) => fmt::Display::fmt(x, f),
313 UnsignedInt::Char(x) => fmt::Display::fmt(x, f),
314 UnsignedInt::Short(x) => fmt::Display::fmt(x, f),
315 UnsignedInt::Long(x) => fmt::Display::fmt(x, f),
316 UnsignedInt::LongLong(x) => fmt::Display::fmt(x, f),
317 UnsignedInt::Isize(x) => fmt::Display::fmt(x, f),
318 }
319 }
320 }
321
322 impl fmt::LowerHex for UnsignedInt {
323 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
324 match self {
325 UnsignedInt::Int(x) => fmt::LowerHex::fmt(x, f),
326 UnsignedInt::Char(x) => fmt::LowerHex::fmt(x, f),
327 UnsignedInt::Short(x) => fmt::LowerHex::fmt(x, f),
328 UnsignedInt::Long(x) => fmt::LowerHex::fmt(x, f),
329 UnsignedInt::LongLong(x) => fmt::LowerHex::fmt(x, f),
330 UnsignedInt::Isize(x) => fmt::LowerHex::fmt(x, f),
331 }
332 }
333 }
334
335 impl fmt::UpperHex for UnsignedInt {
336 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
337 match self {
338 UnsignedInt::Int(x) => fmt::UpperHex::fmt(x, f),
339 UnsignedInt::Char(x) => fmt::UpperHex::fmt(x, f),
340 UnsignedInt::Short(x) => fmt::UpperHex::fmt(x, f),
341 UnsignedInt::Long(x) => fmt::UpperHex::fmt(x, f),
342 UnsignedInt::LongLong(x) => fmt::UpperHex::fmt(x, f),
343 UnsignedInt::Isize(x) => fmt::UpperHex::fmt(x, f),
344 }
345 }
346 }
347
348 impl fmt::Octal for UnsignedInt {
349 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350 match self {
351 UnsignedInt::Int(x) => fmt::Octal::fmt(x, f),
352 UnsignedInt::Char(x) => fmt::Octal::fmt(x, f),
353 UnsignedInt::Short(x) => fmt::Octal::fmt(x, f),
354 UnsignedInt::Long(x) => fmt::Octal::fmt(x, f),
355 UnsignedInt::LongLong(x) => fmt::Octal::fmt(x, f),
356 UnsignedInt::Isize(x) => fmt::Octal::fmt(x, f),
357 }
358 }
359 }
360
361 /// An argument as passed to [`format`][crate::format].
362 #[derive(Debug, Copy, Clone, PartialEq)]
363 pub struct Argument<'a> {
364 pub flags: Flags,
365 pub width: c_int,
366 pub precision: Option<c_int>,
367 pub specifier: Specifier<'a>,
368 }
369
370 impl<'a> From<Specifier<'a>> for Argument<'a> {
371 fn from(specifier: Specifier<'a>) -> Self {
372 Self {
373 flags: Flags::empty(),
374 width: 0,
375 precision: None,
376 specifier,
377 }
378 }
379 }
380
381 /// A [format specifier](https://en.wikipedia.org/wiki/Printf_format_string#Type_field).
382 #[derive(Debug, Copy, Clone, PartialEq)]
383 #[non_exhaustive]
384 pub enum Specifier<'a> {
385 /// `%`
386 Percent,
387 /// `d`, `i`
388 Int(SignedInt),
389 /// `u`
390 Uint(UnsignedInt),
391 /// `o`
392 Octal(UnsignedInt),
393 /// `f`, `F`, `e`, `E`, `g`, `G`, `a`, `A`
394 Double { value: f64, format: DoubleFormat },
395 /// string outside of formatting
396 Bytes(&'a [u8]),
397 /// `s`
398 ///
399 /// The same as [`Bytes`][Specifier::Bytes] but guaranteed to be
400 /// null-terminated. This can be used for optimizations, where if you
401 /// need to null terminate a string to print it, you can skip that step.
402 String(&'a CStr),
403 /// `c`
404 Char(u8),
405 /// `x`
406 Hex(UnsignedInt),
407 /// `X`
408 UpperHex(UnsignedInt),
409 /// `p`
410 Pointer(*const ()),
411 /// `n`
412 ///
413 /// # Safety
414 ///
415 /// This can be a serious security vulnerability if the format specifier
416 /// of `printf` is allowed to be user-specified. This shouldn't ever
417 /// happen, but poorly-written software may do so.
418 WriteBytesWritten(c_int, *const c_int),
419 }
420}