lexical_parse_integer/
lib.rs

1//! Fast lexical string-to-integer conversion routines.
2//!
3//! This contains high-performance methods to parse integers from bytes.
4//! Using [`from_lexical`] is analogous to [`parse`][`core-parse`],
5//! while enabling parsing from bytes as well as [`str`].
6//!
7//! [`from_lexical`]: FromLexical::from_lexical
8//! [`core-parse`]: core::str::FromStr
9//!
10//! # Getting Started
11//!
12//! To parse a number from bytes, use [`from_lexical`]:
13//!
14//! ```rust
15//! # #[no_std]
16//! # use core::str;
17//! use lexical_parse_integer::{Error, FromLexical};
18//!
19//! let value = u64::from_lexical("1234".as_bytes());
20//! assert_eq!(value, Ok(1234));
21//!
22//! let value = u64::from_lexical("18446744073709551616".as_bytes());
23//! assert_eq!(value, Err(Error::Overflow(19)));
24//!
25//! let value = u64::from_lexical("1234 }, {\"Key\", \"Value\"}}".as_bytes());
26//! assert_eq!(value, Err(Error::InvalidDigit(4)));
27//! ```
28//!
29//! If wishing to incrementally parse a string from bytes, that is, parse as
30//! many characters as possible until an invalid digit is found, you can use
31//! the partial parsers. This is useful in parsing data where the type is
32//! known, such as JSON, but where the end of the number is not yet known.
33//!
34//! ```rust
35//! # #[no_std]
36//! # use core::str;
37//! use lexical_parse_integer::{Error, FromLexical};
38//!
39//! let value = u64::from_lexical_partial("1234 }, {\"Key\", \"Value\"}}".as_bytes());
40//! assert_eq!(value, Ok((1234, 4)));
41//!
42//! let value = u64::from_lexical_partial("18446744073709551616 }, {\"Key\", \"Value\"}}".as_bytes());
43//! assert_eq!(value, Err(Error::Overflow(19)));
44//! ```
45//!
46//! # Options/Formatting API
47//!
48//! Each integer parser contains extensive formatting control through
49//! [`mod@format`], particularly digit [`separator`] support (that is,
50//! integers such as `1_2__3`). For options, we have custom formats
51//! optimized for both [`small`] and [`large`] integers.
52//!
53//! [`small`]: crate::options::SMALL_NUMBERS
54//! [`large`]: crate::options::LARGE_NUMBERS
55//! [`separator`]: NumberFormat::digit_separator
56//!
57//! To optimize for smaller integers at the expense of performance of larger
58//! ones, you can use [`OptionsBuilder::no_multi_digit`] (defaults to [`true`]).
59//!
60//! ```rust
61//! # use core::{num, str};
62//! use lexical_parse_integer::{options, NumberFormatBuilder, FromLexicalWithOptions};
63//!
64//! const FORMAT: u128 = NumberFormatBuilder::new().build_strict();
65//!
66//! // a bit faster
67//! let value = u64::from_lexical_with_options::<FORMAT>(b"12", &options::SMALL_NUMBERS);
68//! assert_eq!(value, Ok(12));
69//!
70//! // a lot slower
71//! let value = u64::from_lexical_with_options::<FORMAT>(b"18446744073709551615", &options::SMALL_NUMBERS);
72//! assert_eq!(value, Ok(0xffffffffffffffff));
73//! ```
74//!
75//! # Features
76//!
77//! * `format` - Add support for parsing custom integer formats.
78//! * `power-of-two` - Add support for parsing power-of-two integer strings.
79//! * `radix` - Add support for strings of any radix.
80//! * `compact` - Reduce code size at the cost of performance.
81//! * `std` (Default) - Disable to allow use in a [`no_std`] environment.
82//!
83//! [`no_std`]: https://docs.rust-embedded.org/book/intro/no-std.html
84//!
85//! A complete description of supported features includes:
86//!
87//! #### format
88//!
89//! Add support for custom integer formatting specifications. This should be
90//! used in conjunction with [`Options`] for extensible integer parsing. This
91//! allows changing the use of digit separators, requiring or not allowing
92//! signs, and more.
93//!
94//! ##### JSON
95//!
96//! For example, in JSON, the following integers are valid or invalid:
97//!
98//! ```text
99//! -1          // valid
100//! +1          // invalid
101//! 1           // valid
102//! ```
103//!
104//! All of these are valid in our default format (the format of Rust strings),
105//! so we must use a custom format to parse JSON strings:
106//!
107//! ```rust
108//! # #[cfg(feature = "format")] {
109//! # use core::str;
110//! use lexical_parse_integer::{format, Error, FromLexicalWithOptions, Options};
111//!
112//! const OPTIONS: Options = Options::new();
113//! let value = u64::from_lexical_with_options::<{ format::JSON }>("1234".as_bytes(), &OPTIONS);
114//! assert_eq!(value, Ok(1234));
115//!
116//! let value = u64::from_lexical_with_options::<{ format::JSON }>("+1234".as_bytes(), &OPTIONS);
117//! assert_eq!(value, Err(Error::InvalidPositiveSign(0)));
118//! # }
119//! ```
120//!
121//! ##### Custom Format
122//!
123//! An example of building a custom format with digit separator support is:
124//!
125//! ```rust
126//! # #[cfg(all(feature = "format", feature = "power-of-two"))] {
127//! # use core::{num, str};
128//! use lexical_parse_integer::{NumberFormatBuilder, Options, FromLexicalWithOptions};
129//!
130//! const FORMAT: u128 = NumberFormatBuilder::new()
131//!     // require that a `+` or `-` precedes the number
132//!     .required_mantissa_sign(true)
133//!     // allow internal digit separators, that is, a special character between digits
134//!     .integer_internal_digit_separator(true)
135//!     // use `_` as the digit separator
136//!     .digit_separator(num::NonZeroU8::new(b'_'))
137//!     // allow an optional `0d` prefix to the number
138//!     .base_prefix(num::NonZeroU8::new(b'd'))
139//!     // build the number format, panicking on error
140//!     .build_strict();
141//! const OPTIONS: Options = Options::new();
142//!
143//! let value = u64::from_lexical_with_options::<FORMAT>("+12_3_4".as_bytes(), &OPTIONS);
144//! assert_eq!(value, Ok(1234));
145//!
146//! let value = u64::from_lexical_with_options::<FORMAT>("+0d12_3_4".as_bytes(), &OPTIONS);
147//! assert_eq!(value, Ok(1234));
148//! # }
149//! ```
150//!
151//! For a list of all supported fields, see [Parse Integer
152//! Fields][NumberFormatBuilder#parse-integer-fields].
153//!
154//! Enabling the [`format`](crate#format) API significantly increases compile
155//! times, however, it enables a large amount of customization in how integers
156//! are parsed.
157//!
158//! #### power-of-two
159//!
160//! Enable parsing numbers using radixes that are powers of two, that is, `2`,
161//! `4`, `8`, `16`, and `32`.
162//!
163//! ```rust
164//! # #[no_std]
165//! # #[cfg(feature = "power-of-two")] {
166//! # use core::str;
167//! use lexical_parse_integer::{FromLexicalWithOptions, NumberFormatBuilder, Options};
168//!
169//! const BINARY: u128 = NumberFormatBuilder::binary();
170//! const OPTIONS: Options = Options::new();
171//! let value = u64::from_lexical_with_options::<BINARY>("10011010010".as_bytes(), &OPTIONS);
172//! assert_eq!(value, Ok(1234));
173//! # }
174//! ```
175//!
176//! #### radix
177//!
178//! Enable parsing numbers using all radixes from `2` to `36`. This requires
179//! more static storage than [`power-of-two`][crate#power-of-two], and increases
180//! compile times, but can be quite useful for esoteric programming languages
181//! which use duodecimal integers.
182//!
183//! ```rust
184//! # #[no_std]
185//! # #[cfg(feature = "radix")] {
186//! # use core::str;
187//! use lexical_parse_integer::{FromLexicalWithOptions, NumberFormatBuilder, Options};
188//!
189//! const BINARY: u128 = NumberFormatBuilder::from_radix(12);
190//! const OPTIONS: Options = Options::new();
191//! let value = u64::from_lexical_with_options::<BINARY>("86A".as_bytes(), &OPTIONS);
192//! assert_eq!(value, Ok(1234));
193//! # }
194//! ```
195//!
196//! #### compact
197//!
198//! Reduce the generated code size at the cost of performance. This minimizes
199//! the number of static tables, inlining, and generics used, drastically
200//! reducing the size of the generated binaries. However, the resulting
201//! performance of the generated code is much lower.
202//!
203//! #### std
204//!
205//! Enable use of the standard library. Currently, the standard library
206//! is not used, and may be disabled without any change in functionality
207//! on stable.
208//!
209//! # Higher-Level APIs
210//!
211//! If you would like an API that supports multiple numeric conversions rather
212//! than just parsing integers, use [`lexical`] or [`lexical-core`] instead.
213//!
214//! [`lexical`]: https://crates.io/crates/lexical
215//! [`lexical-core`]: https://crates.io/crates/lexical-core
216//!
217//! # Version Support
218//!
219//! The minimum, standard, required version is [`1.63.0`][`rust-1.63.0`], for
220//! const generic support. Older versions of lexical support older Rust
221//! versions.
222//!
223//! # Algorithm
224//!
225//! The default implementations are highly optimized both for simple
226//! strings, as well as input with large numbers of digits. In order to
227//! keep performance optimal for simple strings, we minimize the number
228//! of branches (and therefore optimization checks).
229//! Most of the branches in the code are resolved at compile-time, and
230//! the resulting ASM is monitored to ensure there are no regressions. For
231//! larger strings, a limited number of optimization checks are included
232//! to try faster, multi-digit parsing algorithms. For 32-bit integers,
233//! we try to parse 4 digits at a time, and for 64-bit and larger integers,
234//! we try to parse 8 digits at a time. Attempting both checks leads to
235//! significant performance penalties for simple strings, so only 1
236//! optimization is used at a time.
237//!
238//! In addition, a compact, fallback algorithm uses a naive, simple
239//! algorithm, parsing only a single digit at a time. This avoids any
240//! unnecessary branching and produces smaller binaries, but comes
241//! at a significant performance penalty for integers with more digits.
242//!
243//! # Design
244//!
245//! - [Algorithm Approach](https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-parse-integer/docs/Algorithm.md)
246//! - [Benchmarks](https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-parse-integer/docs/Benchmarks.md)
247//! - [Comprehensive Benchmarks](https://github.com/Alexhuszagh/lexical-benchmarks)
248//!
249//! [`rust-1.63.0`]: https://blog.rust-lang.org/2022/08/11/Rust-1.63.0.html
250
251// FIXME: Implement clippy/allow reasons once we drop support for 1.80.0 and below
252// Clippy reasons were stabilized in 1.81.0.
253
254// We want to have the same safety guarantees as Rust core,
255// so we allow unused unsafe to clearly document safety guarantees.
256#![allow(unused_unsafe)]
257#![cfg_attr(feature = "lint", warn(unsafe_op_in_unsafe_fn))]
258#![cfg_attr(not(feature = "std"), no_std)]
259#![cfg_attr(docsrs, feature(doc_cfg))]
260#![cfg_attr(docsrs, feature(doc_auto_cfg))]
261#![deny(
262    clippy::doc_markdown,
263    clippy::unnecessary_safety_comment,
264    clippy::semicolon_if_nothing_returned,
265    clippy::unwrap_used,
266    clippy::as_underscore
267)]
268#![allow(
269    // used when concepts are logically separate
270    clippy::match_same_arms,
271    // loss of precision is intentional
272    clippy::integer_division,
273    // mathematical names use 1-character identifiers
274    clippy::min_ident_chars,
275    // these are not cryptographically secure contexts
276    clippy::integer_division_remainder_used,
277    // this can be intentional
278    clippy::module_name_repetitions,
279    // this is intentional: already passing a pointer and need performance
280    clippy::needless_pass_by_value,
281    // we use this for inline formatting for unsafe blocks
282    clippy::semicolon_inside_block,
283)]
284
285pub mod algorithm;
286pub mod options;
287pub mod parse;
288
289mod api;
290
291// Re-exports
292pub use lexical_util::error::Error;
293pub use lexical_util::format::{self, NumberFormat, NumberFormatBuilder};
294pub use lexical_util::options::ParseOptions;
295pub use lexical_util::result::Result;
296
297pub use self::api::{FromLexical, FromLexicalWithOptions};
298#[doc(inline)]
299pub use self::options::{Options, OptionsBuilder};