fast_float_to_integer/lib.rs
1//! Convert floating point values to integer types faster than the standard `as` operator.
2//!
3//! The standard way of converting floating point values to integers is with the [`as` operator](https://doc.rust-lang.org/reference/expressions/operator-expr.html#type-cast-expressions). This conversion has various guarantees as listed in the reference. One of them is that it saturates: Input values out of range of the output type convert to the minimal/maximal value of the output type.
4//!
5//! ```
6//! assert_eq!(300f32 as u8, 255);
7//! assert_eq!(-5f32 as u8, 0);
8//! ```
9//!
//! This contrasts with C/C++, where this kind of cast is [undefined behavior](https://github.com/e00E/cpp-clamp-cast). Saturation comes with a downside. It is slower than the C/C++ version. On many [hardware targets](https://doc.rust-lang.org/nightly/rustc/platform-support.html) a float to integer conversion can be done in one instruction. For example [`CVTTSS2SI`](https://www.felixcloutier.com/x86/cvttss2si) on x86_64+SSE. Rust has to do more work than this, because the instruction does not provide saturation.
11//!
12//! Sometimes you want faster conversions and don't need saturation. This is what this crate provides. The behavior of the conversion functions in this crate depends on whether the input value is in range of the output type. If in range, then the conversion functions work like the standard `as` operator conversion. If not in range (including NaN), then you get an unspecified value.
13//!
14//! You never get undefined behavior but you can get unspecified behavior. In the unspecified case, you get an arbitrary value. The function returns and you get a valid value of the output type, but there is no guarantee what that value is.
15//!
16//! This crate picks an implementation automatically at compile time based on the [target](https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch) and [features](https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute). If there is no specialized implementation, then this crate picks the standard `as` operator conversion. This crate has optimized implementations on the following targets:
17//!
18//! - `target_arch = "x86_64", target_feature = "sse"`: all conversions except 128 bit integers
19//! - `target_arch = "x86", target_feature = "sse"`: all conversions except 64 bit and 128 bit integers
20//!
21//! # Assembly comparison
22//!
23//! The [repository](https://github.com/e00E/fast-float-to-integer) contains generated assembly for every conversion and target. Here are some typical examples on x86_64+SSE.
24//!
25//
26// We could do something like `#![doc = include_str!("../generated assembly/x86_64_default/f32_to_i64")]` to include the assembly directly. The downside of that is that compiling the library requires the assembly file to be there and we have to publish the file.
27//
28//! standard:
29//!
30//! ```asm
31//! f32_to_i64:
32//! cvttss2si rax, xmm0
33//! ucomiss xmm0, dword ptr [rip + .L_0]
34//! movabs rcx, 9223372036854775807
35//! cmovbe rcx, rax
36//! xor eax, eax
37//! ucomiss xmm0, xmm0
38//! cmovnp rax, rcx
39//! ret
40//! ```
41//!
42//! fast:
43//!
44//! ```asm
45//! f32_to_i64:
46//! cvttss2si rax, xmm0
47//! ret
48//! ```
49//!
50//! standard:
51//!
52//! ```asm
53//! f32_to_u64:
54//! cvttss2si rax, xmm0
55//! mov rcx, rax
56//! sar rcx, 63
57//! movaps xmm1, xmm0
58//! subss xmm1, dword ptr [rip + .L_0]
59//! cvttss2si rdx, xmm1
60//! and rdx, rcx
61//! or rdx, rax
62//! xor ecx, ecx
63//! xorps xmm1, xmm1
64//! ucomiss xmm0, xmm1
65//! cmovae rcx, rdx
66//! ucomiss xmm0, dword ptr [rip + .L_1]
67//! mov rax, -1
68//! cmovbe rax, rcx
69//! ret
70//! ```
71//!
72//! fast:
73//!
74//! ```asm
75//! f32_to_u64:
76//! cvttss2si rcx, xmm0
77//! addss xmm0, dword ptr [rip + .L_0]
78//! cvttss2si rdx, xmm0
79//! mov rax, rcx
80//! sar rax, 63
81//! and rax, rdx
82//! or rax, rcx
83//! ret
84//! ```
85
86#![cfg_attr(not(test), no_std)]
87
/// Raise two to some power and return the result as an `f32`.
///
/// This function exists because libcore does not provide the [`f32::powi`](https://doc.rust-lang.org/std/primitive.f32.html#method.powi) family of functions.
///
/// The power is computed exactly as a `u128` and then cast, so every supported exponent yields the exact power of two.
///
/// # Panics
///
/// Panics if `exponent >= 128`, because `2^exponent` does not fit in a `u128`. When the function is evaluated in a const context the panic is a compile error.
#[allow(dead_code)]
const fn power_of_two_f32(exponent: u32) -> f32 {
    // Check the range explicitly so the behavior does not depend on whether overflow checks are enabled.
    assert!(exponent < u128::BITS, "exponent must be less than 128");
    (1u128 << exponent) as f32
}
95
/// Raise two to some power and return the result as an `f64`.
///
/// Like `power_of_two_f32` but for `f64`. The power is computed exactly as a `u128` and then cast, so every supported exponent yields the exact power of two.
///
/// # Panics
///
/// Panics if `exponent >= 128`, because `2^exponent` does not fit in a `u128`. When the function is evaluated in a const context the panic is a compile error.
#[allow(dead_code)]
const fn power_of_two_f64(exponent: u32) -> f64 {
    // Check the range explicitly so the behavior does not depend on whether overflow checks are enabled.
    assert!(exponent < u128::BITS, "exponent must be less than 128");
    (1u128 << exponent) as f64
}
101
macro_rules! create_target {
    ($target:ident) => {
        // An empty test that carries the name of the target module. Listing the tests shows which target is active, which lets us check that the expected one was selected:
        //
        // cargo test --quiet --package fast-float-to-integer --lib -- --list
        #[test]
        fn $target() {}

        // Expose the selected target module under one fixed name so that other code does not need to know which target was picked.
        use $target as active_target;
    };
}
113
// Conditionally compiled target specific modules. The condition is set based on the availability of the intrinsics they use. This makes it safe to use the module. See the `target_default` module for the interface.
//
// We would put the mod declaration inside of the create_target macro too, but then rustfmt does not understand it.
//
// The branches are checked in order and only the first one whose condition holds is compiled, so exactly one module ends up as `active_target`.
cfg_if::cfg_if! {
    // The `force-default` feature takes precedence over target detection and selects the default implementation.
    if #[cfg(feature = "force-default")] {
        mod target_default;
        create_target!(target_default);
    // x86_64 with SSE.
    } else if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] {
        mod target_x86_64_sse;
        create_target!(target_x86_64_sse);
    // 32 bit x86 with SSE.
    } else if #[cfg(all(target_arch = "x86", target_feature = "sse"))] {
        mod target_x86_sse;
        create_target!(target_x86_sse);
    // No specialized implementation for this target: fall back to the default implementation.
    } else {
        mod target_default;
        create_target!(target_default);
    }
}
132
// Generate one public conversion function that forwards to the function of the same name in the active target's `implementation` module.
macro_rules! create_function {
    ($function:ident, $Source:ty, $Target:ty) => {
        /// Convert the input floating point value to the output integer type.
        ///
        /// If the input value is in range of the output type, then the result is the same as the standard `as` conversion. If it is not in range (including NaN), then the result is an unspecified value of the output type.
        // With the `show-asm` feature the function is never inlined, presumably so that it shows up as its own symbol in the generated assembly. Otherwise it is always inlined.
        #[cfg_attr(not(feature = "show-asm"), inline(always))]
        #[cfg_attr(feature = "show-asm", inline(never))]
        pub fn $function(float: $Source) -> $Target {
            active_target::implementation::$function(float)
        }
    };
}
145
146create_function! {f32_to_i8, f32, i8}
147create_function! {f32_to_u8, f32, u8}
148create_function! {f32_to_i16, f32, i16}
149create_function! {f32_to_u16, f32, u16}
150create_function! {f32_to_i32, f32, i32}
151create_function! {f32_to_u32, f32, u32}
152create_function! {f32_to_i64, f32, i64}
153create_function! {f32_to_u64, f32, u64}
154create_function! {f32_to_i128, f32, i128}
155create_function! {f32_to_u128, f32, u128}
156
157create_function! {f64_to_i8, f64, i8}
158create_function! {f64_to_u8, f64, u8}
159create_function! {f64_to_i16, f64, i16}
160create_function! {f64_to_u16, f64, u16}
161create_function! {f64_to_i32, f64, i32}
162create_function! {f64_to_u32, f64, u32}
163create_function! {f64_to_i64, f64, i64}
164create_function! {f64_to_u64, f64, u64}
165create_function! {f64_to_i128, f64, i128}
166create_function! {f64_to_u128, f64, u128}