erydanos/
atan.rs

1/*
2 * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved.
3 * //
4 * // Use of this source code is governed by a BSD-style
5 * // license that can be found in the LICENSE file.
6 */
7
8#[cfg(all(target_arch = "x86", target_feature = "sse4.1"))]
9use std::arch::x86::*;
10#[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
11use std::arch::x86_64::*;
12
13use crate::generalf::mlaf;
14#[cfg(all(
15    any(target_arch = "x86_64", target_arch = "x86"),
16    target_feature = "sse4.1"
17))]
18use crate::{_mm_atan_pd, _mm_extract_pd};
19
20// Chebyshev
21pub const ATAN_POLY_1_D: f64 = 0.9999999999999999f64;
22pub const ATAN_POLY_2_D: f64 = -0.33333333333330667f64;
23pub const ATAN_POLY_3_D: f64 = 0.19999999999752202f64;
24pub const ATAN_POLY_4_D: f64 = -0.14285714274686542f64;
25pub const ATAN_POLY_5_D: f64 = 0.1111111082752676523817;
26pub const ATAN_POLY_6_D: f64 = -0.09090904383293700958748;
27pub const ATAN_POLY_7_D: f64 = 0.07692253564182837240753;
28pub const ATAN_POLY_8_D: f64 = -0.06666214370915691620596;
29pub const ATAN_POLY_9_D: f64 = 0.05879509976336718711646;
30pub const ATAN_POLY_10_D: f64 = -0.05249366638684718970866;
31pub const ATAN_POLY_11_D: f64 = 0.04709246978012999772633;
32pub const ATAN_POLY_12_D: f64 = -0.04187077253614157915467;
33pub const ATAN_POLY_13_D: f64 = 0.03602449800199764785191;
34pub const ATAN_POLY_14_D: f64 = -0.02897361824350472260206;
35pub const ATAN_POLY_15_D: f64 = 0.02089260664889144716522;
36pub const ATAN_POLY_16_D: f64 = -0.01291264672794562707631;
37pub const ATAN_POLY_17_D: f64 = 0.006522051887574912863745;
38pub const ATAN_POLY_18_D: f64 = -0.002548984741415465180309;
39pub const ATAN_POLY_19_D: f64 = 0.0007161859397322825641862;
40pub const ATAN_POLY_20_D: f64 = -0.0001278956066478230268751;
41pub const ATAN_POLY_21_D: f64 = 0.00001085532590549307282752;
42
43#[inline]
44fn do_atan(d: f64) -> f64 {
45    let mut x = d;
46    let q = if x < 0f64 {
47        x = -x;
48        1
49    } else {
50        0
51    };
52    let c = x;
53    if x > 1f64 {
54        x = 1f64 / x;
55    }
56
57    let x2 = x * x;
58    let mut u = ATAN_POLY_21_D;
59    u = mlaf(u, x2, ATAN_POLY_20_D);
60    u = mlaf(u, x2, ATAN_POLY_19_D);
61    u = mlaf(u, x2, ATAN_POLY_18_D);
62    u = mlaf(u, x2, ATAN_POLY_17_D);
63    u = mlaf(u, x2, ATAN_POLY_16_D);
64    u = mlaf(u, x2, ATAN_POLY_15_D);
65    u = mlaf(u, x2, ATAN_POLY_14_D);
66    u = mlaf(u, x2, ATAN_POLY_13_D);
67    u = mlaf(u, x2, ATAN_POLY_12_D);
68    u = mlaf(u, x2, ATAN_POLY_11_D);
69    u = mlaf(u, x2, ATAN_POLY_10_D);
70    u = mlaf(u, x2, ATAN_POLY_9_D);
71    u = mlaf(u, x2, ATAN_POLY_8_D);
72    u = mlaf(u, x2, ATAN_POLY_7_D);
73    u = mlaf(u, x2, ATAN_POLY_6_D);
74    u = mlaf(u, x2, ATAN_POLY_5_D);
75    u = mlaf(u, x2, ATAN_POLY_4_D);
76    u = mlaf(u, x2, ATAN_POLY_3_D);
77    u = mlaf(u, x2, ATAN_POLY_2_D);
78    u = mlaf(u, x2, ATAN_POLY_1_D);
79    u *= x;
80
81    u = if c > 1f64 {
82        std::f64::consts::FRAC_PI_2 - u
83    } else {
84        u
85    };
86    if q & 1 != 0 {
87        u = -u;
88    }
89    u
90}
91
92#[cfg(all(
93    any(target_arch = "x86_64", target_arch = "x86"),
94    target_feature = "sse4.1"
95))]
96#[inline]
97fn do_atan_sse(d: f64) -> f64 {
98    unsafe {
99        let j = _mm_set1_pd(d);
100        _mm_extract_pd::<0>(_mm_atan_pd(j))
101    }
102}
103
104#[inline]
105/// Computes atan for f64 with error bound *ULP 2.0*
106pub fn eatan(d: f64) -> f64 {
107    let mut _dispatcher: fn(f64) -> f64 = do_atan;
108    #[cfg(all(
109        any(target_arch = "x86_64", target_arch = "x86"),
110        target_feature = "sse4.1"
111    ))]
112    {
113        _dispatcher = do_atan_sse;
114    }
115    _dispatcher(d)
116}