use core::{f32, f64};
#[cfg(target_os = "cuda")]
use cuda_std::GpuFloat;
use rand_core::RngCore;
/// Maps a raw 64-bit value onto an `f64` in the half-open interval `[0.0, 1.0)`.
///
/// Only the upper 53 bits of `x` are kept, which is as many as an `f64` significand can hold
/// exactly, so the integer-to-float conversion and the scaling below never round.
fn u64_to_unit_f64(x: u64) -> f64 {
    /// Reciprocal of 2^53: the spacing between adjacent results.
    const STEP: f64 = 1.0 / (1u64 << 53) as f64;

    let top_bits = x >> 11;
    top_bits as f64 * STEP
}
/// Floating-point sampling helpers layered on top of [`RngCore`].
///
/// A blanket implementation in this file provides these methods for every type that
/// implements `RngCore`.
pub trait GpuRand: RngCore {
/// Draws a uniformly distributed `f32` from the unit interval starting at `0.0`.
fn uniform_f32(&mut self) -> f32;
/// Draws a uniformly distributed `f64` from the unit interval starting at `0.0`.
fn uniform_f64(&mut self) -> f64;
/// Draws a single `f32` sample meant to follow the standard normal distribution
/// (the blanket implementation applies a Box–Muller transform to two uniform draws).
fn normal_f32(&mut self) -> f32;
/// Draws a single `f64` sample meant to follow the standard normal distribution
/// (the blanket implementation applies a Box–Muller transform to two uniform draws).
fn normal_f64(&mut self) -> f64;
/// Draws two `f32` normal samples at once; the blanket implementation derives both
/// from the same pair of uniform draws (cosine and sine branches of Box–Muller).
fn normal_f32_2(&mut self) -> [f32; 2];
/// Draws two `f64` normal samples at once; the blanket implementation derives both
/// from the same pair of uniform draws (cosine and sine branches of Box–Muller).
fn normal_f64_2(&mut self) -> [f64; 2];
}
/// Blanket implementation: every [`RngCore`] generator gets the [`GpuRand`] sampling methods.
///
/// The normal samplers use the Box–Muller transform. It takes the logarithm of a uniform
/// variate, so that variate is flipped to `1.0 - u` (range `(0.0, 1.0]`) before `ln` is
/// applied; feeding it a raw `[0.0, 1.0)` sample would yield `ln(0) = -inf` and therefore
/// infinite or NaN output whenever the generator produced an exact zero.
impl<T: RngCore> GpuRand for T {
    /// Returns a uniformly distributed `f32` in `[0.0, 1.0)`, consuming one `next_u64` draw.
    ///
    /// The value is built from the top 24 bits of the draw (the full precision of an `f32`
    /// significand). Generating an `f64` first and narrowing it with `as f32` would round
    /// values just below one up to exactly `1.0`, breaking the half-open range.
    fn uniform_f32(&mut self) -> f32 {
        (self.next_u64() >> 40) as f32 * (1.0 / (1u32 << 24) as f32)
    }

    /// Returns a uniformly distributed `f64` in `[0.0, 1.0)`, consuming one `next_u64` draw.
    fn uniform_f64(&mut self) -> f64 {
        u64_to_unit_f64(self.next_u64())
    }

    /// Returns one standard-normal `f32` sample (cosine branch of Box–Muller).
    ///
    /// Consumes two uniform draws. The result is always finite.
    fn normal_f32(&mut self) -> f32 {
        let u1 = self.uniform_f32();
        let u2 = self.uniform_f32();
        // `1.0 - u1` lies in (0, 1], keeping the logarithm finite.
        let radius = (-2.0 * (1.0 - u1).ln()).sqrt();
        radius * ((f32::consts::PI * 2.0) * u2).cos()
    }

    /// Returns one standard-normal `f64` sample (cosine branch of Box–Muller).
    ///
    /// Consumes two uniform draws. The result is always finite.
    fn normal_f64(&mut self) -> f64 {
        let u1 = self.uniform_f64();
        let u2 = self.uniform_f64();
        // `1.0 - u1` lies in (0, 1], keeping the logarithm finite.
        let radius = (-2.0 * (1.0 - u1).ln()).sqrt();
        radius * ((f64::consts::PI * 2.0) * u2).cos()
    }

    /// Returns two independent standard-normal `f32` samples from one pair of uniform draws
    /// (cosine and sine branches of Box–Muller, in that order).
    fn normal_f32_2(&mut self) -> [f32; 2] {
        let u1 = self.uniform_f32();
        let u2 = self.uniform_f32();
        // Radius and angle are shared by both outputs, so compute each once.
        // `1.0 - u1` lies in (0, 1], keeping the logarithm finite.
        let radius = (-2.0 * (1.0 - u1).ln()).sqrt();
        let angle = (f32::consts::PI * 2.0) * u2;
        [radius * angle.cos(), radius * angle.sin()]
    }

    /// Returns two independent standard-normal `f64` samples from one pair of uniform draws
    /// (cosine and sine branches of Box–Muller, in that order).
    fn normal_f64_2(&mut self) -> [f64; 2] {
        let u1 = self.uniform_f64();
        let u2 = self.uniform_f64();
        // Radius and angle are shared by both outputs, so compute each once.
        // `1.0 - u1` lies in (0, 1], keeping the logarithm finite.
        let radius = (-2.0 * (1.0 - u1).ln()).sqrt();
        let angle = (f64::consts::PI * 2.0) * u2;
        [radius * angle.cos(), radius * angle.sin()]
    }
}