max(min(x, hi), lo)
}
+/// Returns the nearest integer to a given `f32`.
+///
+/// Respects IEEE-754 tiesToEven
+#[inline(always)]
+fn round_ties_to_even(x: f32) -> f32 {
+ #[cfg(target_feature = "sse4.1")]
+ unsafe {
+ use std::arch::x86_64::{
+ _mm_load_ss, _mm_round_ss, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT,
+ };
+ const ROUNDING: i32 = (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) as i32;
+ let x = _mm_load_ss(&x);
+ let x = _mm_round_ss::<ROUNDING>(x, x);
+ std::arch::x86_64::_mm_cvtss_f32(x)
+ }
+}
+
#[macro_export]
macro_rules! impl_shared {
($name:ty, $t:ty, $n:expr) => {
//
// Sollya code for generating these polynomials is in `doc/sincostan.sollya`
+use crate::round_ties_to_even;
+
// constants for sin(pi x), cos(pi x) for x on [-1/4,1/4]
const F32_SIN_PI_7_K: [f32; 3] = unsafe {
std::mem::transmute::<[u32; 3], _>([
let a = if a.abs() < 16777216.0 { a } else { a * 0.0 };
// Range reduction.
- let r = (a + a).round();
+ let r = round_ties_to_even(a + a);
+
+ // Safety: The clamp above avoids the possibility of overflow here.
let i = unsafe { r.to_int_unchecked::<i32>() } as u32;
let r = r.mul_add(-0.5, a);
//
// Sollya code for generating these polynomials is in `doc/sincostan.sollya`
+use crate::round_ties_to_even;
+
const F32_TAN_PI_15_K: [f32; 7] = unsafe {
std::mem::transmute::<[u32; 7], _>([
0x41255def, // 0x1.4abbdep3
const T: [f32; 7] = F32_TAN_PI_15_K;
// Range reduction.
- let r = (a + a).round();
+ let r = round_ties_to_even(a + a);
+
+ // Safety: The clamp above avoids the possibility of overflow here.
let i = unsafe { r.to_int_unchecked::<i32>() } as u32;
let r = r.mul_add(-0.5, a);
pub fn exhaustive_cos_pi() {
let errors = check_exhaustive_f32(ref_cos_pi_f32, |a| sin_cos_pi_f32(a).1, false);
println!("COS: {:?}", errors);
- assert_eq!(errors.num_errors, 62_426_004);
+ assert_eq!(errors.num_errors, 45_896_848);
assert_eq!(errors.max_error_ulp, 1);
}