From: Josh Simmons Date: Fri, 4 Nov 2022 20:59:59 +0000 (+0100) Subject: Fix reduction using the wrong rounding mode (#1) X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=1b87d0565a0b16fc5ab03dd7ef267933dfe0a8fa;p=josh%2Fnarcissus Fix reduction using the wrong rounding mode (#1) Previously we were using `f32::round` in the reduction step, however that isn't correct. We really need IEEE-754 tiesToEven semantics for the rounding step, so manually implement that. --- diff --git a/narcissus-maths/src/lib.rs b/narcissus-maths/src/lib.rs index ab5eda3..2e71ed0 100644 --- a/narcissus-maths/src/lib.rs +++ b/narcissus-maths/src/lib.rs @@ -172,6 +172,23 @@ pub fn clamp(x: f32, lo: f32, hi: f32) -> f32 { max(min(x, hi), lo) } +/// Returns the nearest integer to a given `f32`. +/// +/// Respects IEEE-754 tiesToEven +#[inline(always)] +fn round_ties_to_even(x: f32) -> f32 { + #[cfg(target_feature = "sse4.1")] + unsafe { + use std::arch::x86_64::{ + _mm_load_ss, _mm_round_ss, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT, + }; + const ROUNDING: i32 = (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) as i32; + let x = _mm_load_ss(&x); + let x = _mm_round_ss::(x, x); + std::arch::x86_64::_mm_cvtss_f32(x) + } +} + #[macro_export] macro_rules! impl_shared { ($name:ty, $t:ty, $n:expr) => { diff --git a/narcissus-maths/src/sin_cos_pi.rs b/narcissus-maths/src/sin_cos_pi.rs index e9912cb..65c9aaa 100644 --- a/narcissus-maths/src/sin_cos_pi.rs +++ b/narcissus-maths/src/sin_cos_pi.rs @@ -2,6 +2,8 @@ // // Sollya code for generating these polynomials is in `doc/sincostan.sollya` +use crate::round_ties_to_even; + // constants for sin(pi x), cos(pi x) for x on [-1/4,1/4] const F32_SIN_PI_7_K: [f32; 3] = unsafe { std::mem::transmute::<[u32; 3], _>([ @@ -48,7 +50,9 @@ pub fn sin_cos_pi_f32(a: f32) -> (f32, f32) { let a = if a.abs() < 16777216.0 { a } else { a * 0.0 }; // Range reduction. - let r = (a + a).round(); + let r = round_ties_to_even(a + a); + + // Safety: The clamp above avoids the possibility of overflow here. let i = unsafe { r.to_int_unchecked::() } as u32; let r = r.mul_add(-0.5, a); diff --git a/narcissus-maths/src/tan_pi.rs b/narcissus-maths/src/tan_pi.rs index 16d1a5a..37700bc 100644 --- a/narcissus-maths/src/tan_pi.rs +++ b/narcissus-maths/src/tan_pi.rs @@ -4,6 +4,8 @@ // // Sollya code for generating these polynomials is in `doc/sincostan.sollya` +use crate::round_ties_to_even; + const F32_TAN_PI_15_K: [f32; 7] = unsafe { std::mem::transmute::<[u32; 7], _>([ 0x41255def, // 0x1.4abbdep3 @@ -31,7 +33,9 @@ pub fn tan_pi_f32(a: f32) -> f32 { const T: [f32; 7] = F32_TAN_PI_15_K; // Range reduction. - let r = (a + a).round(); + let r = round_ties_to_even(a + a); + + // Safety: The clamp above avoids the possibility of overflow here. let i = unsafe { r.to_int_unchecked::() } as u32; let r = r.mul_add(-0.5, a); diff --git a/narcissus-maths/tests/exhaustive_f32.rs b/narcissus-maths/tests/exhaustive_f32.rs index d02f3b0..0fcc12f 100644 --- a/narcissus-maths/tests/exhaustive_f32.rs +++ b/narcissus-maths/tests/exhaustive_f32.rs @@ -432,7 +432,7 @@ pub fn exhaustive_sin_pi() { pub fn exhaustive_cos_pi() { let errors = check_exhaustive_f32(ref_cos_pi_f32, |a| sin_cos_pi_f32(a).1, false); println!("COS: {:?}", errors); - assert_eq!(errors.num_errors, 62_426_004); + assert_eq!(errors.num_errors, 45_896_848); assert_eq!(errors.max_error_ulp, 1); }