Fix reduction using the wrong rounding mode (#1)

author Josh Simmons <josh@nega.tv>

Fri, 4 Nov 2022 20:59:59 +0000 (21:59 +0100)

committer GitHub <noreply@github.com>

Fri, 4 Nov 2022 20:59:59 +0000 (21:59 +0100)
author Josh Simmons <josh@nega.tv>
Fri, 4 Nov 2022 20:59:59 +0000 (21:59 +0100)
committer GitHub <noreply@github.com>
Fri, 4 Nov 2022 20:59:59 +0000 (21:59 +0100)
diff --git a/narcissus-maths/src/lib.rs b/narcissus-maths/src/lib.rs

index ab5eda3bdfdf9addfafac1cd3ab8a93fa5c55661..2e71ed061a729941a85c193f4a0f842b08aff173 100644 (file)
--- a/narcissus-maths/src/lib.rs
+++ b/narcissus-maths/src/lib.rs
@@ -172,6 +172,23 @@ pub fn clamp(x: f32, lo: f32, hi: f32) -> f32 {
      max(min(x, hi), lo)
  }
  
+/// Returns the nearest integer to a given `f32`.
+///
+/// Respects IEEE-754 tiesToEven
+#[inline(always)]
+fn round_ties_to_even(x: f32) -> f32 {
+    #[cfg(target_feature = "sse4.1")]
+    unsafe {
+        use std::arch::x86_64::{
+            _mm_load_ss, _mm_round_ss, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT,
+        };
+        const ROUNDING: i32 = (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) as i32;
+        let x = _mm_load_ss(&x);
+        let x = _mm_round_ss::<ROUNDING>(x, x);
+        std::arch::x86_64::_mm_cvtss_f32(x)
+    }
+}
+
  #[macro_export]
  macro_rules! impl_shared {
      ($name:ty, $t:ty, $n:expr) => {
diff --git a/narcissus-maths/src/sin_cos_pi.rs b/narcissus-maths/src/sin_cos_pi.rs

index e9912cba95c8fc93dcd3ef71d7e57716e530e0d4..65c9aaa0d37219dc63a6ded23103dfb8590d0f32 100644 (file)
--- a/narcissus-maths/src/sin_cos_pi.rs
+++ b/narcissus-maths/src/sin_cos_pi.rs
@@ -2,6 +2,8 @@
  //
  // Sollya code for generating these polynomials is in `doc/sincostan.sollya`
  
+use crate::round_ties_to_even;
+
  // constants for sin(pi x), cos(pi x) for x on [-1/4,1/4]
  const F32_SIN_PI_7_K: [f32; 3] = unsafe {
      std::mem::transmute::<[u32; 3], _>([
@@ -48,7 +50,9 @@ pub fn sin_cos_pi_f32(a: f32) -> (f32, f32) {
      let a = if a.abs() < 16777216.0 { a } else { a * 0.0 };
  
      // Range reduction.
-    let r = (a + a).round();
+    let r = round_ties_to_even(a + a);
+
+    // Safety: The clamp above avoids the possibility of overflow here.
      let i = unsafe { r.to_int_unchecked::<i32>() } as u32;
      let r = r.mul_add(-0.5, a);
  
diff --git a/narcissus-maths/src/tan_pi.rs b/narcissus-maths/src/tan_pi.rs

index 16d1a5a0d72f2511126aea8d6cb5b455be404830..37700bc827755cb94f40a9aaadc76b6de5bd2935 100644 (file)
--- a/narcissus-maths/src/tan_pi.rs
+++ b/narcissus-maths/src/tan_pi.rs
@@ -4,6 +4,8 @@
  //
  // Sollya code for generating these polynomials is in `doc/sincostan.sollya`
  
+use crate::round_ties_to_even;
+
  const F32_TAN_PI_15_K: [f32; 7] = unsafe {
      std::mem::transmute::<[u32; 7], _>([
          0x41255def, // 0x1.4abbdep3
@@ -31,7 +33,9 @@ pub fn tan_pi_f32(a: f32) -> f32 {
      const T: [f32; 7] = F32_TAN_PI_15_K;
  
      // Range reduction.
-    let r = (a + a).round();
+    let r = round_ties_to_even(a + a);
+
+    // Safety: The clamp above avoids the possibility of overflow here.
      let i = unsafe { r.to_int_unchecked::<i32>() } as u32;
      let r = r.mul_add(-0.5, a);
  
diff --git a/narcissus-maths/tests/exhaustive_f32.rs b/narcissus-maths/tests/exhaustive_f32.rs

index d02f3b049fa318b8aab59a56c48451290687fa9d..0fcc12fbb8f8dcb91fe95d4c025159f6681f0fb7 100644 (file)
--- a/narcissus-maths/tests/exhaustive_f32.rs
+++ b/narcissus-maths/tests/exhaustive_f32.rs
@@ -432,7 +432,7 @@ pub fn exhaustive_sin_pi() {
  pub fn exhaustive_cos_pi() {
      let errors = check_exhaustive_f32(ref_cos_pi_f32, |a| sin_cos_pi_f32(a).1, false);
      println!("COS: {:?}", errors);
-    assert_eq!(errors.num_errors, 62_426_004);
+    assert_eq!(errors.num_errors, 45_896_848);
      assert_eq!(errors.max_error_ulp, 1);
  }
author	Josh Simmons <josh@nega.tv>
	Fri, 4 Nov 2022 20:59:59 +0000 (21:59 +0100)
committer	GitHub <noreply@github.com>
	Fri, 4 Nov 2022 20:59:59 +0000 (21:59 +0100)
narcissus-maths/src/lib.rs		patch \| blob \| blame \| history
narcissus-maths/src/sin_cos_pi.rs		patch \| blob \| blame \| history
narcissus-maths/src/tan_pi.rs		patch \| blob \| blame \| history
narcissus-maths/tests/exhaustive_f32.rs		patch \| blob \| blame \| history