Add `{sin,cos,tan}_pi_f32` functions.
Add `next_after_f32`.
Add `HalfTurns` unit type.
Switch `Mat4` to use turns and `sin_cos_pi_f32`.
# It is not intended for manual editing.
version = 3
+[[package]]
+name = "az"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973"
+
[[package]]
name = "fast-float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
+[[package]]
+name = "gmp-mpfr-sys"
+version = "1.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea3f42dadb6c75f122e9aa87e757ef11d4282f664c9f2e6476a9c2c8970f9d19"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.135"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c"
+
[[package]]
name = "narcissus"
version = "0.1.0"
[[package]]
name = "narcissus-maths"
version = "0.1.0"
+dependencies = [
+ "rug",
+]
[[package]]
name = "narcissus-world"
name = "renderdoc-sys"
version = "0.1.0"
+[[package]]
+name = "rug"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "203180f444c95eac53586ed04793ecf6454c5d28f9eca8eead815fc19e136c47"
+dependencies = [
+ "az",
+ "gmp-mpfr-sys",
+ "libc",
+]
+
[[package]]
name = "sdl2-sys"
version = "0.1.0"
[[package]]
name = "vulkan-sys"
version = "0.1.0"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
"ffi/stb_image-sys",
"ffi/vulkan-sys",
"narcissus",
+ "narcissus-app",
"narcissus-core",
"narcissus-gpu",
- "narcissus-app",
"narcissus-maths",
"narcissus-world",
]
[features]
[dependencies]
+
+[dev-dependencies]
+rug = "1.17.0"
\ No newline at end of file
--- /dev/null
+
+x = 0:0.001:1/4
+
+tanpi7 = x .* (3.1414539813995361328125 + x.^2
+ .* (10.40517330169677734375 + x.^2
+ .* (35.490978240966796875 + x.^2
+ .* 284.301177978515625)));
+
+tanpi9 = x .* (3.14160251617431640625 +
+ x.^2 .* (10.3275852203369140625 +
+ x.^2 .* (41.77574920654296875 +
+ x.^2 .* (122.0411834716796875 +
+ x.^2 .* 1306.0660400390625))));
+
+tanpi13 = x .* (3.1415927410125732421875 +
+ x.^2 .* (10.3353290557861328125 +
+ x.^2 .* (40.82488250732421875 +
+ x.^2 .* (161.0076446533203125 +
+ x.^2 .* (736.640869140625 +
+ x.^2 .* (809.0543212890625 +
+ x.^2 .* 27793.21875))))));
+
+tanpi15 = x .* (3.1415927410125732421875 +
+ x.^2 .* (10.33537769317626953125 +
+ x.^2 .* (40.80986785888671875 +
+ x.^2 .* (162.5588531494140625 +
+ x.^2 .* (662.82147216796875 +
+ x.^2 .* (2586.50048828125 +
+ x.^2 .* (6733.18896484375 +
+ x.^2 .* 97579.828125)))))));
+
+tanpic = x .* (3.1415927410125732421875 +
+ x.^2 .* (1.03354301e+1 +
+ x.^2 .* (4.08006248e+1 +
+ x.^2 .* (1.63275925e+2 +
+ x.^2 .* (6.34397766e+2 +
+ x.^2 .* (3.18810547e+3 +
+ x.^2 .* (2.59904968e+2 +
+ x.^2 .* (1.25408000e+5))))))));
+
+tanpo = x .* (3.1415927410125732421875 +
+ x.^2 .* (10.3353290557861328125 +
+ x.^2 .* (40.82488250732421875 +
+ x.^2 .* (161.0076446533203125 +
+ x.^2 .* (736.640869140625 +
+ x.^2 .* (809.0543212890625 +
+ x.^2 .* 27793.21875))))));
+
+tanpid = x .* (3.14159270438032223182744928635656833648681640625 +
+ x.^2 .* (10.335346221923828125 +
+ x.^2 .* (40.822620391845703125 +
+ x.^2 .* (161.1344146728515625 +
+ x.^2 .* (733.203857421875 +
+ x.^2 .* (853.6375732421875 +
+ x.^2 .* 27571.314453125))))));
+
+tanpid2 = x .* (3.1415926499513684433395610540173947811126708984375 +
+ x.^2 .* (10.33543300628662109375 +
+ x.^2 .* (40.800151824951171875 +
+ x.^2 .* (163.3097381591796875 +
+ x.^2 .* (633.093994140625 +
+ x.^2 .* (3215.677978515625 +
+ x.^2 .* (-38.601871490478515625 +
+ x.^2 .* 1.2669925e5)))))));
+
+tanpi = tan(x .* pi);
+
+plot (x, (tanpi7 - tanpi) ./ tanpi);
+title("tanpi7");
+xlabel("x");
+ylabel("rel error");
+figure
+plot (x, (tanpid2 - tanpi) ./ tanpi)
+title("tanpi double");
+xlabel("x");
+ylabel("rel error");
+
+cospi = cos(x .* pi);
+
+cospi8 = x.^2 .* (-4.93480205535888671875 +
+ x.^2 .* (4.0586986541748046875 +
+ x.^2 .* (-1.33483588695526123046875 +
+ x.^2 .* 0.22974522411823272705078125))) + 1.0;
+
+figure
+plot (x, (cospi - cospi8) ./ cospi)
+title("cospi");
+xlabel("x");
+ylabel("abs error");
--- /dev/null
+// create primary range approximations of sin(pi x) and cos(pi x) for x on [-1/4,1/4]
+// both for single and double precision
+// stolen from https://marc-b-reynolds.github.io/math/2020/03/11/SinCosPi.html
+
+R = [0;1/4];
+T = floating;
+E = relative;
+B = [|SG...|];
+
+// write fp values such that they always have a decimal point
+procedure fp_write(v)
+{
+ if (v != nearestint(v)) then {
+ write(v);
+ } else {
+ write(v,".0");
+ };
+};
+
+// make a copy of list with f applied
+procedure list_apply(l,f)
+{
+ var r,v,i;
+
+ r = [||];
+
+ for i from 0 to length(l)-1 do {
+ v = f(l[i]);
+ r = r :. v;
+ };
+
+ return r;
+};
+
+f32_round = proc(n) { return single(n); };
+f64_round = proc(n) { return double(n); };
+
+// make a copy of list with elements rounded to binary32
+procedure list_to_f32(l) { return list_apply(l, f32_round); };
+procedure list_to_f64(l) { return list_apply(l, f64_round); };
+
+procedure list_of_even_coeff(p)
+{
+ var r,e,i;
+ e = degree(p);
+ r = [||];
+
+ for i from 0 to e by 2 do { r = r :. coeff(p,i); };
+
+ return r;
+};
+
+procedure list_of_odd_coeff(p)
+{
+ var r,e,i;
+ e = degree(p);
+ r = [||];
+
+ for i from 1 to e by 2 do { r = r :. coeff(p,i); };
+
+ return r;
+};
+
+procedure f32_write_list(p,l)
+{
+ var e,i;
+ e = length(l)-1;
+ write("const ", p,"_K: [f32; " @ length(l) @ "] = [\n");
+ display=hexadecimal!;
+ for i from 0 to e do { printsingle(l[i]); print(", //", l[i]); };
+ print("];\n");
+ display=decimal!;
+};
+
+procedure f32_data_write(name,a,f,r)
+{
+ var e,z,d,l,i;
+
+ // dump out the supnorm
+ display=hexadecimal!;
+ e = single(dirtyinfnorm(a-f, r));
+ write("// peak-error: ", e);
+ display=decimal!;
+ print(" (~", e,")");
+
+ // create a debug list of values of 'x' where the approximation
+ // error is max.
+ z = list_to_f32(dirtyfindzeros(diff(a-f), r));
+ l = length(z);
+
+
+ write("const " @ name @ "_EMAX: [f32; " @ l @ "] = [");
+
+ printsingle(z[0]);
+
+ for i from 1 to l-1 do {
+ write(",");
+ printsingle(z[i]);
+ };
+
+ z = list_to_f32(dirtyfindzeros(a-f, r));
+ l = length(z);
+
+ // and where the error is approximately zero
+ write("];\nconst " @ name @ "_EMIN: [f32; " @ l @ "] = [");
+
+ if (l > 0) then {
+ printsingle(z[0]);
+ for i from 1 to l-1 do {
+ write(",");
+ printsingle(z[i]);
+ };
+ };
+
+ // dump out the supnorm
+ e = single(dirtyinfnorm(a-f, r));
+ print("];\n");
+
+ a;
+};
+
+// Actual start of building polynomial
+
+print("sin(pi x) in binary32");
+I = [|1,3,5,7|];
+F = sin(pi*x);
+P = fpminimax(F,I,B,R,T,E);
+f32_data_write("F32_SIN_PI_7", P,F,R);
+f32_write_list("F32_SIN_PI_7", list_of_odd_coeff(P));
+print("\n");
+
+print("cos(pi x) in binary32");
+I = [|0,2,4,6,8|];
+F = cos(pi*x);
+P = fpminimax(F,I,B,R,T,E);
+f32_data_write("F32_COS_PI_8", P,F,R);
+f32_write_list("F32_COS_PI_8", list_of_even_coeff(P));
+print("\n");
+
+B = [|D,SG...|];
+
+print("tan(pi x) in binary32");
+I = [|1,3,5,7,9,11,13,15|];
+F = tan(pi*x);
+
+P = fpminimax(F,I,B,R,T,E);
+
+f32_data_write("F32_TAN_PI", P,F,R);
+f32_write_list("F32_TAN_PI", list_of_odd_coeff(P));
mod mat2;
mod mat3;
mod mat4;
+mod next_after_f32;
mod point2;
mod point3;
mod quat;
+mod sin_cos_pi;
+mod tan_pi;
mod vec2;
mod vec3;
mod vec4;
pub use mat2::Mat2;
pub use mat3::Mat3;
pub use mat4::Mat4;
+pub use next_after_f32::next_after_f32;
pub use point2::Point2;
pub use point3::Point3;
pub use quat::Quat;
+pub use sin_cos_pi::sin_cos_pi_f32;
+pub use tan_pi::tan_pi_f32;
pub use vec2::Vec2;
pub use vec3::Vec3;
pub use vec4::Vec4;
pub struct Rad(f32);
impl Rad {
+ #[inline(always)]
pub const fn new(x: f32) -> Self {
Self(x)
}
+ #[inline(always)]
pub const fn as_f32(self) -> f32 {
self.0
}
pub struct Deg(f32);
impl Deg {
+ #[inline(always)]
+ pub const fn new(x: f32) -> Self {
+ Self(x)
+ }
+
+ #[inline(always)]
+ pub const fn as_f32(self) -> f32 {
+ self.0
+ }
+}
+
+/// Unit type for an angle expressed in half-turns.
+///
+/// A turn represents a 360 degree rotation, a half-turn represents a 180 degree rotation. A
+/// half-turn is implicitly scaled by pi.
+#[derive(Clone, Copy, PartialEq, PartialOrd, Debug, Default)]
+pub struct HalfTurn(f32);
+
+impl HalfTurn {
+ #[inline(always)]
pub const fn new(x: f32) -> Self {
Self(x)
}
+ #[inline(always)]
pub const fn as_f32(self) -> f32 {
self.0
}
}
}
+impl From<HalfTurn> for Deg {
+ fn from(x: HalfTurn) -> Self {
+ Self(x.0 * 180.0)
+ }
+}
+
impl From<Deg> for Rad {
#[inline(always)]
fn from(x: Deg) -> Self {
}
}
+impl From<HalfTurn> for Rad {
+ #[inline(always)]
+ fn from(x: HalfTurn) -> Self {
+ Self(x.0 * std::f32::consts::PI)
+ }
+}
+
+impl From<Rad> for HalfTurn {
+ #[inline(always)]
+ fn from(x: Rad) -> Self {
+ Self(x.0 / std::f32::consts::PI)
+ }
+}
+
+impl From<Deg> for HalfTurn {
+ #[inline(always)]
+ fn from(x: Deg) -> Self {
+ Self(x.0 / 180.0)
+ }
+}
+
/// Returns the minimum of `x` and `y`.
///
/// This function returns a platform dependent value if either of its inputs are `NaN`.
-use crate::{Point2, Point3, Rad, Vec2, Vec3, Vec4};
+use crate::{sin_cos_pi_f32, HalfTurn, Point2, Point3, Rad, Vec2, Vec3, Vec4};
/// 4x4 matrix.
///
}
/// Constructs a transformation matrix which rotates around the given `axis` by `angle`.
- pub fn from_axis_angle(axis: Vec3, angle: Rad) -> Mat4 {
- let (sin, cos) = angle.as_f32().sin_cos();
+ pub fn from_axis_rotation(axis: Vec3, rotation: HalfTurn) -> Mat4 {
+ let (sin, cos) = sin_cos_pi_f32(rotation.as_f32());
let axis_sin = axis * sin;
let axis_sq = axis * axis;
let one_minus_cos = 1.0 - cos;
#[cfg(test)]
mod tests {
- use crate::Deg;
-
use super::*;
const IDENTITY: Mat4 = Mat4::IDENTITY;
#[test]
fn axis_angle() {
- let rot_180_x = Mat4::from_axis_angle(Vec3::X, Deg::new(180.0).into());
+ let rot_180_x = Mat4::from_axis_rotation(Vec3::X, HalfTurn::new(1.0));
assert_eq!(rot_180_x * Vec3::X, Vec3::X);
- // TODO: requires approximate equality assert.
- // assert_eq!(rot_180_x * Vec3::Y, -Vec3::Y);
- // assert_eq!(rot_180_x * Vec3::Z, -Vec3::Z);
- let rot_180_y = Mat4::from_axis_angle(Vec3::Y, Deg::new(180.0).into());
+ assert_eq!(rot_180_x * Vec3::Y, -Vec3::Y);
+ assert_eq!(rot_180_x * Vec3::Z, -Vec3::Z);
+ let rot_180_y = Mat4::from_axis_rotation(Vec3::Y, HalfTurn::new(1.0));
+ assert_eq!(rot_180_y * Vec3::X, -Vec3::X);
assert_eq!(rot_180_y * Vec3::Y, Vec3::Y);
- let rot_180_z = Mat4::from_axis_angle(Vec3::Z, Deg::new(180.0).into());
+ assert_eq!(rot_180_y * Vec3::Z, -Vec3::Z);
+ let rot_180_z = Mat4::from_axis_rotation(Vec3::Z, HalfTurn::new(1.0));
+ assert_eq!(rot_180_z * Vec3::X, -Vec3::X);
+ assert_eq!(rot_180_z * Vec3::Y, -Vec3::Y);
assert_eq!(rot_180_z * Vec3::Z, Vec3::Z);
}
--- /dev/null
+/// Calculate the next representable floating-point value following x in the direction of y.
+///
+/// If y is less than x, these functions will return the largest representable number less than x.
+///
+/// # Returns
+///
+/// On success, the function returns the next representable floating-point value after x in the
+/// direction of y.
+///
+/// * If `x` equals `y`, then `y` is returned.
+/// * If `x` or `y` is a `NaN`, a `NaN` is returned.
+/// * If `x` is finite, and the result would overflow, a range error occurs, and the function
+/// returns `inf` with the correct mathematical sign.
+/// * If `x` is not equal to `y`, and the correct function result would be subnormal, zero, or
+/// underflow, a range error occurs, and either the correct value (if it can be represented),
+/// or `0.0`, is returned.
+/// * If x equals y, the function returns y.
+pub fn next_after_f32(x: f32, y: f32) -> f32 {
+ if x.is_nan() || y.is_nan() {
+ return x + y;
+ }
+
+ let ux = x.to_bits();
+ let uy = y.to_bits();
+
+ if ux == uy {
+ return y;
+ }
+
+ let ax = ux & 0x7fff_ffff;
+ let ay = uy & 0x7fff_ffff;
+
+ let ux = if ax == 0 {
+ if ay == 0 {
+ return y;
+ }
+ (uy & 0x8000_0000) | 1
+ } else if ax > ay || ((ux ^ uy) & 0x8000_0000) != 0 {
+ ux - 1
+ } else {
+ ux + 1
+ };
+
+ let e = ux & 0x7f800000;
+ // Overflow if ux is infinite, and x is finite.
+ if e == 0x7f800000 {
+ return x + x;
+ }
+ // Force underflow if ux is subnormal or zero.
+ if e == 0 {
+ let mut force_eval = 0.0;
+ let val = f32::from_bits(ux);
+ unsafe { std::ptr::write_volatile(&mut force_eval, x * x + val * val) };
+ }
+
+ f32::from_bits(ux)
+}
--- /dev/null
+// Based on https://marc-b-reynolds.github.io/math/2020/03/11/SinCosPi.html
+//
+// Sollya code for generating these polynomials is in `doc/sincostan.sollya`
+
+// constants for sin(pi x), cos(pi x) for x on [-1/4,1/4]
+const F32_SIN_PI_7_K: [f32; 4] = unsafe {
+ std::mem::transmute::<[u32; 4], _>([
+ 0x40490fdb, // 0x1.921fb6p1
+ 0xc0a55df6, // -0x1.4abbecp2
+ 0x40233590, // 0x1.466b2p1
+ 0xbf17acc9, // -0x1.2f5992p-1
+ ])
+};
+
+const F32_COS_PI_8_K: [f32; 4] = unsafe {
+ std::mem::transmute::<[u32; 4], _>([
+ 0xc09de9e6, // -0x1.3bd3ccp2
+ 0x4081e0dc, // 0x1.03c1b8p2
+ 0xbfaadbe7, // -0x1.55b7cep0
+ 0x3e6b4255, // 0x1.d684aap-3
+ ])
+};
+
+#[inline(always)]
+fn mulsign_f32(x: f32, s: u32) -> f32 {
+ f32::from_bits(x.to_bits() ^ s)
+}
+
+/// Simultaneously computes the sine and cosine of `a` expressed in multiples of *pi* radians, or half-turns.
+///
+/// Returns `(sin(a * pi), cos(a * pi))`
+///
+/// # Examples
+///
+/// ```
+/// use narcissus_maths::sin_cos_pi_f32;
+/// let (sin, cos) = sin_cos_pi_f32(0.0);
+/// assert_eq!(sin, 0.0);
+/// assert_eq!(cos, 1.0);
+/// ```
+pub fn sin_cos_pi_f32(a: f32) -> (f32, f32) {
+ const S: [f32; 4] = F32_SIN_PI_7_K;
+ const C: [f32; 4] = F32_COS_PI_8_K;
+
+ // Range reduction.
+ let r = (a + a).round();
+ let i: u32 = unsafe { r.to_int_unchecked() };
+ let r = r.mul_add(-0.5, a);
+
+ let sx = (i >> 1) << 31;
+ let sy = (i << 31) ^ sx;
+
+ // Core approximation.
+ let r2 = r * r;
+ let r = mulsign_f32(r, sy);
+
+ let c = C[3];
+ let c = c.mul_add(r2, C[2]);
+ let c = c.mul_add(r2, C[1]);
+ let c = c.mul_add(r2, C[0]);
+ let c = c.mul_add(r2, 1.0);
+ let c = mulsign_f32(c, sx);
+
+ let r3 = r2 * r;
+ let s = S[3];
+ let s = s.mul_add(r2, S[2]);
+ let s = s.mul_add(r2, S[1]);
+ let s = r.mul_add(S[0], r3 * s);
+
+ if i & 1 != 0 {
+ (c, s)
+ } else {
+ (s, c)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::sin_cos_pi_f32;
+
+ #[test]
+ fn basics() {
+ assert_eq!(sin_cos_pi_f32(-1.5), (1.0, 0.0));
+ assert_eq!(sin_cos_pi_f32(-1.0), (0.0, -1.0));
+ assert_eq!(sin_cos_pi_f32(-0.5), (-1.0, 0.0));
+ assert_eq!(sin_cos_pi_f32(-0.0), (0.0, 1.0));
+
+ assert_eq!(sin_cos_pi_f32(0.0), (0.0, 1.0));
+ assert_eq!(sin_cos_pi_f32(0.5), (1.0, 0.0));
+ assert_eq!(sin_cos_pi_f32(1.0), (0.0, -1.0));
+ assert_eq!(sin_cos_pi_f32(1.5), (-1.0, 0.0));
+ }
+}
--- /dev/null
+// Based on Norbert Juffa's tanpi posted to the cuda forums. Using my own polynomial, but that might
+// be worse, todo: check whether polynomial is worse.
+// https://forums.developer.nvidia.com/t/an-implementation-of-single-precision-tanpi-for-cuda/48024
+//
+// Sollya code for generating these polynomials is in `doc/sincostan.sollya`
+
+const F32_TAN_PI_15_K: [f32; 7] = unsafe {
+ std::mem::transmute::<[u32; 7], _>([
+ 0x41255def, // 0x1.4abbdep3
+ 0x4223335b, // 0x1.4666b6p5
+ 0x43234f4b, // 0x1.469e96p7
+ 0x441e4604, // 0x1.3c8c08p9
+ 0x4548fad9, // 0x1.91f5b2p11
+ 0xc21a6851, // -0x1.34d0a2p5
+ 0x47f775a0, // 0x1.eeeb4p16
+ ])
+};
+
+/// Computes the tangent of `a` expressed in multiples of *pi* radians, or half-turns.
+///
+/// Returns `tan(a * pi)`
+///
+/// # Examples
+///
+/// ```
+/// use narcissus_maths::tan_pi_f32;
+/// ```
+pub fn tan_pi_f32(a: f32) -> f32 {
+ const T: [f32; 7] = F32_TAN_PI_15_K;
+
+ // Range reduction.
+ let r = (a + a).round();
+ let i: u32 = unsafe { r.to_int_unchecked() };
+ let r = r.mul_add(-0.5, a);
+
+ let e = if i.wrapping_add(1) & 2 != 0 {
+ -0.0
+ } else {
+ 0.0
+ };
+
+ // Core approximation.
+ let r2 = r * r;
+ let p = T[6];
+ let p = p.mul_add(r2, T[5]);
+ let p = p.mul_add(r2, T[4]);
+ let p = p.mul_add(r2, T[3]);
+ let p = p.mul_add(r2, T[2]);
+ let p = p.mul_add(r2, T[1]);
+ let p = p.mul_add(r2, T[0]);
+
+ let t = r2 * r;
+ let t = p.mul_add(t, -8.742278e-8 * r);
+ let r = r.mul_add(std::f32::consts::PI, t);
+
+ // Handle half-integer arguments.
+ let r = if r == 0.0 { e } else { r };
+ let r = if i & 1 == 1 { 1.0 / -r } else { r };
+
+ // Handle integer arguments.
+ if a == a.floor() {
+ a * e
+ } else {
+ r
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::tan_pi_f32;
+
+ #[test]
+ fn basics() {
+ assert_eq!(tan_pi_f32(0.0), 0.0);
+ assert_eq!(tan_pi_f32(0.25), 1.0);
+ }
+}
--- /dev/null
+/// Rough translation of Paul Zimmermann's binary32_exhaustive to Rust.
+/// https://gitlab.inria.fr/zimmerma/math_accuracy/-/blob/master/binary32_exhaustive/check_exhaustive.c
+use std::{
+ ops::MulAssign,
+ sync::atomic::{AtomicUsize, Ordering},
+};
+
+use narcissus_maths::{next_after_f32, sin_cos_pi_f32, tan_pi_f32};
+use rug::{
+ ops::{AssignRound, DivAssignRound},
+ Assign,
+};
+
+fn ulp_error_f32(our_val: f32, ref_val: f32, x: f32, reference: fn(&mut rug::Float)) -> u32 {
+ if our_val == ref_val {
+ return 0;
+ }
+
+ let mut our_val = our_val;
+ let mut ref_val = ref_val;
+
+ if ref_val.is_infinite() {
+ if our_val.is_infinite() {
+ // Then ours and reference have different signs.
+ assert!(our_val * ref_val < 0.0);
+ return u32::MAX;
+ }
+
+ our_val /= 2.0;
+ let mut ref_val_2 = rug::Float::new(24);
+ ref_val_2.assign_round(x, rug::float::Round::Down);
+ reference(&mut ref_val_2);
+ ref_val_2.div_assign_round(2, rug::float::Round::Down);
+ ref_val = ref_val_2.to_f32_round(rug::float::Round::Down);
+ if ref_val.is_infinite() {
+ ref_val = if ref_val > 0.0 {
+ f32::from_bits(0x7f00_0000)
+ } else {
+ f32::from_bits(0xff00_0000)
+ };
+ }
+ }
+
+ if our_val.is_infinite() {
+ assert!(!ref_val.is_infinite());
+ // If ours gives +/-Inf but the correct rounding is in the binary32 range, assume we gave
+ // +/-2^128.
+ ref_val /= 2.0;
+ our_val = if our_val > 0.0 {
+ f32::from_bits(0x7f00_0000)
+ } else {
+ f32::from_bits(0xff00_0000)
+ }
+ }
+
+ let err = our_val - ref_val;
+ let ulp = next_after_f32(ref_val, our_val) - ref_val;
+ let err = (err / ulp).abs();
+ if err >= u32::MAX as f32 {
+ u32::MAX
+ } else {
+ err as u32
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+struct TestRange {
+ base: u32,
+ end: u32,
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+struct FloatErrors {
+ num_errors: u32,
+ max_error_ulp: u32,
+ max_error_val: u32,
+}
+
+const PREC: u32 = 280;
+
+struct FloatCheck {
+ reference: fn(&mut rug::Float),
+ ours: fn(f32) -> f32,
+
+ tan_mode: bool,
+
+ pi: rug::Float,
+ tmp: rug::Float,
+
+ errors: FloatErrors,
+}
+
+impl FloatCheck {
+ fn new(reference: fn(&mut rug::Float), ours: fn(f32) -> f32, tan_mode: bool) -> Self {
+ Self {
+ reference,
+ ours,
+ tan_mode,
+ pi: rug::Float::with_val(PREC, rug::float::Constant::Pi),
+ tmp: rug::Float::new(PREC),
+ errors: Default::default(),
+ }
+ }
+
+ #[inline(always)]
+ fn check(&mut self, u: u32) {
+ let x = f32::from_bits(u);
+ assert!(x.is_finite());
+
+ let our_val = (self.ours)(x);
+
+ if self.tan_mode {
+ let fract = x.fract();
+ if fract == 0.5 || fract == -0.5 {
+ assert!(our_val.is_infinite());
+ return;
+ }
+ }
+
+ self.tmp.assign(x);
+ self.tmp.mul_assign(&self.pi);
+ (self.reference)(&mut self.tmp);
+ self.tmp.subnormalize_ieee();
+ let ref_val = self.tmp.to_f32_round(rug::float::Round::Nearest);
+
+ if our_val != ref_val && !(our_val.is_nan() && ref_val.is_nan()) {
+ println!("u: {u:#08x}, our_val: {our_val}, ref_val: {ref_val}");
+
+ self.errors.num_errors += 1;
+ let e = ulp_error_f32(our_val, ref_val, x, self.reference);
+ if e > self.errors.max_error_ulp {
+ self.errors.max_error_ulp = e;
+ self.errors.max_error_val = u;
+ }
+ }
+ }
+}
+
+fn check_exhaustive_f32(reference: fn(&mut rug::Float), ours: fn(f32) -> f32, tan_mode: bool) {
+ const COUNT: u32 = 2_139_095_040_u32;
+ const SPLIT: u32 = 256;
+
+ // Generate ranges.
+ assert_eq!(COUNT % SPLIT, 0);
+ let per_split = COUNT / SPLIT;
+
+ let work_index = AtomicUsize::new(0);
+ let mut work = (0..SPLIT)
+ .map(|i| {
+ let base = i * per_split;
+ let end = i * per_split + per_split;
+ TestRange { base, end }
+ })
+ .collect::<Vec<_>>();
+
+ // Try and start with big numbers to avoid reallocs?
+ work.reverse();
+
+ let count = AtomicUsize::new(0);
+
+ let mut errors = FloatErrors::default();
+
+ // Spawn threads.
+ std::thread::scope(|s| {
+ let num_threads = std::thread::available_parallelism().unwrap().get();
+
+ let threads = (0..num_threads)
+ .map(|_| {
+ s.spawn(|| {
+ let mut float_check = FloatCheck::new(reference, ours, tan_mode);
+ loop {
+ let index = work_index.fetch_add(1, Ordering::SeqCst);
+ if let Some(range) = work.get(index) {
+ let start = std::time::Instant::now();
+ for i in range.base..range.end {
+ float_check.check(i);
+ }
+ for i in range.base..range.end {
+ float_check.check(0x8000_0000 + i);
+ }
+
+ let i = count.fetch_add(1, Ordering::SeqCst);
+ println!(
+ "{:.1}% chunk {index} took {:?}",
+ ((i + 1) as f32 * (1.0 / SPLIT as f32)) * 100.0,
+ std::time::Instant::now() - start
+ );
+
+ continue;
+ }
+ break;
+ }
+ float_check.errors
+ })
+ })
+ .collect::<Vec<_>>();
+
+ for thread in threads {
+ let thread_errors = thread.join().unwrap();
+ errors.num_errors += thread_errors.num_errors;
+ if thread_errors.max_error_ulp > errors.max_error_ulp {
+ errors.max_error_ulp = thread_errors.max_error_ulp;
+ errors.max_error_val = thread_errors.max_error_val;
+ }
+ }
+ });
+
+ println!(
+ "errors: {} ({:.2}%)",
+ errors.num_errors,
+ (errors.num_errors as f64 / (COUNT * 2) as f64) * 100.0
+ );
+ println!("max error ulps: {}", errors.max_error_ulp);
+ println!("max error error at: {:#08x}", errors.max_error_val);
+ assert_eq!(errors.num_errors, 0);
+}
+
+fn ref_sin_pi_f32(x: &mut rug::Float) {
+ x.sin_mut();
+}
+
+fn ref_cos_pi_f32(x: &mut rug::Float) {
+ x.cos_mut();
+}
+
+fn ref_tan_pi_f32(x: &mut rug::Float) {
+ x.tan_mut();
+}
+
+#[test]
+#[ignore]
+pub fn exhaustive_sin_pi() {
+ check_exhaustive_f32(ref_sin_pi_f32, |a| sin_cos_pi_f32(a).0, false)
+}
+
+#[test]
+#[ignore]
+pub fn exhaustive_cos_pi() {
+ check_exhaustive_f32(ref_cos_pi_f32, |a| sin_cos_pi_f32(a).1, false)
+}
+
+#[test]
+#[ignore]
+pub fn exhaustive_tan_pi() {
+ check_exhaustive_f32(ref_tan_pi_f32, tan_pi_f32, true)
+}
--- /dev/null
+use narcissus_maths::next_after_f32;
+
+mod libc {
+ use std::ffi::c_float;
+ extern "C" {
+ pub fn nextafterf(x: c_float, y: c_float) -> c_float;
+ }
+}
+
+#[inline(always)]
+fn nextafterf(x: f32, y: f32) -> f32 {
+ unsafe { libc::nextafterf(x, y) }
+}
+
+fn test_towards(y: f32) {
+ for u in 0..=0xffff_ffff_u32 {
+ let x = f32::from_bits(u);
+ let ours = next_after_f32(x, y);
+ let reference = nextafterf(x, y);
+ assert!(
+ ours == reference || (ours.is_nan() && reference.is_nan()),
+ "x ({u:X}): ours ({ours}) != reference ({reference})"
+ );
+ }
+}
+
+#[test]
+#[ignore]
+fn next_after_f32_to_zero() {
+ test_towards(0.0)
+}
+
+#[test]
+#[ignore]
+fn next_after_f32_to_inf() {
+ test_towards(f32::INFINITY)
+}
+
+#[test]
+#[ignore]
+fn next_after_f32_to_neg_inf() {
+ test_towards(f32::NEG_INFINITY)
+}