From: Joshua Simmons Date: Sat, 15 Oct 2022 13:32:22 +0000 (+0200) Subject: Add some new maths functions X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=ed87042544b4a8e254611409d68b5fa3249edf54;p=josh%2Fnarcissus Add some new maths functions Add `{sin,cos,tan}_pi_f32` functions. Add `next_after_f32`. Add `HalfTurns` unit type. Switch `Mat4` to use turns and `sin_cos_pi_f32`. --- diff --git a/Cargo.lock b/Cargo.lock index a070e00..2351191 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,34 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "az" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973" + [[package]] name = "fast-float" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" +[[package]] +name = "gmp-mpfr-sys" +version = "1.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea3f42dadb6c75f122e9aa87e757ef11d4282f664c9f2e6476a9c2c8970f9d19" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "libc" +version = "0.2.135" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" + [[package]] name = "narcissus" version = "0.1.0" @@ -46,6 +68,9 @@ dependencies = [ [[package]] name = "narcissus-maths" version = "0.1.0" +dependencies = [ + "rug", +] [[package]] name = "narcissus-world" @@ -58,6 +83,17 @@ dependencies = [ name = "renderdoc-sys" version = "0.1.0" +[[package]] +name = "rug" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "203180f444c95eac53586ed04793ecf6454c5d28f9eca8eead815fc19e136c47" +dependencies = [ + "az", + "gmp-mpfr-sys", + "libc", +] + [[package]] name = "sdl2-sys" version = "0.1.0" @@ -69,3 +105,25 @@ version = "0.1.0" [[package]] name = "vulkan-sys" version = "0.1.0" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 6d034de..0f8c72e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,9 +6,9 @@ members = [ "ffi/stb_image-sys", "ffi/vulkan-sys", "narcissus", + "narcissus-app", "narcissus-core", "narcissus-gpu", - "narcissus-app", "narcissus-maths", "narcissus-world", ] diff --git a/narcissus-maths/Cargo.toml b/narcissus-maths/Cargo.toml index 344b2d1..5065f8e 100644 --- a/narcissus-maths/Cargo.toml +++ b/narcissus-maths/Cargo.toml @@ -8,3 +8,6 @@ edition = "2021" [features] [dependencies] + +[dev-dependencies] +rug = "1.17.0" \ No newline at end of file diff --git a/narcissus-maths/doc/sincostan.m b/narcissus-maths/doc/sincostan.m new file mode 100644 index 0000000..2d12e6f --- /dev/null +++ b/narcissus-maths/doc/sincostan.m @@ -0,0 +1,89 @@ + +x = 0:0.001:1/4 + +tanpi7 = x .* (3.1414539813995361328125 + x.^2 + .* (10.40517330169677734375 + x.^2 + .* (35.490978240966796875 + x.^2 + .* 284.301177978515625))); + +tanpi9 = x .* (3.14160251617431640625 + + x.^2 .* (10.3275852203369140625 + + x.^2 .* (41.77574920654296875 + + x.^2 .* (122.0411834716796875 + + x.^2 .* 1306.0660400390625)))); + +tanpi13 = x .* (3.1415927410125732421875 + + x.^2 .* (10.3353290557861328125 + + x.^2 .* (40.82488250732421875 + + x.^2 .* (161.0076446533203125 + + x.^2 .* (736.640869140625 + + x.^2 .* (809.0543212890625 + + x.^2 .* 27793.21875)))))); + +tanpi15 = x .* (3.1415927410125732421875 + + x.^2 .* (10.33537769317626953125 + + x.^2 .* (40.80986785888671875 + + x.^2 .* (162.5588531494140625 + + x.^2 .* (662.82147216796875 + + x.^2 .* (2586.50048828125 + + x.^2 .* (6733.18896484375 + + x.^2 .* 97579.828125))))))); + +tanpic = x .* (3.1415927410125732421875 + + x.^2 .* (1.03354301e+1 + + x.^2 .* (4.08006248e+1 + + x.^2 .* (1.63275925e+2 + + x.^2 .* (6.34397766e+2 + + x.^2 .* (3.18810547e+3 + + x.^2 .* (2.59904968e+2 + + x.^2 .* (1.25408000e+5)))))))); + +tanpo = x .* (3.1415927410125732421875 + + x.^2 .* (10.3353290557861328125 + + x.^2 .* (40.82488250732421875 + + x.^2 .* (161.0076446533203125 + + x.^2 .* (736.640869140625 + + x.^2 .* (809.0543212890625 + + x.^2 .* 27793.21875)))))); + +tanpid = x .* (3.14159270438032223182744928635656833648681640625 + + x.^2 .* (10.335346221923828125 + + x.^2 .* (40.822620391845703125 + + x.^2 .* (161.1344146728515625 + + x.^2 .* (733.203857421875 + + x.^2 .* (853.6375732421875 + + x.^2 .* 27571.314453125)))))); + +tanpid2 = x .* (3.1415926499513684433395610540173947811126708984375 + + x.^2 .* (10.33543300628662109375 + + x.^2 .* (40.800151824951171875 + + x.^2 .* (163.3097381591796875 + + x.^2 .* (633.093994140625 + + x.^2 .* (3215.677978515625 + + x.^2 .* (-38.601871490478515625 + + x.^2 .* 1.2669925e5))))))); + +tanpi = tan(x .* pi); + +plot (x, (tanpi7 - tanpi) ./ tanpi); +title("tanpi7"); +xlabel("x"); +ylabel("rel error"); +figure +plot (x, (tanpid2 - tanpi) ./ tanpi) +title("tanpi double"); +xlabel("x"); +ylabel("rel error"); + +cospi = cos(x .* pi); + +cospi8 = x.^2 .* (-4.93480205535888671875 + + x.^2 .* (4.0586986541748046875 + + x.^2 .* (-1.33483588695526123046875 + + x.^2 .* 0.22974522411823272705078125))) + 1.0; + +figure +plot (x, (cospi - cospi8) ./ cospi) +title("cospi"); +xlabel("x"); +ylabel("abs error"); diff --git a/narcissus-maths/doc/sincostan.sollya b/narcissus-maths/doc/sincostan.sollya new file mode 100644 index 0000000..eb3d517 --- /dev/null +++ b/narcissus-maths/doc/sincostan.sollya @@ -0,0 +1,149 @@ +// create primary range approximations of sin(pi x) and cos(pi x) for x on [-1/4,1/4] +// both for single and double precision +// stolen from https://marc-b-reynolds.github.io/math/2020/03/11/SinCosPi.html + +R = [0;1/4]; +T = floating; +E = relative; +B = [|SG...|]; + +// write fp values such that they always have a decimal point +procedure fp_write(v) +{ + if (v != nearestint(v)) then { + write(v); + } else { + write(v,".0"); + }; +}; + +// make a copy of list with f applied +procedure list_apply(l,f) +{ + var r,v,i; + + r = [||]; + + for i from 0 to length(l)-1 do { + v = f(l[i]); + r = r :. v; + }; + + return r; +}; + +f32_round = proc(n) { return single(n); }; +f64_round = proc(n) { return double(n); }; + +// make a copy of list with elements rounded to binary32 +procedure list_to_f32(l) { return list_apply(l, f32_round); }; +procedure list_to_f64(l) { return list_apply(l, f64_round); }; + +procedure list_of_even_coeff(p) +{ + var r,e,i; + e = degree(p); + r = [||]; + + for i from 0 to e by 2 do { r = r :. coeff(p,i); }; + + return r; +}; + +procedure list_of_odd_coeff(p) +{ + var r,e,i; + e = degree(p); + r = [||]; + + for i from 1 to e by 2 do { r = r :. coeff(p,i); }; + + return r; +}; + +procedure f32_write_list(p,l) +{ + var e,i; + e = length(l)-1; + write("const ", p,"_K: [f32; " @ length(l) @ "] = [\n"); + display=hexadecimal!; + for i from 0 to e do { printsingle(l[i]); print(", //", l[i]); }; + print("];\n"); + display=decimal!; +}; + +procedure f32_data_write(name,a,f,r) +{ + var e,z,d,l,i; + + // dump out the supnorm + display=hexadecimal!; + e = single(dirtyinfnorm(a-f, r)); + write("// peak-error: ", e); + display=decimal!; + print(" (~", e,")"); + + // create a debug list of values of 'x' where the approximation + // error is max. + z = list_to_f32(dirtyfindzeros(diff(a-f), r)); + l = length(z); + + + write("const " @ name @ "_EMAX: [f32; " @ l @ "] = ["); + + printsingle(z[0]); + + for i from 1 to l-1 do { + write(","); + printsingle(z[i]); + }; + + z = list_to_f32(dirtyfindzeros(a-f, r)); + l = length(z); + + // and where the error is approximately zero + write("];\nconst " @ name @ "_EMIN: [f32; " @ l @ "] = ["); + + if (l > 0) then { + printsingle(z[0]); + for i from 1 to l-1 do { + write(","); + printsingle(z[i]); + }; + }; + + // dump out the supnorm + e = single(dirtyinfnorm(a-f, r)); + print("];\n"); + + a; +}; + +// Actual start of building polynomial + +print("sin(pi x) in binary32"); +I = [|1,3,5,7|]; +F = sin(pi*x); +P = fpminimax(F,I,B,R,T,E); +f32_data_write("F32_SIN_PI_7", P,F,R); +f32_write_list("F32_SIN_PI_7", list_of_odd_coeff(P)); +print("\n"); + +print("cos(pi x) in binary32"); +I = [|0,2,4,6,8|]; +F = cos(pi*x); +P = fpminimax(F,I,B,R,T,E); +f32_data_write("F32_COS_PI_8", P,F,R); +f32_write_list("F32_COS_PI_8", list_of_even_coeff(P)); +print("\n"); + +B = [|D,SG...|]; + +print("tan(pi x) in binary32"); +I = [|1,3,5,7,9,11,13,15|]; +F = tan(pi*x); + +P = fpminimax(F,I,B,R,T,E); + +f32_data_write("F32_TAN_PI", P,F,R); +f32_write_list("F32_TAN_PI", list_of_odd_coeff(P)); diff --git a/narcissus-maths/src/lib.rs b/narcissus-maths/src/lib.rs index 0af05bf..ab5eda3 100644 --- a/narcissus-maths/src/lib.rs +++ b/narcissus-maths/src/lib.rs @@ -3,9 +3,12 @@ mod affine3; mod mat2; mod mat3; mod mat4; +mod next_after_f32; mod point2; mod point3; mod quat; +mod sin_cos_pi; +mod tan_pi; mod vec2; mod vec3; mod vec4; @@ -15,9 +18,12 @@ pub use affine3::Affine3; pub use mat2::Mat2; pub use mat3::Mat3; pub use mat4::Mat4; +pub use next_after_f32::next_after_f32; pub use point2::Point2; pub use point3::Point3; pub use quat::Quat; +pub use sin_cos_pi::sin_cos_pi_f32; +pub use tan_pi::tan_pi_f32; pub use vec2::Vec2; pub use vec3::Vec3; pub use vec4::Vec4; @@ -27,10 +33,12 @@ pub use vec4::Vec4; pub struct Rad(f32); impl Rad { + #[inline(always)] pub const fn new(x: f32) -> Self { Self(x) } + #[inline(always)] pub const fn as_f32(self) -> f32 { self.0 } @@ -41,10 +49,31 @@ impl Rad { pub struct Deg(f32); impl Deg { + #[inline(always)] + pub const fn new(x: f32) -> Self { + Self(x) + } + + #[inline(always)] + pub const fn as_f32(self) -> f32 { + self.0 + } +} + +/// Unit type for an angle expressed in half-turns. +/// +/// A turn represents a 360 degree rotation, a half-turn represents a 180 degree rotation. A +/// half-turn is implicitly scaled by pi. +#[derive(Clone, Copy, PartialEq, PartialOrd, Debug, Default)] +pub struct HalfTurn(f32); + +impl HalfTurn { + #[inline(always)] pub const fn new(x: f32) -> Self { Self(x) } + #[inline(always)] pub const fn as_f32(self) -> f32 { self.0 } @@ -57,6 +86,12 @@ impl From for Deg { } } +impl From for Deg { + fn from(x: HalfTurn) -> Self { + Self(x.0 * 180.0) + } +} + impl From for Rad { #[inline(always)] fn from(x: Deg) -> Self { @@ -64,6 +99,27 @@ impl From for Rad { } } +impl From for Rad { + #[inline(always)] + fn from(x: HalfTurn) -> Self { + Self(x.0 * std::f32::consts::PI) + } +} + +impl From for HalfTurn { + #[inline(always)] + fn from(x: Rad) -> Self { + Self(x.0 / std::f32::consts::PI) + } +} + +impl From for HalfTurn { + #[inline(always)] + fn from(x: Deg) -> Self { + Self(x.0 / 180.0) + } +} + /// Returns the minimum of `x` and `y`. /// /// This function returns a platform dependent value if either of its inputs are `NaN`. diff --git a/narcissus-maths/src/mat4.rs b/narcissus-maths/src/mat4.rs index fe11369..c506275 100644 --- a/narcissus-maths/src/mat4.rs +++ b/narcissus-maths/src/mat4.rs @@ -1,4 +1,4 @@ -use crate::{Point2, Point3, Rad, Vec2, Vec3, Vec4}; +use crate::{sin_cos_pi_f32, HalfTurn, Point2, Point3, Rad, Vec2, Vec3, Vec4}; /// 4x4 matrix. /// @@ -111,8 +111,8 @@ impl Mat4 { } /// Constructs a transformation matrix which rotates around the given `axis` by `angle`. - pub fn from_axis_angle(axis: Vec3, angle: Rad) -> Mat4 { - let (sin, cos) = angle.as_f32().sin_cos(); + pub fn from_axis_rotation(axis: Vec3, rotation: HalfTurn) -> Mat4 { + let (sin, cos) = sin_cos_pi_f32(rotation.as_f32()); let axis_sin = axis * sin; let axis_sq = axis * axis; let one_minus_cos = 1.0 - cos; @@ -489,8 +489,6 @@ impl std::ops::Mul for Mat4 { #[cfg(test)] mod tests { - use crate::Deg; - use super::*; const IDENTITY: Mat4 = Mat4::IDENTITY; @@ -524,14 +522,17 @@ mod tests { #[test] fn axis_angle() { - let rot_180_x = Mat4::from_axis_angle(Vec3::X, Deg::new(180.0).into()); + let rot_180_x = Mat4::from_axis_rotation(Vec3::X, HalfTurn::new(1.0)); assert_eq!(rot_180_x * Vec3::X, Vec3::X); - // TODO: requires approximate equality assert. - // assert_eq!(rot_180_x * Vec3::Y, -Vec3::Y); - // assert_eq!(rot_180_x * Vec3::Z, -Vec3::Z); - let rot_180_y = Mat4::from_axis_angle(Vec3::Y, Deg::new(180.0).into()); + assert_eq!(rot_180_x * Vec3::Y, -Vec3::Y); + assert_eq!(rot_180_x * Vec3::Z, -Vec3::Z); + let rot_180_y = Mat4::from_axis_rotation(Vec3::Y, HalfTurn::new(1.0)); + assert_eq!(rot_180_y * Vec3::X, -Vec3::X); assert_eq!(rot_180_y * Vec3::Y, Vec3::Y); - let rot_180_z = Mat4::from_axis_angle(Vec3::Z, Deg::new(180.0).into()); + assert_eq!(rot_180_y * Vec3::Z, -Vec3::Z); + let rot_180_z = Mat4::from_axis_rotation(Vec3::Z, HalfTurn::new(1.0)); + assert_eq!(rot_180_z * Vec3::X, -Vec3::X); + assert_eq!(rot_180_z * Vec3::Y, -Vec3::Y); assert_eq!(rot_180_z * Vec3::Z, Vec3::Z); } diff --git a/narcissus-maths/src/next_after_f32.rs b/narcissus-maths/src/next_after_f32.rs new file mode 100644 index 0000000..0de54d3 --- /dev/null +++ b/narcissus-maths/src/next_after_f32.rs @@ -0,0 +1,57 @@ +/// Calculate the next representable floating-point value following x in the direction of y. +/// +/// If y is less than x, these functions will return the largest representable number less than x. +/// +/// # Returns +/// +/// On success, the function returns the next representable floating-point value after x in the +/// direction of y. +/// +/// * If `x` equals `y`, then `y` is returned. +/// * If `x` or `y` is a `NaN`, a `NaN` is returned. +/// * If `x` is finite, and the result would overflow, a range error occurs, and the function +/// returns `inf` with the correct mathematical sign. +/// * If `x` is not equal to `y`, and the correct function result would be subnormal, zero, or +/// underflow, a range error occurs, and either the correct value (if it can be represented), +/// or `0.0`, is returned. +/// * If x equals y, the function returns y. +pub fn next_after_f32(x: f32, y: f32) -> f32 { + if x.is_nan() || y.is_nan() { + return x + y; + } + + let ux = x.to_bits(); + let uy = y.to_bits(); + + if ux == uy { + return y; + } + + let ax = ux & 0x7fff_ffff; + let ay = uy & 0x7fff_ffff; + + let ux = if ax == 0 { + if ay == 0 { + return y; + } + (uy & 0x8000_0000) | 1 + } else if ax > ay || ((ux ^ uy) & 0x8000_0000) != 0 { + ux - 1 + } else { + ux + 1 + }; + + let e = ux & 0x7f800000; + // Overflow if ux is infinite, and x is finite. + if e == 0x7f800000 { + return x + x; + } + // Force underflow if ux is subnormal or zero. + if e == 0 { + let mut force_eval = 0.0; + let val = f32::from_bits(ux); + unsafe { std::ptr::write_volatile(&mut force_eval, x * x + val * val) }; + } + + f32::from_bits(ux) +} diff --git a/narcissus-maths/src/sin_cos_pi.rs b/narcissus-maths/src/sin_cos_pi.rs new file mode 100644 index 0000000..9d6f5fd --- /dev/null +++ b/narcissus-maths/src/sin_cos_pi.rs @@ -0,0 +1,93 @@ +// Based on https://marc-b-reynolds.github.io/math/2020/03/11/SinCosPi.html +// +// Sollya code for generating these polynomials is in `doc/sincostan.sollya` + +// constants for sin(pi x), cos(pi x) for x on [-1/4,1/4] +const F32_SIN_PI_7_K: [f32; 4] = unsafe { + std::mem::transmute::<[u32; 4], _>([ + 0x40490fdb, // 0x1.921fb6p1 + 0xc0a55df6, // -0x1.4abbecp2 + 0x40233590, // 0x1.466b2p1 + 0xbf17acc9, // -0x1.2f5992p-1 + ]) +}; + +const F32_COS_PI_8_K: [f32; 4] = unsafe { + std::mem::transmute::<[u32; 4], _>([ + 0xc09de9e6, // -0x1.3bd3ccp2 + 0x4081e0dc, // 0x1.03c1b8p2 + 0xbfaadbe7, // -0x1.55b7cep0 + 0x3e6b4255, // 0x1.d684aap-3 + ]) +}; + +#[inline(always)] +fn mulsign_f32(x: f32, s: u32) -> f32 { + f32::from_bits(x.to_bits() ^ s) +} + +/// Simultaneously computes the sine and cosine of `a` expressed in multiples of *pi* radians, or half-turns. +/// +/// Returns `(sin(a * pi), cos(a * pi))` +/// +/// # Examples +/// +/// ``` +/// use narcissus_maths::sin_cos_pi_f32; +/// let (sin, cos) = sin_cos_pi_f32(0.0); +/// assert_eq!(sin, 0.0); +/// assert_eq!(cos, 1.0); +/// ``` +pub fn sin_cos_pi_f32(a: f32) -> (f32, f32) { + const S: [f32; 4] = F32_SIN_PI_7_K; + const C: [f32; 4] = F32_COS_PI_8_K; + + // Range reduction. + let r = (a + a).round(); + let i: u32 = unsafe { r.to_int_unchecked() }; + let r = r.mul_add(-0.5, a); + + let sx = (i >> 1) << 31; + let sy = (i << 31) ^ sx; + + // Core approximation. + let r2 = r * r; + let r = mulsign_f32(r, sy); + + let c = C[3]; + let c = c.mul_add(r2, C[2]); + let c = c.mul_add(r2, C[1]); + let c = c.mul_add(r2, C[0]); + let c = c.mul_add(r2, 1.0); + let c = mulsign_f32(c, sx); + + let r3 = r2 * r; + let s = S[3]; + let s = s.mul_add(r2, S[2]); + let s = s.mul_add(r2, S[1]); + let s = r.mul_add(S[0], r3 * s); + + if i & 1 != 0 { + (c, s) + } else { + (s, c) + } +} + +#[cfg(test)] +mod tests { + use crate::sin_cos_pi_f32; + + #[test] + fn basics() { + assert_eq!(sin_cos_pi_f32(-1.5), (1.0, 0.0)); + assert_eq!(sin_cos_pi_f32(-1.0), (0.0, -1.0)); + assert_eq!(sin_cos_pi_f32(-0.5), (-1.0, 0.0)); + assert_eq!(sin_cos_pi_f32(-0.0), (0.0, 1.0)); + + assert_eq!(sin_cos_pi_f32(0.0), (0.0, 1.0)); + assert_eq!(sin_cos_pi_f32(0.5), (1.0, 0.0)); + assert_eq!(sin_cos_pi_f32(1.0), (0.0, -1.0)); + assert_eq!(sin_cos_pi_f32(1.5), (-1.0, 0.0)); + } +} diff --git a/narcissus-maths/src/tan_pi.rs b/narcissus-maths/src/tan_pi.rs new file mode 100644 index 0000000..e220f99 --- /dev/null +++ b/narcissus-maths/src/tan_pi.rs @@ -0,0 +1,77 @@ +// Based on Norbert Juffa's tanpi posted to the cuda forums. Using my own polynomial, but that might +// be worse, todo: check whether polynomial is worse. +// https://forums.developer.nvidia.com/t/an-implementation-of-single-precision-tanpi-for-cuda/48024 +// +// Sollya code for generating these polynomials is in `doc/sincostan.sollya` + +const F32_TAN_PI_15_K: [f32; 7] = unsafe { + std::mem::transmute::<[u32; 7], _>([ + 0x41255def, // 0x1.4abbdep3 + 0x4223335b, // 0x1.4666b6p5 + 0x43234f4b, // 0x1.469e96p7 + 0x441e4604, // 0x1.3c8c08p9 + 0x4548fad9, // 0x1.91f5b2p11 + 0xc21a6851, // -0x1.34d0a2p5 + 0x47f775a0, // 0x1.eeeb4p16 + ]) +}; + +/// Computes the tangent of `a` expressed in multiples of *pi* radians, or half-turns. +/// +/// Returns `tan(a * pi)` +/// +/// # Examples +/// +/// ``` +/// use narcissus_maths::tan_pi_f32; +/// ``` +pub fn tan_pi_f32(a: f32) -> f32 { + const T: [f32; 7] = F32_TAN_PI_15_K; + + // Range reduction. + let r = (a + a).round(); + let i: u32 = unsafe { r.to_int_unchecked() }; + let r = r.mul_add(-0.5, a); + + let e = if i.wrapping_add(1) & 2 != 0 { + -0.0 + } else { + 0.0 + }; + + // Core approximation. + let r2 = r * r; + let p = T[6]; + let p = p.mul_add(r2, T[5]); + let p = p.mul_add(r2, T[4]); + let p = p.mul_add(r2, T[3]); + let p = p.mul_add(r2, T[2]); + let p = p.mul_add(r2, T[1]); + let p = p.mul_add(r2, T[0]); + + let t = r2 * r; + let t = p.mul_add(t, -8.742278e-8 * r); + let r = r.mul_add(std::f32::consts::PI, t); + + // Handle half-integer arguments. + let r = if r == 0.0 { e } else { r }; + let r = if i & 1 == 1 { 1.0 / -r } else { r }; + + // Handle integer arguments. + if a == a.floor() { + a * e + } else { + r + } +} + +#[cfg(test)] +mod tests { + use super::tan_pi_f32; + + #[test] + fn basics() { + assert_eq!(tan_pi_f32(0.0), 0.0); + assert_eq!(tan_pi_f32(0.25), 1.0); + } +} diff --git a/narcissus-maths/tests/exhaustive_f32.rs b/narcissus-maths/tests/exhaustive_f32.rs new file mode 100644 index 0000000..9d9f85c --- /dev/null +++ b/narcissus-maths/tests/exhaustive_f32.rs @@ -0,0 +1,246 @@ +/// Rough translation of Paul Zimmermann's binary32_exhaustive to Rust. +/// https://gitlab.inria.fr/zimmerma/math_accuracy/-/blob/master/binary32_exhaustive/check_exhaustive.c +use std::{ + ops::MulAssign, + sync::atomic::{AtomicUsize, Ordering}, +}; + +use narcissus_maths::{next_after_f32, sin_cos_pi_f32, tan_pi_f32}; +use rug::{ + ops::{AssignRound, DivAssignRound}, + Assign, +}; + +fn ulp_error_f32(our_val: f32, ref_val: f32, x: f32, reference: fn(&mut rug::Float)) -> u32 { + if our_val == ref_val { + return 0; + } + + let mut our_val = our_val; + let mut ref_val = ref_val; + + if ref_val.is_infinite() { + if our_val.is_infinite() { + // Then ours and reference have different signs. + assert!(our_val * ref_val < 0.0); + return u32::MAX; + } + + our_val /= 2.0; + let mut ref_val_2 = rug::Float::new(24); + ref_val_2.assign_round(x, rug::float::Round::Down); + reference(&mut ref_val_2); + ref_val_2.div_assign_round(2, rug::float::Round::Down); + ref_val = ref_val_2.to_f32_round(rug::float::Round::Down); + if ref_val.is_infinite() { + ref_val = if ref_val > 0.0 { + f32::from_bits(0x7f00_0000) + } else { + f32::from_bits(0xff00_0000) + }; + } + } + + if our_val.is_infinite() { + assert!(!ref_val.is_infinite()); + // If ours gives +/-Inf but the correct rounding is in the binary32 range, assume we gave + // +/-2^128. + ref_val /= 2.0; + our_val = if our_val > 0.0 { + f32::from_bits(0x7f00_0000) + } else { + f32::from_bits(0xff00_0000) + } + } + + let err = our_val - ref_val; + let ulp = next_after_f32(ref_val, our_val) - ref_val; + let err = (err / ulp).abs(); + if err >= u32::MAX as f32 { + u32::MAX + } else { + err as u32 + } +} + +#[derive(Clone, Copy, Debug)] +struct TestRange { + base: u32, + end: u32, +} + +#[derive(Clone, Copy, Debug, Default)] +struct FloatErrors { + num_errors: u32, + max_error_ulp: u32, + max_error_val: u32, +} + +const PREC: u32 = 280; + +struct FloatCheck { + reference: fn(&mut rug::Float), + ours: fn(f32) -> f32, + + tan_mode: bool, + + pi: rug::Float, + tmp: rug::Float, + + errors: FloatErrors, +} + +impl FloatCheck { + fn new(reference: fn(&mut rug::Float), ours: fn(f32) -> f32, tan_mode: bool) -> Self { + Self { + reference, + ours, + tan_mode, + pi: rug::Float::with_val(PREC, rug::float::Constant::Pi), + tmp: rug::Float::new(PREC), + errors: Default::default(), + } + } + + #[inline(always)] + fn check(&mut self, u: u32) { + let x = f32::from_bits(u); + assert!(x.is_finite()); + + let our_val = (self.ours)(x); + + if self.tan_mode { + let fract = x.fract(); + if fract == 0.5 || fract == -0.5 { + assert!(our_val.is_infinite()); + return; + } + } + + self.tmp.assign(x); + self.tmp.mul_assign(&self.pi); + (self.reference)(&mut self.tmp); + self.tmp.subnormalize_ieee(); + let ref_val = self.tmp.to_f32_round(rug::float::Round::Nearest); + + if our_val != ref_val && !(our_val.is_nan() && ref_val.is_nan()) { + println!("u: {u:#08x}, our_val: {our_val}, ref_val: {ref_val}"); + + self.errors.num_errors += 1; + let e = ulp_error_f32(our_val, ref_val, x, self.reference); + if e > self.errors.max_error_ulp { + self.errors.max_error_ulp = e; + self.errors.max_error_val = u; + } + } + } +} + +fn check_exhaustive_f32(reference: fn(&mut rug::Float), ours: fn(f32) -> f32, tan_mode: bool) { + const COUNT: u32 = 2_139_095_040_u32; + const SPLIT: u32 = 256; + + // Generate ranges. + assert_eq!(COUNT % SPLIT, 0); + let per_split = COUNT / SPLIT; + + let work_index = AtomicUsize::new(0); + let mut work = (0..SPLIT) + .map(|i| { + let base = i * per_split; + let end = i * per_split + per_split; + TestRange { base, end } + }) + .collect::>(); + + // Try and start with big numbers to avoid reallocs? + work.reverse(); + + let count = AtomicUsize::new(0); + + let mut errors = FloatErrors::default(); + + // Spawn threads. + std::thread::scope(|s| { + let num_threads = std::thread::available_parallelism().unwrap().get(); + + let threads = (0..num_threads) + .map(|_| { + s.spawn(|| { + let mut float_check = FloatCheck::new(reference, ours, tan_mode); + loop { + let index = work_index.fetch_add(1, Ordering::SeqCst); + if let Some(range) = work.get(index) { + let start = std::time::Instant::now(); + for i in range.base..range.end { + float_check.check(i); + } + for i in range.base..range.end { + float_check.check(0x8000_0000 + i); + } + + let i = count.fetch_add(1, Ordering::SeqCst); + println!( + "{:.1}% chunk {index} took {:?}", + ((i + 1) as f32 * (1.0 / SPLIT as f32)) * 100.0, + std::time::Instant::now() - start + ); + + continue; + } + break; + } + float_check.errors + }) + }) + .collect::>(); + + for thread in threads { + let thread_errors = thread.join().unwrap(); + errors.num_errors += thread_errors.num_errors; + if thread_errors.max_error_ulp > errors.max_error_ulp { + errors.max_error_ulp = thread_errors.max_error_ulp; + errors.max_error_val = thread_errors.max_error_val; + } + } + }); + + println!( + "errors: {} ({:.2}%)", + errors.num_errors, + (errors.num_errors as f64 / (COUNT * 2) as f64) * 100.0 + ); + println!("max error ulps: {}", errors.max_error_ulp); + println!("max error error at: {:#08x}", errors.max_error_val); + assert_eq!(errors.num_errors, 0); +} + +fn ref_sin_pi_f32(x: &mut rug::Float) { + x.sin_mut(); +} + +fn ref_cos_pi_f32(x: &mut rug::Float) { + x.cos_mut(); +} + +fn ref_tan_pi_f32(x: &mut rug::Float) { + x.tan_mut(); +} + +#[test] +#[ignore] +pub fn exhaustive_sin_pi() { + check_exhaustive_f32(ref_sin_pi_f32, |a| sin_cos_pi_f32(a).0, false) +} + +#[test] +#[ignore] +pub fn exhaustive_cos_pi() { + check_exhaustive_f32(ref_cos_pi_f32, |a| sin_cos_pi_f32(a).1, false) +} + +#[test] +#[ignore] +pub fn exhaustive_tan_pi() { + check_exhaustive_f32(ref_tan_pi_f32, tan_pi_f32, true) +} diff --git a/narcissus-maths/tests/next_after_f32.rs b/narcissus-maths/tests/next_after_f32.rs new file mode 100644 index 0000000..e03e099 --- /dev/null +++ b/narcissus-maths/tests/next_after_f32.rs @@ -0,0 +1,43 @@ +use narcissus_maths::next_after_f32; + +mod libc { + use std::ffi::c_float; + extern "C" { + pub fn nextafterf(x: c_float, y: c_float) -> c_float; + } +} + +#[inline(always)] +fn nextafterf(x: f32, y: f32) -> f32 { + unsafe { libc::nextafterf(x, y) } +} + +fn test_towards(y: f32) { + for u in 0..=0xffff_ffff_u32 { + let x = f32::from_bits(u); + let ours = next_after_f32(x, y); + let reference = nextafterf(x, y); + assert!( + ours == reference || (ours.is_nan() && reference.is_nan()), + "x ({u:X}): ours ({ours}) != reference ({reference})" + ); + } +} + +#[test] +#[ignore] +fn next_after_f32_to_zero() { + test_towards(0.0) +} + +#[test] +#[ignore] +fn next_after_f32_to_inf() { + test_towards(f32::INFINITY) +} + +#[test] +#[ignore] +fn next_after_f32_to_neg_inf() { + test_towards(f32::NEG_INFINITY) +}