diff --git a/Cargo.lock b/Cargo.lock
index b40743e..0eb03b5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -354,8 +354,7 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 [[package]]
 name = "fearless_simd"
 version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76258897e51fd156ee03b6246ea53f3e0eb395d0b327e9961c4fc4c8b2fa151a"
+source = "git+https://github.com/Shnatsel/fearless_simd?branch=avx512#fd41e878b931aa65d36061fc769fbec83d6ce2b2"
 
 [[package]]
 name = "fftw"
diff --git a/Cargo.toml b/Cargo.toml
index 5011217..3d17ffe 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,3 +42,6 @@ debug = true
 
 [package.metadata.docs.rs]
 all-features = true
+
+[patch.crates-io]
+fearless_simd = {git = "https://github.com/Shnatsel/fearless_simd", branch = "avx512"}
diff --git a/src/algorithms/bravo.rs b/src/algorithms/bravo.rs
index b3d1d7e..4014573 100644
--- a/src/algorithms/bravo.rs
+++ b/src/algorithms/bravo.rs
@@ -10,7 +10,7 @@
 /// The initial implementation was translated from mathematical notation in the paper
 /// to Rust by Claude 4.5 Opus.
 use fearless_simd::prelude::*;
-use fearless_simd::{f32x4, f32x8, f64x2, f64x4, Simd};
+use fearless_simd::{f32x16, f32x4, f32x8, f64x2, f64x4, f64x8, Simd};
 
 /// Macro to generate bit_rev_bravo implementations for concrete types.
 /// Used instead of generics because `fearless_simd` doesn't let us be generic over the exact float type.
@@ -153,8 +153,10 @@ macro_rules! impl_bit_rev_bravo {
 // which is necessary for using the native vector width
 impl_bit_rev_bravo!(bit_rev_bravo_chunk_4_f32, f32, f32x4, 4);
 impl_bit_rev_bravo!(bit_rev_bravo_chunk_8_f32, f32, f32x8, 8);
+impl_bit_rev_bravo!(bit_rev_bravo_chunk_16_f32, f32, f32x16, 16);
 impl_bit_rev_bravo!(bit_rev_bravo_chunk_2_f64, f64, f64x2, 2);
 impl_bit_rev_bravo!(bit_rev_bravo_chunk_4_f64, f64, f64x4, 4);
+impl_bit_rev_bravo!(bit_rev_bravo_chunk_8_f64, f64, f64x8, 8);
 
 /// Performs in-place bit-reversal permutation using the CO-BRAVO algorithm.
 ///
@@ -165,8 +167,8 @@ impl_bit_rev_bravo!(bit_rev_bravo_chunk_4_f64, f64, f64x4, 4);
 pub fn bit_rev_bravo_f32(simd: S, data: &mut [f32], n: usize) {
     match ::N {
         4 => bit_rev_bravo_chunk_4_f32(simd, data, n), // SSE, NEON and fallback
-        _ => bit_rev_bravo_chunk_8_f32(simd, data, n),
-        // fearless_simd has no native support for AVX-512 yet
+        8 => bit_rev_bravo_chunk_8_f32(simd, data, n), // AVX2
+        _ => bit_rev_bravo_chunk_16_f32(simd, data, n), // AVX-512, AVX10
     }
 }
 
@@ -179,8 +181,8 @@ pub fn bit_rev_bravo_f32(simd: S, data: &mut [f32], n: usize) {
 pub fn bit_rev_bravo_f64(simd: S, data: &mut [f64], n: usize) {
     match ::N {
         2 => bit_rev_bravo_chunk_2_f64(simd, data, n), // SSE, NEON and fallback
-        _ => bit_rev_bravo_chunk_4_f64(simd, data, n),
-        // fearless_simd has no native support for AVX-512 yet
+        4 => bit_rev_bravo_chunk_4_f64(simd, data, n), // AVX2
+        _ => bit_rev_bravo_chunk_8_f64(simd, data, n), // AVX-512, AVX10
     }
 }
 