From f1479778e4674788de87c1455a7d1ac7c7526c46 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 22 Jan 2026 22:30:06 -0500 Subject: [PATCH 1/3] Don't use load/store intrinsics --- fearless_simd/src/generated/avx2.rs | 580 +++++++++++---------- fearless_simd/src/generated/neon.rs | 432 ++++++++++++---- fearless_simd/src/generated/sse4_2.rs | 720 ++++++++++---------------- fearless_simd/src/generated/wasm.rs | 720 ++++++++++---------------- fearless_simd_gen/src/arch/neon.rs | 22 - fearless_simd_gen/src/generic.rs | 78 +-- fearless_simd_gen/src/mk_neon.rs | 13 +- fearless_simd_gen/src/mk_wasm.rs | 12 +- fearless_simd_gen/src/mk_x86.rs | 14 +- 9 files changed, 1203 insertions(+), 1388 deletions(-) diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index f09e03d07..27da1116d 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -104,14 +104,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_ps(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_ps(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -129,7 +129,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { - unsafe { _mm_storeu_ps(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { @@ -371,14 +377,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -396,7 +402,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { @@ -566,14 +578,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -591,7 +603,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + 
dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { @@ -767,14 +785,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -792,7 +810,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { @@ -868,14 +892,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -893,7 +917,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { @@ -1038,14 +1068,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1063,7 +1093,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { @@ -1214,14 +1250,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1239,7 +1275,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask16x8(self, a: 
mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { @@ -1315,14 +1357,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1340,7 +1382,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { @@ -1487,14 +1535,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1512,7 +1560,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { @@ -1671,14 +1725,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1696,7 +1750,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { @@ -1772,14 +1832,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_pd(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_pd(val.as_ptr() as 
*const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1797,7 +1857,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { - unsafe { _mm_storeu_pd(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { @@ -1968,14 +2034,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1993,7 +2059,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { @@ -2069,14 +2141,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { crate::support::Aligned256(_mm256_loadu_ps(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { crate::support::Aligned256(_mm256_loadu_ps(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2094,7 +2166,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { - unsafe { _mm256_storeu_ps(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { @@ -2372,18 +2450,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2401,7 +2475,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { @@ -2607,18 +2687,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { 
core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2636,7 +2712,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { @@ -2853,18 +2935,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2882,7 +2960,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { @@ -2970,18 +3054,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2999,7 +3079,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { @@ -3182,18 +3268,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3211,7 +3293,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn 
cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { @@ -3412,18 +3500,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3441,7 +3525,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { @@ -3529,18 +3619,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3558,7 +3644,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { @@ -3733,18 +3825,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3762,7 +3850,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { @@ -3952,18 +4046,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3981,7 +4071,13 @@ impl Simd for Avx2 { } #[inline(always)] fn 
store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { @@ -4069,14 +4165,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { crate::support::Aligned256(_mm256_loadu_pd(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { crate::support::Aligned256(_mm256_loadu_pd(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4094,7 +4190,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { - unsafe { _mm256_storeu_pd(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { @@ -4301,18 +4403,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { - crate::support::Aligned256(_mm256_loadu_si256(val.as_ptr() as *const _)) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4330,7 +4428,13 @@ impl Simd for Avx2 { } #[inline(always)] fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { @@ -4419,24 +4523,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_ps(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_ps(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_ps(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_ps(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4455,8 +4549,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { unsafe { - _mm256_storeu_ps(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_ps(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -4782,24 +4879,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { - crate::support::Aligned512([ - 
_mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(32usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(32usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4818,8 +4905,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5015,24 +5105,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(32usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(32usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5051,8 +5131,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5293,24 +5376,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(32usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(32usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5329,8 +5402,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5436,24 +5512,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: 
unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5472,8 +5538,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -5678,24 +5747,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5714,8 +5773,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -5983,24 +6045,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6019,8 +6071,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6129,24 +6184,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn 
load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6165,8 +6210,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -6367,24 +6415,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6403,8 +6441,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -6637,24 +6678,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6673,8 +6704,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -6780,24 +6814,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_pd(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_pd(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { - crate::support::Aligned512([ - 
_mm256_loadu_pd(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_pd(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6816,8 +6840,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { unsafe { - _mm256_storeu_pd(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_pd(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -7051,24 +7078,14 @@ impl Simd for Avx2 { #[inline(always)] fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm256_loadu_si256(val.as_ptr().add(0usize) as *const _), - _mm256_loadu_si256(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7087,8 +7104,11 @@ impl Simd for Avx2 { #[inline(always)] fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { unsafe { - _mm256_storeu_si256(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm256_storeu_si256(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index def528dde..4b4b3b3c3 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -96,14 +96,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(vld1q_f32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(vld1q_f32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -121,7 +121,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { - unsafe { vst1q_f32(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { @@ -318,14 +324,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(vld1q_s8(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(vld1q_s8(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -343,7 +349,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { vst1q_s8(dest.as_mut_ptr() as *mut _, 
a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { @@ -489,14 +501,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(vld1q_u8(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(vld1q_u8(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -514,7 +526,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { - unsafe { vst1q_u8(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { @@ -660,14 +678,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(vld1q_s8(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(vld1q_s8(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -685,7 +703,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { vst1q_s8(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { @@ -764,14 +788,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(vld1q_s16(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(vld1q_s16(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -789,7 +813,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { vst1q_s16(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { @@ -935,14 +965,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(vld1q_u16(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(vld1q_u16(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -960,7 +990,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u16x8(self, a: u16x8, dest: &mut 
[u16; 8usize]) -> () { - unsafe { vst1q_u16(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { @@ -1102,14 +1138,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(vld1q_s16(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(vld1q_s16(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1127,7 +1163,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { vst1q_s16(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { @@ -1206,14 +1248,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(vld1q_s32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(vld1q_s32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1231,7 +1273,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { vst1q_s32(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { @@ -1381,14 +1429,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(vld1q_u32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(vld1q_u32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1406,7 +1454,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { - unsafe { vst1q_u32(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { @@ -1548,14 +1602,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(vld1q_s32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(vld1q_s32(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } 
} @@ -1573,7 +1627,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { vst1q_s32(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { @@ -1652,14 +1712,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(vld1q_f64(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(vld1q_f64(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1677,7 +1737,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { - unsafe { vst1q_f64(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { @@ -1846,14 +1912,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(vld1q_s64(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(vld1q_s64(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1871,7 +1937,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { vst1q_s64(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { @@ -1951,14 +2023,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { crate::support::Aligned256(vld1q_f32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { crate::support::Aligned256(vld1q_f32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1976,7 +2048,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { - unsafe { vst1q_f32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x8(self, a: u8x32) -> f32x8 { @@ -2265,14 +2343,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { crate::support::Aligned256(vld1q_s8_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { 
crate::support::Aligned256(vld1q_s8_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2290,7 +2368,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { vst1q_s8_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x32(self, a: u8x32) -> i8x32 { @@ -2494,14 +2578,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { crate::support::Aligned256(vld1q_u8_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { crate::support::Aligned256(vld1q_u8_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2519,7 +2603,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { - unsafe { vst1q_u8_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x32(self, a: u8x32) -> u8x32 { @@ -2718,14 +2808,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { crate::support::Aligned256(vld1q_s8_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { crate::support::Aligned256(vld1q_s8_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2743,7 +2833,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { vst1q_s8_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { @@ -2857,14 +2953,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { crate::support::Aligned256(vld1q_s16_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { crate::support::Aligned256(vld1q_s16_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2882,7 +2978,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { vst1q_s16_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x16(self, a: u8x32) -> i16x16 { @@ -3086,14 +3188,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { crate::support::Aligned256(vld1q_u16_x2(val.as_ptr() as *const _)) }, + val: unsafe { 
core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { crate::support::Aligned256(vld1q_u16_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3111,7 +3213,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { - unsafe { vst1q_u16_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u16x16(self, a: u8x32) -> u16x16 { @@ -3319,14 +3427,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { crate::support::Aligned256(vld1q_s16_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { crate::support::Aligned256(vld1q_s16_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3344,7 +3452,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { vst1q_s16_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { @@ -3458,14 +3572,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { crate::support::Aligned256(vld1q_s32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { crate::support::Aligned256(vld1q_s32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3483,7 +3597,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { vst1q_s32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x8(self, a: u8x32) -> i32x8 { @@ -3692,14 +3812,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { crate::support::Aligned256(vld1q_u32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { crate::support::Aligned256(vld1q_u32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3717,7 +3837,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { - unsafe { vst1q_u32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x8(self, a: u8x32) -> u32x8 { @@ -3913,14 +4039,14 @@ impl Simd for Neon { #[inline(always)] fn 
load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { crate::support::Aligned256(vld1q_s32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { crate::support::Aligned256(vld1q_s32_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3938,7 +4064,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { vst1q_s32_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { @@ -4052,14 +4184,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { crate::support::Aligned256(vld1q_f64_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { crate::support::Aligned256(vld1q_f64_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4077,7 +4209,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { - unsafe { vst1q_f64_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x4(self, a: u8x32) -> f64x4 { @@ -4319,14 +4457,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { crate::support::Aligned256(vld1q_s64_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { crate::support::Aligned256(vld1q_s64_x2(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4344,7 +4482,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - unsafe { vst1q_s64_x2(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { @@ -4458,14 +4602,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { crate::support::Aligned512(vld1q_f32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { crate::support::Aligned512(vld1q_f32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4483,7 +4627,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { - unsafe { vst1q_f32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + 
dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x16(self, a: u8x64) -> f32x16 { @@ -4771,14 +4921,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { crate::support::Aligned512(vld1q_s8_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { crate::support::Aligned512(vld1q_s8_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4796,7 +4946,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { - unsafe { vst1q_s8_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x64(self, a: u8x64) -> i8x64 { @@ -4991,14 +5147,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { crate::support::Aligned512(vld1q_u8_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { crate::support::Aligned512(vld1q_u8_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5016,7 +5172,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { - unsafe { vst1q_u8_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 64usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x64(self, a: u8x64) -> u8x64 { @@ -5209,14 +5371,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { crate::support::Aligned512(vld1q_s8_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { crate::support::Aligned512(vld1q_s8_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5234,7 +5396,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { - unsafe { vst1q_s8_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { @@ -5339,14 +5507,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { crate::support::Aligned512(vld1q_s16_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { crate::support::Aligned512(vld1q_s16_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5364,7 +5532,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { - unsafe { 
vst1q_s16_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x32(self, a: u8x64) -> i16x32 { @@ -5568,14 +5742,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { crate::support::Aligned512(vld1q_u16_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { crate::support::Aligned512(vld1q_u16_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5593,7 +5767,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { - unsafe { vst1q_u16_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u16x32(self, a: u8x64) -> u16x32 { @@ -5805,14 +5985,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { crate::support::Aligned512(vld1q_s16_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { crate::support::Aligned512(vld1q_s16_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5830,7 +6010,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { - unsafe { vst1q_s16_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { @@ -5938,14 +6124,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { crate::support::Aligned512(vld1q_s32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { crate::support::Aligned512(vld1q_s32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5963,7 +6149,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { - unsafe { vst1q_s32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x16(self, a: u8x64) -> i32x16 { @@ -6163,14 +6355,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { crate::support::Aligned512(vld1q_u32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { crate::support::Aligned512(vld1q_u32_x4(val.as_ptr() as *const _)) }, + val: unsafe { 
core::mem::transmute_copy(val) }, simd: self, } } @@ -6188,7 +6380,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { - unsafe { vst1q_u32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x16(self, a: u8x64) -> u32x16 { @@ -6383,14 +6581,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { crate::support::Aligned512(vld1q_s32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { crate::support::Aligned512(vld1q_s32_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6408,7 +6606,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { - unsafe { vst1q_s32_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { @@ -6513,14 +6717,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { crate::support::Aligned512(vld1q_f64_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { crate::support::Aligned512(vld1q_f64_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6538,7 +6742,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { - unsafe { vst1q_f64_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x8(self, a: u8x64) -> f64x8 { @@ -6771,14 +6981,14 @@ impl Simd for Neon { #[inline(always)] fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { crate::support::Aligned512(vld1q_s64_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { crate::support::Aligned512(vld1q_s64_x4(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6796,7 +7006,13 @@ impl Simd for Neon { } #[inline(always)] fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { - unsafe { vst1q_s64_x4(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index a9f7c1ad4..16d6fe861 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -109,14 +109,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn 
load_array_f32x4(self, val: [f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_ps(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_ps(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -134,7 +134,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { - unsafe { _mm_storeu_ps(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { @@ -379,14 +385,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -404,7 +410,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { @@ -577,14 +589,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -602,7 +614,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { @@ -786,14 +804,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -811,7 +829,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), 
+ 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { @@ -890,14 +914,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -915,7 +939,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { @@ -1063,14 +1093,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1088,7 +1118,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { @@ -1242,14 +1278,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1267,7 +1303,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { @@ -1346,14 +1388,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1371,7 +1413,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_i32x4(self, a: i32x4, dest: &mut 
[i32; 4usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { @@ -1521,14 +1569,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1546,7 +1594,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { @@ -1708,14 +1762,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1733,7 +1787,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { @@ -1812,14 +1872,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_pd(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_pd(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1837,7 +1897,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { - unsafe { _mm_storeu_pd(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { @@ -2011,14 +2077,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(_mm_loadu_si128(val.as_ptr() 
as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2036,7 +2102,13 @@ impl Simd for Sse4_2 { } #[inline(always)] fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { @@ -2116,24 +2188,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_ps(val.as_ptr().add(0usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_ps(val.as_ptr().add(0usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2152,8 +2214,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { unsafe { - _mm_storeu_ps(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_ps(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -2441,24 +2506,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2477,8 +2532,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -2681,24 +2739,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2717,8 +2765,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { unsafe { - 
_mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -2916,24 +2967,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2952,8 +2993,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -3066,24 +3110,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3102,8 +3136,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -3306,24 +3343,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3342,8 +3369,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_u16x16(self, a: u16x16, dest: &mut [u16; 16usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + 
core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -3552,24 +3582,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3588,8 +3608,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -3702,24 +3725,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3738,8 +3751,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -3947,24 +3963,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3983,8 +3989,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -4179,24 +4188,14 @@ impl Simd for Sse4_2 { 
#[inline(always)] fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4215,8 +4214,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -4329,24 +4331,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_pd(val.as_ptr().add(0usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_pd(val.as_ptr().add(0usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4365,8 +4357,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { unsafe { - _mm_storeu_pd(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_pd(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 4usize, + ); } } #[inline(always)] @@ -4607,24 +4602,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { - crate::support::Aligned256([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4643,8 +4628,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 4usize, + ); } } #[inline(always)] @@ -4757,28 +4745,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_ps(val.as_ptr().add(0usize) as *const 
_), - _mm_loadu_ps(val.as_ptr().add(4usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(8usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_ps(val.as_ptr().add(0usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(4usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(8usize) as *const _), - _mm_loadu_ps(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4797,10 +4771,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { unsafe { - _mm_storeu_ps(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_ps(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - _mm_storeu_ps(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - _mm_storeu_ps(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -5126,28 +5101,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(32usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(32usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5166,10 +5127,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5365,28 +5327,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(32usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(32usize) as *const _), - 
_mm_loadu_si128(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5405,10 +5353,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5649,28 +5598,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(32usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(32usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5689,10 +5624,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5798,28 +5734,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5838,10 +5760,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); - 
_mm_storeu_si128(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6046,28 +5969,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6086,10 +5995,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6349,28 +6259,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(16usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6389,10 +6285,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6501,28 +6398,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - 
_mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6541,10 +6424,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -6745,28 +6629,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6785,10 +6655,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -7021,28 +6892,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(8usize) as *const _), - 
_mm_loadu_si128(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7061,10 +6918,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -7170,28 +7028,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_pd(val.as_ptr().add(0usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(2usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(4usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_pd(val.as_ptr().add(0usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(2usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(4usize) as *const _), - _mm_loadu_pd(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7210,10 +7054,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { unsafe { - _mm_storeu_pd(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_pd(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); - _mm_storeu_pd(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); - _mm_storeu_pd(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -7447,28 +7292,14 @@ impl Simd for Sse4_2 { #[inline(always)] fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(2usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { - crate::support::Aligned512([ - _mm_loadu_si128(val.as_ptr().add(0usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(2usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(4usize) as *const _), - _mm_loadu_si128(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7487,10 +7318,11 @@ impl Simd for Sse4_2 { #[inline(always)] fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { unsafe { - _mm_storeu_si128(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - _mm_storeu_si128(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); - _mm_storeu_si128(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); - _mm_storeu_si128(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + 
core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 59c94768f..42e53208f 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -91,14 +91,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4 { f32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -116,7 +116,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_f32x4(self, a: f32x4, dest: &mut [f32; 4usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f32x4(self, a: u8x16) -> f32x4 { @@ -354,14 +360,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16 { i8x16 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -379,7 +385,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_i8x16(self, a: i8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i8x16(self, a: u8x16) -> i8x16 { @@ -540,14 +552,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16 { u8x16 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -565,7 +577,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_u8x16(self, a: u8x16, dest: &mut [u8; 16usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u8x16(self, a: u8x16) -> u8x16 { @@ -724,14 +742,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { mask8x16 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + 
val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -749,7 +767,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 16usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { @@ -835,14 +859,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8 { i16x8 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -860,7 +884,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_i16x8(self, a: i16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i16x8(self, a: u8x16) -> i16x8 { @@ -1005,14 +1035,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8 { u16x8 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1030,7 +1060,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_u16x8(self, a: u16x8, dest: &mut [u16; 8usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u16x8(self, a: u8x16) -> u16x8 { @@ -1171,14 +1207,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { mask16x8 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1196,7 +1232,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 8usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { @@ -1282,14 +1324,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } 
#[inline(always)] fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4 { i32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1307,7 +1349,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_i32x4(self, a: i32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_i32x4(self, a: u8x16) -> i32x4 { @@ -1456,14 +1504,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4 { u32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1481,7 +1529,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_u32x4(self, a: u32x4, dest: &mut [u32; 4usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_u32x4(self, a: u8x16) -> u32x4 { @@ -1622,14 +1676,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { mask32x4 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1647,7 +1701,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 4usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { @@ -1733,14 +1793,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2 { f64x2 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1758,7 +1818,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_f64x2(self, a: f64x2, dest: &mut [f64; 2usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_f64x2(self, a: u8x16) -> f64x2 { @@ -1954,14 +2020,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> 
mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { mask64x2 { - val: unsafe { crate::support::Aligned128(v128_load(val.as_ptr() as *const _)) }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -1979,7 +2045,13 @@ impl Simd for WasmSimd128 { } #[inline(always)] fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { v128_store(dest.as_mut_ptr() as *mut _, a.val.0) } + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 2usize, + ); + } } #[inline(always)] fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { @@ -2066,24 +2138,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8 { f32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2102,8 +2164,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_f32x8(self, a: f32x8, dest: &mut [f32; 8usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -2391,24 +2456,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32 { i8x32 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2427,8 +2482,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_i8x32(self, a: i8x32, dest: &mut [i8; 32usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -2631,24 +2689,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32 { u8x32 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - 
v128_load(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2667,8 +2715,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_u8x32(self, a: u8x32, dest: &mut [u8; 32usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -2866,24 +2917,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { mask8x32 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -2902,8 +2943,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -3016,24 +3060,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16 { i16x16 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3052,8 +3086,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_i16x16(self, a: i16x16, dest: &mut [i16; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -3256,24 +3293,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16 { u16x16 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3292,8 +3319,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_u16x16(self, a: u16x16, dest: &mut 
[u16; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -3500,24 +3530,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { mask16x16 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3536,8 +3556,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -3650,24 +3673,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8 { i32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3686,8 +3699,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_i32x8(self, a: i32x8, dest: &mut [i32; 8usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -3895,24 +3911,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8 { u32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -3931,8 +3937,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_u32x8(self, a: u32x8, dest: &mut [u32; 8usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + 
dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -4127,24 +4136,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { mask32x8 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4163,8 +4162,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -4277,24 +4279,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4 { f64x4 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4313,8 +4305,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_f64x4(self, a: f64x4, dest: &mut [f64; 4usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 4usize, + ); } } #[inline(always)] @@ -4555,24 +4550,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { - crate::support::Aligned256([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4591,8 +4576,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 4usize, + ); } } #[inline(always)] @@ -4705,28 +4693,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { - crate::support::Aligned512([ - 
v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16 { f32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -4745,10 +4719,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_f32x16(self, a: f32x16, dest: &mut [f32; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -5071,28 +5046,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(32usize) as *const _), - v128_load(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64 { i8x64 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(32usize) as *const _), - v128_load(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5111,10 +5072,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_i8x64(self, a: i8x64, dest: &mut [i8; 64usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5310,28 +5272,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(32usize) as *const _), - v128_load(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64 { u8x64 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(32usize) as *const _), - v128_load(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) 
}, simd: self, } } @@ -5350,10 +5298,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5605,28 +5554,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(32usize) as *const _), - v128_load(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { mask8x64 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(32usize) as *const _), - v128_load(val.as_ptr().add(48usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5645,10 +5580,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(32usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(48usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i8, + dest.as_mut_ptr(), + 64usize, + ); } } #[inline(always)] @@ -5754,28 +5690,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32 { i16x32 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -5794,10 +5716,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_i16x32(self, a: i16x32, dest: &mut [i16; 32usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6002,28 +5925,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn 
load_array_u16x32(self, val: [u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32 { u16x32 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6042,10 +5951,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_u16x32(self, a: u16x32, dest: &mut [u16; 32usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6292,28 +6202,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { mask16x32 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(16usize) as *const _), - v128_load(val.as_ptr().add(24usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6332,10 +6228,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(16usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(24usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i16, + dest.as_mut_ptr(), + 32usize, + ); } } #[inline(always)] @@ -6444,28 +6341,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16 { i32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - 
v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6484,10 +6367,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_i32x16(self, a: i32x16, dest: &mut [i32; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -6688,28 +6572,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16 { u32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -6728,10 +6598,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_u32x16(self, a: u32x16, dest: &mut [u32; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const u32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -6961,28 +6832,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { mask32x16 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(8usize) as *const _), - v128_load(val.as_ptr().add(12usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7001,10 +6858,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(8usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(12usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const 
a.val.0) as *const i32, + dest.as_mut_ptr(), + 16usize, + ); } } #[inline(always)] @@ -7110,28 +6968,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8 { f64x8 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7150,10 +6994,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_f64x8(self, a: f64x8, dest: &mut [f64; 8usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const f64, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] @@ -7387,28 +7232,14 @@ impl Simd for WasmSimd128 { #[inline(always)] fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(&val) }, simd: self, } } #[inline(always)] fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { mask64x8 { - val: unsafe { - crate::support::Aligned512([ - v128_load(val.as_ptr().add(0usize) as *const _), - v128_load(val.as_ptr().add(2usize) as *const _), - v128_load(val.as_ptr().add(4usize) as *const _), - v128_load(val.as_ptr().add(6usize) as *const _), - ]) - }, + val: unsafe { core::mem::transmute_copy(val) }, simd: self, } } @@ -7427,10 +7258,11 @@ impl Simd for WasmSimd128 { #[inline(always)] fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { unsafe { - v128_store(dest.as_mut_ptr().add(0usize) as *mut _, a.val.0[0]); - v128_store(dest.as_mut_ptr().add(2usize) as *mut _, a.val.0[1]); - v128_store(dest.as_mut_ptr().add(4usize) as *mut _, a.val.0[2]); - v128_store(dest.as_mut_ptr().add(6usize) as *mut _, a.val.0[3]); + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const i64, + dest.as_mut_ptr(), + 8usize, + ); } } #[inline(always)] diff --git a/fearless_simd_gen/src/arch/neon.rs b/fearless_simd_gen/src/arch/neon.rs index 081a3242b..22b29c39f 100644 --- a/fearless_simd_gen/src/arch/neon.rs +++ b/fearless_simd_gen/src/arch/neon.rs @@ -98,28 +98,6 @@ pub(crate) fn simple_intrinsic(name: &str, ty: &VecType) -> Ident { ) } -fn memory_intrinsic(op: &str, ty: &VecType) -> Ident { - let (opt_q, scalar_c, size) = neon_array_type(ty); - let num_blocks = ty.n_bits() / 128; - let opt_count = if num_blocks > 1 { - format!("_x{num_blocks}") - } else { - String::new() - }; - Ident::new( - 
&format!("{op}1{opt_q}_{scalar_c}{size}{opt_count}"), - Span::call_site(), - ) -} - -pub(crate) fn load_intrinsic(ty: &VecType) -> Ident { - memory_intrinsic("vld", ty) -} - -pub(crate) fn store_intrinsic(ty: &VecType) -> Ident { - memory_intrinsic("vst", ty) -} - pub(crate) fn split_intrinsic(name: &str, name2: &str, ty: &VecType) -> Ident { let (opt_q, scalar_c, size) = neon_array_type(ty); Ident::new( diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index 892c4bce7..8b5543365 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -266,33 +266,19 @@ pub(crate) fn generic_block_combine( pub(crate) fn generic_from_array( method_sig: TokenStream, vec_ty: &VecType, - _kind: RefKind, - max_block_size: usize, - load_unaligned_block: impl Fn(&VecType) -> Ident, + kind: RefKind, ) -> TokenStream { - let block_size = max_block_size.min(vec_ty.n_bits()); - let block_count = vec_ty.n_bits() / block_size; - let num_scalars_per_block = vec_ty.len / block_count; - - let native_block_ty = VecType::new( - vec_ty.scalar, - vec_ty.scalar_bits, - block_size / vec_ty.scalar_bits, - ); - - let wrapper_ty = vec_ty.aligned_wrapper(); - let load_unaligned = load_unaligned_block(&native_block_ty); - let expr = if block_count == 1 { - quote! { - unsafe { #wrapper_ty(#load_unaligned(val.as_ptr() as *const _)) } - } + let inner_ref = if kind == RefKind::Value { + quote! { &val } } else { - let blocks = (0..block_count).map(|n| n * num_scalars_per_block); - quote! { - unsafe { #wrapper_ty([ - #(#load_unaligned(val.as_ptr().add(#blocks) as *const _)),* - ]) } - } + quote! { val } + }; + + // There are architecture-specific "load" intrinsics, but they can actually be *worse* for performance. If they + // lower to LLVM intrinsics, they will likely not be optimized until much later in the pipeline (if at all), + // resulting in substantially worse codegen. + let expr = quote! { + unsafe { core::mem::transmute_copy(#inner_ref) } }; let vec_rust = vec_ty.rust(); @@ -333,39 +319,17 @@ pub(crate) fn generic_as_array( } } -pub(crate) fn generic_store_array( - method_sig: TokenStream, - vec_ty: &VecType, - max_block_size: usize, - store_unaligned_block: impl Fn(&VecType) -> Ident, -) -> TokenStream { - let block_size = max_block_size.min(vec_ty.n_bits()); - let block_count = vec_ty.n_bits() / block_size; - let num_scalars_per_block = vec_ty.len / block_count; - - let native_block_ty = VecType::new( - vec_ty.scalar, - vec_ty.scalar_bits, - block_size / vec_ty.scalar_bits, - ); +pub(crate) fn generic_store_array(method_sig: TokenStream, vec_ty: &VecType) -> TokenStream { + let scalar_ty = vec_ty.scalar.rust(vec_ty.scalar_bits); + let count = vec_ty.len; - let store_unaligned = store_unaligned_block(&native_block_ty); - let store_expr = if block_count == 1 { - quote! { - unsafe { #store_unaligned(dest.as_mut_ptr() as *mut _, a.val.0) } - } - } else { - let blocks = (0..block_count).map(|n| { - let offset = n * num_scalars_per_block; - let block_idx = proc_macro2::Literal::usize_unsuffixed(n); - quote! { - #store_unaligned(dest.as_mut_ptr().add(#offset) as *mut _, a.val.0[#block_idx]) - } - }); - quote! { - unsafe { - #(#blocks;)* - } + let store_expr = quote! 
{ + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const a.val.0) as *const #scalar_ty, + dest.as_mut_ptr(), + #count, + ); } }; diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index 366359e10..31aaffc56 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -4,7 +4,6 @@ use proc_macro2::{Ident, Literal, Span, TokenStream}; use quote::{ToTokens as _, format_ident, quote}; -use crate::arch::neon::{load_intrinsic, store_intrinsic}; use crate::generic::{ generic_as_array, generic_from_array, generic_from_bytes, generic_op_name, generic_store_array, generic_to_bytes, @@ -462,21 +461,13 @@ impl Level for Neon { } } } - OpSig::FromArray { kind } => generic_from_array( - method_sig, - vec_ty, - kind, - self.max_block_size(), - load_intrinsic, - ), + OpSig::FromArray { kind } => generic_from_array(method_sig, vec_ty, kind), OpSig::AsArray { kind } => { generic_as_array(method_sig, vec_ty, kind, self.max_block_size(), |vec_ty| { self.arch_ty(vec_ty) }) } - OpSig::StoreArray => { - generic_store_array(method_sig, vec_ty, self.max_block_size(), store_intrinsic) - } + OpSig::StoreArray => generic_store_array(method_sig, vec_ty), OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index d098d7654..63f0a027d 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -613,21 +613,13 @@ impl Level for WasmSimd128 { } } } - OpSig::FromArray { kind } => { - generic_from_array(method_sig, vec_ty, kind, self.max_block_size(), |_| { - v128_intrinsic("load") - }) - } + OpSig::FromArray { kind } => generic_from_array(method_sig, vec_ty, kind), OpSig::AsArray { kind } => { generic_as_array(method_sig, vec_ty, kind, self.max_block_size(), |_| { Ident::new("v128", Span::call_site()) }) } - OpSig::StoreArray => { - generic_store_array(method_sig, vec_ty, self.max_block_size(), |_| { - v128_intrinsic("store") - }) - } + OpSig::StoreArray => generic_store_array(method_sig, vec_ty), OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index 907002185..4086f41f5 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -170,23 +170,13 @@ impl Level for X86 { block_size, block_count, } => self.handle_store_interleaved(method_sig, vec_ty, block_size, block_count), - OpSig::FromArray { kind } => generic_from_array( - method_sig, - vec_ty, - kind, - self.max_block_size(), - |block_ty| intrinsic_ident("loadu", coarse_type(block_ty), block_ty.n_bits()), - ), + OpSig::FromArray { kind } => generic_from_array(method_sig, vec_ty, kind), OpSig::AsArray { kind } => { generic_as_array(method_sig, vec_ty, kind, self.max_block_size(), |vec_ty| { self.arch_ty(vec_ty) }) } - OpSig::StoreArray => { - generic_store_array(method_sig, vec_ty, self.max_block_size(), |block_ty| { - intrinsic_ident("storeu", coarse_type(block_ty), block_ty.n_bits()) - }) - } + OpSig::StoreArray => generic_store_array(method_sig, vec_ty), OpSig::FromBytes => generic_from_bytes(method_sig, vec_ty), OpSig::ToBytes => generic_to_bytes(method_sig, vec_ty), } From f46212d9bc4bdc097f3867eb58d660f7026ecf86 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 23 Jan 2026 14:50:36 -0500 Subject: [PATCH 2/3] Document safety a bit more --- 
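Note: to make the codegen argument in this series concrete, here is a minimal standalone sketch (not part of the patch) contrasting the two load strategies, assuming an x86_64 target; the function names are illustrative and do not exist in the crate.

    use core::arch::x86_64::{__m128, _mm_loadu_ps};

    // Intrinsic-based load, the shape previously generated. If the intrinsic
    // lowers to an LLVM intrinsic call, the load may only be optimized late
    // in the pipeline, if at all.
    #[inline(always)]
    unsafe fn load_via_intrinsic(val: &[f32; 4]) -> __m128 {
        unsafe { _mm_loadu_ps(val.as_ptr()) }
    }

    // Plain-copy load, the shape generated after this series. transmute_copy
    // is an ordinary unaligned-tolerant read that LLVM treats as a normal
    // load from the start of the pipeline.
    #[inline(always)]
    fn load_via_copy(val: &[f32; 4]) -> __m128 {
        unsafe { core::mem::transmute_copy(val) }
    }

Both are expected to compile to a single unaligned 16-byte load in release builds; the difference, per the comment added to `generic_from_array`, is how early the optimizer gets to reason about it.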
fearless_simd_gen/src/generic.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index 8b5543365..975b2b47f 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -278,6 +278,18 @@ pub(crate) fn generic_from_array( // lower to LLVM intrinsics, they will likely not be optimized until much later in the pipeline (if at all), // resulting in substantially worse codegen. let expr = quote! { + // Safety: The native vector type backing any implementation will be: + // - A `#[repr(simd)]` type, which has the same layout as an array of scalars + // - An array of `#[repr(simd)]` types + // - For AArch64 specifically, a `#[repr(C)]` tuple of `#[repr(simd)]` types + // + // These all have the same layout as a flat array of the corresponding scalars. The native vector types probably + // have greater alignment requirements than the source array type we're copying from, but that's explicitly + // allowed by transmute_copy: + // + // > This function will unsafely assume the pointer src is valid for size_of::<Dst>() bytes by transmuting &Src to + // > &Dst and then reading the &Dst **(except that this is done in a way that is correct even when &Dst has + // > stricter alignment requirements than &Src).** unsafe { core::mem::transmute_copy(#inner_ref) } }; let vec_rust = vec_ty.rust(); @@ -325,6 +337,9 @@ pub(crate) fn generic_store_array(method_sig: TokenStream, vec_ty: &VecType) -> let store_expr = quote! { unsafe { + // Copies `count` scalars from the backing type, which has the same layout as the destination array (see + // `generic_as_array`). We know that the source and destination are aligned to at least the alignment of the + // underlying scalar type. core::ptr::copy_nonoverlapping( (&raw const a.val.0) as *const #scalar_ty, dest.as_mut_ptr(), From 81cba51377c943f4b4a158c3f8810bc2202aa096 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 24 Jan 2026 15:15:37 -0500 Subject: [PATCH 3/3] Clarify comments --- fearless_simd_gen/src/generic.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index 975b2b47f..2a3dd239b 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -276,7 +276,7 @@ pub(crate) fn generic_from_array( // There are architecture-specific "load" intrinsics, but they can actually be *worse* for performance. If they // lower to LLVM intrinsics, they will likely not be optimized until much later in the pipeline (if at all), - // resulting in substantially worse codegen. + // resulting in substantially worse codegen. See https://github.com/linebender/fearless_simd/pull/185. let expr = quote! { // Safety: The native vector type backing any implementation will be: // - A `#[repr(simd)]` type, which has the same layout as an array of scalars @@ -338,8 +338,8 @@ pub(crate) fn generic_store_array(method_sig: TokenStream, vec_ty: &VecType) -> let store_expr = quote! { unsafe { // Copies `count` scalars from the backing type, which has the same layout as the destination array (see - // `generic_as_array`). We know that the source and destination are aligned to at least the alignment of the - // underlying scalar type. + // `generic_as_array`). The backing type is aligned to its own size, and the destination array must *by + // definition* be aligned to at least the alignment of the scalar.
core::ptr::copy_nonoverlapping( (&raw const a.val.0) as *const #scalar_ty, dest.as_mut_ptr(),
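A short self-contained sketch of the layout and alignment argument these comments make, assuming an x86_64 target; `Vec4` is a hypothetical stand-in for the generated 128-bit wrapper, not a type from the crate.

    use core::arch::x86_64::__m128;

    // Stand-in for the generated wrapper: 16 bytes, 16-byte aligned, with the
    // same byte layout as [f32; 4].
    #[repr(transparent)]
    struct Vec4(__m128);

    fn load(val: &[f32; 4]) -> Vec4 {
        // Sound even though &[f32; 4] is only 4-byte aligned: transmute_copy
        // explicitly permits the source to be less aligned than the result.
        unsafe { core::mem::transmute_copy(val) }
    }

    fn store(a: &Vec4, dest: &mut [f32; 4]) {
        // Copy four f32s out of the vector. Both pointers are at least
        // f32-aligned, and a shared and a mutable reference cannot overlap,
        // so copy_nonoverlapping's preconditions hold.
        unsafe {
            core::ptr::copy_nonoverlapping((&raw const a.0) as *const f32, dest.as_mut_ptr(), 4);
        }
    }

    fn main() {
        let v = load(&[1.0, 2.0, 3.0, 4.0]);
        let mut out = [0.0f32; 4];
        store(&v, &mut out);
        assert_eq!(out, [1.0, 2.0, 3.0, 4.0]);
    }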