Skip to content

Commit

Permalink
Merge branch 'support-x86'
Browse files Browse the repository at this point in the history
  • Loading branch information
onethumb committed Dec 11, 2024
2 parents 6376679 + b399836 commit d7dcc68
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/pclmulqdq/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
//! [white paper]: https://web.archive.org/web/20131224125630/https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
#[cfg(not(feature = "fake-simd"))]
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), path = "x86.rs")]
#[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")]
#[cfg_attr(target_arch = "aarch64", path = "aarch64.rs")]
#[cfg_attr(target_arch = "x86", path = "x86.rs")]
mod arch;

#[cfg(feature = "fake-simd")]
Expand Down
25 changes: 19 additions & 6 deletions src/pclmulqdq/x86.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
// Copyright 2020 TiKV Project Authors. Licensed under MIT or Apache-2.0.

//! x86/x86_64 implementation of the PCLMULQDQ-based CRC calculation.
//! x86 (32-bit) implementation of the PCLMULQDQ-based CRC calculation.
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::BitXor;

#[repr(transparent)]
Expand All @@ -16,13 +14,20 @@ impl super::SimdExt for Simd {
fn is_supported() -> bool {
is_x86_feature_detected!("pclmulqdq") // _mm_clmulepi64_si128
&& is_x86_feature_detected!("sse2") // (all other _mm_*)
&& is_x86_feature_detected!("sse4.1") // _mm_extract_epi64
&& is_x86_feature_detected!("sse4.1")
}

#[inline]
#[target_feature(enable = "sse2")]
unsafe fn new(high: u64, low: u64) -> Self {
Self(_mm_set_epi64x(high as i64, low as i64))
// On 32-bit systems, we need to split u64 into low and high 32-bit parts
let high_low = (high & 0xFFFFFFFF) as i32;
let high_high = ((high >> 32) & 0xFFFFFFFF) as i32;
let low_low = (low & 0xFFFFFFFF) as i32;
let low_high = ((low >> 32) & 0xFFFFFFFF) as i32;

// Create the 128-bit vector using 32-bit parts
Self(_mm_set_epi32(high_high, high_low, low_high, low_low))
}

#[inline]
Expand Down Expand Up @@ -50,7 +55,15 @@ impl super::SimdExt for Simd {
let h = Self(_mm_slli_si128(t1, 8));
let l = Self(_mm_clmulepi64_si128(t1, polymu.0, 0x10));
let reduced = h ^ l ^ self;
_mm_extract_epi64(reduced.0, 1) as u64

// Store the result in memory and read it back as u64.
// `_mm_extract_epi64` is only available when targeting x86_64, so on 32-bit x86
// the upper 64 bits have to be read back through memory instead.
let mut result: [u32; 4] = [0; 4];
_mm_storeu_si128(result.as_mut_ptr() as *mut __m128i, reduced.0);

// Combine the two 32-bit values into a 64-bit result
// We want the high 64 bits (indices 2 and 3)
((result[3] as u64) << 32) | (result[2] as u64)
}
}

Expand Down
61 changes: 61 additions & 0 deletions src/pclmulqdq/x86_64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2020 TiKV Project Authors. Licensed under MIT or Apache-2.0.

//! x86_64 implementation of the PCLMULQDQ-based CRC calculation.
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::BitXor;

/// 128-bit SIMD value used by the x86_64 PCLMULQDQ code path.
///
/// A thin `#[repr(transparent)]` wrapper around [`__m128i`], so it shares the
/// layout of the raw vector type while letting this module attach its own
/// trait implementations.
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct Simd(__m128i);

impl super::SimdExt for Simd {
    /// Returns `true` only when the running CPU reports all of the
    /// `pclmulqdq`, `sse2` and `sse4.1` features.
    fn is_supported() -> bool {
        // Needed for `_mm_clmulepi64_si128`.
        if !is_x86_feature_detected!("pclmulqdq") {
            return false;
        }
        // Needed for all the other `_mm_*` intrinsics used here.
        if !is_x86_feature_detected!("sse2") {
            return false;
        }
        // Needed for `_mm_extract_epi64`.
        is_x86_feature_detected!("sse4.1")
    }

    /// Builds a vector whose upper 64-bit lane is `high` and whose lower
    /// 64-bit lane is `low`.
    ///
    /// # Safety
    ///
    /// Compiled with the `sse2` target feature enabled; the caller must make
    /// sure the CPU supports it.
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn new(high: u64, low: u64) -> Self {
        // The casts only reinterpret the bits; the intrinsic takes signed lanes.
        let lanes = _mm_set_epi64x(high as i64, low as i64);
        Self(lanes)
    }

    /// Carry-less multiplies the upper lanes of `self` and `coeff`, does the
    /// same for the lower lanes, and XORs the two 128-bit products together.
    ///
    /// # Safety
    ///
    /// Compiled with the `sse2` and `pclmulqdq` target features enabled; the
    /// caller must make sure the CPU supports them.
    #[inline]
    #[target_feature(enable = "sse2", enable = "pclmulqdq")]
    unsafe fn fold_16(self, coeff: Self) -> Self {
        // Selector 0x11: upper lane of `self` times upper lane of `coeff`.
        let upper_product = _mm_clmulepi64_si128(self.0, coeff.0, 0x11);
        // Selector 0x00: lower lane of `self` times lower lane of `coeff`.
        let lower_product = _mm_clmulepi64_si128(self.0, coeff.0, 0x00);
        Self(upper_product) ^ Self(lower_product)
    }

    /// Carry-less multiplies the lower lane of `self` by `coeff` and XORs the
    /// product with the upper lane of `self` moved down into the lower lane.
    ///
    /// # Safety
    ///
    /// Compiled with the `sse2` and `pclmulqdq` target features enabled; the
    /// caller must make sure the CPU supports them.
    #[inline]
    #[target_feature(enable = "sse2", enable = "pclmulqdq")]
    unsafe fn fold_8(self, coeff: u64) -> Self {
        // Place the scalar coefficient in the lower lane; the upper lane is zero.
        let multiplier = Self::new(0, coeff);
        let product = _mm_clmulepi64_si128(self.0, multiplier.0, 0x00);
        // Byte shift right by 8: upper lane moves to the lower lane, zero-filled.
        let upper_moved_down = _mm_srli_si128(self.0, 8);
        Self(product) ^ Self(upper_moved_down)
    }

    /// Final reduction of the 128-bit state to a 64-bit value using the
    /// constants `poly` and `mu` (presumably a Barrett reduction, going by
    /// the method name).
    ///
    /// Returns the upper 64-bit lane of the reduced vector.
    ///
    /// # Safety
    ///
    /// Compiled with the `sse2`, `sse4.1` and `pclmulqdq` target features
    /// enabled; the caller must make sure the CPU supports them.
    #[inline]
    #[target_feature(enable = "sse2", enable = "sse4.1", enable = "pclmulqdq")]
    unsafe fn barrett(self, poly: u64, mu: u64) -> u64 {
        // Upper lane holds `poly`, lower lane holds `mu`.
        let constants = Self::new(poly, mu);
        // Lower lane of `self` times `mu`.
        let quotient = _mm_clmulepi64_si128(self.0, constants.0, 0x00);
        // Lower lane of the quotient moved into the upper lane.
        let quotient_shifted = Self(_mm_slli_si128(quotient, 8));
        // Selector 0x10: lower lane of the quotient times `poly`.
        let quotient_times_poly = Self(_mm_clmulepi64_si128(quotient, constants.0, 0x10));
        let folded = quotient_shifted ^ quotient_times_poly ^ self;
        // Lane index 1 is the upper 64 bits.
        _mm_extract_epi64(folded.0, 1) as u64
    }
}

impl BitXor for Simd {
    type Output = Self;

    /// Bitwise XOR of the two 128-bit values.
    fn bitxor(self, other: Self) -> Self {
        // SAFETY: `_mm_xor_si128` only needs SSE2, which standard x86_64
        // targets enable by default, and it operates purely on register values.
        let xored = unsafe { _mm_xor_si128(self.0, other.0) };
        Self(xored)
    }
}

0 comments on commit d7dcc68

Please sign in to comment.