Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

split crates and fix bugs when using use_raw #124

Merged
merged 3 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,22 @@ license = "Apache-2.0"
name = "sonic-rs"
readme = "README.md"
repository = "https://github.com/cloudwego/sonic-rs"
version = "0.3.15"
version = "0.3.16"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bumpalo = "3.13"
bytes = "1.8"
cfg-if = "1.0"
faststr = { version = "0.2", features = ["serde"] }
itoa = "1.0"
ryu = "1.0"
serde = { version = "1.0", features = ["rc", "derive"] }
simdutf8 = "0.1"
thiserror = "2.0"
bumpalo = "3.13"
bytes = "1.8"
cfg-if = "1.0"
faststr = { version = "0.2", features = ["serde"] }
itoa = "1.0"
ryu = "1.0"
serde = { version = "1.0", features = ["rc", "derive"] }
simdutf8 = "0.1"
sonic_number = { path = "./sonic_number" }
sonic_simd = { path = "./sonic_simd" }
thiserror = "2.0"

[dev-dependencies]
bytes = { version = "1.4", features = ["serde"] }
Expand Down
7 changes: 7 additions & 0 deletions sonic_number/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[package]
edition = "2021"
name = "sonic_number"
version = "0.1.0"

[dependencies]
cfg-if = "1.0"
3 changes: 3 additions & 0 deletions sonic_number/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# sonic_number

A fast number parsing library based on SIMD.
11 changes: 11 additions & 0 deletions sonic_number/src/arch/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// Parses up to `need` leading ASCII digits of `c` as a decimal integer.
///
/// Returns `(value, consumed)`, where `consumed` is the number of digits that
/// were read and `value` is their decimal value (`0` when nothing was read).
///
/// # Safety
///
/// Bytes are read with unchecked indexing bounded only by `need`, so the
/// caller must guarantee that `c` either holds at least `need` bytes or
/// contains a non-digit byte before its end.
#[inline(always)]
pub unsafe fn simd_str2int(c: &[u8], need: usize) -> (u64, usize) {
    debug_assert!(need < 17);
    let mut value: u64 = 0;
    let mut consumed: usize = 0;
    loop {
        if consumed >= need {
            break;
        }
        let byte = *c.get_unchecked(consumed);
        if !byte.is_ascii_digit() {
            break;
        }
        // At most 16 digits are accumulated, which always fits in a u64.
        value = value * 10 + (byte - b'0') as u64;
        consumed += 1;
    }
    (value, consumed)
}
11 changes: 11 additions & 0 deletions sonic_number/src/arch/fallback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// Scalar digit parser used when no SIMD implementation is selected.
///
/// Reads at most `need` leading ASCII digits from `c` and returns
/// `(value, consumed)`: the decimal value of those digits and how many of
/// them were read. Parsing stops at the first non-digit byte.
///
/// # Safety
///
/// Indexing is unchecked and limited only by `need`; the caller must ensure
/// `c` has at least `need` bytes or contains a non-digit byte before its end.
#[inline(always)]
pub unsafe fn simd_str2int(c: &[u8], need: usize) -> (u64, usize) {
    debug_assert!(need < 17);
    let mut acc = 0u64;
    let mut pos = 0usize;
    while pos < need {
        // `b - b'0'` lands in 0..=9 exactly when `b` is an ASCII digit.
        let digit = c.get_unchecked(pos).wrapping_sub(b'0');
        if digit > 9 {
            break;
        }
        acc = acc * 10 + digit as u64;
        pos += 1;
    }
    (acc, pos)
}
12 changes: 12 additions & 0 deletions sonic_number/src/arch/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Pick exactly one architecture-specific module at compile time and re-export
// its public items, so the rest of the crate uses one name regardless of target.
cfg_if::cfg_if! {
// x86_64 builds compiled with pclmulqdq, avx2 and sse2 enabled.
if #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq", target_feature = "avx2", target_feature = "sse2"))] {
mod x86_64;
pub use x86_64::*;
// aarch64 builds compiled with NEON enabled.
} else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
mod aarch64;
pub use aarch64::*;
// Every other target (including x86_64 without the features above).
} else {
mod fallback;
pub use fallback::*;
}
}
108 changes: 108 additions & 0 deletions sonic_number/src/arch/x86_64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use std::arch::x86_64::*;

// Fold adjacent byte pairs into 16-bit lanes: each lane becomes
// `even_byte * 10 + odd_byte` (two decimal digits per lane).
macro_rules! packadd_1 {
    ($v:ident) => {
        let delta = _mm_set1_epi64x(0x010A010A010A010A);
        $v = _mm_maddubs_epi16($v, delta);
    };
}

// Fold adjacent 16-bit lanes into 32-bit lanes: each lane becomes
// `even_lane * 100 + odd_lane` (four decimal digits per lane).
macro_rules! packadd_2 {
    ($v:ident) => {
        let delta = _mm_set1_epi64x(0x0001006400010064);
        $v = _mm_madd_epi16($v, delta);
    };
}

// Narrow the four 32-bit lanes back to 16 bits, then fold adjacent pairs as
// `even_lane * 10000 + odd_lane` (eight decimal digits per 32-bit lane).
macro_rules! packadd_4 {
    ($v:ident) => {
        $v = _mm_packus_epi32($v, $v);
        let delta = _mm_set_epi16(0, 0, 0, 0, 1, 10000, 1, 10000);
        $v = _mm_madd_epi16($v, delta);
    };
}

// simd add for 5 ~ 8 digits
macro_rules! simd_add_5_8 {
    ($v:ident, $and:literal) => {{
        // Shift the digits to the top of the vector; this also zeroes the
        // low bytes and discards everything after the last digit.
        $v = _mm_slli_si128($v, 16 - $and);
        packadd_1!($v);
        packadd_2!($v);
        (_mm_extract_epi32($v, 2) as u64) * 10000 + (_mm_extract_epi32($v, 3) as u64)
    }};
}

// simd add for 9 ~ 15 digits
macro_rules! simd_add_9_15 {
    ($v:ident, $and:literal) => {{
        $v = _mm_slli_si128($v, 16 - $and);
        packadd_1!($v);
        packadd_2!($v);
        packadd_4!($v);
        (_mm_extract_epi32($v, 0) as u64) * 100000000 + (_mm_extract_epi32($v, 1) as u64)
    }};
}

// simd add for exactly 16 digits (no shift needed: every byte is a digit)
macro_rules! simd_add_16 {
    ($v:ident) => {{
        packadd_1!($v);
        packadd_2!($v);
        packadd_4!($v);
        (_mm_extract_epi32($v, 0) as u64) * 100000000 + (_mm_extract_epi32($v, 1) as u64)
    }};
}

/// Parses up to `need` leading ASCII digits of `c` as a decimal integer using
/// 128-bit SIMD.
///
/// Returns `(value, count)`, where `count` is the number of digits consumed
/// (the smaller of `need` and the length of the leading digit run) and
/// `value` is their decimal value. When no digit is consumed the result is
/// `(0, 0)`, matching the scalar implementations.
///
/// # Safety
///
/// * 16 bytes are loaded from `c.as_ptr()` unconditionally, so at least 16
///   bytes must be readable there regardless of `need`.
/// * `need` must be at most 16.
/// * The CPU must support the SSSE3 and SSE4.1 intrinsics used here.
#[inline(always)]
pub unsafe fn simd_str2int(c: &[u8], need: usize) -> (u64, usize) {
    debug_assert!(need <= 16);
    let data = _mm_loadu_si128(c.as_ptr() as *const __m128i);
    let zero = _mm_setzero_si128();
    let nine = _mm_set1_epi8(9);
    let zero_c = _mm_set1_epi8(b'0' as i8);

    // After subtracting b'0', a byte is a digit iff it is in 0..=9 (signed).
    let mut data = _mm_sub_epi8(data, zero_c);
    let lt_zero = _mm_cmpgt_epi8(zero, data);
    let gt_nine = _mm_cmpgt_epi8(data, nine);

    let is_num_end = _mm_or_si128(lt_zero, gt_nine);
    let is_num_end_int = _mm_movemask_epi8(is_num_end);

    // get the real parsed count
    let mut count = need;
    if is_num_end_int != 0 {
        // Index of the first non-digit byte == length of the digit run.
        let digits = is_num_end_int.trailing_zeros() as usize;
        if digits < need {
            count = digits;
        }
    }

    let sum = match count {
        // No digit to consume (first byte is not a digit, or `need == 0`).
        0 => 0,
        1 => _mm_extract_epi8(data, 0) as u64,
        2 => (_mm_extract_epi8(data, 0) * 10 + _mm_extract_epi8(data, 1)) as u64,
        3 => {
            // shift to clear the non-digit ascii in vector
            data = _mm_slli_si128(data, 16 - 3);
            packadd_1!(data);
            // add the highest two lanes
            (_mm_extract_epi16(data, 6) * 100 + _mm_extract_epi16(data, 7)) as u64
        }
        4 => {
            data = _mm_slli_si128(data, 16 - 4);
            packadd_1!(data);
            (_mm_extract_epi16(data, 6) * 100 + _mm_extract_epi16(data, 7)) as u64
        }
        5 => simd_add_5_8!(data, 5),
        6 => simd_add_5_8!(data, 6),
        7 => simd_add_5_8!(data, 7),
        8 => simd_add_5_8!(data, 8),
        9 => simd_add_9_15!(data, 9),
        10 => simd_add_9_15!(data, 10),
        11 => simd_add_9_15!(data, 11),
        12 => simd_add_9_15!(data, 12),
        13 => simd_add_9_15!(data, 13),
        14 => simd_add_9_15!(data, 14),
        15 => simd_add_9_15!(data, 15),
        16 => simd_add_16!(data),
        // Only reachable if the `need <= 16` contract is violated.
        _ => unreachable!(),
    };
    (sum, count)
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
34 changes: 18 additions & 16 deletions src/util/num/mod.rs → sonic_number/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod arch;
mod common;
mod decimal;
mod float;
Expand All @@ -6,7 +7,7 @@ mod slow;
mod table;

use self::{common::BiasedFp, float::RawFloat, table::POWER_OF_FIVE_128};
use crate::{error::ErrorCode, util::arch::simd_str2int};
use crate::arch::simd_str2int;

const FLOATING_LONGEST_DIGITS: usize = 17;
const F64_BITS: u32 = 64;
Expand All @@ -16,14 +17,20 @@ const F64_EXP_BIAS: i32 = 1023;
const F64_SIG_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;

#[derive(Debug)]
pub(crate) enum ParserNumber {
pub enum ParserNumber {
Unsigned(u64),
/// Always less than zero.
Signed(i64),
/// Always finite.
Float(f64),
}

/// Errors reported by the number parser in this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The input does not form a valid number (for example, a digit was
    /// expected but not found).
    InvalidNumber,
    /// The parsed floating-point value is infinite.
    FloatMustBeFinite,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            Error::InvalidNumber => "invalid number",
            Error::FloatMustBeFinite => "float number must be finite",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for Error {}

macro_rules! match_digit {
($data:expr, $i:expr, $pattern:pat) => {
$i < $data.len() && matches!($data[$i], $pattern)
Expand All @@ -45,18 +52,18 @@ macro_rules! digit {
macro_rules! check_digit {
($data:expr, $i:expr) => {
if !($i < $data.len() && $data[$i].is_ascii_digit()) {
return Err(ErrorCode::InvalidNumber);
return Err(Error::InvalidNumber);
}
};
}

#[inline(always)]
fn parse_exponent(data: &[u8], index: &mut usize) -> Result<i32, ErrorCode> {
fn parse_exponent(data: &[u8], index: &mut usize) -> Result<i32, Error> {
let mut exponent: i32 = 0;
let mut negative = false;

if *index >= data.len() {
return Err(ErrorCode::InvalidNumber);
return Err(Error::InvalidNumber);
}

match data[*index] {
Expand Down Expand Up @@ -114,7 +121,7 @@ fn parse_number_fraction(
exponent: &mut i32,
mut need: isize,
dot_pos: usize,
) -> Result<bool, ErrorCode> {
) -> Result<bool, Error> {
debug_assert!(need < FLOATING_LONGEST_DIGITS as isize);

// native implement:
Expand Down Expand Up @@ -152,11 +159,7 @@ fn parse_number_fraction(
}

#[inline(always)]
pub(crate) fn parse_number(
data: &[u8],
index: &mut usize,
negative: bool,
) -> Result<ParserNumber, ErrorCode> {
pub fn parse_number(data: &[u8], index: &mut usize, negative: bool) -> Result<ParserNumber, Error> {
let mut significant: u64 = 0;
let mut exponent: i32 = 0;
let mut trunc = false;
Expand Down Expand Up @@ -250,7 +253,7 @@ pub(crate) fn parse_number(
}
let mut digits_cnt = *index - digit_start;
if digits_cnt == 0 {
return Err(ErrorCode::InvalidNumber);
return Err(Error::InvalidNumber);
}

// slow path for too long integer
Expand Down Expand Up @@ -331,7 +334,7 @@ fn parse_float(
negative: bool,
trunc: bool,
raw_num: &[u8],
) -> Result<ParserNumber, ErrorCode> {
) -> Result<ParserNumber, Error> {
// parse double fast
if significant >> 52 == 0 && (-22..=(22 + 15)).contains(&exponent) {
if let Some(mut float) = parse_float_fast(exponent, significant) {
Expand Down Expand Up @@ -375,7 +378,7 @@ fn parse_float(

// check inf for float
if float.is_infinite() {
return Err(ErrorCode::FloatMustBeFinite);
return Err(Error::FloatMustBeFinite);
}
Ok(ParserNumber::Float(float))
}
Expand Down Expand Up @@ -469,8 +472,7 @@ const POW10_FLOAT: [f64; 23] = [

#[cfg(test)]
mod test {
use super::parse_number;
use crate::util::num::ParserNumber;
use crate::{parse_number, ParserNumber};

fn test_parse_ok(input: &str, expect: f64) {
assert_eq!(input.parse::<f64>().unwrap(), expect);
Expand Down
File renamed without changes.
File renamed without changes.
7 changes: 7 additions & 0 deletions sonic_simd/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[package]
edition = "2021"
name = "sonic_simd"
version = "0.1.0"

[dependencies]
cfg-if = "1.0"
8 changes: 8 additions & 0 deletions sonic_simd/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

# sonic_simd

A portable SIMD library that provides low-level APIs for x86 and ARM. Other platforms use the scalar fallback implementation.

TODO:

1. support RISC-V.
1 change: 0 additions & 1 deletion src/util/simd/avx2.rs → sonic_simd/src/avx2.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::{
arch::x86_64::*,
mem::transmute,
ops::{BitAnd, BitOr, BitOrAssign},
};

Expand Down
24 changes: 0 additions & 24 deletions src/util/simd/bits.rs → sonic_simd/src/bits.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use serde::de;

use super::traits::BitMask;

macro_rules! impl_bits {
Expand Down Expand Up @@ -102,25 +100,3 @@ impl BitMask for NeonBits {
Self(self.0 & u64::MAX >> (n * 4))
}
}

/// Packs two 16-bit halves into one `u32`.
///
/// On little-endian targets `lo` fills the low 16 bits and `hi` the high 16
/// bits; on big-endian targets the two roles are swapped.
pub fn combine_u16(lo: u16, hi: u16) -> u32 {
    let (bottom, top) = if cfg!(target_endian = "little") {
        (lo, hi)
    } else {
        (hi, lo)
    };
    (u32::from(top) << 16) | u32::from(bottom)
}

/// Packs two 32-bit halves into one `u64`.
///
/// On little-endian targets `lo` fills the low 32 bits and `hi` the high 32
/// bits; on big-endian targets the two roles are swapped.
pub fn combine_u32(lo: u32, hi: u32) -> u64 {
    let (bottom, top) = if cfg!(target_endian = "little") {
        (lo, hi)
    } else {
        (hi, lo)
    };
    (u64::from(top) << 32) | u64::from(bottom)
}
10 changes: 5 additions & 5 deletions src/util/simd/mod.rs → sonic_simd/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#[allow(unused_imports)]
#[allow(unused)]
#![allow(non_camel_case_types)]

pub mod bits;
mod traits;

Expand All @@ -9,7 +9,7 @@ cfg_if::cfg_if! {
mod sse2;
use self::sse2::*;
} else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
pub(crate) mod neon;
pub mod neon;
use self::neon::*;
} else {
// TODO: support wasm
Expand All @@ -35,12 +35,12 @@ pub use self::traits::{BitMask, Mask, Simd};
mod v512;
use self::v512::*;

#[cfg(all(target_feature = "neon", target_arch = "aarch64"))]
pub type u8x16 = Simd128u;

pub type u8x32 = Simd256u;
pub type u8x64 = Simd512u;

pub type i8x16 = Simd128i;
pub type i8x32 = Simd256i;
pub type i8x64 = Simd512i;

pub type m8x32 = Mask256;
Loading
Loading