From 02821b46119eac3671192c3a739b5b79f26b15d9 Mon Sep 17 00:00:00 2001 From: codeesura Date: Wed, 6 Dec 2023 21:42:27 +0300 Subject: [PATCH 1/6] optimize hexadecimal --- .../src/custom_serde/num_as_hex.rs | 102 +++++------------- 1 file changed, 24 insertions(+), 78 deletions(-) diff --git a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs index a3fe274e..3fdf077c 100644 --- a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs +++ b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs @@ -16,60 +16,28 @@ impl<'de> NumAsHex<'de> for u64 { where S: serde::Serializer, { - /// The symbols to be used for the hexadecimal representation. const HEX_DIGITS: [u8; 16] = *b"0123456789abcdef"; - /// The maximum number of digits in the hexadecimal representation of a `u64`. - const MAX_NUMBER_SIZE: usize = u64::MAX.ilog(16) as usize + 1; if *self == 0 { return serializer.serialize_str("0x0"); } - // The following code can be very much optimized simply by making everything - // `unsafe` and using pointers to write to the buffer. - // Let's benchmark it first to ensure that it's actually worth it. - - // The buffer is filled from the end to the beginning. - // We know that it will always have the correct size because we made it have the - // maximum possible size for a base-16 representation of a `u64`. - // - // +-----------------------------------+ - // + 1 2 f a + - // +-----------------------------------+ - // ^ cursor - // - // Once the number has been written to the buffer, we simply add a `0x` prefix - // to the beginning of the buffer. Just like the digits, we know the buffer is - // large enough to hold the prefix. - // - // +-----------------------------------+ - // + 0 x 1 2 f a + - // +-----------------------------------+ - // ^ cursor - // |-----------------------| remaining - // - // The output string is the part of the buffer that has been written. 
In other - // words, we have to skip all the bytes that *were not* written yet (remaining). - - let mut buffer = [0u8; MAX_NUMBER_SIZE + 2]; // + 2 to account for 0x - let mut cursor = buffer.iter_mut().rev(); + let mut buffer = [0u8; 18]; // Enough for "0x" prefix and 16 hex digits let mut n = *self; + let mut length = 0; + while n != 0 { - *cursor.next().unwrap() = HEX_DIGITS[(n % 16) as usize]; + length += 1; + buffer[18 - length] = HEX_DIGITS[(n % 16) as usize]; n /= 16; } - *cursor.next().unwrap() = b'x'; - *cursor.next().unwrap() = b'0'; - - let remaining = cursor.len(); - // SAFETY: - // We only wrote ASCII characters to the buffer, ensuring that it is only composed - // of valid UTF-8 code points. This unwrap can never fail. Just like the code above, - // using `from_utf8_unchecked` is safe. - let s = core::str::from_utf8(&buffer[remaining..]).unwrap(); + buffer[18 - length - 1] = b'x'; + buffer[18 - length - 2] = b'0'; + length += 2; - serializer.serialize_str(s) + let hex_str = core::str::from_utf8(&buffer[18 - length..]).unwrap(); + serializer.serialize_str(hex_str) } fn deserialize(deserializer: D) -> Result @@ -93,51 +61,29 @@ impl<'de> NumAsHex<'de> for u64 { // unsafe code and pointers. Though the gain will probably be less interesting. // Explicitly avoid being UTF-8 aware. - let mut bytes = v.as_bytes(); + let bytes = v.as_bytes(); // If the input string does not start with the `0x` prefix, then it's an // error. The `NUM_AS_HEX` regex defined in the specification specifies // this prefix as mandatory. - bytes = bytes - .strip_prefix(b"0x") - .ok_or_else(|| E::custom("expected a hexadecimal string starting with 0x"))?; - - if bytes.is_empty() { - return Err(E::custom("expected a hexadecimal string")); - } - - // Remove the leading zeros from the string, if any. - // We need this in order to optimize the code below with the knowledge of the - // length of the hexadecimal representation of the number. 
- while let Some(rest) = bytes.strip_prefix(b"0") { - bytes = rest; - } - - // If the string has a size larger than the maximum size of the hexadecimal - // representation of a `u64`, then we're forced to overflow. - if bytes.len() > u64::MAX.ilog(16) as usize + 1 { - return Err(E::custom("integer overflowed 64-bit")); + if bytes.len() < 2 || &bytes[0..2] != b"0x" { + return Err(E::custom("expected a hexadecimal string starting with 0x")); } - // Aggregate the digits into `n`, - // Digits from `0` to `9` represent numbers from `0` to `9`. - // Letters from `a` to `f` represent numbers from `10` to `15`. - // - // As specified in the spec, both uppercase and lowercase characters are - // allowed. - // - // Because we already checked the size of the string earlier, we know that - // the following code will never overflow. + let hex_bytes = &bytes[2..]; let mut n = 0u64; - for &b in bytes.iter() { - let unit = match b { - b'0'..=b'9' => b as u64 - b'0' as u64, - b'a'..=b'f' => b as u64 - b'a' as u64 + 10, - b'A'..=b'F' => b as u64 - b'A' as u64 + 10, + for &b in hex_bytes { + let digit = match b { + b'0'..=b'9' => b - b'0', + b'a'..=b'f' => 10 + b - b'a', + b'A'..=b'F' => 10 + b - b'A', _ => return Err(E::custom("invalid hexadecimal digit")), }; - - n = n * 16 + unit; + n = n + .checked_mul(16) + .ok_or_else(|| E::custom("integer overflowed 64-bit"))? 
+ .checked_add(digit as u64) + .ok_or_else(|| E::custom("integer overflowed 64-bit"))?; } Ok(n) From 880f511fcdd1d603d8973e3f7f3839324c8b0a1b Mon Sep 17 00:00:00 2001 From: codeesura <120671243+codeesura@users.noreply.github.com> Date: Wed, 20 Dec 2023 18:37:21 +0300 Subject: [PATCH 2/6] Update num_as_hex.rs --- .../src/custom_serde/num_as_hex.rs | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs index 3fdf077c..47579141 100644 --- a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs +++ b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs @@ -16,12 +16,39 @@ impl<'de> NumAsHex<'de> for u64 { where S: serde::Serializer, { + /// The symbols to be used for the hexadecimal representation. const HEX_DIGITS: [u8; 16] = *b"0123456789abcdef"; if *self == 0 { return serializer.serialize_str("0x0"); } + // The following code can be very much optimized simply by making everything + // `unsafe` and using pointers to write to the buffer. + // Let's benchmark it first to ensure that it's actually worth it. + + // The buffer is filled from the end to the beginning. + // We know that it will always have the correct size because we made it have the + // maximum possible size for a base-16 representation of a `u64`. + // + // +-----------------------------------+ + // + 1 2 f a + + // +-----------------------------------+ + // ^ cursor + // + // Once the number has been written to the buffer, we simply add a `0x` prefix + // to the beginning of the buffer. Just like the digits, we know the buffer is + // large enough to hold the prefix. + // + // +-----------------------------------+ + // + 0 x 1 2 f a + + // +-----------------------------------+ + // ^ cursor + // |-----------------------| remaining + // + // The output string is the part of the buffer that has been written. 
In other + // words, we have to skip all the bytes that *were not* written yet (remaining). + let mut buffer = [0u8; 18]; // Enough for "0x" prefix and 16 hex digits let mut n = *self; let mut length = 0; From 071ee91477780761e11fdf6b7465b19bcf732586 Mon Sep 17 00:00:00 2001 From: codeesura <120671243+codeesura@users.noreply.github.com> Date: Wed, 20 Dec 2023 18:39:41 +0300 Subject: [PATCH 3/6] Update num_as_hex.rs --- crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs index 47579141..e0e50606 100644 --- a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs +++ b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs @@ -97,6 +97,15 @@ impl<'de> NumAsHex<'de> for u64 { return Err(E::custom("expected a hexadecimal string starting with 0x")); } + // Aggregate the digits into `n`, + // Digits from `0` to `9` represent numbers from `0` to `9`. + // Letters from `a` to `f` represent numbers from `10` to `15`. + // + // As specified in the spec, both uppercase and lowercase characters are + // allowed. + // + // Because we already checked the size of the string earlier, we know that + // the following code will never overflow. 
let hex_bytes = &bytes[2..]; let mut n = 0u64; for &b in hex_bytes { From 6e23eff24caf1a5400c8bc3f40353fd0ecd4cb22 Mon Sep 17 00:00:00 2001 From: codeesura Date: Thu, 21 Dec 2023 13:18:28 +0300 Subject: [PATCH 4/6] hex parsing by trimming leading zeros --- .../src/custom_serde/num_as_hex.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs index e0e50606..0ca04b3e 100644 --- a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs +++ b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs @@ -48,7 +48,7 @@ impl<'de> NumAsHex<'de> for u64 { // // The output string is the part of the buffer that has been written. In other // words, we have to skip all the bytes that *were not* written yet (remaining). - + let mut buffer = [0u8; 18]; // Enough for "0x" prefix and 16 hex digits let mut n = *self; let mut length = 0; @@ -107,19 +107,23 @@ impl<'de> NumAsHex<'de> for u64 { // Because we already checked the size of the string earlier, we know that // the following code will never overflow. let hex_bytes = &bytes[2..]; + let trimmed_hex = hex_bytes + .iter() + .skip_while(|&&b| b == b'0') + .collect::<Vec<_>>(); + if trimmed_hex.len() > 16 { + return Err(E::custom("hexadecimal string too long for a 64-bit number")); + } + let mut n = 0u64; - for &b in hex_bytes { + for &b in &trimmed_hex { let digit = match b { b'0'..=b'9' => b - b'0', b'a'..=b'f' => 10 + b - b'a', b'A'..=b'F' => 10 + b - b'A', _ => return Err(E::custom("invalid hexadecimal digit")), }; - n = n - .checked_mul(16) - .ok_or_else(|| E::custom("integer overflowed 64-bit"))? 
- .checked_add(digit as u64) - .ok_or_else(|| E::custom("integer overflowed 64-bit"))?; + n = n * 16 + digit as u64; } Ok(n) From e1301439cb56cc79976affc4b626072768eb3525 Mon Sep 17 00:00:00 2001 From: codeesura Date: Thu, 21 Dec 2023 13:20:56 +0300 Subject: [PATCH 5/6] add comment --- crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs index 0ca04b3e..6a90361f 100644 --- a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs +++ b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs @@ -107,10 +107,14 @@ impl<'de> NumAsHex<'de> for u64 { // Because we already checked the size of the string earlier, we know that // the following code will never overflow. let hex_bytes = &bytes[2..]; + + // Trim leading zeros from the hexadecimal part for efficient processing let trimmed_hex = hex_bytes .iter() .skip_while(|&&b| b == b'0') .collect::<Vec<_>>(); + + // Check if the significant part of the hexadecimal string is too long for a 64-bit number if trimmed_hex.len() > 16 { return Err(E::custom("hexadecimal string too long for a 64-bit number")); } From c0fbaaee4661a5129be61f6a9667703074285a66 Mon Sep 17 00:00:00 2001 From: codeesura <120671243+codeesura@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:18:34 +0300 Subject: [PATCH 6/6] use alloc for workflow --- crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs index 6a90361f..149d29dd 100644 --- a/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs +++ b/crates/starknet-types-rpc/src/custom_serde/num_as_hex.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use core::marker::PhantomData; /// A trait for types that should be serialized or deserialized as hexadecimal strings.