Skip to content

Commit

Permalink
[db-verifier] compare hashes of values instead of holding index keys …
Browse files Browse the repository at this point in the history
…in memory (#23493)

GitOrigin-RevId: a5f8db1c1b9ccbb87d36a64dc0fa0425e43d26c1
  • Loading branch information
ldanilek authored and Convex, Inc. committed Mar 15, 2024
1 parent 778d90a commit e1062f9
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 5 deletions.
28 changes: 24 additions & 4 deletions crates/value/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ pub mod encode_for_hash {

use crate::{
sorting::write_escaped_bytes,
ConvexObject,
ConvexValue,
};

Expand Down Expand Up @@ -499,15 +500,22 @@ pub mod encode_for_hash {
},
ConvexValue::Object(o) => {
w.write_u8(11)?;
for (k, v) in o.iter() {
write_escaped_bytes(k.as_bytes(), w)?;
v.encode_for_hash(w)?;
}
o.encode_for_hash(w)?;
},
}
Ok(())
}
}

impl ConvexObject {
    /// Writes this object's hash encoding to `w`.
    ///
    /// For every `(key, value)` pair yielded by `iter()`, in iteration order,
    /// the key's bytes are written through `write_escaped_bytes` and are
    /// immediately followed by the value's own `encode_for_hash` output.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error reported while writing.
    pub fn encode_for_hash<W: Write>(&self, w: &mut W) -> io::Result<()> {
        self.iter().try_for_each(|(field, value)| {
            write_escaped_bytes(field.as_bytes(), w)?;
            value.encode_for_hash(w)
        })
    }
}
}

impl Hash for ConvexValue {
Expand All @@ -521,6 +529,18 @@ impl Hash for ConvexValue {
}
}

impl Hash for ConvexObject {
    /// Hashes the object by way of its `encode_for_hash` byte encoding.
    ///
    /// `Hash` cannot be derived here because f64 doesn't implement `hash`, so
    /// the implementation is written by hand. It must stay compatible with
    /// our manual implementation of `cmp`.
    ///
    /// # Panics
    ///
    /// Panics if encoding into the in-memory buffer reports an error.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        // Serialize into a scratch buffer first, then feed that buffer to the
        // hasher as a single `Vec<u8>` value.
        let mut encoded: Vec<u8> = Vec::new();
        let written = self.encode_for_hash(&mut encoded);
        written.expect("failed to write to memory");
        Hash::hash(&encoded, hasher);
    }
}

#[cfg(test)]
mod hash_tests {
use cmd_util::env::env_config;
Expand Down
33 changes: 32 additions & 1 deletion crates/value/src/sha256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
//! integer lengths for their digest type.)
use std::{
fmt,
hash::Hasher,
io::{
self,
Write,
},
ops::Deref,
ops::{
BitXorAssign,
Deref,
},
};

use anyhow::Context;
Expand Down Expand Up @@ -72,6 +76,14 @@ impl TryFrom<Vec<u8>> for Sha256Digest {
}
}

impl BitXorAssign for Sha256Digest {
    /// XORs `rhs` into `self` in place, one byte at a time: byte `i` of
    /// `self` becomes `self[i] ^ rhs[i]`.
    fn bitxor_assign(&mut self, rhs: Self) {
        // Both operands are the same digest type, so pairing the bytes up
        // positionally touches every byte exactly once.
        self.0
            .iter_mut()
            .zip(rhs.iter())
            .for_each(|(dst, src)| *dst ^= *src);
    }
}

#[derive(Clone)]
pub struct Sha256 {
inner: sha2::Sha256,
Expand Down Expand Up @@ -114,6 +126,25 @@ impl Sha256 {
}
}

impl Hasher for Sha256 {
    /// Returns a 64-bit summary of the current digest.
    ///
    /// Prefer using `finalize`, which returns the full 256 bits.
    fn finish(&self) -> u64 {
        // `finish` only has `&self`, so finalize a clone and leave the
        // running hasher state as it is.
        let digest = self.clone().finalize();
        // Compress the 32 byte digest into 8 bytes: byte `i` is shifted into
        // byte lane `i % 8` and XORed in, which is the same as reading the
        // digest as four little-endian u64s and XORing them together.
        digest
            .iter()
            .enumerate()
            .fold(0, |acc, (idx, byte)| acc ^ ((*byte as u64) << ((idx % 8) * 8)))
    }

    /// Feeds `bytes` into the hasher by forwarding them to `update`.
    fn write(&mut self, bytes: &[u8]) {
        self.update(bytes);
    }
}

impl TryFrom<Sha256Digest> for ConvexValue {
type Error = anyhow::Error;

Expand Down

0 comments on commit e1062f9

Please sign in to comment.