feat: make the hash type generic

Currently, we only support using our internal hash type, that is, the 32-byte representation of a sha512-256 hash digest. However, we may want to give callers the possibility to use other hash types, or store them in other ways. This commit modifies `Pollard`, `Proof` and `Stump` to take in a generic parameter `Hash` that implements the trait `NodeHash` and defaults to `BitcoinNodeHash`, the one used by Bitcoin consensus as defined by the utreexo spec. This is part of a project to support a Cairo prover for Bitcoin. To keep the circuit small, we need to use an algebraic hash function like Poseidon, which tends to reduce the circuit size significantly. With this commit the caller can use our data structures with Poseidon without needing to change anything in rustreexo.
mit-dci · Oct 9, 2024 · 508339e · 508339e
1 parent 6a8fe53
commit 508339e
Show file tree

Hide file tree

Showing 10 changed files with 915 additions and 400 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -17,6 +17,7 @@ serde = { version = "1.0", features = ["derive"], optional = true }
 [dev-dependencies]
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0.81"
+starknet-crypto = "0.7.2"
 
 [features]
 with-serde = ["serde"]
@@ -27,3 +28,6 @@ name = "simple-stump-update"
 
 [[example]]
 name = "proof-update"
+
+[[example]]
+name = "custom-hash-type"
diff --git a/examples/custom-hash-type.rs b/examples/custom-hash-type.rs
@@ -0,0 +1,143 @@
+//! All data structures in this library are generic over the hash type used, defaulting to
+//! [BitcoinNodeHash](crate::accumulator::node_hash::BitcoinNodeHash), the one used by Bitcoin
+//! as defined by the utreexo spec. However, if you need to use a different hash type, you can
+//! implement the [NodeHash](crate::accumulator::node_hash::NodeHash) trait for it, and use it
+//! with the accumulator data structures.
+//!
+//! This example shows how to use a custom hash type based on the Poseidon hash function. The
+//! [Poseidon Hash](https://eprint.iacr.org/2019/458.pdf) is a hash function that is optimized
+//! for zero-knowledge proofs, and is used in projects like ZCash and StarkNet.
+//! If you want to work with utreexo proofs in zero-knowledge you may want to use this instead
+//! of our usual sha512-256 that we use by default, since that will give you smaller circuits.
+//! This example shows how to use both the [Pollard](crate::accumulator::pollard::Pollard) and
+//! proofs with a custom hash type. The code here should be pretty much all you need to do to
+//! use your custom hashes, just tweak the implementation of
+//! [NodeHash](crate::accumulator::node_hash::NodeHash) for your hash type.
+
+use rustreexo::accumulator::node_hash::NodeHash;
+use rustreexo::accumulator::pollard::Pollard;
+use starknet_crypto::poseidon_hash_many;
+use starknet_crypto::Felt;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+/// We need a stateful wrapper around the actual hash, because the accumulator uses the
+/// different states below internally. Here we use an enum to represent those states, you
+/// may want to use a struct with more data, depending on your needs.
+enum PoseidonHash {
+    /// This means this holds an actual value
+    ///
+    /// It usually represents a node in the accumulator that hasn't been deleted.
+    Hash(Felt),
+    /// Placeholder is a value that hasn't been deleted, but we don't have the actual value.
+    /// The only thing that matters about it is that it's not empty. You can implement this
+    /// the way you want, just make sure that [NodeHash::is_placeholder] and [NodeHash::placeholder]
+    /// return sane values (that is, if we call [NodeHash::placeholder] calling [NodeHash::is_placeholder]
+    /// on the result should return true).
+    Placeholder,
+    /// This is an empty value, it represents a node that was deleted from the accumulator.
+    ///
+    /// Same as the placeholder, you can implement this the way you want, just make sure that
+    /// [NodeHash::is_empty] and [NodeHash::empty] return sane values.
+    Empty,
+}
+
+// you'll need to implement Display for your hash type, so you can print it.
+impl std::fmt::Display for PoseidonHash {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PoseidonHash::Hash(h) => write!(f, "Hash({})", h),
+            PoseidonHash::Placeholder => write!(f, "Placeholder"),
+            PoseidonHash::Empty => write!(f, "Empty"),
+        }
+    }
+}
+
+// this is the implementation of the NodeHash trait for our custom hash type. And it's the only
+// thing you need to do to use your custom hash type with the accumulator data structures.
+impl NodeHash for PoseidonHash {
+    // returns a new placeholder type such that is_placeholder returns true
+    fn placeholder() -> Self {
+        PoseidonHash::Placeholder
+    }
+
+    // returns an empty hash such that is_empty returns true
+    fn empty() -> Self {
+        PoseidonHash::Empty
+    }
+
+    // returns true if this is a placeholder. This should be true iff this value was created by
+    // calling placeholder.
+    fn is_placeholder(&self) -> bool {
+        matches!(self, PoseidonHash::Placeholder)
+    }
+
+    // returns true if this is an empty hash. This should be true iff this value was created by
+    // calling empty.
+    fn is_empty(&self) -> bool {
+        matches!(self, PoseidonHash::Empty)
+    }
+
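+    // used for serialization, writes the hash to the writer. Note that placeholder and empty
+    // are both written as 32 zero bytes, so `read` gives back an empty hash for either of them.
+    // if you don't want to use serialization, you can just return an error here.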
+    fn write<W>(&self, writer: &mut W) -> std::io::Result<()>
+    where
+        W: std::io::Write,
+    {
+        match self {
+            PoseidonHash::Hash(h) => writer.write_all(&h.to_bytes_be()),
+            PoseidonHash::Placeholder => writer.write_all(&[0u8; 32]),
+            PoseidonHash::Empty => writer.write_all(&[0u8; 32]),
+        }
+    }
+
+    // used for deserialization, reads the hash from the reader
+    //
+    // if you don't want to use serialization, you can just return an error here.
+    fn read<R>(reader: &mut R) -> std::io::Result<Self>
+    where
+        R: std::io::Read,
+    {
+        let mut buf = [0u8; 32];
+        reader.read_exact(&mut buf)?;
+        if buf.iter().all(|&b| b == 0) {
+            Ok(PoseidonHash::Empty)
+        } else {
+            Ok(PoseidonHash::Hash(Felt::from_bytes_be(&buf)))
+        }
+    }
+
+    // the main thing about the hash type, it returns the next node's hash, given its children.
+    // The implementation of this method is highly consensus critical, so everyone should use the
+    // exact same algorithm to calculate the next hash. Rustreexo won't call this method, unless
+    // **both** children are not empty.
+    fn parent_hash(left: &Self, right: &Self) -> Self {
+        if let (PoseidonHash::Hash(left), PoseidonHash::Hash(right)) = (left, right) {
+            return PoseidonHash::Hash(poseidon_hash_many(&[*left, *right]));
+        }
+
+        // This should never happen, since rustreexo won't call this method unless both children
+        // are not empty.
+        unreachable!()
+    }
+}
+
+fn main() {
+    // Create a vector with two utxos that will be added to the Pollard
+    let elements = vec![
+        PoseidonHash::Hash(Felt::from(1)),
+        PoseidonHash::Hash(Felt::from(2)),
+    ];
+
+    // Create a new Pollard, and add the utxos to it
+    let mut p = Pollard::<PoseidonHash>::new_with_hash();
+    p.modify(&elements, &[]).unwrap();
+
+    // Create a proof that the first utxo is in the Pollard
+    let proof = p.prove(&[elements[0]]).unwrap();
+
+    // check that the proof has exactly one target
+    assert_eq!(proof.n_targets(), 1);
+    // check that the proof is what we expect
+    assert!(p.verify(&proof, &[elements[0]]).unwrap());
+}
diff --git a/examples/full-accumulator.rs b/examples/full-accumulator.rs
@@ -4,17 +4,21 @@
 
 use std::str::FromStr;
 
-use rustreexo::accumulator::node_hash::NodeHash;
+use rustreexo::accumulator::node_hash::BitcoinNodeHash;
 use rustreexo::accumulator::pollard::Pollard;
 use rustreexo::accumulator::proof::Proof;
 use rustreexo::accumulator::stump::Stump;
 
 fn main() {
     let elements = vec![
-        NodeHash::from_str("b151a956139bb821d4effa34ea95c17560e0135d1e4661fc23cedc3af49dac42")
-            .unwrap(),
-        NodeHash::from_str("d3bd63d53c5a70050a28612a2f4b2019f40951a653ae70736d93745efb1124fa")
-            .unwrap(),
+        BitcoinNodeHash::from_str(
+            "b151a956139bb821d4effa34ea95c17560e0135d1e4661fc23cedc3af49dac42",
+        )
+        .unwrap(),
+        BitcoinNodeHash::from_str(
+            "d3bd63d53c5a70050a28612a2f4b2019f40951a653ae70736d93745efb1124fa",
+        )
+        .unwrap(),
     ];
     // Create a new Pollard, and add the utxos to it
     let mut p = Pollard::new();
@@ -31,9 +35,10 @@ fn main() {
     // Now we want to update the Pollard, by removing the first utxo, and adding a new one.
     // This would be in case we received a new block with a transaction spending the first utxo,
     // and creating a new one.
-    let new_utxo =
-        NodeHash::from_str("cac74661f4944e6e1fed35df40da951c6e151e7b0c8d65c3ee37d6dfd3bc3ef7")
-            .unwrap();
+    let new_utxo = BitcoinNodeHash::from_str(
+        "cac74661f4944e6e1fed35df40da951c6e151e7b0c8d65c3ee37d6dfd3bc3ef7",
+    )
+    .unwrap();
     p.modify(&[new_utxo], &[elements[0]]).unwrap();
 
     // Now we can prove that the new utxo is in the Pollard.

diff --git a/examples/proof-update.rs b/examples/proof-update.rs
@@ -12,7 +12,7 @@
 
 use std::str::FromStr;
 
-use rustreexo::accumulator::node_hash::NodeHash;
+use rustreexo::accumulator::node_hash::BitcoinNodeHash;
 use rustreexo::accumulator::proof::Proof;
 use rustreexo::accumulator::stump::Stump;
 
@@ -36,7 +36,7 @@ fn main() {
         .update(vec![], utxos.clone(), vec![], vec![0, 1], update_data)
         .unwrap();
     // This should be a valid proof over 0 and 1.
-    assert_eq!(p.targets(), 2);
+    assert_eq!(p.n_targets(), 2);
     assert_eq!(s.verify(&p, &cached_hashes), Ok(true));
 
     // Get a subset of the proof, for the first UTXO only
@@ -65,7 +65,7 @@ fn main() {
 
 /// Returns the hashes for UTXOs in the first block in this fictitious example, there's nothing
 /// special about them, they are just the first 8 integers hashed as u8s.
-fn get_utxo_hashes1() -> Vec<NodeHash> {
+fn get_utxo_hashes1() -> Vec<BitcoinNodeHash> {
     let hashes = [
         "6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d",
         "4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a",
@@ -78,11 +78,11 @@ fn get_utxo_hashes1() -> Vec<NodeHash> {
     ];
     hashes
         .iter()
-        .map(|h| NodeHash::from_str(h).unwrap())
+        .map(|h| BitcoinNodeHash::from_str(h).unwrap())
         .collect()
 }
 /// Returns the hashes for UTXOs in the second block.
-fn get_utxo_hashes2() -> Vec<NodeHash> {
+fn get_utxo_hashes2() -> Vec<BitcoinNodeHash> {
     let utxo_hashes = [
         "bf4aff60ee0f3b2d82b47b94f6eff3018d1a47d1b0bc5dfbf8d3a95a2836bf5b",
         "2e6adf10ab3174629fc388772373848bbe277ffee1f72568e6d06e823b39d2dd",
@@ -91,6 +91,6 @@ fn get_utxo_hashes2() -> Vec<NodeHash> {
     ];
     utxo_hashes
         .iter()
-        .map(|h| NodeHash::from_str(h).unwrap())
+        .map(|h| BitcoinNodeHash::from_str(h).unwrap())
         .collect()
 }
diff --git a/examples/simple-stump-update.rs b/examples/simple-stump-update.rs
@@ -5,7 +5,7 @@
 use std::str::FromStr;
 use std::vec;
 
-use rustreexo::accumulator::node_hash::NodeHash;
+use rustreexo::accumulator::node_hash::BitcoinNodeHash;
 use rustreexo::accumulator::proof::Proof;
 use rustreexo::accumulator::stump::Stump;
 
@@ -15,10 +15,14 @@ fn main() {
     // If we assume this is the very first block, then the Stump is empty, and we can just add
     // the utxos to it. Assuming a coinbase with two outputs, we would have the following utxos:
     let utxos = vec![
-        NodeHash::from_str("b151a956139bb821d4effa34ea95c17560e0135d1e4661fc23cedc3af49dac42")
-            .unwrap(),
-        NodeHash::from_str("d3bd63d53c5a70050a28612a2f4b2019f40951a653ae70736d93745efb1124fa")
-            .unwrap(),
+        BitcoinNodeHash::from_str(
+            "b151a956139bb821d4effa34ea95c17560e0135d1e4661fc23cedc3af49dac42",
+        )
+        .unwrap(),
+        BitcoinNodeHash::from_str(
+            "d3bd63d53c5a70050a28612a2f4b2019f40951a653ae70736d93745efb1124fa",
+        )
+        .unwrap(),
     ];
     // Create a new Stump, and add the utxos to it. Notice how we don't use the full return here,
     // but only the Stump. To understand what is the second return value, see the documentation
@@ -34,9 +38,10 @@ fn main() {
     // Now we want to update the Stump, by removing the first utxo, and adding a new one.
     // This would be in case we received a new block with a transaction spending the first utxo,
     // and creating a new one.
-    let new_utxo =
-        NodeHash::from_str("d3bd63d53c5a70050a28612a2f4b2019f40951a653ae70736d93745efb1124fa")
-            .unwrap();
+    let new_utxo = BitcoinNodeHash::from_str(
+        "d3bd63d53c5a70050a28612a2f4b2019f40951a653ae70736d93745efb1124fa",
+    )
+    .unwrap();
     let s = s.modify(&[new_utxo], &[utxos[0]], &proof).unwrap().0;
     // Now we can verify that the new utxo is in the Stump, and the old one is not.
     let new_proof = Proof::new(vec![2], vec![new_utxo]);