diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 078f73a4..73923fa6 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,6 +1,6 @@
 CODEOWNERS @brunoffranca
 
-/node/actors/consensus/ @brunoffranca
+/node/actors/bft/ @brunoffranca
 
 /node/actors/network/ @pompon0
 /node/libs/concurrency/ @pompon0
diff --git a/docs/architecture.md b/docs/architecture.md
index a9bbfc63..8907d8fd 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -12,9 +12,7 @@ The actor crates are where the vast majority of the work is done. Each of them m
 
 - the `executor` crate is responsible for parsing the configuration parameters given by the user, and initializing the actors and the storage. It's basically the bootloader for the node. It also dispatches messages between the rest of the actors. They all send messages to the executor and it then converts and forwards the messages to the desired destination. This improves the encapsulation of the actors.
 
-- the `network` crate which maintains a pool of outbound and inbound connections to other nodes.
-
-- the `sync_blocks` crate implements a block syncing mechanism for nodes. It enables nodes to exchange blocks with each other in a peer-to-peer network, allowing them to keep a copy of the blockchain stored in their local storage up-to-date.
+- the `network` crate maintains a pool of outbound and inbound connections to other nodes. It also implements a syncing mechanism for nodes (for blocks, batches, attester signatures, etc.).
 
 ### Library crates
 
@@ -44,8 +42,7 @@ This section provides a physical map of folders & files in this repository.
 
   - `/bft`: The consensus actor.
   - `/executor`: The actor orchestrator.
-  - `/network`: The networking actor.
-  - `/sync_blocks`: The block syncing actor.
+  - `/network`: The network actor.
 
 - `/lib`: All the library crates used as dependencies of the actor crates above.
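For context on the dispatching described above: each actor communicates with the executor over a pair of channels (a "pipe"), and the executor's dispatcher converts and forwards messages between those pipes. Below is a minimal sketch of that pattern with toy types; the real implementation is the `Dispatcher` in `node/actors/executor/src/io.rs` (shown further down in this diff) and uses `zksync_consensus_utils::pipe` with `zksync_concurrency` channels rather than `std::sync::mpsc`.

```rust
use std::sync::mpsc;

/// Toy stand-in for the real consensus actor input message type.
enum ConsensusMsg {
    FromNetwork(String),
}

/// Forward every message the network actor emits into the consensus actor,
/// converting the message type along the way. This is, schematically, what
/// the executor's dispatcher does for each pair of actor pipes.
fn dispatch(consensus_in: mpsc::Sender<ConsensusMsg>, network_out: mpsc::Receiver<String>) {
    for msg in network_out {
        let _ = consensus_in.send(ConsensusMsg::FromNetwork(msg));
    }
}

fn main() {
    let (con_send, con_recv) = mpsc::channel();
    let (net_send, net_recv) = mpsc::channel();
    net_send.send("network message".to_string()).unwrap();
    drop(net_send); // close the pipe so `dispatch` terminates
    dispatch(con_send, net_recv);
    assert!(matches!(con_recv.recv(), Ok(ConsensusMsg::FromNetwork(_))));
}
```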
diff --git a/node/Cargo.lock b/node/Cargo.lock index e288e042..7739f03b 100644 --- a/node/Cargo.lock +++ b/node/Cargo.lock @@ -76,9 +76,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" [[package]] name = "android-tzdata" @@ -151,9 +151,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" [[package]] name = "assert_matches" @@ -163,13 +163,13 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -216,6 +216,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + [[package]] name = "base64ct" version = "1.6.0" @@ -249,7 +255,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -320,9 +326,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.4" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byteorder" @@ -355,12 +361,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.90" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" dependencies = [ "jobserver", "libc", + "once_cell", ] [[package]] @@ -404,15 +411,15 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -492,10 +499,10 @@ version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -659,7 +666,7 @@ checksum = 
"f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -683,7 +690,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -694,7 +701,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -792,9 +799,9 @@ dependencies = [ [[package]] name = "either" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "elsa" @@ -857,9 +864,9 @@ dependencies = [ [[package]] name = "fiat-crypto" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c007b1ae3abe1cb6f85a16305acd418b7ca6343b953633fee2b76d8f108b830f" +checksum = "38793c55593b33412e3ae40c2c9781ffaa6f438f6f8c10f24e71846fbd7ae01e" [[package]] name = "fixedbitset" @@ -944,7 +951,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -989,9 +996,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "libc", @@ -1041,9 +1048,9 @@ dependencies = [ [[package]] name = "half" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", @@ -1059,12 +1066,6 @@ dependencies = [ "allocator-api2", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -1287,9 +1288,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -1302,9 +1303,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" -version = "0.1.28" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ "libc", ] @@ -1534,7 +1535,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1588,7 +1589,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] 
[[package]] @@ -1635,7 +1636,7 @@ checksum = "adf157a4dc5a29b7b464aa8fe7edeff30076e07e13646a1c3874f58477dc99f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1680,7 +1681,7 @@ dependencies = [ "proc-macro2", "quote", "regex-syntax 0.6.29", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1737,7 +1738,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1774,9 +1775,9 @@ dependencies = [ [[package]] name = "multimap" -version = "0.8.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" [[package]] name = "nom" @@ -1934,11 +1935,11 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "pem" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" +checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" dependencies = [ - "base64 0.21.7", + "base64 0.22.0", "serde", ] @@ -1979,7 +1980,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2020,7 +2021,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2132,19 +2133,19 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" +checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550" dependencies = [ "proc-macro2", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] @@ -2169,14 +2170,14 @@ checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "prost" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" +checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922" dependencies = [ "bytes", "prost-derive", @@ -2184,13 +2185,13 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" +checksum = "80b776a1b2dc779f5ee0641f8ade0125bc1298dd41a9a0c16d8bd57b42d222b1" dependencies = [ "bytes", - "heck 0.4.1", - "itertools 0.11.0", + "heck", + "itertools 0.12.1", "log", "multimap", "once_cell", @@ -2199,22 +2200,21 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.58", + "syn 2.0.60", "tempfile", - "which", ] [[package]] name = "prost-derive" -version = "0.12.3" +version = "0.12.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" +checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2235,9 +2235,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" +checksum = "3235c33eb02c1f1e212abdbe34c78b264b038fb58ca612664343271e36e55ffe" dependencies = [ "prost", ] @@ -2280,9 +2280,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -2490,9 +2490,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.5.0", "errno", @@ -2503,9 +2503,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.21.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" dependencies = [ "log", "ring", @@ -2649,9 +2649,9 @@ checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" dependencies = [ "serde_derive", ] @@ -2668,13 +2668,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2690,9 +2690,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "itoa", "ryu", @@ -2763,9 +2763,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ "libc", ] @@ -2899,9 +2899,9 @@ dependencies = [ [[package]] name = "syn" -version 
= "2.0.58" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", @@ -2937,7 +2937,7 @@ checksum = "f9b53c7124dd88026d5d98a1eb1fd062a578b7d783017c9298825526c7fb6427" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2956,22 +2956,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2995,9 +2995,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.34" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", "num-conv", @@ -3074,7 +3074,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3184,7 +3184,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3369,7 +3369,7 @@ source = "git+https://github.com/matter-labs/vise.git?rev=a5bb80c9ce7168663114ee dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3418,7 +3418,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-shared", ] @@ -3440,7 +3440,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3461,18 +3461,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi" version = "0.3.9" @@ -3491,11 +3479,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "134306a13c5647ad6453e8deaec55d3a44d6021970129e6188735e74bf546697" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -3510,7 +3498,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" 
dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3528,7 +3516,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3548,17 +3536,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -3569,9 +3558,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" @@ -3581,9 +3570,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" @@ -3593,9 +3582,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" @@ -3605,9 +3600,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" @@ -3617,9 +3612,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = 
"4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" @@ -3629,9 +3624,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" @@ -3641,9 +3636,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "yansi" @@ -3668,14 +3663,14 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "zeroize" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" +checksum = "63381fa6624bf92130a6b87c0d07380116f80b565c42cf0d754136f0238359ef" dependencies = [ "zeroize_derive", ] @@ -3688,7 +3683,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3766,7 +3761,6 @@ dependencies = [ "zksync_consensus_network", "zksync_consensus_roles", "zksync_consensus_storage", - "zksync_consensus_sync_blocks", "zksync_consensus_utils", "zksync_protobuf", ] @@ -3840,25 +3834,6 @@ dependencies = [ "zksync_protobuf_build", ] -[[package]] -name = "zksync_consensus_sync_blocks" -version = "0.1.0" -dependencies = [ - "anyhow", - "assert_matches", - "async-trait", - "rand 0.8.5", - "test-casing", - "thiserror", - "tokio", - "tracing", - "zksync_concurrency", - "zksync_consensus_network", - "zksync_consensus_roles", - "zksync_consensus_storage", - "zksync_consensus_utils", -] - [[package]] name = "zksync_consensus_tools" version = "0.1.0" @@ -3927,14 +3902,14 @@ name = "zksync_protobuf_build" version = "0.1.0" dependencies = [ "anyhow", - "heck 0.5.0", + "heck", "prettyplease", "proc-macro2", "prost-build", "prost-reflect", "protox", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] diff --git a/node/Cargo.toml b/node/Cargo.toml index 32aa5c97..c42f3ac3 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -3,7 +3,6 @@ members = [ "actors/bft", "actors/executor", "actors/network", - "actors/sync_blocks", "libs/concurrency", "libs/crypto", "libs/protobuf", @@ -30,7 +29,6 @@ zksync_consensus_executor = { path = "actors/executor" } zksync_consensus_network = { path = "actors/network" } zksync_consensus_roles = { path = "libs/roles" } zksync_consensus_storage = { path = "libs/storage" } -zksync_consensus_sync_blocks = { path = "actors/sync_blocks" } zksync_consensus_tools = { path = "tools" } zksync_consensus_utils = { path = "libs/utils" } diff --git a/node/actors/bft/src/testonly/node.rs b/node/actors/bft/src/testonly/node.rs index d6a25774..d08a52d0 100644 --- a/node/actors/bft/src/testonly/node.rs +++ b/node/actors/bft/src/testonly/node.rs @@ -81,10 +81,6 @@ impl Node { 
network::io::OutputMessage::Consensus(req) => { con_send.send(io::InputMessage::Network(req)); } - network::io::OutputMessage::SyncBlocks(_) => { - // Drop message related to block syncing; the nodes should work fine - // without them. - } } } Ok(()) diff --git a/node/actors/executor/Cargo.toml b/node/actors/executor/Cargo.toml index 3fec785b..be95dcb3 100644 --- a/node/actors/executor/Cargo.toml +++ b/node/actors/executor/Cargo.toml @@ -13,7 +13,6 @@ zksync_consensus_crypto.workspace = true zksync_consensus_network.workspace = true zksync_consensus_roles.workspace = true zksync_consensus_storage.workspace = true -zksync_consensus_sync_blocks.workspace = true zksync_consensus_utils.workspace = true zksync_protobuf.workspace = true diff --git a/node/actors/executor/src/io.rs b/node/actors/executor/src/io.rs index 5379092f..fe8df10a 100644 --- a/node/actors/executor/src/io.rs +++ b/node/actors/executor/src/io.rs @@ -10,9 +10,6 @@ use zksync_consensus_bft::io::{ use zksync_consensus_network::io::{ InputMessage as NetworkInputMessage, OutputMessage as NetworkOutputMessage, }; -use zksync_consensus_sync_blocks::io::{ - InputMessage as SyncBlocksInputMessage, OutputMessage as SyncBlocksOutputMessage, -}; use zksync_consensus_utils::pipe::DispatcherPipe; /// The IO dispatcher, it is the main struct to handle actor messages. It simply contains a sender and a receiver for @@ -21,8 +18,6 @@ use zksync_consensus_utils::pipe::DispatcherPipe; pub(super) struct Dispatcher { consensus_input: channel::UnboundedSender, consensus_output: channel::UnboundedReceiver, - sync_blocks_input: channel::UnboundedSender, - sync_blocks_output: channel::UnboundedReceiver, network_input: channel::UnboundedSender, network_output: channel::UnboundedReceiver, } @@ -31,14 +26,11 @@ impl Dispatcher { /// Creates a new IO Dispatcher. pub(super) fn new( consensus_pipe: DispatcherPipe, - sync_blocks_pipe: DispatcherPipe, network_pipe: DispatcherPipe, ) -> Self { Dispatcher { consensus_input: consensus_pipe.send, consensus_output: consensus_pipe.recv, - sync_blocks_input: sync_blocks_pipe.send, - sync_blocks_output: sync_blocks_pipe.recv, network_input: network_pipe.send, network_output: network_pipe.recv, } @@ -60,17 +52,6 @@ impl Dispatcher { Ok(()) }); - s.spawn(async { - while let Ok(msg) = self.sync_blocks_output.recv(ctx).await { - match msg { - SyncBlocksOutputMessage::Network(message) => { - self.network_input.send(message.into()); - } - } - } - Ok(()) - }); - // Start a task to handle the messages from the network actor. 
s.spawn(async { while let Ok(msg) = self.network_output.recv(ctx).await { @@ -79,10 +60,6 @@ impl Dispatcher { self.consensus_input .send(ConsensusInputMessage::Network(message)); } - NetworkOutputMessage::SyncBlocks(message) => { - self.sync_blocks_input - .send(SyncBlocksInputMessage::Network(message)); - } } } Ok(()) diff --git a/node/actors/executor/src/lib.rs b/node/actors/executor/src/lib.rs index 9c4e4688..d75d8f1d 100644 --- a/node/actors/executor/src/lib.rs +++ b/node/actors/executor/src/lib.rs @@ -11,7 +11,6 @@ use zksync_consensus_bft as bft; use zksync_consensus_network as network; use zksync_consensus_roles::{node, validator}; use zksync_consensus_storage::{BlockStore, ReplicaStore}; -use zksync_consensus_sync_blocks as sync_blocks; use zksync_consensus_utils::pipe; use zksync_protobuf::kB; @@ -95,6 +94,7 @@ impl Executor { validator_key: self.validator.as_ref().map(|v| v.key.clone()), ping_timeout: Some(time::Duration::seconds(10)), max_block_size: self.config.max_payload_size.saturating_add(kB), + max_block_queue_size: 20, tcp_accept_rate: limiter::Rate { burst: 10, refresh: time::Duration::milliseconds(100), @@ -126,27 +126,13 @@ impl Executor { // Generate the communication pipes. We have one for each actor. let (consensus_actor_pipe, consensus_dispatcher_pipe) = pipe::new(); - let (sync_blocks_actor_pipe, sync_blocks_dispatcher_pipe) = pipe::new(); let (network_actor_pipe, network_dispatcher_pipe) = pipe::new(); // Create the IO dispatcher. - let mut dispatcher = Dispatcher::new( - consensus_dispatcher_pipe, - sync_blocks_dispatcher_pipe, - network_dispatcher_pipe, - ); + let mut dispatcher = Dispatcher::new(consensus_dispatcher_pipe, network_dispatcher_pipe); tracing::debug!("Starting actors in separate threads."); scope::run!(ctx, |ctx, s| async { s.spawn_blocking(|| dispatcher.run(ctx).context("IO Dispatcher stopped")); - s.spawn(async { - let (net, runner) = network::Network::new( - network_config, - self.block_store.clone(), - network_actor_pipe, - ); - net.register_metrics(); - runner.run(ctx).await.context("Network stopped") - }); if let Some(validator) = self.validator { s.spawn(async { let validator = validator; @@ -162,10 +148,10 @@ impl Executor { .context("Consensus stopped") }); } - sync_blocks::Config::new() - .run(ctx, sync_blocks_actor_pipe, self.block_store.clone()) - .await - .context("Syncing blocks stopped") + let (net, runner) = + network::Network::new(network_config, self.block_store.clone(), network_actor_pipe); + net.register_metrics(); + runner.run(ctx).await.context("Network stopped") }) .await } diff --git a/node/actors/network/src/config.rs b/node/actors/network/src/config.rs index ab297458..e757a8fc 100644 --- a/node/actors/network/src/config.rs +++ b/node/actors/network/src/config.rs @@ -14,8 +14,10 @@ pub struct RpcConfig { pub push_validator_addrs_rate: limiter::Rate, /// Max rate of sending/receiving push_block_store_state messages. pub push_block_store_state_rate: limiter::Rate, - /// Max rate of sending/receiving get_block RPCs. + /// Max rate of sending/receiving `get_block` RPCs. pub get_block_rate: limiter::Rate, + /// Timeout for the `get_block` RPC. + pub get_block_timeout: Option, /// Max rate of sending/receiving consensus messages. 
     pub consensus_rate: limiter::Rate,
 }
 
@@ -35,6 +37,7 @@ impl Default for RpcConfig {
                 burst: 10,
                 refresh: time::Duration::milliseconds(100),
             },
+            get_block_timeout: Some(time::Duration::seconds(10)),
             consensus_rate: limiter::Rate {
                 burst: 10,
                 refresh: time::Duration::ZERO,
@@ -84,4 +87,9 @@ pub struct Config {
     pub tcp_accept_rate: limiter::Rate,
     /// Rate limiting config for RPCs.
     pub rpc: RpcConfig,
+    /// Maximum number of not-yet-persisted blocks fetched from the network.
+    /// If reached, the network actor will wait for more blocks to get persisted
+    /// before fetching the next ones. This is useful for limiting memory consumption
+    /// when the block persisting rate is low.
+    pub max_block_queue_size: usize,
 }
diff --git a/node/actors/network/src/consensus/mod.rs b/node/actors/network/src/consensus/mod.rs
index 1acf0c24..bbd7bd87 100644
--- a/node/actors/network/src/consensus/mod.rs
+++ b/node/actors/network/src/consensus/mod.rs
@@ -175,8 +175,8 @@ impl Network {
         tracing::info!("peer = {peer:?}");
         let res = scope::run!(ctx, |ctx, s| async {
             let mut service = rpc::Service::new()
-                .add_server(rpc::ping::Server, rpc::ping::RATE)
-                .add_server(self, self.gossip.cfg.rpc.consensus_rate);
+                .add_server(ctx, rpc::ping::Server, rpc::ping::RATE)
+                .add_server(ctx, self, self.gossip.cfg.rpc.consensus_rate);
             if let Some(ping_timeout) = &self.gossip.cfg.ping_timeout {
                 let ping_client = rpc::Client::<rpc::ping::Rpc>::new(ctx, rpc::ping::RATE);
                 service = service.add_client(&ping_client);
@@ -215,7 +215,7 @@ impl Network {
             rpc::Client::<rpc::consensus::Rpc>::new(ctx, self.gossip.cfg.rpc.consensus_rate);
         let res = scope::run!(ctx, |ctx, s| async {
             let mut service = rpc::Service::new()
-                .add_server(rpc::ping::Server, rpc::ping::RATE)
+                .add_server(ctx, rpc::ping::Server, rpc::ping::RATE)
                 .add_client(&consensus_cli);
             if let Some(ping_timeout) = &self.gossip.cfg.ping_timeout {
                 let ping_client = rpc::Client::<rpc::ping::Rpc>::new(ctx, rpc::ping::RATE);
diff --git a/node/actors/network/src/consensus/tests.rs b/node/actors/network/src/consensus/tests.rs
index aaf4f21e..3b3c5bf6 100644
--- a/node/actors/network/src/consensus/tests.rs
+++ b/node/actors/network/src/consensus/tests.rs
@@ -1,3 +1,4 @@
+#![allow(irrefutable_let_patterns)]
 use super::*;
 use crate::{io, metrics, preface, rpc, testonly};
 use assert_matches::assert_matches;
diff --git a/node/actors/network/src/gossip/fetch.rs b/node/actors/network/src/gossip/fetch.rs
new file mode 100644
index 00000000..115852bd
--- /dev/null
+++ b/node/actors/network/src/gossip/fetch.rs
@@ -0,0 +1,104 @@
+#![allow(unused)]
+use anyhow::Context as _;
+use std::collections::BTreeMap;
+use zksync_concurrency::{ctx, oneshot, scope, sync};
+use zksync_consensus_roles::validator;
+use zksync_consensus_storage::BlockStoreState;
+
+/// A block fetching request.
+type Call = (validator::BlockNumber, oneshot::Sender<()>);
+
+/// Inner state of the `Queue`.
+type Inner = BTreeMap<validator::BlockNumber, oneshot::Sender<()>>;
+
+/// Queue of block fetch requests.
+pub(crate) struct Queue(sync::watch::Sender<Inner>);
+
+impl Default for Queue {
+    fn default() -> Self {
+        Self(sync::watch::channel(Inner::default()).0)
+    }
+}
+
+impl Queue {
+    /// Requests a block from peers and waits until it is stored.
+    /// Note: in the current implementation concurrent calls for the same block number are
+    /// unsupported; a second call overrides the first one.
+    pub(crate) async fn request(
+        &self,
+        ctx: &ctx::Ctx,
+        n: validator::BlockNumber,
+    ) -> ctx::OrCanceled<()> {
+        loop {
+            let (send, recv) = oneshot::channel();
+            self.0.send_if_modified(|x| {
+                x.insert(n, send);
+                // Send iff the lowest requested block changed.
+                x.first_key_value().unwrap().0 == &n
+            });
+            match recv.recv_or_disconnected(ctx).await {
+                // Return if completed.
+                Ok(Ok(())) => return Ok(()),
+                // Retry if failed.
+                Ok(Err(sync::Disconnected)) => continue,
+                // Remove the request from the queue if canceled.
+                Err(ctx::Canceled) => {
+                    self.0.send_if_modified(|x| {
+                        let modified = x.first_key_value().map_or(false, |(k, _)| k == &n);
+                        x.remove(&n);
+                        // Send iff the lowest requested block changed.
+                        modified
+                    });
+                    return Err(ctx::Canceled);
+                }
+            }
+        }
+    }
+
+    /// Accepts a block fetch request that is contained in the available blocks range.
+    /// Caller is responsible for fetching the block and adding it to the block store.
+    pub(crate) async fn accept(
+        &self,
+        ctx: &ctx::Ctx,
+        available: &mut sync::watch::Receiver<BlockStoreState>,
+    ) -> ctx::OrCanceled<Call> {
+        let sub = &mut self.0.subscribe();
+        while ctx.is_active() {
+            // Wait for the lowest requested block to be available.
+            // This scope is always cancelled, so we ignore the result.
+            let mut block_number = None;
+            let _: Result<(), _> = scope::run!(ctx, |ctx, s| async {
+                if let Some(n) = sub.borrow_and_update().first_key_value().map(|x| *x.0) {
+                    let n = ctx::NoCopy(n);
+                    s.spawn::<()>(async {
+                        let n = n;
+                        sync::wait_for(ctx, available, |a| a.contains(n.0)).await?;
+                        block_number = Some(n.0);
+                        Err(ctx::Canceled)
+                    });
+                }
+                // If the lowest requested block changes, we need to restart the wait.
+                sync::changed(ctx, sub).await?;
+                Err(ctx::Canceled)
+            })
+            .await;
+            let Some(block_number) = block_number else {
+                continue;
+            };
+
+            // Remove the request from the queue.
+            let mut res = None;
+            self.0.send_if_modified(|x| {
+                res = x.remove_entry(&block_number);
+                // Send iff the lowest requested block changed.
+                res.is_some() && !x.is_empty()
+            });
+            // It may happen that someone else accepts our request faster.
+            // In this case we need to wait again.
+            if let Some(res) = res {
+                return Ok(res);
+            }
+        }
+        Err(ctx::Canceled)
+    }
+}
diff --git a/node/actors/network/src/gossip/mod.rs b/node/actors/network/src/gossip/mod.rs
index 2b88dbb2..e8bf588c 100644
--- a/node/actors/network/src/gossip/mod.rs
+++ b/node/actors/network/src/gossip/mod.rs
@@ -12,42 +12,38 @@
 //! Static connections constitute a rigid "backbone" of the gossip network, which is insensitive to
 //! eclipse attack. Dynamic connections are supposed to improve the properties of the gossip
 //! network graph (minimize its diameter, increase connectedness).
-use crate::{gossip::ValidatorAddrsWatch, io, pool::PoolWatch, rpc, Config};
-use anyhow::Context as _;
+use crate::{gossip::ValidatorAddrsWatch, io, pool::PoolWatch, Config};
 use std::sync::{atomic::AtomicUsize, Arc};
+pub(crate) use validator_addrs::*;
+use zksync_concurrency::{ctx, ctx::channel, scope, sync};
+use zksync_consensus_roles::{node, validator};
+use zksync_consensus_storage::BlockStore;
 
+mod fetch;
 mod handshake;
 mod runner;
 #[cfg(test)]
+mod testonly;
+#[cfg(test)]
 mod tests;
 mod validator_addrs;
 
-pub(crate) use validator_addrs::*;
-use zksync_concurrency::{ctx, ctx::channel};
-use zksync_consensus_roles::{node, validator};
-use zksync_consensus_storage::BlockStore;
-use zksync_protobuf::kB;
-
-/// State of the gossip connection.
-pub(crate) struct Connection {
-    /// `get_block` rpc client.
-    pub(crate) get_block: rpc::Client<rpc::get_block::Rpc>,
-}
-
 /// Gossip network state.
 pub(crate) struct Network {
     /// Gossip network configuration.
     pub(crate) cfg: Config,
     /// Currently open inbound connections.
-    pub(crate) inbound: PoolWatch<node::PublicKey, Arc<Connection>>,
+    pub(crate) inbound: PoolWatch<node::PublicKey, ()>,
     /// Currently open outbound connections.
-    pub(crate) outbound: PoolWatch<node::PublicKey, Arc<Connection>>,
+    pub(crate) outbound: PoolWatch<node::PublicKey, ()>,
     /// Current state of knowledge about validators' endpoints.
     pub(crate) validator_addrs: ValidatorAddrsWatch,
     /// Block store to serve `get_block` requests from.
     pub(crate) block_store: Arc<BlockStore>,
     /// Output pipe of the network actor.
     pub(crate) sender: channel::UnboundedSender<io::OutputMessage>,
+    /// Queue of block fetching requests.
+    pub(crate) fetch_queue: fetch::Queue,
     /// TESTONLY: how many time push_validator_addrs rpc was called by the peers.
     pub(crate) push_validator_addrs_calls: AtomicUsize,
 }
@@ -67,8 +63,9 @@ impl Network {
             ),
             outbound: PoolWatch::new(cfg.gossip.static_outbound.keys().cloned().collect(), 0),
             validator_addrs: ValidatorAddrsWatch::default(),
-            block_store,
             cfg,
+            fetch_queue: fetch::Queue::default(),
+            block_store,
             push_validator_addrs_calls: 0.into(),
         })
     }
@@ -78,26 +75,32 @@ impl Network {
         self.block_store.genesis()
     }
 
-    /// Sends a GetBlock RPC to the given peer.
-    pub(crate) async fn get_block(
-        &self,
-        ctx: &ctx::Ctx,
-        recipient: &node::PublicKey,
-        number: validator::BlockNumber,
-    ) -> anyhow::Result<Option<validator::FinalBlock>> {
-        let outbound = self.outbound.current();
-        let inbound = self.inbound.current();
-        Ok(outbound
-            .get(recipient)
-            .or(inbound.get(recipient))
-            .context("recipient is unreachable")?
-            .get_block
-            .call(
-                ctx,
-                &rpc::get_block::Req(number),
-                self.cfg.max_block_size.saturating_add(kB),
-            )
-            .await?
-            .0)
+    /// Task fetching from peers the blocks that are not yet present in storage.
+    pub(crate) async fn run_block_fetcher(&self, ctx: &ctx::Ctx) {
+        let sem = sync::Semaphore::new(self.cfg.max_block_queue_size);
+        let _: ctx::OrCanceled<()> = scope::run!(ctx, |ctx, s| async {
+            let mut next = self.block_store.queued().next();
+            loop {
+                let permit = sync::acquire(ctx, &sem).await?;
+                let number = ctx::NoCopy(next);
+                next = next + 1;
+                // Fetch a block asynchronously.
+                s.spawn(async {
+                    let _permit = permit;
+                    let number = number.into();
+                    let _: ctx::OrCanceled<()> = scope::run!(ctx, |ctx, s| async {
+                        s.spawn_bg(self.fetch_queue.request(ctx, number));
+                        // Cancel fetching as soon as block is queued for storage.
+                        self.block_store.wait_until_queued(ctx, number).await?;
+                        Err(ctx::Canceled)
+                    })
+                    .await;
+                    // Wait until the block is actually persisted, so that the amount of blocks
+                    // stored in memory is bounded.
+ self.block_store.wait_until_persisted(ctx, number).await + }); + } + }) + .await; } } diff --git a/node/actors/network/src/gossip/runner.rs b/node/actors/network/src/gossip/runner.rs index f810d1a6..10e06755 100644 --- a/node/actors/network/src/gossip/runner.rs +++ b/node/actors/network/src/gossip/runner.rs @@ -1,13 +1,12 @@ -use super::{handshake, Connection, Network, ValidatorAddrs}; -use crate::{io, noise, preface, rpc}; +use super::{handshake, Network, ValidatorAddrs}; +use crate::{noise, preface, rpc}; use anyhow::Context as _; use async_trait::async_trait; use rand::seq::SliceRandom; -use std::sync::{atomic::Ordering, Arc}; -use tracing::Instrument as _; -use zksync_concurrency::{ctx, net, oneshot, scope, sync}; +use std::sync::atomic::Ordering; +use zksync_concurrency::{ctx, net, scope, sync}; use zksync_consensus_roles::node; -use zksync_consensus_storage::BlockStore; +use zksync_consensus_storage::{BlockStore, BlockStoreState}; use zksync_protobuf::kB; struct PushValidatorAddrsServer<'a>(&'a Network); @@ -33,32 +32,36 @@ impl rpc::Handler for PushValidatorAddrsServer<' } } -#[derive(Clone, Copy)] struct PushBlockStoreStateServer<'a> { - peer: &'a node::PublicKey, + state: sync::watch::Sender, net: &'a Network, } +impl<'a> PushBlockStoreStateServer<'a> { + fn new(net: &'a Network) -> Self { + Self { + state: sync::watch::channel(BlockStoreState { + first: net.genesis().fork.first_block, + last: None, + }) + .0, + net, + } + } +} + #[async_trait] -impl rpc::Handler for PushBlockStoreStateServer<'_> { +impl rpc::Handler for &PushBlockStoreStateServer<'_> { fn max_req_size(&self) -> usize { 10 * kB } async fn handle( &self, - ctx: &ctx::Ctx, + _ctx: &ctx::Ctx, req: rpc::push_block_store_state::Req, ) -> anyhow::Result<()> { - let (response, response_receiver) = oneshot::channel(); - let message = io::SyncBlocksRequest::UpdatePeerSyncState { - peer: self.peer.clone(), - state: req.0, - response, - }; - self.net.sender.send(message.into()); - // TODO(gprusak): disconnection means that the message was rejected OR - // that `SyncBlocks` actor is missing (in tests), which leads to unnecessary disconnects. - let _ = response_receiver.recv_or_disconnected(ctx).await?; + req.0.verify(self.net.genesis())?; + self.state.send_replace(req.0); Ok(()) } } @@ -79,13 +82,7 @@ impl rpc::Handler for &BlockStore { impl Network { /// Manages lifecycle of a single connection. 
- async fn run_stream( - &self, - ctx: &ctx::Ctx, - peer: &node::PublicKey, - stream: noise::Stream, - conn: &Connection, - ) -> anyhow::Result<()> { + async fn run_stream(&self, ctx: &ctx::Ctx, stream: noise::Stream) -> anyhow::Result<()> { let push_validator_addrs_client = rpc::Client::::new( ctx, self.cfg.rpc.push_validator_addrs_rate, @@ -95,22 +92,26 @@ impl Network { ctx, self.cfg.rpc.push_block_store_state_rate, ); - let push_block_store_state_server = PushBlockStoreStateServer { peer, net: self }; + let push_block_store_state_server = PushBlockStoreStateServer::new(self); + let get_block_client = + rpc::Client::::new(ctx, self.cfg.rpc.get_block_rate); scope::run!(ctx, |ctx, s| async { let mut service = rpc::Service::new() .add_client(&push_validator_addrs_client) .add_server( + ctx, push_validator_addrs_server, self.cfg.rpc.push_validator_addrs_rate, ) .add_client(&push_block_store_state_client) .add_server( - push_block_store_state_server, + ctx, + &push_block_store_state_server, self.cfg.rpc.push_block_store_state_rate, ) - .add_client(&conn.get_block) - .add_server(&*self.block_store, self.cfg.rpc.get_block_rate) - .add_server(rpc::ping::Server, rpc::ping::RATE); + .add_client(&get_block_client) + .add_server(ctx, &*self.block_store, self.cfg.rpc.get_block_rate) + .add_server(ctx, rpc::ping::Server, rpc::ping::RATE); if let Some(ping_timeout) = &self.cfg.ping_timeout { let ping_client = rpc::Client::::new(ctx, rpc::ping::RATE); @@ -134,8 +135,8 @@ impl Network { } }); + // Push validator addrs updates to peer. s.spawn::<()>(async { - // Push validator addrs updates to peer. let mut old = ValidatorAddrs::default(); let mut sub = self.validator_addrs.subscribe(); sub.mark_changed(); @@ -151,6 +152,51 @@ impl Network { } }); + // Perform get_block calls to peer. + s.spawn::<()>(async { + let state = &mut push_block_store_state_server.state.subscribe(); + loop { + let call = get_block_client.reserve(ctx).await?; + let (req, send_resp) = self.fetch_queue.accept(ctx, state).await?; + let req = rpc::get_block::Req(req); + s.spawn(async { + let req = req; + // Failing to fetch a block causes a disconnect: + // - peer predeclares which blocks are available and race condition + // with block pruning should be very rare, so we can consider + // an empty response to be offending + // - a stream for the call has been already reserved, + // so the peer is expected to answer immediately. The timeout + // should be high enough to accommodate network hiccups + // - a disconnect is not a ban, so the peer is free to try to + // reconnect. + async { + let ctx_with_timeout = + self.cfg.rpc.get_block_timeout.map(|t| ctx.with_timeout(t)); + let ctx = ctx_with_timeout.as_ref().unwrap_or(ctx); + let block = call + .call(ctx, &req, self.cfg.max_block_size.saturating_add(kB)) + .await? + .0 + .context("empty response")?; + anyhow::ensure!(block.number() == req.0, "received wrong block"); + // Storing the block will fail in case block is invalid. + self.block_store + .queue_block(ctx, block) + .await + .context("queue_block()")?; + tracing::info!("fetched block {}", req.0); + // Send a response that fetching was successful. + // Ignore disconnection error. 
+ let _ = send_resp.send(()); + anyhow::Ok(()) + } + .await + .with_context(|| format!("get_block({})", req.0)) + }); + } + }); + service.run(ctx, stream).await?; Ok(()) }) @@ -168,11 +214,8 @@ impl Network { let peer = handshake::inbound(ctx, &self.cfg.gossip, self.genesis().hash(), &mut stream).await?; tracing::info!("peer = {peer:?}"); - let conn = Arc::new(Connection { - get_block: rpc::Client::::new(ctx, self.cfg.rpc.get_block_rate), - }); - self.inbound.insert(peer.clone(), conn.clone()).await?; - let res = self.run_stream(ctx, &peer, stream, &conn).await; + self.inbound.insert(peer.clone(), ()).await?; + let res = self.run_stream(ctx, stream).await; self.inbound.remove(&peer).await; res } @@ -202,14 +245,8 @@ impl Network { ) .await?; tracing::info!("peer = {peer:?}"); - let conn = Arc::new(Connection { - get_block: rpc::Client::::new(ctx, self.cfg.rpc.get_block_rate), - }); - self.outbound.insert(peer.clone(), conn.clone()).await?; - let res = self - .run_stream(ctx, peer, stream, &conn) - .instrument(tracing::info_span!("out", ?addr)) - .await; + self.outbound.insert(peer.clone(), ()).await?; + let res = self.run_stream(ctx, stream).await; self.outbound.remove(peer).await; res } diff --git a/node/actors/network/src/gossip/testonly.rs b/node/actors/network/src/gossip/testonly.rs new file mode 100644 index 00000000..fcf8bee4 --- /dev/null +++ b/node/actors/network/src/gossip/testonly.rs @@ -0,0 +1,147 @@ +#![allow(dead_code)] +use super::*; +use crate::{frame, mux, noise, preface, rpc, Config, GossipConfig}; +use anyhow::Context as _; +use rand::Rng as _; +use std::collections::BTreeMap; +use zksync_concurrency::{ctx, limiter}; +use zksync_consensus_roles::validator; + +/// Connection. +pub(super) struct Conn { + accept: BTreeMap>, + connect: BTreeMap>, +} + +/// Background task of the connection. +pub(super) struct ConnRunner { + mux: mux::Mux, + stream: noise::Stream, +} + +impl ConnRunner { + /// Runs the background task of the connection. + pub(super) async fn run(self, ctx: &ctx::Ctx) -> Result<(), mux::RunError> { + self.mux.run(ctx, self.stream).await + } +} + +fn mux_entry(ctx: &ctx::Ctx) -> (mux::CapabilityId, Arc) { + ( + R::CAPABILITY_ID, + mux::StreamQueue::new(ctx, R::INFLIGHT, limiter::Rate::INF), + ) +} + +/// Establishes an anonymous gossipnet connection to a peer. +pub(super) async fn connect( + ctx: &ctx::Ctx, + peer: &Config, + genesis: validator::GenesisHash, +) -> ctx::Result<(Conn, ConnRunner)> { + assert!(peer.gossip.dynamic_inbound_limit > 0); + let addr = peer + .public_addr + .resolve(ctx) + .await? + .context("peer.public_addr.resolve()")?[0]; + let mut stream = preface::connect(ctx, addr, preface::Endpoint::GossipNet) + .await + .context("preface::connect()")?; + let cfg = GossipConfig { + key: ctx.rng().gen(), + dynamic_inbound_limit: 0, + static_outbound: [].into(), + static_inbound: [].into(), + }; + handshake::outbound(ctx, &cfg, genesis, &mut stream, &peer.gossip.key.public()) + .await + .context("handshake::outbound()")?; + let conn = Conn { + accept: [ + mux_entry::(ctx), + mux_entry::(ctx), + ] + .into(), + connect: [ + mux_entry::(ctx), + mux_entry::(ctx), + ] + .into(), + }; + let mux = mux::Mux { + cfg: Arc::new(rpc::MUX_CONFIG.clone()), + accept: conn.accept.clone(), + connect: conn.connect.clone(), + }; + Ok((conn, ConnRunner { mux, stream })) +} + +impl Conn { + /// Opens a server-side stream. 
+ pub(super) async fn open_server( + &self, + ctx: &ctx::Ctx, + ) -> ctx::OrCanceled> { + Ok(ServerStream( + self.connect + .get(&R::CAPABILITY_ID) + .unwrap() + .open(ctx) + .await?, + std::marker::PhantomData, + )) + } + + /// Opens a client-side stream. + pub(super) async fn open_client( + &self, + ctx: &ctx::Ctx, + ) -> ctx::OrCanceled> { + Ok(ClientStream( + self.accept + .get(&R::CAPABILITY_ID) + .unwrap() + .open(ctx) + .await?, + std::marker::PhantomData, + )) + } +} + +/// Client side stream. +pub(super) struct ClientStream(mux::Stream, std::marker::PhantomData); +/// Server side stream. +pub(super) struct ServerStream(mux::Stream, std::marker::PhantomData); + +impl ClientStream { + /// Sends a request. + pub(super) async fn send(&mut self, ctx: &ctx::Ctx, req: &R::Req) -> anyhow::Result<()> { + frame::mux_send_proto(ctx, &mut self.0.write, req).await?; + self.0.write.flush(ctx).await?; + Ok(()) + } + + /// Receives a response. + pub(super) async fn recv(mut self, ctx: &ctx::Ctx) -> anyhow::Result { + Ok(frame::mux_recv_proto(ctx, &mut self.0.read, usize::MAX) + .await? + .0) + } +} + +impl ServerStream { + /// Sends a response. + pub(super) async fn send(mut self, ctx: &ctx::Ctx, resp: &R::Resp) -> anyhow::Result<()> { + frame::mux_send_proto(ctx, &mut self.0.write, resp).await?; + self.0.write.flush(ctx).await?; + Ok(()) + } + + /// Receives a request. + pub(super) async fn recv(&mut self, ctx: &ctx::Ctx) -> anyhow::Result { + Ok(frame::mux_recv_proto(ctx, &mut self.0.read, usize::MAX) + .await? + .0) + } +} diff --git a/node/actors/network/src/gossip/tests/fetch.rs b/node/actors/network/src/gossip/tests/fetch.rs new file mode 100644 index 00000000..e4cba22d --- /dev/null +++ b/node/actors/network/src/gossip/tests/fetch.rs @@ -0,0 +1,313 @@ +//! Unit tests of `get_block` RPC. 
+use crate::{gossip, mux, rpc}; +use assert_matches::assert_matches; +use rand::Rng as _; +use tracing::Instrument as _; +use zksync_concurrency::{ctx, limiter, scope, testonly::abort_on_panic}; +use zksync_consensus_roles::validator; +use zksync_consensus_storage::{testonly::new_store, BlockStoreState}; + +#[tokio::test] +async fn test_simple() { + abort_on_panic(); + let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + let mut setup = validator::testonly::Setup::new(rng, 1); + setup.push_blocks(rng, 2); + let mut cfg = crate::testonly::new_configs(rng, &setup, 0)[0].clone(); + cfg.rpc.push_block_store_state_rate = limiter::Rate::INF; + cfg.rpc.get_block_rate = limiter::Rate::INF; + cfg.rpc.get_block_timeout = None; + cfg.validator_key = None; + + scope::run!(ctx, |ctx, s| async { + let (store, runner) = new_store(ctx, &setup.genesis).await; + s.spawn_bg(runner.run(ctx)); + let (_node, runner) = crate::testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node"))); + + let (conn, runner) = gossip::testonly::connect(ctx, &cfg, setup.genesis.hash()) + .await + .unwrap(); + s.spawn_bg(async { + assert_matches!(runner.run(ctx).await, Err(mux::RunError::Canceled(_))); + Ok(()) + }); + + tracing::info!("Store is empty so requesting a block should return an empty response."); + let mut stream = conn.open_client::(ctx).await.unwrap(); + stream + .send(ctx, &rpc::get_block::Req(setup.blocks[0].number())) + .await + .unwrap(); + let resp = stream.recv(ctx).await.unwrap(); + assert_eq!(resp.0, None); + + tracing::info!("Insert a block."); + store + .queue_block(ctx, setup.blocks[0].clone()) + .await + .unwrap(); + loop { + let mut stream = conn + .open_server::(ctx) + .await + .unwrap(); + let state = stream.recv(ctx).await.unwrap(); + stream.send(ctx, &()).await.unwrap(); + if state.0.contains(setup.blocks[0].number()) { + tracing::info!("peer reported to have a block"); + break; + } + } + tracing::info!("fetch that block."); + let mut stream = conn.open_client::(ctx).await.unwrap(); + stream + .send(ctx, &rpc::get_block::Req(setup.blocks[0].number())) + .await + .unwrap(); + let resp = stream.recv(ctx).await.unwrap(); + assert_eq!(resp.0, Some(setup.blocks[0].clone())); + + tracing::info!("Inform the peer that we have {}", setup.blocks[1].number()); + let mut stream = conn + .open_client::(ctx) + .await + .unwrap(); + stream + .send( + ctx, + &rpc::push_block_store_state::Req(BlockStoreState { + first: setup.blocks[1].number(), + last: Some(setup.blocks[1].justification.clone()), + }), + ) + .await + .unwrap(); + stream.recv(ctx).await.unwrap(); + + tracing::info!("Wait for the client to request that block"); + let mut stream = conn.open_server::(ctx).await.unwrap(); + let req = stream.recv(ctx).await.unwrap(); + assert_eq!(req.0, setup.blocks[1].number()); + + tracing::info!("Return the requested block"); + stream + .send(ctx, &rpc::get_block::Resp(Some(setup.blocks[1].clone()))) + .await + .unwrap(); + + tracing::info!("Wait for the client to store that block"); + store + .wait_until_persisted(ctx, setup.blocks[1].number()) + .await + .unwrap(); + + Ok(()) + }) + .await + .unwrap(); +} + +#[tokio::test] +async fn test_concurrent_requests() { + abort_on_panic(); + let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + let mut setup = validator::testonly::Setup::new(rng, 1); + setup.push_blocks(rng, 10); + let mut cfg = crate::testonly::new_configs(rng, &setup, 0)[0].clone(); + 
cfg.rpc.push_block_store_state_rate = limiter::Rate::INF; + cfg.rpc.get_block_rate = limiter::Rate::INF; + cfg.rpc.get_block_timeout = None; + cfg.validator_key = None; + cfg.max_block_queue_size = setup.blocks.len(); + + scope::run!(ctx, |ctx, s| async { + let (store, runner) = new_store(ctx, &setup.genesis).await; + s.spawn_bg(runner.run(ctx)); + let (_node, runner) = crate::testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node"))); + + let mut conns = vec![]; + for _ in 0..4 { + let (conn, runner) = gossip::testonly::connect(ctx, &cfg, setup.genesis.hash()) + .await + .unwrap(); + s.spawn_bg(async { + assert_matches!(runner.run(ctx).await, Err(mux::RunError::Canceled(_))); + Ok(()) + }); + let mut stream = conn + .open_client::(ctx) + .await + .unwrap(); + stream + .send( + ctx, + &rpc::push_block_store_state::Req(BlockStoreState { + first: setup.blocks[0].number(), + last: Some(setup.blocks.last().unwrap().justification.clone()), + }), + ) + .await + .unwrap(); + stream.recv(ctx).await.unwrap(); + conns.push(conn); + } + + // Receive a bunch of concurrent requests on various connections. + let mut streams = vec![]; + for (i, block) in setup.blocks.iter().enumerate() { + let mut stream = conns[i % conns.len()] + .open_server::(ctx) + .await + .unwrap(); + let req = stream.recv(ctx).await.unwrap(); + assert_eq!(req.0, block.number()); + streams.push(stream); + } + + // Respond to the requests. + for (i, stream) in streams.into_iter().enumerate() { + stream + .send(ctx, &rpc::get_block::Resp(Some(setup.blocks[i].clone()))) + .await + .unwrap(); + } + Ok(()) + }) + .await + .unwrap(); +} + +#[tokio::test] +async fn test_bad_responses() { + abort_on_panic(); + let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + let mut setup = validator::testonly::Setup::new(rng, 1); + setup.push_blocks(rng, 2); + let mut cfg = crate::testonly::new_configs(rng, &setup, 0)[0].clone(); + cfg.rpc.push_block_store_state_rate = limiter::Rate::INF; + cfg.rpc.get_block_rate = limiter::Rate::INF; + cfg.rpc.get_block_timeout = None; + cfg.validator_key = None; + + scope::run!(ctx, |ctx, s| async { + let (store, runner) = new_store(ctx, &setup.genesis).await; + s.spawn_bg(runner.run(ctx)); + let (_node, runner) = crate::testonly::Instance::new(cfg.clone(), store.clone()); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node"))); + + let state = rpc::push_block_store_state::Req(BlockStoreState { + first: setup.blocks[0].number(), + last: Some(setup.blocks[0].justification.clone()), + }); + + for resp in [ + // Empty response even though we declared to have the block. + None, + // Wrong block. + Some(setup.blocks[1].clone()), + // Malformed block. 
+            {
+                let mut b = setup.blocks[0].clone();
+                b.justification = rng.gen();
+                Some(b)
+            },
+        ] {
+            tracing::info!("bad response = {resp:?}");
+
+            tracing::info!("Connect to peer");
+            let (conn, runner) = gossip::testonly::connect(ctx, &cfg, setup.genesis.hash())
+                .await
+                .unwrap();
+            let conn_task = s.spawn_bg(async { Ok(runner.run(ctx).await) });
+
+            tracing::info!("Inform the peer about the block that we possess");
+            let mut stream = conn
+                .open_client::<rpc::push_block_store_state::Rpc>(ctx)
+                .await
+                .unwrap();
+            stream.send(ctx, &state).await.unwrap();
+            stream.recv(ctx).await.unwrap();
+
+            tracing::info!("Wait for the client to request that block");
+            let mut stream = conn.open_server::<rpc::get_block::Rpc>(ctx).await.unwrap();
+            let req = stream.recv(ctx).await.unwrap();
+            assert_eq!(req.0, setup.blocks[0].number());
+
+            tracing::info!("Return a bad response");
+            stream.send(ctx, &rpc::get_block::Resp(resp)).await.unwrap();
+
+            tracing::info!("Wait for the peer to drop the connection");
+            assert_matches!(
+                conn_task.join(ctx).await.unwrap(),
+                Err(mux::RunError::Closed)
+            );
+        }
+        Ok(())
+    })
+    .await
+    .unwrap();
+}
+
+#[tokio::test]
+async fn test_retry() {
+    abort_on_panic();
+    let ctx = &ctx::test_root(&ctx::RealClock);
+    let rng = &mut ctx.rng();
+    let mut setup = validator::testonly::Setup::new(rng, 1);
+    setup.push_blocks(rng, 1);
+    let mut cfg = crate::testonly::new_configs(rng, &setup, 0)[0].clone();
+    cfg.rpc.push_block_store_state_rate = limiter::Rate::INF;
+    cfg.rpc.get_block_rate = limiter::Rate::INF;
+    cfg.rpc.get_block_timeout = None;
+    cfg.validator_key = None;
+
+    scope::run!(ctx, |ctx, s| async {
+        let (store, runner) = new_store(ctx, &setup.genesis).await;
+        s.spawn_bg(runner.run(ctx));
+        let (_node, runner) = crate::testonly::Instance::new(cfg.clone(), store.clone());
+        s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node")));
+
+        let state = rpc::push_block_store_state::Req(BlockStoreState {
+            first: setup.blocks[0].number(),
+            last: Some(setup.blocks[0].justification.clone()),
+        });
+
+        tracing::info!("establish a bunch of connections");
+        let mut conns = vec![];
+        for _ in 0..4 {
+            let (conn, runner) = gossip::testonly::connect(ctx, &cfg, setup.genesis.hash())
+                .await
+                .unwrap();
+            let task = s.spawn_bg(async { Ok(runner.run(ctx).await) });
+            let mut stream = conn
+                .open_client::<rpc::push_block_store_state::Rpc>(ctx)
+                .await
+                .unwrap();
+            stream.send(ctx, &state).await.unwrap();
+            stream.recv(ctx).await.unwrap();
+            conns.push((conn, task));
+        }
+
+        for (conn, task) in conns {
+            tracing::info!("Wait for the client to request a block");
+            let mut stream = conn.open_server::<rpc::get_block::Rpc>(ctx).await.unwrap();
+            let req = stream.recv(ctx).await.unwrap();
+            assert_eq!(req.0, setup.blocks[0].number());
+
+            tracing::info!("Return a bad response");
+            stream.send(ctx, &rpc::get_block::Resp(None)).await.unwrap();
+
+            tracing::info!("Wait for the peer to drop the connection");
+            assert_matches!(task.join(ctx).await.unwrap(), Err(mux::RunError::Closed));
+        }
+
+        Ok(())
+    })
+    .await
+    .unwrap();
+}
diff --git a/node/actors/network/src/gossip/tests.rs b/node/actors/network/src/gossip/tests/mod.rs
similarity index 67%
rename from node/actors/network/src/gossip/tests.rs
rename to node/actors/network/src/gossip/tests/mod.rs
index 40a50875..a8ba0cec 100644
--- a/node/actors/network/src/gossip/tests.rs
+++ b/node/actors/network/src/gossip/tests/mod.rs
@@ -1,5 +1,6 @@
 use super::*;
-use crate::{io, metrics, preface, rpc, testonly};
+use crate::{metrics, preface, rpc, testonly};
+use anyhow::Context as _;
 use assert_matches::assert_matches;
 use
pretty_assertions::assert_eq; use rand::Rng; @@ -7,16 +8,18 @@ use std::{ collections::{HashMap, HashSet}, sync::{atomic::Ordering, Arc}, }; -use test_casing::{test_casing, Product}; use tracing::Instrument as _; use zksync_concurrency::{ - ctx, net, oneshot, scope, sync, + ctx, net, scope, sync, testonly::{abort_on_panic, set_timeout}, time, }; -use zksync_consensus_roles::validator::{self, BlockNumber, FinalBlock}; +use zksync_consensus_roles::validator; use zksync_consensus_storage::testonly::new_store; +mod fetch; +mod syncing; + #[tokio::test] async fn test_one_connection_per_node() { abort_on_panic(); @@ -311,210 +314,6 @@ async fn test_genesis_mismatch() { .unwrap(); } -const EXCHANGED_STATE_COUNT: usize = 5; -const NETWORK_CONNECTIVITY_CASES: [(usize, usize); 5] = [(2, 1), (3, 2), (5, 3), (10, 4), (10, 7)]; - -/// Tests block syncing with global network synchronization (a next block becoming available -/// to all nodes only after all nodes have received previous `SyncState` updates from peers). -#[test_casing(5, NETWORK_CONNECTIVITY_CASES)] -#[tokio::test(flavor = "multi_thread")] -#[tracing::instrument(level = "trace")] -async fn syncing_blocks(node_count: usize, gossip_peers: usize) { - abort_on_panic(); - let _guard = set_timeout(time::Duration::seconds(5)); - - let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); - let rng = &mut ctx.rng(); - let mut setup = validator::testonly::Setup::new(rng, node_count); - setup.push_blocks(rng, EXCHANGED_STATE_COUNT); - let cfgs = testonly::new_configs(rng, &setup, gossip_peers); - scope::run!(ctx, |ctx, s| async { - let mut nodes = vec![]; - for (i, cfg) in cfgs.into_iter().enumerate() { - let (store, runner) = new_store(ctx, &setup.genesis).await; - s.spawn_bg(runner.run(ctx)); - let (node, runner) = testonly::Instance::new(cfg, store); - s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); - nodes.push(node); - } - for block in &setup.blocks { - for node in &nodes { - node.net - .gossip - .block_store - .queue_block(ctx, block.clone()) - .await - .context("queue_block()")?; - } - for node in &mut nodes { - wait_for_updates(ctx, node, gossip_peers, block).await?; - } - } - Ok(()) - }) - .await - .unwrap(); -} - -async fn wait_for_updates( - ctx: &ctx::Ctx, - node: &mut testonly::Instance, - peer_count: usize, - block: &FinalBlock, -) -> anyhow::Result<()> { - let mut updates = HashSet::new(); - while updates.len() < peer_count { - let io::OutputMessage::SyncBlocks(io::SyncBlocksRequest::UpdatePeerSyncState { - peer, - state, - response, - }) = node.pipe.recv(ctx).await.context("pipe.recv()")? - else { - continue; - }; - if state.last.as_ref() == Some(&block.justification) { - updates.insert(peer); - } - response.send(()).ok(); - } - Ok(()) -} - -/// Tests block syncing in an uncoordinated network, in which new blocks arrive at a schedule. -/// In this case, some nodes may skip emitting initial / intermediate updates to peers, so we -/// only assert that all peers for all nodes emit the final update. 
-#[test_casing(10, Product(( - NETWORK_CONNECTIVITY_CASES, - [time::Duration::seconds(1), time::Duration::seconds(10)], -)))] -#[tokio::test(flavor = "multi_thread")] -#[tracing::instrument(level = "trace")] -async fn uncoordinated_block_syncing( - (node_count, gossip_peers): (usize, usize), - state_generation_interval: time::Duration, -) { - abort_on_panic(); - let _guard = set_timeout(time::Duration::seconds(5)); - - let ctx = &ctx::test_root(&ctx::AffineClock::new(20.0)); - let rng = &mut ctx.rng(); - let mut setup = validator::testonly::Setup::new(rng, node_count); - setup.push_blocks(rng, EXCHANGED_STATE_COUNT); - scope::run!(ctx, |ctx, s| async { - for (i, cfg) in testonly::new_configs(rng, &setup, gossip_peers) - .into_iter() - .enumerate() - { - let i = i; - let (store, runner) = new_store(ctx, &setup.genesis).await; - s.spawn_bg(runner.run(ctx)); - let (node, runner) = testonly::Instance::new(cfg, store.clone()); - s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); - s.spawn(async { - let store = store; - for block in &setup.blocks { - ctx.sleep(state_generation_interval).await?; - store.queue_block(ctx, block.clone()).await.unwrap(); - } - Ok(()) - }); - s.spawn(async { - let mut node = node; - wait_for_updates(ctx, &mut node, gossip_peers, setup.blocks.last().unwrap()).await - }); - } - Ok(()) - }) - .await - .unwrap(); -} - -#[test_casing(5, NETWORK_CONNECTIVITY_CASES)] -#[tokio::test] -async fn getting_blocks_from_peers(node_count: usize, gossip_peers: usize) { - abort_on_panic(); - - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let mut setup = validator::testonly::Setup::new(rng, node_count); - setup.push_blocks(rng, 1); - let cfgs = testonly::new_configs(rng, &setup, gossip_peers); - - // All inbound and outbound peers should answer the request. - let expected_successful_responses = (2 * gossip_peers).min(node_count - 1); - - scope::run!(ctx, |ctx, s| async { - let (store, runner) = new_store(ctx, &setup.genesis).await; - s.spawn_bg(runner.run(ctx)); - store - .queue_block(ctx, setup.blocks[0].clone()) - .await - .unwrap(); - - let mut nodes: Vec<_> = cfgs - .into_iter() - .enumerate() - .map(|(i, cfg)| { - let (node, runner) = testonly::Instance::new(cfg, store.clone()); - s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); - node - }) - .collect(); - - for node in &nodes { - node.wait_for_gossip_connections().await; - tracing::info!("establish connections"); - let mut successful_peer_responses = 0; - for peer in &nodes { - let (response, response_receiver) = oneshot::channel(); - node.pipe.send( - io::SyncBlocksInputMessage::GetBlock { - recipient: peer.net.gossip.cfg.gossip.key.public(), - number: setup.blocks[0].header().number, - response, - } - .into(), - ); - tracing::info!("wait for response"); - if let Ok(block) = response_receiver.recv(ctx).await? { - assert_eq!(block, setup.blocks[0]); - successful_peer_responses += 1; - } - } - assert_eq!(successful_peer_responses, expected_successful_responses); - } - - tracing::info!("stop the last node"); - let last = nodes.pop().unwrap(); - last.terminate(ctx).await?; - - let stopped_node_key = last.net.gossip.cfg.gossip.key.public(); - for node in &nodes { - tracing::info!("wait for disconnection"); - node.wait_for_gossip_disconnect(ctx, &stopped_node_key) - .await - .unwrap(); - - tracing::info!("wait for disconnection"); - // Check that the node cannot access the stopped peer. 
-        let (response, response_receiver) = oneshot::channel();
-        node.pipe.send(
-            io::SyncBlocksInputMessage::GetBlock {
-                recipient: stopped_node_key.clone(),
-                number: BlockNumber(1),
-                response,
-            }
-            .into(),
-        );
-        assert!(response_receiver.recv(ctx).await?.is_err());
-    }
-
-    Ok(())
-    })
-    .await
-    .unwrap();
-}
-
 /// When validator node is restarted, it should immediately override
 /// the AccountData that is present in the network from the previous run.
 #[tokio::test]
diff --git a/node/actors/network/src/gossip/tests/syncing.rs b/node/actors/network/src/gossip/tests/syncing.rs
new file mode 100644
index 00000000..44922ada
--- /dev/null
+++ b/node/actors/network/src/gossip/tests/syncing.rs
@@ -0,0 +1,304 @@
+//! Integration tests of block synchronization.
+use crate::testonly;
+use anyhow::Context as _;
+use rand::seq::SliceRandom as _;
+use test_casing::{test_casing, Product};
+use tracing::Instrument as _;
+use zksync_concurrency::{
+    ctx, limiter, scope,
+    testonly::{abort_on_panic, set_timeout},
+    time,
+};
+use zksync_consensus_roles::validator;
+use zksync_consensus_storage::testonly::{new_store, new_store_with_first};
+
+const EXCHANGED_STATE_COUNT: usize = 5;
+const NETWORK_CONNECTIVITY_CASES: [(usize, usize); 5] = [(2, 1), (3, 2), (5, 3), (10, 4), (10, 7)];
+
+/// Tests block syncing with global network synchronization (the next block becomes available
+/// on some node only after all nodes have received the previous block).
+#[test_casing(5, NETWORK_CONNECTIVITY_CASES)]
+#[tokio::test(flavor = "multi_thread")]
+async fn coordinated_block_syncing(node_count: usize, gossip_peers: usize) {
+    abort_on_panic();
+    let _guard = set_timeout(time::Duration::seconds(20));
+
+    let ctx = &ctx::test_root(&ctx::RealClock);
+    let rng = &mut ctx.rng();
+    let mut setup = validator::testonly::Setup::new(rng, node_count);
+    setup.push_blocks(rng, EXCHANGED_STATE_COUNT);
+    let cfgs = testonly::new_configs(rng, &setup, gossip_peers);
+    scope::run!(ctx, |ctx, s| async {
+        let mut nodes = vec![];
+        for (i, mut cfg) in cfgs.into_iter().enumerate() {
+            cfg.rpc.push_block_store_state_rate = limiter::Rate::INF;
+            cfg.rpc.get_block_rate = limiter::Rate::INF;
+            cfg.rpc.get_block_timeout = None;
+            cfg.validator_key = None;
+            let (store, runner) = new_store(ctx, &setup.genesis).await;
+            s.spawn_bg(runner.run(ctx));
+            let (node, runner) = testonly::Instance::new(cfg, store);
+            s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i)));
+            nodes.push(node);
+        }
+        for block in &setup.blocks {
+            nodes
+                .choose(rng)
+                .unwrap()
+                .net
+                .gossip
+                .block_store
+                .queue_block(ctx, block.clone())
+                .await
+                .context("queue_block()")?;
+            for node in &nodes {
+                node.net
+                    .gossip
+                    .block_store
+                    .wait_until_persisted(ctx, block.number())
+                    .await
+                    .unwrap();
+            }
+        }
+        Ok(())
+    })
+    .await
+    .unwrap();
+}
+
+/// Tests block syncing in an uncoordinated network, in which new blocks arrive on a schedule.
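+/// Some nodes may lag behind on intermediate blocks, so we only require that every
+/// node eventually persists the last block.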
+#[test_casing(10, Product(( + NETWORK_CONNECTIVITY_CASES, + [time::Duration::milliseconds(50), time::Duration::milliseconds(500)], +)))] +#[tokio::test(flavor = "multi_thread")] +async fn uncoordinated_block_syncing( + (node_count, gossip_peers): (usize, usize), + state_generation_interval: time::Duration, +) { + abort_on_panic(); + let _guard = set_timeout(time::Duration::seconds(20)); + + let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + let mut setup = validator::testonly::Setup::new(rng, node_count); + setup.push_blocks(rng, EXCHANGED_STATE_COUNT); + let cfgs = testonly::new_configs(rng, &setup, gossip_peers); + scope::run!(ctx, |ctx, s| async { + let mut nodes = vec![]; + for (i, mut cfg) in cfgs.into_iter().enumerate() { + cfg.rpc.push_block_store_state_rate = limiter::Rate::INF; + cfg.rpc.get_block_rate = limiter::Rate::INF; + cfg.rpc.get_block_timeout = None; + cfg.validator_key = None; + let (store, runner) = new_store(ctx, &setup.genesis).await; + s.spawn_bg(runner.run(ctx)); + let (node, runner) = testonly::Instance::new(cfg, store); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + nodes.push(node); + } + for block in &setup.blocks { + nodes + .choose(rng) + .unwrap() + .net + .gossip + .block_store + .queue_block(ctx, block.clone()) + .await + .context("queue_block()")?; + ctx.sleep(state_generation_interval).await?; + } + let last = setup.blocks.last().unwrap().number(); + for node in &nodes { + node.net + .gossip + .block_store + .wait_until_persisted(ctx, last) + .await + .unwrap(); + } + Ok(()) + }) + .await + .unwrap(); +} + +/// Test concurrently adding new nodes and new blocks to the network. +#[tokio::test(flavor = "multi_thread")] +async fn test_switching_on_nodes() { + abort_on_panic(); + let _guard = set_timeout(time::Duration::seconds(20)); + + let ctx = &ctx::test_root(&ctx::RealClock); + let rng = &mut ctx.rng(); + let mut setup = validator::testonly::Setup::new(rng, 7); + // It is important that all nodes will connect to each other, + // because we spawn the nodes gradually and we want the network + // to be connected at all times. + let cfgs = testonly::new_configs(rng, &setup, setup.keys.len()); + setup.push_blocks(rng, cfgs.len()); + scope::run!(ctx, |ctx, s| async { + let mut nodes = vec![]; + for (i, mut cfg) in cfgs.into_iter().enumerate() { + // Spawn another node. + cfg.rpc.push_block_store_state_rate = limiter::Rate::INF; + cfg.rpc.get_block_rate = limiter::Rate::INF; + cfg.rpc.get_block_timeout = None; + cfg.validator_key = None; + let (store, runner) = new_store(ctx, &setup.genesis).await; + s.spawn_bg(runner.run(ctx)); + let (node, runner) = testonly::Instance::new(cfg, store); + s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); + nodes.push(node); + + // Insert a block to storage of a random node. + nodes + .choose(rng) + .unwrap() + .net + .gossip + .block_store + .queue_block(ctx, setup.blocks[i].clone()) + .await + .context("queue_block()")?; + + // Wait for all the nodes to fetch the block. + for node in &nodes { + node.net + .gossip + .block_store + .wait_until_persisted(ctx, setup.blocks[i].number()) + .await + .unwrap(); + } + } + Ok(()) + }) + .await + .unwrap(); +} + +/// Test concurrently removing nodes and adding new blocks to the network. 
+#[tokio::test(flavor = "multi_thread")]
+async fn test_switching_off_nodes() {
+    abort_on_panic();
+    let _guard = set_timeout(time::Duration::seconds(20));
+
+    let ctx = &ctx::test_root(&ctx::RealClock);
+    let rng = &mut ctx.rng();
+    let mut setup = validator::testonly::Setup::new(rng, 7);
+    // It is important that all nodes connect to each other, because we
+    // terminate the nodes gradually and we want the remaining network
+    // to stay connected at all times.
+    let cfgs = testonly::new_configs(rng, &setup, setup.keys.len());
+    setup.push_blocks(rng, cfgs.len());
+    scope::run!(ctx, |ctx, s| async {
+        let mut nodes = vec![];
+        for (i, mut cfg) in cfgs.into_iter().enumerate() {
+            // Spawn another node.
+            cfg.rpc.push_block_store_state_rate = limiter::Rate::INF;
+            cfg.rpc.get_block_rate = limiter::Rate::INF;
+            cfg.rpc.get_block_timeout = None;
+            cfg.validator_key = None;
+            let (store, runner) = new_store(ctx, &setup.genesis).await;
+            s.spawn_bg(runner.run(ctx));
+            let (node, runner) = testonly::Instance::new(cfg, store);
+            s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i)));
+            nodes.push(node);
+        }
+        nodes.shuffle(rng);
+
+        for i in 0..nodes.len() {
+            // Insert a block into the storage of a random remaining node.
+            nodes[i..]
+                .choose(rng)
+                .unwrap()
+                .net
+                .gossip
+                .block_store
+                .queue_block(ctx, setup.blocks[i].clone())
+                .await
+                .context("queue_block()")?;
+
+            // Wait for all the remaining nodes to fetch the block.
+            for node in &nodes[i..] {
+                node.net
+                    .gossip
+                    .block_store
+                    .wait_until_persisted(ctx, setup.blocks[i].number())
+                    .await
+                    .unwrap();
+            }
+
+            // Terminate a random node.
+            nodes[i].terminate(ctx).await.unwrap();
+        }
+        Ok(())
+    })
+    .await
+    .unwrap();
+}
+
+/// Test checking that nodes with different first blocks can synchronize.
+#[tokio::test(flavor = "multi_thread")]
+async fn test_different_first_block() {
+    abort_on_panic();
+    let _guard = set_timeout(time::Duration::seconds(20));
+
+    let ctx = &ctx::test_root(&ctx::RealClock);
+    let rng = &mut ctx.rng();
+    let mut setup = validator::testonly::Setup::new(rng, 4);
+    setup.push_blocks(rng, 10);
+    // It is important that all nodes connect to each other, so that every
+    // node can fetch any block it is interested in from some peer that has it.
+    let cfgs = testonly::new_configs(rng, &setup, setup.keys.len());
+    scope::run!(ctx, |ctx, s| async {
+        let mut nodes = vec![];
+        for (i, mut cfg) in cfgs.into_iter().enumerate() {
+            // Spawn another node.
+            cfg.rpc.push_block_store_state_rate = limiter::Rate::INF;
+            cfg.rpc.get_block_rate = limiter::Rate::INF;
+            cfg.rpc.get_block_timeout = None;
+            cfg.validator_key = None;
+            // Choose the first block for the node at random.
+            let first = setup.blocks.choose(rng).unwrap().number();
+            let (store, runner) = new_store_with_first(ctx, &setup.genesis, first).await;
+            s.spawn_bg(runner.run(ctx));
+            let (node, runner) = testonly::Instance::new(cfg, store);
+            s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i)));
+            nodes.push(node);
+        }
+        nodes.shuffle(rng);
+
+        for block in &setup.blocks {
+            // Find nodes interested in the next block.
+            let interested_nodes: Vec<_> = nodes
+                .iter()
+                .filter(|n| n.net.gossip.block_store.queued().first <= block.number())
+                .collect();
+            // Store this block on one of them.
+            if let Some(node) = interested_nodes.choose(rng) {
+                node.net
+                    .gossip
+                    .block_store
+                    .queue_block(ctx, block.clone())
+                    .await
+                    .unwrap();
+            }
+            // Wait until all the interested nodes get the new block.
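+            // (nodes whose first block is above this number are not expected
+            // to fetch it).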
+            for node in interested_nodes {
+                node.net
+                    .gossip
+                    .block_store
+                    .wait_until_persisted(ctx, block.number())
+                    .await
+                    .unwrap();
+            }
+        }
+        Ok(())
+    })
+    .await
+    .unwrap();
+}
diff --git a/node/actors/network/src/io.rs b/node/actors/network/src/io.rs
index 9747c8ed..9a7412f9 100644
--- a/node/actors/network/src/io.rs
+++ b/node/actors/network/src/io.rs
@@ -1,15 +1,12 @@
 #![allow(missing_docs)]
 use zksync_concurrency::oneshot;
-use zksync_consensus_roles::{node, validator};
-use zksync_consensus_storage::BlockStoreState;
+use zksync_consensus_roles::validator;
 
 /// All the messages that other actors can send to the Network actor.
 #[derive(Debug)]
 pub enum InputMessage {
     /// Message types from the Consensus actor.
     Consensus(ConsensusInputMessage),
-    /// Message types from the Sync Blocks actor.
-    SyncBlocks(SyncBlocksInputMessage),
 }
 
 /// Message types from the Consensus actor.
@@ -25,23 +22,6 @@ impl From<ConsensusInputMessage> for InputMessage {
     }
 }
 
-/// Message types from the Sync Blocks actor.
-#[derive(Debug)]
-pub enum SyncBlocksInputMessage {
-    /// Request to get a block from a specific peer.
-    GetBlock {
-        recipient: node::PublicKey,
-        number: validator::BlockNumber,
-        response: oneshot::Sender<Result<validator::FinalBlock, GetBlockError>>,
-    },
-}
-
-impl From<SyncBlocksInputMessage> for InputMessage {
-    fn from(message: SyncBlocksInputMessage) -> Self {
-        Self::SyncBlocks(message)
-    }
-}
-
 /// Consensus message received from the network.
 #[derive(Debug)]
 pub struct ConsensusReq {
@@ -53,45 +33,11 @@ pub struct ConsensusReq {
     pub ack: oneshot::Sender<()>,
 }
 
-/// Error returned in response to [`GetBlock`] call.
-///
-/// Note that these errors don't include network-level errors, only app-level ones.
-#[derive(Debug, thiserror::Error)]
-pub enum GetBlockError {
-    /// Transient error: the node doesn't have the requested L2 block.
-    #[error("node doesn't have the requested L2 block")]
-    NotAvailable,
-    #[error(transparent)]
-    Internal(#[from] anyhow::Error),
-}
-
-#[derive(Debug)]
-pub enum SyncBlocksRequest {
-    /// Notifies about an update in peer's `SyncState`.
-    UpdatePeerSyncState {
-        /// Peer that has reported the update.
-        peer: node::PublicKey,
-        /// Updated peer syncing state.
-        state: BlockStoreState,
-        /// Acknowledgement response returned by the block syncing actor.
-        // TODO: return an error in case of invalid `SyncState`?
-        response: oneshot::Sender<()>,
-    },
-}
-
 /// All the messages that the Network actor sends to other actors.
 #[derive(Debug)]
 pub enum OutputMessage {
     /// Message to the Consensus actor.
     Consensus(ConsensusReq),
-    /// Message to the block syncing actor.
-    SyncBlocks(SyncBlocksRequest),
-}
-
-impl From<SyncBlocksRequest> for OutputMessage {
-    fn from(request: SyncBlocksRequest) -> Self {
-        Self::SyncBlocks(request)
-    }
 }
 
 #[derive(Clone, Debug, PartialEq, Eq)]
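With `SyncBlocksInputMessage::GetBlock` and `SyncBlocksRequest::UpdatePeerSyncState` gone, other actors no longer drive block fetching through the dispatcher; the network actor fetches missing blocks itself (see `run_block_fetcher` below) and consumers simply watch the block store. A minimal sketch of the consumer side, assuming the `BlockStore` API used by the tests above (the exact return type of `wait_until_persisted` is inferred from its usage there):

use zksync_concurrency::ctx;
use zksync_consensus_roles::validator;
use zksync_consensus_storage::BlockStore;

// Sketch: waiting for a block now that fetching is automatic.
async fn wait_for_block(
    ctx: &ctx::Ctx,
    store: &BlockStore,
    number: validator::BlockNumber,
) -> ctx::OrCanceled<()> {
    // The gossip component requests the block from peers in the background;
    // callers only wait for it to land in local storage.
    store.wait_until_persisted(ctx, number).await?;
    Ok(())
}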
diff --git a/node/actors/network/src/lib.rs b/node/actors/network/src/lib.rs
index ed154333..4de3bc87 100644
--- a/node/actors/network/src/lib.rs
+++ b/node/actors/network/src/lib.rs
@@ -2,7 +2,7 @@
 use anyhow::Context as _;
 use std::sync::Arc;
 use tracing::Instrument as _;
-use zksync_concurrency::{ctx, ctx::channel, limiter, scope, time};
+use zksync_concurrency::{ctx, ctx::channel, limiter, scope};
 use zksync_consensus_storage::BlockStore;
 use zksync_consensus_utils::pipe::ActorPipe;
@@ -71,12 +71,9 @@ impl Network {
     /// Handles a dispatcher message.
     async fn handle_message(
         &self,
-        ctx: &ctx::Ctx,
+        _ctx: &ctx::Ctx,
         message: io::InputMessage,
     ) -> anyhow::Result<()> {
-        /// Timeout for a GetBlock RPC.
-        const GET_BLOCK_TIMEOUT: time::Duration = time::Duration::seconds(10);
-
         match message {
             io::InputMessage::Consensus(message) => {
                 self.consensus
@@ -85,18 +82,6 @@ impl Network {
                     .msg_pool
                     .send(Arc::new(message));
             }
-            io::InputMessage::SyncBlocks(io::SyncBlocksInputMessage::GetBlock {
-                recipient,
-                number,
-                response,
-            }) => {
-                let ctx = &ctx.with_timeout(GET_BLOCK_TIMEOUT);
-                let _ = response.send(match self.gossip.get_block(ctx, &recipient, number).await {
-                    Ok(Some(block)) => Ok(block),
-                    Ok(None) => Err(io::GetBlockError::NotAvailable),
-                    Err(err) => Err(io::GetBlockError::Internal(err)),
-                });
-            }
         }
         Ok(())
     }
@@ -128,6 +113,12 @@ impl Runner {
             Ok(())
         });
 
+        // Fetch missing blocks in the background.
+        s.spawn(async {
+            self.net.gossip.run_block_fetcher(ctx).await;
+            Ok(())
+        });
+
         // Maintain static gossip connections.
         for (peer, addr) in &self.net.gossip.cfg.gossip.static_outbound {
             s.spawn::<()>(async {
diff --git a/node/actors/network/src/mux/reusable_stream.rs b/node/actors/network/src/mux/reusable_stream.rs
index 381cf561..83d0a943 100644
--- a/node/actors/network/src/mux/reusable_stream.rs
+++ b/node/actors/network/src/mux/reusable_stream.rs
@@ -4,7 +4,7 @@ use super::{
 };
 use crate::noise::bytes;
 use std::sync::Arc;
-use zksync_concurrency::{ctx, ctx::channel, oneshot, scope, sync};
+use zksync_concurrency::{ctx, ctx::channel, limiter, oneshot, scope, sync};
 
 /// Read frame allocation permit.
 #[derive(Debug)]
@@ -66,18 +66,20 @@ impl ReservedStream {
 /// `queue.pop()` before the OPEN message is sent to the peer.
 pub(crate) struct StreamQueue {
     pub(super) max_streams: u32,
-    send: channel::UnboundedSender<ReservedStream>,
-    recv: sync::Mutex<channel::UnboundedReceiver<ReservedStream>>,
+    limiter: limiter::Limiter,
+    send: channel::Sender<ReservedStream>,
+    recv: sync::Mutex<channel::Receiver<ReservedStream>>,
 }
 
 impl StreamQueue {
     /// Constructs a new StreamQueue with the specified number of reusable streams.
     /// During multiplexer handshake, peers exchange information about
     /// how many reusable streams they support per capability.
-    pub(crate) fn new(max_streams: u32) -> Arc<Self> {
-        let (send, recv) = channel::unbounded();
+    pub(crate) fn new(ctx: &ctx::Ctx, max_streams: u32, rate: limiter::Rate) -> Arc<Self> {
+        let (send, recv) = channel::bounded(1);
         Arc::new(Self {
             max_streams,
+            limiter: limiter::Limiter::new(ctx, rate),
             send,
             recv: sync::Mutex::new(recv),
         })
@@ -91,6 +93,7 @@ impl StreamQueue {
     }
 
     /// Opens a transient stream from the queue.
+    #[allow(dead_code)]
    pub(crate) async fn open(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<Stream> {
        loop {
            // It may happen that the popped stream has been immediately disconnected
@@ -106,7 +109,7 @@ impl StreamQueue {
    async fn push(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled {
        loop {
            let (send, recv) = oneshot::channel();
-            self.send.send(ReservedStream(send));
+            self.send.send(ctx, ReservedStream(send)).await?;
            if let Ok(reservation) = recv.recv_or_disconnected(ctx).await?
{ return Ok(reservation); } @@ -269,6 +272,7 @@ impl ReusableStream { let mut write = write_receiver.wait(ctx).await?; write.send_close(ctx).await?; + let _open_permit = self.stream_queue.limiter.acquire(ctx, 1).await?; let (read, reservation) = match write.stream_kind { StreamKind::ACCEPT => { let read = recv_open_task.join(ctx).await?; diff --git a/node/actors/network/src/mux/tests/mod.rs b/node/actors/network/src/mux/tests/mod.rs index 3106c047..2ae1b245 100644 --- a/node/actors/network/src/mux/tests/mod.rs +++ b/node/actors/network/src/mux/tests/mod.rs @@ -8,7 +8,7 @@ use std::{ Arc, }, }; -use zksync_concurrency::{ctx, scope, testonly::abort_on_panic}; +use zksync_concurrency::{ctx, limiter, scope, testonly::abort_on_panic}; mod proto; @@ -32,6 +32,7 @@ fn test_masks() { #[test] fn test_mux_verify() { + let ctx = &ctx::test_root(&ctx::RealClock); let cfg = Arc::new(mux::Config { read_buffer_size: 1000, read_frame_size: 100, @@ -47,8 +48,8 @@ fn test_mux_verify() { .is_ok()); let mut queues = BTreeMap::new(); - queues.insert(0, mux::StreamQueue::new(u32::MAX)); - queues.insert(1, mux::StreamQueue::new(u32::MAX)); + queues.insert(0, mux::StreamQueue::new(ctx, u32::MAX, limiter::Rate::INF)); + queues.insert(1, mux::StreamQueue::new(ctx, u32::MAX, limiter::Rate::INF)); // Total streams overflow: assert!(mux::Mux { cfg: cfg.clone(), @@ -208,10 +209,20 @@ fn mux_with_noise() { write_frame_size: 150, }), accept: (0..caps) - .map(|c| (c, mux::StreamQueue::new(rng.gen_range(1..5)))) + .map(|c| { + ( + c, + mux::StreamQueue::new(ctx, rng.gen_range(1..5), limiter::Rate::INF), + ) + }) .collect(), connect: (0..caps) - .map(|c| (c, mux::StreamQueue::new(rng.gen_range(1..5)))) + .map(|c| { + ( + c, + mux::StreamQueue::new(ctx, rng.gen_range(1..5), limiter::Rate::INF), + ) + }) .collect(), }; let mux2 = mux::Mux { @@ -222,10 +233,20 @@ fn mux_with_noise() { write_frame_size: 79, }), accept: (0..caps) - .map(|c| (c, mux::StreamQueue::new(rng.gen_range(1..5)))) + .map(|c| { + ( + c, + mux::StreamQueue::new(ctx, rng.gen_range(1..5), limiter::Rate::INF), + ) + }) .collect(), connect: (0..caps) - .map(|c| (c, mux::StreamQueue::new(rng.gen_range(1..5)))) + .map(|c| { + ( + c, + mux::StreamQueue::new(ctx, rng.gen_range(1..5), limiter::Rate::INF), + ) + }) .collect(), }; @@ -303,7 +324,7 @@ async fn test_transport_closed() { accept: BTreeMap::default(), connect: BTreeMap::default(), }; - let q = mux::StreamQueue::new(1); + let q = mux::StreamQueue::new(ctx, 1, limiter::Rate::INF); mux.connect.insert(cap, q.clone()); s.spawn_bg(async { expected(mux.run(ctx, s2).await).context("[connect] mux.run()") @@ -316,7 +337,7 @@ async fn test_transport_closed() { accept: BTreeMap::default(), connect: BTreeMap::default(), }; - let q = mux::StreamQueue::new(1); + let q = mux::StreamQueue::new(ctx, 1, limiter::Rate::INF); mux.accept.insert(cap, q.clone()); s.spawn_bg(async { expected(mux.run(ctx, s1).await).context("[accept] mux.run()") diff --git a/node/actors/network/src/rpc/mod.rs b/node/actors/network/src/rpc/mod.rs index 2153189e..2780e005 100644 --- a/node/actors/network/src/rpc/mod.rs +++ b/node/actors/network/src/rpc/mod.rs @@ -32,7 +32,8 @@ pub(crate) mod testonly; #[cfg(test)] mod tests; -const MUX_CONFIG: mux::Config = mux::Config { +/// Multiplexer configuration for the RPC services. 
+pub(crate) const MUX_CONFIG: mux::Config = mux::Config {
     read_buffer_size: 160 * zksync_protobuf::kB as u64,
     read_frame_size: 16 * zksync_protobuf::kB as u64,
     read_frame_count: 100,
@@ -72,13 +73,12 @@ pub(crate) trait Rpc: Sync + Send + 'static {
 /// blindly decide which server to call without knowing their real capacity.
 /// TODO(gprusak): to actually pass around the permit, we should use an OwnedPermit
 /// instead.
-pub(crate) struct ReservedCall<'a, R: Rpc> {
+pub(crate) struct ReservedCall<R: Rpc> {
     stream: mux::ReservedStream,
-    permit: limiter::Permit<'a>,
     _rpc: std::marker::PhantomData<R>,
 }
 
-impl<'a, R: Rpc> ReservedCall<'a, R> {
+impl<R: Rpc> ReservedCall<R> {
     /// Performs the call.
     pub(crate) async fn call(
         self,
@@ -88,7 +88,6 @@ impl<R: Rpc> ReservedCall<R> {
     ) -> anyhow::Result<R::Resp> {
         let send_time = ctx.now();
         let mut stream = self.stream.open(ctx).await??;
-        drop(self.permit);
         let res = async {
             let metric_labels = CallType::Client.to_labels::<R>(req);
             let _guard = RPC_METRICS.inflight[&metric_labels].inc_guard(1);
@@ -113,7 +112,6 @@
 /// RPC client used to issue the calls to the server.
 pub(crate) struct Client<R: Rpc> {
-    limiter: limiter::Limiter,
     queue: Arc<mux::StreamQueue>,
     _rpc: std::marker::PhantomData<R>,
 }
 
@@ -124,24 +122,18 @@ impl<R: Rpc> Client<R> {
     // so perhaps they should be constructed by `Service::add_client` instead?
     pub(crate) fn new(ctx: &ctx::Ctx, rate: limiter::Rate) -> Self {
         Client {
-            limiter: limiter::Limiter::new(ctx, rate),
-            queue: mux::StreamQueue::new(R::INFLIGHT),
+            queue: mux::StreamQueue::new(ctx, R::INFLIGHT, rate),
             _rpc: std::marker::PhantomData,
         }
     }
 
     /// Reserves an RPC.
-    pub(crate) async fn reserve<'a>(
-        &'a self,
-        ctx: &'a ctx::Ctx,
-    ) -> ctx::OrCanceled<ReservedCall<'a, R>> {
+    pub(crate) async fn reserve(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<ReservedCall<R>> {
         let reserve_time = ctx.now();
-        let permit = self.limiter.acquire(ctx, 1).await?;
         let stream = self.queue.reserve(ctx).await?;
         RPC_METRICS.call_reserve_latency[&R::METHOD].observe_latency(ctx.now() - reserve_time);
         Ok(ReservedCall {
             stream,
-            permit,
             _rpc: std::marker::PhantomData,
         })
     }
@@ -175,7 +167,6 @@ pub(crate) trait Handler<R: Rpc>: Sync + Send {
 struct Server<R: Rpc, H: Handler<R>> {
     handler: H,
     queue: Arc<mux::StreamQueue>,
-    rate: limiter::Rate,
     _rpc: std::marker::PhantomData<R>,
 }
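Here the per-call limiter and permit passing disappear from `ReservedCall`/`Client`: the rate now lives inside the capability's `StreamQueue` (see `reusable_stream.rs` above), which acquires a permit each time a stream is reused. A sketch of the assumed semantics of `limiter::Rate`, with the field meanings inferred from the tests in this diff (the queue size of 5 is an arbitrary illustrative value):

use zksync_concurrency::{ctx, limiter, time};

// Sketch: constructing a throttled stream queue. After an initial burst of 1,
// at most one stream may be (re)opened per 100 ms; limiter::Rate::INF (used
// throughout the tests) disables the gating entirely. The arity of
// StreamQueue::new follows the signature introduced in this diff.
fn throttled_queue(ctx: &ctx::Ctx) -> std::sync::Arc<crate::mux::StreamQueue> {
    let rate = limiter::Rate {
        burst: 1,
        refresh: time::Duration::milliseconds(100),
    };
    crate::mux::StreamQueue::new(ctx, 5, rate)
}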
@@ -189,55 +180,51 @@ impl<R: Rpc, H: Handler<R>> ServerTrait for Server<R, H> {
     /// Serves the incoming RPCs, respecting the rate limit and
     /// max inflight limit.
     async fn serve(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<()> {
-        let limiter = limiter::Limiter::new(ctx, self.rate);
         scope::run!(ctx, |ctx, s| async {
-            for _ in 0..R::INFLIGHT {
+            loop {
+                let stream = self.queue.reserve(ctx).await?;
                 s.spawn::<()>(async {
-                    loop {
-                        let permit = limiter.acquire(ctx, 1).await?;
-                        let mut stream = self.queue.open(ctx).await?;
-                        drop(permit);
-                        let res = async {
-                            let recv_time = ctx.now();
-                            let (req, msg_size) = frame::mux_recv_proto::<R::Req>(
-                                ctx,
-                                &mut stream.read,
-                                self.handler.max_req_size(),
-                            )
-                            .await?;
+                    let res = async {
+                        let mut stream = stream.open(ctx).await??;
+                        let recv_time = ctx.now();
+                        let (req, msg_size) = frame::mux_recv_proto::<R::Req>(
+                            ctx,
+                            &mut stream.read,
+                            self.handler.max_req_size(),
+                        )
+                        .await?;
 
-                            let size_labels = CallType::ReqRecv.to_labels::<R>(&req);
-                            let resp_size_labels = CallType::RespSent.to_labels::<R>(&req);
-                            RPC_METRICS.message_size[&size_labels].observe(msg_size);
-                            let inflight_labels = CallType::Server.to_labels::<R>(&req);
-                            let _guard = RPC_METRICS.inflight[&inflight_labels].inc_guard(1);
-                            let mut server_process_labels =
-                                CallLatencyType::ServerProcess.to_labels::<R>(&req, &Ok(()));
-                            let mut recv_send_labels =
-                                CallLatencyType::ServerRecvSend.to_labels::<R>(&req, &Ok(()));
+                        let size_labels = CallType::ReqRecv.to_labels::<R>(&req);
+                        let resp_size_labels = CallType::RespSent.to_labels::<R>(&req);
+                        RPC_METRICS.message_size[&size_labels].observe(msg_size);
+                        let inflight_labels = CallType::Server.to_labels::<R>(&req);
+                        let _guard = RPC_METRICS.inflight[&inflight_labels].inc_guard(1);
+                        let mut server_process_labels =
+                            CallLatencyType::ServerProcess.to_labels::<R>(&req, &Ok(()));
+                        let mut recv_send_labels =
+                            CallLatencyType::ServerRecvSend.to_labels::<R>(&req, &Ok(()));
 
-                            let process_time = ctx.now();
-                            let res = self.handler.handle(ctx, req).await.context(R::METHOD);
-                            server_process_labels.set_result(&res);
-                            RPC_METRICS.latency[&server_process_labels]
-                                .observe_latency(ctx.now() - process_time);
+                        let process_time = ctx.now();
+                        let res = self.handler.handle(ctx, req).await.context(R::METHOD);
+                        server_process_labels.set_result(&res);
+                        RPC_METRICS.latency[&server_process_labels]
+                            .observe_latency(ctx.now() - process_time);
 
-                            let res = frame::mux_send_proto(ctx, &mut stream.write, &res?).await;
-                            recv_send_labels.set_result(&res);
-                            RPC_METRICS.latency[&recv_send_labels]
-                                .observe_latency(ctx.now() - recv_time);
-                            let msg_size = res?;
-                            RPC_METRICS.message_size[&resp_size_labels].observe(msg_size);
-                            anyhow::Ok(())
-                        }
-                        .await;
-                        if let Err(err) = res {
-                            tracing::info!("{err:#}");
-                        }
+                        let res = frame::mux_send_proto(ctx, &mut stream.write, &res?).await;
+                        recv_send_labels.set_result(&res);
+                        RPC_METRICS.latency[&recv_send_labels]
+                            .observe_latency(ctx.now() - recv_time);
+                        let msg_size = res?;
+                        RPC_METRICS.message_size[&resp_size_labels].observe(msg_size);
+                        anyhow::Ok(())
                     }
+                    .await;
+                    if let Err(err) = res {
+                        tracing::info!("{err:#}");
+                    }
+                    Ok(())
                 });
             }
-            Ok(())
         })
         .await
     }
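Registration now happens against a `ctx`, and the rate is handed to the queue rather than kept by the server; the updated call sites in `rpc/tests.rs` below follow this shape. A minimal sketch, reusing the test-only `ping` RPC that appears in those tests:

// Sketch (names taken from rpc/tests.rs below): the rate accompanies the
// handler at registration time and ends up inside the capability's queue.
let service = Service::new().add_server(ctx, ping::Server, ping::RATE);
// `service.run(ctx, stream)` then serves requests until `stream` closes,
// where `stream` is a connected transport (obtained as in the tests).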
@@ -282,10 +269,11 @@ impl<'a> Service<'a> {
     /// Adds a server to the RPC service.
     pub(crate) fn add_server<R: Rpc>(
         mut self,
+        ctx: &ctx::Ctx,
         handler: impl Handler<R> + 'a,
         rate: limiter::Rate,
     ) -> Self {
-        let queue = mux::StreamQueue::new(R::INFLIGHT);
+        let queue = mux::StreamQueue::new(ctx, R::INFLIGHT, rate);
         if self
             .mux
             .connect
@@ -300,7 +288,6 @@
         self.servers.push(Box::new(Server {
             handler,
             queue,
-            rate,
             _rpc: std::marker::PhantomData,
         }));
         self
diff --git a/node/actors/network/src/rpc/tests.rs b/node/actors/network/src/rpc/tests.rs
index 3770320c..b3490406 100644
--- a/node/actors/network/src/rpc/tests.rs
+++ b/node/actors/network/src/rpc/tests.rs
@@ -50,7 +50,7 @@ async fn test_ping() {
     s.spawn_bg(async {
         expected(
             Service::new()
-                .add_server(ping::Server, ping::RATE)
+                .add_server(ctx, ping::Server, ping::RATE)
                 .run(ctx, s1)
                 .await,
         )
@@ -64,12 +64,10 @@ async fn test_ping() {
         let resp = client.call(ctx, &req, kB).await?;
         assert_eq!(req.0, resp.0);
     }
-    let now = ctx.now();
-    clock.set_advance_on_sleep();
+    clock.advance(ping::RATE.refresh);
     let req = ping::Req(ctx.rng().gen());
     let resp = client.call(ctx, &req, kB).await?;
     assert_eq!(req.0, resp.0);
-    assert!(ctx.now() >= now + ping::RATE.refresh);
     Ok(())
 })
 .await
@@ -120,6 +118,7 @@ async fn test_ping_loop() {
     expected(
         Service::new()
             .add_server(
+                ctx,
                 server,
                 limiter::Rate {
                     burst: 1,
@@ -185,7 +184,7 @@ async fn test_inflight() {
     s.spawn_bg(async {
         expected(
             Service::new()
-                .add_server(ExampleServer, RATE)
+                .add_server(ctx, ExampleServer, RATE)
                 .run(ctx, s1)
                 .await,
         )
diff --git a/node/actors/network/src/testonly.rs b/node/actors/network/src/testonly.rs
index ef6bdcbf..8ae85f37 100644
--- a/node/actors/network/src/testonly.rs
+++ b/node/actors/network/src/testonly.rs
@@ -95,6 +95,7 @@ pub fn new_configs(
         max_block_size: usize::MAX,
         tcp_accept_rate: limiter::Rate::INF,
         rpc: RpcConfig::default(),
+        max_block_queue_size: 10,
     }
 });
 let mut cfgs: Vec<_> = configs.collect();
@@ -131,6 +132,7 @@ pub fn new_fullnode(rng: &mut impl Rng, peer: &Config) -> Config {
     max_block_size: usize::MAX,
     tcp_accept_rate: limiter::Rate::INF,
     rpc: RpcConfig::default(),
+    max_block_queue_size: 10,
 }
}
diff --git a/node/actors/sync_blocks/Cargo.toml b/node/actors/sync_blocks/Cargo.toml
deleted file mode 100644
index b478460d..00000000
--- a/node/actors/sync_blocks/Cargo.toml
+++ /dev/null
@@ -1,28 +0,0 @@
-[package]
-name = "zksync_consensus_sync_blocks"
-version = "0.1.0"
-edition.workspace = true
-authors.workspace = true
-homepage.workspace = true
-license.workspace = true
-
-[dependencies]
-zksync_concurrency.workspace = true
-zksync_consensus_network.workspace = true
-zksync_consensus_roles.workspace = true
-zksync_consensus_storage.workspace = true
-zksync_consensus_utils.workspace = true
-
-anyhow.workspace = true
-rand.workspace = true
-thiserror.workspace = true
-tracing.workspace = true
-
-[dev-dependencies]
-assert_matches.workspace = true
-async-trait.workspace = true
-test-casing.workspace = true
-tokio.workspace = true
-
-[lints]
-workspace = true
diff --git a/node/actors/sync_blocks/src/config.rs b/node/actors/sync_blocks/src/config.rs
deleted file mode 100644
index a3c9c88a..00000000
--- a/node/actors/sync_blocks/src/config.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-//! Configuration for the `SyncBlocks` actor.
-use zksync_concurrency::time;
-
-/// Configuration for the `SyncBlocks` actor.
-#[derive(Debug)]
-pub struct Config {
-    /// Maximum number of blocks to attempt to get concurrently from all peers in total.
- pub(crate) max_concurrent_blocks: usize, - /// Maximum number of blocks to attempt to get concurrently from any single peer. - pub(crate) max_concurrent_blocks_per_peer: usize, - /// Interval between re-checking peers to get a specific block if no peers currently should have - /// the block. - pub(crate) sleep_interval_for_get_block: time::Duration, -} - -impl Default for Config { - fn default() -> Self { - Self::new() - } -} - -impl Config { - /// Creates a new configuration with the provided mandatory params. - pub fn new() -> Self { - Self { - max_concurrent_blocks: 20, - max_concurrent_blocks_per_peer: 5, - sleep_interval_for_get_block: time::Duration::seconds(10), - } - } - - /// Sets the maximum number of blocks to attempt to get concurrently. - pub fn with_max_concurrent_blocks(mut self, blocks: usize) -> anyhow::Result { - anyhow::ensure!(blocks > 0, "Number of blocks must be positive"); - self.max_concurrent_blocks = blocks; - Ok(self) - } - - /// Maximum number of blocks to attempt to get concurrently from any single peer. - pub fn with_max_concurrent_blocks_per_peer(mut self, blocks: usize) -> anyhow::Result { - anyhow::ensure!(blocks > 0, "Number of blocks must be positive"); - self.max_concurrent_blocks_per_peer = blocks; - Ok(self) - } - - /// Sets the interval between re-checking peers to get a specific block if no peers currently - /// should have the block. - pub fn with_sleep_interval_for_get_block( - mut self, - interval: time::Duration, - ) -> anyhow::Result { - anyhow::ensure!(interval.is_positive(), "Sleep interval must be positive"); - self.sleep_interval_for_get_block = interval; - Ok(self) - } -} diff --git a/node/actors/sync_blocks/src/io.rs b/node/actors/sync_blocks/src/io.rs deleted file mode 100644 index cb4a9225..00000000 --- a/node/actors/sync_blocks/src/io.rs +++ /dev/null @@ -1,29 +0,0 @@ -//! Input and output messages for the [`SyncBlocks`](crate::SyncBlocks) actor. - -use zksync_consensus_network::io::{SyncBlocksInputMessage, SyncBlocksRequest}; - -/// All the messages that other actors can send to the `SyncBlocks` actor. -#[derive(Debug)] -pub enum InputMessage { - /// Message types from the Network actor. - Network(SyncBlocksRequest), -} - -impl From for InputMessage { - fn from(request: SyncBlocksRequest) -> Self { - Self::Network(request) - } -} - -/// Messages produced by the `SyncBlocks` actor. -#[derive(Debug)] -pub enum OutputMessage { - /// Message to the Network actor. - Network(SyncBlocksInputMessage), -} - -impl From for OutputMessage { - fn from(message: SyncBlocksInputMessage) -> Self { - Self::Network(message) - } -} diff --git a/node/actors/sync_blocks/src/lib.rs b/node/actors/sync_blocks/src/lib.rs deleted file mode 100644 index e46e340a..00000000 --- a/node/actors/sync_blocks/src/lib.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! # Sync Blocks Actor -//! -//! This crate contains an actor implementing block syncing among nodes, which is tied to the gossip -//! network RPCs. -use crate::io::{InputMessage, OutputMessage}; -use std::sync::Arc; -use zksync_concurrency::{ctx, scope}; -use zksync_consensus_network::io::SyncBlocksRequest; -use zksync_consensus_storage::BlockStore; -use zksync_consensus_utils::pipe::ActorPipe; - -mod config; -pub mod io; -mod peers; -#[cfg(test)] -mod tests; - -pub use crate::config::Config; -use crate::peers::PeerStates; - -impl Config { - /// Runs the sync_blocks actor. 
- pub async fn run( - self, - ctx: &ctx::Ctx, - mut pipe: ActorPipe, - storage: Arc, - ) -> anyhow::Result<()> { - let peer_states = PeerStates::new(self, storage.clone(), pipe.send); - let result: ctx::Result<()> = scope::run!(ctx, |ctx, s| async { - s.spawn_bg(async { peer_states.run_block_fetcher(ctx).await }); - loop { - match pipe.recv.recv(ctx).await? { - InputMessage::Network(SyncBlocksRequest::UpdatePeerSyncState { - peer, - state, - response, - }) => { - let res = peer_states.update(&peer, state); - if let Err(err) = res { - tracing::info!(%err, ?peer, "peer_states.update()"); - } - response.send(()).ok(); - } - } - } - }) - .await; - - // Since we clearly type cancellation errors, it's easier propagate them up to this entry point, - // rather than catching in the constituent tasks. - result.or_else(|err| match err { - ctx::Error::Canceled(_) => Ok(()), // Cancellation is not propagated as an error - ctx::Error::Internal(err) => Err(err), - }) - } -} diff --git a/node/actors/sync_blocks/src/peers/events.rs b/node/actors/sync_blocks/src/peers/events.rs deleted file mode 100644 index 36c00bed..00000000 --- a/node/actors/sync_blocks/src/peers/events.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Events emitted by `PeerStates` actor. Useful for testing. - -use zksync_consensus_roles::{node, validator::BlockNumber}; - -/// Events emitted by `PeerStates` actor. Only used for tests so far. -#[derive(Debug)] -#[allow(dead_code)] // Variant fields are only read in tests -pub(super) enum PeerStateEvent { - /// Node has successfully downloaded the specified block. - GotBlock(BlockNumber), - /// Received an invalid block from the peer. - RpcFailed { - peer_key: node::PublicKey, - block_number: BlockNumber, - }, - /// Peer was disconnected (i.e., it has dropped a request). - PeerDropped(node::PublicKey), -} diff --git a/node/actors/sync_blocks/src/peers/mod.rs b/node/actors/sync_blocks/src/peers/mod.rs deleted file mode 100644 index 6c838a1c..00000000 --- a/node/actors/sync_blocks/src/peers/mod.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! Peer states tracked by the `SyncBlocks` actor. -#![allow(unused)] -use self::events::PeerStateEvent; -use crate::{io, Config}; -use anyhow::Context as _; -use std::{ - collections::HashMap, - sync::{Arc, Mutex}, -}; -use zksync_concurrency::{ - ctx::{self, channel}, - oneshot, scope, sync, -}; -use zksync_consensus_network::io::SyncBlocksInputMessage; -use zksync_consensus_roles::{ - node, validator, - validator::{BlockNumber, FinalBlock}, -}; -use zksync_consensus_storage::{BlockStore, BlockStoreState}; - -mod events; -#[cfg(test)] -mod tests; - -#[derive(Debug)] -struct PeerState { - state: BlockStoreState, - get_block_semaphore: Arc, -} - -/// Handle for [`PeerStates`] allowing to send updates to it. -#[derive(Debug)] -pub(crate) struct PeerStates { - config: Config, - storage: Arc, - message_sender: channel::UnboundedSender, - - peers: Mutex>, - highest_peer_block: sync::watch::Sender, - events_sender: Option>, -} - -impl PeerStates { - fn genesis(&self) -> &validator::Genesis { - self.storage.genesis() - } - - /// Creates a new instance together with a handle. - pub(crate) fn new( - config: Config, - storage: Arc, - message_sender: channel::UnboundedSender, - ) -> Self { - Self { - config, - storage, - message_sender, - - peers: Mutex::default(), - highest_peer_block: sync::watch::channel(BlockNumber(0)).0, - events_sender: None, - } - } - - /// Updates the known `BlockStore` state of the given peer. 
- /// This information is used to decide from which peer to fetch - /// a given block. - pub(crate) fn update( - &self, - peer: &node::PublicKey, - state: BlockStoreState, - ) -> anyhow::Result<()> { - use std::collections::hash_map::Entry; - state.verify(self.genesis()).context("state.verify()")?; - let mut peers = self.peers.lock().unwrap(); - match peers.entry(peer.clone()) { - Entry::Occupied(mut e) => e.get_mut().state = state.clone(), - Entry::Vacant(e) => { - let permits = self.config.max_concurrent_blocks_per_peer; - e.insert(PeerState { - state: state.clone(), - get_block_semaphore: Arc::new(sync::Semaphore::new(permits)), - }); - } - } - if let Some(last) = &state.last { - self.highest_peer_block - .send_if_modified(|highest_peer_block| { - if *highest_peer_block >= last.header().number { - return false; - } - *highest_peer_block = last.header().number; - true - }); - } - Ok(()) - } - - /// Task fetching blocks from peers which are not present in storage. - pub(crate) async fn run_block_fetcher(&self, ctx: &ctx::Ctx) -> ctx::Result<()> { - let sem = sync::Semaphore::new(self.config.max_concurrent_blocks); - scope::run!(ctx, |ctx, s| async { - let mut next = self.storage.queued().next(); - let mut highest_peer_block = self.highest_peer_block.subscribe(); - loop { - sync::wait_for(ctx, &mut highest_peer_block, |highest_peer_block| { - highest_peer_block >= &next - }) - .await?; - let permit = sync::acquire(ctx, &sem).await?; - let block_number = ctx::NoCopy(next); - next = next.next(); - s.spawn(async { - let _permit = permit; - self.fetch_block(ctx, block_number.into()).await - }); - } - }) - .await - } - - /// Fetches the block from peers and puts it to storage. - /// Early exits if the block appeared in storage from other source. - async fn fetch_block(&self, ctx: &ctx::Ctx, block_number: BlockNumber) -> ctx::Result<()> { - let _ = scope::run!(ctx, |ctx, s| async { - s.spawn_bg(async { - let block = self.fetch_block_from_peers(ctx, block_number).await?; - self.storage.queue_block(ctx, block).await - }); - // Cancel fetching as soon as block is queued for storage. - self.storage.wait_until_queued(ctx, block_number).await?; - Ok(()) - }) - .await; - self.storage.wait_until_persisted(ctx, block_number).await?; - Ok(()) - } - - /// Fetches the block from peers. - async fn fetch_block_from_peers( - &self, - ctx: &ctx::Ctx, - number: BlockNumber, - ) -> ctx::OrCanceled { - while ctx.is_active() { - let Some(peer) = self.select_peer(number) else { - ctx.sleep(self.config.sleep_interval_for_get_block).await?; - continue; - }; - let res = self.fetch_block_from_peer(ctx, &peer, number).await; - match res { - Ok(block) => { - if let Some(send) = &self.events_sender { - send.send(PeerStateEvent::GotBlock(number)); - } - return Ok(block); - } - Err(ctx::Error::Canceled(_)) => { - tracing::info!(%number, ?peer, "get_block() call canceled"); - } - Err(err) => { - tracing::info!(%err, %number, ?peer, "get_block() failed"); - if let Some(send) = &self.events_sender { - send.send(PeerStateEvent::RpcFailed { - peer_key: peer.clone(), - block_number: number, - }); - } - self.drop_peer(&peer); - } - } - } - Err(ctx::Canceled) - } - - /// Fetches a block from the specified peer. 
- async fn fetch_block_from_peer( - &self, - ctx: &ctx::Ctx, - peer: &node::PublicKey, - number: BlockNumber, - ) -> ctx::Result { - let (response, response_receiver) = oneshot::channel(); - let message = SyncBlocksInputMessage::GetBlock { - recipient: peer.clone(), - number, - response, - }; - self.message_sender.send(message.into()); - let block = response_receiver - .recv_or_disconnected(ctx) - .await? - .context("no response")? - .context("RPC error")?; - if block.header().number != number { - return Err(anyhow::anyhow!( - "block does not have requested number (requested: {number}, got: {})", - block.header().number - ) - .into()); - } - block.verify(self.genesis()).context("block.validate()")?; - Ok(block) - } - - fn select_peer(&self, block_number: BlockNumber) -> Option { - let peers = self.peers.lock().unwrap(); - peers - .iter() - .find(|(_, s)| s.state.contains(block_number)) - .map(|x| x.0.clone()) - } - - /// Drops peer state. - fn drop_peer(&self, peer: &node::PublicKey) { - if self.peers.lock().unwrap().remove(peer).is_none() { - return; - } - tracing::debug!(?peer, "Dropping peer state"); - if let Some(events_sender) = &self.events_sender { - events_sender.send(PeerStateEvent::PeerDropped(peer.clone())); - } - } -} diff --git a/node/actors/sync_blocks/src/peers/tests/basics.rs b/node/actors/sync_blocks/src/peers/tests/basics.rs deleted file mode 100644 index 4767967e..00000000 --- a/node/actors/sync_blocks/src/peers/tests/basics.rs +++ /dev/null @@ -1,522 +0,0 @@ -//! Basic tests. - -use super::*; -use crate::{ - io, - tests::{make_response, sync_state}, -}; -use rand::seq::SliceRandom as _; - -#[derive(Debug)] -struct UpdatingPeerStateWithSingleBlock; - -#[async_trait] -impl Test for UpdatingPeerStateWithSingleBlock { - const BLOCK_COUNT: usize = 2; - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - setup, - peer_states, - storage, - mut message_receiver, - mut events_receiver, - .. - } = handles; - - let rng = &mut ctx.rng(); - let peer_key = rng.gen::().public(); - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.first())) - .unwrap(); - - // Check that the actor has sent a `get_block` request to the peer - let message = message_receiver.recv(ctx).await?; - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message; - assert_eq!(recipient, peer_key); - assert_eq!(number, setup.blocks[0].number()); - - // Emulate the peer sending a correct response. - response.send(make_response(setup.blocks.first())).unwrap(); - - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(n) if n == setup.blocks[0].number()); - - // Check that the block has been saved locally. - storage - .wait_until_persisted(ctx, setup.blocks[0].number()) - .await?; - Ok(()) - } -} - -#[tokio::test] -async fn updating_peer_state_with_single_block() { - test_peer_states(UpdatingPeerStateWithSingleBlock).await; -} - -#[derive(Debug)] -struct CancelingBlockRetrieval; - -#[async_trait] -impl Test for CancelingBlockRetrieval { - const BLOCK_COUNT: usize = 5; - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - setup, - peer_states, - storage, - mut message_receiver, - .. 
- } = handles; - - let rng = &mut ctx.rng(); - let peer_key = rng.gen::().public(); - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.first())) - .unwrap(); - - // Check that the actor has sent a `get_block` request to the peer - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { mut response, .. }) = - message_receiver.recv(ctx).await?; - - // Emulate receiving block using external means. - storage.queue_block(ctx, setup.blocks[0].clone()).await?; - - // Retrieval of the block must be canceled. - response.closed().await; - Ok(()) - } -} - -#[tokio::test] -async fn canceling_block_retrieval() { - test_peer_states(CancelingBlockRetrieval).await; -} - -#[derive(Debug)] -struct FilteringBlockRetrieval; - -#[async_trait] -impl Test for FilteringBlockRetrieval { - const BLOCK_COUNT: usize = 5; - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - setup, - peer_states, - storage, - mut message_receiver, - .. - } = handles; - - // Emulate receiving block using external means. - storage.queue_block(ctx, setup.blocks[0].clone()).await?; - - let rng = &mut ctx.rng(); - let peer_key = rng.gen::().public(); - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.get(1))) - .unwrap(); - - // Check that the actor has sent `get_block` request to the peer, but only for block #2. - let message = message_receiver.recv(ctx).await?; - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, number, .. - }) = message; - assert_eq!(recipient, peer_key); - assert_eq!(number, setup.blocks[1].number()); - assert!(message_receiver.try_recv().is_none()); - Ok(()) - } -} - -#[tokio::test] -async fn filtering_block_retrieval() { - test_peer_states(FilteringBlockRetrieval).await; -} - -#[derive(Debug)] -struct UpdatingPeerStateWithMultipleBlocks; - -impl UpdatingPeerStateWithMultipleBlocks { - const MAX_CONCURRENT_BLOCKS: usize = 3; -} - -#[async_trait] -impl Test for UpdatingPeerStateWithMultipleBlocks { - const BLOCK_COUNT: usize = 10; - - fn config(&self) -> Config { - let mut config = Config::new(); - config.max_concurrent_blocks_per_peer = Self::MAX_CONCURRENT_BLOCKS; - // ^ We want to test rate limiting for peers - config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - config - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - storage, - mut message_receiver, - mut events_receiver, - } = handles; - - let rng = &mut ctx.rng(); - let peer_key = rng.gen::().public(); - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.last()).clone()) - .unwrap(); - - let mut requested_blocks = HashMap::new(); - for _ in setup.blocks.iter() { - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message_receiver.recv(ctx).await.unwrap(); - - tracing::trace!("Received request for block #{number}"); - assert_eq!(recipient, peer_key); - assert!( - requested_blocks.insert(number, response).is_none(), - "Block #{number} requested twice" - ); - - if requested_blocks.len() == Self::MAX_CONCURRENT_BLOCKS || rng.gen() { - // Answer a random request. 
- let number = *requested_blocks.keys().choose(rng).unwrap(); - let response = requested_blocks.remove(&number).unwrap(); - response.send(make_response(setup.block(number))).unwrap(); - - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(got) if got == number); - } - clock.advance(BLOCK_SLEEP_INTERVAL); - } - - // Answer all remaining requests. - for (number, response) in requested_blocks { - response.send(make_response(setup.block(number))).unwrap(); - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(got) if got == number); - } - - let expected_block_number = BlockNumber(Self::BLOCK_COUNT as u64 - 1); - storage - .wait_until_persisted(ctx, expected_block_number) - .await?; - Ok(()) - } -} - -#[tokio::test] -async fn updating_peer_state_with_multiple_blocks() { - test_peer_states(UpdatingPeerStateWithMultipleBlocks).await; -} - -#[derive(Debug)] -struct DisconnectingPeer; - -#[async_trait] -impl Test for DisconnectingPeer { - const BLOCK_COUNT: usize = 5; - - fn config(&self) -> Config { - let mut config = Config::new(); - config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - config - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - storage, - mut message_receiver, - mut events_receiver, - } = handles; - - let rng = &mut ctx.rng(); - let peer_key = rng.gen::().public(); - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.first())) - .unwrap(); - - // Drop the response sender emulating peer disconnect. - let msg = message_receiver.recv(ctx).await?; - { - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - .. - }) = &msg; - assert_eq!(recipient, &peer_key); - assert_eq!(number, &setup.blocks[0].number()); - } - drop(msg); - - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::PeerDropped(key) if key == peer_key), - ) - .await - .context("wait for PeerDropped")?; - - // Check that no new requests are sent (there are no peers to send them to). - clock.advance(BLOCK_SLEEP_INTERVAL); - assert_matches!(message_receiver.try_recv(), None); - - // Re-connect the peer with an updated state. - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.get(1))) - .unwrap(); - // Ensure that blocks are re-requested. - clock.advance(BLOCK_SLEEP_INTERVAL); - - let mut responses = HashMap::with_capacity(2); - for _ in 0..2 { - let message = message_receiver.recv(ctx).await?; - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message; - assert_eq!(recipient, peer_key); - assert!(responses.insert(number, response).is_none()); - } - - assert!(responses.contains_key(&setup.blocks[0].number())); - assert!(responses.contains_key(&setup.blocks[1].number())); - // Send one of the responses and drop the other request. - let response = responses.remove(&setup.blocks[1].number()).unwrap(); - response.send(make_response(setup.blocks.get(1))).unwrap(); - - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::GotBlock(n) if n==setup.blocks[1].number()), - ) - .await?; - drop(responses); - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::PeerDropped(key) if key == peer_key), - ) - .await?; - - // Check that no new requests are sent (there are no peers to send them to). 
- clock.advance(BLOCK_SLEEP_INTERVAL); - assert_matches!(message_receiver.try_recv(), None); - - // Re-connect the peer with the same state. - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.get(1))) - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - - let message = message_receiver.recv(ctx).await?; - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message; - assert_eq!(recipient, peer_key); - assert_eq!(number, setup.blocks[0].number()); - response.send(make_response(setup.blocks.first())).unwrap(); - - let peer_event = events_receiver.recv(ctx).await?; - assert_matches!(peer_event, PeerStateEvent::GotBlock(n) if n==setup.blocks[0].number()); - - // Check that no new requests are sent (all blocks are downloaded). - clock.advance(BLOCK_SLEEP_INTERVAL); - assert_matches!(message_receiver.try_recv(), None); - - storage.wait_until_persisted(ctx, BlockNumber(1)).await?; - Ok(()) - } -} - -#[tokio::test] -async fn disconnecting_peer() { - test_peer_states(DisconnectingPeer).await; -} - -#[derive(Debug)] -struct DownloadingBlocksInGaps { - local_blocks: Vec<usize>, - increase_peer_block_number_during_test: bool, -} - -impl DownloadingBlocksInGaps { - fn new(local_blocks: &[usize]) -> Self { - Self { - local_blocks: local_blocks.to_vec(), - increase_peer_block_number_during_test: false, - } - } -} - -#[async_trait] -impl Test for DownloadingBlocksInGaps { - const BLOCK_COUNT: usize = 10; - - fn config(&self) -> Config { - let mut config = Config::new(); - config.max_concurrent_blocks = 1; - // ^ Forces the node to download blocks in a deterministic order - config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - config - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - storage, - mut message_receiver, - .. - } = handles; - - scope::run!(ctx, |ctx, s| async { - for n in &self.local_blocks { - s.spawn(storage.queue_block(ctx, setup.blocks[*n].clone())); - } - let rng = &mut ctx.rng(); - let peer_key = rng.gen::<node::SecretKey>().public(); - let mut last_peer_block = if self.increase_peer_block_number_during_test { - setup.blocks.choose(rng) - } else { - setup.blocks.last() - }; - peer_states - .update(&peer_key, sync_state(&setup, last_peer_block)) - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - - // Check that all missing blocks are requested.
- for n in 0..setup.blocks.len() { - if self.local_blocks.contains(&n) { - continue; - } - let n = setup.blocks[n].number(); - if n > last_peer_block.unwrap().number() { - last_peer_block = setup.blocks.iter().filter(|b| b.number() >= n).choose(rng); - peer_states - .update(&peer_key, sync_state(&setup, last_peer_block)) - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - } - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message_receiver.recv(ctx).await?; - - assert_eq!(recipient, peer_key); - assert!(number <= last_peer_block.unwrap().number()); - response.send(make_response(setup.block(number))).unwrap(); - storage.wait_until_persisted(ctx, number).await?; - clock.advance(BLOCK_SLEEP_INTERVAL); - } - Ok(()) - }) - .await?; - Ok(()) - } -} - -const LOCAL_BLOCK_NUMBERS: [&[usize]; 3] = [&[1, 9], &[3, 5, 6, 8], &[4]]; - -#[test_casing(6, Product((LOCAL_BLOCK_NUMBERS, [false, true])))] -#[tokio::test] -async fn downloading_blocks_in_gaps( - local_blocks: &[usize], - increase_peer_block_number_during_test: bool, -) { - let mut test = DownloadingBlocksInGaps::new(local_blocks); - test.increase_peer_block_number_during_test = increase_peer_block_number_during_test; - test_peer_states(test).await; -} - -#[derive(Debug)] -struct LimitingGetBlockConcurrency; - -#[async_trait] -impl Test for LimitingGetBlockConcurrency { - const BLOCK_COUNT: usize = 5; - - fn config(&self) -> Config { - let mut config = Config::new(); - config.max_concurrent_blocks = 3; - config - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - setup, - peer_states, - storage, - mut message_receiver, - .. - } = handles; - let rng = &mut ctx.rng(); - let peer_key = rng.gen::<node::SecretKey>().public(); - peer_states - .update(&peer_key, sync_state(&setup, setup.blocks.last())) - .unwrap(); - - // The actor should request 3 new blocks it's now aware of from the only peer it's currently - // aware of. Note that blocks may be queried in any order. - let mut message_responses = HashMap::new(); - for _ in 0..3 { - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, peer_key); - assert!(message_responses.insert(number, response).is_none()); - } - assert_matches!(message_receiver.try_recv(), None); - assert_eq!( - message_responses.keys().copied().collect::<HashSet<_>>(), - setup.blocks[0..3].iter().map(|b| b.number()).collect(), - ); - tracing::info!("blocks requested"); - - // Send a correct response. - let response = message_responses.remove(&setup.blocks[0].number()).unwrap(); - response.send(make_response(setup.blocks.first())).unwrap(); - storage - .wait_until_persisted(ctx, setup.blocks[0].number()) - .await?; - - // The actor should now request another block. - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, number, .. - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, peer_key); - assert_eq!(number, setup.blocks[3].number()); - - Ok(()) - } -} - -#[tokio::test] -async fn limiting_get_block_concurrency() { - test_peer_states(LimitingGetBlockConcurrency).await; -}
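Tests like `LimitingGetBlockConcurrency` above pin down the `max_concurrent_blocks` cap: at most three `get_block` requests may be in flight at once, and answering one frees a slot for the next. As a rough illustration of the pattern being tested (not the actor's actual implementation; `fetch_blocks` and its signature are hypothetical), such a cap is commonly enforced by tying each outstanding request to a semaphore permit:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Hypothetical sketch: at most `max_concurrent` fetches run at once,
// because each spawned fetch holds a semaphore permit until it resolves,
// mirroring the behavior asserted by the tests above.
async fn fetch_blocks(max_concurrent: usize, block_numbers: Vec<u64>) {
    let semaphore = Arc::new(Semaphore::new(max_concurrent));
    let mut fetches = tokio::task::JoinSet::new();
    for number in block_numbers {
        // Blocks here once the cap is reached; a permit is freed whenever
        // a previously spawned fetch completes and drops it.
        let permit = semaphore.clone().acquire_owned().await.expect("semaphore closed");
        fetches.spawn(async move {
            let _permit = permit; // released when this fetch finishes
            // ... send the `get_block` request for `number` and await the response ...
            let _ = number;
        });
    }
    while fetches.join_next().await.is_some() {}
}
```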
diff --git a/node/actors/sync_blocks/src/peers/tests/fakes.rs b/node/actors/sync_blocks/src/peers/tests/fakes.rs deleted file mode 100644 index 95c6ddd8..00000000 --- a/node/actors/sync_blocks/src/peers/tests/fakes.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! Tests focused on handling peers providing fake information to the node. - -use super::*; -use crate::tests::sync_state; -use zksync_consensus_roles::{validator, validator::testonly::Setup}; -use zksync_consensus_storage::testonly::new_store; - -#[tokio::test] -async fn processing_invalid_sync_states() { - let ctx = &ctx::test_root(&ctx::RealClock); - let rng = &mut ctx.rng(); - let mut setup = Setup::new(rng, 4); - setup.push_blocks(rng, 3); - let (storage, _runner) = new_store(ctx, &setup.genesis).await; - - let (message_sender, _) = channel::unbounded(); - let peer_states = PeerStates::new(Config::new(), storage, message_sender); - let peer = &rng.gen::<node::SecretKey>().public(); - - let mut invalid_block = setup.blocks[1].clone(); - invalid_block.justification.message.proposal.number = rng.gen(); - let invalid_sync_state = sync_state(&setup, Some(&invalid_block)); - assert!(peer_states.update(peer, invalid_sync_state).is_err()); - - let mut other_network = Setup::new(rng, 4); - other_network.push_blocks(rng, 2); - let invalid_sync_state = sync_state(&other_network, other_network.blocks.get(1)); - assert!(peer_states.update(peer, invalid_sync_state).is_err()); -} - -#[derive(Debug)] -struct PeerWithFakeSyncState; - -#[async_trait] -impl Test for PeerWithFakeSyncState { - const BLOCK_COUNT: usize = 10; - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - mut events_receiver, - .. - } = handles; - - let rng = &mut ctx.rng(); - let peer_key = rng.gen::<node::SecretKey>().public(); - let mut invalid_block = setup.blocks[1].clone(); - invalid_block.justification.message.proposal.number = rng.gen(); - let fake_sync_state = sync_state(&setup, Some(&invalid_block)); - assert!(peer_states.update(&peer_key, fake_sync_state).is_err()); - - clock.advance(BLOCK_SLEEP_INTERVAL); - assert_matches!(events_receiver.try_recv(), None); - Ok(()) - } -} - -#[tokio::test] -async fn receiving_fake_sync_state_from_peer() { - test_peer_states(PeerWithFakeSyncState).await; -} - -#[derive(Debug)] -struct PeerWithFakeBlock; - -#[async_trait] -impl Test for PeerWithFakeBlock { - const BLOCK_COUNT: usize = 10; - - fn config(&self) -> Config { - let mut cfg = Config::new(); - cfg.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - cfg - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - storage, - mut message_receiver, - mut events_receiver, - } = handles; - - let rng = &mut ctx.rng(); - - for fake_block in [ - // other block than requested - setup.blocks[1].clone(), - // block with wrong validator set - { - let mut s = Setup::new(rng, 4); - s.push_blocks(rng, 1); - s.blocks[0].clone() - }, - // block with mismatching payload, - { - let mut block = setup.blocks[0].clone(); - block.payload = validator::Payload(b"invalid".to_vec()); - block - }, - ] { - let key = rng.gen::<node::SecretKey>().public(); - peer_states - .update(&key, sync_state(&setup, setup.blocks.first())) - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, key); - assert_eq!(number, setup.blocks[0].number()); - response.send(Ok(fake_block)).unwrap(); - - wait_for_event(ctx, &mut events_receiver, |ev| { - matches!(ev, - PeerStateEvent::RpcFailed { - block_number, - peer_key, - } if peer_key == key && block_number == number - ) - }) - .await?; - } -
- // The invalid block must not be saved. - assert!(storage.block(ctx, BlockNumber(1)).await?.is_none()); - Ok(()) - } -} - -#[tokio::test] -async fn receiving_fake_block_from_peer() { - test_peer_states(PeerWithFakeBlock).await; -}
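The fake-block cases above are rejected because a `FinalBlock` must be consistent with its own justification. A condensed sketch of the payload part of that check (illustrative only: `check_payload` is a hypothetical helper, and the real validation in the roles/storage crates also verifies the QC signatures against the genesis validator set, which is what catches the wrong-validator-set case):

```rust
use zksync_consensus_roles::validator;

// Illustrative sketch: a block with a tampered payload (or one taken from
// another chain) fails because the payload no longer hashes to the
// commitment carried inside the quorum certificate.
fn check_payload(block: &validator::FinalBlock) -> anyhow::Result<()> {
    anyhow::ensure!(
        block.justification.message.proposal.payload == block.payload.hash(),
        "payload does not match the QC commitment"
    );
    Ok(())
}
```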
diff --git a/node/actors/sync_blocks/src/peers/tests/mod.rs b/node/actors/sync_blocks/src/peers/tests/mod.rs deleted file mode 100644 index 01120c5b..00000000 --- a/node/actors/sync_blocks/src/peers/tests/mod.rs +++ /dev/null @@ -1,157 +0,0 @@ -use super::*; -use assert_matches::assert_matches; -use async_trait::async_trait; -use rand::{seq::IteratorRandom, Rng}; -use std::{collections::HashSet, fmt}; -use test_casing::{test_casing, Product}; -use tracing::instrument; -use zksync_concurrency::{ - testonly::{abort_on_panic, set_timeout}, - time, -}; -use zksync_consensus_roles::validator; -use zksync_consensus_storage::testonly::new_store; - -mod basics; -mod fakes; -mod multiple_peers; - -const TEST_TIMEOUT: time::Duration = time::Duration::seconds(5); -const BLOCK_SLEEP_INTERVAL: time::Duration = time::Duration::milliseconds(5); - -async fn wait_for_event( - ctx: &ctx::Ctx, - events: &mut channel::UnboundedReceiver<PeerStateEvent>, - pred: impl Fn(PeerStateEvent) -> bool, -) -> ctx::OrCanceled<()> { - while !pred(events.recv(ctx).await?) {} - Ok(()) -} - -#[derive(Debug)] -struct TestHandles { - clock: ctx::ManualClock, - setup: validator::testonly::Setup, - peer_states: Arc<PeerStates>, - storage: Arc<BlockStore>, - message_receiver: channel::UnboundedReceiver<io::OutputMessage>, - events_receiver: channel::UnboundedReceiver<PeerStateEvent>, -} - -#[async_trait] -trait Test: fmt::Debug + Send + Sync { - const BLOCK_COUNT: usize; - // TODO: move this to genesis - const GENESIS_BLOCK_NUMBER: usize = 0; - - fn config(&self) -> Config { - Config::new() - } - - async fn initialize_storage( - &self, - _ctx: &ctx::Ctx, - _storage: &BlockStore, - _setup: &validator::testonly::Setup, - ) { - // Does nothing by default - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()>; -} - -#[instrument(level = "trace")] -async fn test_peer_states<T: Test>(test: T) { - abort_on_panic(); - let _guard = set_timeout(TEST_TIMEOUT); - - let clock = ctx::ManualClock::new(); - let ctx = &ctx::test_root(&clock); - let rng = &mut ctx.rng(); - let mut setup = validator::testonly::Setup::new(rng, 4); - setup.push_blocks(rng, T::BLOCK_COUNT); - let (store, store_run) = new_store(ctx, &setup.genesis).await; - test.initialize_storage(ctx, store.as_ref(), &setup).await; - - let (message_sender, message_receiver) = channel::unbounded(); - let (events_sender, events_receiver) = channel::unbounded(); - let mut peer_states = PeerStates::new(test.config(), store.clone(), message_sender); - peer_states.events_sender = Some(events_sender); - let peer_states = Arc::new(peer_states); - let test_handles = TestHandles { - clock, - setup, - peer_states: peer_states.clone(), - storage: store.clone(), - message_receiver, - events_receiver, - }; - - scope::run!(ctx, |ctx, s| async { - s.spawn_bg(store_run.run(ctx)); - s.spawn_bg(async { - peer_states.run_block_fetcher(ctx).await.ok(); - Ok(()) - }); - test.test(ctx, test_handles).await - }) - .await - .unwrap(); -} - -#[tokio::test] -async fn test_try_acquire_peer_permit() { - let clock = ctx::ManualClock::new(); - let ctx = &ctx::test_root(&clock); - let rng = &mut ctx.rng(); - let mut setup = validator::testonly::Setup::new(rng, 1); - setup.push_blocks(rng, 10); - scope::run!(ctx, |ctx, s| async { - let (store, runner) = new_store(ctx, &setup.genesis).await; - s.spawn_bg(runner.run(ctx)); - let (send, _recv) = ctx::channel::unbounded(); - let peer_states = PeerStates::new(Config::default(), store, send); - - let peer: node::PublicKey = rng.gen(); - let b = &setup.blocks; - for s in [ - // Empty entry. - BlockStoreState { - first: b[0].number(), - last: None, - }, - // Entry with some blocks. - BlockStoreState { - first: b[0].number(), - last: Some(b[3].justification.clone()), - }, - // Entry with changed first. - BlockStoreState { - first: b[1].number(), - last: Some(b[3].justification.clone()), - }, - // Empty entry again. - BlockStoreState { - first: b[1].number(), - last: None, - }, - ] { - peer_states.update(&peer, s.clone()).unwrap(); - for block in b { - let got = peer_states.select_peer(block.number()); - if s.first <= block.number() - && s.last - .as_ref() - .map_or(false, |last| block.number() <= last.header().number) - { - assert_eq!(Some(peer.clone()), got); - } else { - assert_eq!(None, got); - } - } - } - Ok(()) - }) - .await - .unwrap(); -} diff --git a/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs b/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs deleted file mode 100644 index 4281c69c..00000000 --- a/node/actors/sync_blocks/src/peers/tests/multiple_peers.rs +++ /dev/null @@ -1,345 +0,0 @@ -use super::*; -use crate::tests::{make_response, sync_state}; - -#[derive(Debug)] -struct RequestingBlocksFromTwoPeers; - -#[async_trait] -impl Test for RequestingBlocksFromTwoPeers { - const BLOCK_COUNT: usize = 5; - - fn config(&self) -> Config { - let mut config = Config::new(); - config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - config.max_concurrent_blocks = 5; - config.max_concurrent_blocks_per_peer = 1; - config - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - storage, - mut message_receiver, - mut events_receiver, - } = handles; - - let rng = &mut ctx.rng(); - let first_peer = rng.gen::<node::SecretKey>().public(); - peer_states - .update(&first_peer, sync_state(&setup, setup.blocks.get(1))) - .unwrap(); - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number: first_peer_block_number, - response: first_peer_response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, first_peer); - assert!(setup.blocks[0..=1] - .iter() - .any(|b| b.number() == first_peer_block_number)); - tracing::info!(%first_peer_block_number, "received request"); - - let second_peer = rng.gen::<node::SecretKey>().public(); - peer_states - .update(&second_peer, sync_state(&setup, setup.blocks.get(3))) - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number: second_peer_block_number, - response: second_peer_response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, second_peer); - assert!(setup.blocks[0..=1] - .iter() - .any(|b| b.number() == second_peer_block_number)); - tracing::info!(%second_peer_block_number, "received request"); - - first_peer_response - .send(make_response(setup.block(first_peer_block_number))) - .unwrap(); - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == first_peer_block_number), - ) - .await - .unwrap(); - // The node shouldn't send more requests to the first peer since it would be beyond - // its known latest block number (2).
- clock.advance(BLOCK_SLEEP_INTERVAL); - assert_matches!(message_receiver.try_recv(), None); - - peer_states - .update(&first_peer, sync_state(&setup, setup.blocks.get(3))) - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - // Now the actor can get block #3 from the peer. - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number: first_peer_block_number, - response: first_peer_response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, first_peer); - assert!(setup.blocks[2..=3] - .iter() - .any(|b| b.number() == first_peer_block_number)); - tracing::info!(%first_peer_block_number, "received request"); - - first_peer_response - .send(make_response(setup.block(first_peer_block_number))) - .unwrap(); - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == first_peer_block_number), - ) - .await - .unwrap(); - clock.advance(BLOCK_SLEEP_INTERVAL); - - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number: first_peer_block_number, - response: first_peer_response, - }) = message_receiver.recv(ctx).await?; - assert_eq!(recipient, first_peer); - assert!(setup.blocks[2..=3] - .iter() - .any(|b| b.number() == first_peer_block_number)); - tracing::info!(%first_peer_block_number, "received request"); - - second_peer_response - .send(make_response(setup.block(second_peer_block_number))) - .unwrap(); - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == second_peer_block_number), - ) - .await - .unwrap(); - first_peer_response - .send(make_response(setup.block(first_peer_block_number))) - .unwrap(); - wait_for_event( - ctx, - &mut events_receiver, - |ev| matches!(ev, PeerStateEvent::GotBlock(num) if num == first_peer_block_number), - ) - .await - .unwrap(); - // No more blocks should be requested from peers. - clock.advance(BLOCK_SLEEP_INTERVAL); - assert_matches!(message_receiver.try_recv(), None); - - storage - .wait_until_persisted(ctx, setup.blocks[3].number()) - .await?; - Ok(()) - } -} - -/*#[tokio::test] -async fn requesting_blocks_from_two_peers() { - test_peer_states(RequestingBlocksFromTwoPeers).await; -}*/ - -#[derive(Debug, Clone, Copy)] -struct PeerBehavior { - /// The peer will go offline after this block. - last_block: usize, - /// The peer will stop responding after this block, but will still announce `SyncState` updates. - /// Logically, should be `<= last_block`. 
- last_block_to_return: usize, -} - -impl Default for PeerBehavior { - fn default() -> Self { - Self { - last_block: usize::MAX, - last_block_to_return: usize::MAX, - } - } -} - -#[derive(Debug, Clone)] -struct RequestingBlocksFromMultiplePeers { - peer_behavior: Vec<PeerBehavior>, - max_concurrent_blocks_per_peer: usize, - respond_probability: f64, -} - -impl RequestingBlocksFromMultiplePeers { - fn new(peer_count: usize, max_concurrent_blocks_per_peer: usize) -> Self { - Self { - peer_behavior: vec![PeerBehavior::default(); peer_count], - max_concurrent_blocks_per_peer, - respond_probability: 0.0, - } - } - - fn create_peers(&self, rng: &mut impl Rng) -> HashMap<node::PublicKey, PeerBehavior> { - let last_block_number = Self::BLOCK_COUNT - 1; - let peers = self.peer_behavior.iter().copied().map(|behavior| { - let behavior = PeerBehavior { - last_block: behavior.last_block.min(last_block_number), - last_block_to_return: behavior.last_block_to_return.min(last_block_number), - }; - let peer_key = rng.gen::<node::SecretKey>().public(); - (peer_key, behavior) - }); - peers.collect() - } -} - -#[async_trait] -impl Test for RequestingBlocksFromMultiplePeers { - const BLOCK_COUNT: usize = 20; - - fn config(&self) -> Config { - let mut config = Config::new(); - config.sleep_interval_for_get_block = BLOCK_SLEEP_INTERVAL; - config.max_concurrent_blocks_per_peer = self.max_concurrent_blocks_per_peer; - config - } - - async fn test(self, ctx: &ctx::Ctx, handles: TestHandles) -> anyhow::Result<()> { - let TestHandles { - clock, - setup, - peer_states, - storage, - mut message_receiver, - mut events_receiver, - } = handles; - - let rng = &mut ctx.rng(); - let peers = &self.create_peers(rng); - - scope::run!(ctx, |ctx, s| async { - // Announce peer states. - for (peer_key, peer) in peers { - peer_states.update(peer_key, sync_state(&setup, setup.blocks.get(peer.last_block))).unwrap(); - } - - s.spawn_bg(async { - let mut responses_by_peer: HashMap<_, Vec<_>> = HashMap::new(); - let mut requested_blocks = HashSet::new(); - while requested_blocks.len() < Self::BLOCK_COUNT { - let Ok(message) = message_receiver.recv(ctx).await else { - return Ok(()); // Test is finished - }; - let io::OutputMessage::Network(SyncBlocksInputMessage::GetBlock { - recipient, - number, - response, - }) = message; - - tracing::trace!("Block #{number} requested from {recipient:?}"); - assert!(number <= setup.blocks[peers[&recipient].last_block].number()); - - if setup.blocks[peers[&recipient].last_block_to_return].number() < number { - tracing::trace!("Dropping request for block #{number} to {recipient:?}"); - continue; - } - - assert!( - requested_blocks.insert(number), - "Block #{number} requested twice from a responsive peer" - ); - let peer_responses = responses_by_peer.entry(recipient).or_default(); - peer_responses.push((number, response)); - assert!(peer_responses.len() <= self.max_concurrent_blocks_per_peer); - if peer_responses.len() == self.max_concurrent_blocks_per_peer { - // Peer is at capacity, respond to a random request in order to progress - let idx = rng.gen_range(0..peer_responses.len()); - let (number, response) = peer_responses.remove(idx); - response.send(make_response(setup.block(number))).unwrap(); - } - - // Respond to some other random requests. - for peer_responses in responses_by_peer.values_mut() { - // Indexes are reversed in order to not be affected by removals.
- for idx in (0..peer_responses.len()).rev() { - if !rng.gen_bool(self.respond_probability) { - continue; - } - let (number, response) = peer_responses.remove(idx); - response.send(make_response(setup.block(number))).unwrap(); - } - } - } - - // Answer to all remaining responses - for (number, response) in responses_by_peer.into_values().flatten() { - response.send(make_response(setup.block(number))).unwrap(); - } - Ok(()) - }); - - // We advance the clock when a node receives a new block or updates a peer state, - // since in both cases some new blocks may become available for download. - let mut block_numbers = HashSet::with_capacity(Self::BLOCK_COUNT); - while block_numbers.len() < Self::BLOCK_COUNT { - let peer_event = events_receiver.recv(ctx).await?; - match peer_event { - PeerStateEvent::GotBlock(number) => { - assert!( - block_numbers.insert(number), - "Block #{number} received twice" - ); - clock.advance(BLOCK_SLEEP_INTERVAL); - } - PeerStateEvent::RpcFailed{..} | PeerStateEvent::PeerDropped(_) => { /* Do nothing */ } - } - } - - storage.wait_until_persisted(ctx, setup.blocks.last().unwrap().header().number).await?; - Ok(()) - }) - .await - } -} - -const RESPOND_PROBABILITIES: [f64; 5] = [0.0, 0.1, 0.2, 0.5, 0.9]; - -#[test_casing(15, Product(([1, 2, 3], RESPOND_PROBABILITIES)))] -#[tokio::test] -async fn requesting_blocks(max_concurrent_blocks_per_peer: usize, respond_probability: f64) { - let mut test = RequestingBlocksFromMultiplePeers::new(3, max_concurrent_blocks_per_peer); - test.respond_probability = respond_probability; - test_peer_states(test.clone()).await; -} - -#[test_casing(15, Product(([1, 2, 3], RESPOND_PROBABILITIES)))] -#[tokio::test] -async fn requesting_blocks_with_failures( - max_concurrent_blocks_per_peer: usize, - respond_probability: f64, -) { - let mut test = RequestingBlocksFromMultiplePeers::new(3, max_concurrent_blocks_per_peer); - test.respond_probability = respond_probability; - test.peer_behavior[0].last_block = 5; - test.peer_behavior[1].last_block = 15; - test_peer_states(test).await; -} - -#[test_casing(15, Product(([1, 2, 3], RESPOND_PROBABILITIES)))] -#[tokio::test] -async fn requesting_blocks_with_unreliable_peers( - max_concurrent_blocks_per_peer: usize, - respond_probability: f64, -) { - let mut test = RequestingBlocksFromMultiplePeers::new(3, max_concurrent_blocks_per_peer); - test.respond_probability = respond_probability; - test.peer_behavior[0].last_block_to_return = 5; - test.peer_behavior[1].last_block_to_return = 15; - test_peer_states(test).await; -}
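(Each `#[test_casing(15, Product((...)))]` attribute above expands its test into 15 independent cases: the Cartesian product of the three `max_concurrent_blocks_per_peer` values `[1, 2, 3]` with the five entries of `RESPOND_PROBABILITIES`, i.e. 3 × 5 = 15.)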
diff --git a/node/actors/sync_blocks/src/tests/end_to_end.rs b/node/actors/sync_blocks/src/tests/end_to_end.rs deleted file mode 100644 index 0a0f1849..00000000 --- a/node/actors/sync_blocks/src/tests/end_to_end.rs +++ /dev/null @@ -1,393 +0,0 @@ -//! End-to-end tests that launch a network of nodes and the `SyncBlocks` actor for each node. -use super::*; -use anyhow::Context as _; -use async_trait::async_trait; -use rand::seq::SliceRandom; -use std::fmt; -use test_casing::test_casing; -use tracing::{instrument, Instrument}; -use zksync_concurrency::{ - ctx, - ctx::channel, - scope, - testonly::{abort_on_panic, set_timeout}, -}; -use zksync_consensus_network as network; -use zksync_consensus_storage::testonly::new_store_with_first; - -type NetworkDispatcherPipe = - pipe::DispatcherPipe<network::io::InputMessage, network::io::OutputMessage>; - -#[derive(Debug)] -struct Node { - store: Arc<BlockStore>, - start: channel::Sender<()>, - terminate: channel::Sender<()>, -} - -impl Node { - async fn new(ctx: &ctx::Ctx, network: network::Config, setup: &Setup) -> (Self, NodeRunner) { - Self::new_with_first(ctx, network, setup, setup.genesis.fork.first_block).await - } - - async fn new_with_first( - ctx: &ctx::Ctx, - network: network::Config, - setup: &Setup, - first: validator::BlockNumber, - ) -> (Self, NodeRunner) { - let (store, store_runner) = new_store_with_first(ctx, &setup.genesis, first).await; - let (start_send, start_recv) = channel::bounded(1); - let (terminate_send, terminate_recv) = channel::bounded(1); - - let runner = NodeRunner { - network, - store: store.clone(), - store_runner, - start: start_recv, - terminate: terminate_recv, - }; - let this = Self { - store, - start: start_send, - terminate: terminate_send, - }; - (this, runner) - } - - fn start(&self) { - let _ = self.start.try_send(()); - } - - async fn terminate(&self, ctx: &ctx::Ctx) -> ctx::OrCanceled<()> { - let _ = self.terminate.try_send(()); - self.terminate.closed(ctx).await - } -} - -#[must_use] -struct NodeRunner { - network: network::Config, - store: Arc<BlockStore>, - store_runner: BlockStoreRunner, - start: channel::Receiver<()>, - terminate: channel::Receiver<()>, -} - -impl NodeRunner { - async fn run(mut self, ctx: &ctx::Ctx) -> anyhow::Result<()> { - tracing::info!("NodeRunner::run()"); - let key = self.network.gossip.key.public(); - let (sync_blocks_actor_pipe, sync_blocks_dispatcher_pipe) = pipe::new(); - let (mut network, network_runner) = - network::testonly::Instance::new(self.network.clone(), self.store.clone()); - let sync_blocks_config = Config::new(); - let res = scope::run!(ctx, |ctx, s| async { - s.spawn_bg(self.store_runner.run(ctx)); - s.spawn_bg(network_runner.run(ctx)); - network.wait_for_gossip_connections().await; - tracing::info!("Node connected to peers"); - - self.start.recv(ctx).await?; - tracing::info!("switch_on"); - s.spawn_bg( - async { - Self::run_executor(ctx, sync_blocks_dispatcher_pipe, network.pipe()) - .await - .with_context(|| format!("executor for {key:?}")) - } - .instrument(tracing::info_span!("mock_executor", ?key)), - ); - s.spawn_bg(sync_blocks_config.run(ctx, sync_blocks_actor_pipe, self.store.clone())); - tracing::info!("Node is fully started"); - - let _ = self.terminate.recv(ctx).await; - tracing::info!("stopping"); - Ok(()) - }) - .await; - drop(self.terminate); - tracing::info!("node stopped"); - res - } - - async fn run_executor( - ctx: &ctx::Ctx, - mut sync_blocks_dispatcher_pipe: pipe::DispatcherPipe<InputMessage, OutputMessage>, - network_dispatcher_pipe: &mut NetworkDispatcherPipe, - ) -> anyhow::Result<()> { - scope::run!(ctx, |ctx, s| async { - s.spawn(async { - while let Ok(message) = network_dispatcher_pipe.recv.recv(ctx).await { - tracing::trace!(?message, "Received network message"); - match message { - network::io::OutputMessage::SyncBlocks(req) => { - sync_blocks_dispatcher_pipe.send.send(req.into()); - } - _ => unreachable!("consensus messages should not be produced"), - } - } - Ok(()) - }); - 
while let Ok(message) = sync_blocks_dispatcher_pipe.recv.recv(ctx).await { - let OutputMessage::Network(message) = message; - tracing::trace!(?message, "Received sync blocks message"); - network_dispatcher_pipe.send.send(message.into()); - } - Ok(()) - }) - .await - } -} - -#[async_trait] -trait GossipNetworkTest: fmt::Debug + Send { - /// Returns the number of nodes in the gossip network and number of peers for each node. - fn network_params(&self) -> (usize, usize); - async fn test(self, ctx: &ctx::Ctx, setup: &Setup, network: Vec<Node>) -> anyhow::Result<()>; -} - -#[instrument(level = "trace")] -async fn test_sync_blocks<T: GossipNetworkTest>(test: T) { - abort_on_panic(); - let _guard = set_timeout(TEST_TIMEOUT); - let ctx = &ctx::test_root(&ctx::AffineClock::new(25.)); - let rng = &mut ctx.rng(); - let (node_count, gossip_peers) = test.network_params(); - - let mut setup = validator::testonly::Setup::new(rng, node_count); - setup.push_blocks(rng, 10); - scope::run!(ctx, |ctx, s| async { - let mut nodes = vec![]; - for (i, net) in network::testonly::new_configs(rng, &setup, gossip_peers) - .into_iter() - .enumerate() - { - let (node, runner) = Node::new(ctx, net, &setup).await; - s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); - nodes.push(node); - } - test.test(ctx, &setup, nodes).await - }) - .await - .unwrap(); -} - -#[derive(Debug)] -struct BasicSynchronization { - node_count: usize, - gossip_peers: usize, -} - -#[async_trait] -impl GossipNetworkTest for BasicSynchronization { - fn network_params(&self) -> (usize, usize) { - (self.node_count, self.gossip_peers) - } - - async fn test(self, ctx: &ctx::Ctx, setup: &Setup, nodes: Vec<Node>) -> anyhow::Result<()> { - let rng = &mut ctx.rng(); - - tracing::info!("Check initial node states"); - for node in &nodes { - node.start(); - let state = node.store.queued(); - assert_eq!(state.first, setup.genesis.fork.first_block); - assert_eq!(state.last, None); - } - - for block in &setup.blocks[0..5] { - let node = nodes.choose(rng).unwrap(); - node.store.queue_block(ctx, block.clone()).await.unwrap(); - - tracing::info!("Wait until all nodes get block #{}", block.number()); - for node in &nodes { - node.store.wait_until_persisted(ctx, block.number()).await?; - } - } - - let node = nodes.choose(rng).unwrap(); - scope::run!(ctx, |ctx, s| async { - // Add a batch of blocks. - for block in setup.blocks[5..].iter().rev() { - s.spawn_bg(node.store.queue_block(ctx, block.clone())); - } - - // Wait until nodes get all new blocks.
- let last = setup.blocks.last().unwrap().number(); - for node in &nodes { - node.store.wait_until_persisted(ctx, last).await?; - } - Ok(()) - }) - .await?; - Ok(()) - } -} - -#[test_casing(5, [2, 3, 5, 7, 10])] -#[tokio::test(flavor = "multi_thread")] -async fn basic_synchronization_with_single_peer(node_count: usize) { - test_sync_blocks(BasicSynchronization { - node_count, - gossip_peers: 1, - }) - .await; -} - -#[test_casing(5, [(3, 2), (5, 2), (5, 3), (7, 2), (7, 3)])] -#[tokio::test(flavor = "multi_thread")] -async fn basic_synchronization_with_multiple_peers(node_count: usize, gossip_peers: usize) { - test_sync_blocks(BasicSynchronization { - node_count, - gossip_peers, - }) - .await; -} - -#[derive(Debug)] -struct SwitchingOffNodes { - node_count: usize, -} - -#[async_trait] -impl GossipNetworkTest for SwitchingOffNodes { - fn network_params(&self) -> (usize, usize) { - // Ensure that each node is connected to all others via an inbound or outbound channel - (self.node_count, self.node_count / 2) - } - - async fn test(self, ctx: &ctx::Ctx, setup: &Setup, mut nodes: Vec<Node>) -> anyhow::Result<()> { - let rng = &mut ctx.rng(); - nodes.shuffle(rng); - - for node in &nodes { - node.start(); - } - - for i in 0..nodes.len() { - tracing::info!("{} nodes left", nodes.len() - i); - let block = &setup.blocks[i]; - nodes[i..] - .choose(rng) - .unwrap() - .store - .queue_block(ctx, block.clone()) - .await - .unwrap(); - tracing::info!("block {} inserted", block.number()); - - // Wait until all remaining nodes get the new block. - for node in &nodes[i..] { - node.store.wait_until_persisted(ctx, block.number()).await?; - } - tracing::info!("All nodes received block #{}", block.number()); - - // Terminate a random node. - // We start switching off only after the first round, to make sure all nodes are fully - // started. - nodes[i].terminate(ctx).await.unwrap(); - } - tracing::info!("test finished, terminating"); - Ok(()) - } -} - -#[test_casing(5, 3..=7)] -#[tokio::test(flavor = "multi_thread")] -async fn switching_off_nodes(node_count: usize) { - test_sync_blocks(SwitchingOffNodes { node_count }).await; -} - -#[derive(Debug)] -struct SwitchingOnNodes { - node_count: usize, -} - -#[async_trait] -impl GossipNetworkTest for SwitchingOnNodes { - fn network_params(&self) -> (usize, usize) { - (self.node_count, self.node_count / 2) - } - - async fn test(self, ctx: &ctx::Ctx, setup: &Setup, mut nodes: Vec<Node>) -> anyhow::Result<()> { - let rng = &mut ctx.rng(); - nodes.shuffle(rng); - for i in 0..nodes.len() { - nodes[i].start(); // Switch on a node. - let block = &setup.blocks[i]; - nodes[0..i + 1] - .choose(rng) - .unwrap() - .store - .queue_block(ctx, block.clone()) - .await - .unwrap(); - - // Wait until all switched on nodes get the new block. - for node in &nodes[0..i + 1] { - node.store.wait_until_persisted(ctx, block.number()).await?; - } - tracing::trace!("All nodes received block #{}", block.number()); - } - Ok(()) - } -} - -#[test_casing(5, 3..=7)] -#[tokio::test(flavor = "multi_thread")] -async fn switching_on_nodes(node_count: usize) { - test_sync_blocks(SwitchingOnNodes { node_count }).await; -} - -/// Test checking that nodes with different first block can synchronize.
-#[tokio::test(flavor = "multi_thread")] -async fn test_different_first_block() { - abort_on_panic(); - let ctx = &ctx::test_root(&ctx::AffineClock::new(25.)); - let rng = &mut ctx.rng(); - - let mut setup = validator::testonly::Setup::new(rng, 2); - let n = 4; - setup.push_blocks(rng, 10); - scope::run!(ctx, |ctx, s| async { - let mut nodes = vec![]; - // Spawn `n` nodes, all connected to each other. - for (i, net) in network::testonly::new_configs(rng, &setup, n) - .into_iter() - .enumerate() - { - // Choose the first block for the node at random. - let first = setup.blocks.choose(rng).unwrap().number(); - let (node, runner) = Node::new_with_first(ctx, net, &setup, first).await; - s.spawn_bg(runner.run(ctx).instrument(tracing::info_span!("node", i))); - node.start(); - nodes.push(node); - } - // Randomize the order of nodes. - nodes.shuffle(rng); - - for block in &setup.blocks { - // Find nodes interested in the next block. - let interested_nodes: Vec<_> = nodes - .iter() - .filter(|n| n.store.queued().first <= block.number()) - .collect(); - // Store this block to one of them. - if let Some(node) = interested_nodes.choose(rng) { - node.store.queue_block(ctx, block.clone()).await.unwrap(); - } - // Wait until all remaining nodes get the new block. - for node in interested_nodes { - node.store - .wait_until_persisted(ctx, block.number()) - .await - .unwrap(); - } - } - Ok(()) - }) - .await - .unwrap(); -} diff --git a/node/actors/sync_blocks/src/tests/mod.rs b/node/actors/sync_blocks/src/tests/mod.rs deleted file mode 100644 index c7273669..00000000 --- a/node/actors/sync_blocks/src/tests/mod.rs +++ /dev/null @@ -1,24 +0,0 @@ -//! Tests for the block syncing actor. -use super::*; -use zksync_concurrency::time; -use zksync_consensus_network::io::GetBlockError; -use zksync_consensus_roles::validator::{self, testonly::Setup}; -use zksync_consensus_storage::{BlockStore, BlockStoreRunner, BlockStoreState}; -use zksync_consensus_utils::pipe; - -mod end_to_end; - -const TEST_TIMEOUT: time::Duration = time::Duration::seconds(20); - -pub(crate) fn sync_state(setup: &Setup, last: Option<&validator::FinalBlock>) -> BlockStoreState { - BlockStoreState { - first: setup.genesis.fork.first_block, - last: last.map(|b| b.justification.clone()), - } -} - -pub(crate) fn make_response( - block: Option<&validator::FinalBlock>, -) -> Result<validator::FinalBlock, GetBlockError> { - block.cloned().ok_or(GetBlockError::NotAvailable) -} diff --git a/node/deny.toml b/node/deny.toml index 9e0cf1c3..78cba495 100644 --- a/node/deny.toml +++ b/node/deny.toml @@ -59,7 +59,6 @@ multiple-versions = "deny" skip = [ # Old versions required by tempfile and prost-build. { name = "bitflags", version = "1.3.2" }, - { name = "heck", version = "0.4.1" }, # Old version required by tracing-subscriber. { name = "regex-automata", version = "0.1.10" }, @@ -75,7 +74,10 @@ skip = [ { name = "digest", version = "0.10.7" }, # Old versions required by kube. - { name = "strsim", version = "0.10.0" } + { name = "strsim", version = "0.10.0" }, + + # Old versions required by k8s-openapi. + { name = "base64", version = "0.21.7" } ] [sources] diff --git a/node/libs/concurrency/src/ctx/mod.rs b/node/libs/concurrency/src/ctx/mod.rs index f86275df..6af9af06 100644 --- a/node/libs/concurrency/src/ctx/mod.rs +++ b/node/libs/concurrency/src/ctx/mod.rs @@ -11,16 +11,6 @@ //! Instead of "awaiting for new data on the channel", you "await for new data on the channel OR //! for context to get canceled". This way you can implement graceful shutdown //! in a very uniform way. -//! -//! 
Contrary to the golang implementation, we pass the context implicitly -//! in the thread-local memory. Implicit passing may look like magic, however -//! * it is built on top of `tokio::Runtime` which is also passed implicitly, -//! so the concept should be familiar for the tokio users. -//! * it prevents misuse of context, as what we actually try to model here -//! is a reader monad, which in essence is equivalent to implicit argument passing -//! (https://hackage.haskell.org/package/mtl-2.3.1/docs/Control-Monad-Reader.html) -//! * it presumably makes it easier to onboard new users, without having to add an explicit -//! context argument to all functions in their codebase. use crate::{signal, time}; use std::{fmt, future::Future, pin::Pin, sync::Arc, task}; @@ -149,9 +139,9 @@ impl Ctx { _parent: Some(self.0.clone()), })); // Spawn a task propagating task cancelation. - // This task takes references to only to the `canceled` signals - // of parent and child to avoid a reference loop (rather than - // the whole context object) to avoid a memory leak: + // This task takes references only to the `canceled` signals + // of parent and child (rather than the whole context object) + // to avoid a reference loop and therefore a memory leak: // context is automatically canceled when dropped, which // guarantees that this task eventually completes. tokio::spawn(async move { @@ -173,13 +163,6 @@ impl Ctx { CtxAware(self.0.canceled.cancel_safe_recv()) } - /// Awaits until the local context gets canceled. Unlike [`Self::canceled()`], the returned - /// future has a static lifetime. - pub fn canceled_owned(&self) -> impl Future<Output = ()> { - let canceled = self.0.canceled.clone(); - async move { canceled.cancel_safe_recv().await } - } - /// Checks if this context is still active (i.e., not canceled). pub fn is_active(&self) -> bool { !self.0.canceled.try_recv() diff --git a/node/libs/concurrency/src/sync/mod.rs b/node/libs/concurrency/src/sync/mod.rs index 7b5587a6..43cd5c0b 100644 --- a/node/libs/concurrency/src/sync/mod.rs +++ b/node/libs/concurrency/src/sync/mod.rs @@ -129,6 +129,8 @@ pub async fn wait_for<'a, T>( recv: &'a mut watch::Receiver<T>, pred: impl Fn(&T) -> bool, ) -> ctx::OrCanceled<watch::Ref<'a, T>> { + // TODO(gprusak): wait_for is not documented to be cancel-safe. + // We should use changed() instead. if let Ok(res) = ctx.wait(recv.wait_for(pred)).await? { return Ok(res); } diff --git a/node/libs/concurrency/src/time.rs b/node/libs/concurrency/src/time.rs index 718317c6..4e5581c0 100644 --- a/node/libs/concurrency/src/time.rs +++ b/node/libs/concurrency/src/time.rs @@ -6,6 +6,7 @@ pub type Duration = time::Duration; /// Monotonic clock time. +#[allow(deprecated)] pub type Instant = time::Instant; /// UTC time in nanoseconds precision.
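A note on the `TODO(gprusak)` comment introduced in the `wait_for` hunk above: `tokio::sync::watch::Receiver::wait_for` is not documented to be cancel-safe, while `changed()` is. A minimal sketch of the suggested `changed()`-based alternative (a hypothetical free function, not part of the crate):

```rust
use tokio::sync::watch;

// Hypothetical cancel-safe variant of `wait_for`: re-check the predicate
// after every change notification instead of relying on
// `watch::Receiver::wait_for`, whose cancel-safety is undocumented.
async fn wait_until<T: Clone>(
    recv: &mut watch::Receiver<T>,
    pred: impl Fn(&T) -> bool,
) -> Result<T, watch::error::RecvError> {
    loop {
        // `borrow_and_update` marks the current value as seen, so the
        // subsequent `changed()` only wakes on genuinely new values.
        let value = recv.borrow_and_update().clone();
        if pred(&value) {
            return Ok(value);
        }
        // `changed()` is documented as cancel-safe: dropping the future
        // at an await point cannot lose a notification.
        recv.changed().await?;
    }
}
```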