From e9fc927e40e9c4513c1fa0f87e9d7c51f7872ba4 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 1 Oct 2024 11:59:03 -0300 Subject: [PATCH] Initial workaround for the hang of the sync --- .../p2p/blob_downloader.ex | 8 +++++-- .../p2p/block_downloader.ex | 8 +++++-- lib/lambda_ethereum_consensus/p2p/peerbook.ex | 22 ++++++++++++++++++- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/lambda_ethereum_consensus/p2p/blob_downloader.ex b/lib/lambda_ethereum_consensus/p2p/blob_downloader.ex index 4493a4fc0..81877f045 100644 --- a/lib/lambda_ethereum_consensus/p2p/blob_downloader.ex +++ b/lib/lambda_ethereum_consensus/p2p/blob_downloader.ex @@ -32,7 +32,7 @@ defmodule LambdaEthereumConsensus.P2P.BlobDownloader do def request_blobs_by_range(slot, count, on_blobs, retries) do Logger.debug("Requesting blobs", slot: slot) - # TODO: handle no-peers asynchronously? + # FIXME: handle no-peers asynchronously! this is hanging Libp2pPort when there are no peers peer_id = get_some_peer() # NOTE: BeaconBlocksByRangeRequest == BlobSidecarsByRangeRequest @@ -62,7 +62,8 @@ defmodule LambdaEthereumConsensus.P2P.BlobDownloader do P2P.Peerbook.penalize_peer(peer_id) if retries > 0 do - Logger.debug("Retrying request for #{count} blobs", slot: slot) + Logger.info("Retrying request for #{count} blobs, reason: #{inspect(reason)} in 2 second", slot: slot) + Process.sleep(2000) request_blobs_by_range(slot, count, on_blobs, retries - 1) {:ok, store} else @@ -123,6 +124,9 @@ defmodule LambdaEthereumConsensus.P2P.BlobDownloader do defp get_some_peer() do case P2P.Peerbook.get_some_peer() do nil -> + stacktrace = Process.info(self(), :current_stacktrace) + IO.inspect(stacktrace, label: "Current stacktrace") + Process.sleep(100) get_some_peer() diff --git a/lib/lambda_ethereum_consensus/p2p/block_downloader.ex b/lib/lambda_ethereum_consensus/p2p/block_downloader.ex index 6522fa84a..6438ccfd1 100644 --- a/lib/lambda_ethereum_consensus/p2p/block_downloader.ex +++ 
b/lib/lambda_ethereum_consensus/p2p/block_downloader.ex @@ -66,7 +66,7 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do def request_blocks_by_range(slot, count, on_blocks, retries) do Logger.debug("Requesting block", slot: slot) - # TODO: handle no-peers asynchronously? + # FIXME: handle no-peers asynchronously! this is hanging Libp2pPort when there are no peers peer_id = get_some_peer() request = @@ -173,7 +173,8 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do if retries > 0 do :telemetry.execute([:network, :request], %{blocks: 0}, Map.put(tags, :result, "retry")) pretty_roots = Enum.map_join(roots, ", ", &Base.encode16/1) - Logger.debug("Retrying request for blocks with roots #{pretty_roots}") + Logger.debug("Retrying request (reason: #{inspect(reason)}) for blocks with roots #{pretty_roots}, in 2 second") + Process.sleep(2000) request_blocks_by_root(roots, on_blocks, retries - 1) {:ok, store} else @@ -186,6 +187,9 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do defp get_some_peer() do case P2P.Peerbook.get_some_peer() do nil -> + stacktrace = Process.info(self(), :current_stacktrace) + IO.inspect(stacktrace, label: "Current stacktrace") + Process.sleep(100) get_some_peer() diff --git a/lib/lambda_ethereum_consensus/p2p/peerbook.ex b/lib/lambda_ethereum_consensus/p2p/peerbook.ex index 96b9c34a9..785e198fb 100644 --- a/lib/lambda_ethereum_consensus/p2p/peerbook.ex +++ b/lib/lambda_ethereum_consensus/p2p/peerbook.ex @@ -2,10 +2,12 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do @moduledoc """ General peer bookkeeping. 
""" + require Logger alias LambdaEthereumConsensus.Libp2pPort alias LambdaEthereumConsensus.Store.KvSchema @initial_score 100 + @penalize 2 @target_peers 128 @max_prune_size 8 @prune_percentage 0.05 @@ -53,11 +55,29 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do end def penalize_peer(peer_id) do - fetch_peerbook!() |> Map.delete(peer_id) |> store_peerbook() + Logger.debug("Penalizing peer: #{inspect(LambdaEthereumConsensus.Utils.format_shorten_binary(peer_id))}") + peer_score = fetch_peerbook!() |> Map.get(peer_id) + + case peer_score do + nil -> + :ok + + score when score - @penalize <= 0 -> + Logger.info("Removing peer: #{inspect(LambdaEthereumConsensus.Utils.format_shorten_binary(peer_id))}") + fetch_peerbook!() + |> Map.delete(peer_id) + |> store_peerbook() + + score -> + fetch_peerbook!() + |> Map.put(peer_id, score - @penalize) + |> store_peerbook() + end end def handle_new_peer(peer_id) do peerbook = fetch_peerbook!() + Logger.debug("New peer connected: #{inspect(LambdaEthereumConsensus.Utils.format_shorten_binary(peer_id))}") if not Map.has_key?(peerbook, peer_id) do :telemetry.execute([:peers, :connection], %{id: peer_id}, %{result: "success"})