-
Notifications
You must be signed in to change notification settings - Fork 72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[5.0.3] P2P: Resolve on reconnect #2408
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -348,7 +348,6 @@ namespace eosio { | |
std::string host; | ||
connection_ptr c; | ||
tcp::endpoint active_ip; | ||
tcp::resolver::results_type ips; | ||
}; | ||
|
||
using connection_details_index = multi_index_container< | ||
|
@@ -416,9 +415,9 @@ namespace eosio { | |
|
||
void add(connection_ptr c); | ||
string connect(const string& host, const string& p2p_address); | ||
string resolve_and_connect(const string& host, const string& p2p_address); | ||
string resolve_and_connect(const string& host, const string& p2p_address, const connection_ptr& c = {}); | ||
void update_connection_endpoint(connection_ptr c, const tcp::endpoint& endpoint); | ||
void connect(const connection_ptr& c); | ||
void reconnect(const connection_ptr& c); | ||
string disconnect(const string& host); | ||
void close_all(); | ||
|
||
|
@@ -912,7 +911,7 @@ namespace eosio { | |
|
||
fc::sha256 conn_node_id; | ||
string short_conn_node_id; | ||
string listen_address; // address sent to peer in handshake | ||
const string listen_address; // address sent to peer in handshake | ||
string log_p2p_address; | ||
string log_remote_endpoint_ip; | ||
string log_remote_endpoint_port; | ||
|
@@ -2764,16 +2763,21 @@ namespace eosio { | |
fc_dlog( logger, "Skipping connect due to go_away reason ${r}",("r", reason_str( no_retry ))); | ||
return false; | ||
} | ||
|
||
if (incoming()) | ||
return false; | ||
|
||
if( consecutive_immediate_connection_close > def_max_consecutive_immediate_connection_close || no_retry == benign_other ) { | ||
fc::microseconds connector_period = my_impl->connections.get_connector_period(); | ||
fc::lock_guard g( conn_mtx ); | ||
if( last_close == fc::time_point() || last_close > fc::time_point::now() - connector_period ) { | ||
return true; // true so doesn't remove from valid connections | ||
} | ||
} | ||
|
||
connection_ptr c = shared_from_this(); | ||
strand.post([c]() { | ||
my_impl->connections.connect(c); | ||
my_impl->connections.reconnect(c); | ||
}); | ||
return true; | ||
} | ||
|
@@ -4486,39 +4490,48 @@ namespace eosio { | |
return resolve_and_connect( host, p2p_address ); | ||
} | ||
|
||
string connections_manager::resolve_and_connect( const string& peer_address, const string& listen_address ) { | ||
string connections_manager::resolve_and_connect( const string& peer_address, const string& listen_address, | ||
const connection_ptr& c ) | ||
{ | ||
assert(!c || (c->peer_address() == peer_address && c->listen_address == listen_address)); | ||
|
||
string::size_type colon = peer_address.find(':'); | ||
if (colon == std::string::npos || colon == 0) { | ||
fc_elog( logger, "Invalid peer address. must be \"host:port[:<blk>|<trx>]\": ${p}", ("p", peer_address) ); | ||
return "invalid peer address"; | ||
} | ||
|
||
std::lock_guard g( connections_mtx ); | ||
if( find_connection_i( peer_address ) ) | ||
return "already connected"; | ||
{ | ||
std::lock_guard g(connections_mtx); | ||
if (find_connection_i(peer_address)) | ||
return "already connected"; | ||
} | ||
|
||
auto [host, port, type] = split_host_port_type(peer_address); | ||
|
||
auto resolver = std::make_shared<tcp::resolver>( my_impl->thread_pool.get_executor() ); | ||
|
||
resolver->async_resolve(host, port, | ||
[resolver, host = host, port = port, peer_address = peer_address, listen_address = listen_address, this]( const boost::system::error_code& err, const tcp::resolver::results_type& results ) { | ||
connection_ptr c = std::make_shared<connection>( peer_address, listen_address ); | ||
c->set_heartbeat_timeout( heartbeat_timeout ); | ||
std::lock_guard g( connections_mtx ); | ||
auto [it, inserted] = connections.emplace( connection_detail{ | ||
.host = peer_address, | ||
.c = std::move(c), | ||
.ips = results | ||
[this, resolver, c_org{c}, host, port, peer_address, listen_address]( const boost::system::error_code& err, const tcp::resolver::results_type& results ) { | ||
connection_ptr c = c_org ? c_org : std::make_shared<connection>( peer_address, listen_address ); | ||
c->strand.post([this, resolver, c, err, results, host, port, peer_address]() { | ||
c->set_heartbeat_timeout( heartbeat_timeout ); | ||
{ | ||
std::lock_guard g( connections_mtx ); | ||
connections.emplace( connection_detail{ | ||
.host = peer_address, | ||
.c = c, | ||
}); | ||
} | ||
if( !err ) { | ||
c->connect( results ); | ||
} else { | ||
fc_wlog( logger, "Unable to resolve ${host}:${port} ${error}", | ||
("host", host)("port", port)( "error", err.message() ) ); | ||
c->set_state(connection::connection_state::closed); | ||
++(c->consecutive_immediate_connection_close); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does increasing this do? I thought it was going to cause some sort of cool down when reaching There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to look into it, but I think the change that broke this re-connect also broke this back off of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, actually this a reason to reuse the connection object. I guess I should maintain the reuse of the connection object for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like now (on 51fd8fa) after failing to resolve a host |
||
} | ||
}); | ||
if( !err ) { | ||
it->c->connect( results ); | ||
} else { | ||
fc_wlog( logger, "Unable to resolve ${host}:${port} ${error}", | ||
("host", host)("port", port)( "error", err.message() ) ); | ||
it->c->set_state(connection::connection_state::closed); | ||
++(it->c->consecutive_immediate_connection_close); | ||
} | ||
} ); | ||
|
||
return "added connection"; | ||
|
@@ -4536,13 +4549,13 @@ namespace eosio { | |
} | ||
} | ||
|
||
void connections_manager::connect(const connection_ptr& c) { | ||
std::lock_guard g( connections_mtx ); | ||
const auto& index = connections.get<by_connection>(); | ||
const auto& it = index.find(c); | ||
if( it != index.end() ) { | ||
it->c->connect( it->ips ); | ||
void connections_manager::reconnect(const connection_ptr& c) { | ||
{ | ||
std::lock_guard g( connections_mtx ); | ||
auto& index = connections.get<by_connection>(); | ||
index.erase(c); | ||
} | ||
resolve_and_connect(c->peer_address(), c->listen_address, c); | ||
} | ||
|
||
// called by API | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fwiw this is a warning log, where failing to connect is an info log. Not sure if you want to make consistent or not
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the
async_connect
fails it callsc->close(false)
which calls_close()
which incrementsconsecutive_immediate_connection_close
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think a
warn
is appropriate if unable to resolve.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is something different between the two though. For example when I run nodeos with
--p2p-peer-address fdsfdsfds.invalid:4444 --p2p-peer-address fddsfdsfdsfds.invalid:4444 --p2p-peer-address 127.0.0.1:2121 --p2p-peer-address www.google.com:4444
you'll see how the latter 2 will try to reconnect forever where the first 2 won't.There is also some log oddness at shutdown,
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This different in backoff behavior seems to match Leap 4.0 behavior.