From 6591537984b4e0e305ab4c781b7a6a1fad5ce5ce Mon Sep 17 00:00:00 2001 From: Elad Gildnur <6321801+shleikes@users.noreply.github.com> Date: Sun, 27 Oct 2024 17:32:42 +0200 Subject: [PATCH 01/14] File name changes and a typo (#1758) --- ...{metrics_consumer_manager.go => consumer_metrics_manager.go} | 0 ...onsumer_manager_test.go => consumer_metrics_manager_test.go} | 0 protocol/metrics/{metricsService.go => metrics_service.go} | 0 protocol/metrics/metrics_test.go | 2 +- protocol/metrics/{metrics_provider.go => provider_metrics.go} | 0 ...{metrics_provider_manager.go => provider_metrics_manager.go} | 0 protocol/metrics/{rpcconsumerlogs.go => rpcconsumer_logs.go} | 0 .../{rpcconsumerlogs_test.go => rpcconsumer_logs_test.go} | 0 8 files changed, 1 insertion(+), 1 deletion(-) rename protocol/metrics/{metrics_consumer_manager.go => consumer_metrics_manager.go} (100%) rename protocol/metrics/{metrics_consumer_manager_test.go => consumer_metrics_manager_test.go} (100%) rename protocol/metrics/{metricsService.go => metrics_service.go} (100%) rename protocol/metrics/{metrics_provider.go => provider_metrics.go} (100%) rename protocol/metrics/{metrics_provider_manager.go => provider_metrics_manager.go} (100%) rename protocol/metrics/{rpcconsumerlogs.go => rpcconsumer_logs.go} (100%) rename protocol/metrics/{rpcconsumerlogs_test.go => rpcconsumer_logs_test.go} (100%) diff --git a/protocol/metrics/metrics_consumer_manager.go b/protocol/metrics/consumer_metrics_manager.go similarity index 100% rename from protocol/metrics/metrics_consumer_manager.go rename to protocol/metrics/consumer_metrics_manager.go diff --git a/protocol/metrics/metrics_consumer_manager_test.go b/protocol/metrics/consumer_metrics_manager_test.go similarity index 100% rename from protocol/metrics/metrics_consumer_manager_test.go rename to protocol/metrics/consumer_metrics_manager_test.go diff --git a/protocol/metrics/metricsService.go b/protocol/metrics/metrics_service.go similarity index 100% rename from protocol/metrics/metricsService.go rename to protocol/metrics/metrics_service.go diff --git a/protocol/metrics/metrics_test.go b/protocol/metrics/metrics_test.go index 5590def6a3..a474dc2dad 100644 --- a/protocol/metrics/metrics_test.go +++ b/protocol/metrics/metrics_test.go @@ -5,7 +5,7 @@ import ( "testing" ) -func Test_StorAaggregatedata_OnMetricService(t *testing.T) { +func Test_StoreAggregatedData_OnMetricService(t *testing.T) { // setup metricService := MetricService{ AggregatedMetricMap: &map[string]map[string]map[string]map[RelaySource]map[string]*AggregatedMetric{}, diff --git a/protocol/metrics/metrics_provider.go b/protocol/metrics/provider_metrics.go similarity index 100% rename from protocol/metrics/metrics_provider.go rename to protocol/metrics/provider_metrics.go diff --git a/protocol/metrics/metrics_provider_manager.go b/protocol/metrics/provider_metrics_manager.go similarity index 100% rename from protocol/metrics/metrics_provider_manager.go rename to protocol/metrics/provider_metrics_manager.go diff --git a/protocol/metrics/rpcconsumerlogs.go b/protocol/metrics/rpcconsumer_logs.go similarity index 100% rename from protocol/metrics/rpcconsumerlogs.go rename to protocol/metrics/rpcconsumer_logs.go diff --git a/protocol/metrics/rpcconsumerlogs_test.go b/protocol/metrics/rpcconsumer_logs_test.go similarity index 100% rename from protocol/metrics/rpcconsumerlogs_test.go rename to protocol/metrics/rpcconsumer_logs_test.go From 831d3790b6e64e9bf6b5bdf16d4566ee768f53e9 Mon Sep 17 00:00:00 2001 From: oren-lava 
<111131399+oren-lava@users.noreply.github.com> Date: Tue, 29 Oct 2024 13:51:19 +0200 Subject: [PATCH 02/14] fix: delegator rewards query bug (#1760) * fix delegator rewards query so it'll show claimable rewards from past providers * optimize query --- .../client/cli/query_delegator_rewards.go | 16 ++---------- .../keeper/grpc_query_delegator_rewards.go | 25 +++++++++---------- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/x/dualstaking/client/cli/query_delegator_rewards.go b/x/dualstaking/client/cli/query_delegator_rewards.go index 8a02b46528..461ecbb684 100644 --- a/x/dualstaking/client/cli/query_delegator_rewards.go +++ b/x/dualstaking/client/cli/query_delegator_rewards.go @@ -13,17 +13,16 @@ import ( const ( providerFlagName = "provider" - chainIDFlagName = "chain-id" ) func CmdQueryDelegatorRewards() *cobra.Command { cmd := &cobra.Command{ Use: "delegator-rewards [delegator]", Short: `shows all the rewards that can be claimed for a specific delegator. - Can be more specific using the optional --provider and --chain-id flags`, + show rewards from a specific provider using the optional --provider flag`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - var provider, chainID string + var provider string clientCtx, err := client.GetClientQueryContext(cmd) if err != nil { @@ -47,19 +46,9 @@ func CmdQueryDelegatorRewards() *cobra.Command { return err } - // check if the command includes --chain-id - chainIDFlag := cmd.Flags().Lookup(chainIDFlagName) - if chainIDFlag == nil { - return fmt.Errorf("%s flag wasn't found", chainIDFlagName) - } - if cmd.Flags().Changed(chainIDFlagName) { - chainID = chainIDFlag.Value.String() - } - res, err := queryClient.DelegatorRewards(cmd.Context(), &types.QueryDelegatorRewardsRequest{ Delegator: delegator, Provider: provider, - ChainId: chainID, }) if err != nil { return err @@ -71,7 +60,6 @@ func CmdQueryDelegatorRewards() *cobra.Command { flags.AddQueryFlagsToCmd(cmd) cmd.Flags().String(providerFlagName, "", "output rewards from a specific provider") - cmd.Flags().String(chainIDFlagName, "", "output rewards for a specific chain") return cmd } diff --git a/x/dualstaking/keeper/grpc_query_delegator_rewards.go b/x/dualstaking/keeper/grpc_query_delegator_rewards.go index 01413ccaa9..bfb2025b6b 100644 --- a/x/dualstaking/keeper/grpc_query_delegator_rewards.go +++ b/x/dualstaking/keeper/grpc_query_delegator_rewards.go @@ -17,20 +17,19 @@ func (k Keeper) DelegatorRewards(goCtx context.Context, req *types.QueryDelegato ctx := sdk.UnwrapSDKContext(goCtx) var rewards []types.DelegatorRewardInfo - resProviders, err := k.DelegatorProviders(goCtx, &types.QueryDelegatorProvidersRequest{Delegator: req.Delegator}) - if err != nil { - return nil, err - } - - for _, delegation := range resProviders.Delegations { - if delegation.Provider == req.Provider || req.Provider == "" { - delegatorReward, found := k.GetDelegatorReward(ctx, delegation.Provider, delegation.Delegator) - if found { - reward := types.DelegatorRewardInfo{ - Provider: delegation.Provider, + if req.Provider != "" { + reward, found := k.GetDelegatorReward(ctx, req.Provider, req.Delegator) + if found { + rewards = append(rewards, types.DelegatorRewardInfo{Provider: reward.Provider, Amount: reward.Amount}) + } + } else { + allRewards := k.GetAllDelegatorReward(ctx) + for _, delegatorReward := range allRewards { + if delegatorReward.Delegator == req.Delegator { + rewards = append(rewards, types.DelegatorRewardInfo{ + Provider: delegatorReward.Provider, Amount: 
delegatorReward.Amount, - } - rewards = append(rewards, reward) + }) } } } From 0e9c83070143ec837868ce7122656365f6c60770 Mon Sep 17 00:00:00 2001 From: oren-lava <111131399+oren-lava@users.noreply.github.com> Date: Tue, 29 Oct 2024 14:05:21 +0200 Subject: [PATCH 03/14] fix GetValidator so it'll fetch delegated validators (#1762) --- x/dualstaking/client/cli/tx_delegate.go | 39 +++++++++------------ x/pairing/client/cli/tx_unstake_provider.go | 4 +++ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/x/dualstaking/client/cli/tx_delegate.go b/x/dualstaking/client/cli/tx_delegate.go index 945a24fbeb..24f303bab0 100644 --- a/x/dualstaking/client/cli/tx_delegate.go +++ b/x/dualstaking/client/cli/tx_delegate.go @@ -59,37 +59,30 @@ func CmdDelegate() *cobra.Command { return cmd } +// GetValidator gets a validator that is delegated by the `from` address +// The dualstaking mecahnism makes providers delegate to a validator when they're staking. +// Assuming that the `from` address (of the clientCtx) is a staked provider address, this +// function returns a validator that the provider is delegated to (the one with the largest delegation). func GetValidator(clientCtx client.Context) string { provider := clientCtx.GetFromAddress().String() q := stakingtypes.NewQueryClient(clientCtx) ctx := context.Background() - resD, err := q.DelegatorValidators(ctx, &stakingtypes.QueryDelegatorValidatorsRequest{DelegatorAddr: provider}) + resD, err := q.DelegatorDelegations(ctx, &stakingtypes.QueryDelegatorDelegationsRequest{DelegatorAddr: provider}) - if err == nil && len(resD.Validators) > 0 { - validatorBiggest := resD.Validators[0] - for _, validator := range resD.Validators { - if sdk.AccAddress(validator.OperatorAddress).String() == provider { - return validator.OperatorAddress + if err == nil && len(resD.DelegationResponses) > 0 { + delegationBiggest := resD.DelegationResponses[0] + for _, delegationResponse := range resD.DelegationResponses { + if sdk.AccAddress(delegationResponse.Delegation.ValidatorAddress).String() == provider { + return delegationResponse.Delegation.ValidatorAddress } - if validator.Tokens.GT(validatorBiggest.Tokens) { - validatorBiggest = validator + if delegationResponse.Balance.IsGTE(delegationBiggest.Balance) { + delegationBiggest = delegationResponse } } - return validatorBiggest.OperatorAddress - } - - resV, err := q.Validators(ctx, &stakingtypes.QueryValidatorsRequest{}) - if err != nil { - panic("failed to fetch list of validators") - } - validatorBiggest := resV.Validators[0] - for _, validator := range resV.Validators { - if sdk.AccAddress(validator.OperatorAddress).String() == provider { - return validator.OperatorAddress - } - if validator.Tokens.GT(validatorBiggest.Tokens) { - validatorBiggest = validator + if !delegationBiggest.Balance.IsZero() { + return delegationBiggest.Delegation.ValidatorAddress } } - return validatorBiggest.OperatorAddress + + return "" } diff --git a/x/pairing/client/cli/tx_unstake_provider.go b/x/pairing/client/cli/tx_unstake_provider.go index 06cd8da538..c5f380a063 100644 --- a/x/pairing/client/cli/tx_unstake_provider.go +++ b/x/pairing/client/cli/tx_unstake_provider.go @@ -2,6 +2,7 @@ package cli import ( "context" + "fmt" "strconv" "strings" @@ -43,6 +44,9 @@ func CmdUnstakeProvider() *cobra.Command { } else { validator = dualstakingclient.GetValidator(clientCtx) } + if validator == "" { + return fmt.Errorf("cannot unstake, the provider is not delegated to any validator.\nthe provider might have all zero delegations, to resolve 
please delegated to one of the validators a small amount") + } msgs := []sdk.Msg{} for _, chainID := range chainIDs { From b987fd6d27ff56e6c97d9b6d2c70e80e6dc88640 Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Tue, 29 Oct 2024 14:55:45 +0100 Subject: [PATCH 04/14] fix: PRT - fixing tendermint node error parser (#1761) --- .../rpcInterfaceMessages/jsonRPCMessage.go | 2 +- .../tendermintRPCMessage.go | 31 +++++++++++++++++++ protocol/rpcconsumer/rpcconsumer_server.go | 3 ++ scripts/automation_scripts/pure_proxy.py | 18 +++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go b/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go index f45a241329..cc31752d3c 100644 --- a/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go +++ b/protocol/chainlib/chainproxy/rpcInterfaceMessages/jsonRPCMessage.go @@ -56,7 +56,7 @@ func (jm JsonrpcMessage) CheckResponseError(data []byte, httpStatusCode int) (ha utils.LavaFormatWarning("Failed unmarshalling CheckError", err, utils.LogAttr("data", string(data))) return false, "" } - if result.Error == nil { + if result.Error == nil { // no error return false, "" } return result.Error.Message != "", result.Error.Message diff --git a/protocol/chainlib/chainproxy/rpcInterfaceMessages/tendermintRPCMessage.go b/protocol/chainlib/chainproxy/rpcInterfaceMessages/tendermintRPCMessage.go index a46073b9d2..f370f00cd0 100644 --- a/protocol/chainlib/chainproxy/rpcInterfaceMessages/tendermintRPCMessage.go +++ b/protocol/chainlib/chainproxy/rpcInterfaceMessages/tendermintRPCMessage.go @@ -50,6 +50,37 @@ func (cp TendermintrpcMessage) GetParams() interface{} { return cp.Params } +type TendermintMessageResponseBody struct { + Code int `json:"code,omitempty"` + Log string `json:"log,omitempty"` +} + +type TendermintMessageResponse struct { + Response TendermintMessageResponseBody `json:"response,omitempty"` +} + +// returns if error exists and +func (jm TendermintrpcMessage) CheckResponseError(data []byte, httpStatusCode int) (hasError bool, errorMessage string) { + result := &JsonrpcMessage{} + err := json.Unmarshal(data, result) + if err != nil { + utils.LavaFormatWarning("Failed unmarshalling CheckError", err, utils.LogAttr("data", string(data))) + return false, "" + } + + if result.Error == nil { // no error + if result.Result != nil { // check if we got a tendermint error + tendermintResponse := &TendermintMessageResponse{} + err := json.Unmarshal(result.Result, tendermintResponse) + if err == nil { + return (tendermintResponse.Response.Code != 0 && tendermintResponse.Response.Log != ""), tendermintResponse.Response.Log + } + } + return false, "" + } + return result.Error.Message != "", result.Error.Message +} + func (tm TendermintrpcMessage) GetResult() json.RawMessage { if tm.Error != nil { utils.LavaFormatWarning("GetResult() Request got an error from the node", nil, utils.Attribute{Key: "error", Value: tm.Error}) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 7f8989b509..05cc22e2b3 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -760,6 +760,9 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(protocolMessage), relayLatency, 
singleConsumerSession.CalculateExpectedLatency(expectedRelayTimeoutForQOS), expectedBH, numOfProviders, pairingAddressesLen, protocolMessage.GetApi().Category.HangingApi) // session done successfully isNodeError, _ := protocolMessage.CheckResponseError(localRelayResult.Reply.Data, localRelayResult.StatusCode) localRelayResult.IsNodeError = isNodeError + if rpccs.debugRelays { + utils.LavaFormatDebug("Result Code", utils.LogAttr("isNodeError", isNodeError), utils.LogAttr("StatusCode", localRelayResult.StatusCode)) + } if rpccs.cache.CacheActive() && rpcclient.ValidateStatusCodes(localRelayResult.StatusCode, true) == nil { // in case the error is a node error we don't want to cache if !isNodeError { diff --git a/scripts/automation_scripts/pure_proxy.py b/scripts/automation_scripts/pure_proxy.py index 4472727e23..f0197771cd 100644 --- a/scripts/automation_scripts/pure_proxy.py +++ b/scripts/automation_scripts/pure_proxy.py @@ -2,6 +2,7 @@ import aiohttp from aiohttp import web from functools import partial +import json port_url_map = { 5555: "http://localhost:1317", # Replace with actual target URLs @@ -21,6 +22,8 @@ async def proxy_handler(request, server_port): url += f"?{query_string}" print(f"Proxying request to: {url}") # Debug print + print(f"Request path: {path}") # Debug print + print(f"Request query: {query_string}") # Debug print print(f"Request headers: {request.headers}") # Debug print try: @@ -28,6 +31,21 @@ async def proxy_handler(request, server_port): method = request.method headers = {k: v for k, v in request.headers.items() if k.lower() not in ('host', 'content-length')} data = await request.read() + + # Print the data, decoding it as JSON if possible + try: + json_data = data.decode('utf-8') + print(f"Request data (raw): {json_data}") # Debug print + print("") + print("Copy for curl:") + print(f'curl -X POST -H "Content-Type: application/json" {target_url} --data {json_data} -v') + print("") + print("") + json_parsed_data = json.loads(json_data) + print(f"Request data (parsed JSON): {json.dumps(json_parsed_data, indent=2)}") # Debug print + except Exception as e: + print(f"Error decoding request data as JSON: {e}") + print(f"Request data (raw bytes): {data}") # Debug print async with session.request(method, url, headers=headers, data=data, allow_redirects=False) as resp: print(f"Response status: {resp.status}") # Debug print From 479a9060e70b0966087b3cbdb47270352268ae47 Mon Sep 17 00:00:00 2001 From: Elad Gildnur <6321801+shleikes@users.noreply.github.com> Date: Tue, 29 Oct 2024 16:51:00 +0200 Subject: [PATCH 05/14] feat: PRT - Check the lava chain id in provider (#1751) * Changed logs * Add the lava chain id to the probe * Check the lava chain id in the "rpcprovider test" * Check the lava chain id on provider startup * CR Fix + Bug fix * Fix the Ctrl + C not working bug * CR Fix: Remove duplicated check --- protocol/common/conf.go | 1 + protocol/rpcprovider/rpcprovider_server.go | 1 + protocol/rpcprovider/testing.go | 149 ++++++++++++++++----- 3 files changed, 119 insertions(+), 32 deletions(-) diff --git a/protocol/common/conf.go b/protocol/common/conf.go index 5df9ebf1b9..483d6d0684 100644 --- a/protocol/common/conf.go +++ b/protocol/common/conf.go @@ -20,6 +20,7 @@ const ( MaximumConcurrentProvidersFlagName = "concurrent-providers" StatusCodeMetadataKey = "status-code" VersionMetadataKey = "lavap-version" + LavaChainIdMetadataKey = "lavap-chain-id" TimeOutForFetchingLavaBlocksFlag = "timeout-for-fetching-lava-blocks" ) diff --git 
a/protocol/rpcprovider/rpcprovider_server.go b/protocol/rpcprovider/rpcprovider_server.go index acd31b9717..86ab5d38c3 100644 --- a/protocol/rpcprovider/rpcprovider_server.go +++ b/protocol/rpcprovider/rpcprovider_server.go @@ -1208,6 +1208,7 @@ func (rpcps *RPCProviderServer) Probe(ctx context.Context, probeReq *pairingtype } trailer := metadata.Pairs(common.VersionMetadataKey, upgrade.GetCurrentVersion().ProviderVersion) trailer.Append(chainlib.RpcProviderUniqueIdHeader, rpcps.providerUniqueId) + trailer.Append(common.LavaChainIdMetadataKey, rpcps.lavaChainID) grpc.SetTrailer(ctx, trailer) // we ignore this error here since this code can be triggered not from grpc return probeReply, nil } diff --git a/protocol/rpcprovider/testing.go b/protocol/rpcprovider/testing.go index 01535bc2cb..a5bc7a863d 100644 --- a/protocol/rpcprovider/testing.go +++ b/protocol/rpcprovider/testing.go @@ -5,16 +5,16 @@ import ( "crypto/tls" "fmt" "net/http" - "os" - "os/signal" "strconv" "strings" "time" "github.com/cosmos/cosmos-sdk/client" + "github.com/cosmos/cosmos-sdk/client/config" "github.com/cosmos/cosmos-sdk/client/flags" "github.com/cosmos/cosmos-sdk/version" "github.com/gogo/status" + "github.com/lavanet/lava/v4/app" lvutil "github.com/lavanet/lava/v4/ecosystem/lavavisor/pkg/util" "github.com/lavanet/lava/v4/protocol/chainlib/chainproxy" "github.com/lavanet/lava/v4/protocol/common" @@ -49,7 +49,7 @@ func validatePortNumber(ipPort string) string { } func PerformCORSCheck(endpoint epochstoragetypes.Endpoint) error { - utils.LavaFormatDebug("Checking CORS", utils.Attribute{Key: "endpoint", Value: endpoint}) + utils.LavaFormatDebug("Checking CORS", utils.LogAttr("endpoint", endpoint)) // Construct the URL for the RPC endpoint endpointURL := "https://" + endpoint.IPPORT // Providers must have HTTPS support @@ -86,7 +86,10 @@ func validateCORSHeaders(resp *http.Response) error { // Check for the presence of "Access-Control-Allow-Origin" header corsOrigin := resp.Header.Get("Access-Control-Allow-Origin") if corsOrigin != "*" { - return utils.LavaFormatError("CORS check failed. Expected 'Access-Control-Allow-Origin: *' but not found.", nil, utils.Attribute{Key: "returned code", Value: resp.StatusCode}, utils.Attribute{Key: "corsOrigin", Value: corsOrigin}) + return utils.LavaFormatError("CORS check failed. Expected 'Access-Control-Allow-Origin: *' but not found.", nil, + utils.LogAttr("returned code", resp.StatusCode), + utils.LogAttr("corsOrigin", corsOrigin), + ) } // Headers that must be present in "Access-Control-Allow-Headers" @@ -95,21 +98,17 @@ func validateCORSHeaders(resp *http.Response) error { corsHeaders := strings.ToLower(resp.Header.Get("Access-Control-Allow-Headers")) for _, requiredHeader := range requiredHeaders { if !strings.Contains(corsHeaders, strings.ToLower(requiredHeader)) { - return utils.LavaFormatError("CORS check failed. Expected 'Access-Control-Allow-Headers' are not present.", nil, utils.Attribute{Key: "corsHeaders", Value: corsHeaders}, utils.Attribute{Key: "requiredHeader", Value: requiredHeader}) + return utils.LavaFormatError("CORS check failed. 
Expected 'Access-Control-Allow-Headers' are not present.", nil, + utils.LogAttr("corsHeaders", corsHeaders), + utils.LogAttr("requiredHeader", requiredHeader), + ) } } return nil } -func startTesting(ctx context.Context, clientCtx client.Context, providerEntries []epochstoragetypes.StakeEntry, plainTextConnection bool) error { - ctx, cancel := context.WithCancel(ctx) - signalChan := make(chan os.Signal, 1) - signal.Notify(signalChan, os.Interrupt) - defer func() { - signal.Stop(signalChan) - cancel() - }() +func startTesting(ctx context.Context, clientCtx client.Context, lavaNetworkChainId string, providerEntries []epochstoragetypes.StakeEntry, plainTextConnection bool) error { goodChains := []string{} badChains := []string{} portValidation := []string{} @@ -121,7 +120,10 @@ func startTesting(ctx context.Context, clientCtx client.Context, providerEntries lavaVersion := param.GetParams().Version targetVersion := lvutil.ParseToSemanticVersion(lavaVersion.ProviderTarget) for _, providerEntry := range providerEntries { - utils.LavaFormatInfo("checking provider entry", utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "endpoints", Value: providerEntry.Endpoints}) + utils.LavaFormatInfo("checking provider entry", + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("endpoints", providerEntry.Endpoints), + ) for _, endpoint := range providerEntry.Endpoints { checkOneProvider := func(apiInterface string, addon string) (time.Duration, string, int64, error) { @@ -137,7 +139,12 @@ func startTesting(ctx context.Context, clientCtx client.Context, providerEntries utils.LavaFormatWarning("You are using plain text connection (disabled tls), no consumer can connect to it as all consumers use tls. this should be used for testing purposes only", nil) conn, err = grpc.DialContext(ctx, endpoint.IPPORT, grpc.WithBlock(), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(chainproxy.MaxCallRecvMsgSize))) if err != nil { - return 0, "", 0, utils.LavaFormatError("failed connecting to provider endpoint", err, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, "", 0, utils.LavaFormatError("failed connecting to provider endpoint", err, + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) } relayerClient = pairingtypes.NewRelayerClient(conn) } else { @@ -149,10 +156,20 @@ func startTesting(ctx context.Context, clientCtx client.Context, providerEntries _, _, err := cswp.ConnectRawClientWithTimeout(ctx, endpoint.IPPORT) lavasession.AllowInsecureConnectionToProviders = false if err == nil { - return 0, "", 0, utils.LavaFormatError("provider endpoint is insecure when it should be secure", err, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, "", 0, utils.LavaFormatError("provider endpoint is insecure when it should be secure", err, + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) 
} } - return 0, "", 0, utils.LavaFormatError("failed connecting to provider endpoint", err, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, "", 0, utils.LavaFormatError("failed connecting to provider endpoint", err, + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) } } @@ -167,17 +184,52 @@ func startTesting(ctx context.Context, clientCtx client.Context, providerEntries var trailer metadata.MD probeResp, err := relayerClient.Probe(ctx, probeReq, grpc.Trailer(&trailer)) if err != nil { - return 0, "", 0, utils.LavaFormatError("failed probing provider endpoint", err, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, "", 0, utils.LavaFormatError("failed probing provider endpoint", err, + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) } versions := strings.Join(trailer.Get(common.VersionMetadataKey), ",") relayLatency := time.Since(relaySentTime) if guid != probeResp.GetGuid() { - return 0, versions, 0, utils.LavaFormatError("probe returned invalid value", err, utils.Attribute{Key: "returnedGuid", Value: probeResp.GetGuid()}, utils.Attribute{Key: "guid", Value: guid}, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, versions, 0, utils.LavaFormatError("probe returned invalid value", err, + utils.LogAttr("returnedGuid", probeResp.GetGuid()), + utils.LogAttr("guid", guid), + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) + } + + // chain id check - lava node chain id should be the same as the one we are probing + lavaChainIdFromProbeMD := trailer.Get(common.LavaChainIdMetadataKey) + if len(lavaChainIdFromProbeMD) > 0 { + lavaChainIdFromProbe := lavaChainIdFromProbeMD[0] + if lavaChainIdFromProbe != lavaNetworkChainId { + return 0, versions, 0, utils.LavaFormatError("lava chain id from probe does not match the configured network chain id", nil, + utils.LogAttr("returnedGuid", probeResp.GetGuid()), + utils.LogAttr("guid", guid), + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("lavaChainIdFromProbe", lavaChainIdFromProbe), + utils.LogAttr("networkChainId", lavaNetworkChainId), + ) + } } // CORS check if err := PerformCORSCheck(endpoint); err != nil { - return 0, versions, 0, utils.LavaFormatError("invalid CORS check", err, utils.Attribute{Key: "returnedGuid", Value: probeResp.GetGuid()}, utils.Attribute{Key: "guid", Value: guid}, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, versions, 0, 
utils.LavaFormatError("invalid CORS check", err, + utils.LogAttr("returnedGuid", probeResp.GetGuid()), + utils.LogAttr("guid", guid), + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) } relayRequest := &pairingtypes.RelayRequest{ @@ -186,17 +238,27 @@ func startTesting(ctx context.Context, clientCtx client.Context, providerEntries } _, err = relayerClient.Relay(ctx, relayRequest) if err == nil { - return 0, "", 0, utils.LavaFormatError("relay Without signature did not error, unexpected", nil, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, "", 0, utils.LavaFormatError("relay Without signature did not error, unexpected", nil, + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) } code := status.Code(err) if code != codes.Code(lavasession.EpochMismatchError.ABCICode()) { - return 0, versions, 0, utils.LavaFormatError("relay returned unexpected error", err, utils.Attribute{Key: "apiInterface", Value: apiInterface}, utils.Attribute{Key: "addon", Value: addon}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}) + return 0, versions, 0, utils.LavaFormatError("relay returned unexpected error", err, + utils.LogAttr("apiInterface", apiInterface), + utils.LogAttr("addon", addon), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + ) } return relayLatency, versions, probeResp.GetLatestBlock(), nil } endpointServices := endpoint.GetSupportedServices() if len(endpointServices) == 0 { - utils.LavaFormatWarning("endpoint has no supported services", nil, utils.Attribute{Key: "endpoint", Value: endpoint}) + utils.LavaFormatWarning("endpoint has no supported services", nil, utils.LogAttr("endpoint", endpoint)) } for _, endpointService := range endpointServices { probeLatency, version, latestBlockFromProbe, err := checkOneProvider(endpointService.ApiInterface, endpointService.Addon) @@ -209,7 +271,12 @@ func startTesting(ctx context.Context, clientCtx client.Context, providerEntries badChains = append(badChains, providerEntry.Chain+" "+endpointService.String()+" Version:"+version+" should be: "+lavaVersion.ProviderTarget) continue } - utils.LavaFormatInfo("successfully verified provider endpoint", utils.LogAttr("version", version), utils.Attribute{Key: "enspointService", Value: endpointService}, utils.Attribute{Key: "chainID", Value: providerEntry.Chain}, utils.Attribute{Key: "network address", Value: endpoint.IPPORT}, utils.Attribute{Key: "probe latency", Value: probeLatency}) + utils.LavaFormatInfo("successfully verified provider endpoint", utils.LogAttr("version", version), + utils.LogAttr("enspointService", endpointService), + utils.LogAttr("chainID", providerEntry.Chain), + utils.LogAttr("network address", endpoint.IPPORT), + utils.LogAttr("probe latency", probeLatency), + ) goodChains = append(goodChains, providerEntry.Chain+"-"+endpointService.String()+" version: "+version+" latest block: 0x"+strconv.FormatInt(latestBlockFromProbe, 16)) } } @@ -254,6 +321,16 @@ rpcprovider --from providerWallet --endpoints 
"provider-public-grpc:port,jsonrpc if err != nil { return err } + + if networkChainId == app.Name { + clientTomlConfig, err := config.ReadFromClientConfig(clientCtx) + if err == nil { + if clientTomlConfig.ChainID != "" { + networkChainId = clientTomlConfig.ChainID + } + } + } + logLevel, err := cmd.Flags().GetString(flags.FlagLogLevel) if err != nil { utils.LavaFormatFatal("failed to read log level flag", err) @@ -261,7 +338,7 @@ rpcprovider --from providerWallet --endpoints "provider-public-grpc:port,jsonrpc // setting the insecure option on provider dial, this should be used in development only! lavasession.AllowInsecureConnectionToProviders = viper.GetBool(lavasession.AllowInsecureConnectionToProvidersFlag) if lavasession.AllowInsecureConnectionToProviders { - utils.LavaFormatWarning("AllowInsecureConnectionToProviders is set to true, this should be used only in development", nil, utils.Attribute{Key: lavasession.AllowInsecureConnectionToProvidersFlag, Value: lavasession.AllowInsecureConnectionToProviders}) + utils.LavaFormatWarning("AllowInsecureConnectionToProviders is set to true, this should be used only in development", nil, utils.LogAttr(lavasession.AllowInsecureConnectionToProvidersFlag, lavasession.AllowInsecureConnectionToProviders)) } var address string @@ -282,7 +359,7 @@ rpcprovider --from providerWallet --endpoints "provider-public-grpc:port,jsonrpc } else { address = args[0] } - utils.LavaFormatInfo("RPCProvider Test started", utils.Attribute{Key: "address", Value: address}) + utils.LavaFormatInfo("RPCProvider Test started", utils.LogAttr("address", address)) utils.SetGlobalLoggingLevel(logLevel) clientCtx = clientCtx.WithChainID(networkChainId) @@ -292,6 +369,14 @@ rpcprovider --from providerWallet --endpoints "provider-public-grpc:port,jsonrpc if err != nil { return err } + + if resultStatus.NodeInfo.Network != networkChainId { + return utils.LavaFormatError("network chain id does not match the one in the node", nil, + utils.LogAttr("networkChainId", networkChainId), + utils.LogAttr("nodeNetwork", resultStatus.NodeInfo.Network), + ) + } + currentBlock := resultStatus.SyncInfo.LatestBlockHeight // get all chains provider is serving and their endpoints specQuerier := spectypes.NewQueryClient(clientCtx) @@ -330,11 +415,11 @@ rpcprovider --from providerWallet --endpoints "provider-public-grpc:port,jsonrpc ChainName: chainID, }) if err != nil { - return utils.LavaFormatError("failed reading on chain data in order to resolve endpoint", err, utils.Attribute{Key: "endpoint", Value: endpoints[0]}) + return utils.LavaFormatError("failed reading on chain data in order to resolve endpoint", err, utils.LogAttr("endpoint", endpoints[0])) } endpoints[0].ApiInterfaces = chainInfoResponse.Interfaces } - utils.LavaFormatDebug("endpoints to check", utils.Attribute{Key: "endpoints", Value: endpoints}) + utils.LavaFormatDebug("endpoints to check", utils.LogAttr("endpoints", endpoints)) providerEntry := epochstoragetypes.StakeEntry{ Endpoints: endpoints, Chain: chainID, @@ -353,7 +438,7 @@ rpcprovider --from providerWallet --endpoints "provider-public-grpc:port,jsonrpc for _, provider := range response.StakeEntry { if provider.Address == address { if provider.StakeAppliedBlock > uint64(currentBlock+1) { - utils.LavaFormatWarning("provider is Frozen", nil, utils.Attribute{Key: "chainID", Value: provider.Chain}) + utils.LavaFormatWarning("provider is Frozen", nil, utils.LogAttr("chainID", provider.Chain)) } stakedProviderChains = append(stakedProviderChains, provider) break @@ -363,10 +448,10 
@@ rpcprovider --from providerWallet --endpoints "provider-public-grpc:port,jsonrpc } } if len(stakedProviderChains) == 0 { - utils.LavaFormatError("no active chains for provider", nil, utils.Attribute{Key: "address", Value: address}) + utils.LavaFormatError("no active chains for provider", nil, utils.LogAttr("address", address)) } - utils.LavaFormatDebug("checking chain entries", utils.Attribute{Key: "stakedProviderChains", Value: stakedProviderChains}) - return startTesting(ctx, clientCtx, stakedProviderChains, viper.GetBool(common.PlainTextConnection)) + utils.LavaFormatDebug("checking chain entries", utils.LogAttr("stakedProviderChains", stakedProviderChains)) + return startTesting(ctx, clientCtx, networkChainId, stakedProviderChains, viper.GetBool(common.PlainTextConnection)) }, } From d677c37e224aed8af49f451b18e8a933f33d3107 Mon Sep 17 00:00:00 2001 From: Leon Magma Date: Wed, 30 Oct 2024 16:21:32 +0100 Subject: [PATCH 06/14] feat: PRT provider load rate (#1720) * load rate report in trailer * fix trailer name * fix lint * fix load manager logic * fix lint * fix spelling * fix logic * fixed flag & header names * fix load provider manager and creation logic * fix logs for relay load rate * fix rpcprovider server relay load handling * fix tests * fix typo * fix init lava script * fix provider load manager * fix provider server and load manager * fix lint - fix protocol test * fix provider load manager applyProviderLoadMetadataToContextTrailer * change cmdRPCProvider load rate flag to uint64 * try fix * fix cmd flag reading * adjusting uint64 * fix redundent nil check in provider load manager * fix providerLoadManager per chain creation * rename and fix instance passing unnecessarily * fixed chainlib common formatting * fix provider load manager comments * fix e2e tests * fix pr - unite add relay load and set trailer * fix common.go provider load header * fix edge case of getProviderLoad * fix command flag description * fix command flag description * add metric for load rate * fix division to be float and not uint * roll back init lava only with node two consumers * fix load metric * merge main * Update protocol/chainlib/common.go Co-authored-by: Elad Gildnur <6321801+shleikes@users.noreply.github.com> * fix load calculation * tidy code * changing rate limit to 1k * fix bug * fix pr * v4 * fix pr * fix --------- Co-authored-by: leon mandel Co-authored-by: Ran Mishael Co-authored-by: Leon Magma Co-authored-by: Elad Gildnur <6321801+shleikes@users.noreply.github.com> Co-authored-by: Omer <100387053+omerlavanet@users.noreply.github.com> --- protocol/chainlib/common.go | 2 + protocol/common/cobra_common.go | 1 + protocol/integration/protocol_test.go | 2 +- protocol/metrics/provider_metrics.go | 10 ++++ protocol/metrics/provider_metrics_manager.go | 10 +++- protocol/rpcconsumer/rpcconsumer_server.go | 23 +++++--- protocol/rpcprovider/provider_load_manager.go | 55 +++++++++++++++++ protocol/rpcprovider/rpcprovider.go | 59 +++++++++++-------- protocol/rpcprovider/rpcprovider_server.go | 15 ++++- ...=> init_lava_only_with_node_rate_limit.sh} | 12 +--- 10 files changed, 144 insertions(+), 45 deletions(-) create mode 100644 protocol/rpcprovider/provider_load_manager.go rename scripts/pre_setups/{init_lava_only_with_node_two_consumers.sh => init_lava_only_with_node_rate_limit.sh} (79%) diff --git a/protocol/chainlib/common.go b/protocol/chainlib/common.go index 29a25401e6..bbb4fe11d0 100644 --- a/protocol/chainlib/common.go +++ b/protocol/chainlib/common.go @@ -30,11 +30,13 @@ const ( 
relayMsgLogMaxChars = 200 RPCProviderNodeAddressHash = "Lava-Provider-Node-Address-Hash" RPCProviderNodeExtension = "Lava-Provider-Node-Extension" + RpcProviderLoadRateHeader = "Lava-Provider-Load-Rate" RpcProviderUniqueIdHeader = "Lava-Provider-Unique-Id" WebSocketExtension = "websocket" ) var ( + TrailersToAddToHeaderResponse = []string{RPCProviderNodeExtension, RpcProviderLoadRateHeader} InvalidResponses = []string{"null", "", "nil", "undefined"} FailedSendingSubscriptionToClients = sdkerrors.New("failed Sending Subscription To Clients", 1015, "Failed Sending Subscription To Clients connection might have been closed by the user") NoActiveSubscriptionFound = sdkerrors.New("failed finding an active subscription on provider side", 1016, "no active subscriptions for hashed params.") diff --git a/protocol/common/cobra_common.go b/protocol/common/cobra_common.go index 17a2bef31b..338b003f67 100644 --- a/protocol/common/cobra_common.go +++ b/protocol/common/cobra_common.go @@ -46,6 +46,7 @@ const ( // websocket flags RateLimitWebSocketFlag = "rate-limit-websocket-requests-per-connection" BanDurationForWebsocketRateLimitExceededFlag = "ban-duration-for-websocket-rate-limit-exceeded" + RateLimitRequestPerSecondFlag = "rate-limit-requests-per-second" ) const ( diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index ee7823f669..03ef7cdd2f 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -344,7 +344,7 @@ func createRpcProvider(t *testing.T, ctx context.Context, rpcProviderOptions rpc chainTracker.StartAndServe(ctx) reliabilityManager := reliabilitymanager.NewReliabilityManager(chainTracker, &mockProviderStateTracker, rpcProviderOptions.account.Addr.String(), chainRouter, chainParser) mockReliabilityManager := NewMockReliabilityManager(reliabilityManager) - rpcProviderServer.ServeRPCRequests(ctx, rpcProviderEndpoint, chainParser, rws, providerSessionManager, mockReliabilityManager, rpcProviderOptions.account.SK, cache, chainRouter, &mockProviderStateTracker, rpcProviderOptions.account.Addr, rpcProviderOptions.lavaChainID, rpcprovider.DEFAULT_ALLOWED_MISSING_CU, nil, nil, nil, false) + rpcProviderServer.ServeRPCRequests(ctx, rpcProviderEndpoint, chainParser, rws, providerSessionManager, mockReliabilityManager, rpcProviderOptions.account.SK, cache, chainRouter, &mockProviderStateTracker, rpcProviderOptions.account.Addr, rpcProviderOptions.lavaChainID, rpcprovider.DEFAULT_ALLOWED_MISSING_CU, nil, nil, nil, false, nil) listener := rpcprovider.NewProviderListener(ctx, rpcProviderEndpoint.NetworkAddress, "/health") err = listener.RegisterReceiver(rpcProviderServer, rpcProviderEndpoint) require.NoError(t, err) diff --git a/protocol/metrics/provider_metrics.go b/protocol/metrics/provider_metrics.go index e4eadbe4dc..3b5f863c50 100644 --- a/protocol/metrics/provider_metrics.go +++ b/protocol/metrics/provider_metrics.go @@ -22,6 +22,7 @@ type ProviderMetrics struct { totalRelaysServicedMetric *prometheus.CounterVec totalErroredMetric *prometheus.CounterVec consumerQoSMetric *prometheus.GaugeVec + loadRateMetric *prometheus.GaugeVec } func (pm *ProviderMetrics) AddRelay(consumerAddress string, cu uint64, qos *pairingtypes.QualityOfServiceReport) { @@ -49,6 +50,13 @@ func (pm *ProviderMetrics) AddRelay(consumerAddress string, cu uint64, qos *pair } } +func (pm *ProviderMetrics) SetLoadRate(loadRate float64) { + if pm == nil { + return + } + pm.loadRateMetric.WithLabelValues(pm.specID).Set(loadRate) +} + func (pm 
*ProviderMetrics) AddPayment(cu uint64) { if pm == nil { return @@ -72,6 +80,7 @@ func NewProviderMetrics(specID, apiInterface string, totalCUServicedMetric *prom totalRelaysServicedMetric *prometheus.CounterVec, totalErroredMetric *prometheus.CounterVec, consumerQoSMetric *prometheus.GaugeVec, + loadRateMetric *prometheus.GaugeVec, ) *ProviderMetrics { pm := &ProviderMetrics{ specID: specID, @@ -82,6 +91,7 @@ func NewProviderMetrics(specID, apiInterface string, totalCUServicedMetric *prom totalRelaysServicedMetric: totalRelaysServicedMetric, totalErroredMetric: totalErroredMetric, consumerQoSMetric: consumerQoSMetric, + loadRateMetric: loadRateMetric, } return pm } diff --git a/protocol/metrics/provider_metrics_manager.go b/protocol/metrics/provider_metrics_manager.go index f6734f69d2..a4578df233 100644 --- a/protocol/metrics/provider_metrics_manager.go +++ b/protocol/metrics/provider_metrics_manager.go @@ -41,6 +41,7 @@ type ProviderMetricsManager struct { endpointsHealthChecksOk uint64 relaysMonitors map[string]*RelaysMonitor relaysMonitorsLock sync.RWMutex + loadRateMetric *prometheus.GaugeVec } func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { @@ -107,6 +108,11 @@ func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { Help: "The total number of get latest block queries that succeeded by chainfetcher", }, []string{"spec"}) + loadRateMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "lava_provider_load_rate", + Help: "The load rate according to the load rate limit - Given Y simultaneous relay calls, a value of X and will measure Y/X load rate.", + }, []string{"spec"}) + fetchBlockSuccessMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "lava_provider_fetch_block_success", Help: "The total number of get specific block queries that succeeded by chainfetcher", @@ -141,6 +147,7 @@ func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { prometheus.MustRegister(virtualEpochMetric) prometheus.MustRegister(endpointsHealthChecksOkMetric) prometheus.MustRegister(protocolVersionMetric) + prometheus.MustRegister(loadRateMetric) providerMetricsManager := &ProviderMetricsManager{ providerMetrics: map[string]*ProviderMetrics{}, @@ -161,6 +168,7 @@ func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { endpointsHealthChecksOk: 1, protocolVersionMetric: protocolVersionMetric, relaysMonitors: map[string]*RelaysMonitor{}, + loadRateMetric: loadRateMetric, } http.Handle("/metrics", promhttp.Handler()) @@ -209,7 +217,7 @@ func (pme *ProviderMetricsManager) AddProviderMetrics(specID, apiInterface strin } if pme.getProviderMetric(specID, apiInterface) == nil { - providerMetric := NewProviderMetrics(specID, apiInterface, pme.totalCUServicedMetric, pme.totalCUPaidMetric, pme.totalRelaysServicedMetric, pme.totalErroredMetric, pme.consumerQoSMetric) + providerMetric := NewProviderMetrics(specID, apiInterface, pme.totalCUServicedMetric, pme.totalCUPaidMetric, pme.totalRelaysServicedMetric, pme.totalErroredMetric, pme.consumerQoSMetric, pme.loadRateMetric) pme.setProviderMetric(providerMetric) endpoint := fmt.Sprintf("/metrics/%s/%s/health", specID, apiInterface) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index 05cc22e2b3..dcb7657aa5 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -1264,6 +1264,20 @@ func (rpccs *RPCConsumerServer) 
HandleDirectiveHeadersForMessage(chainMessage ch chainMessage.SetForceCacheRefresh(ok) } +// Iterating over metadataHeaders adding each trailer that fits the header if found to relayResult.Relay.Metadata +func (rpccs *RPCConsumerServer) getMetadataFromRelayTrailer(metadataHeaders []string, relayResult *common.RelayResult) { + for _, metadataHeader := range metadataHeaders { + trailerValue := relayResult.ProviderTrailer.Get(metadataHeader) + if len(trailerValue) > 0 { + extensionMD := pairingtypes.Metadata{ + Name: metadataHeader, + Value: trailerValue[0], + } + relayResult.Reply.Metadata = append(relayResult.Reply.Metadata, extensionMD) + } + } +} + func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, relayResult *common.RelayResult, protocolErrors uint64, relayProcessor *RelayProcessor, protocolMessage chainlib.ProtocolMessage, apiName string) { if relayResult == nil { return @@ -1336,14 +1350,7 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, } // fetch trailer information from the provider by using the provider trailer field. - providerNodeExtensions := relayResult.ProviderTrailer.Get(chainlib.RPCProviderNodeExtension) - if len(providerNodeExtensions) > 0 { - extensionMD := pairingtypes.Metadata{ - Name: chainlib.RPCProviderNodeExtension, - Value: providerNodeExtensions[0], - } - relayResult.Reply.Metadata = append(relayResult.Reply.Metadata, extensionMD) - } + rpccs.getMetadataFromRelayTrailer(chainlib.TrailersToAddToHeaderResponse, relayResult) directiveHeaders := protocolMessage.GetDirectiveHeaders() _, debugRelays := directiveHeaders[common.LAVA_DEBUG_RELAY] diff --git a/protocol/rpcprovider/provider_load_manager.go b/protocol/rpcprovider/provider_load_manager.go new file mode 100644 index 0000000000..f21b221ad9 --- /dev/null +++ b/protocol/rpcprovider/provider_load_manager.go @@ -0,0 +1,55 @@ +package rpcprovider + +import ( + "context" + "strconv" + "sync/atomic" + + "github.com/lavanet/lava/v4/protocol/chainlib" + grpc "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +type ProviderLoadManager struct { + rateLimitThreshold uint64 + activeRequestsPerSecond atomic.Uint64 +} + +func NewProviderLoadManager(rateLimitThreshold uint64) *ProviderLoadManager { + if rateLimitThreshold == 0 { + return nil + } + loadManager := &ProviderLoadManager{rateLimitThreshold: rateLimitThreshold} + return loadManager +} + +func (loadManager *ProviderLoadManager) subtractRelayCall() { + if loadManager == nil { + return + } + loadManager.activeRequestsPerSecond.Add(^uint64(0)) +} + +func (loadManager *ProviderLoadManager) getProviderLoad(activeRequests uint64) float64 { + rateLimitThreshold := loadManager.rateLimitThreshold + if rateLimitThreshold == 0 { + return 0 + } + return float64(activeRequests) / float64(rateLimitThreshold) +} + +// Add relay count, calculate current load +func (loadManager *ProviderLoadManager) addAndSetRelayLoadToContextTrailer(ctx context.Context) float64 { + if loadManager == nil { + return 0 + } + activeRequestsPerSecond := loadManager.activeRequestsPerSecond.Add(1) + provideRelayLoad := loadManager.getProviderLoad(activeRequestsPerSecond) + if provideRelayLoad == 0 { + return provideRelayLoad + } + formattedProviderLoad := strconv.FormatFloat(provideRelayLoad, 'f', -1, 64) + trailerMd := metadata.Pairs(chainlib.RpcProviderLoadRateHeader, formattedProviderLoad) + grpc.SetTrailer(ctx, trailerMd) + return provideRelayLoad +} diff --git a/protocol/rpcprovider/rpcprovider.go 
b/protocol/rpcprovider/rpcprovider.go index 3c2a379b9e..22b9ed8b65 100644 --- a/protocol/rpcprovider/rpcprovider.go +++ b/protocol/rpcprovider/rpcprovider.go @@ -110,6 +110,7 @@ type rpcProviderStartOptions struct { healthCheckMetricsOptions *rpcProviderHealthCheckMetricsOptions staticProvider bool staticSpecPath string + relayLoadLimit uint64 } type rpcProviderHealthCheckMetricsOptions struct { @@ -123,24 +124,26 @@ type RPCProvider struct { rpcProviderListeners map[string]*ProviderListener lock sync.Mutex // all of the following members need to be concurrency proof - providerMetricsManager *metrics.ProviderMetricsManager - rewardServer *rewardserver.RewardServer - privKey *btcec.PrivateKey - lavaChainID string - addr sdk.AccAddress - blockMemorySize uint64 - chainMutexes map[string]*sync.Mutex - parallelConnections uint - cache *performance.Cache - shardID uint // shardID is a flag that allows setting up multiple provider databases of the same chain - chainTrackers *common.SafeSyncMap[string, *chaintracker.ChainTracker] - relaysMonitorAggregator *metrics.RelaysMonitorAggregator - relaysHealthCheckEnabled bool - relaysHealthCheckInterval time.Duration - grpcHealthCheckEndpoint string - providerUniqueId string - staticProvider bool - staticSpecPath string + providerMetricsManager *metrics.ProviderMetricsManager + rewardServer *rewardserver.RewardServer + privKey *btcec.PrivateKey + lavaChainID string + addr sdk.AccAddress + blockMemorySize uint64 + chainMutexes map[string]*sync.Mutex + parallelConnections uint + cache *performance.Cache + shardID uint // shardID is a flag that allows setting up multiple provider databases of the same chain + chainTrackers *common.SafeSyncMap[string, *chaintracker.ChainTracker] + relaysMonitorAggregator *metrics.RelaysMonitorAggregator + relaysHealthCheckEnabled bool + relaysHealthCheckInterval time.Duration + grpcHealthCheckEndpoint string + providerUniqueId string + staticProvider bool + staticSpecPath string + relayLoadLimit uint64 + providerLoadManagersPerChain *common.SafeSyncMap[string, *ProviderLoadManager] } func (rpcp *RPCProvider) Start(options *rpcProviderStartOptions) (err error) { @@ -165,7 +168,8 @@ func (rpcp *RPCProvider) Start(options *rpcProviderStartOptions) (err error) { rpcp.grpcHealthCheckEndpoint = options.healthCheckMetricsOptions.grpcHealthCheckEndpoint rpcp.staticProvider = options.staticProvider rpcp.staticSpecPath = options.staticSpecPath - + rpcp.relayLoadLimit = options.relayLoadLimit + rpcp.providerLoadManagersPerChain = &common.SafeSyncMap[string, *ProviderLoadManager]{} // single state tracker lavaChainFetcher := chainlib.NewLavaChainFetcher(ctx, options.clientCtx) providerStateTracker, err := statetracker.NewProviderStateTracker(ctx, options.txFactory, options.clientCtx, lavaChainFetcher, rpcp.providerMetricsManager) @@ -307,9 +311,7 @@ func (rpcp *RPCProvider) SetupProviderEndpoints(rpcProviderEndpoints []*lavasess wg.Add(parallelJobs) disabledEndpoints := make(chan *lavasession.RPCProviderEndpoint, parallelJobs) // validate static spec configuration is used only on a single chain setup. 
- chainIds := make(map[string]struct{}) for _, rpcProviderEndpoint := range rpcProviderEndpoints { - chainIds[rpcProviderEndpoint.ChainID] = struct{}{} setupEndpoint := func(rpcProviderEndpoint *lavasession.RPCProviderEndpoint, specValidator *SpecValidator) { defer wg.Done() err := rpcp.SetupEndpoint(context.Background(), rpcProviderEndpoint, specValidator) @@ -404,8 +406,8 @@ func (rpcp *RPCProvider) SetupEndpoint(ctx context.Context, rpcProviderEndpoint utils.Attribute{Key: "Chain", Value: rpcProviderEndpoint.ChainID}, utils.Attribute{Key: "apiInterface", Value: apiInterface}) } - // in order to utilize shared resources between chains we need go routines with the same chain to wait for one another here + var loadManager *ProviderLoadManager chainCommonSetup := func() error { rpcp.chainMutexes[chainID].Lock() defer rpcp.chainMutexes[chainID].Unlock() @@ -450,6 +452,12 @@ func (rpcp *RPCProvider) SetupEndpoint(ctx context.Context, rpcProviderEndpoint chainTracker = chainTrackerLoaded utils.LavaFormatDebug("reusing chain tracker", utils.Attribute{Key: "chain", Value: rpcProviderEndpoint.ChainID}) } + + // create provider load manager per chain ID + loadManager, _, err = rpcp.providerLoadManagersPerChain.LoadOrStore(rpcProviderEndpoint.ChainID, NewProviderLoadManager(rpcp.relayLoadLimit)) + if err != nil { + utils.LavaFormatError("Failed LoadOrStore providerLoadManagersPerChain", err, utils.LogAttr("chainId", rpcProviderEndpoint.ChainID), utils.LogAttr("rpcp.relayLoadLimit", rpcp.relayLoadLimit)) + } return nil } err = chainCommonSetup() @@ -485,8 +493,7 @@ func (rpcp *RPCProvider) SetupEndpoint(ctx context.Context, rpcProviderEndpoint utils.LavaFormatTrace("Creating provider node subscription manager", utils.LogAttr("rpcProviderEndpoint", rpcProviderEndpoint)) providerNodeSubscriptionManager = chainlib.NewProviderNodeSubscriptionManager(chainRouter, chainParser, rpcProviderServer, rpcp.privKey) } - - rpcProviderServer.ServeRPCRequests(ctx, rpcProviderEndpoint, chainParser, rpcp.rewardServer, providerSessionManager, reliabilityManager, rpcp.privKey, rpcp.cache, chainRouter, rpcp.providerStateTracker, rpcp.addr, rpcp.lavaChainID, DEFAULT_ALLOWED_MISSING_CU, providerMetrics, relaysMonitor, providerNodeSubscriptionManager, rpcp.staticProvider) + rpcProviderServer.ServeRPCRequests(ctx, rpcProviderEndpoint, chainParser, rpcp.rewardServer, providerSessionManager, reliabilityManager, rpcp.privKey, rpcp.cache, chainRouter, rpcp.providerStateTracker, rpcp.addr, rpcp.lavaChainID, DEFAULT_ALLOWED_MISSING_CU, providerMetrics, relaysMonitor, providerNodeSubscriptionManager, rpcp.staticProvider, loadManager) // set up grpc listener var listener *ProviderListener func() { @@ -717,6 +724,7 @@ rpcprovider 127.0.0.1:3333 OSMOSIS tendermintrpc "wss://www.node-path.com:80,htt if stickinessHeaderName != "" { RPCProviderStickinessHeaderName = stickinessHeaderName } + relayLoadLimit := viper.GetUint64(common.RateLimitRequestPerSecondFlag) prometheusListenAddr := viper.GetString(metrics.MetricsListenFlagName) rewardStoragePath := viper.GetString(rewardserver.RewardServerStorageFlagName) rewardTTL := viper.GetDuration(rewardserver.RewardTTLFlagName) @@ -754,6 +762,7 @@ rpcprovider 127.0.0.1:3333 OSMOSIS tendermintrpc "wss://www.node-path.com:80,htt &rpcProviderHealthCheckMetricsOptions, staticProvider, offlineSpecPath, + relayLoadLimit, } rpcProvider := RPCProvider{} @@ -790,7 +799,7 @@ rpcprovider 127.0.0.1:3333 OSMOSIS tendermintrpc "wss://www.node-path.com:80,htt 
cmdRPCProvider.Flags().BoolVar(&chainlib.IgnoreSubscriptionNotConfiguredError, chainlib.IgnoreSubscriptionNotConfiguredErrorFlag, chainlib.IgnoreSubscriptionNotConfiguredError, "ignore webSocket node url not configured error, when subscription is enabled in spec") cmdRPCProvider.Flags().IntVar(&numberOfRetriesAllowedOnNodeErrors, common.SetRelayCountOnNodeErrorFlag, 2, "set the number of retries attempt on node errors") cmdRPCProvider.Flags().String(common.UseStaticSpecFlag, "", "load offline spec provided path to spec file, used to test specs before they are proposed on chain, example for spec with inheritance: --use-static-spec ./cookbook/specs/ibc.json,./cookbook/specs/tendermint.json,./cookbook/specs/cosmossdk.json,./cookbook/specs/ethermint.json,./cookbook/specs/ethereum.json,./cookbook/specs/evmos.json") - + cmdRPCProvider.Flags().Uint64(common.RateLimitRequestPerSecondFlag, 0, "Measuring the load relative to this number for feedback - per second - per chain - default unlimited. Given Y simultaneous relay calls, a value of X and will measure Y/X load rate.") common.AddRollingLogConfig(cmdRPCProvider) return cmdRPCProvider } diff --git a/protocol/rpcprovider/rpcprovider_server.go b/protocol/rpcprovider/rpcprovider_server.go index 86ab5d38c3..4ca070c3b3 100644 --- a/protocol/rpcprovider/rpcprovider_server.go +++ b/protocol/rpcprovider/rpcprovider_server.go @@ -71,6 +71,7 @@ type RPCProviderServer struct { providerUniqueId string StaticProvider bool providerStateMachine *ProviderStateMachine + providerLoadManager *ProviderLoadManager } type ReliabilityManagerInf interface { @@ -112,6 +113,7 @@ func (rpcps *RPCProviderServer) ServeRPCRequests( relaysMonitor *metrics.RelaysMonitor, providerNodeSubscriptionManager *chainlib.ProviderNodeSubscriptionManager, staticProvider bool, + providerLoadManager *ProviderLoadManager, ) { rpcps.cache = cache rpcps.chainRouter = chainRouter @@ -134,6 +136,7 @@ func (rpcps *RPCProviderServer) ServeRPCRequests( rpcps.relaysMonitor = relaysMonitor rpcps.providerNodeSubscriptionManager = providerNodeSubscriptionManager rpcps.providerStateMachine = NewProviderStateMachine(rpcProviderEndpoint.ChainID, lavaprotocol.NewRelayRetriesManager(), chainRouter) + rpcps.providerLoadManager = providerLoadManager rpcps.initRelaysMonitor(ctx) } @@ -180,7 +183,17 @@ func (rpcps *RPCProviderServer) craftChainMessage() (chainMessage chainlib.Chain // function used to handle relay requests from a consumer, it is called by a provider_listener by calling RegisterReceiver func (rpcps *RPCProviderServer) Relay(ctx context.Context, request *pairingtypes.RelayRequest) (*pairingtypes.RelayReply, error) { - grpc.SetTrailer(ctx, metadata.Pairs(chainlib.RpcProviderUniqueIdHeader, rpcps.providerUniqueId)) + // get the number of simultaneous relay calls + currentLoad := rpcps.providerLoadManager.addAndSetRelayLoadToContextTrailer(ctx) + defer func() { + // add load metric and subtract the load at the end of the relay using a routine. 
+ go func() { + rpcps.providerLoadManager.subtractRelayCall() + rpcps.metrics.SetLoadRate(currentLoad) + }() + }() + trailerMd := metadata.Pairs(chainlib.RpcProviderUniqueIdHeader, rpcps.providerUniqueId) + grpc.SetTrailer(ctx, trailerMd) if request.RelayData == nil || request.RelaySession == nil { return nil, utils.LavaFormatWarning("invalid relay request, internal fields are nil", nil) } diff --git a/scripts/pre_setups/init_lava_only_with_node_two_consumers.sh b/scripts/pre_setups/init_lava_only_with_node_rate_limit.sh similarity index 79% rename from scripts/pre_setups/init_lava_only_with_node_two_consumers.sh rename to scripts/pre_setups/init_lava_only_with_node_rate_limit.sh index 2ebffd14ea..4d35705eb0 100755 --- a/scripts/pre_setups/init_lava_only_with_node_two_consumers.sh +++ b/scripts/pre_setups/init_lava_only_with_node_rate_limit.sh @@ -20,7 +20,7 @@ echo "[Test Setup] sleeping 20 seconds for node to finish setup (if its not enou sleep 5 wait_for_lava_node_to_start -GASPRICE="0.000000001ulava" +GASPRICE="0.00002ulava" lavad tx gov submit-legacy-proposal spec-add ./cookbook/specs/ibc.json,./cookbook/specs/cosmoswasm.json,./cookbook/specs/tendermint.json,./cookbook/specs/cosmossdk.json,./cookbook/specs/cosmossdk_45.json,./cookbook/specs/cosmossdk_full.json,./cookbook/specs/ethermint.json,./cookbook/specs/ethereum.json,./cookbook/specs/cosmoshub.json,./cookbook/specs/lava.json,./cookbook/specs/osmosis.json,./cookbook/specs/fantom.json,./cookbook/specs/celo.json,./cookbook/specs/optimism.json,./cookbook/specs/arbitrum.json,./cookbook/specs/starknet.json,./cookbook/specs/aptos.json,./cookbook/specs/juno.json,./cookbook/specs/polygon.json,./cookbook/specs/evmos.json,./cookbook/specs/base.json,./cookbook/specs/canto.json,./cookbook/specs/sui.json,./cookbook/specs/solana.json,./cookbook/specs/bsc.json,./cookbook/specs/axelar.json,./cookbook/specs/avalanche.json,./cookbook/specs/fvm.json --lava-dev-test -y --from alice --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE & wait_next_block wait_next_block @@ -42,8 +42,6 @@ PROVIDER1_LISTENER="127.0.0.1:2220" lavad tx subscription buy DefaultPlan $(lavad keys show user1 -a) -y --from user1 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE wait_next_block -lavad tx subscription buy DefaultPlan $(lavad keys show user2 -a) -y --from user2 --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE -wait_next_block lavad tx pairing stake-provider "LAV1" $PROVIDERSTAKE "$PROVIDER1_LISTENER,1" 1 $(operator_address) -y --from servicer1 --provider-moniker "dummyMoniker" --gas-adjustment "1.5" --gas "auto" --gas-prices $GASPRICE sleep_until_next_epoch @@ -53,17 +51,13 @@ screen -d -m -S provider1 bash -c "source ~/.bashrc; lavap rpcprovider \ $PROVIDER1_LISTENER LAV1 rest '$LAVA_REST' \ $PROVIDER1_LISTENER LAV1 tendermintrpc '$LAVA_RPC,$LAVA_RPC_WS' \ $PROVIDER1_LISTENER LAV1 grpc '$LAVA_GRPC' \ -$EXTRA_PROVIDER_FLAGS --geolocation 1 --log_level trace --from servicer1 --chain-id lava --metrics-listen-address ":7776" 2>&1 | tee $LOGS_DIR/PROVIDER1.log" && sleep 0.25 +$EXTRA_PROVIDER_FLAGS --geolocation 1 --log_level debug --from servicer1 --rate-limit-requests-per-second 10 --chain-id lava --metrics-listen-address ":7776" 2>&1 | tee $LOGS_DIR/PROVIDER1.log" && sleep 0.25 wait_next_block screen -d -m -S consumers bash -c "source ~/.bashrc; lavap rpcconsumer \ 127.0.0.1:3360 LAV1 rest 127.0.0.1:3361 LAV1 tendermintrpc 127.0.0.1:3362 LAV1 grpc \ -$EXTRA_PORTAL_FLAGS --geolocation 1 --log_level trace --from user1 --chain-id lava 
--add-api-method-metrics --allow-insecure-provider-dialing --metrics-listen-address ":7779" 2>&1 | tee $LOGS_DIR/CONSUMERS.log" && sleep 0.25 - -screen -d -m -S consumers2 bash -c "source ~/.bashrc; lavap rpcconsumer \ -127.0.0.1:3350 LAV1 rest 127.0.0.1:3351 LAV1 tendermintrpc 127.0.0.1:3352 LAV1 grpc \ -$EXTRA_PORTAL_FLAGS --geolocation 1 --log_level trace --from user2 --chain-id lava --add-api-method-metrics --allow-insecure-provider-dialing --metrics-listen-address ":7773" 2>&1 | tee $LOGS_DIR/CONSUMERS2.log" && sleep 0.25 +$EXTRA_PORTAL_FLAGS --geolocation 1 --log_level debug --from user1 --chain-id lava --add-api-method-metrics --allow-insecure-provider-dialing --metrics-listen-address ":7779" 2>&1 | tee $LOGS_DIR/CONSUMERS.log" && sleep 0.25 echo "--- setting up screens done ---" screen -ls \ No newline at end of file From fc67ad2196986f8e13f78948797567a31eaae4d8 Mon Sep 17 00:00:00 2001 From: Omer <100387053+omerlavanet@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:58:37 +0200 Subject: [PATCH 07/14] added the error so we can solve problems for users (#1763) --- protocol/chainlib/grpc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol/chainlib/grpc.go b/protocol/chainlib/grpc.go index b026e8b9d8..926be7bcf3 100644 --- a/protocol/chainlib/grpc.go +++ b/protocol/chainlib/grpc.go @@ -128,7 +128,7 @@ func (apip *GrpcChainParser) ParseMsg(url string, data []byte, connectionType st // Check API is supported and save it in nodeMsg. apiCont, err := apip.getSupportedApi(url, connectionType) if err != nil { - return nil, utils.LavaFormatError("failed to getSupportedApi gRPC", err) + return nil, utils.LavaFormatError("failed to getSupportedApi gRPC", err, utils.LogAttr("url", url), utils.LogAttr("connectionType", connectionType)) } apiCollection, err := apip.getApiCollection(connectionType, apiCont.collectionKey.InternalPath, apiCont.collectionKey.Addon) From ab29fb61ea0f3caac8b9c72e096370afc24d7a26 Mon Sep 17 00:00:00 2001 From: Elad Gildnur <6321801+shleikes@users.noreply.github.com> Date: Wed, 30 Oct 2024 19:35:21 +0200 Subject: [PATCH 08/14] feat: PRT - Implement cache reconnect mechanism + Cache E2E (#1734) * Fix small things in protocol e2e * Add cache reconnect mechanism * Add cache to protocol E2E * Fix lint * Add missing line * Separate the cache from the relayer for auto reconnection * Fix lint --- protocol/performance/cache.go | 131 +++++++++++++++++---- protocol/performance/errors.go | 2 +- testutil/e2e/allowedErrorList.go | 1 + testutil/e2e/protocolE2E.go | 189 ++++++++++++++++++++----------- 4 files changed, 230 insertions(+), 93 deletions(-) diff --git a/protocol/performance/cache.go b/protocol/performance/cache.go index abce3be96f..638644fbff 100644 --- a/protocol/performance/cache.go +++ b/protocol/performance/cache.go @@ -2,22 +2,56 @@ package performance import ( "context" + "sync" + "sync/atomic" "time" "github.com/lavanet/lava/v4/protocol/lavasession" + "github.com/lavanet/lava/v4/utils" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" ) -type Cache struct { - client pairingtypes.RelayerCacheClient - address string +type relayerCacheClientStore struct { + client pairingtypes.RelayerCacheClient + lock sync.RWMutex + ctx context.Context + address string + reconnecting atomic.Bool } -func ConnectGRPCConnectionToRelayerCacheService(ctx context.Context, addr string) (*pairingtypes.RelayerCacheClient, error) { - connectCtx, cancel := context.WithTimeout(ctx, 3*time.Second) +const ( + reconnectInterval = 5 * time.Second +) + +func 
newRelayerCacheClientStore(ctx context.Context, address string) (*relayerCacheClientStore, error) { + clientStore := &relayerCacheClientStore{ + client: nil, + ctx: ctx, + address: address, + } + return clientStore, clientStore.connectClient() +} + +func (r *relayerCacheClientStore) getClient() pairingtypes.RelayerCacheClient { + if r == nil { + return nil + } + + r.lock.RLock() + defer r.lock.RUnlock() + + if r.client == nil { + go r.reconnectClient() + } + + return r.client // might be nil +} + +func (r *relayerCacheClientStore) connectGRPCConnectionToRelayerCacheService() (*pairingtypes.RelayerCacheClient, error) { + connectCtx, cancel := context.WithTimeout(r.ctx, 3*time.Second) defer cancel() - conn, err := lavasession.ConnectGRPCClient(connectCtx, addr, false, true, false) + conn, err := lavasession.ConnectGRPCClient(connectCtx, r.address, false, true, false) if err != nil { return nil, err } @@ -27,40 +61,91 @@ func ConnectGRPCConnectionToRelayerCacheService(ctx context.Context, addr string return &c, nil } -func InitCache(ctx context.Context, addr string) (*Cache, error) { - relayerCacheClient, err := ConnectGRPCConnectionToRelayerCacheService(ctx, addr) - if err != nil { - return &Cache{client: nil, address: addr}, err +func (r *relayerCacheClientStore) connectClient() error { + relayerCacheClient, err := r.connectGRPCConnectionToRelayerCacheService() + if err == nil { + utils.LavaFormatInfo("Connected to cache service", utils.LogAttr("address", r.address)) + func() { + r.lock.Lock() + defer r.lock.Unlock() + r.client = *relayerCacheClient + }() + + r.reconnecting.Store(false) + return nil // connected } - cache := Cache{client: *relayerCacheClient, address: addr} - return &cache, nil + + utils.LavaFormatDebug("Failed to connect to cache service", utils.LogAttr("address", r.address), utils.LogAttr("error", err)) + return err +} + +func (r *relayerCacheClientStore) reconnectClient() { + // This is a simple atomic operation to ensure that only one goroutine is reconnecting at a time. 
+ // reconnecting.CompareAndSwap(false, true): + // if reconnecting == false { + // reconnecting = true + // return true -> reconnect + // } + // return false -> already reconnecting + if !r.reconnecting.CompareAndSwap(false, true) { + return + } + + for { + select { + case <-r.ctx.Done(): + return + case <-time.After(reconnectInterval): + if r.connectClient() != nil { + return + } + } + } +} + +type Cache struct { + clientStore *relayerCacheClientStore + address string + serviceCtx context.Context +} + +func InitCache(ctx context.Context, addr string) (*Cache, error) { + clientStore, err := newRelayerCacheClientStore(ctx, addr) + return &Cache{ + clientStore: clientStore, + address: addr, + serviceCtx: ctx, + }, err } func (cache *Cache) GetEntry(ctx context.Context, relayCacheGet *pairingtypes.RelayCacheGet) (reply *pairingtypes.CacheRelayReply, err error) { if cache == nil { - // TODO: try to connect again once in a while return nil, NotInitializedError } - if cache.client == nil { - return nil, NotConnectedError.Wrapf("No client connected to address: %s", cache.address) + + client := cache.clientStore.getClient() + if client == nil { + return nil, NotConnectedError } - // TODO: handle disconnections and error types here - return cache.client.GetRelay(ctx, relayCacheGet) + + reply, err = client.GetRelay(ctx, relayCacheGet) + return reply, err } func (cache *Cache) CacheActive() bool { - return cache != nil + return cache != nil && cache.clientStore.getClient() != nil } func (cache *Cache) SetEntry(ctx context.Context, cacheSet *pairingtypes.RelayCacheSet) error { if cache == nil { - // TODO: try to connect again once in a while return NotInitializedError } - if cache.client == nil { - return NotConnectedError.Wrapf("No client connected to address: %s", cache.address) + + client := cache.clientStore.getClient() + if client == nil { + return NotConnectedError } - // TODO: handle disconnections and SetRelay error types here - _, err := cache.client.SetRelay(ctx, cacheSet) + + _, err := client.SetRelay(ctx, cacheSet) return err } diff --git a/protocol/performance/errors.go b/protocol/performance/errors.go index bb69cd3f51..1170f0b67f 100644 --- a/protocol/performance/errors.go +++ b/protocol/performance/errors.go @@ -6,5 +6,5 @@ import ( var ( NotConnectedError = sdkerrors.New("Not Connected Error", 700, "No Connection To grpc server") - NotInitializedError = sdkerrors.New("Not Initialised Error", 701, "to use cache run initCache") + NotInitializedError = sdkerrors.New("Not Initialized Error", 701, "to use cache run initCache") ) diff --git a/testutil/e2e/allowedErrorList.go b/testutil/e2e/allowedErrorList.go index 5b083bae70..192e3eac1b 100644 --- a/testutil/e2e/allowedErrorList.go +++ b/testutil/e2e/allowedErrorList.go @@ -9,6 +9,7 @@ var allowedErrors = map[string]string{ "purging provider after all endpoints are disabled provider": "This error is allowed because it is caused by the initial bootup, continuous failure would be caught by the e2e so we can allowed this error.", "Provider Side Failed Sending Message, Reason: Unavailable": "This error is allowed because it is caused by the lavad restart to turn on emergency mode", "Maximum cu exceeded PrepareSessionForUsage": "This error is allowed because it is caused by switching between providers, continuous failure would be caught by the e2e so we can allowed this error.", + "Failed To Connect to cache at address": "This error is allowed because it is caused by cache being connected only during the test and not during the bootup", } var 
allowedErrorsDuringEmergencyMode = map[string]string{ diff --git a/testutil/e2e/protocolE2E.go b/testutil/e2e/protocolE2E.go index bb19f16e8c..541eb66a2c 100644 --- a/testutil/e2e/protocolE2E.go +++ b/testutil/e2e/protocolE2E.go @@ -77,6 +77,8 @@ type lavaTest struct { wg sync.WaitGroup logPath string tokenDenom string + consumerCacheAddress string + providerCacheAddress string } func init() { @@ -339,8 +341,8 @@ func (lt *lavaTest) startJSONRPCProxy(ctx context.Context) { func (lt *lavaTest) startJSONRPCProvider(ctx context.Context) { for idx := 1; idx <= 5; idx++ { command := fmt.Sprintf( - "%s rpcprovider %s/jsonrpcProvider%d.yml --chain-id=lava --from servicer%d %s", - lt.protocolPath, providerConfigsFolder, idx, idx, lt.lavadArgs, + "%s rpcprovider %s/jsonrpcProvider%d.yml --cache-be %s --chain-id=lava --from servicer%d %s", + lt.protocolPath, providerConfigsFolder, idx, lt.providerCacheAddress, idx, lt.lavadArgs, ) logName := "03_EthProvider_" + fmt.Sprintf("%02d", idx) funcName := fmt.Sprintf("startJSONRPCProvider (provider %02d)", idx) @@ -358,8 +360,8 @@ func (lt *lavaTest) startJSONRPCProvider(ctx context.Context) { func (lt *lavaTest) startJSONRPCConsumer(ctx context.Context) { for idx, u := range []string{"user1"} { command := fmt.Sprintf( - "%s rpcconsumer %s/ethConsumer%d.yml --chain-id=lava --from %s %s", - lt.protocolPath, consumerConfigsFolder, idx+1, u, lt.lavadArgs+lt.consumerArgs, + "%s rpcconsumer %s/ethConsumer%d.yml --cache-be %s --chain-id=lava --from %s %s", + lt.protocolPath, consumerConfigsFolder, idx+1, lt.consumerCacheAddress, u, lt.lavadArgs+lt.consumerArgs, ) logName := "04_jsonConsumer_" + fmt.Sprintf("%02d", idx+1) funcName := fmt.Sprintf("startJSONRPCConsumer (consumer %02d)", idx+1) @@ -533,8 +535,8 @@ func jsonrpcTests(rpcURL string, testDuration time.Duration) error { func (lt *lavaTest) startLavaProviders(ctx context.Context) { for idx := 6; idx <= 10; idx++ { command := fmt.Sprintf( - "%s rpcprovider %s/lavaProvider%d --chain-id=lava --from servicer%d %s", - lt.protocolPath, providerConfigsFolder, idx, idx, lt.lavadArgs, + "%s rpcprovider %s/lavaProvider%d --cache-be %s --chain-id=lava --from servicer%d %s", + lt.protocolPath, providerConfigsFolder, idx, lt.providerCacheAddress, idx, lt.lavadArgs, ) logName := "05_LavaProvider_" + fmt.Sprintf("%02d", idx-5) funcName := fmt.Sprintf("startLavaProviders (provider %02d)", idx-5) @@ -552,8 +554,8 @@ func (lt *lavaTest) startLavaProviders(ctx context.Context) { func (lt *lavaTest) startLavaConsumer(ctx context.Context) { for idx, u := range []string{"user3"} { command := fmt.Sprintf( - "%s rpcconsumer %s/lavaConsumer%d.yml --chain-id=lava --from %s %s", - lt.protocolPath, consumerConfigsFolder, idx+1, u, lt.lavadArgs+lt.consumerArgs, + "%s rpcconsumer %s/lavaConsumer%d.yml --cache-be %s --chain-id=lava --from %s %s", + lt.protocolPath, consumerConfigsFolder, idx+1, lt.consumerCacheAddress, u, lt.lavadArgs+lt.consumerArgs, ) logName := "06_RPCConsumer_" + fmt.Sprintf("%02d", idx+1) funcName := fmt.Sprintf("startRPCConsumer (consumer %02d)", idx+1) @@ -562,6 +564,43 @@ func (lt *lavaTest) startLavaConsumer(ctx context.Context) { utils.LavaFormatInfo("startRPCConsumer OK") } +func (lt *lavaTest) startConsumerCache(ctx context.Context) { + command := fmt.Sprintf("%s cache %s --log_level debug", lt.protocolPath, lt.consumerCacheAddress) + logName := "08_Consumer_Cache" + funcName := "startConsumerCache" + + lt.execCommand(ctx, funcName, logName, command, false) + lt.checkCacheIsUp(ctx, lt.consumerCacheAddress, 
time.Minute) + utils.LavaFormatInfo(funcName + OKstr) +} + +func (lt *lavaTest) startProviderCache(ctx context.Context) { + command := fmt.Sprintf("%s cache %s --log_level debug", lt.protocolPath, lt.providerCacheAddress) + logName := "09_Provider_Cache" + funcName := "startProviderCache" + + lt.execCommand(ctx, funcName, logName, command, false) + lt.checkCacheIsUp(ctx, lt.providerCacheAddress, time.Minute) + utils.LavaFormatInfo(funcName + OKstr) +} + +func (lt *lavaTest) checkCacheIsUp(ctx context.Context, cacheAddress string, timeout time.Duration) { + for start := time.Now(); time.Since(start) < timeout; { + utils.LavaFormatInfo("Waiting Cache " + cacheAddress) + nctx, cancel := context.WithTimeout(ctx, time.Second) + grpcClient, err := grpc.DialContext(nctx, cacheAddress, grpc.WithBlock(), grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + cancel() + time.Sleep(time.Second) + continue + } + cancel() + grpcClient.Close() + return + } + panic("checkCacheIsUp: Check Failed Cache didn't respond" + cacheAddress) +} + func (lt *lavaTest) startLavaEmergencyConsumer(ctx context.Context) { for idx, u := range []string{"user5"} { command := fmt.Sprintf( @@ -1394,16 +1433,18 @@ func runProtocolE2E(timeout time.Duration) { fmt.Println(err) } lt := &lavaTest{ - grpcConn: grpcConn, - lavadPath: gopath + "/bin/lavad", - protocolPath: gopath + "/bin/lavap", - lavadArgs: "--geolocation 1 --log_level debug", - consumerArgs: " --allow-insecure-provider-dialing", - logs: make(map[string]*sdk.SafeBuffer), - commands: make(map[string]*exec.Cmd), - providerType: make(map[string][]epochStorageTypes.Endpoint), - logPath: protocolLogsFolder, - tokenDenom: commonconsts.TestTokenDenom, + grpcConn: grpcConn, + lavadPath: gopath + "/bin/lavad", + protocolPath: gopath + "/bin/lavap", + lavadArgs: "--geolocation 1 --log_level debug", + consumerArgs: " --allow-insecure-provider-dialing", + logs: make(map[string]*sdk.SafeBuffer), + commands: make(map[string]*exec.Cmd), + providerType: make(map[string][]epochStorageTypes.Endpoint), + logPath: protocolLogsFolder, + tokenDenom: commonconsts.TestTokenDenom, + consumerCacheAddress: "127.0.0.1:2778", + providerCacheAddress: "127.0.0.1:2777", } // use defer to save logs in case the tests fail defer func() { @@ -1465,63 +1506,73 @@ func runProtocolE2E(timeout time.Duration) { lt.startLavaProviders(ctx) lt.startLavaConsumer(ctx) - // staked client then with subscription - repeat(1, func(n int) { - url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3) - lt.checkTendermintConsumer(url, time.Second*30) - url = fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3+1) - lt.checkRESTConsumer(url, time.Second*30) - url = fmt.Sprintf("127.0.0.1:334%d", (n-1)*3+2) - lt.checkGRPCConsumer(url, time.Second*30) - }) + runChecksAndTests := func() { + // staked client then with subscription + repeat(1, func(n int) { + url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3) + lt.checkTendermintConsumer(url, time.Second*15) + url = fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3+1) + lt.checkRESTConsumer(url, time.Second*15) + url = fmt.Sprintf("127.0.0.1:334%d", (n-1)*3+2) + lt.checkGRPCConsumer(url, time.Second*15) + }) - // staked client then with subscription - repeat(1, func(n int) { - url := fmt.Sprintf("http://127.0.0.1:333%d", n) - if err := jsonrpcTests(url, time.Second*30); err != nil { - panic(err) - } - }) - utils.LavaFormatInfo("JSONRPC TEST OK") + // staked client then with subscription + repeat(1, func(n int) { + url := fmt.Sprintf("http://127.0.0.1:333%d", n) + 
if err := jsonrpcTests(url, time.Second*15); err != nil { + panic(err) + } + }) + utils.LavaFormatInfo("JSONRPC TEST OK") - // staked client then with subscription - repeat(1, func(n int) { - url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3) - if err := tendermintTests(url, time.Second*30); err != nil { - panic(err) - } - }) - utils.LavaFormatInfo("TENDERMINTRPC TEST OK") + // staked client then with subscription + repeat(1, func(n int) { + url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3) + if err := tendermintTests(url, time.Second*15); err != nil { + panic(err) + } + }) + utils.LavaFormatInfo("TENDERMINTRPC TEST OK") - // staked client then with subscription - repeat(1, func(n int) { - url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3) - if err := tendermintURITests(url, time.Second*30); err != nil { - panic(err) - } - }) - utils.LavaFormatInfo("TENDERMINTRPC URI TEST OK") + // staked client then with subscription + repeat(1, func(n int) { + url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3) + if err := tendermintURITests(url, time.Second*15); err != nil { + panic(err) + } + }) + utils.LavaFormatInfo("TENDERMINTRPC URI TEST OK") - lt.lavaOverLava(ctx) + // staked client then with subscription + repeat(1, func(n int) { + url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3+1) + if err := restTests(url, time.Second*15); err != nil { + panic(err) + } + }) + utils.LavaFormatInfo("REST TEST OK") - // staked client then with subscription - repeat(1, func(n int) { - url := fmt.Sprintf("http://127.0.0.1:334%d", (n-1)*3+1) - if err := restTests(url, time.Second*30); err != nil { - panic(err) - } - }) - utils.LavaFormatInfo("REST TEST OK") + // staked client then with subscription + repeat(1, func(n int) { + url := fmt.Sprintf("127.0.0.1:334%d", (n-1)*3+2) + if err := grpcTests(url, time.Second*15); err != nil { + panic(err) + } + }) + utils.LavaFormatInfo("GRPC TEST OK") + } - // staked client then with subscription - // TODO: if set to 30 secs fails e2e need to investigate why. 
currently blocking PR's - repeat(1, func(n int) { - url := fmt.Sprintf("127.0.0.1:334%d", (n-1)*3+2) - if err := grpcTests(url, time.Second*5); err != nil { - panic(err) - } - }) - utils.LavaFormatInfo("GRPC TEST OK") + // run tests without cache + runChecksAndTests() + + lt.startConsumerCache(ctx) + lt.startProviderCache(ctx) + + // run tests with cache + runChecksAndTests() + + lt.lavaOverLava(ctx) lt.checkResponse("http://127.0.0.1:3340", "http://127.0.0.1:3341", "127.0.0.1:3342") From 0d43649f18f28e83f0f6b8e0f2431a2fc3807c32 Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:35:36 +0100 Subject: [PATCH 09/14] feat: PRT - adding missing API's to celestia (#1764) --- cookbook/specs/celestia.json | 216 +++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) diff --git a/cookbook/specs/celestia.json b/cookbook/specs/celestia.json index 1d2f46d6bf..a4117cd787 100644 --- a/cookbook/specs/celestia.json +++ b/cookbook/specs/celestia.json @@ -31,6 +31,132 @@ "add_on": "" }, "apis": [ + { + "name": "/celestia/minfee/v1/min_gas_price", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "/ibc/apps/packetforward/v1/params", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "/celestia/core/v1/tx/{tx_id}", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "/cosmos/mint/v1beta1/inflation_rate", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "/cosmos/mint/v1beta1/genesis_time", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "/signal/v1/tally/{version}", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "/signal/v1/upgrade", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, { "name": "/blob/v1/params", "block_parsing": { @@ -382,6 +508,24 @@ }, "extra_compute_units": 0 }, + { + "name": "celestia.core.v1.tx.Tx/TxStatus", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + 
"category": { + "deterministic": false, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, { "name": "celestia.qgb.v1.Query/LatestValsetRequestBeforeNonce", "block_parsing": { @@ -418,6 +562,24 @@ }, "extra_compute_units": 0 }, + { + "name": "packetforward.v1.Query/Params", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, { "name": "celestia.qgb.v1.Query/LatestDataCommitment", "block_parsing": { @@ -454,6 +616,60 @@ }, "extra_compute_units": 0 }, + { + "name": "celestia.signal.v1.Query/GetUpgrade", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "celestia.signal.v1.Query/VersionTally", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, + { + "name": "celestia.minfee.v1.Querycelestia.minfee.v1.Query/NetworkMinGasPrice", + "block_parsing": { + "parser_arg": [ + "latest" + ], + "parser_func": "DEFAULT" + }, + "compute_units": 10, + "enabled": true, + "category": { + "deterministic": true, + "local": false, + "subscription": false, + "stateful": 0 + }, + "extra_compute_units": 0 + }, { "name": "celestia.mint.v1.Query/GenesisTime", "block_parsing": { From da37d597dc0da98bb58338a266d1052cfb98def0 Mon Sep 17 00:00:00 2001 From: gilxgil <100146006+gilxgil@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:07:26 -0400 Subject: [PATCH 10/14] feat: lavap command to get block heights (#1693) * chain heights command, get latest chain heights from all providers of a chain * moved chain-heights to lavap test --------- Co-authored-by: Gil Co-authored-by: Ran Mishael --- cmd/lavap/main.go | 1 + protocol/monitoring/chain_heights.go | 172 +++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 protocol/monitoring/chain_heights.go diff --git a/cmd/lavap/main.go b/cmd/lavap/main.go index 03feb9efda..e57f91651b 100644 --- a/cmd/lavap/main.go +++ b/cmd/lavap/main.go @@ -73,6 +73,7 @@ func main() { testCmd.AddCommand(connection.CreateTestConnectionServerCobraCommand()) testCmd.AddCommand(connection.CreateTestConnectionProbeCobraCommand()) testCmd.AddCommand(monitoring.CreateHealthCobraCommand()) + testCmd.AddCommand(monitoring.CreateChainHeightsCommand()) testCmd.AddCommand(loadtest.CreateTestLoadCobraCommand()) rootCmd.AddCommand(cache.CreateCacheCobraCommand()) diff --git a/protocol/monitoring/chain_heights.go b/protocol/monitoring/chain_heights.go new file mode 100644 index 0000000000..5380693652 --- /dev/null +++ b/protocol/monitoring/chain_heights.go @@ -0,0 +1,172 @@ +package monitoring + +import ( + "context" + "fmt" + "runtime" + "sync" + "time" + + "github.com/cosmos/cosmos-sdk/client" + "github.com/cosmos/cosmos-sdk/client/flags" + "github.com/spf13/cobra" + "golang.org/x/sync/semaphore" + + "github.com/lavanet/lava/v3/protocol/lavasession" + "github.com/lavanet/lava/v3/utils" + "github.com/lavanet/lava/v3/utils/rand" + "github.com/lavanet/lava/v3/x/epochstorage/types" + 
pairingtypes "github.com/lavanet/lava/v3/x/pairing/types" +) + +const ( + chainIDFlag = "chain-id" + contFlag = "cont" +) + +func CreateChainHeightsCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "chain-heights ", + Short: "Get chain heights from all providers for a specific chain", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + // init + rand.InitRandomSeed() + clientCtx, err := client.GetClientQueryContext(cmd) + if err != nil { + return err + } + logLevel, err := cmd.Flags().GetString(flags.FlagLogLevel) + if err != nil { + return fmt.Errorf("failed to read log level flag: %w", err) + } + utils.SetGlobalLoggingLevel(logLevel) + + // + // Get args + chainID := args[0] + cont, err := cmd.Flags().GetUint64(contFlag) + if err != nil { + return err + } + + // + // Run + for { + err := runChainHeights(cmd.Context(), clientCtx, chainID) + if err != nil { + return err + } + + if cont == 0 { + break + } + + time.Sleep(time.Duration(cont) * time.Second) + } + + return nil + }, + } + + cmd.Flags().Uint64(contFlag, 0, "Continuous mode: seconds to wait before repeating (0 for single run)") + flags.AddQueryFlagsToCmd(cmd) + cmd.Flags().String(flags.FlagLogLevel, "info", "The logging level (trace|debug|info|warn|error|fatal|panic)") + return cmd +} + +func runChainHeights(ctx context.Context, clientCtx client.Context, chainID string) error { + pairingQuerier := pairingtypes.NewQueryClient(clientCtx) + + // Get providers for the chain + providersResp, err := pairingQuerier.Providers(ctx, &pairingtypes.QueryProvidersRequest{ + ChainID: chainID, + }) + if err != nil { + return fmt.Errorf("failed to get providers for chain %s: %w", chainID, err) + } + if len(providersResp.StakeEntry) == 0 { + return fmt.Errorf("no providers found for chain %s", chainID) + } + + fmt.Printf("Chain Heights for %s:\n", chainID) + + // Determine the number of goroutines to use + maxGoroutines := runtime.NumCPU() - 2 + if maxGoroutines < 1 { + maxGoroutines = 1 + } + + // Create a semaphore to limit the number of concurrent goroutines + sem := semaphore.NewWeighted(int64(maxGoroutines)) + + var wg sync.WaitGroup + results := make(chan string, len(providersResp.StakeEntry)) + + for _, provider := range providersResp.StakeEntry { + if len(provider.Endpoints) == 0 || len(provider.Endpoints[0].GetSupportedServices()) == 0 { + continue + } + + wg.Add(1) + go func(provider types.StakeEntry) { + defer wg.Done() + + // Acquire semaphore + if err := sem.Acquire(ctx, 1); err != nil { + utils.LavaFormatError("Failed to acquire semaphore", err) + return + } + defer sem.Release(1) + + endpoint := provider.Endpoints[0] + service := endpoint.GetSupportedServices()[0] + height, err := probeProvider(ctx, endpoint.IPPORT, chainID, service.ApiInterface) + if err != nil { + utils.LavaFormatDebug("Error probing provider", utils.LogAttr("provider", provider.Address), utils.LogAttr("error", err)) + } else { + results <- fmt.Sprintf(" %s: %d", provider.Address, height) + } + }(provider) + } + + // Close the results channel when all goroutines are done + go func() { + wg.Wait() + close(results) + }() + + // Print results as they come in + for result := range results { + if result == "" { + continue + } + fmt.Println(result) + } + + fmt.Println() + + return nil +} + +func probeProvider(ctx context.Context, ipport, chainID, apiInterface string) (int64, error) { + cswp := lavasession.ConsumerSessionsWithProvider{} + relayerClient, conn, err := cswp.ConnectRawClientWithTimeout(ctx, ipport) + if err != nil { 
+ return 0, err + } + defer conn.Close() + + probeReq := &pairingtypes.ProbeRequest{ + Guid: uint64(rand.Int63()), + SpecId: chainID, + ApiInterface: apiInterface, + } + + probeResp, err := relayerClient.Probe(ctx, probeReq) + if err != nil { + return 0, err + } + + return probeResp.LatestBlock, nil +} From a0b0de8d77607f51b0c7fae67a554d758303b287 Mon Sep 17 00:00:00 2001 From: Leon Magma Date: Wed, 30 Oct 2024 19:09:10 +0100 Subject: [PATCH 11/14] feat: PRT - Add provider freeze and jailed status metric (#1729) * add AvailabilityStateUpdater per chain to rpcprovider * added provider availability updater * fix lint * added correct metrics * remove redundent comment * update latest epoch * wip - adding unitests * wip - availabilitty updater mock and tests * fix frozen metric help description * revert init lava * fix pr - freeze metric creation logic and split jailed metric data * fix lint * fix make lint * set updater key with chainid only * fix public address setup for freeze updater * add unitests for freeze updater epoch updates * change query to provider instead of providers * fix jailed status metric labels * Update protocol/metrics/metrics_provider_manager.go Co-authored-by: Elad Gildnur <6321801+shleikes@users.noreply.github.com> * Fix after merge * Numerus changes * Fix lint --------- Co-authored-by: leon mandel Co-authored-by: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Co-authored-by: Elad Gildnur <6321801+shleikes@users.noreply.github.com> Co-authored-by: Elad Gildnur Co-authored-by: Ran Mishael Co-authored-by: omerlavanet --- protocol/metrics/provider_metrics_manager.go | 51 ++++++++ protocol/rpcprovider/rpcprovider.go | 12 ++ .../updaters/provider_freeze_jail_updater.go | 71 ++++++++++ .../provider_freeze_jail_updater_mocks.go | 121 ++++++++++++++++++ .../provider_freeze_jail_updater_test.go | 86 +++++++++++++ utils/convert.go | 8 ++ 6 files changed, 349 insertions(+) create mode 100644 protocol/statetracker/updaters/provider_freeze_jail_updater.go create mode 100644 protocol/statetracker/updaters/provider_freeze_jail_updater_mocks.go create mode 100644 protocol/statetracker/updaters/provider_freeze_jail_updater_test.go create mode 100644 utils/convert.go diff --git a/protocol/metrics/provider_metrics_manager.go b/protocol/metrics/provider_metrics_manager.go index a4578df233..8481b937b4 100644 --- a/protocol/metrics/provider_metrics_manager.go +++ b/protocol/metrics/provider_metrics_manager.go @@ -41,6 +41,9 @@ type ProviderMetricsManager struct { endpointsHealthChecksOk uint64 relaysMonitors map[string]*RelaysMonitor relaysMonitorsLock sync.RWMutex + frozenStatusMetric *prometheus.GaugeVec + jailStatusMetric *prometheus.GaugeVec + jailedCountMetric *prometheus.GaugeVec loadRateMetric *prometheus.GaugeVec } @@ -117,20 +120,38 @@ func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { Name: "lava_provider_fetch_block_success", Help: "The total number of get specific block queries that succeeded by chainfetcher", }, []string{"spec"}) + virtualEpochMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "virtual_epoch", Help: "The current virtual epoch measured", }, []string{"spec"}) + endpointsHealthChecksOkMetric := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "lava_provider_overall_health", Help: "At least one endpoint is healthy", }) endpointsHealthChecksOkMetric.Set(1) + frozenStatusMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "lava_provider_frozen_status", + Help: "Frozen: 1, Not Frozen: 0", + }, 
[]string{"chainID"}) + + jailStatusMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "lava_provider_jail_status", + Help: "Jailed: 1, Not Jailed: 0", + }, []string{"chainID"}) + + jailedCountMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "lava_provider_jailed_count", + Help: "The amount of times the provider was jailed in the last 24 hours", + }, []string{"chainID"}) + protocolVersionMetric := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "lava_provider_protocol_version", Help: "The current running lavap version for the process. major := version / 1000000, minor := (version / 1000) % 1000 patch := version % 1000", }, []string{"version"}) + // Register the metrics with the Prometheus registry. prometheus.MustRegister(totalCUServicedMetric) prometheus.MustRegister(totalCUPaidMetric) @@ -147,6 +168,9 @@ func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { prometheus.MustRegister(virtualEpochMetric) prometheus.MustRegister(endpointsHealthChecksOkMetric) prometheus.MustRegister(protocolVersionMetric) + prometheus.MustRegister(frozenStatusMetric) + prometheus.MustRegister(jailStatusMetric) + prometheus.MustRegister(jailedCountMetric) prometheus.MustRegister(loadRateMetric) providerMetricsManager := &ProviderMetricsManager{ @@ -168,6 +192,9 @@ func NewProviderMetricsManager(networkAddress string) *ProviderMetricsManager { endpointsHealthChecksOk: 1, protocolVersionMetric: protocolVersionMetric, relaysMonitors: map[string]*RelaysMonitor{}, + frozenStatusMetric: frozenStatusMetric, + jailStatusMetric: jailStatusMetric, + jailedCountMetric: jailedCountMetric, loadRateMetric: loadRateMetric, } @@ -358,3 +385,27 @@ func (pme *ProviderMetricsManager) RegisterRelaysMonitor(chainID, apiInterface s defer pme.relaysMonitorsLock.Unlock() pme.relaysMonitors[chainID+apiInterface] = relaysMonitor } + +func (pme *ProviderMetricsManager) SetFrozenStatus(chain string, frozen bool) { + if pme == nil { + return + } + + pme.frozenStatusMetric.WithLabelValues(chain).Set(utils.Btof(frozen)) +} + +func (pme *ProviderMetricsManager) SetJailStatus(chain string, jailed bool) { + if pme == nil { + return + } + + pme.jailStatusMetric.WithLabelValues(chain).Set(utils.Btof(jailed)) +} + +func (pme *ProviderMetricsManager) SetJailedCount(chain string, jailedCount uint64) { + if pme == nil { + return + } + + pme.jailedCountMetric.WithLabelValues(chain).Set(float64(jailedCount)) +} diff --git a/protocol/rpcprovider/rpcprovider.go b/protocol/rpcprovider/rpcprovider.go index 22b9ed8b65..89a99d5f98 100644 --- a/protocol/rpcprovider/rpcprovider.go +++ b/protocol/rpcprovider/rpcprovider.go @@ -193,6 +193,7 @@ func (rpcp *RPCProvider) Start(options *rpcProviderStartOptions) (err error) { rpcp.providerStateTracker.RegisterForEpochUpdates(ctx, rpcp.rewardServer) rpcp.providerStateTracker.RegisterPaymentUpdatableForPayments(ctx, rpcp.rewardServer) } + keyName, err := sigs.GetKeyName(options.clientCtx) if err != nil { utils.LavaFormatFatal("failed getting key name from clientCtx", err) @@ -214,8 +215,13 @@ func (rpcp *RPCProvider) Start(options *rpcProviderStartOptions) (err error) { if err != nil { utils.LavaFormatFatal("failed unmarshaling public address", err, utils.Attribute{Key: "keyName", Value: keyName}, utils.Attribute{Key: "pubkey", Value: pubKey.Address()}) } + utils.LavaFormatInfo("RPCProvider pubkey: " + rpcp.addr.String()) + + rpcp.createAndRegisterFreezeUpdatersByOptions(ctx, options.clientCtx, rpcp.addr.String()) + utils.LavaFormatInfo("RPCProvider setting up 
endpoints", utils.Attribute{Key: "count", Value: strconv.Itoa(len(options.rpcProviderEndpoints))}) + blockMemorySize, err := rpcp.providerStateTracker.GetEpochSizeMultipliedByRecommendedEpochNumToCollectPayment(ctx) // get the number of blocks to keep in PSM. if err != nil { utils.LavaFormatFatal("Failed fetching GetEpochSizeMultipliedByRecommendedEpochNumToCollectPayment in RPCProvider Start", err) @@ -275,6 +281,12 @@ func (rpcp *RPCProvider) Start(options *rpcProviderStartOptions) (err error) { return nil } +func (rpcp *RPCProvider) createAndRegisterFreezeUpdatersByOptions(ctx context.Context, clientCtx client.Context, publicAddress string) { + queryClient := pairingtypes.NewQueryClient(clientCtx) + freezeJailUpdater := updaters.NewProviderFreezeJailUpdater(queryClient, publicAddress, rpcp.providerMetricsManager) + rpcp.providerStateTracker.RegisterForEpochUpdates(ctx, freezeJailUpdater) +} + func getActiveEndpoints(rpcProviderEndpoints []*lavasession.RPCProviderEndpoint, disabledEndpointsList []*lavasession.RPCProviderEndpoint) []*lavasession.RPCProviderEndpoint { activeEndpoints := map[*lavasession.RPCProviderEndpoint]struct{}{} for _, endpoint := range rpcProviderEndpoints { diff --git a/protocol/statetracker/updaters/provider_freeze_jail_updater.go b/protocol/statetracker/updaters/provider_freeze_jail_updater.go new file mode 100644 index 0000000000..69f24ad1ab --- /dev/null +++ b/protocol/statetracker/updaters/provider_freeze_jail_updater.go @@ -0,0 +1,71 @@ +package updaters + +import ( + "context" + "time" + + "github.com/lavanet/lava/v4/utils" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" + "google.golang.org/grpc" +) + +const ( + CallbackKeyForFreezeUpdate = "freeze-update" +) + +type ProviderPairingStatusStateQueryInf interface { + Provider(ctx context.Context, in *pairingtypes.QueryProviderRequest, opts ...grpc.CallOption) (*pairingtypes.QueryProviderResponse, error) +} + +type ProviderMetricsManagerInf interface { + SetFrozenStatus(string, bool) + SetJailStatus(string, bool) + SetJailedCount(string, uint64) +} + +type FrozenStatus uint64 + +const ( + AVAILABLE FrozenStatus = iota + FROZEN +) + +type ProviderFreezeJailUpdater struct { + pairingQueryClient ProviderPairingStatusStateQueryInf + metricsManager ProviderMetricsManagerInf + publicAddress string +} + +func NewProviderFreezeJailUpdater( + pairingQueryClient ProviderPairingStatusStateQueryInf, + publicAddress string, + metricsManager ProviderMetricsManagerInf, +) *ProviderFreezeJailUpdater { + return &ProviderFreezeJailUpdater{ + pairingQueryClient: pairingQueryClient, + publicAddress: publicAddress, + metricsManager: metricsManager, + } +} + +func (pfu *ProviderFreezeJailUpdater) UpdateEpoch(epoch uint64) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + response, err := pfu.pairingQueryClient.Provider(ctx, &pairingtypes.QueryProviderRequest{Address: pfu.publicAddress}) + cancel() + + if err != nil { + utils.LavaFormatError("Failed querying pairing client for provider", err) + return + } + + for _, provider := range response.StakeEntries { + if provider.Address != pfu.publicAddress || !provider.IsAddressVaultOrProvider(provider.Address) { + // should never happen, but just in case + continue + } + + pfu.metricsManager.SetJailedCount(provider.Chain, provider.Jails) + pfu.metricsManager.SetJailStatus(provider.Chain, provider.IsJailed(time.Now().UTC().Unix())) + pfu.metricsManager.SetFrozenStatus(provider.Chain, provider.IsFrozen() || provider.StakeAppliedBlock > epoch) + } +} 
diff --git a/protocol/statetracker/updaters/provider_freeze_jail_updater_mocks.go b/protocol/statetracker/updaters/provider_freeze_jail_updater_mocks.go new file mode 100644 index 0000000000..24b0738393 --- /dev/null +++ b/protocol/statetracker/updaters/provider_freeze_jail_updater_mocks.go @@ -0,0 +1,121 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: protocol/statetracker/updaters/provider_freeze_updater.go +// +// Generated by this command: +// +// mockgen -source=protocol/statetracker/updaters/provider_freeze_updater.go -destination protocol/statetracker/updaters/provider_freeze_updater_mocks.go -package updaters +// + +// Package updaters is a generated GoMock package. +package updaters + +import ( + context "context" + reflect "reflect" + + types "github.com/lavanet/lava/v4/x/pairing/types" + gomock "go.uber.org/mock/gomock" + grpc "google.golang.org/grpc" +) + +// MockProviderPairingStatusStateQueryInf is a mock of ProviderPairingStatusStateQueryInf interface. +type MockProviderPairingStatusStateQueryInf struct { + ctrl *gomock.Controller + recorder *MockProviderPairingStatusStateQueryInfMockRecorder +} + +// MockProviderPairingStatusStateQueryInfMockRecorder is the mock recorder for MockProviderPairingStatusStateQueryInf. +type MockProviderPairingStatusStateQueryInfMockRecorder struct { + mock *MockProviderPairingStatusStateQueryInf +} + +// NewMockProviderPairingStatusStateQueryInf creates a new mock instance. +func NewMockProviderPairingStatusStateQueryInf(ctrl *gomock.Controller) *MockProviderPairingStatusStateQueryInf { + mock := &MockProviderPairingStatusStateQueryInf{ctrl: ctrl} + mock.recorder = &MockProviderPairingStatusStateQueryInfMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockProviderPairingStatusStateQueryInf) EXPECT() *MockProviderPairingStatusStateQueryInfMockRecorder { + return m.recorder +} + +// Provider mocks base method. +func (m *MockProviderPairingStatusStateQueryInf) Provider(ctx context.Context, in *types.QueryProviderRequest, opts ...grpc.CallOption) (*types.QueryProviderResponse, error) { + m.ctrl.T.Helper() + varargs := []any{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "Provider", varargs...) + ret0, _ := ret[0].(*types.QueryProviderResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Provider indicates an expected call of Provider. +func (mr *MockProviderPairingStatusStateQueryInfMockRecorder) Provider(ctx, in any, opts ...any) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]any{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Provider", reflect.TypeOf((*MockProviderPairingStatusStateQueryInf)(nil).Provider), varargs...) +} + +// MockProviderMetricsManagerInf is a mock of ProviderMetricsManagerInf interface. +type MockProviderMetricsManagerInf struct { + ctrl *gomock.Controller + recorder *MockProviderMetricsManagerInfMockRecorder +} + +// MockProviderMetricsManagerInfMockRecorder is the mock recorder for MockProviderMetricsManagerInf. +type MockProviderMetricsManagerInfMockRecorder struct { + mock *MockProviderMetricsManagerInf +} + +// NewMockProviderMetricsManagerInf creates a new mock instance. 
+func NewMockProviderMetricsManagerInf(ctrl *gomock.Controller) *MockProviderMetricsManagerInf { + mock := &MockProviderMetricsManagerInf{ctrl: ctrl} + mock.recorder = &MockProviderMetricsManagerInfMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockProviderMetricsManagerInf) EXPECT() *MockProviderMetricsManagerInfMockRecorder { + return m.recorder +} + +// SetFrozenStatus mocks base method. +func (m *MockProviderMetricsManagerInf) SetFrozenStatus(arg0 string, arg1 bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetFrozenStatus", arg0, arg1) +} + +// SetFrozenStatus indicates an expected call of SetFrozenStatus. +func (mr *MockProviderMetricsManagerInfMockRecorder) SetFrozenStatus(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetFrozenStatus", reflect.TypeOf((*MockProviderMetricsManagerInf)(nil).SetFrozenStatus), arg0, arg1) +} + +// SetJailStatus mocks base method. +func (m *MockProviderMetricsManagerInf) SetJailStatus(arg0 string, arg1 bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetJailStatus", arg0, arg1) +} + +// SetJailStatus indicates an expected call of SetJailStatus. +func (mr *MockProviderMetricsManagerInfMockRecorder) SetJailStatus(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetJailStatus", reflect.TypeOf((*MockProviderMetricsManagerInf)(nil).SetJailStatus), arg0, arg1) +} + +// SetJailedCount mocks base method. +func (m *MockProviderMetricsManagerInf) SetJailedCount(arg0 string, arg1 uint64) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetJailedCount", arg0, arg1) +} + +// SetJailedCount indicates an expected call of SetJailedCount. 
+func (mr *MockProviderMetricsManagerInfMockRecorder) SetJailedCount(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetJailedCount", reflect.TypeOf((*MockProviderMetricsManagerInf)(nil).SetJailedCount), arg0, arg1) +} diff --git a/protocol/statetracker/updaters/provider_freeze_jail_updater_test.go b/protocol/statetracker/updaters/provider_freeze_jail_updater_test.go new file mode 100644 index 0000000000..ce6c6f68be --- /dev/null +++ b/protocol/statetracker/updaters/provider_freeze_jail_updater_test.go @@ -0,0 +1,86 @@ +package updaters + +import ( + "testing" + "time" + + "github.com/lavanet/lava/v4/utils/rand" + epochstoragetypes "github.com/lavanet/lava/v4/x/epochstorage/types" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" + gomock "go.uber.org/mock/gomock" +) + +func TestFreezeJailMetricsOnEpochUpdate(t *testing.T) { + rand.InitRandomSeed() + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + specID := "test-spec" + address := "initial1" + epoch := uint64(100) + stakeAppliedBlock := uint64(10) + + stakeEntryList := []epochstoragetypes.StakeEntry{ + { + Address: address, + Chain: specID, + Endpoints: []epochstoragetypes.Endpoint{ + { + IPPORT: "1234567", + Geolocation: 1, + Addons: []string{}, + ApiInterfaces: []string{"banana"}, + Extensions: []string{}, + }, + }, + StakeAppliedBlock: stakeAppliedBlock, + }, + } + + response := &pairingtypes.QueryProviderResponse{StakeEntries: stakeEntryList} + + stateQuery := NewMockProviderPairingStatusStateQueryInf(ctrl) + metricManager := NewMockProviderMetricsManagerInf(ctrl) + + freezeUpdater := NewProviderFreezeJailUpdater(stateQuery, address, metricManager) + + expectAndRun := func(stakeAppliedBlock, jailedCount uint64, frozen bool, jailed bool) { + stakeEntryList[0].StakeAppliedBlock = stakeAppliedBlock + stakeEntryList[0].Jails = jailedCount + if jailed { + stakeEntryList[0].JailEndTime = time.Now().Add(time.Hour).UTC().Unix() + } + response = &pairingtypes.QueryProviderResponse{StakeEntries: stakeEntryList} + stateQuery. + EXPECT(). + Provider(gomock.Any(), gomock.Any(), gomock.Any()). + Return(response, nil). + AnyTimes() + + metricManager. + EXPECT(). + SetJailStatus(specID, jailed). + Times(1) + + metricManager. + EXPECT(). + SetFrozenStatus(specID, frozen). + Times(1) + + metricManager. + EXPECT(). + SetJailedCount(specID, jailedCount). 
+ Times(1) + + freezeUpdater.UpdateEpoch(epoch) + } + + // Normal - no freeze, no jail + expectAndRun(stakeAppliedBlock, 0, false, false) + + // StakeAppliedBlock > epoch - frozen + expectAndRun(epoch+1, 0, true, false) + + // Jail status changed + jail count + expectAndRun(epoch-1, 1, false, true) +} diff --git a/utils/convert.go b/utils/convert.go new file mode 100644 index 0000000000..0a39cc7093 --- /dev/null +++ b/utils/convert.go @@ -0,0 +1,8 @@ +package utils + +func Btof(b bool) float64 { + if b { + return 1 + } + return 0 +} From cc682df9ad7918b390cba1f6136032256bfb81c3 Mon Sep 17 00:00:00 2001 From: Omer <100387053+omerlavanet@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:48:28 +0200 Subject: [PATCH 12/14] fix paths in main (#1765) --- protocol/monitoring/chain_heights.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/protocol/monitoring/chain_heights.go b/protocol/monitoring/chain_heights.go index 5380693652..0d4b31eb24 100644 --- a/protocol/monitoring/chain_heights.go +++ b/protocol/monitoring/chain_heights.go @@ -12,11 +12,11 @@ import ( "github.com/spf13/cobra" "golang.org/x/sync/semaphore" - "github.com/lavanet/lava/v3/protocol/lavasession" - "github.com/lavanet/lava/v3/utils" - "github.com/lavanet/lava/v3/utils/rand" - "github.com/lavanet/lava/v3/x/epochstorage/types" - pairingtypes "github.com/lavanet/lava/v3/x/pairing/types" + "github.com/lavanet/lava/v4/protocol/lavasession" + "github.com/lavanet/lava/v4/utils" + "github.com/lavanet/lava/v4/utils/rand" + "github.com/lavanet/lava/v4/x/epochstorage/types" + pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" ) const ( From 213d0c6761c537417ded79a638ecece0af3d792e Mon Sep 17 00:00:00 2001 From: Ran Mishael <106548467+ranlavanet@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:16:44 +0100 Subject: [PATCH 13/14] feat: PRT Block Hash Retry Archive - Part 1, redesign UsedProviders. 
(#1726) * feat: PRT Block Hash Retry Archive * fix deref * fix lint * remove extensions from all flows and save RouterKey in singleConsumerSession * version merge * rename function for better description on functionality * give a bigger window for the test --------- Co-authored-by: omerlavanet --- protocol/chainlib/chain_message.go | 4 + protocol/chainlib/chainlib.go | 1 + protocol/chainlib/chainlib_mock.go | 8 + .../lavasession/consumer_session_manager.go | 9 +- .../consumer_session_manager_test.go | 24 +-- protocol/lavasession/consumer_types.go | 7 +- .../end_to_end_lavasession_test.go | 4 +- protocol/lavasession/router_key.go | 25 +++- .../lavasession/single_consumer_session.go | 17 ++- protocol/lavasession/used_providers.go | 138 ++++++++++++------ protocol/lavasession/used_providers_test.go | 20 +-- .../consumer_relay_state_machine.go | 24 ++- .../consumer_relay_state_machine_test.go | 3 +- protocol/rpcconsumer/relay_processor.go | 14 +- protocol/rpcconsumer/relay_processor_test.go | 24 +-- protocol/rpcconsumer/rpcconsumer_server.go | 10 +- .../rpcprovider/rpcprovider_server_test.go | 2 +- 17 files changed, 216 insertions(+), 118 deletions(-) diff --git a/protocol/chainlib/chain_message.go b/protocol/chainlib/chain_message.go index 0cf5f923c2..fdd33ea80c 100644 --- a/protocol/chainlib/chain_message.go +++ b/protocol/chainlib/chain_message.go @@ -38,6 +38,10 @@ type baseChainMessageContainer struct { resultErrorParsingMethod func(data []byte, httpStatusCode int) (hasError bool, errorMessage string) } +func (bcnc *baseChainMessageContainer) GetRequestedBlocksHashes() []string { + return bcnc.requestedBlockHashes +} + func (bcnc *baseChainMessageContainer) SubscriptionIdExtractor(reply *rpcclient.JsonrpcMessage) string { return bcnc.msg.SubscriptionIdExtractor(reply) } diff --git a/protocol/chainlib/chainlib.go b/protocol/chainlib/chainlib.go index c443132f00..6fbf4ba536 100644 --- a/protocol/chainlib/chainlib.go +++ b/protocol/chainlib/chainlib.go @@ -88,6 +88,7 @@ type ChainMessage interface { SetForceCacheRefresh(force bool) bool CheckResponseError(data []byte, httpStatusCode int) (hasError bool, errorMessage string) GetRawRequestHash() ([]byte, error) + GetRequestedBlocksHashes() []string ChainMessageForSend } diff --git a/protocol/chainlib/chainlib_mock.go b/protocol/chainlib/chainlib_mock.go index 6013afca30..fec033fe68 100644 --- a/protocol/chainlib/chainlib_mock.go +++ b/protocol/chainlib/chainlib_mock.go @@ -282,6 +282,14 @@ func (m *MockChainMessage) EXPECT() *MockChainMessageMockRecorder { return m.recorder } +// AppendHeader mocks base method. +func (m *MockChainMessage) GetRequestedBlocksHashes() []string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetRequestedBlocksHashes") + ret0, _ := ret[0].([]string) + return ret0 +} + // AppendHeader mocks base method. 
func (m *MockChainMessage) AppendHeader(metadata []types.Metadata) { m.ctrl.T.Helper() diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index 8ca37ff6fe..2a2de957e7 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -433,7 +433,8 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS return nil, utils.LavaFormatError("failed getting sessions from used Providers", nil, utils.LogAttr("usedProviders", usedProviders), utils.LogAttr("endpoint", csm.rpcEndpoint)) } defer func() { usedProviders.AddUsed(consumerSessionMap, errRet) }() - initUnwantedProviders := usedProviders.GetUnwantedProvidersToSend() + routerKey := NewRouterKeyFromExtensions(extensions) + initUnwantedProviders := usedProviders.GetUnwantedProvidersToSend(routerKey) extensionNames := common.GetExtensionNames(extensions) // if pairing list is empty we reset the state. @@ -567,7 +568,7 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS // we don't want to update the reputation by it, so we null the rawQosReport rawQosReport = nil } - consumerSession.SetUsageForSession(cuNeededForSession, qosReport, rawQosReport, usedProviders) + consumerSession.SetUsageForSession(cuNeededForSession, qosReport, rawQosReport, usedProviders, routerKey) // We successfully added provider, we should ignore it if we need to fetch new tempIgnoredProviders.providers[providerAddress] = struct{}{} if len(sessions) == wantedSession { @@ -687,6 +688,7 @@ func (csm *ConsumerSessionManager) tryGetConsumerSessionWithProviderFromBlockedP // if we got here we validated the epoch is still the same epoch as we expected and we need to fetch a session from the blocked provider list. defer csm.lock.RUnlock() + routerKey := NewRouterKey(extensions) // csm.currentlyBlockedProviderAddresses is sorted by the provider with the highest cu used this epoch to the lowest // meaning if we fetch the first successful index this is probably the highest success ratio to get a response. for _, providerAddress := range csm.currentlyBlockedProviderAddresses { @@ -697,7 +699,7 @@ func (csm *ConsumerSessionManager) tryGetConsumerSessionWithProviderFromBlockedP consumerSessionsWithProvider := csm.pairing[providerAddress] // Add to ignored (no matter what) ignoredProviders.providers[providerAddress] = struct{}{} - usedProviders.AddUnwantedAddresses(providerAddress) // add the address to our unwanted providers to avoid infinite recursion + usedProviders.AddUnwantedAddresses(providerAddress, routerKey) // add the address to our unwanted providers to avoid infinite recursion // validate this provider has enough cu to be used if err := consumerSessionsWithProvider.validateComputeUnits(cuNeededForSession, virtualEpoch); err != nil { @@ -1019,6 +1021,7 @@ func (csm *ConsumerSessionManager) OnSessionDone( numOfProviders int, providersCount uint64, isHangingApi bool, + extensions []*spectypes.Extension, ) error { // release locks, update CU, relaynum etc.. 
if err := consumerSession.VerifyLock(); err != nil { diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index 7d46c63b16..c92ead202d 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -83,7 +83,7 @@ func TestHappyFlow(t *testing.T) { require.NotNil(t, cs) require.Equal(t, cs.Epoch, csm.currentEpoch) require.Equal(t, cs.Session.LatestRelayCu, cuForFirstRequest) - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, cs.Session.CuSum, cuForFirstRequest) require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) @@ -416,7 +416,7 @@ func runOnSessionDoneForConsumerSessionMap(t *testing.T, css ConsumerSessionsMap require.NotNil(t, cs) require.Equal(t, cs.Epoch, csm.currentEpoch) require.Equal(t, cs.Session.LatestRelayCu, cuForFirstRequest) - err := csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err := csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, cs.Session.CuSum, cuForFirstRequest) require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) @@ -448,7 +448,7 @@ func TestHappyFlowVirtualEpoch(t *testing.T) { require.NotNil(t, cs) require.Equal(t, cs.Epoch, csm.currentEpoch) require.Equal(t, cs.Session.LatestRelayCu, maxCuForVirtualEpoch*(virtualEpoch+1)) - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, maxCuForVirtualEpoch*(virtualEpoch+1), time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, maxCuForVirtualEpoch*(virtualEpoch+1), time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, cs.Session.CuSum, maxCuForVirtualEpoch*(virtualEpoch+1)) require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) @@ -484,7 +484,7 @@ func TestPairingReset(t *testing.T) { require.NotNil(t, cs) require.Equal(t, cs.Epoch, csm.currentEpoch) require.Equal(t, cs.Session.LatestRelayCu, cuForFirstRequest) - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, cs.Session.CuSum, cuForFirstRequest) 
require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) @@ -573,7 +573,7 @@ func TestPairingResetWithMultipleFailures(t *testing.T) { require.NotNil(t, cs) require.Equal(t, cs.Epoch, csm.currentEpoch) require.Equal(t, cs.Session.LatestRelayCu, cuForFirstRequest) - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, cs.Session.CuSum, cuForFirstRequest) require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) @@ -619,7 +619,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { require.Equal(t, epoch, csm.currentEpoch) if rand.Intn(2) > 0 { - err = csm.OnSessionDone(cs, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, cs.CuSum, cuForFirstRequest) require.Equal(t, cs.LatestRelayCu, latestRelayCuAfterDone) @@ -653,7 +653,7 @@ func TestSuccessAndFailureOfSessionWithUpdatePairingsInTheMiddle(t *testing.T) { for j := numberOfAllowedSessionsPerConsumer / 2; j < numberOfAllowedSessionsPerConsumer; j++ { cs := sessionList[j].cs if rand.Intn(2) > 0 { - err = csm.OnSessionDone(cs, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) require.Equal(t, sessionListData[j].cuSum+cuForFirstRequest, cs.CuSum) require.Equal(t, cs.LatestRelayCu, latestRelayCuAfterDone) @@ -676,7 +676,7 @@ func successfulSession(ctx context.Context, csm *ConsumerSessionManager, t *test for _, cs := range css { require.NotNil(t, cs) time.Sleep(time.Duration((rand.Intn(500) + 1)) * time.Millisecond) - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) ch <- p } @@ -957,7 +957,7 @@ func TestPairingWithAddons(t *testing.T) { css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, addon, nil, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), 
numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) } }) @@ -1032,7 +1032,7 @@ func TestPairingWithExtensions(t *testing.T) { css, err := csm.GetSessions(ctx, cuForFirstRequest, NewUsedProviders(nil), servicedBlockNumber, extensionOpt.addon, extensionsList, common.NO_STATE, 0) // get a session require.NoError(t, err) for _, cs := range css { - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) } }) @@ -1068,11 +1068,11 @@ func TestPairingWithStateful(t *testing.T) { require.NoError(t, err) require.Equal(t, allProviders, len(css)) for _, cs := range css { - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), numberOfProviders, numberOfProviders, false, nil) require.NoError(t, err) } usedProviders := NewUsedProviders(nil) - usedProviders.RemoveUsed(providerAddresses[0], nil) + usedProviders.RemoveUsed(providerAddresses[0], NewRouterKey(nil), nil) css, err = csm.GetSessions(ctx, cuForFirstRequest, usedProviders, servicedBlockNumber, addon, nil, common.CONSISTENCY_SELECT_ALL_PROVIDERS, 0) // get a session require.NoError(t, err) require.Equal(t, allProviders-1, len(css)) diff --git a/protocol/lavasession/consumer_types.go b/protocol/lavasession/consumer_types.go index deb11f7994..22c6bed45c 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -51,11 +51,11 @@ var ( ) type UsedProvidersInf interface { - RemoveUsed(providerAddress string, err error) + RemoveUsed(providerAddress string, routerKey RouterKey, err error) TryLockSelection(context.Context) error AddUsed(ConsumerSessionsMap, error) - GetUnwantedProvidersToSend() map[string]struct{} - AddUnwantedAddresses(address string) + GetUnwantedProvidersToSend(RouterKey) map[string]struct{} + AddUnwantedAddresses(address string, routerKey RouterKey) CurrentlyUsed() int } @@ -439,6 +439,7 @@ func (cswp *ConsumerSessionsWithProvider) GetConsumerSessionInstanceFromEndpoint Parent: cswp, EndpointConnection: endpointConnection, StaticProvider: cswp.StaticProvider, + routerKey: NewRouterKey(nil), } consumerSession.TryUseSession() // we must lock the session so other requests wont get it. 
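The hunks above make provider bookkeeping router-key aware: UsedProvidersInf now takes a RouterKey in RemoveUsed, AddUnwantedAddresses and GetUnwantedProvidersToSend, and the consumer session construction keeps a routerKey alongside it. Below is a minimal sketch of how a caller might exercise the new signatures; the import paths follow the v4 module layout used elsewhere in this patch, "archive" is only an example extension name, and the provider address is a placeholder.

package main

import (
	"fmt"

	"github.com/lavanet/lava/v4/protocol/lavasession"
	spectypes "github.com/lavanet/lava/v4/x/spec/types"
)

func main() {
	usedProviders := lavasession.NewUsedProviders(nil)

	// The relay's extension set determines its router key.
	regularKey := lavasession.NewRouterKey(nil)
	archiveKey := lavasession.NewRouterKeyFromExtensions([]*spectypes.Extension{{Name: "archive"}})

	// Exclusions are recorded under the router key they were reported for.
	usedProviders.AddUnwantedAddresses("lava@exampleprovider", regularKey)

	// Reads are keyed the same way.
	fmt.Println(usedProviders.GetUnwantedProvidersToSend(regularKey))
	fmt.Println(usedProviders.GetUnwantedProvidersToSend(archiveKey))
}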
diff --git a/protocol/lavasession/end_to_end_lavasession_test.go b/protocol/lavasession/end_to_end_lavasession_test.go index de9fb09186..6fc7cfc82e 100644 --- a/protocol/lavasession/end_to_end_lavasession_test.go +++ b/protocol/lavasession/end_to_end_lavasession_test.go @@ -72,7 +72,7 @@ func TestHappyFlowE2EEmergency(t *testing.T) { err = psm.OnSessionDone(sps, cs.Session.RelayNum-skippedRelays) require.NoError(t, err) - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, maxCuForVirtualEpoch, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), 1, 1, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, maxCuForVirtualEpoch, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), 1, 1, false, nil) require.NoError(t, err) } @@ -195,7 +195,7 @@ func prepareSessionsWithFirstRelay(t *testing.T, cuForFirstRequest uint64) (*Con require.NoError(t, err) // Consumer Side: - err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), 1, 1, false) + err = csm.OnSessionDone(cs.Session, servicedBlockNumber, cuForFirstRequest, time.Millisecond, cs.Session.CalculateExpectedLatency(2*time.Millisecond), (servicedBlockNumber - 1), 1, 1, false, nil) require.NoError(t, err) require.Equal(t, cs.Session.CuSum, cuForFirstRequest) require.Equal(t, cs.Session.LatestRelayCu, latestRelayCuAfterDone) diff --git a/protocol/lavasession/router_key.go b/protocol/lavasession/router_key.go index 291e543235..671f3e780d 100644 --- a/protocol/lavasession/router_key.go +++ b/protocol/lavasession/router_key.go @@ -4,6 +4,8 @@ import ( "sort" "strconv" "strings" + + spectypes "github.com/lavanet/lava/v4/x/spec/types" ) const ( @@ -18,18 +20,31 @@ func (rk *RouterKey) ApplyMethodsRoute(routeNum int) RouterKey { return RouterKey(string(*rk) + methodRouteSep + additionalPath) } +func newRouterKeyInner(uniqueExtensions map[string]struct{}) RouterKey { + uniqueExtensionsSlice := []string{} + for addon := range uniqueExtensions { // we are sorting this anyway so we don't have to keep order + uniqueExtensionsSlice = append(uniqueExtensionsSlice, addon) + } + sort.Strings(uniqueExtensionsSlice) + return RouterKey(sep + strings.Join(uniqueExtensionsSlice, sep) + sep) +} + func NewRouterKey(extensions []string) RouterKey { // make sure addons have no repetitions uniqueExtensions := map[string]struct{}{} for _, extension := range extensions { uniqueExtensions[extension] = struct{}{} } - uniqueExtensionsSlice := []string{} - for addon := range uniqueExtensions { // we are sorting this anyway so we don't have to keep order - uniqueExtensionsSlice = append(uniqueExtensionsSlice, addon) + return newRouterKeyInner(uniqueExtensions) +} + +func NewRouterKeyFromExtensions(extensions []*spectypes.Extension) RouterKey { + // make sure addons have no repetitions + uniqueExtensions := map[string]struct{}{} + for _, extension := range extensions { + uniqueExtensions[extension.Name] = struct{}{} } - sort.Strings(uniqueExtensionsSlice) - return RouterKey(sep + strings.Join(uniqueExtensionsSlice, sep) + sep) + return newRouterKeyInner(uniqueExtensions) } func GetEmptyRouterKey() RouterKey { diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 4fc8b1b67d..dfea92c4ce 100644 --- a/protocol/lavasession/single_consumer_session.go +++ 
b/protocol/lavasession/single_consumer_session.go @@ -25,9 +25,10 @@ type SingleConsumerSession struct { BlockListed bool // if session lost sync we blacklist it. ConsecutiveErrors []error errorsCount uint64 - relayProcessor UsedProvidersInf + usedProviders UsedProvidersInf providerUniqueId string StaticProvider bool + routerKey RouterKey } // returns the expected latency to a threshold. @@ -103,7 +104,7 @@ func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Dura } } -func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, rawQoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf) error { +func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, rawQoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf, routerKey RouterKey) error { scs.LatestRelayCu = cuNeededForSession // set latestRelayCu scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { @@ -111,15 +112,17 @@ func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, scs.QoSInfo.LastExcellenceQoSReport = qoSExcellenceReport scs.QoSInfo.LastExcellenceQoSReportRaw = rawQoSExcellenceReport } - scs.relayProcessor = usedProviders + scs.usedProviders = usedProviders + scs.routerKey = routerKey return nil } func (scs *SingleConsumerSession) Free(err error) { - if scs.relayProcessor != nil { - scs.relayProcessor.RemoveUsed(scs.Parent.PublicLavaAddress, err) - scs.relayProcessor = nil + if scs.usedProviders != nil { + scs.usedProviders.RemoveUsed(scs.Parent.PublicLavaAddress, scs.routerKey, err) + scs.usedProviders = nil } + scs.routerKey = NewRouterKey(nil) scs.EndpointConnection.decreaseSessionUsingConnection() scs.lock.Unlock() } @@ -130,7 +133,7 @@ func (session *SingleConsumerSession) TryUseSession() (blocked bool, ok bool) { session.lock.Unlock() return true, false } - if session.relayProcessor != nil { + if session.usedProviders != nil { utils.LavaFormatError("session misuse detected, usedProviders isn't nil, missing Free call, blocking", nil, utils.LogAttr("session", session.SessionId)) session.BlockListed = true session.lock.Unlock() diff --git a/protocol/lavasession/used_providers.go b/protocol/lavasession/used_providers.go index bcfcd0c2a0..ec5820f9a3 100644 --- a/protocol/lavasession/used_providers.go +++ b/protocol/lavasession/used_providers.go @@ -25,22 +25,34 @@ func NewUsedProviders(blockedProviders BlockedProvidersInf) *UsedProviders { } } return &UsedProviders{ - providers: map[string]struct{}{}, - unwantedProviders: unwantedProviders, - blockOnSyncLoss: map[string]struct{}{}, - erroredProviders: map[string]struct{}{}, + uniqueUsedProviders: map[RouterKey]*UniqueUsedProviders{NewRouterKey([]string{}): { + providers: map[string]struct{}{}, + unwantedProviders: unwantedProviders, + blockOnSyncLoss: map[string]struct{}{}, + erroredProviders: map[string]struct{}{}, + }}, + // we keep the original unwanted providers so when we create more unique used providers + // we can reuse it as its the user's instructions. + originalUnwantedProviders: unwantedProviders, } } +// unique used providers are specific for an extension router key. 
+// meaning each extension router key has a different used providers struct +type UniqueUsedProviders struct { + providers map[string]struct{} + unwantedProviders map[string]struct{} + erroredProviders map[string]struct{} // providers who returned protocol errors (used to debug relays for now) + blockOnSyncLoss map[string]struct{} +} + type UsedProviders struct { - lock sync.RWMutex - providers map[string]struct{} - selecting bool - unwantedProviders map[string]struct{} - erroredProviders map[string]struct{} // providers who returned protocol errors (used to debug relays for now) - blockOnSyncLoss map[string]struct{} - sessionsLatestBatch int - batchNumber int + lock sync.RWMutex + uniqueUsedProviders map[RouterKey]*UniqueUsedProviders + originalUnwantedProviders map[string]struct{} + selecting bool + sessionsLatestBatch int + batchNumber int } func (up *UsedProviders) CurrentlyUsed() int { @@ -50,7 +62,11 @@ func (up *UsedProviders) CurrentlyUsed() int { } up.lock.RLock() defer up.lock.RUnlock() - return len(up.providers) + currentlyUsed := 0 + for _, uniqueUsedProviders := range up.uniqueUsedProviders { + currentlyUsed += len(uniqueUsedProviders.providers) + } + return currentlyUsed } func (up *UsedProviders) SessionsLatestBatch() int { @@ -81,13 +97,15 @@ func (up *UsedProviders) CurrentlyUsedAddresses() []string { up.lock.RLock() defer up.lock.RUnlock() addresses := []string{} - for addr := range up.providers { - addresses = append(addresses, addr) + for _, uniqueUsedProviders := range up.uniqueUsedProviders { + for addr := range uniqueUsedProviders.providers { + addresses = append(addresses, addr) + } } return addresses } -func (up *UsedProviders) UnwantedAddresses() []string { +func (up *UsedProviders) AllUnwantedAddresses() []string { if up == nil { utils.LavaFormatError("UsedProviders.UnwantedAddresses is nil, misuse detected", nil) return []string{} @@ -95,46 +113,68 @@ func (up *UsedProviders) UnwantedAddresses() []string { up.lock.RLock() defer up.lock.RUnlock() addresses := []string{} - for addr := range up.unwantedProviders { - addresses = append(addresses, addr) + for _, uniqueUsedProviders := range up.uniqueUsedProviders { + for addr := range uniqueUsedProviders.unwantedProviders { + addresses = append(addresses, addr) + } } return addresses } -func (up *UsedProviders) AddUnwantedAddresses(address string) { +// Use when locked. Checking wether a router key exists in unique used providers, +// if it does, return it. If it doesn't +// creating a new instance and returning it. 
+func (up *UsedProviders) createOrUseUniqueUsedProvidersForKey(key RouterKey) *UniqueUsedProviders { + uniqueUsedProviders, ok := up.uniqueUsedProviders[key] + if !ok { + uniqueUsedProviders = &UniqueUsedProviders{ + providers: map[string]struct{}{}, + unwantedProviders: up.originalUnwantedProviders, + blockOnSyncLoss: map[string]struct{}{}, + erroredProviders: map[string]struct{}{}, + } + up.uniqueUsedProviders[key] = uniqueUsedProviders + } + return uniqueUsedProviders +} + +func (up *UsedProviders) AddUnwantedAddresses(address string, routerKey RouterKey) { if up == nil { utils.LavaFormatError("UsedProviders.AddUnwantedAddresses is nil, misuse detected", nil) return } up.lock.Lock() defer up.lock.Unlock() - up.unwantedProviders[address] = struct{}{} + uniqueUsedProviders := up.createOrUseUniqueUsedProvidersForKey(routerKey) + uniqueUsedProviders.unwantedProviders[address] = struct{}{} } -func (up *UsedProviders) RemoveUsed(provider string, err error) { +func (up *UsedProviders) RemoveUsed(provider string, routerKey RouterKey, err error) { if up == nil { return } up.lock.Lock() defer up.lock.Unlock() + uniqueUsedProviders := up.createOrUseUniqueUsedProvidersForKey(routerKey) + if err != nil { - up.erroredProviders[provider] = struct{}{} + uniqueUsedProviders.erroredProviders[provider] = struct{}{} if shouldRetryWithThisError(err) { - _, ok := up.blockOnSyncLoss[provider] + _, ok := uniqueUsedProviders.blockOnSyncLoss[provider] if !ok && IsSessionSyncLoss(err) { - up.blockOnSyncLoss[provider] = struct{}{} + uniqueUsedProviders.blockOnSyncLoss[provider] = struct{}{} utils.LavaFormatWarning("Identified SyncLoss in provider, allowing retry", err, utils.Attribute{Key: "address", Value: provider}) } else { - up.setUnwanted(provider) + up.setUnwanted(uniqueUsedProviders, provider) } } else { - up.setUnwanted(provider) + up.setUnwanted(uniqueUsedProviders, provider) } } else { // we got a valid response from this provider, no reason to keep using it - up.setUnwanted(provider) + up.setUnwanted(uniqueUsedProviders, provider) } - delete(up.providers, provider) + delete(uniqueUsedProviders.providers, provider) } func (up *UsedProviders) ClearUnwanted() { @@ -144,7 +184,9 @@ func (up *UsedProviders) ClearUnwanted() { up.lock.Lock() defer up.lock.Unlock() // this is nil safe - up.unwantedProviders = map[string]struct{}{} + for _, uniqueUsedProviders := range up.uniqueUsedProviders { + uniqueUsedProviders.unwantedProviders = map[string]struct{}{} + } } func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap, err error) { @@ -156,8 +198,15 @@ func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap, err error) { // this is nil safe if len(sessions) > 0 && err == nil { up.sessionsLatestBatch = 0 - for provider := range sessions { // the key for ConsumerSessionsMap is the provider public address - up.providers[provider] = struct{}{} + for provider, sessionInfo := range sessions { // the key for ConsumerSessionsMap is the provider public address + var routerKey RouterKey + if sessionInfo.Session != nil { + routerKey = sessionInfo.Session.routerKey + } else { + routerKey = NewRouterKey(nil) + } + uniqueUsedProviders := up.createOrUseUniqueUsedProvidersForKey(routerKey) + uniqueUsedProviders.providers[provider] = struct{}{} up.sessionsLatestBatch++ } // increase batch number @@ -167,11 +216,8 @@ func (up *UsedProviders) AddUsed(sessions ConsumerSessionsMap, err error) { } // called when already locked. 
-func (up *UsedProviders) setUnwanted(provider string) { - if up == nil { - return - } - up.unwantedProviders[provider] = struct{}{} +func (up *UsedProviders) setUnwanted(uniqueUsedProviders *UniqueUsedProviders, provider string) { + uniqueUsedProviders.unwantedProviders[provider] = struct{}{} } func (up *UsedProviders) TryLockSelection(ctx context.Context) error { @@ -206,28 +252,30 @@ func (up *UsedProviders) tryLockSelection() bool { return false } -func (up *UsedProviders) GetErroredProviders() map[string]struct{} { +func (up *UsedProviders) GetErroredProviders(routerKey RouterKey) map[string]struct{} { if up == nil { return map[string]struct{}{} } - up.lock.RLock() - defer up.lock.RUnlock() - return up.erroredProviders + up.lock.Lock() + defer up.lock.Unlock() + uniqueUsedProviders := up.createOrUseUniqueUsedProvidersForKey(routerKey) + return uniqueUsedProviders.erroredProviders } -func (up *UsedProviders) GetUnwantedProvidersToSend() map[string]struct{} { +func (up *UsedProviders) GetUnwantedProvidersToSend(routerKey RouterKey) map[string]struct{} { if up == nil { return map[string]struct{}{} } - up.lock.RLock() - defer up.lock.RUnlock() + up.lock.Lock() + defer up.lock.Unlock() + uniqueUsedProviders := up.createOrUseUniqueUsedProvidersForKey(routerKey) unwantedProvidersToSend := map[string]struct{}{} // block the currently used providers - for provider := range up.providers { + for provider := range uniqueUsedProviders.providers { unwantedProvidersToSend[provider] = struct{}{} } // block providers that we have a response for - for provider := range up.unwantedProviders { + for provider := range uniqueUsedProviders.unwantedProviders { unwantedProvidersToSend[provider] = struct{}{} } return unwantedProvidersToSend diff --git a/protocol/lavasession/used_providers_test.go b/protocol/lavasession/used_providers_test.go index 30f3c7a641..7f0adcb5be 100644 --- a/protocol/lavasession/used_providers_test.go +++ b/protocol/lavasession/used_providers_test.go @@ -20,35 +20,35 @@ func TestUsedProviders(t *testing.T) { require.False(t, canUseAgain) require.Zero(t, usedProviders.CurrentlyUsed()) require.Zero(t, usedProviders.SessionsLatestBatch()) - unwanted := usedProviders.GetUnwantedProvidersToSend() + unwanted := usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 0) consumerSessionsMap := ConsumerSessionsMap{"test": &SessionInfo{}, "test2": &SessionInfo{}} usedProviders.AddUsed(consumerSessionsMap, nil) canUseAgain = usedProviders.tryLockSelection() require.True(t, canUseAgain) - unwanted = usedProviders.GetUnwantedProvidersToSend() + unwanted = usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 2) require.Equal(t, 2, usedProviders.CurrentlyUsed()) canUseAgain = usedProviders.tryLockSelection() require.False(t, canUseAgain) consumerSessionsMap = ConsumerSessionsMap{"test3": &SessionInfo{}, "test4": &SessionInfo{}} usedProviders.AddUsed(consumerSessionsMap, nil) - unwanted = usedProviders.GetUnwantedProvidersToSend() + unwanted = usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 4) require.Equal(t, 4, usedProviders.CurrentlyUsed()) // one provider gives a retry - usedProviders.RemoveUsed("test", status.Error(codes.Code(SessionOutOfSyncError.ABCICode()), "")) + usedProviders.RemoveUsed("test", NewRouterKey(nil), status.Error(codes.Code(SessionOutOfSyncError.ABCICode()), "")) require.Equal(t, 3, usedProviders.CurrentlyUsed()) - unwanted = usedProviders.GetUnwantedProvidersToSend() + unwanted = 
usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 3) // one provider gives a result - usedProviders.RemoveUsed("test2", nil) - unwanted = usedProviders.GetUnwantedProvidersToSend() + usedProviders.RemoveUsed("test2", NewRouterKey(nil), nil) + unwanted = usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 3) require.Equal(t, 2, usedProviders.CurrentlyUsed()) // one provider gives an error - usedProviders.RemoveUsed("test3", fmt.Errorf("bad")) - unwanted = usedProviders.GetUnwantedProvidersToSend() + usedProviders.RemoveUsed("test3", NewRouterKey(nil), fmt.Errorf("bad")) + unwanted = usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 3) require.Equal(t, 1, usedProviders.CurrentlyUsed()) canUseAgain = usedProviders.tryLockSelection() @@ -74,7 +74,7 @@ func TestUsedProvidersAsync(t *testing.T) { defer cancel() canUseAgain := usedProviders.TryLockSelection(ctx) require.Nil(t, canUseAgain) - unwanted := usedProviders.GetUnwantedProvidersToSend() + unwanted := usedProviders.GetUnwantedProvidersToSend(NewRouterKey(nil)) require.Len(t, unwanted, 2) require.Equal(t, 2, usedProviders.CurrentlyUsed()) }) diff --git a/protocol/rpcconsumer/consumer_relay_state_machine.go b/protocol/rpcconsumer/consumer_relay_state_machine.go index f4932618a1..ca0df4c4b9 100644 --- a/protocol/rpcconsumer/consumer_relay_state_machine.go +++ b/protocol/rpcconsumer/consumer_relay_state_machine.go @@ -2,6 +2,7 @@ package rpcconsumer import ( context "context" + "sync/atomic" "time" "github.com/lavanet/lava/v4/protocol/chainlib" @@ -13,7 +14,6 @@ import ( type RelayStateMachine interface { GetProtocolMessage() chainlib.ProtocolMessage - ShouldRetry(numberOfRetriesLaunched int) bool GetDebugState() bool GetRelayTaskChannel() chan RelayStateSendInstructions UpdateBatch(err error) @@ -23,7 +23,6 @@ type RelayStateMachine interface { } type ConsumerRelaySender interface { - sendRelayToProvider(ctx context.Context, protocolMessage chainlib.ProtocolMessage, relayProcessor *RelayProcessor, analytics *metrics.RelayMetrics) (errRet error) getProcessingTimeout(chainMessage chainlib.ChainMessage) (processingTimeout time.Duration, relayTimeout time.Duration) GetChainIdAndApiInterface() (string, string) } @@ -84,7 +83,13 @@ func (crsm *ConsumerRelayStateMachine) GetSelection() Selection { return crsm.selection } -func (crsm *ConsumerRelayStateMachine) ShouldRetry(numberOfRetriesLaunched int) bool { +func (crsm *ConsumerRelayStateMachine) shouldRetryOnResult(numberOfRetriesLaunched int, numberOfNodeErrors uint64) bool { + shouldRetry := crsm.shouldRetryInner(numberOfRetriesLaunched) + // archive functionality will be added here. 
+ return shouldRetry +} + +func (crsm *ConsumerRelayStateMachine) shouldRetryInner(numberOfRetriesLaunched int) bool { if numberOfRetriesLaunched >= MaximumNumberOfTickerRelayRetries { return false } @@ -92,6 +97,10 @@ func (crsm *ConsumerRelayStateMachine) ShouldRetry(numberOfRetriesLaunched int) return crsm.selection != BestResult } +func (crsm *ConsumerRelayStateMachine) shouldRetryTicker(numberOfRetriesLaunched int) bool { + return crsm.shouldRetryInner(numberOfRetriesLaunched) +} + func (crsm *ConsumerRelayStateMachine) GetDebugState() bool { return crsm.debugRelays } @@ -124,12 +133,15 @@ func (crsm *ConsumerRelayStateMachine) GetRelayTaskChannel() chan RelayStateSend processingCtx, processingCtxCancel := context.WithTimeout(crsm.ctx, processingTimeout) defer processingCtxCancel() + numberOfNodeErrorsAtomic := atomic.Uint64{} readResultsFromProcessor := func() { // ProcessResults is reading responses while blocking until the conditions are met utils.LavaFormatTrace("[StateMachine] Waiting for results", utils.LogAttr("batch", crsm.usedProviders.BatchNumber())) crsm.parentRelayProcessor.WaitForResults(processingCtx) // Decide if we need to resend or not - if crsm.parentRelayProcessor.HasRequiredNodeResults() { + metRequiredNodeResults, numberOfNodeErrors := crsm.parentRelayProcessor.HasRequiredNodeResults() + numberOfNodeErrorsAtomic.Store(uint64(numberOfNodeErrors)) + if metRequiredNodeResults { gotResults <- true } else { gotResults <- false @@ -193,7 +205,7 @@ func (crsm *ConsumerRelayStateMachine) GetRelayTaskChannel() chan RelayStateSend return } // If should retry == true, send a new batch. (success == false) - if crsm.ShouldRetry(crsm.usedProviders.BatchNumber()) { + if crsm.shouldRetryOnResult(crsm.usedProviders.BatchNumber(), numberOfNodeErrorsAtomic.Load()) { utils.LavaFormatTrace("[StateMachine] success := <-gotResults - crsm.ShouldRetry(batchNumber)", utils.LogAttr("batch", crsm.usedProviders.BatchNumber())) relayTaskChannel <- RelayStateSendInstructions{protocolMessage: crsm.GetProtocolMessage()} } else { @@ -202,7 +214,7 @@ func (crsm *ConsumerRelayStateMachine) GetRelayTaskChannel() chan RelayStateSend go readResultsFromProcessor() case <-startNewBatchTicker.C: // Only trigger another batch for non BestResult relays or if we didn't pass the retry limit. 
- if crsm.ShouldRetry(crsm.usedProviders.BatchNumber()) { + if crsm.shouldRetryTicker(crsm.usedProviders.BatchNumber()) { utils.LavaFormatTrace("[StateMachine] ticker triggered", utils.LogAttr("batch", crsm.usedProviders.BatchNumber())) relayTaskChannel <- RelayStateSendInstructions{protocolMessage: crsm.GetProtocolMessage()} // Add ticker launch metrics diff --git a/protocol/rpcconsumer/consumer_relay_state_machine_test.go b/protocol/rpcconsumer/consumer_relay_state_machine_test.go index 0f35df708d..dfd6eeb871 100644 --- a/protocol/rpcconsumer/consumer_relay_state_machine_test.go +++ b/protocol/rpcconsumer/consumer_relay_state_machine_test.go @@ -92,7 +92,8 @@ func TestConsumerStateMachineHappyFlow(t *testing.T) { sendSuccessResp(relayProcessor, "lava4@test", time.Millisecond*1) case 4: require.True(t, task.IsDone()) - require.True(t, relayProcessor.HasRequiredNodeResults()) + results, _ := relayProcessor.HasRequiredNodeResults() + require.True(t, results) returnedResult, err := relayProcessor.ProcessingResult() require.NoError(t, err) require.Equal(t, string(returnedResult.Reply.Data), "ok") diff --git a/protocol/rpcconsumer/relay_processor.go b/protocol/rpcconsumer/relay_processor.go index 12c1b7cb8e..589c054fcc 100644 --- a/protocol/rpcconsumer/relay_processor.go +++ b/protocol/rpcconsumer/relay_processor.go @@ -120,7 +120,7 @@ func (rp *RelayProcessor) String() string { usedProviders := rp.GetUsedProviders() currentlyUsedAddresses := usedProviders.CurrentlyUsedAddresses() - unwantedAddresses := usedProviders.UnwantedAddresses() + unwantedAddresses := usedProviders.AllUnwantedAddresses() return fmt.Sprintf("relayProcessor {%s, unwantedAddresses: %s,currentlyUsedAddresses:%s}", rp.ResultsManager.String(), strings.Join(unwantedAddresses, ";"), strings.Join(currentlyUsedAddresses, ";")) } @@ -214,9 +214,9 @@ func (rp *RelayProcessor) shouldRetryRelay(resultsCount int, hashErr error, node return true } -func (rp *RelayProcessor) HasRequiredNodeResults() bool { +func (rp *RelayProcessor) HasRequiredNodeResults() (bool, int) { if rp == nil { - return false + return false, 0 } rp.lock.RLock() defer rp.lock.RUnlock() @@ -236,17 +236,17 @@ func (rp *RelayProcessor) HasRequiredNodeResults() bool { go rp.metricsInf.SetNodeErrorRecoveredSuccessfullyMetric(chainId, apiInterface, strconv.Itoa(nodeErrors)) } } - return true + return true, nodeErrors } if rp.selection == Quorum { // We need a quorum of all node results if nodeErrors+resultsCount >= rp.requiredSuccesses { // Retry on node error flow: - return rp.shouldRetryRelay(resultsCount, hashErr, nodeErrors, hash) + return rp.shouldRetryRelay(resultsCount, hashErr, nodeErrors, hash), nodeErrors } } // on BestResult we want to retry if there is no success - return false + return false, nodeErrors } func (rp *RelayProcessor) handleResponse(response *relayResponse) { @@ -370,7 +370,7 @@ func (rp *RelayProcessor) ProcessingResult() (returnedResult *common.RelayResult } // this must be here before the lock because this function locks - allProvidersAddresses := rp.GetUsedProviders().UnwantedAddresses() + allProvidersAddresses := rp.GetUsedProviders().AllUnwantedAddresses() rp.lock.RLock() defer rp.lock.RUnlock() diff --git a/protocol/rpcconsumer/relay_processor_test.go b/protocol/rpcconsumer/relay_processor_test.go index f56ce79651..a814a4a9f6 100644 --- a/protocol/rpcconsumer/relay_processor_test.go +++ b/protocol/rpcconsumer/relay_processor_test.go @@ -42,7 +42,7 @@ var ( func sendSuccessResp(relayProcessor *RelayProcessor, provider string, delay 
time.Duration) { time.Sleep(delay) - relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) + relayProcessor.GetUsedProviders().RemoveUsed(provider, lavasession.NewRouterKey(nil), nil) response := &relayResponse{ relayResult: common.RelayResult{ Request: &pairingtypes.RelayRequest{ @@ -60,7 +60,7 @@ func sendSuccessResp(relayProcessor *RelayProcessor, provider string, delay time func sendProtocolError(relayProcessor *RelayProcessor, provider string, delay time.Duration, err error) { time.Sleep(delay) - relayProcessor.GetUsedProviders().RemoveUsed(provider, err) + relayProcessor.GetUsedProviders().RemoveUsed(provider, lavasession.NewRouterKey(nil), err) response := &relayResponse{ relayResult: common.RelayResult{ Request: &pairingtypes.RelayRequest{ @@ -78,7 +78,7 @@ func sendProtocolError(relayProcessor *RelayProcessor, provider string, delay ti func sendNodeError(relayProcessor *RelayProcessor, provider string, delay time.Duration) { time.Sleep(delay) - relayProcessor.GetUsedProviders().RemoveUsed(provider, nil) + relayProcessor.GetUsedProviders().RemoveUsed(provider, lavasession.NewRouterKey(nil), nil) response := &relayResponse{ relayResult: common.RelayResult{ Request: &pairingtypes.RelayRequest{ @@ -187,7 +187,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk := relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults := relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ := relayProcessor.HasRequiredNodeResults() require.False(t, requiredNodeResults) // check first retry go sendNodeError(relayProcessor, "lava@test", time.Millisecond*5) @@ -195,7 +195,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk = relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults = relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ = relayProcessor.HasRequiredNodeResults() require.False(t, requiredNodeResults) // check first second retry @@ -204,7 +204,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk = relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults = relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ = relayProcessor.HasRequiredNodeResults() require.True(t, requiredNodeResults) // 2nd relay, same inputs @@ -230,7 +230,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk = relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults = relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ = relayProcessor.HasRequiredNodeResults() require.True(t, requiredNodeResults) // 3nd relay, different inputs @@ -256,7 +256,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk = relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults = relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ = relayProcessor.HasRequiredNodeResults() // check our hashing mechanism works with different inputs require.False(t, requiredNodeResults) @@ -284,7 +284,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk = relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults = relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ = relayProcessor.HasRequiredNodeResults() require.True(t, requiredNodeResults) // A way for us to break early from sleep, just waiting up to 5 
seconds and breaking as soon as the value we expect is there. @@ -333,7 +333,7 @@ func TestRelayProcessorNodeErrorRetryFlow(t *testing.T) { require.NoError(t, err) resultsOk := relayProcessor.HasResults() require.True(t, resultsOk) - requiredNodeResults := relayProcessor.HasRequiredNodeResults() + requiredNodeResults, _ := relayProcessor.HasRequiredNodeResults() require.True(t, requiredNodeResults) relayCountOnNodeError = 2 }) @@ -520,14 +520,14 @@ func TestRelayProcessorStatefulApi(t *testing.T) { err := relayProcessor.WaitForResults(ctx) require.NoError(t, err) // Decide if we need to resend or not - if relayProcessor.HasRequiredNodeResults() { + if results, _ := relayProcessor.HasRequiredNodeResults(); results { break } time.Sleep(5 * time.Millisecond) } resultsOk := relayProcessor.HasResults() require.True(t, resultsOk) - resultsOk = relayProcessor.HasRequiredNodeResults() + resultsOk, _ = relayProcessor.HasRequiredNodeResults() require.True(t, resultsOk) protocolErrors := relayProcessor.ProtocolErrors() require.Equal(t, uint64(1), protocolErrors) diff --git a/protocol/rpcconsumer/rpcconsumer_server.go b/protocol/rpcconsumer/rpcconsumer_server.go index dcb7657aa5..4b3612dd1d 100644 --- a/protocol/rpcconsumer/rpcconsumer_server.go +++ b/protocol/rpcconsumer/rpcconsumer_server.go @@ -600,10 +600,11 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( } if rpccs.debugRelays { + routerKey := lavasession.NewRouterKeyFromExtensions(extensions) utils.LavaFormatDebug("[Before Send] returned the following sessions", utils.LogAttr("sessions", sessions), - utils.LogAttr("usedProviders.GetUnwantedProvidersToSend", usedProviders.GetUnwantedProvidersToSend()), - utils.LogAttr("usedProviders.GetErroredProviders", usedProviders.GetErroredProviders()), + utils.LogAttr("usedProviders.GetUnwantedProvidersToSend", usedProviders.GetUnwantedProvidersToSend(routerKey)), + utils.LogAttr("usedProviders.GetErroredProviders", usedProviders.GetErroredProviders(routerKey)), utils.LogAttr("addons", addon), utils.LogAttr("extensions", extensions), utils.LogAttr("AllowSessionDegradation", relayProcessor.GetAllowSessionDegradation()), @@ -757,7 +758,7 @@ func (rpccs *RPCConsumerServer) sendRelayToProvider( ) } - errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(protocolMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(expectedRelayTimeoutForQOS), expectedBH, numOfProviders, pairingAddressesLen, protocolMessage.GetApi().Category.HangingApi) // session done successfully + errResponse = rpccs.consumerSessionManager.OnSessionDone(singleConsumerSession, latestBlock, chainlib.GetComputeUnits(protocolMessage), relayLatency, singleConsumerSession.CalculateExpectedLatency(expectedRelayTimeoutForQOS), expectedBH, numOfProviders, pairingAddressesLen, protocolMessage.GetApi().Category.HangingApi, extensions) // session done successfully isNodeError, _ := protocolMessage.CheckResponseError(localRelayResult.Reply.Data, localRelayResult.StatusCode) localRelayResult.IsNodeError = isNodeError if rpccs.debugRelays { @@ -1355,7 +1356,8 @@ func (rpccs *RPCConsumerServer) appendHeadersToRelayResult(ctx context.Context, directiveHeaders := protocolMessage.GetDirectiveHeaders() _, debugRelays := directiveHeaders[common.LAVA_DEBUG_RELAY] if debugRelays { - erroredProviders := relayProcessor.GetUsedProviders().GetErroredProviders() + routerKey := lavasession.NewRouterKeyFromExtensions(protocolMessage.GetExtensions()) + erroredProviders := 
relayProcessor.GetUsedProviders().GetErroredProviders(routerKey) if len(erroredProviders) > 0 { erroredProvidersArray := make([]string, len(erroredProviders)) idx := 0 diff --git a/protocol/rpcprovider/rpcprovider_server_test.go b/protocol/rpcprovider/rpcprovider_server_test.go index 4af6bbc5c3..a18228b9ec 100644 --- a/protocol/rpcprovider/rpcprovider_server_test.go +++ b/protocol/rpcprovider/rpcprovider_server_test.go @@ -129,7 +129,7 @@ func TestHandleConsistency(t *testing.T) { requestBlock: spectypes.LATEST_BLOCK, specId: "LAV1", err: nil, - timeout: 15 * time.Millisecond, // 150 is one way travel time + timeout: 20 * time.Millisecond, // 150 is one way travel time chainTrackerBlocks: []int64{100, 101}, changeTime: 100 * time.Second, sleep: true, From c57c9f807ef914e810276d5895fac7aa9b2a82ab Mon Sep 17 00:00:00 2001 From: Yaroms <103432884+Yaroms@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:51:46 +0200 Subject: [PATCH 14/14] add handler (#1767) Co-authored-by: Yaroms --- app/app.go | 1 + app/upgrades/empty_upgrades.go | 6 ++++++ x/protocol/module.go | 6 +++++- x/protocol/types/params.go | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/app/app.go b/app/app.go index e9d1484547..d219fb6c69 100644 --- a/app/app.go +++ b/app/app.go @@ -168,6 +168,7 @@ const ( var Upgrades = []upgrades.Upgrade{ upgrades.Upgrade_3_1_0, upgrades.Upgrade_4_0_0, + upgrades.Upgrade_4_1_0, } // this line is used by starport scaffolding # stargate/wasm/app/enabledProposals diff --git a/app/upgrades/empty_upgrades.go b/app/upgrades/empty_upgrades.go index 0a4ad8ec05..3cb03b74e0 100644 --- a/app/upgrades/empty_upgrades.go +++ b/app/upgrades/empty_upgrades.go @@ -51,3 +51,9 @@ var Upgrade_4_0_0 = Upgrade{ CreateUpgradeHandler: defaultUpgradeHandler, StoreUpgrades: store.StoreUpgrades{}, } + +var Upgrade_4_1_0 = Upgrade{ + UpgradeName: "v4.1.0", + CreateUpgradeHandler: defaultUpgradeHandler, + StoreUpgrades: store.StoreUpgrades{}, +} diff --git a/x/protocol/module.go b/x/protocol/module.go index cfabb651cf..456c0e0e58 100644 --- a/x/protocol/module.go +++ b/x/protocol/module.go @@ -221,10 +221,14 @@ func (am AppModule) RegisterServices(cfg module.Configurator) { // panic:ok: at start up, migration cannot proceed anyhow panic(fmt.Errorf("%s: failed to register migration to v24: %w", types.ModuleName, err)) } + if err := cfg.RegisterMigration(types.ModuleName, 24, migrator.MigrateVersion); err != nil { + // panic:ok: at start up, migration cannot proceed anyhow + panic(fmt.Errorf("%s: failed to register migration to v25: %w", types.ModuleName, err)) + } } // ConsensusVersion implements ConsensusVersion. -func (AppModule) ConsensusVersion() uint64 { return 24 } +func (AppModule) ConsensusVersion() uint64 { return 25 } // RegisterInvariants registers the capability module's invariants. func (am AppModule) RegisterInvariants(_ sdk.InvariantRegistry) {} diff --git a/x/protocol/types/params.go b/x/protocol/types/params.go index 5ba305a8ee..772f2ab9e5 100644 --- a/x/protocol/types/params.go +++ b/x/protocol/types/params.go @@ -12,7 +12,7 @@ import ( var _ paramtypes.ParamSet = (*Params)(nil) const ( - TARGET_VERSION = "4.0.0" + TARGET_VERSION = "4.1.0" MIN_VERSION = "3.1.0" )
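The closing commit wires up the v4.1.0 release in the places that have to move together: the empty Upgrade_4_1_0 handler joins the Upgrades list, the protocol module's ConsensusVersion is bumped from 24 to 25 with a matching 24-to-25 migration registration, and TARGET_VERSION moves to "4.1.0". Below is a minimal sketch of that consensus-version/migration pairing using the cosmos-sdk Configurator; the no-op handler and the module-name parameter are stand-ins for the module's migrator.MigrateVersion and types.ModuleName.

package exampleprotocol

import (
	"fmt"

	sdk "github.com/cosmos/cosmos-sdk/types"
	"github.com/cosmos/cosmos-sdk/types/module"
)

// When ConsensusVersion moves to 25, RunMigrations needs a registered handler
// for the 24 -> 25 step, even if that step has nothing to migrate.
func registerMigrations(cfg module.Configurator, moduleName string) {
	noop := func(ctx sdk.Context) error { return nil } // stand-in for the real migration
	if err := cfg.RegisterMigration(moduleName, 24, noop); err != nil {
		// panic:ok: at start up, migration cannot proceed anyhow
		panic(fmt.Errorf("%s: failed to register migration to v25: %w", moduleName, err))
	}
}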