Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: start spans only when telemetry is enabled #719

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions docs/opentelemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,27 @@ Keep in mind that logging level must be set to "trace" (-1) as all the tracing d

## Features

Tracing ID is parsed from the W3C Trace Context header or generated when missing for each incoming request. The Trace ID is generated even if tracing feature is turned off because this field is used for correlation of log messages for each request on the application level.
For each incoming request, the trace ID is parsed from the W3C Trace Context header, or generated when the header is missing.

Spans are created for each Chi route with the route being the name of the span (e.g. `/api/provisioning/v1/ready/{SRV}`).

Spans are created for each HTTP client call made via `telemetry.HTTPClient`. The span name is "HTTP" followed by the HTTP method.

Spans are created for all SQL operations made through the `pgx` SQL driver.

Spans are created for custom instrumentation points. An example:
Spans are created for custom instrumentation points in code. An example:

```go
func Function() {
ctx, span := otel.Tracer(TraceName).Start(ctx, "Function")
ctx, span := telemetry.StartSpan(ctx, "Span label")
defer span.End()
// ...
err := someDangerousCode()
if err != nil {
span.SetStatus(codes.Error, "description why it is an error")
// ...
}
// ...
}
```

7 changes: 2 additions & 5 deletions internal/cache/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ import (
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"go.opentelemetry.io/otel"
)

const TraceName = telemetry.TracePrefix + "internal/cache"

var (
ErrNotFound = errors.New("not found in cache")
ErrNilValue = errors.New("value is nil")
Expand Down Expand Up @@ -102,7 +99,7 @@ func Find(ctx context.Context, key string, value Cacheable) error {
}

prefix := value.CacheKeyName()
ctx, span := otel.Tracer(TraceName).Start(ctx, "Find")
ctx, span := telemetry.StartSpan(ctx, "Find")
defer span.End()

cmd := client.Get(ctx, prefix+key)
Expand Down Expand Up @@ -146,7 +143,7 @@ func SetExpires(ctx context.Context, key string, value Cacheable, expiration tim
}

prefix := value.CacheKeyName()
ctx, span := otel.Tracer(TraceName).Start(ctx, "Set")
ctx, span := telemetry.StartSpan(ctx, "Set")
defer span.End()

var buf bytes.Buffer
Expand Down
8 changes: 4 additions & 4 deletions internal/clients/http/azure/azure_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"
"github.com/RHEnVision/provisioning-backend/internal/clients"
"github.com/RHEnVision/provisioning-backend/internal/config"
"go.opentelemetry.io/otel"
"github.com/RHEnVision/provisioning-backend/internal/telemetry"
)

type client struct {
Expand Down Expand Up @@ -117,7 +117,7 @@ func (c *client) newInterfacesClient(ctx context.Context) (*armnetwork.Interface
}

func (c *client) Status(ctx context.Context) error {
ctx, span := otel.Tracer(TraceName).Start(ctx, "Status")
ctx, span := telemetry.StartSpan(ctx, "Status")
defer span.End()

client, err := c.newSubscriptionsClient(ctx)
Expand All @@ -132,7 +132,7 @@ func (c *client) Status(ctx context.Context) error {
}

func (c *client) ListResourceGroups(ctx context.Context) ([]string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ListResourceGroups")
ctx, span := telemetry.StartSpan(ctx, "ListResourceGroups")
defer span.End()

var list []string
Expand All @@ -156,7 +156,7 @@ func (c *client) ListResourceGroups(ctx context.Context) ([]string, error) {
}

func (c *client) TenantId(ctx context.Context) (clients.AzureTenantId, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "TenantId")
ctx, span := telemetry.StartSpan(ctx, "TenantId")
defer span.End()

subClient, err := c.newSubscriptionsClient(ctx)
Expand Down
3 changes: 0 additions & 3 deletions internal/clients/http/azure/azure_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@ package azure
import (
"context"

"github.com/RHEnVision/provisioning-backend/internal/telemetry"
"github.com/rs/zerolog"
)

const TraceName = telemetry.TracePrefix + "internal/clients/http/azure"

// logger builds a child of the zerolog logger looked up from ctx and tags
// it with the field "client" set to "azure", so log lines emitted by this
// package can be told apart from those of other clients.
func logger(ctx context.Context) zerolog.Logger {
	base := zerolog.Ctx(ctx)
	tagged := base.With().Str("client", "azure")
	return tagged.Logger()
}
20 changes: 10 additions & 10 deletions internal/clients/http/azure/create_vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
"github.com/RHEnVision/provisioning-backend/internal/clients"
"github.com/RHEnVision/provisioning-backend/internal/ptr"
"go.opentelemetry.io/otel"
"github.com/RHEnVision/provisioning-backend/internal/telemetry"
"go.opentelemetry.io/otel/codes"
)

Expand All @@ -34,7 +34,7 @@ const (
)

func (c *client) BeginCreateVM(ctx context.Context, networkInterface *armnetwork.Interface, vmParams clients.AzureInstanceParams, vmName string) (string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "BeginCreateVM")
ctx, span := telemetry.StartSpan(ctx, "BeginCreateVM")
defer span.End()

logger := logger(ctx)
Expand Down Expand Up @@ -65,7 +65,7 @@ func (c *client) BeginCreateVM(ctx context.Context, networkInterface *armnetwork
}

func (c *client) WaitForVM(ctx context.Context, resumeToken string) (clients.AzureInstanceID, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "WaitForVM")
ctx, span := telemetry.StartSpan(ctx, "WaitForVM")
defer span.End()

logger := logger(ctx)
Expand Down Expand Up @@ -97,7 +97,7 @@ func (c *client) WaitForVM(ctx context.Context, resumeToken string) (clients.Azu
}

func (c *client) ensureSharedNetworking(ctx context.Context, location, resourceGroupName string) (*armnetwork.Subnet, *armnetwork.SecurityGroup, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ensureSharedNetworking")
ctx, span := telemetry.StartSpan(ctx, "ensureSharedNetworking")
defer span.End()

logger := logger(ctx)
Expand Down Expand Up @@ -130,7 +130,7 @@ func (c *client) ensureSharedNetworking(ctx context.Context, location, resourceG
}

func (c *client) prepareVMNetworking(ctx context.Context, subnet *armnetwork.Subnet, securityGroup *armnetwork.SecurityGroup, vmParams clients.AzureInstanceParams, vmName string) (*armnetwork.Interface, *armnetwork.PublicIPAddress, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "prepareVMNetworking")
ctx, span := telemetry.StartSpan(ctx, "prepareVMNetworking")
defer span.End()

logger := logger(ctx)
Expand Down Expand Up @@ -190,7 +190,7 @@ func (c *client) EnsureResourceGroup(ctx context.Context, name string, location
}

func (c *client) createVirtualNetwork(ctx context.Context, location string, resourceGroupName string, name string) (*armnetwork.VirtualNetwork, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "createVirtualNetwork")
ctx, span := telemetry.StartSpan(ctx, "createVirtualNetwork")
defer span.End()

vnetClient, err := c.newVirtualNetworksClient(ctx)
Expand Down Expand Up @@ -248,7 +248,7 @@ func (c *client) createVirtualNetwork(ctx context.Context, location string, reso
}

func (c *client) createSubnets(ctx context.Context, resourceGroupName string, vnetName string, name string) (*armnetwork.Subnet, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "createSubnets")
ctx, span := telemetry.StartSpan(ctx, "createSubnets")
defer span.End()

subnetClient, err := c.newSubnetsClient(ctx)
Expand Down Expand Up @@ -279,7 +279,7 @@ func (c *client) createSubnets(ctx context.Context, resourceGroupName string, vn
}

func (c *client) createNetworkSecurityGroup(ctx context.Context, location string, resourceGroupName string, name string) (*armnetwork.SecurityGroup, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "createNetworkSecurityGroup")
ctx, span := telemetry.StartSpan(ctx, "createNetworkSecurityGroup")
defer span.End()

nsgClient, err := c.newSecurityGroupsClient(ctx)
Expand Down Expand Up @@ -340,7 +340,7 @@ func (c *client) createNetworkSecurityGroup(ctx context.Context, location string
}

func (c *client) createPublicIP(ctx context.Context, location string, resourceGroupName string, name string) (*armnetwork.PublicIPAddress, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "createPublicIP")
ctx, span := telemetry.StartSpan(ctx, "createPublicIP")
defer span.End()

publicIPAddressClient, err := c.newPublicIPAddressesClient(ctx)
Expand Down Expand Up @@ -370,7 +370,7 @@ func (c *client) createPublicIP(ctx context.Context, location string, resourceGr
}

func (c *client) createNetworkInterface(ctx context.Context, location string, resourceGroupName string, subnet *armnetwork.Subnet, publicIP *armnetwork.PublicIPAddress, nsg *armnetwork.SecurityGroup, name string) (*armnetwork.Interface, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "createNetworkInterface")
ctx, span := telemetry.StartSpan(ctx, "createNetworkInterface")
defer span.End()

nicClient, err := c.newInterfacesClient(ctx)
Expand Down
4 changes: 2 additions & 2 deletions internal/clients/http/azure/create_vms.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ import (

"github.com/RHEnVision/provisioning-backend/internal/clients"
"github.com/RHEnVision/provisioning-backend/internal/ptr"
"github.com/RHEnVision/provisioning-backend/internal/telemetry"
"github.com/google/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
)

func (c *client) CreateVMs(ctx context.Context, vmParams clients.AzureInstanceParams, amount int64, vmNamePrefix string) ([]clients.InstanceDescription, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "CreateVMs")
ctx, span := telemetry.StartSpan(ctx, "CreateVMs")
defer span.End()

logger := logger(ctx)
Expand Down
21 changes: 9 additions & 12 deletions internal/clients/http/ec2/ec2_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ import (
"strconv"

"github.com/RHEnVision/provisioning-backend/internal/identity"
"github.com/RHEnVision/provisioning-backend/internal/telemetry"

"github.com/RHEnVision/provisioning-backend/internal/clients"
"github.com/RHEnVision/provisioning-backend/internal/clients/http"
"github.com/RHEnVision/provisioning-backend/internal/config"
"github.com/RHEnVision/provisioning-backend/internal/models"
"github.com/RHEnVision/provisioning-backend/internal/page"
"github.com/RHEnVision/provisioning-backend/internal/ptr"
"github.com/RHEnVision/provisioning-backend/internal/telemetry"
"github.com/aws/aws-sdk-go-v2/aws"
awsCfg "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
Expand All @@ -24,12 +24,9 @@ import (
"github.com/aws/aws-sdk-go-v2/service/sts"
stsTypes "github.com/aws/aws-sdk-go-v2/service/sts/types"
"github.com/rs/zerolog"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
)

const TraceName = telemetry.TracePrefix + "internal/clients/http/ec2"

type ec2Client struct {
ec2 *ec2.Client
sts *sts.Client
Expand Down Expand Up @@ -152,7 +149,7 @@ func getStsAssumedCredentials(ctx context.Context, arn string, region string) (*
// ImportPubkey imports a key and returns AWS KeyPair name.
// The AWS name will be set to value of models.Pubkey Name.
func (c *ec2Client) ImportPubkey(ctx context.Context, key *models.Pubkey, tag string) (string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ImportPubkey")
ctx, span := telemetry.StartSpan(ctx, "ImportPubkey")
defer span.End()

if !c.assumed {
Expand Down Expand Up @@ -190,7 +187,7 @@ func (c *ec2Client) ImportPubkey(ctx context.Context, key *models.Pubkey, tag st
}

func (c *ec2Client) GetPubkeyName(ctx context.Context, fingerprint string) (string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "fetchPubkeyName")
ctx, span := telemetry.StartSpan(ctx, "fetchPubkeyName")
defer span.End()

if !c.assumed {
Expand All @@ -217,7 +214,7 @@ func (c *ec2Client) GetPubkeyName(ctx context.Context, fingerprint string) (stri
}

func (c *ec2Client) DeleteSSHKey(ctx context.Context, handle string) error {
ctx, span := otel.Tracer(TraceName).Start(ctx, "DeleteSSHKey")
ctx, span := telemetry.StartSpan(ctx, "DeleteSSHKey")
defer span.End()

if !c.assumed {
Expand Down Expand Up @@ -289,7 +286,7 @@ func (c *ec2Client) ListAllZones(ctx context.Context, region clients.Region) ([]
}

func (c *ec2Client) ListInstanceTypes(ctx context.Context) ([]*clients.InstanceType, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ListInstanceTypes")
ctx, span := telemetry.StartSpan(ctx, "ListInstanceTypes")
defer span.End()

input := &ec2.DescribeInstanceTypesInput{MaxResults: ptr.ToInt32(100)}
Expand Down Expand Up @@ -319,7 +316,7 @@ func (c *ec2Client) ListInstanceTypes(ctx context.Context) ([]*clients.InstanceT
}

func (c *ec2Client) DescribeInstanceDetails(ctx context.Context, InstanceIds []string) ([]*clients.InstanceDescription, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "DescribeInstanceDetails")
ctx, span := telemetry.StartSpan(ctx, "DescribeInstanceDetails")
defer span.End()

input := &ec2.DescribeInstancesInput{
Expand All @@ -342,7 +339,7 @@ func (c *ec2Client) DescribeInstanceDetails(ctx context.Context, InstanceIds []s
}

func (c *ec2Client) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchTemplate, string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ListLaunchTemplates")
ctx, span := telemetry.StartSpan(ctx, "ListLaunchTemplates")
defer span.End()

limit := page.Limit(ctx).Int32()
Expand Down Expand Up @@ -375,7 +372,7 @@ func (c *ec2Client) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchT
}

func (c *ec2Client) RunInstances(ctx context.Context, params *clients.AWSInstanceParams, amount int32, name string, reservation *models.AWSReservation) ([]*string, *string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "RunInstances")
ctx, span := telemetry.StartSpan(ctx, "RunInstances")
defer span.End()

if !c.assumed {
Expand Down Expand Up @@ -472,7 +469,7 @@ func (c *ec2Client) parseDescribeInstances(respAWS *ec2.DescribeInstancesOutput)
}

func (c *ec2Client) GetAccountId(ctx context.Context) (string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "GetAccountId")
ctx, span := telemetry.StartSpan(ctx, "GetAccountId")
defer span.End()

input := &sts.GetCallerIdentityInput{}
Expand Down
20 changes: 8 additions & 12 deletions internal/clients/http/gcp/gcp_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,16 @@ import (
"strconv"

"github.com/RHEnVision/provisioning-backend/internal/identity"

"github.com/RHEnVision/provisioning-backend/internal/logging"
"github.com/RHEnVision/provisioning-backend/internal/models"
"github.com/RHEnVision/provisioning-backend/internal/page"
"github.com/RHEnVision/provisioning-backend/internal/telemetry"

compute "cloud.google.com/go/compute/apiv1"
"cloud.google.com/go/compute/apiv1/computepb"
"github.com/RHEnVision/provisioning-backend/internal/clients"
"github.com/RHEnVision/provisioning-backend/internal/config"
"github.com/RHEnVision/provisioning-backend/internal/logging"
"github.com/RHEnVision/provisioning-backend/internal/models"
"github.com/RHEnVision/provisioning-backend/internal/page"
"github.com/RHEnVision/provisioning-backend/internal/ptr"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
Expand All @@ -33,8 +31,6 @@ func init() {
clients.GetGCPClient = newGCPClient
}

const TraceName = telemetry.TracePrefix + "internal/clients/http/gcp"

// GCP SDK does not provide a single client, so only configuration can be shared and
// clients need to be created and closed in each function.
// The difference between the customer and service authentication is which Project ID was given: the service or the customer
Expand All @@ -56,7 +52,7 @@ func (c *gcpClient) Status(ctx context.Context) error {
}

func (c *gcpClient) ListAllRegions(ctx context.Context) ([]clients.Region, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ListAllRegions")
ctx, span := telemetry.StartSpan(ctx, "ListAllRegions")
defer span.End()

client, err := compute.NewRegionsRESTClient(ctx, c.options...)
Expand Down Expand Up @@ -104,7 +100,7 @@ func (c *gcpClient) NewInstanceTemplatesClient(ctx context.Context) (*compute.In
}

func (c *gcpClient) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchTemplate, string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ListLaunchTemplates")
ctx, span := telemetry.StartSpan(ctx, "ListLaunchTemplates")
defer span.End()
var token string
logger := logger(ctx)
Expand Down Expand Up @@ -142,7 +138,7 @@ func (c *gcpClient) ListLaunchTemplates(ctx context.Context) ([]*clients.LaunchT
}

func (c *gcpClient) InsertInstances(ctx context.Context, params *clients.GCPInstanceParams, amount int64) ([]*string, *string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "InsertInstances")
ctx, span := telemetry.StartSpan(ctx, "InsertInstances")
defer span.End()

logger := logger(ctx)
Expand Down Expand Up @@ -255,7 +251,7 @@ func (c *gcpClient) InsertInstances(ctx context.Context, params *clients.GCPInst
}

func (c *gcpClient) ListInstancesIDsByLabel(ctx context.Context, uuid string) ([]*string, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "ListInstancesIDsByLabel")
ctx, span := telemetry.StartSpan(ctx, "ListInstancesIDsByLabel")
defer span.End()

logger := logger(ctx)
Expand Down Expand Up @@ -295,7 +291,7 @@ func (c *gcpClient) ListInstancesIDsByLabel(ctx context.Context, uuid string) ([
}

func (c *gcpClient) GetInstanceDescriptionByID(ctx context.Context, id, zone string) (*clients.InstanceDescription, error) {
ctx, span := otel.Tracer(TraceName).Start(ctx, "GetInstanceDescriptionByID")
ctx, span := telemetry.StartSpan(ctx, "GetInstanceDescriptionByID")
defer span.End()

logger := logger(ctx)
Expand Down
Loading