Skip to content

Commit

Permalink
Added with_product(...) and with_user_agent_extra(...) public fun…
Browse files Browse the repository at this point in the history
…ctions to improve telemetry for mid-stream libraries (#679)

## Changes

This PR solves three tracking challenges:
1. Adds consistent mechanism with Go SDK Public API. See
https://github.com/databricks/databricks-sdk-go/blob/00b1d09b24aa9fb971bcf23f3db3e80bf2bec6fe/useragent/user_agent.go#L20-L31
and
https://github.com/databricks/databricks-sdk-go/blob/00b1d09b24aa9fb971bcf23f3db3e80bf2bec6fe/useragent/user_agent.go#L49-L54

2. Some of our products, like UCX and Remorph, are used not only as
standalone CLI, but also as mid-stream libraries in other products,
where developers don't specify product name and product version within
the WorkspaceClient. This results in missing tracking information for
those integrations.
3. Mid-stream libraries, like blueprint, pytester, and lsql, do have
their own versions, but they don't create manage sdk.WorkspaceClient
and/or core.Config themselves, so we currently lack traffic attribution
for those libraries. Technically, Databricks Connect falls into the same
use-case.

## Tests
Moved unit tests that are relevant to User-Agent verification from
test_core.py to test_config.py to bring back consistency.
  • Loading branch information
nfx authored Jun 20, 2024
1 parent 4e751f8 commit 58574b8
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 43 deletions.
47 changes: 44 additions & 3 deletions databricks/sdk/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import platform
import sys
import urllib.parse
from typing import Dict, Iterable, Optional
from typing import Dict, Iterable, List, Optional, Tuple

import requests

Expand Down Expand Up @@ -44,6 +44,32 @@ def __repr__(self) -> str:
return f"<ConfigAttribute '{self.name}' {self.transform.__name__}>"


_DEFAULT_PRODUCT_NAME = 'unknown'
_DEFAULT_PRODUCT_VERSION = '0.0.0'
_STATIC_USER_AGENT: Tuple[str, str, List[str]] = (_DEFAULT_PRODUCT_NAME, _DEFAULT_PRODUCT_VERSION, [])


def with_product(product: str, product_version: str):
"""[INTERNAL API] Change the product name and version used in the User-Agent header."""
global _STATIC_USER_AGENT
prev_product, prev_version, prev_other_info = _STATIC_USER_AGENT
logger.debug(f'Changing product from {prev_product}/{prev_version} to {product}/{product_version}')
_STATIC_USER_AGENT = product, product_version, prev_other_info


def with_user_agent_extra(key: str, value: str):
"""[INTERNAL API] Add extra metadata to the User-Agent header when developing a library."""
global _STATIC_USER_AGENT
product_name, product_version, other_info = _STATIC_USER_AGENT
for item in other_info:
if item.startswith(f"{key}/"):
# ensure that we don't have duplicates
other_info.remove(item)
break
other_info.append(f"{key}/{value}")
_STATIC_USER_AGENT = product_name, product_version, other_info


class Config:
host: str = ConfigAttribute(env='DATABRICKS_HOST')
account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID')
Expand Down Expand Up @@ -85,12 +111,21 @@ def __init__(self,
# Deprecated. Use credentials_strategy instead.
credentials_provider: CredentialsStrategy = None,
credentials_strategy: CredentialsStrategy = None,
product="unknown",
product_version="0.0.0",
product=_DEFAULT_PRODUCT_NAME,
product_version=_DEFAULT_PRODUCT_VERSION,
clock: Clock = None,
**kwargs):
self._header_factory = None
self._inner = {}
# as in SDK for Go, pull information from global static user agent context,
# so that we can track additional metadata for mid-stream libraries, as well
# as for cases, when the downstream product is used as a library and is not
# configured with a proper product name and version.
static_product, static_version, _ = _STATIC_USER_AGENT
if product == _DEFAULT_PRODUCT_NAME:
product = static_product
if product_version == _DEFAULT_PRODUCT_VERSION:
product_version = static_version
self._user_agent_other_info = []
if credentials_strategy and credentials_provider:
raise ValueError(
Expand Down Expand Up @@ -234,6 +269,12 @@ def user_agent(self):
]
if len(self._user_agent_other_info) > 0:
ua.append(' '.join(self._user_agent_other_info))
# as in SDK for Go, pull information from global static user agent context,
# so that we can track additional metadata for mid-stream libraries. this value
# is shared across all instances of Config objects intentionally.
_, _, static_info = _STATIC_USER_AGENT
if len(static_info) > 0:
ua.append(' '.join(static_info))
if len(self._upstream_user_agent) > 0:
ua.append(self._upstream_user_agent)
if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
Expand Down
53 changes: 52 additions & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from databricks.sdk.config import Config
import platform

from databricks.sdk.config import Config, with_product, with_user_agent_extra
from databricks.sdk.version import __version__

from .conftest import noop_credentials

Expand All @@ -15,3 +18,51 @@ def test_config_supports_legacy_credentials_provider():
c2 = c.copy()
assert c2._product == 'foo'
assert c2._product_version == '1.2.3'


def test_extra_and_upstream_user_agent(monkeypatch):

class MockUname:

@property
def system(self):
return 'TestOS'

monkeypatch.setattr(platform, 'python_version', lambda: '3.0.0')
monkeypatch.setattr(platform, 'uname', MockUname)
monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM', "upstream-product")
monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM_VERSION', "0.0.1")
monkeypatch.setenv('DATABRICKS_RUNTIME_VERSION', "13.1 anything/else")

config = Config(host='http://localhost', username="something", password="something", product='test',
product_version='0.0.0') \
.with_user_agent_extra('test-extra-1', '1') \
.with_user_agent_extra('test-extra-2', '2')

assert config.user_agent == (
f"test/0.0.0 databricks-sdk-py/{__version__} python/3.0.0 os/testos auth/basic"
f" test-extra-1/1 test-extra-2/2 upstream/upstream-product upstream-version/0.0.1"
" runtime/13.1-anything-else")

with_product('some-product', '0.32.1')
config2 = Config(host='http://localhost', token='...')
assert config2.user_agent.startswith('some-product/0.32.1')

config3 = Config(host='http://localhost', token='...', product='abc', product_version='1.2.3')
assert not config3.user_agent.startswith('some-product/0.32.1')


def test_config_copy_deep_copies_user_agent_other_info(config):
config_copy = config.copy()

config.with_user_agent_extra("test", "test1")
assert "test/test1" not in config_copy.user_agent
assert "test/test1" in config.user_agent

config_copy.with_user_agent_extra("test", "test2")
assert "test/test2" in config_copy.user_agent
assert "test/test2" not in config.user_agent

with_user_agent_extra("blueprint", "0.4.6")
assert "blueprint/0.4.6" in config.user_agent
assert "blueprint/0.4.6" in config_copy.user_agent
39 changes: 0 additions & 39 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import functools
import os
import pathlib
import platform
import random
import string
import typing
Expand All @@ -25,7 +24,6 @@
DatabricksEnvironment)
from databricks.sdk.service.catalog import PermissionsChange
from databricks.sdk.service.iam import AccessControlRequest
from databricks.sdk.version import __version__

from .clock import FakeClock
from .conftest import noop_credentials
Expand Down Expand Up @@ -181,31 +179,6 @@ def test_databricks_cli_credential_provider_installed_new(config, monkeypatch, t
assert databricks_cli(config) is not None


def test_extra_and_upstream_user_agent(monkeypatch):

class MockUname:

@property
def system(self):
return 'TestOS'

monkeypatch.setattr(platform, 'python_version', lambda: '3.0.0')
monkeypatch.setattr(platform, 'uname', MockUname)
monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM', "upstream-product")
monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM_VERSION', "0.0.1")
monkeypatch.setenv('DATABRICKS_RUNTIME_VERSION', "13.1 anything/else")

config = Config(host='http://localhost', username="something", password="something", product='test',
product_version='0.0.0') \
.with_user_agent_extra('test-extra-1', '1') \
.with_user_agent_extra('test-extra-2', '2')

assert config.user_agent == (
f"test/0.0.0 databricks-sdk-py/{__version__} python/3.0.0 os/testos auth/basic"
f" test-extra-1/1 test-extra-2/2 upstream/upstream-product upstream-version/0.0.1"
" runtime/13.1-anything-else")


def test_config_copy_shallow_copies_credential_provider():

class TestCredentialsStrategy(CredentialsStrategy):
Expand Down Expand Up @@ -237,18 +210,6 @@ def refresh(self):
assert config._credentials_strategy == config_copy._credentials_strategy


def test_config_copy_deep_copies_user_agent_other_info(config):
config_copy = config.copy()

config.with_user_agent_extra("test", "test1")
assert "test/test1" not in config_copy.user_agent
assert "test/test1" in config.user_agent

config_copy.with_user_agent_extra("test", "test2")
assert "test/test2" in config_copy.user_agent
assert "test/test2" not in config.user_agent


def test_config_accounts_aws_is_accounts_host(config):
config.host = "https://accounts.cloud.databricks.com"
assert config.is_account_client
Expand Down

0 comments on commit 58574b8

Please sign in to comment.