From 58574b8a6c1bee7bfdbeb8a8da6c10dea69029a6 Mon Sep 17 00:00:00 2001 From: Serge Smertin <259697+nfx@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:44:36 +0200 Subject: [PATCH] Added `with_product(...)` and `with_user_agent_extra(...)` public functions to improve telemetry for mid-stream libraries (#679) ## Changes This PR solves three tracking challenges: 1. Adds consistent mechanism with Go SDK Public API. See https://github.com/databricks/databricks-sdk-go/blob/00b1d09b24aa9fb971bcf23f3db3e80bf2bec6fe/useragent/user_agent.go#L20-L31 and https://github.com/databricks/databricks-sdk-go/blob/00b1d09b24aa9fb971bcf23f3db3e80bf2bec6fe/useragent/user_agent.go#L49-L54 2. Some of our products, like UCX and Remorph, are used not only as standalone CLI, but also as mid-stream libraries in other products, where developers don't specify product name and product version within the WorkspaceClient. This results in missing tracking information for those integrations. 3. Mid-stream libraries, like blueprint, pytester, and lsql, do have their own versions, but they don't create manage sdk.WorkspaceClient and/or core.Config themselves, so we currently lack traffic attribution for those libraries. Technically, Databricks Connect falls into the same use-case. ## Tests Moved unit tests that are relevant to User-Agent verification from test_core.py to test_config.py to bring back consistency. --- databricks/sdk/config.py | 47 ++++++++++++++++++++++++++++++++--- tests/test_config.py | 53 +++++++++++++++++++++++++++++++++++++++- tests/test_core.py | 39 ----------------------------- 3 files changed, 96 insertions(+), 43 deletions(-) diff --git a/databricks/sdk/config.py b/databricks/sdk/config.py index 38d329fa5..ca1c2cfc1 100644 --- a/databricks/sdk/config.py +++ b/databricks/sdk/config.py @@ -6,7 +6,7 @@ import platform import sys import urllib.parse -from typing import Dict, Iterable, Optional +from typing import Dict, Iterable, List, Optional, Tuple import requests @@ -44,6 +44,32 @@ def __repr__(self) -> str: return f"" +_DEFAULT_PRODUCT_NAME = 'unknown' +_DEFAULT_PRODUCT_VERSION = '0.0.0' +_STATIC_USER_AGENT: Tuple[str, str, List[str]] = (_DEFAULT_PRODUCT_NAME, _DEFAULT_PRODUCT_VERSION, []) + + +def with_product(product: str, product_version: str): + """[INTERNAL API] Change the product name and version used in the User-Agent header.""" + global _STATIC_USER_AGENT + prev_product, prev_version, prev_other_info = _STATIC_USER_AGENT + logger.debug(f'Changing product from {prev_product}/{prev_version} to {product}/{product_version}') + _STATIC_USER_AGENT = product, product_version, prev_other_info + + +def with_user_agent_extra(key: str, value: str): + """[INTERNAL API] Add extra metadata to the User-Agent header when developing a library.""" + global _STATIC_USER_AGENT + product_name, product_version, other_info = _STATIC_USER_AGENT + for item in other_info: + if item.startswith(f"{key}/"): + # ensure that we don't have duplicates + other_info.remove(item) + break + other_info.append(f"{key}/{value}") + _STATIC_USER_AGENT = product_name, product_version, other_info + + class Config: host: str = ConfigAttribute(env='DATABRICKS_HOST') account_id: str = ConfigAttribute(env='DATABRICKS_ACCOUNT_ID') @@ -85,12 +111,21 @@ def __init__(self, # Deprecated. Use credentials_strategy instead. credentials_provider: CredentialsStrategy = None, credentials_strategy: CredentialsStrategy = None, - product="unknown", - product_version="0.0.0", + product=_DEFAULT_PRODUCT_NAME, + product_version=_DEFAULT_PRODUCT_VERSION, clock: Clock = None, **kwargs): self._header_factory = None self._inner = {} + # as in SDK for Go, pull information from global static user agent context, + # so that we can track additional metadata for mid-stream libraries, as well + # as for cases, when the downstream product is used as a library and is not + # configured with a proper product name and version. + static_product, static_version, _ = _STATIC_USER_AGENT + if product == _DEFAULT_PRODUCT_NAME: + product = static_product + if product_version == _DEFAULT_PRODUCT_VERSION: + product_version = static_version self._user_agent_other_info = [] if credentials_strategy and credentials_provider: raise ValueError( @@ -234,6 +269,12 @@ def user_agent(self): ] if len(self._user_agent_other_info) > 0: ua.append(' '.join(self._user_agent_other_info)) + # as in SDK for Go, pull information from global static user agent context, + # so that we can track additional metadata for mid-stream libraries. this value + # is shared across all instances of Config objects intentionally. + _, _, static_info = _STATIC_USER_AGENT + if len(static_info) > 0: + ua.append(' '.join(static_info)) if len(self._upstream_user_agent) > 0: ua.append(self._upstream_user_agent) if 'DATABRICKS_RUNTIME_VERSION' in os.environ: diff --git a/tests/test_config.py b/tests/test_config.py index 22a4b71c6..4b6c05638 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,7 @@ -from databricks.sdk.config import Config +import platform + +from databricks.sdk.config import Config, with_product, with_user_agent_extra +from databricks.sdk.version import __version__ from .conftest import noop_credentials @@ -15,3 +18,51 @@ def test_config_supports_legacy_credentials_provider(): c2 = c.copy() assert c2._product == 'foo' assert c2._product_version == '1.2.3' + + +def test_extra_and_upstream_user_agent(monkeypatch): + + class MockUname: + + @property + def system(self): + return 'TestOS' + + monkeypatch.setattr(platform, 'python_version', lambda: '3.0.0') + monkeypatch.setattr(platform, 'uname', MockUname) + monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM', "upstream-product") + monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM_VERSION', "0.0.1") + monkeypatch.setenv('DATABRICKS_RUNTIME_VERSION', "13.1 anything/else") + + config = Config(host='http://localhost', username="something", password="something", product='test', + product_version='0.0.0') \ + .with_user_agent_extra('test-extra-1', '1') \ + .with_user_agent_extra('test-extra-2', '2') + + assert config.user_agent == ( + f"test/0.0.0 databricks-sdk-py/{__version__} python/3.0.0 os/testos auth/basic" + f" test-extra-1/1 test-extra-2/2 upstream/upstream-product upstream-version/0.0.1" + " runtime/13.1-anything-else") + + with_product('some-product', '0.32.1') + config2 = Config(host='http://localhost', token='...') + assert config2.user_agent.startswith('some-product/0.32.1') + + config3 = Config(host='http://localhost', token='...', product='abc', product_version='1.2.3') + assert not config3.user_agent.startswith('some-product/0.32.1') + + +def test_config_copy_deep_copies_user_agent_other_info(config): + config_copy = config.copy() + + config.with_user_agent_extra("test", "test1") + assert "test/test1" not in config_copy.user_agent + assert "test/test1" in config.user_agent + + config_copy.with_user_agent_extra("test", "test2") + assert "test/test2" in config_copy.user_agent + assert "test/test2" not in config.user_agent + + with_user_agent_extra("blueprint", "0.4.6") + assert "blueprint/0.4.6" in config.user_agent + assert "blueprint/0.4.6" in config_copy.user_agent diff --git a/tests/test_core.py b/tests/test_core.py index 74d38bd35..2403d654b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2,7 +2,6 @@ import functools import os import pathlib -import platform import random import string import typing @@ -25,7 +24,6 @@ DatabricksEnvironment) from databricks.sdk.service.catalog import PermissionsChange from databricks.sdk.service.iam import AccessControlRequest -from databricks.sdk.version import __version__ from .clock import FakeClock from .conftest import noop_credentials @@ -181,31 +179,6 @@ def test_databricks_cli_credential_provider_installed_new(config, monkeypatch, t assert databricks_cli(config) is not None -def test_extra_and_upstream_user_agent(monkeypatch): - - class MockUname: - - @property - def system(self): - return 'TestOS' - - monkeypatch.setattr(platform, 'python_version', lambda: '3.0.0') - monkeypatch.setattr(platform, 'uname', MockUname) - monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM', "upstream-product") - monkeypatch.setenv('DATABRICKS_SDK_UPSTREAM_VERSION', "0.0.1") - monkeypatch.setenv('DATABRICKS_RUNTIME_VERSION', "13.1 anything/else") - - config = Config(host='http://localhost', username="something", password="something", product='test', - product_version='0.0.0') \ - .with_user_agent_extra('test-extra-1', '1') \ - .with_user_agent_extra('test-extra-2', '2') - - assert config.user_agent == ( - f"test/0.0.0 databricks-sdk-py/{__version__} python/3.0.0 os/testos auth/basic" - f" test-extra-1/1 test-extra-2/2 upstream/upstream-product upstream-version/0.0.1" - " runtime/13.1-anything-else") - - def test_config_copy_shallow_copies_credential_provider(): class TestCredentialsStrategy(CredentialsStrategy): @@ -237,18 +210,6 @@ def refresh(self): assert config._credentials_strategy == config_copy._credentials_strategy -def test_config_copy_deep_copies_user_agent_other_info(config): - config_copy = config.copy() - - config.with_user_agent_extra("test", "test1") - assert "test/test1" not in config_copy.user_agent - assert "test/test1" in config.user_agent - - config_copy.with_user_agent_extra("test", "test2") - assert "test/test2" in config_copy.user_agent - assert "test/test2" not in config.user_agent - - def test_config_accounts_aws_is_accounts_host(config): config.host = "https://accounts.cloud.databricks.com" assert config.is_account_client