From b4ac1af1a170961a5eed8d22e5f6bc43a2fc9ab2 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 16:57:40 +1100 Subject: [PATCH 1/7] c --- crates/polars-io/src/cloud/options.rs | 94 ++++++++++++++++--- .../polars/io/cloud/credential_provider.py | 92 ++---------------- 2 files changed, 85 insertions(+), 101 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 739a7009916c..5f0526bc11b3 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -398,6 +398,9 @@ impl CloudOptions { /// Build the [`object_store::ObjectStore`] implementation for Azure. #[cfg(feature = "azure")] pub fn build_azure(&self, url: &str) -> PolarsResult { + use futures::TryStreamExt; + use object_store::ObjectStore; + use super::credential_provider::IntoCredentialProvider; let verbose = polars_core::config::verbose(); @@ -424,6 +427,14 @@ impl CloudOptions { .with_url(url) .with_retry(get_retry_config(self.max_retries)); + let opt_account_key = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path + .map(|x| x.into()) + .or(storage_account) + .as_deref() + .and_then(get_azure_storage_account_key); + + let builder_copy = opt_account_key.is_some().then(|| builder.clone()); + let builder = if let Some(v) = self.credential_provider.clone() { if verbose { eprintln!( @@ -432,26 +443,79 @@ impl CloudOptions { ); } builder.with_credentials(v.into_azure_provider()) - } else if let Some(v) = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path - .map(|x| x.into()) - .or(storage_account) - .as_deref() - .and_then(get_azure_storage_account_key) - { - if verbose { - eprintln!("[CloudOptions::build_azure]: Retrieved account key from Azure CLI") - } - builder.with_access_key(v) } else { + builder + }; + + if verbose { + eprintln!( + "[CloudOptions::build_azure]: opt_account_key: {:?}", + opt_account_key.as_ref().map(|_| "") + ); + } + + let store = builder.build().map_err(to_compute_err)?; + + let Some(account_key) = opt_account_key else { + return Ok(store); + }; + + // We do `list()` instead of `head(path)` as it could be a glob path. + let success = crate::pl_async::get_runtime() + .block_on_potential_spawn(async { store.list(None).try_next().await.is_ok() }); + + if success { if verbose { + eprintln!("[CloudOptions::build_azure]: Success on list() without account key"); + } + return Ok(store); + } + + let cfg_use_account_key = std::env::var("POLARS_ALLOW_AZURE_ACCOUNT_KEY"); + let cfg_use_account_key = cfg_use_account_key.as_deref(); + + if verbose { + eprintln!( + "[CloudOptions::build_azure]: Err on list(), cfg_use_account_key={:?}", + cfg_use_account_key + ); + } + + if cfg_use_account_key != Ok("1") { + use std::sync::Once; + static HINT: Once = Once::new(); + + if cfg_use_account_key != Ok("0") { + HINT.call_once(|| { + eprintln!( + " +[CloudOptions::build_azure]: Warning: Azure authentication check failed, \ +subsequent cloud operations may return an error. Polars was able to retrieve \ +the storage account key for this URL - to allow polars to use storage account \ +keys for authentication, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=1 in the \ +environment. To silence this warning, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=0. + " + ) + }); + } else if verbose { eprintln!( - "[CloudOptions::build_azure]: Could not retrieve account key from Azure CLI" - ) + "[CloudOptions::build_azure]: Storage account key authentication disabled \ + (POLARS_ALLOW_AZURE_ACCOUNT_KEY=0)" + ); } - builder - }; - builder.build().map_err(to_compute_err) + return Ok(store); + } + + if verbose { + eprintln!("[CloudOptions::build_azure]: Using storage account key authentication"); + } + + builder_copy + .unwrap() + .with_access_key(account_key) + .build() + .map_err(to_compute_err) } /// Set the configuration for GCP connections. This is the preferred API from rust. diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py index 504cf2dd2b29..ab327a9f61e6 100644 --- a/py-polars/polars/io/cloud/credential_provider.py +++ b/py-polars/polars/io/cloud/credential_provider.py @@ -156,7 +156,6 @@ def __init__( self, *, scopes: list[str] | None = None, - storage_account: str | None = None, tenant_id: str | None = None, _verbose: bool = False, ) -> None: @@ -169,11 +168,6 @@ def __init__( ---------- scopes Scopes to pass to `get_token` - storage_account - If specified, an attempt will be made to retrieve the account keys - for this account using the Azure CLI. If this is successful, the - account keys will be used instead of - `DefaultAzureCredential.get_token()` tenant_id Azure tenant ID. """ @@ -182,7 +176,6 @@ def __init__( self._check_module_availability() - self.account_name = storage_account self.tenant_id = tenant_id # Done like this to bypass mypy, we don't have stubs for azure.identity self.credential = importlib.import_module("azure.identity").__dict__[ @@ -195,7 +188,6 @@ def __init__( print( ( "CredentialProviderAzure " - f"{self.account_name = } " f"{self.tenant_id = } " f"{self.scopes = } " ), @@ -204,28 +196,6 @@ def __init__( def __call__(self) -> CredentialProviderFunctionReturn: """Fetch the credentials.""" - if self.account_name is not None: - try: - creds = { - "account_key": self._get_azure_storage_account_key_az_cli( - self.account_name - ) - } - - if self._verbose: - print( - "[CredentialProviderAzure]: Retrieved account key from Azure CLI", - file=sys.stderr, - ) - except Exception as e: - if self._verbose: - print( - f"[CredentialProviderAzure]: Could not retrieve account key from Azure CLI: {e}", - file=sys.stderr, - ) - else: - return creds, None # type: ignore[return-value] - token = self.credential.get_token(*self.scopes, tenant_id=self.tenant_id) return { @@ -238,51 +208,6 @@ def _check_module_availability(cls) -> None: msg = "azure-identity must be installed to use `CredentialProviderAzure`" raise ImportError(msg) - @staticmethod - def _extract_adls_uri_storage_account(uri: str) -> str | None: - # "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/" - # ^^^^^^^^^^^^^^^^^ - try: - return ( - uri.split("://", 1)[1] - .split("/", 1)[0] - .split("@", 1)[1] - .split(".dfs.core.windows.net", 1)[0] - ) - - except IndexError: - return None - - @classmethod - def _get_azure_storage_account_key_az_cli(cls, account_name: str) -> str: - # [ - # { - # "creationTime": "1970-01-01T00:00:00.000000+00:00", - # "keyName": "key1", - # "permissions": "FULL", - # "value": "..." - # }, - # { - # "creationTime": "1970-01-01T00:00:00.000000+00:00", - # "keyName": "key2", - # "permissions": "FULL", - # "value": "..." - # } - # ] - - return json.loads( - cls._azcli( - "storage", - "account", - "keys", - "list", - "--output", - "json", - "--account-name", - account_name, - ) - )[0]["value"] - @classmethod def _azcli_version(cls) -> str | None: try: @@ -421,7 +346,6 @@ def _maybe_init_credential_provider( # For Azure we dispatch to `azure.identity` as much as possible if _is_azure_cloud(scheme): tenant_id = None - storage_account = None if storage_options is not None: for k, v in storage_options.items(): @@ -435,23 +359,19 @@ def _maybe_init_credential_provider( "authority_id", }: tenant_id = v - elif k in {"azure_storage_account_name", "account_name"}: - storage_account = v - elif k in {"azure_use_azure_cli", "use_azure_cli"}: + elif k in { + "azure_storage_account_name", + "account_name", + "azure_use_azure_cli", + "use_azure_cli", + }: continue else: # We assume some sort of access key was given, so we # just dispatch to the rust side. return None - storage_account = ( - # Prefer the one embedded in the path - CredentialProviderAzure._extract_adls_uri_storage_account(str(path)) - or storage_account - ) - provider = CredentialProviderAzure( - storage_account=storage_account, tenant_id=tenant_id, _verbose=verbose, ) From 1cbdba3c97031ba9ac2b9d86a945bbf94f46f7e4 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 17:03:43 +1100 Subject: [PATCH 2/7] c --- crates/polars-io/src/cloud/options.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 5f0526bc11b3..e1a0ffe4e498 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -489,12 +489,12 @@ impl CloudOptions { HINT.call_once(|| { eprintln!( " -[CloudOptions::build_azure]: Warning: Azure authentication check failed, \ -subsequent cloud operations may return an error. Polars was able to retrieve \ -the storage account key for this URL - to allow polars to use storage account \ -keys for authentication, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=1 in the \ -environment. To silence this warning, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=0. - " +[CloudOptions::build_azure]: Warning: Azure authentication check failed, subsequent cloud \ +operations may return an error. Note that polars was able to retrieve the storage account \ +key for this URL from the Azure CLI - to allow polars to automatically use storage account \ +keys for authentication, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=1 in the environment. To silence \ +this warning, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=0. +" ) }); } else if verbose { From 4301bd8ad0089ca721e7e62a194f0450446a3b7f Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 17:18:39 +1100 Subject: [PATCH 3/7] c --- crates/polars-io/src/cloud/options.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index e1a0ffe4e498..6ddb262d144b 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -471,7 +471,7 @@ impl CloudOptions { return Ok(store); } - let cfg_use_account_key = std::env::var("POLARS_ALLOW_AZURE_ACCOUNT_KEY"); + let cfg_use_account_key = std::env::var("POLARS_AUTO_AZURE_ACCOUNT_KEY"); let cfg_use_account_key = cfg_use_account_key.as_deref(); if verbose { @@ -492,15 +492,15 @@ impl CloudOptions { [CloudOptions::build_azure]: Warning: Azure authentication check failed, subsequent cloud \ operations may return an error. Note that polars was able to retrieve the storage account \ key for this URL from the Azure CLI - to allow polars to automatically use storage account \ -keys for authentication, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=1 in the environment. To silence \ -this warning, set POLARS_ALLOW_AZURE_ACCOUNT_KEY=0. +keys for authentication, set POLARS_AUTO_AZURE_ACCOUNT_KEY=1 in the environment. To silence \ +this warning, set POLARS_AUTO_AZURE_ACCOUNT_KEY=0. " ) }); } else if verbose { eprintln!( "[CloudOptions::build_azure]: Storage account key authentication disabled \ - (POLARS_ALLOW_AZURE_ACCOUNT_KEY=0)" + (POLARS_AUTO_AZURE_ACCOUNT_KEY=0)" ); } From 4ddf9b24d12269052bc9f64830e0661413ecc821 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 18:42:10 +1100 Subject: [PATCH 4/7] c --- crates/polars-io/src/cloud/options.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 6ddb262d144b..0ac7adf0c99a 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -398,7 +398,6 @@ impl CloudOptions { /// Build the [`object_store::ObjectStore`] implementation for Azure. #[cfg(feature = "azure")] pub fn build_azure(&self, url: &str) -> PolarsResult { - use futures::TryStreamExt; use object_store::ObjectStore; use super::credential_provider::IntoCredentialProvider; @@ -460,13 +459,15 @@ impl CloudOptions { return Ok(store); }; - // We do `list()` instead of `head(path)` as it could be a glob path. - let success = crate::pl_async::get_runtime() - .block_on_potential_spawn(async { store.list(None).try_next().await.is_ok() }); + let had_permission_error = crate::pl_async::get_runtime().block_on_potential_spawn(async { + // Identify if there is a permission error using a dummy path + let v = store.head(&".".into()).await; + matches!(v, Err(object_store::Error::PermissionDenied { .. })) + }); - if success { + if !had_permission_error { if verbose { - eprintln!("[CloudOptions::build_azure]: Success on list() without account key"); + eprintln!("[CloudOptions::build_azure]: Permission check OK"); } return Ok(store); } @@ -476,7 +477,7 @@ impl CloudOptions { if verbose { eprintln!( - "[CloudOptions::build_azure]: Err on list(), cfg_use_account_key={:?}", + "[CloudOptions::build_azure]: Permission check failed, cfg_use_account_key={:?}", cfg_use_account_key ); } From 8242ea84ae9cb7de388ff9f9d9ae9740845538b8 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 18:45:37 +1100 Subject: [PATCH 5/7] c --- crates/polars-io/src/cloud/options.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 0ac7adf0c99a..d48fbf075c43 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -509,7 +509,9 @@ this warning, set POLARS_AUTO_AZURE_ACCOUNT_KEY=0. } if verbose { - eprintln!("[CloudOptions::build_azure]: Using storage account key authentication"); + eprintln!( + "[CloudOptions::build_azure]: Using auto-inferred storage account key for authentication" + ); } builder_copy From 3fc79fa0f45f118dfd74daeb25097dcdb0d84277 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 21:11:50 +1100 Subject: [PATCH 6/7] c --- crates/polars-io/src/cloud/options.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index d48fbf075c43..8ef979040048 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -426,13 +426,13 @@ impl CloudOptions { .with_url(url) .with_retry(get_retry_config(self.max_retries)); - let opt_account_key = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path + let azure_cli_account_key = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path .map(|x| x.into()) .or(storage_account) .as_deref() .and_then(get_azure_storage_account_key); - let builder_copy = opt_account_key.is_some().then(|| builder.clone()); + let builder_copy = azure_cli_account_key.is_some().then(|| builder.clone()); let builder = if let Some(v) = self.credential_provider.clone() { if verbose { @@ -448,14 +448,14 @@ impl CloudOptions { if verbose { eprintln!( - "[CloudOptions::build_azure]: opt_account_key: {:?}", - opt_account_key.as_ref().map(|_| "") + "[CloudOptions::build_azure]: azure_cli_account_key: {:?}", + azure_cli_account_key.as_ref().map(|_| "") ); } let store = builder.build().map_err(to_compute_err)?; - let Some(account_key) = opt_account_key else { + let Some(account_key) = azure_cli_account_key else { return Ok(store); }; @@ -510,7 +510,8 @@ this warning, set POLARS_AUTO_AZURE_ACCOUNT_KEY=0. if verbose { eprintln!( - "[CloudOptions::build_azure]: Using auto-inferred storage account key for authentication" + "[CloudOptions::build_azure]: Using storage account key retrieved from Azure CLI \ + for authentication" ); } From 056089f91e8f9bd034a4d0a21c84bfd4c91e0d4f Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 22:33:09 +1100 Subject: [PATCH 7/7] dont invoke Azure CLI if env var disabled --- crates/polars-io/src/cloud/options.rs | 28 ++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 8ef979040048..a4737ac71787 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -426,11 +426,18 @@ impl CloudOptions { .with_url(url) .with_retry(get_retry_config(self.max_retries)); - let azure_cli_account_key = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path - .map(|x| x.into()) - .or(storage_account) - .as_deref() - .and_then(get_azure_storage_account_key); + let cfg_use_account_key = std::env::var("POLARS_AUTO_AZURE_ACCOUNT_KEY"); + let cfg_use_account_key = cfg_use_account_key.as_deref(); + + let azure_cli_account_key = if cfg_use_account_key != Ok("0") { + extract_adls_uri_storage_account(url) // Prefer the one embedded in the path + .map(|x| x.into()) + .or(storage_account) + .as_deref() + .and_then(get_azure_storage_account_key) + } else { + None + }; let builder_copy = azure_cli_account_key.is_some().then(|| builder.clone()); @@ -448,7 +455,8 @@ impl CloudOptions { if verbose { eprintln!( - "[CloudOptions::build_azure]: azure_cli_account_key: {:?}", + "[CloudOptions::build_azure]: cfg_use_account_key: {:?}, azure_cli_account_key: {:?}", + cfg_use_account_key, azure_cli_account_key.as_ref().map(|_| "") ); } @@ -472,14 +480,8 @@ impl CloudOptions { return Ok(store); } - let cfg_use_account_key = std::env::var("POLARS_AUTO_AZURE_ACCOUNT_KEY"); - let cfg_use_account_key = cfg_use_account_key.as_deref(); - if verbose { - eprintln!( - "[CloudOptions::build_azure]: Permission check failed, cfg_use_account_key={:?}", - cfg_use_account_key - ); + eprintln!("[CloudOptions::build_azure]: Permission check failed"); } if cfg_use_account_key != Ok("1") {