diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx index ab45171369953..f7f2a5f80887c 100644 --- a/python/pyarrow/_s3fs.pyx +++ b/python/pyarrow/_s3fs.pyx @@ -70,6 +70,13 @@ def finalize_s3(): check_status(CFinalizeS3()) +def ensure_s3_finalized(): + """ + Finalize S3 if already initialized + """ + check_status(CEnsureS3Finalized()) + + def resolve_s3_region(bucket): """ Resolve the S3 region of a bucket. @@ -93,6 +100,8 @@ def resolve_s3_region(bucket): c_string c_bucket c_string c_region + ensure_s3_initialized() + c_bucket = tobytes(bucket) with nogil: c_region = GetResultValue(ResolveS3BucketRegion(c_bucket)) @@ -260,6 +269,8 @@ cdef class S3FileSystem(FileSystem): load_frequency=900, proxy_options=None, allow_bucket_creation=False, allow_bucket_deletion=False, retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3)): + ensure_s3_initialized() + cdef: optional[CS3Options] options shared_ptr[CS3FileSystem] wrapped diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 36655c7d12863..ead750ca44ef8 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -54,13 +54,16 @@ from pyarrow._s3fs import ( # noqa AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy, S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized, - finalize_s3, initialize_s3, resolve_s3_region) + finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region) except ImportError: _not_imported.append("S3FileSystem") else: - ensure_s3_initialized() + # GH-38364: we don't initialize S3 eagerly as that could lead + # to crashes at shutdown even when S3 isn't used. + # Instead, S3 is initialized lazily using `ensure_s3_initialized` + # in assorted places. import atexit - atexit.register(finalize_s3) + atexit.register(ensure_s3_finalized) def __getattr__(name): diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index 2727fc201198f..cb30f4e750eff 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -211,6 +211,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: const CS3GlobalOptions& options) cdef CStatus CEnsureS3Initialized "arrow::fs::EnsureS3Initialized"() cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"() + cdef CStatus CEnsureS3Finalized "arrow::fs::EnsureS3Finalized"() cdef CResult[c_string] ResolveS3BucketRegion(const c_string& bucket)