diff --git a/.asf.yaml b/.asf.yaml
index 364b9b254..ce27a54e3 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -22,7 +22,7 @@
github:
description: "Apache Iceberg"
- homepage: https://iceberg.apache.org/
+ homepage: https://rust.iceberg.apache.org/
labels:
- iceberg
- apache
@@ -42,17 +42,20 @@ github:
required_approving_review_count: 1
required_linear_history: true
+
features:
wiki: false
issues: true
- projects: false
+ projects: true
collaborators:
- Xuanwo
- liurenjie1024
- JanKaul
+ ghp_branch: gh-pages
+ ghp_path: /
notifications:
- commits: commits@iceberg.apache.org
- issues: issues@iceberg.apache.org
- pullrequests: issues@iceberg.apache.org
- jira_options: link label link label
+ commits: commits@iceberg.apache.org
+ issues: issues@iceberg.apache.org
+ pullrequests: issues@iceberg.apache.org
+ jira_options: link label link label
diff --git a/.cargo/audit.toml b/.cargo/audit.toml
new file mode 100644
index 000000000..5db5a9d81
--- /dev/null
+++ b/.cargo/audit.toml
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[advisories]
+ignore = [
+ # rsa
+ # Marvin Attack: potential key recovery through timing sidechannels
+ # Issues: https://github.com/apache/iceberg-rust/issues/221
+ "RUSTSEC-2023-0071",
+]
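Note: the audit workflow added below runs a bare `cargo audit`, which honors this ignore list automatically, since cargo-audit reads an optional `.cargo/audit.toml` from the repository root. A minimal local sketch (the `--ignore` flag is cargo-audit's one-off equivalent of the config entry):

```shell
# Reproduce the CI security audit locally; the .cargo/audit.toml above
# makes cargo-audit skip RUSTSEC-2023-0071 without any extra flags.
cargo install cargo-audit
cargo audit

# One-off equivalent without the config file:
cargo audit --ignore RUSTSEC-2023-0071
```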
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..908bda4b5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+website export-ignore
diff --git a/.github/actions/setup-builder/action.yml b/.github/actions/setup-builder/action.yml
new file mode 100644
index 000000000..43de1cbaa
--- /dev/null
+++ b/.github/actions/setup-builder/action.yml
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file is heavily inspired by
+# [datafusion](https://github.com/apache/datafusion/blob/main/.github/actions/setup-builder/action.yaml).
+name: Prepare Rust Builder
+description: 'Prepare Rust Build Environment'
+inputs:
+ rust-version:
+ description: 'version of rust to install (e.g. stable)'
+ required: true
+ default: 'stable'
+runs:
+ using: "composite"
+ steps:
+ - name: Setup Rust toolchain
+ shell: bash
+ run: |
+ echo "Installing ${{ inputs.rust-version }}"
+ rustup toolchain install ${{ inputs.rust-version }}
+ rustup default ${{ inputs.rust-version }}
+ rustup component add rustfmt clippy
+ - name: Fixup git permissions
+ # https://github.com/actions/checkout/issues/766
+ shell: bash
+ run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
\ No newline at end of file
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
new file mode 100644
index 000000000..0d65b1aa8
--- /dev/null
+++ b/.github/workflows/audit.yml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Security audit
+
+concurrency:
+ group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+ cancel-in-progress: true
+
+on:
+ push:
+ paths:
+ - "**/Cargo.toml"
+ - "**/Cargo.lock"
+
+ pull_request:
+ paths:
+ - "**/Cargo.toml"
+ - "**/Cargo.lock"
+
+jobs:
+ security_audit:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install cargo-audit
+ run: cargo install cargo-audit
+ - name: Run audit check
+ run: cargo audit
diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml
new file mode 100644
index 000000000..d4b1aa922
--- /dev/null
+++ b/.github/workflows/bindings_python_ci.yml
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Bindings Python CI
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ branches:
+ - main
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: true
+
+jobs:
+ check-rust:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Check format
+ run: cargo fmt --all -- --check
+ - name: Check clippy
+ run: cargo clippy --all-targets --all-features -- -D warnings
+
+ check-python:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install tools
+ run: |
+ pip install ruff
+ - name: Check format
+ working-directory: "bindings/python"
+ run: |
+ ruff format . --diff
+ - name: Check style
+ working-directory: "bindings/python"
+ run: |
+ ruff check .
+
+ test:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os:
+ - ubuntu-latest
+ - macos-latest
+ - windows-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: 3.8
+ - uses: PyO3/maturin-action@v1
+ with:
+ working-directory: "bindings/python"
+ command: build
+ args: --out dist --sdist
+ - name: Run tests
+ working-directory: "bindings/python"
+ shell: bash
+ run: |
+ set -e
+ pip install hatch==1.12.0
+ hatch run dev:pip install dist/pyiceberg_core-*.whl --force-reinstall
+ hatch run dev:test
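To iterate on the Python bindings without waiting for CI, the same build-and-test loop can be approximated locally. A sketch, assuming Python 3.8+ and a Rust toolchain are installed (`maturin build --out dist --sdist` mirrors the PyO3/maturin-action step above):

```shell
cd bindings/python
pip install maturin hatch==1.12.0
# Same as the maturin-action step: build a wheel plus an sdist into dist/
maturin build --out dist --sdist
# Install the freshly built wheel into hatch's dev env and run the tests
hatch run dev:pip install dist/pyiceberg_core-*.whl --force-reinstall
hatch run dev:test
```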
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1523971a2..38f450bf7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,14 +29,28 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true
+env:
+ rust_msrv: "1.77.1"
+
jobs:
check:
- runs-on: ubuntu-latest
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os:
+ - ubuntu-latest
+ - macos-latest
steps:
- uses: actions/checkout@v4
- name: Check License Header
- uses: apache/skywalking-eyes/header@v0.5.0
+ uses: apache/skywalking-eyes/header@v0.6.0
+
+ - name: Install cargo-sort
+ run: make install-cargo-sort
+
+ - name: Install taplo-cli
+ run: make install-taplo-cli
- name: Cargo format
run: make check-fmt
@@ -50,8 +64,29 @@ jobs:
- name: Cargo sort
run: make cargo-sort
+ - name: Cargo Machete
+ run: make cargo-machete
build:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os:
+ - ubuntu-latest
+ - macos-latest
+ - windows-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Rust toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{ env.rust_msrv }}
+
+ - name: Build
+ run: make build
+
+ build_with_no_default_features:
runs-on: ${{ matrix.os }}
strategy:
matrix:
@@ -62,15 +97,23 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Build
- run: cargo build
+ run: cargo build -p iceberg --no-default-features
unit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
+ - name: Setup Rust toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{ env.rust_msrv }}
+
- name: Test
run: cargo test --no-fail-fast --all-targets --all-features --workspace
-
+
+ - name: Async-std Test
+ run: cargo test --no-fail-fast --all-targets --no-default-features --features "async-std" --features "storage-all" --workspace
+
- name: Doc Test
run: cargo test --no-fail-fast --doc --all-features --workspace
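Roughly the same checks can be run locally before pushing; a sketch (the make targets install pinned tool versions themselves, mirroring the new workflow steps):

```shell
# Approximate the "check" job
make install-cargo-sort install-taplo-cli
make check

# Approximate the "unit" job on the default (tokio) runtime
cargo test --no-fail-fast --all-targets --all-features --workspace
```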
diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml
index 51a6a7b91..da72929dd 100644
--- a/.github/workflows/ci_typos.yml
+++ b/.github/workflows/ci_typos.yml
@@ -41,7 +41,5 @@ jobs:
FORCE_COLOR: 1
steps:
- uses: actions/checkout@v4
- - run: curl -LsSf https://github.com/crate-ci/typos/releases/download/v1.14.8/typos-v1.14.8-x86_64-unknown-linux-musl.tar.gz | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin
-
- - name: do typos check with typos-cli
- run: typos
+ - name: Check typos
+ uses: crate-ci/typos@v1.24.5
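The pinned curl download is replaced by the official action; locally, the same check is available through the `typos-cli` crate that backs it (a sketch):

```shell
# Run the same spell check locally
cargo install typos-cli
typos   # scans the working tree, honoring any typos config file in the repo
```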
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 000000000..486d66246
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Publish
+
+on:
+ push:
+ tags:
+ - '*'
+ workflow_dispatch:
+
+env:
+ rust_msrv: "1.77.1"
+
+jobs:
+ publish:
+ runs-on: ubuntu-latest
+ strategy:
+ # Publish package one by one instead of flooding the registry
+ max-parallel: 1
+ matrix:
+      # Order here matters: it determines the publishing sequence, so each crate must come after its dependencies
+ package:
+ - "crates/iceberg"
+ - "crates/catalog/glue"
+ - "crates/catalog/hms"
+ - "crates/catalog/memory"
+ - "crates/catalog/rest"
+ # sql is not ready for release yet.
+ # - "crates/catalog/sql"
+ - "crates/integrations/datafusion"
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Rust toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{ env.rust_msrv }}
+
+ - name: Publish ${{ matrix.package }}
+ working-directory: ${{ matrix.package }}
+ # Only publish if it's a tag and the tag is not a pre-release
+ if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }}
+ run: cargo publish --all-features
+ env:
+ CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
\ No newline at end of file
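The `!contains(github.ref, '-')` guard means only stable tags publish; pre-release tags still run the job but skip the publish step. Illustrative (hypothetical) tag names:

```shell
# Stable tag: contains no '-', so every crate in the matrix is published
git tag v0.3.0 && git push origin v0.3.0

# Pre-release tag: the '-' makes the publish step a no-op
git tag v0.3.0-rc.1 && git push origin v0.3.0-rc.1
```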
diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml
new file mode 100644
index 000000000..bbe3e53c4
--- /dev/null
+++ b/.github/workflows/website.yml
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Website
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ branches:
+ - main
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: true
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup mdBook
+ uses: peaceiris/actions-mdbook@v2
+ with:
+ mdbook-version: '0.4.36'
+
+ - name: Build
+ working-directory: website
+ run: mdbook build
+
+ - name: Copy asf file
+ run: cp .asf.yaml ./website/book/.asf.yaml
+
+ - name: Build API docs
+ run: |
+ cargo doc --no-deps --workspace --all-features
+ cp -r target/doc ./website/book/api
+
+ - name: Deploy to gh-pages
+ uses: peaceiris/actions-gh-pages@v4.0.0
+ if: github.event_name == 'push' && github.ref_name == 'main'
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: website/book
+ publish_branch: gh-pages
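To preview the site together with the API docs locally, the workflow's steps translate directly (a sketch; output lands in `website/book`, with rustdoc under `api/`):

```shell
(cd website && mdbook build)
cargo doc --no-deps --workspace --all-features
cp -r target/doc website/book/api
# Then open website/book/index.html in a browser
```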
diff --git a/.gitignore b/.gitignore
index 72c34840c..a3f05e817 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,8 +15,14 @@
# specific language governing permissions and limitations
# under the License.
-/target
-/Cargo.lock
+target
+Cargo.lock
.idea
.vscode
-**/.DS_Store
\ No newline at end of file
+**/.DS_Store
+dist/*
+**/venv
+*.so
+*.pyc
+*.whl
+*.tar.gz
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 000000000..ea5e0779f
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+!.gitignore
+!vcs.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 000000000..6fd581ec8
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,30 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+<project version="4">
+  <component name="IssueNavigationConfiguration">
+    <option name="links">
+      <list>
+        <IssueNavigationLink>
+          <option name="issueRegexp" value="#(\d+)" />
+          <option name="linkRegexp" value="https://github.com/apache/iceberg-rust/issues/$1" />
+        </IssueNavigationLink>
+      </list>
+    </option>
+  </component>
+</project>
diff --git a/.licenserc.yaml b/.licenserc.yaml
index cd362bc94..38aa58402 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -23,6 +23,12 @@ header:
paths-ignore:
- 'LICENSE'
- 'NOTICE'
+ - '.gitattributes'
- '**/*.json'
-
+ # Generated content by mdbook
+ - 'website/book'
+ # Generated content by scripts
+ - '**/DEPENDENCIES.*.tsv'
+ # Release distributions
+ - 'dist/*'
comment: on-failure
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d1a50b33..fc576c52f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,4 +24,278 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/)
and this project adheres to [Semantic Versioning](https://semver.org/).
-## Unreleased
+## [v0.3.0] - 2024-08-14
+
+* Smooth out release steps by @Fokko in https://github.com/apache/iceberg-rust/pull/197
+* refactor: remove support of manifest list format as a list of file path by @Dysprosium0626 in https://github.com/apache/iceberg-rust/pull/201
+* refactor: remove unwraps by @odysa in https://github.com/apache/iceberg-rust/pull/196
+* Fix: add required rust version in cargo.toml by @dp-0 in https://github.com/apache/iceberg-rust/pull/193
+* Fix the REST spec version by @Fokko in https://github.com/apache/iceberg-rust/pull/198
+* feat: Add Sync + Send to Catalog trait by @ZhengLin-Li in https://github.com/apache/iceberg-rust/pull/202
+* feat: Make thrift transport configurable by @DeaconDesperado in https://github.com/apache/iceberg-rust/pull/194
+* Add UnboundSortOrder by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/115
+* ci: Add workflow for publish by @Xuanwo in https://github.com/apache/iceberg-rust/pull/218
+* Add workflow for cargo audit by @sdd in https://github.com/apache/iceberg-rust/pull/217
+* docs: Add basic README for all crates by @Xuanwo in https://github.com/apache/iceberg-rust/pull/215
+* Follow naming convention from Iceberg's Java and Python implementations by @s-akhtar-baig in https://github.com/apache/iceberg-rust/pull/204
+* doc: Add download page by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/219
+* chore(deps): Update derive_builder requirement from 0.13.0 to 0.20.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/203
+* test: add FileIO s3 test by @odysa in https://github.com/apache/iceberg-rust/pull/220
+* ci: Ignore RUSTSEC-2023-0071 for no actions to take by @Xuanwo in https://github.com/apache/iceberg-rust/pull/222
+* feat: Add expression builder and display. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/169
+* chord: Add IssueNavigationLink for RustRover by @stream2000 in https://github.com/apache/iceberg-rust/pull/230
+* minor: Fix `double` API doc by @viirya in https://github.com/apache/iceberg-rust/pull/226
+* feat: add `UnboundPredicate::negate()` by @sdd in https://github.com/apache/iceberg-rust/pull/228
+* fix: Remove deprecated methods to pass ci by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/234
+* Implement basic Parquet data file reading capability by @sdd in https://github.com/apache/iceberg-rust/pull/207
+* chore: doc-test as a target by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/235
+* feat: add parquet writer by @ZENOTME in https://github.com/apache/iceberg-rust/pull/176
+* Add hive metastore catalog support (part 1/2) by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/237
+* chore: Enable projects. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/247
+* refactor: Make plan_files as asynchronous stream by @viirya in https://github.com/apache/iceberg-rust/pull/243
+* feat: Implement binding expression by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/231
+* Implement Display instead of ToString by @lewiszlw in https://github.com/apache/iceberg-rust/pull/256
+* add rewrite_not by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/263
+* feat: init TableMetadataBuilder by @ZENOTME in https://github.com/apache/iceberg-rust/pull/262
+* Rename stat_table to table_exists in Catalog trait by @lewiszlw in https://github.com/apache/iceberg-rust/pull/257
+* feat (static table): implement a read-only table struct loaded from metadata by @a-agmon in https://github.com/apache/iceberg-rust/pull/259
+* feat: implement OAuth for catalog rest client by @TennyZhuang in https://github.com/apache/iceberg-rust/pull/254
+* docs: annotate precision and length to primitive types by @waynexia in https://github.com/apache/iceberg-rust/pull/270
+* build: Restore CI by making parquet and arrow version consistent by @viirya in https://github.com/apache/iceberg-rust/pull/280
+* Metadata Serde + default partition_specs and sort_orders by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/272
+* feat: make optional oauth param configurable by @himadripal in https://github.com/apache/iceberg-rust/pull/278
+* fix: enable public access to ManifestEntry properties by @a-agmon in https://github.com/apache/iceberg-rust/pull/284
+* feat: Implement the conversion from Arrow Schema to Iceberg Schema by @viirya in https://github.com/apache/iceberg-rust/pull/258
+* Rename function name to `add_manifests` by @viirya in https://github.com/apache/iceberg-rust/pull/293
+* Modify `Bind` calls so that they don't consume `self` and instead return a new struct, leaving the original unmoved by @sdd in https://github.com/apache/iceberg-rust/pull/290
+* Add hive metastore catalog support (part 2/2) by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/285
+* feat: implement prune column for schema by @Dysprosium0626 in https://github.com/apache/iceberg-rust/pull/261
+* chore(deps): Update reqwest requirement from ^0.11 to ^0.12 by @dependabot in https://github.com/apache/iceberg-rust/pull/296
+* Glue Catalog: Basic Setup + Test Infra (1/3) by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/294
+* feat: rest client respect prefix prop by @TennyZhuang in https://github.com/apache/iceberg-rust/pull/297
+* fix: HMS Catalog missing properties `fn create_namespace` by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/303
+* fix: renaming FileScanTask.data_file to data_manifest_entry by @a-agmon in https://github.com/apache/iceberg-rust/pull/300
+* feat: Make OAuth token server configurable by @whynick1 in https://github.com/apache/iceberg-rust/pull/305
+* feat: Glue Catalog - namespace operations (2/3) by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/304
+* feat: add transform_literal by @ZENOTME in https://github.com/apache/iceberg-rust/pull/287
+* feat: Complete predicate builders for all operators. by @QuakeWang in https://github.com/apache/iceberg-rust/pull/276
+* feat: Support customized header in Rest catalog client by @whynick1 in https://github.com/apache/iceberg-rust/pull/306
+* fix: chrono dep by @odysa in https://github.com/apache/iceberg-rust/pull/274
+* feat: Read Parquet data file with projection by @viirya in https://github.com/apache/iceberg-rust/pull/245
+* Fix day timestamp micro by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/312
+* feat: support uri redirect in rest client by @TennyZhuang in https://github.com/apache/iceberg-rust/pull/310
+* refine: separate parquet reader and arrow convert by @ZENOTME in https://github.com/apache/iceberg-rust/pull/313
+* chore: upgrade to rust-version 1.77.1 by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/316
+* Support identifier warehouses by @Fokko in https://github.com/apache/iceberg-rust/pull/308
+* feat: Project transform by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/309
+* Add Struct Accessors to BoundReferences by @sdd in https://github.com/apache/iceberg-rust/pull/317
+* Use `str` args rather than `String` in transform to avoid needing to clone strings by @sdd in https://github.com/apache/iceberg-rust/pull/325
+* chore(deps): Update pilota requirement from 0.10.0 to 0.11.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/327
+* chore(deps): Bump peaceiris/actions-mdbook from 1 to 2 by @dependabot in https://github.com/apache/iceberg-rust/pull/332
+* chore(deps): Bump peaceiris/actions-gh-pages from 3.9.3 to 4.0.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/333
+* chore(deps): Bump apache/skywalking-eyes from 0.5.0 to 0.6.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/328
+* Add `BoundPredicateVisitor` (alternate version) by @sdd in https://github.com/apache/iceberg-rust/pull/334
+* add `InclusiveProjection` Visitor by @sdd in https://github.com/apache/iceberg-rust/pull/335
+* feat: Implement the conversion from Iceberg Schema to Arrow Schema by @ZENOTME in https://github.com/apache/iceberg-rust/pull/277
+* Simplify expression when doing `{and,or}` operations by @Fokko in https://github.com/apache/iceberg-rust/pull/339
+* feat: Glue Catalog - table operations (3/3) by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/314
+* chore: update roadmap by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/336
+* Add `ManifestEvaluator`, used to filter manifests in table scans by @sdd in https://github.com/apache/iceberg-rust/pull/322
+* feat: init iceberg writer by @ZENOTME in https://github.com/apache/iceberg-rust/pull/275
+* Implement manifest filtering in `TableScan` by @sdd in https://github.com/apache/iceberg-rust/pull/323
+* Refactor: Extract `partition_filters` from `ManifestEvaluator` by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/360
+* Basic Integration with Datafusion by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/324
+* refactor: cache partition_schema in `fn plan_files()` by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/362
+* fix (manifest-list): added serde aliases to support both forms conventions by @a-agmon in https://github.com/apache/iceberg-rust/pull/365
+* feat: Extract FileRead and FileWrite trait by @Xuanwo in https://github.com/apache/iceberg-rust/pull/364
+* feat: Convert predicate to arrow filter and push down to parquet reader by @viirya in https://github.com/apache/iceberg-rust/pull/295
+* chore(deps): Update datafusion requirement from 37.0.0 to 38.0.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/369
+* chore(deps): Update itertools requirement from 0.12 to 0.13 by @dependabot in https://github.com/apache/iceberg-rust/pull/376
+* Add `InclusiveMetricsEvaluator` by @sdd in https://github.com/apache/iceberg-rust/pull/347
+* Rename V2 spec names by @gupteaj in https://github.com/apache/iceberg-rust/pull/380
+* feat: make file scan task serializable by @ZENOTME in https://github.com/apache/iceberg-rust/pull/377
+* Feature: Schema into_builder method by @c-thiel in https://github.com/apache/iceberg-rust/pull/381
+* replaced `i32` in `TableUpdate::SetDefaultSortOrder` to `i64` by @rwwwx in https://github.com/apache/iceberg-rust/pull/387
+* fix: make PrimitiveLiteral and Literal not be Ord by @ZENOTME in https://github.com/apache/iceberg-rust/pull/386
+* docs(writer/docker): fix small typos and wording by @jdockerty in https://github.com/apache/iceberg-rust/pull/389
+* feat: `StructAccessor.get` returns `Result<Option<Literal>>` instead of `Result<Literal>` by @sdd in https://github.com/apache/iceberg-rust/pull/390
+* feat: add `ExpressionEvaluator` by @marvinlanhenke in https://github.com/apache/iceberg-rust/pull/363
+* Derive Clone for TableUpdate by @c-thiel in https://github.com/apache/iceberg-rust/pull/402
+* Add accessor for Schema identifier_field_ids by @c-thiel in https://github.com/apache/iceberg-rust/pull/388
+* deps: Bump arrow related crates to 52 by @Dysprosium0626 in https://github.com/apache/iceberg-rust/pull/403
+* SnapshotRetention::Tag max_ref_age_ms should be optional by @c-thiel in https://github.com/apache/iceberg-rust/pull/391
+* feat: Add storage features for iceberg by @Xuanwo in https://github.com/apache/iceberg-rust/pull/400
+* Implement BoundPredicateVisitor trait for ManifestFilterVisitor by @s-akhtar-baig in https://github.com/apache/iceberg-rust/pull/367
+* Add missing arrow predicate pushdown implementations for `StartsWith`, `NotStartsWith`, `In`, and `NotIn` by @sdd in https://github.com/apache/iceberg-rust/pull/404
+* feat: make BoundPredicate,Datum serializable by @ZENOTME in https://github.com/apache/iceberg-rust/pull/406
+* refactor: Upgrade hive_metastore to 0.1 by @Xuanwo in https://github.com/apache/iceberg-rust/pull/409
+* fix: Remove duplicate filter by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/414
+* Enhancement: refine the reader interface by @ZENOTME in https://github.com/apache/iceberg-rust/pull/401
+* refactor(catalog/rest): Split http client logic to separate mod by @Xuanwo in https://github.com/apache/iceberg-rust/pull/423
+* Remove #[allow(dead_code)] from the codebase by @vivek378521 in https://github.com/apache/iceberg-rust/pull/421
+* ci: use official typos github action by @shoothzj in https://github.com/apache/iceberg-rust/pull/426
+* feat: support lower_bound&&upper_bound for parquet writer by @ZENOTME in https://github.com/apache/iceberg-rust/pull/383
+* refactor: Implement ArrowAsyncFileWriter directly to remove tokio by @Xuanwo in https://github.com/apache/iceberg-rust/pull/427
+* chore: Don't enable reqwest default features by @Xuanwo in https://github.com/apache/iceberg-rust/pull/432
+* refactor(catalogs/rest): Split user config and runtime config by @Xuanwo in https://github.com/apache/iceberg-rust/pull/431
+* feat: runtime module by @odysa in https://github.com/apache/iceberg-rust/pull/233
+* fix: Fix namespace identifier in url by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/435
+* refactor(io): Split io into smaller mods by @Xuanwo in https://github.com/apache/iceberg-rust/pull/438
+* chore: Use once_cell to replace lazy_static by @Xuanwo in https://github.com/apache/iceberg-rust/pull/443
+* fix: Fix build while no-default-features enabled by @Xuanwo in https://github.com/apache/iceberg-rust/pull/442
+* chore(deps): Bump crate-ci/typos from 1.22.9 to 1.23.1 by @dependabot in https://github.com/apache/iceberg-rust/pull/447
+* docs: Refactor the README to be more user-oriented by @Xuanwo in https://github.com/apache/iceberg-rust/pull/444
+* feat: Add cargo machete by @vaibhawvipul in https://github.com/apache/iceberg-rust/pull/448
+* chore: Use nightly toolchain for check by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/445
+* reuse docker container to save compute resources by @thexiay in https://github.com/apache/iceberg-rust/pull/428
+* feat: Add macos runner for ci by @QuakeWang in https://github.com/apache/iceberg-rust/pull/441
+* chore: remove compose obsolete version (#452) by @yinheli in https://github.com/apache/iceberg-rust/pull/454
+* Refactor file_io_s3_test.rs by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/455
+* chore(deps): Bump crate-ci/typos from 1.23.1 to 1.23.2 by @dependabot in https://github.com/apache/iceberg-rust/pull/457
+* refine: move binary serialize in literal to datum by @ZENOTME in https://github.com/apache/iceberg-rust/pull/456
+* fix: Hms test on macos should use correct arch by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/461
+* Fix ManifestFile length calculation by @nooberfsh in https://github.com/apache/iceberg-rust/pull/466
+* chore(deps): Update typed-builder requirement from ^0.18 to ^0.19 by @dependabot in https://github.com/apache/iceberg-rust/pull/473
+* fix: use avro fixed to represent decimal by @xxchan in https://github.com/apache/iceberg-rust/pull/472
+* feat(catalog!): Deprecate rest.authorization-url in favor of oauth2-server-uri by @ndrluis in https://github.com/apache/iceberg-rust/pull/480
+* Alter `Transform::Day` to map partition types to `Date` rather than `Int` for consistency with reference implementation by @sdd in https://github.com/apache/iceberg-rust/pull/479
+* feat(iceberg): Add memory file IO support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/481
+* Add memory catalog implementation by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/475
+* chore: Enable new rust code format settings by @Xuanwo in https://github.com/apache/iceberg-rust/pull/483
+* docs: Generate rust API docs by @Xuanwo in https://github.com/apache/iceberg-rust/pull/486
+* chore: Fix format of recent PRs by @Xuanwo in https://github.com/apache/iceberg-rust/pull/487
+* Rename folder to memory by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/490
+* chore(deps): Bump crate-ci/typos from 1.23.2 to 1.23.5 by @dependabot in https://github.com/apache/iceberg-rust/pull/493
+* View Spec implementation by @c-thiel in https://github.com/apache/iceberg-rust/pull/331
+* fix: Return error on reader task by @ndrluis in https://github.com/apache/iceberg-rust/pull/498
+* chore: Bump OpenDAL to 0.48 by @Xuanwo in https://github.com/apache/iceberg-rust/pull/500
+* feat: add check compatible func for primitive type by @ZENOTME in https://github.com/apache/iceberg-rust/pull/492
+* refactor(iceberg): Remove an extra config parse logic by @Xuanwo in https://github.com/apache/iceberg-rust/pull/499
+* feat: permit Datum Date<->Int type conversion by @sdd in https://github.com/apache/iceberg-rust/pull/496
+* Add additional S3 FileIO Attributes by @c-thiel in https://github.com/apache/iceberg-rust/pull/505
+* docs: Add links to dev docs by @Xuanwo in https://github.com/apache/iceberg-rust/pull/508
+* chore: Remove typo in README by @Xuanwo in https://github.com/apache/iceberg-rust/pull/509
+* feat: podman support by @alexyin1 in https://github.com/apache/iceberg-rust/pull/489
+* feat(table): Add debug and clone trait to static table struct by @ndrluis in https://github.com/apache/iceberg-rust/pull/510
+* Use namespace location or warehouse location if table location is missing by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/511
+* chore(deps): Bump crate-ci/typos from 1.23.5 to 1.23.6 by @dependabot in https://github.com/apache/iceberg-rust/pull/521
+* Concurrent table scans by @sdd in https://github.com/apache/iceberg-rust/pull/373
+* refactor: replace num_cpus with thread::available_parallelism by @SteveLauC in https://github.com/apache/iceberg-rust/pull/526
+* Fix: MappedLocalTime should not be exposed by @c-thiel in https://github.com/apache/iceberg-rust/pull/529
+* feat: Establish subproject pyiceberg_core by @Xuanwo in https://github.com/apache/iceberg-rust/pull/518
+* fix: complete miss attribute for map && list in avro schema by @ZENOTME in https://github.com/apache/iceberg-rust/pull/411
+* arrow/schema.rs: refactor tests by @AndreMouche in https://github.com/apache/iceberg-rust/pull/531
+* feat: initialise SQL Catalog by @callum-ryan in https://github.com/apache/iceberg-rust/pull/524
+* chore(deps): Bump actions/setup-python from 4 to 5 by @dependabot in https://github.com/apache/iceberg-rust/pull/536
+* feat(storage): support aws session token by @twuebi in https://github.com/apache/iceberg-rust/pull/530
+* Simplify PrimitiveLiteral by @ZENOTME in https://github.com/apache/iceberg-rust/pull/502
+* chore: bump opendal to 0.49 by @jdockerty in https://github.com/apache/iceberg-rust/pull/540
+* feat: support timestamp columns in row filters by @sdd in https://github.com/apache/iceberg-rust/pull/533
+* fix: don't silently drop errors encountered in table scan file planning by @sdd in https://github.com/apache/iceberg-rust/pull/535
+* chore(deps): Update sqlx requirement from 0.7.4 to 0.8.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/537
+* Fix main branch building break by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/541
+* feat: support for gcs storage by @jdockerty in https://github.com/apache/iceberg-rust/pull/520
+* feat: Allow FileIO to reuse http client by @Xuanwo in https://github.com/apache/iceberg-rust/pull/544
+* docs: Add an example to scan an iceberg table by @Xuanwo in https://github.com/apache/iceberg-rust/pull/545
+* Concurrent data file fetching and parallel RecordBatch processing by @sdd in https://github.com/apache/iceberg-rust/pull/515
+* doc: Add statement for contributors to avoid force push as much as possible by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/546
+* feat: Partition Binding and safe PartitionSpecBuilder by @c-thiel in https://github.com/apache/iceberg-rust/pull/491
+
+## v0.2.0 - 2024-02-20
+
+* chore: Setup project layout by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1
+* ci: Fix version for apache/skywalking-eyes/header by @Xuanwo in https://github.com/apache/iceberg-rust/pull/4
+* feat: Implement serialize/deserialize for datatypes by @JanKaul in https://github.com/apache/iceberg-rust/pull/6
+* docs: Add CONTRIBUTING and finish project setup by @Xuanwo in https://github.com/apache/iceberg-rust/pull/7
+* feat: Add lookup tables to StructType by @JanKaul in https://github.com/apache/iceberg-rust/pull/12
+* feat: Implement error handling by @Xuanwo in https://github.com/apache/iceberg-rust/pull/13
+* chore: Use HashMap instead of BTreeMap for storing fields by id in StructType by @amogh-jahagirdar in https://github.com/apache/iceberg-rust/pull/14
+* chore: Change iceberg into workspace by @Xuanwo in https://github.com/apache/iceberg-rust/pull/15
+* feat: Use macro to define from error. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/17
+* feat: Introduce schema definition. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/19
+* refactor: Align data type with other implementation. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/21
+* chore: Ignore .idea by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/27
+* feat: Implement Iceberg values by @JanKaul in https://github.com/apache/iceberg-rust/pull/20
+* feat: Define schema post order visitor. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/25
+* feat: Add transform by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/26
+* fix: Fix build break in main branch by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/30
+* fix: Update github configuration to avoid conflicting merge by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/31
+* chore(deps): Bump apache/skywalking-eyes from 0.4.0 to 0.5.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/35
+* feat: Table metadata by @JanKaul in https://github.com/apache/iceberg-rust/pull/29
+* feat: Add utility methods to help conversion between literals. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/38
+* [comment] should be IEEE 754 rather than 753 by @zhjwpku in https://github.com/apache/iceberg-rust/pull/39
+* fix: Add doc test action by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/44
+* chore: Ping toolchain version by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/48
+* feat: Introduce conversion between iceberg schema and avro schema by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/40
+* feat: Allow Schema Serialization/deserialization by @y0psolo in https://github.com/apache/iceberg-rust/pull/46
+* chore: Add cargo sort check by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/51
+* chore(deps): Bump actions/checkout from 3 to 4 by @dependabot in https://github.com/apache/iceberg-rust/pull/58
+* Metadata integration tests by @JanKaul in https://github.com/apache/iceberg-rust/pull/57
+* feat: Introduce FileIO by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/53
+* feat: Add Catalog API by @Xuanwo in https://github.com/apache/iceberg-rust/pull/54
+* feat: support transform function by @ZENOTME in https://github.com/apache/iceberg-rust/pull/42
+* chore(deps): Update ordered-float requirement from 3.7.0 to 4.0.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/64
+* feat: Add public methods for catalog related structs by @Xuanwo in https://github.com/apache/iceberg-rust/pull/63
+* minor: Upgrade to latest toolchain by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/68
+* chore(deps): Update opendal requirement from 0.39 to 0.40 by @dependabot in https://github.com/apache/iceberg-rust/pull/65
+* refactor: Make directory for catalog by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/69
+* feat: support read Manifest List by @ZENOTME in https://github.com/apache/iceberg-rust/pull/56
+* chore(deps): Update apache-avro requirement from 0.15 to 0.16 by @dependabot in https://github.com/apache/iceberg-rust/pull/71
+* fix: avro bytes test for Literal by @JanKaul in https://github.com/apache/iceberg-rust/pull/80
+* chore(deps): Update opendal requirement from 0.40 to 0.41 by @dependabot in https://github.com/apache/iceberg-rust/pull/84
+* feat: manifest list writer by @barronw in https://github.com/apache/iceberg-rust/pull/76
+* feat: First version of rest catalog. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/78
+* chore(deps): Update typed-builder requirement from ^0.17 to ^0.18 by @dependabot in https://github.com/apache/iceberg-rust/pull/87
+* feat: Implement load table api. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/89
+* chroes:Manage dependencies using workspace. by @my-vegetable-has-exploded in https://github.com/apache/iceberg-rust/pull/93
+* minor: Provide Debug impl for pub structs #73 by @DeaconDesperado in https://github.com/apache/iceberg-rust/pull/92
+* feat: support ser/deser of value by @ZENOTME in https://github.com/apache/iceberg-rust/pull/82
+* fix: Migrate from tempdir to tempfile crate by @cdaudt in https://github.com/apache/iceberg-rust/pull/91
+* chore(deps): Update opendal requirement from 0.41 to 0.42 by @dependabot in https://github.com/apache/iceberg-rust/pull/101
+* chore(deps): Update itertools requirement from 0.11 to 0.12 by @dependabot in https://github.com/apache/iceberg-rust/pull/102
+* Replace i64 with DateTime by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/94
+* feat: Implement create table and update table api for rest catalog. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/97
+* Fix compile failures by @fqaiser94 in https://github.com/apache/iceberg-rust/pull/105
+* feat: replace 'Builder' with 'TypedBuilder' for 'Snapshot' by @xiaoyang-sde in https://github.com/apache/iceberg-rust/pull/110
+* chore: Upgrade uuid manually and remove pinned version by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/108
+* chore: Add cargo build and build guide by @manuzhang in https://github.com/apache/iceberg-rust/pull/111
+* feat: Add hms catalog layout by @Xuanwo in https://github.com/apache/iceberg-rust/pull/112
+* feat: support UnboundPartitionSpec by @my-vegetable-has-exploded in https://github.com/apache/iceberg-rust/pull/106
+* test: Add integration tests for rest catalog. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/109
+* chore(deps): Update opendal requirement from 0.42 to 0.43 by @dependabot in https://github.com/apache/iceberg-rust/pull/116
+* feat: support read/write Manifest by @ZENOTME in https://github.com/apache/iceberg-rust/pull/79
+* test: Remove binary manifest list avro file by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/118
+* refactor: Conversion between literal and json should depends on type. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/120
+* fix: fix parse partitions in manifest_list by @ZENOTME in https://github.com/apache/iceberg-rust/pull/122
+* feat: Add website layout by @Xuanwo in https://github.com/apache/iceberg-rust/pull/130
+* feat: Expression system. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/132
+* website: Fix typo in book.toml by @Xuanwo in https://github.com/apache/iceberg-rust/pull/136
+* Set `ghp_{pages,path}` properties by @Fokko in https://github.com/apache/iceberg-rust/pull/138
+* chore: Upgrade toolchain to 1.75.0 by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/140
+* feat: Add roadmap and features status in README.md by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/134
+* Remove `publish:` section from `.asf.yaml` by @Fokko in https://github.com/apache/iceberg-rust/pull/141
+* chore(deps): Bump peaceiris/actions-gh-pages from 3.9.2 to 3.9.3 by @dependabot in https://github.com/apache/iceberg-rust/pull/143
+* chore(deps): Update opendal requirement from 0.43 to 0.44 by @dependabot in https://github.com/apache/iceberg-rust/pull/142
+* docs: Change homepage to rust.i.a.o by @Xuanwo in https://github.com/apache/iceberg-rust/pull/146
+* feat: Introduce basic file scan planning. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/129
+* chore: Update contributing guide. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/163
+* chore: Update reader api status by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/162
+* #154 : Add homepage to Cargo.toml by @hiirrxnn in https://github.com/apache/iceberg-rust/pull/160
+* Add formatting for toml files by @Tyler-Sch in https://github.com/apache/iceberg-rust/pull/167
+* chore(deps): Update env_logger requirement from 0.10.0 to 0.11.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/170
+* feat: init file writer interface by @ZENOTME in https://github.com/apache/iceberg-rust/pull/168
+* fix: Manifest parsing should consider schema evolution. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/171
+* docs: Add release guide for iceberg-rust by @Xuanwo in https://github.com/apache/iceberg-rust/pull/147
+* fix: Ignore negative statistics value by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/173
+* feat: Add user guide for website. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/178
+* chore(deps): Update derive_builder requirement from 0.12.0 to 0.13.0 by @dependabot in https://github.com/apache/iceberg-rust/pull/175
+* refactor: Replace unwrap by @odysa in https://github.com/apache/iceberg-rust/pull/183
+* feat: add handwritten serialize by @odysa in https://github.com/apache/iceberg-rust/pull/185
+* Fix: avro schema names for manifest and manifest_list by @JanKaul in https://github.com/apache/iceberg-rust/pull/182
+* feat: Bump hive_metastore to use pure rust thrift impl `volo` by @Xuanwo in https://github.com/apache/iceberg-rust/pull/174
+* feat: Bump version 0.2.0 to prepare for release. by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/181
+* fix: default_partition_spec using the partition_spec_id set by @odysa in https://github.com/apache/iceberg-rust/pull/190
+* Docs: Add required Cargo version to install guide by @manuzhang in https://github.com/apache/iceberg-rust/pull/191
+* chore(deps): Update opendal requirement from 0.44 to 0.45 by @dependabot in https://github.com/apache/iceberg-rust/pull/195
+
+[v0.3.0]: https://github.com/apache/iceberg-rust/compare/v0.2.0...v0.3.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 712e7e27d..f66d3248e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -73,6 +73,8 @@ All pull requests should be reviewed by at least one iceberg-rust committer.
All pull requests are squash merged. We generally discourage large pull requests that are over 300-500 lines of diff. If you would like to propose a change that is larger we suggest coming onto [Iceberg's DEV mailing list](mailto:dev@iceberg.apache.org) or [Slack #rust Channel](https://join.slack.com/t/apache-iceberg/shared_invite/zt-1zbov3k6e-KtJfoaxp97YfX6dPz1Bk7A) and discuss it with us. This way we can talk through the solution and discuss if a change that large is even needed! This will produce a quicker response to the change and likely produce code that aligns better with our process.
+When a pull request is under review, please avoid force pushing, as it makes it difficult for reviewers to track changes. If you need to keep the branch up to date with the main branch, consider using `git merge` instead.
+
### CI
Currently, iceberg-rust uses GitHub Actions to run tests. The workflows are defined in `.github/workflows`.
@@ -91,6 +93,8 @@ The fastest way is:
### Bring your own toolbox
+#### Install rust
+
iceberg-rust is primarily a Rust project. To build iceberg-rust, you will need to set up Rust development first. We highly recommend using [rustup](https://rustup.rs/) for the setup process.
For Linux or MacOS, use the following command:
@@ -108,11 +112,22 @@ $ cargo version
cargo 1.69.0 (6e9a83356 2023-04-12)
```
+#### Install Docker or Podman
+
+Currently, iceberg-rust uses Docker to set up the environment for integration tests. Podman is also supported.
+
+You can learn how to install Docker from [here](https://docs.docker.com/get-docker/).
+
+For macOS users, you can install [OrbStack](https://orbstack.dev/) as a Docker alternative.
+
+For Podman users, refer to [Using Podman instead of Docker](docs/contributing/podman.md).
+
## Build
* To compile the project: `make build`
* To check code styles: `make check`
-* To run tests: `make test`
+* To run unit tests only: `make unit-test`
+* To run all tests: `make test`
## Code of Conduct
diff --git a/Cargo.toml b/Cargo.toml
index a59a4bb4c..8d04f6799 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,45 +17,81 @@
[workspace]
resolver = "2"
-members = ["crates/catalog/*", "crates/iceberg", "crates/test_utils"]
+members = [
+ "crates/catalog/*",
+ "crates/examples",
+ "crates/iceberg",
+ "crates/integrations/*",
+ "crates/test_utils",
+]
+exclude = ["bindings/python"]
+
+[workspace.package]
+version = "0.3.0"
+edition = "2021"
+homepage = "https://rust.iceberg.apache.org/"
+
+repository = "https://github.com/apache/iceberg-rust"
+license = "Apache-2.0"
+rust-version = "1.77.1"
[workspace.dependencies]
anyhow = "1.0.72"
-apache-avro = "0.16"
-arrow-arith = { version = ">=46" }
-arrow-array = { version = ">=46" }
-arrow-schema = { version = ">=46" }
+apache-avro = "0.17"
+array-init = "2"
+arrow-arith = { version = "52" }
+arrow-array = { version = "52" }
+arrow-ord = { version = "52" }
+arrow-schema = { version = "52" }
+arrow-select = { version = "52" }
+arrow-string = { version = "52" }
+async-stream = "0.3.5"
async-trait = "0.1"
+async-std = "1.12"
+aws-config = "1.1.8"
+aws-sdk-glue = "1.21"
bimap = "0.6"
bitvec = "1.0.1"
-chrono = "0.4"
-derive_builder = "0.12.0"
+bytes = "1.5"
+chrono = "0.4.34"
+ctor = "0.2.8"
+derive_builder = "0.20"
either = "1"
-env_logger = "0.10.0"
+env_logger = "0.11.0"
+fnv = "1"
futures = "0.3"
-iceberg = { path = "./crates/iceberg" }
-iceberg-catalog-rest = { path = "./crates/catalog/rest" }
-itertools = "0.12"
-lazy_static = "1"
-log = "^0.4"
-mockito = "^1"
+iceberg = { version = "0.3.0", path = "./crates/iceberg" }
+iceberg-catalog-rest = { version = "0.3.0", path = "./crates/catalog/rest" }
+iceberg-catalog-hms = { version = "0.3.0", path = "./crates/catalog/hms" }
+iceberg-catalog-memory = { version = "0.3.0", path = "./crates/catalog/memory" }
+itertools = "0.13"
+log = "0.4"
+mockito = "1"
murmur3 = "0.5.2"
once_cell = "1"
-opendal = "0.43"
-ordered-float = "4.0.0"
-pretty_assertions = "1.4.0"
+opendal = "0.49"
+ordered-float = "4"
+parquet = "52"
+paste = "1"
+pilota = "0.11.2"
+pretty_assertions = "1.4"
port_scanner = "0.1.5"
-reqwest = { version = "^0.11", features = ["json"] }
-rust_decimal = "1.31.0"
-serde = { version = "^1.0", features = ["rc"] }
+rand = "0.8"
+regex = "1.10.5"
+reqwest = { version = "0.12", default-features = false, features = ["json"] }
+rust_decimal = "1.31"
+serde = { version = "1", features = ["rc"] }
serde_bytes = "0.11.8"
-serde_derive = "^1.0"
-serde_json = "^1.0"
+serde_derive = "1"
+serde_json = "1"
serde_repr = "0.1.16"
-serde_with = "3.4.0"
+serde_with = "3.4"
tempfile = "3.8"
-tokio = { version = "1", features = ["macros"] }
-typed-builder = "^0.18"
+tokio = { version = "1", default-features = false }
+typed-builder = "0.20"
url = "2"
urlencoding = "2"
-uuid = "1.6.1"
+uuid = { version = "1.6.1", features = ["v7"] }
+volo-thrift = "0.10"
+hive_metastore = "0.1"
+tera = "1"
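With `rust-version = "1.77.1"` declared, recent cargo versions refuse to build the workspace on an older toolchain, and CI pins the same version through the `rust_msrv` env var. A quick local MSRV check (a sketch):

```shell
# Verify the workspace still builds on the declared MSRV
rustup toolchain install 1.77.1
cargo +1.77.1 check --workspace --all-features
```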
diff --git a/Makefile b/Makefile
index c34f6c97d..4ecc9bd88 100644
--- a/Makefile
+++ b/Makefile
@@ -17,26 +17,46 @@
.EXPORT_ALL_VARIABLES:
-RUST_LOG = debug
-
build:
- cargo build
+ cargo build --all-targets --all-features --workspace
check-fmt:
- cargo fmt --all -- --check
+ cargo fmt --all -- --check
check-clippy:
- cargo clippy --all-targets --all-features --workspace -- -D warnings
+ cargo clippy --all-targets --all-features --workspace -- -D warnings
+
+install-cargo-sort:
+ cargo install cargo-sort@1.0.9
-cargo-sort:
- cargo install cargo-sort
+cargo-sort: install-cargo-sort
cargo sort -c -w
-check: check-fmt check-clippy cargo-sort
+install-cargo-machete:
+ cargo install cargo-machete
+
+cargo-machete: install-cargo-machete
+ cargo machete
+
+install-taplo-cli:
+ cargo install taplo-cli@0.9.0
+
+fix-toml: install-taplo-cli
+ taplo fmt
-unit-test:
+check-toml: install-taplo-cli
+ taplo check
+
+check: check-fmt check-clippy cargo-sort check-toml cargo-machete
+
+doc-test:
+ cargo test --no-fail-fast --doc --all-features --workspace
+
+unit-test: doc-test
cargo test --no-fail-fast --lib --all-features --workspace
-test:
+test: doc-test
cargo test --no-fail-fast --all-targets --all-features --workspace
- cargo test --no-fail-fast --doc --all-features --workspace
\ No newline at end of file
+
+clean:
+ cargo clean
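Typical local usage of the reworked targets, compiled from the recipes above:

```shell
make check       # fmt, clippy, cargo-sort, taplo TOML check, cargo-machete
make unit-test   # doc tests first, then library tests only
make test        # doc tests first, then all targets with all features
make fix-toml    # auto-format TOML files in place
```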
diff --git a/README.md b/README.md
index d7caa34bc..30168141b 100644
--- a/README.md
+++ b/README.md
@@ -17,18 +17,90 @@
~ under the License.
-->
-# Apache Iceberg Rust
+# Apache Iceberg™ Rust
+
+
+
+Rust implementation of [Apache Iceberg™](https://iceberg.apache.org/).
+
+Working on [v0.3.0 Release Milestone](https://github.com/apache/iceberg-rust/milestone/2)
+
+## Components
+
+The Apache Iceberg Rust project is composed of the following components:
+
+| Name | Release | Docs |
+|--------------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------|
+| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] [![docs dev]][iceberg dev docs] |
+| [iceberg-datafusion] | [![iceberg-datafusion image]][iceberg-datafusion link] | [![docs release]][iceberg-datafusion release docs] [![docs dev]][iceberg-datafusion dev docs] |
+| [iceberg-catalog-glue] | [![iceberg-catalog-glue image]][iceberg-catalog-glue link] | [![docs release]][iceberg-catalog-glue release docs] [![docs dev]][iceberg-catalog-glue dev docs] |
+| [iceberg-catalog-hms] | [![iceberg-catalog-hms image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] [![docs dev]][iceberg-catalog-hms dev docs] |
+| [iceberg-catalog-memory] | [![iceberg-catalog-memory image]][iceberg-catalog-memory link] | [![docs release]][iceberg-catalog-memory release docs] [![docs dev]][iceberg-catalog-memory dev docs] |
+| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] [![docs dev]][iceberg-catalog-rest dev docs] |
+
+[docs release]: https://img.shields.io/badge/docs-release-blue
+[docs dev]: https://img.shields.io/badge/docs-dev-blue
+[iceberg]: crates/iceberg/README.md
+[iceberg image]: https://img.shields.io/crates/v/iceberg.svg
+[iceberg link]: https://crates.io/crates/iceberg
+[iceberg release docs]: https://docs.rs/iceberg
+[iceberg dev docs]: https://rust.iceberg.apache.org/api/iceberg/
+
+[iceberg-datafusion]: crates/integrations/datafusion/README.md
+[iceberg-datafusion image]: https://img.shields.io/crates/v/iceberg-datafusion.svg
+[iceberg-datafusion link]: https://crates.io/crates/iceberg-datafusion
+[iceberg-datafusion dev docs]: https://rust.iceberg.apache.org/api/iceberg_datafusion/
+[iceberg-datafusion release docs]: https://docs.rs/iceberg-datafusion
+
+[iceberg-catalog-glue]: crates/catalog/glue/README.md
+[iceberg-catalog-glue image]: https://img.shields.io/crates/v/iceberg-catalog-glue.svg
+[iceberg-catalog-glue link]: https://crates.io/crates/iceberg-catalog-glue
+[iceberg-catalog-glue release docs]: https://docs.rs/iceberg-catalog-glue
+[iceberg-catalog-glue dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_glue/
+
+[iceberg-catalog-hms]: crates/catalog/hms/README.md
+[iceberg-catalog-hms image]: https://img.shields.io/crates/v/iceberg-catalog-hms.svg
+[iceberg-catalog-hms link]: https://crates.io/crates/iceberg-catalog-hms
+[iceberg-catalog-hms release docs]: https://docs.rs/iceberg-catalog-hms
+[iceberg-catalog-hms dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_hms/
+
+[iceberg-catalog-memory]: crates/catalog/memory/README.md
+[iceberg-catalog-memory image]: https://img.shields.io/crates/v/iceberg-catalog-memory.svg
+[iceberg-catalog-memory link]: https://crates.io/crates/iceberg-catalog-memory
+[iceberg-catalog-memory release docs]: https://docs.rs/iceberg-catalog-memory
+[iceberg-catalog-memory dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_memory/
+
+[iceberg-catalog-rest]: crates/catalog/rest/README.md
+[iceberg-catalog-rest image]: https://img.shields.io/crates/v/iceberg-catalog-rest.svg
+[iceberg-catalog-rest link]: https://crates.io/crates/iceberg-catalog-rest
+[iceberg-catalog-rest release docs]: https://docs.rs/iceberg-catalog-rest
+[iceberg-catalog-rest dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_rest/
+
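+As a minimal sketch of the core `iceberg` crate in use (assuming only APIs that appear elsewhere in this patch, namely `FileIOBuilder` and `NamespaceIdent`; illustrative rather than canonical usage):
+
+```rust
+// Minimal sketch; uses only APIs referenced elsewhere in this patch.
+use iceberg::io::FileIOBuilder;
+use iceberg::NamespaceIdent;
+
+fn main() -> iceberg::Result<()> {
+    // A local-filesystem FileIO, as built by the Python binding's
+    // hello_world() later in this patch.
+    let _file_io = FileIOBuilder::new_fs_io().build()?;
+
+    // Namespace identifiers are thin wrappers around names.
+    let ns = NamespaceIdent::new("default".to_string());
+    println!("namespace: {:?}", ns);
+    Ok(())
+}
+```
+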
+## Supported Rust Version
+
+Iceberg Rust is built and tested with stable Rust, and keeps a rolling MSRV (minimum supported Rust version). The
+current MSRV is 1.77.1.
+
+We also use unstable Rust to run linters such as `clippy` and `rustfmt`. This does not affect downstream users,
+who only need to meet the MSRV.
-Native Rust implementation of [Apache Iceberg](https://iceberg.apache.org/).
## Contribute
-Iceberg is an active open-source project. We are always open to people who want to use it or contribute to it. Here are some ways to go.
+Apache Iceberg is an active open-source project, governed under the Apache Software Foundation (ASF). We are always open to people who want to use or contribute to it. Here are some ways to get involved.
- Start with [Contributing Guide](CONTRIBUTING.md).
- Submit [Issues](https://github.com/apache/iceberg-rust/issues/new) for bug reports or feature requests.
-- Discuss at [dev mailing list](mailto:dev@iceberg.apache.org) ([subscribe](mailto:dev-subscribe@iceberg.apache.org?subject=(send%20this%20email%20to%20subscribe)) / [unsubscribe](mailto:dev-unsubscribe@iceberg.apache.org?subject=(send%20this%20email%20to%20unsubscribe)) / [archives](https://lists.apache.org/list.html?dev@iceberg.apache.org))
-- Talk to community directly at [Slack #rust channel](https://join.slack.com/t/apache-iceberg/shared_invite/zt-1zbov3k6e-KtJfoaxp97YfX6dPz1Bk7A).
+- Discuss at the [dev mailing list](mailto:dev@iceberg.apache.org) ([subscribe](mailto:dev-subscribe@iceberg.apache.org?subject=(send%20this%20email%20to%20subscribe)) / [unsubscribe](mailto:dev-unsubscribe@iceberg.apache.org?subject=(send%20this%20email%20to%20unsubscribe)) / [archives](https://lists.apache.org/list.html?dev@iceberg.apache.org))
+- Talk to the community directly at the [Slack #rust channel](https://join.slack.com/t/apache-iceberg/shared_invite/zt-1zbov3k6e-KtJfoaxp97YfX6dPz1Bk7A).
+
+The Apache Iceberg community is built on the principles described in the [Apache Way](https://www.apache.org/theapacheway/index.html). All who engage with the community are expected to be respectful and open, to act in the best interests of the community, and to abide by the Apache Software Foundation's [Code of Conduct](https://www.apache.org/foundation/policies/conduct.html).
+
+## Users
+
+- [Databend](https://github.com/datafuselabs/databend/): An open-source cloud data warehouse that serves as a cost-effective alternative to Snowflake.
+- [iceberg-catalog](https://github.com/hansetag/iceberg-catalog): A Rust implementation of the Iceberg REST Catalog specification.
## License
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
new file mode 100644
index 000000000..0260f788b
--- /dev/null
+++ b/bindings/python/Cargo.toml
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "pyiceberg_core_rust"
+version = "0.0.1"
+edition = "2021"
+homepage = "https://rust.iceberg.apache.org"
+rust-version = "1.77.1"
+# This crate is used to build python bindings, we don't want to publish it
+publish = false
+
+license = "Apache-2.0"
+keywords = ["iceberg"]
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+iceberg = { path = "../../crates/iceberg" }
+pyo3 = { version = "0.21.1", features = ["extension-module"] }
+arrow = { version = "52.2.0", features = ["pyarrow"] }
diff --git a/bindings/python/README.md b/bindings/python/README.md
new file mode 100644
index 000000000..fe4300e1f
--- /dev/null
+++ b/bindings/python/README.md
@@ -0,0 +1,40 @@
+
+
+# PyIceberg Core
+
+This project builds an iceberg-rust powered core for PyIceberg.
+
+## Setup
+
+```shell
+pip install hatch==1.12.0
+```
+
+## Build
+
+```shell
+hatch run dev:develop
+```
+
+## Test
+
+```shell
+hatch run dev:test
+```
\ No newline at end of file
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
new file mode 100644
index 000000000..f1f0a100f
--- /dev/null
+++ b/bindings/python/pyproject.toml
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[build-system]
+requires = ["maturin>=1.0,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "pyiceberg_core"
+version = "0.0.1"
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+]
+
+[tool.maturin]
+features = ["pyo3/extension-module"]
+python-source = "python"
+module-name = "pyiceberg_core.pyiceberg_core_rust"
+
+[tool.ruff.lint]
+ignore = ["F403", "F405"]
+
+[tool.hatch.envs.dev]
+dependencies = [
+ "maturin>=1.0,<2.0",
+ "pytest>=8.3.2",
+ "pyarrow>=17.0.0",
+]
+
+[tool.hatch.envs.dev.scripts]
+develop = "maturin develop"
+build = "maturin build --out dist --sdist"
+test = "pytest"
diff --git a/bindings/python/python/pyiceberg_core/__init__.py b/bindings/python/python/pyiceberg_core/__init__.py
new file mode 100644
index 000000000..067bb6f07
--- /dev/null
+++ b/bindings/python/python/pyiceberg_core/__init__.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .pyiceberg_core_rust import *
+
+__doc__ = pyiceberg_core_rust.__doc__
+__all__ = pyiceberg_core_rust.__all__
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
new file mode 100644
index 000000000..5c3f77ff7
--- /dev/null
+++ b/bindings/python/src/lib.rs
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use iceberg::io::FileIOBuilder;
+use pyo3::prelude::*;
+use pyo3::wrap_pyfunction;
+
+mod transform;
+
+#[pyfunction]
+fn hello_world() -> PyResult<String> {
+ let _ = FileIOBuilder::new_fs_io().build().unwrap();
+ Ok("Hello, world!".to_string())
+}
+
+#[pymodule]
+fn pyiceberg_core_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
+ m.add_function(wrap_pyfunction!(hello_world, m)?)?;
+
+ m.add_class::<transform::ArrowArrayTransform>()?;
+ Ok(())
+}
diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs
new file mode 100644
index 000000000..8f4585b2a
--- /dev/null
+++ b/bindings/python/src/transform.rs
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use iceberg::spec::Transform;
+use iceberg::transform::create_transform_function;
+
+use arrow::array::{make_array, Array, ArrayData};
+use arrow::pyarrow::{FromPyArrow, ToPyArrow};
+use pyo3::{exceptions::PyValueError, prelude::*};
+
+fn to_py_err(err: iceberg::Error) -> PyErr {
+ PyValueError::new_err(err.to_string())
+}
+
+#[pyclass]
+pub struct ArrowArrayTransform {}
+
+fn apply(array: PyObject, transform: Transform, py: Python) -> PyResult<PyObject> {
+ // import
+ let array = ArrayData::from_pyarrow_bound(array.bind(py))?;
+ let array = make_array(array);
+ let transform_function = create_transform_function(&transform).map_err(to_py_err)?;
+ let array = transform_function.transform(array).map_err(to_py_err)?;
+ // export
+ let array = array.into_data();
+ array.to_pyarrow(py)
+}
+
+#[pymethods]
+impl ArrowArrayTransform {
+ #[staticmethod]
+ pub fn identity(array: PyObject, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Identity, py)
+ }
+
+ #[staticmethod]
+ pub fn void(array: PyObject, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Void, py)
+ }
+
+ #[staticmethod]
+ pub fn year(array: PyObject, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Year, py)
+ }
+
+ #[staticmethod]
+ pub fn month(array: PyObject, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Month, py)
+ }
+
+ #[staticmethod]
+ pub fn day(array: PyObject, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Day, py)
+ }
+
+ #[staticmethod]
+ pub fn hour(array: PyObject, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Hour, py)
+ }
+
+ #[staticmethod]
+ pub fn bucket(array: PyObject, num_buckets: u32, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Bucket(num_buckets), py)
+ }
+
+ #[staticmethod]
+ pub fn truncate(array: PyObject, width: u32, py: Python) -> PyResult<PyObject> {
+ apply(array, Transform::Truncate(width), py)
+ }
+}
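The `apply` helper above is a thin bridge: a PyArrow array comes in, an `iceberg::transform` function runs, and a PyArrow array goes back out. A hedged pure-Rust sketch of the same call chain, minus the PyArrow round trip (the expected bucket values come from tests/test_transform.py below):

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array};
use iceberg::spec::Transform;
use iceberg::transform::create_transform_function;

fn main() -> iceberg::Result<()> {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));

    // Same calls as apply() above, without the Python import/export.
    let bucket = create_transform_function(&Transform::Bucket(10))?;
    let output = bucket.transform(input)?;

    println!("{:?}", output); // expected [6, 2], per the tests below
    Ok(())
}
```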
diff --git a/bindings/python/tests/test_basic.py b/bindings/python/tests/test_basic.py
new file mode 100644
index 000000000..817793ba8
--- /dev/null
+++ b/bindings/python/tests/test_basic.py
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyiceberg_core import hello_world
+
+
+def test_hello_world():
+ hello_world()
diff --git a/bindings/python/tests/test_transform.py b/bindings/python/tests/test_transform.py
new file mode 100644
index 000000000..1fa2d577a
--- /dev/null
+++ b/bindings/python/tests/test_transform.py
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import date, datetime
+
+import pyarrow as pa
+import pytest
+from pyiceberg_core import ArrowArrayTransform
+
+
+def test_identity_transform():
+ arr = pa.array([1, 2])
+ result = ArrowArrayTransform.identity(arr)
+ assert result == arr
+
+
+def test_bucket_transform():
+ arr = pa.array([1, 2])
+ result = ArrowArrayTransform.bucket(arr, 10)
+ expected = pa.array([6, 2], type=pa.int32())
+ assert result == expected
+
+
+def test_bucket_transform_fails_for_list_type_input():
+ arr = pa.array([[1, 2], [3, 4]])
+ with pytest.raises(
+ ValueError,
+ match=r"FeatureUnsupported => Unsupported data type for bucket transform",
+ ):
+ ArrowArrayTransform.bucket(arr, 10)
+
+
+def test_bucket_chunked_array():
+ chunked = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])])
+ result_chunks = []
+ for arr in chunked.iterchunks():
+ result_chunks.append(ArrowArrayTransform.bucket(arr, 10))
+
+ expected = pa.chunked_array(
+ [pa.array([6, 2], type=pa.int32()), pa.array([5, 0], type=pa.int32())]
+ )
+ assert pa.chunked_array(result_chunks).equals(expected)
+
+
+def test_year_transform():
+ arr = pa.array([date(1970, 1, 1), date(2000, 1, 1)])
+ result = ArrowArrayTransform.year(arr)
+ expected = pa.array([0, 30], type=pa.int32())
+ assert result == expected
+
+
+def test_month_transform():
+ arr = pa.array([date(1970, 1, 1), date(2000, 4, 1)])
+ result = ArrowArrayTransform.month(arr)
+ expected = pa.array([0, 30 * 12 + 3], type=pa.int32())
+ assert result == expected
+
+
+def test_day_transform():
+ arr = pa.array([date(1970, 1, 1), date(2000, 4, 1)])
+ result = ArrowArrayTransform.day(arr)
+ expected = pa.array([0, 11048], type=pa.int32())
+ assert result == expected
+
+
+def test_hour_transform():
+ arr = pa.array([datetime(1970, 1, 1, 19, 1, 23), datetime(2000, 3, 1, 12, 1, 23)])
+ result = ArrowArrayTransform.hour(arr)
+ expected = pa.array([19, 264420], type=pa.int32())
+ assert result == expected
+
+
+def test_truncate_transform():
+ arr = pa.array(["this is a long string", "hi my name is sung"])
+ result = ArrowArrayTransform.truncate(arr, 5)
+ expected = pa.array(["this ", "hi my"])
+ assert result == expected
diff --git a/crates/catalog/glue/Cargo.toml b/crates/catalog/glue/Cargo.toml
new file mode 100644
index 000000000..0d2e1f983
--- /dev/null
+++ b/crates/catalog/glue/Cargo.toml
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "iceberg-catalog-glue"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+rust-version = { workspace = true }
+
+categories = ["database"]
+description = "Apache Iceberg Glue Catalog Support"
+repository = { workspace = true }
+license = { workspace = true }
+keywords = ["iceberg", "glue", "catalog"]
+
+[dependencies]
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+aws-config = { workspace = true }
+aws-sdk-glue = { workspace = true }
+iceberg = { workspace = true }
+log = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
+typed-builder = { workspace = true }
+uuid = { workspace = true }
+
+[dev-dependencies]
+ctor = { workspace = true }
+iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
+port_scanner = { workspace = true }
diff --git a/crates/catalog/glue/DEPENDENCIES.rust.tsv b/crates/catalog/glue/DEPENDENCIES.rust.tsv
new file mode 100644
index 000000000..735d5447b
--- /dev/null
+++ b/crates/catalog/glue/DEPENDENCIES.rust.tsv
@@ -0,0 +1,328 @@
+crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC MIT MPL-2.0 OpenSSL Unicode-DFS-2016 Unlicense Zlib
+addr2line@0.22.0 X X
+adler@1.0.2 X X X
+adler32@1.2.0 X
+ahash@0.8.11 X X
+aho-corasick@1.1.3 X X
+alloc-no-stdlib@2.0.4 X
+alloc-stdlib@0.2.2 X
+allocator-api2@0.2.18 X X
+android-tzdata@0.1.1 X X
+android_system_properties@0.1.5 X X
+anstream@0.6.15 X X
+anstyle@1.0.8 X X
+anstyle-parse@0.2.5 X X
+anstyle-query@1.1.1 X X
+anstyle-wincon@3.0.4 X X
+anyhow@1.0.86 X X
+apache-avro@0.17.0 X
+array-init@2.1.0 X X
+arrayvec@0.7.4 X X
+arrow-arith@52.2.0 X
+arrow-array@52.2.0 X
+arrow-buffer@52.2.0 X
+arrow-cast@52.2.0 X
+arrow-data@52.2.0 X
+arrow-ipc@52.2.0 X
+arrow-ord@52.2.0 X
+arrow-schema@52.2.0 X
+arrow-select@52.2.0 X
+arrow-string@52.2.0 X
+async-trait@0.1.81 X X
+atoi@2.0.0 X
+autocfg@1.3.0 X X
+aws-config@1.5.5 X
+aws-credential-types@1.2.0 X
+aws-runtime@1.4.0 X
+aws-sdk-glue@1.53.0 X
+aws-sdk-sso@1.37.0 X
+aws-sdk-ssooidc@1.38.0 X
+aws-sdk-sts@1.37.0 X
+aws-sigv4@1.2.3 X
+aws-smithy-async@1.2.1 X
+aws-smithy-http@0.60.9 X
+aws-smithy-json@0.60.7 X
+aws-smithy-query@0.60.7 X
+aws-smithy-runtime@1.6.2 X
+aws-smithy-runtime-api@1.7.2 X
+aws-smithy-types@1.2.0 X
+aws-smithy-xml@0.60.8 X
+aws-types@1.3.3 X
+backon@0.4.4 X
+backtrace@0.3.73 X X
+base64@0.21.7 X X
+base64@0.22.1 X X
+base64-simd@0.8.0 X
+bigdecimal@0.4.5 X X
+bimap@0.6.3 X X
+bitflags@1.3.2 X X
+bitflags@2.6.0 X X
+bitvec@1.0.1 X
+block-buffer@0.10.4 X X
+brotli@6.0.0 X X
+brotli-decompressor@4.0.1 X X
+bumpalo@3.16.0 X X
+byteorder@1.5.0 X X
+bytes@1.7.1 X
+bytes-utils@0.1.4 X X
+cc@1.1.11 X X
+cfg-if@1.0.0 X X
+chrono@0.4.38 X X
+colorchoice@1.0.2 X X
+const-oid@0.9.6 X X
+const-random@0.1.18 X X
+const-random-macro@0.1.16 X X
+core-foundation@0.9.4 X X
+core-foundation-sys@0.8.7 X X
+core2@0.4.0 X X
+cpufeatures@0.2.13 X X
+crc32c@0.6.8 X X
+crc32fast@1.4.2 X X
+crunchy@0.2.2 X
+crypto-common@0.1.6 X X
+darling@0.20.10 X
+darling_core@0.20.10 X
+darling_macro@0.20.10 X
+dary_heap@0.3.6 X X
+deranged@0.3.11 X X
+derive_builder@0.20.0 X X
+derive_builder_core@0.20.0 X X
+derive_builder_macro@0.20.0 X X
+digest@0.10.7 X X
+either@1.13.0 X X
+env_filter@0.1.2 X X
+env_logger@0.11.5 X X
+equivalent@1.0.1 X X
+fastrand@2.1.0 X X
+flagset@0.4.6 X
+flatbuffers@24.3.25 X
+flate2@1.0.31 X X
+fnv@1.0.7 X X
+form_urlencoded@1.2.1 X X
+funty@2.0.0 X
+futures@0.3.30 X X
+futures-channel@0.3.30 X X
+futures-core@0.3.30 X X
+futures-executor@0.3.30 X X
+futures-io@0.3.30 X X
+futures-macro@0.3.30 X X
+futures-sink@0.3.30 X X
+futures-task@0.3.30 X X
+futures-util@0.3.30 X X
+generic-array@0.14.7 X
+getrandom@0.2.15 X X
+gimli@0.29.0 X X
+h2@0.3.26 X
+half@2.4.1 X X
+hashbrown@0.14.5 X X
+heck@0.5.0 X X
+hermit-abi@0.3.9 X X
+hex@0.4.3 X X
+hmac@0.12.1 X X
+home@0.5.9 X X
+http@0.2.12 X X
+http@1.1.0 X X
+http-body@0.4.6 X
+http-body@1.0.1 X
+http-body-util@0.1.2 X
+httparse@1.9.4 X X
+httpdate@1.0.3 X X
+humantime@2.1.0 X X
+hyper@0.14.30 X
+hyper@1.4.1 X
+hyper-rustls@0.24.2 X X X
+hyper-rustls@0.27.2 X X X
+hyper-util@0.1.7 X
+iana-time-zone@0.1.60 X X
+iana-time-zone-haiku@0.1.2 X X
+iceberg@0.3.0 X
+iceberg-catalog-glue@0.3.0 X
+iceberg-catalog-memory@0.3.0 X
+iceberg_test_utils@0.3.0 X
+ident_case@1.0.1 X X
+idna@0.5.0 X X
+indexmap@2.4.0 X X
+integer-encoding@3.0.4 X
+ipnet@2.9.0 X X
+is_terminal_polyfill@1.70.1 X X
+itertools@0.13.0 X X
+itoa@1.0.11 X X
+jobserver@0.1.32 X X
+js-sys@0.3.70 X X
+lexical-core@0.8.5 X X
+lexical-parse-float@0.8.5 X X
+lexical-parse-integer@0.8.6 X X
+lexical-util@0.8.5 X X
+lexical-write-float@0.8.5 X X
+lexical-write-integer@0.8.5 X X
+libc@0.2.155 X X
+libflate@2.1.0 X
+libflate_lz77@2.1.0 X
+libm@0.2.8 X X
+log@0.4.22 X X
+lz4_flex@0.11.3 X
+md-5@0.10.6 X X
+memchr@2.7.4 X X
+mime@0.3.17 X X
+miniz_oxide@0.7.4 X X X
+mio@1.0.2 X
+murmur3@0.5.2 X X
+num@0.4.3 X X
+num-bigint@0.4.6 X X
+num-complex@0.4.6 X X
+num-conv@0.1.0 X X
+num-integer@0.1.46 X X
+num-iter@0.1.45 X X
+num-rational@0.4.2 X X
+num-traits@0.2.19 X X
+object@0.36.3 X X
+once_cell@1.19.0 X X
+opendal@0.49.0 X
+openssl-probe@0.1.5 X X
+ordered-float@2.10.1 X
+ordered-float@4.2.2 X
+outref@0.5.1 X
+parquet@52.2.0 X
+paste@1.0.15 X X
+percent-encoding@2.3.1 X X
+pin-project@1.1.5 X X
+pin-project-internal@1.1.5 X X
+pin-project-lite@0.2.14 X X
+pin-utils@0.1.0 X X
+pkg-config@0.3.30 X X
+powerfmt@0.2.0 X X
+ppv-lite86@0.2.20 X X
+proc-macro2@1.0.86 X X
+quad-rand@0.2.1 X
+quick-xml@0.36.1 X
+quote@1.0.36 X X
+radium@0.7.0 X
+rand@0.8.5 X X
+rand_chacha@0.3.1 X X
+rand_core@0.6.4 X X
+regex@1.10.6 X X
+regex-automata@0.4.7 X X
+regex-lite@0.1.6 X X
+regex-syntax@0.8.4 X X
+reqsign@0.16.0 X
+reqwest@0.12.5 X X
+ring@0.17.8 X
+rle-decode-fast@1.0.3 X X
+rust_decimal@1.35.0 X
+rustc-demangle@0.1.24 X X
+rustc_version@0.4.0 X X
+rustls@0.21.12 X X X
+rustls@0.23.12 X X X
+rustls-native-certs@0.6.3 X X X
+rustls-pemfile@1.0.4 X X X
+rustls-pemfile@2.1.3 X X X
+rustls-pki-types@1.8.0 X X
+rustls-webpki@0.101.7 X
+rustls-webpki@0.102.6 X
+rustversion@1.0.17 X X
+ryu@1.0.18 X X
+schannel@0.1.23 X
+sct@0.7.1 X X X
+security-framework@2.11.1 X X
+security-framework-sys@2.11.1 X X
+semver@1.0.23 X X
+seq-macro@0.3.5 X X
+serde@1.0.207 X X
+serde_bytes@0.11.15 X X
+serde_derive@1.0.207 X X
+serde_json@1.0.124 X X
+serde_repr@0.1.19 X X
+serde_urlencoded@0.7.1 X X
+serde_with@3.9.0 X X
+serde_with_macros@3.9.0 X X
+sha1@0.10.6 X X
+sha2@0.10.8 X X
+shlex@1.3.0 X X
+signal-hook-registry@1.4.2 X X
+slab@0.4.9 X
+smallvec@1.13.2 X X
+snap@1.1.1 X
+socket2@0.5.7 X X
+spin@0.9.8 X
+static_assertions@1.1.0 X X
+strsim@0.11.1 X
+strum@0.26.3 X
+strum_macros@0.26.4 X
+subtle@2.6.1 X
+syn@2.0.74 X X
+sync_wrapper@1.0.1 X
+tap@1.0.1 X
+thiserror@1.0.63 X X
+thiserror-impl@1.0.63 X X
+thrift@0.17.0 X
+time@0.3.36 X X
+time-core@0.1.2 X X
+tiny-keccak@2.0.2 X
+tinyvec@1.8.0 X X X
+tinyvec_macros@0.1.1 X X X
+tokio@1.39.2 X
+tokio-macros@2.4.0 X
+tokio-rustls@0.24.1 X X
+tokio-rustls@0.26.0 X X
+tokio-util@0.7.11 X
+tower@0.4.13 X
+tower-layer@0.3.3 X
+tower-service@0.3.3 X
+tracing@0.1.40 X
+tracing-attributes@0.1.27 X
+tracing-core@0.1.32 X
+try-lock@0.2.5 X
+twox-hash@1.6.3 X
+typed-builder@0.19.1 X X
+typed-builder-macro@0.19.1 X X
+typenum@1.17.0 X X
+unicode-bidi@0.3.15 X X
+unicode-ident@1.0.12 X X X
+unicode-normalization@0.1.23 X X
+untrusted@0.9.0 X
+url@2.5.2 X X
+urlencoding@2.1.3 X
+utf8parse@0.2.2 X X
+uuid@1.10.0 X X
+version_check@0.9.5 X X
+vsimd@0.8.0 X
+want@0.3.1 X
+wasi@0.11.0+wasi-snapshot-preview1 X X X
+wasm-bindgen@0.2.93 X X
+wasm-bindgen-backend@0.2.93 X X
+wasm-bindgen-futures@0.4.43 X X
+wasm-bindgen-macro@0.2.93 X X
+wasm-bindgen-macro-support@0.2.93 X X
+wasm-bindgen-shared@0.2.93 X X
+wasm-streams@0.4.0 X X
+web-sys@0.3.70 X X
+webpki-roots@0.26.3 X
+windows-core@0.52.0 X X
+windows-sys@0.48.0 X X
+windows-sys@0.52.0 X X
+windows-targets@0.48.5 X X
+windows-targets@0.52.6 X X
+windows_aarch64_gnullvm@0.48.5 X X
+windows_aarch64_gnullvm@0.52.6 X X
+windows_aarch64_msvc@0.48.5 X X
+windows_aarch64_msvc@0.52.6 X X
+windows_i686_gnu@0.48.5 X X
+windows_i686_gnu@0.52.6 X X
+windows_i686_gnullvm@0.52.6 X X
+windows_i686_msvc@0.48.5 X X
+windows_i686_msvc@0.52.6 X X
+windows_x86_64_gnu@0.48.5 X X
+windows_x86_64_gnu@0.52.6 X X
+windows_x86_64_gnullvm@0.48.5 X X
+windows_x86_64_gnullvm@0.52.6 X X
+windows_x86_64_msvc@0.48.5 X X
+windows_x86_64_msvc@0.52.6 X X
+winreg@0.52.0 X
+wyz@0.5.1 X
+xmlparser@0.13.6 X X
+zerocopy@0.7.35 X X X
+zerocopy-derive@0.7.35 X X X
+zeroize@1.8.1 X X
+zstd@0.13.2 X
+zstd-safe@7.2.1 X X
+zstd-sys@2.0.12+zstd.1.5.6 X X
diff --git a/crates/catalog/glue/README.md b/crates/catalog/glue/README.md
new file mode 100644
index 000000000..fb7f6bf0f
--- /dev/null
+++ b/crates/catalog/glue/README.md
@@ -0,0 +1,27 @@
+
+
+# Apache Iceberg Glue Catalog Official Native Rust Implementation
+
+[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-glue.svg)](https://crates.io/crates/iceberg-catalog-glue)
+[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-glue.svg)](https://docs.rs/iceberg-catalog-glue)
+
+This crate contains the official native Rust implementation of the Apache Iceberg Glue Catalog.
+
+See the [API documentation](https://docs.rs/iceberg-catalog-glue/latest) for examples and the full API.
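+
+A minimal usage sketch, based on `GlueCatalogConfig` and `GlueCatalog` as defined in `src/catalog.rs` (the warehouse URI is illustrative):
+
+```rust
+use iceberg::Catalog;
+use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig};
+
+#[tokio::main]
+async fn main() -> iceberg::Result<()> {
+    let config = GlueCatalogConfig::builder()
+        // Required: root location for table data and metadata.
+        .warehouse("s3://my-bucket/warehouse".to_string())
+        .build();
+
+    let catalog = GlueCatalog::new(config).await?;
+
+    // Glue has no nested namespaces, so no parent is passed here.
+    let namespaces = catalog.list_namespaces(None).await?;
+    println!("namespaces: {:?}", namespaces);
+    Ok(())
+}
+```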
diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs
new file mode 100644
index 000000000..18e30f3d0
--- /dev/null
+++ b/crates/catalog/glue/src/catalog.rs
@@ -0,0 +1,600 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::HashMap;
+use std::fmt::Debug;
+
+use async_trait::async_trait;
+use aws_sdk_glue::types::TableInput;
+use iceberg::io::FileIO;
+use iceberg::spec::{TableMetadata, TableMetadataBuilder};
+use iceberg::table::Table;
+use iceberg::{
+ Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation,
+ TableIdent,
+};
+use typed_builder::TypedBuilder;
+
+use crate::error::{from_aws_build_error, from_aws_sdk_error};
+use crate::utils::{
+ convert_to_database, convert_to_glue_table, convert_to_namespace, create_metadata_location,
+ create_sdk_config, get_default_table_location, get_metadata_location, validate_namespace,
+};
+use crate::with_catalog_id;
+
+#[derive(Debug, TypedBuilder)]
+/// Glue Catalog configuration
+pub struct GlueCatalogConfig {
+ #[builder(default, setter(strip_option))]
+ uri: Option<String>,
+ #[builder(default, setter(strip_option))]
+ catalog_id: Option<String>,
+ warehouse: String,
+ #[builder(default)]
+ props: HashMap<String, String>,
+}
+
+struct GlueClient(aws_sdk_glue::Client);
+
+/// Glue Catalog
+pub struct GlueCatalog {
+ config: GlueCatalogConfig,
+ client: GlueClient,
+ file_io: FileIO,
+}
+
+impl Debug for GlueCatalog {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("GlueCatalog")
+ .field("config", &self.config)
+ .finish_non_exhaustive()
+ }
+}
+
+impl GlueCatalog {
+ /// Create a new glue catalog
+ pub async fn new(config: GlueCatalogConfig) -> Result<GlueCatalog> {
+ let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await;
+
+ let client = aws_sdk_glue::Client::new(&sdk_config);
+
+ let file_io = FileIO::from_path(&config.warehouse)?
+ .with_props(&config.props)
+ .build()?;
+
+ Ok(GlueCatalog {
+ config,
+ client: GlueClient(client),
+ file_io,
+ })
+ }
+
+ /// Get the catalog's `FileIO`
+ pub fn file_io(&self) -> FileIO {
+ self.file_io.clone()
+ }
+}
+
+#[async_trait]
+impl Catalog for GlueCatalog {
+ /// List namespaces from glue catalog.
+ ///
+ /// Glue doesn't support nested namespaces.
+ /// An empty list is returned if a parent namespace is provided.
+ async fn list_namespaces(
+ &self,
+ parent: Option<&NamespaceIdent>,
+ ) -> Result<Vec<NamespaceIdent>> {
+ if parent.is_some() {
+ return Ok(vec![]);
+ }
+
+ let mut database_list: Vec<NamespaceIdent> = Vec::new();
+ let mut next_token: Option<String> = None;
+
+ loop {
+ let builder = match &next_token {
+ Some(token) => self.client.0.get_databases().next_token(token),
+ None => self.client.0.get_databases(),
+ };
+ let builder = with_catalog_id!(builder, self.config);
+ let resp = builder.send().await.map_err(from_aws_sdk_error)?;
+
+ let dbs: Vec<NamespaceIdent> = resp
+ .database_list()
+ .iter()
+ .map(|db| NamespaceIdent::new(db.name().to_string()))
+ .collect();
+
+ database_list.extend(dbs);
+
+ next_token = resp.next_token().map(ToOwned::to_owned);
+ if next_token.is_none() {
+ break;
+ }
+ }
+
+ Ok(database_list)
+ }
+
+ /// Creates a new namespace with the given identifier and properties.
+ ///
+ /// Attempts to create a namespace defined by the `namespace`
+ /// parameter and configured with the specified `properties`.
+ ///
+ /// This function can return an error in the following situations:
+ ///
+ /// - Errors from `validate_namespace` if the namespace identifier does not
+ /// meet validation criteria.
+ /// - Errors from `convert_to_database` if the properties cannot be
+ /// successfully converted into a database configuration.
+ /// - Errors from the underlying database creation process, converted using
+ /// `from_sdk_error`.
+ async fn create_namespace(
+ &self,
+ namespace: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<Namespace> {
+ let db_input = convert_to_database(namespace, &properties)?;
+
+ let builder = self.client.0.create_database().database_input(db_input);
+ let builder = with_catalog_id!(builder, self.config);
+
+ builder.send().await.map_err(from_aws_sdk_error)?;
+
+ Ok(Namespace::with_properties(namespace.clone(), properties))
+ }
+
+ /// Retrieves a namespace by its identifier.
+ ///
+ /// Validates the given namespace identifier and then queries the
+ /// underlying database client to fetch the corresponding namespace data.
+ /// Constructs a `Namespace` object with the retrieved data and returns it.
+ ///
+ /// This function can return an error in any of the following situations:
+ /// - If the provided namespace identifier fails validation checks
+ /// - If there is an error querying the database, returned by
+ /// `from_sdk_error`.
+ async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result<Namespace> {
+ let db_name = validate_namespace(namespace)?;
+
+ let builder = self.client.0.get_database().name(&db_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ let resp = builder.send().await.map_err(from_aws_sdk_error)?;
+
+ match resp.database() {
+ Some(db) => {
+ let namespace = convert_to_namespace(db);
+ Ok(namespace)
+ }
+ None => Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Database with name: {} does not exist", db_name),
+ )),
+ }
+ }
+
+ /// Checks if a namespace exists within the Glue Catalog.
+ ///
+ /// Validates the namespace identifier by querying the Glue Catalog
+ /// to determine if the specified namespace (database) exists.
+ ///
+ /// # Returns
+ /// A `Result` indicating the outcome of the check:
+ /// - `Ok(true)` if the namespace exists.
+ /// - `Ok(false)` if the namespace does not exist, identified by a specific
+ /// `EntityNotFoundException` variant.
+ /// - `Err(...)` if an error occurs during validation or the Glue Catalog
+ /// query, with the error encapsulating the issue.
+ async fn namespace_exists(&self, namespace: &NamespaceIdent) -> Result<bool> {
+ let db_name = validate_namespace(namespace)?;
+
+ let builder = self.client.0.get_database().name(&db_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ let resp = builder.send().await;
+
+ match resp {
+ Ok(_) => Ok(true),
+ Err(err) => {
+ if err
+ .as_service_error()
+ .map(|e| e.is_entity_not_found_exception())
+ == Some(true)
+ {
+ return Ok(false);
+ }
+ Err(from_aws_sdk_error(err))
+ }
+ }
+ }
+
+ /// Asynchronously updates properties of an existing namespace.
+ ///
+ /// Converts the given namespace identifier and properties into a database
+ /// representation and then attempts to update the corresponding namespace
+ /// in the Glue Catalog.
+ ///
+ /// # Returns
+ /// Returns `Ok(())` if the namespace update is successful. If the
+ /// namespace cannot be updated due to missing information or an error
+ /// during the update process, an `Err(...)` is returned.
+ async fn update_namespace(
+ &self,
+ namespace: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<()> {
+ let db_name = validate_namespace(namespace)?;
+ let db_input = convert_to_database(namespace, &properties)?;
+
+ let builder = self
+ .client
+ .0
+ .update_database()
+ .name(&db_name)
+ .database_input(db_input);
+ let builder = with_catalog_id!(builder, self.config);
+
+ builder.send().await.map_err(from_aws_sdk_error)?;
+
+ Ok(())
+ }
+
+ /// Asynchronously drops a namespace from the Glue Catalog.
+ ///
+ /// Checks that the namespace is empty. If it still contains tables,
+ /// the namespace is not dropped and an error is returned instead.
+ ///
+ /// # Returns
+ /// A `Result<()>` indicating the outcome:
+ /// - `Ok(())` signifies successful namespace deletion.
+ /// - `Err(...)` signifies failure to drop the namespace due to validation
+ /// errors, connectivity issues, or Glue Catalog constraints.
+ async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
+ let db_name = validate_namespace(namespace)?;
+ let table_list = self.list_tables(namespace).await?;
+
+ if !table_list.is_empty() {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Database with name: {} is not empty", &db_name),
+ ));
+ }
+
+ let builder = self.client.0.delete_database().name(db_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ builder.send().await.map_err(from_aws_sdk_error)?;
+
+ Ok(())
+ }
+
+ /// Asynchronously lists all tables within a specified namespace.
+ ///
+ /// # Returns
+ /// A `Result<Vec<TableIdent>>`, which is:
+ /// - `Ok(vec![...])` containing a vector of `TableIdent` instances, each
+ /// representing a table within the specified namespace.
+ /// - `Err(...)` if an error occurs during namespace validation or while
+ /// querying the database.
+ async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
+ let db_name = validate_namespace(namespace)?;
+
+ let mut table_list: Vec<TableIdent> = Vec::new();
+ let mut next_token: Option<String> = None;
+
+ loop {
+ let builder = match &next_token {
+ Some(token) => self
+ .client
+ .0
+ .get_tables()
+ .database_name(&db_name)
+ .next_token(token),
+ None => self.client.0.get_tables().database_name(&db_name),
+ };
+ let builder = with_catalog_id!(builder, self.config);
+ let resp = builder.send().await.map_err(from_aws_sdk_error)?;
+
+ let tables: Vec<_> = resp
+ .table_list()
+ .iter()
+ .map(|tbl| TableIdent::new(namespace.clone(), tbl.name().to_string()))
+ .collect();
+
+ table_list.extend(tables);
+
+ next_token = resp.next_token().map(ToOwned::to_owned);
+ if next_token.is_none() {
+ break;
+ }
+ }
+
+ Ok(table_list)
+ }
+
+ /// Creates a new table within a specified namespace using the provided
+ /// table creation settings.
+ ///
+ /// # Returns
+ /// A `Result` wrapping a `Table` object representing the newly created
+ /// table.
+ ///
+ /// # Errors
+ /// This function may return an error in several cases, including invalid
+ /// namespace identifiers, failure to determine a default storage location,
+ /// issues generating or writing table metadata, and errors communicating
+ /// with the Glue Catalog.
+ async fn create_table(
+ &self,
+ namespace: &NamespaceIdent,
+ creation: TableCreation,
+ ) -> Result<Table> {
+ let db_name = validate_namespace(namespace)?;
+ let table_name = creation.name.clone();
+
+ let location = match &creation.location {
+ Some(location) => location.clone(),
+ None => {
+ let ns = self.get_namespace(namespace).await?;
+ get_default_table_location(&ns, &db_name, &table_name, &self.config.warehouse)
+ }
+ };
+
+ let metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?;
+ let metadata_location = create_metadata_location(&location, 0)?;
+
+ self.file_io
+ .new_output(&metadata_location)?
+ .write(serde_json::to_vec(&metadata)?.into())
+ .await?;
+
+ let glue_table = convert_to_glue_table(
+ &table_name,
+ metadata_location.clone(),
+ &metadata,
+ metadata.properties(),
+ None,
+ )?;
+
+ let builder = self
+ .client
+ .0
+ .create_table()
+ .database_name(&db_name)
+ .table_input(glue_table);
+ let builder = with_catalog_id!(builder, self.config);
+
+ builder.send().await.map_err(from_aws_sdk_error)?;
+
+ Table::builder()
+ .file_io(self.file_io())
+ .metadata_location(metadata_location)
+ .metadata(metadata)
+ .identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name))
+ .build()
+ }
+
+ /// Loads a table from the Glue Catalog and constructs a `Table` object
+ /// based on its metadata.
+ ///
+ /// # Returns
+ /// A `Result` wrapping a `Table` object that represents the loaded table.
+ ///
+ /// # Errors
+ /// This function may return an error in several scenarios, including:
+ /// - Failure to validate the namespace.
+ /// - Failure to retrieve the table from the Glue Catalog.
+ /// - Absence of metadata location information in the table's properties.
+ /// - Issues reading or deserializing the table's metadata file.
+ async fn load_table(&self, table: &TableIdent) -> Result<Table> {
+ let db_name = validate_namespace(table.namespace())?;
+ let table_name = table.name();
+
+ let builder = self
+ .client
+ .0
+ .get_table()
+ .database_name(&db_name)
+ .name(table_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ let glue_table_output = builder.send().await.map_err(from_aws_sdk_error)?;
+
+ match glue_table_output.table() {
+ None => Err(Error::new(
+ ErrorKind::Unexpected,
+ format!(
+ "Table object for database: {} and table: {} does not exist",
+ db_name, table_name
+ ),
+ )),
+ Some(table) => {
+ let metadata_location = get_metadata_location(&table.parameters)?;
+
+ let input_file = self.file_io.new_input(&metadata_location)?;
+ let metadata_content = input_file.read().await?;
+ let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
+
+ Table::builder()
+ .file_io(self.file_io())
+ .metadata_location(metadata_location)
+ .metadata(metadata)
+ .identifier(TableIdent::new(
+ NamespaceIdent::new(db_name),
+ table_name.to_owned(),
+ ))
+ .build()
+ }
+ }
+ }
+
+ /// Asynchronously drops a table from the database.
+ ///
+ /// # Errors
+ /// Returns an error if:
+ /// - The namespace provided in `table` cannot be validated
+ /// or does not exist.
+ /// - The underlying database client encounters an error while
+ /// attempting to drop the table. This includes scenarios where
+ /// the table does not exist.
+ /// - Any network or communication error occurs with the database backend.
+ async fn drop_table(&self, table: &TableIdent) -> Result<()> {
+ let db_name = validate_namespace(table.namespace())?;
+ let table_name = table.name();
+
+ let builder = self
+ .client
+ .0
+ .delete_table()
+ .database_name(&db_name)
+ .name(table_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ builder.send().await.map_err(from_aws_sdk_error)?;
+
+ Ok(())
+ }
+
+ /// Asynchronously checks the existence of a specified table
+ /// in the database.
+ ///
+ /// # Returns
+ /// - `Ok(true)` if the table exists in the database.
+ /// - `Ok(false)` if the table does not exist in the database.
+ /// - `Err(...)` if an error occurs during the process
+ async fn table_exists(&self, table: &TableIdent) -> Result<bool> {
+ let db_name = validate_namespace(table.namespace())?;
+ let table_name = table.name();
+
+ let builder = self
+ .client
+ .0
+ .get_table()
+ .database_name(&db_name)
+ .name(table_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ let resp = builder.send().await;
+
+ match resp {
+ Ok(_) => Ok(true),
+ Err(err) => {
+ if err
+ .as_service_error()
+ .map(|e| e.is_entity_not_found_exception())
+ == Some(true)
+ {
+ return Ok(false);
+ }
+ Err(from_aws_sdk_error(err))
+ }
+ }
+ }
+
+ /// Asynchronously renames a table within the database
+ /// or moves it between namespaces (databases).
+ ///
+ /// # Returns
+ /// - `Ok(())` on successful rename or move of the table.
+ /// - `Err(...)` if an error occurs during the process.
+ async fn rename_table(&self, src: &TableIdent, dest: &TableIdent) -> Result<()> {
+ let src_db_name = validate_namespace(src.namespace())?;
+ let dest_db_name = validate_namespace(dest.namespace())?;
+
+ let src_table_name = src.name();
+ let dest_table_name = dest.name();
+
+ let builder = self
+ .client
+ .0
+ .get_table()
+ .database_name(&src_db_name)
+ .name(src_table_name);
+ let builder = with_catalog_id!(builder, self.config);
+
+ let glue_table_output = builder.send().await.map_err(from_aws_sdk_error)?;
+
+ match glue_table_output.table() {
+ None => Err(Error::new(
+ ErrorKind::Unexpected,
+ format!(
+ "'Table' object for database: {} and table: {} does not exist",
+ src_db_name, src_table_name
+ ),
+ )),
+ Some(table) => {
+ let rename_table_input = TableInput::builder()
+ .name(dest_table_name)
+ .set_parameters(table.parameters.clone())
+ .set_storage_descriptor(table.storage_descriptor.clone())
+ .set_table_type(table.table_type.clone())
+ .set_description(table.description.clone())
+ .build()
+ .map_err(from_aws_build_error)?;
+
+ let builder = self
+ .client
+ .0
+ .create_table()
+ .database_name(&dest_db_name)
+ .table_input(rename_table_input);
+ let builder = with_catalog_id!(builder, self.config);
+
+ builder.send().await.map_err(from_aws_sdk_error)?;
+
+ let drop_src_table_result = self.drop_table(src).await;
+
+ match drop_src_table_result {
+ Ok(_) => Ok(()),
+ Err(_) => {
+ let err_msg_src_table = format!(
+ "Failed to drop old table {}.{}.",
+ src_db_name, src_table_name
+ );
+
+ let drop_dest_table_result = self.drop_table(dest).await;
+
+ match drop_dest_table_result {
+ Ok(_) => Err(Error::new(
+ ErrorKind::Unexpected,
+ format!(
+ "{} Rolled back table creation for {}.{}.",
+ err_msg_src_table, dest_db_name, dest_table_name
+ ),
+ )),
+ Err(_) => Err(Error::new(
+ ErrorKind::Unexpected,
+ format!(
+ "{} Failed to roll back table creation for {}.{}. Please clean up manually.",
+ err_msg_src_table, dest_db_name, dest_table_name
+ ),
+ )),
+ }
+ }
+ }
+ }
+ }
+ }
+
+ async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
+ Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "Updating a table is not supported yet",
+ ))
+ }
+}
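To tie the trait methods above together, a hedged sketch of creating a table through any `Catalog` implementation; the `TableCreation::builder` calls mirror the schema.rs tests later in this patch, and the namespace and table names are illustrative:

```rust
use iceberg::spec::Schema;
use iceberg::{Catalog, NamespaceIdent, TableCreation};

async fn create_events_table(catalog: &impl Catalog) -> iceberg::Result<()> {
    let ns = NamespaceIdent::new("analytics".to_string());

    let creation = TableCreation::builder()
        .name("events".to_string())
        // No .location(): create_table() derives a default location
        // under the namespace/warehouse, as implemented above.
        .schema(Schema::builder().build()?)
        .build();

    let table = catalog.create_table(&ns, creation).await?;
    println!("created: {:?}", table.identifier());
    Ok(())
}
```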
diff --git a/crates/catalog/glue/src/error.rs b/crates/catalog/glue/src/error.rs
new file mode 100644
index 000000000..a94f6c220
--- /dev/null
+++ b/crates/catalog/glue/src/error.rs
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fmt::Debug;
+
+use anyhow::anyhow;
+use iceberg::{Error, ErrorKind};
+
+/// Format AWS SDK error into iceberg error
+pub(crate) fn from_aws_sdk_error<T>(error: aws_sdk_glue::error::SdkError<T>) -> Error
+where T: Debug {
+ Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting aws skd error".to_string(),
+ )
+ .with_source(anyhow!("aws sdk error: {:?}", error))
+}
+
+/// Format AWS Build error into iceberg error
+pub(crate) fn from_aws_build_error(error: aws_sdk_glue::error::BuildError) -> Error {
+ Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting aws build error".to_string(),
+ )
+ .with_source(anyhow!("aws build error: {:?}", error))
+}
diff --git a/crates/catalog/glue/src/lib.rs b/crates/catalog/glue/src/lib.rs
new file mode 100644
index 000000000..237657335
--- /dev/null
+++ b/crates/catalog/glue/src/lib.rs
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg Glue Catalog implementation.
+
+#![deny(missing_docs)]
+
+mod catalog;
+mod error;
+mod schema;
+mod utils;
+pub use catalog::*;
+pub use utils::{
+ AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN,
+};
diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs
new file mode 100644
index 000000000..bb676e36e
--- /dev/null
+++ b/crates/catalog/glue/src/schema.rs
@@ -0,0 +1,482 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Property `iceberg.field.id` for `Column`
+pub(crate) const ICEBERG_FIELD_ID: &str = "iceberg.field.id";
+/// Property `iceberg.field.optional` for `Column`
+pub(crate) const ICEBERG_FIELD_OPTIONAL: &str = "iceberg.field.optional";
+/// Property `iceberg.field.current` for `Column`
+pub(crate) const ICEBERG_FIELD_CURRENT: &str = "iceberg.field.current";
+
+use std::collections::HashMap;
+
+use aws_sdk_glue::types::Column;
+use iceberg::spec::{visit_schema, PrimitiveType, SchemaVisitor, TableMetadata};
+use iceberg::{Error, ErrorKind, Result};
+
+use crate::error::from_aws_build_error;
+
+type GlueSchema = Vec<Column>;
+
+#[derive(Debug, Default)]
+pub(crate) struct GlueSchemaBuilder {
+ schema: GlueSchema,
+ is_current: bool,
+ depth: usize,
+}
+
+impl GlueSchemaBuilder {
+ /// Creates a new `GlueSchemaBuilder` from iceberg `Schema`
+ pub fn from_iceberg(metadata: &TableMetadata) -> Result<Self> {
+ let current_schema = metadata.current_schema();
+
+ let mut builder = Self {
+ schema: Vec::new(),
+ is_current: true,
+ depth: 0,
+ };
+
+ visit_schema(current_schema, &mut builder)?;
+
+ builder.is_current = false;
+
+ for schema in metadata.schemas_iter() {
+ if schema.schema_id() == current_schema.schema_id() {
+ continue;
+ }
+
+ visit_schema(schema, &mut builder)?;
+ }
+
+ Ok(builder)
+ }
+
+ /// Returns the newly converted `GlueSchema`
+ pub fn build(self) -> GlueSchema {
+ self.schema
+ }
+
+ /// Check if is in `StructType` while traversing schema
+ fn is_inside_struct(&self) -> bool {
+ self.depth > 0
+ }
+}
+
+impl SchemaVisitor for GlueSchemaBuilder {
+ type T = String;
+
+ fn schema(
+ &mut self,
+ _schema: &iceberg::spec::Schema,
+ value: Self::T,
+ ) -> iceberg::Result<String> {
+ Ok(value)
+ }
+
+ fn before_struct_field(&mut self, _field: &iceberg::spec::NestedFieldRef) -> Result<()> {
+ self.depth += 1;
+ Ok(())
+ }
+
+ fn r#struct(
+ &mut self,
+ r#_struct: &iceberg::spec::StructType,
+ results: Vec<String>,
+ ) -> iceberg::Result<String> {
+ Ok(format!("struct<{}>", results.join(", ")))
+ }
+
+ fn after_struct_field(&mut self, _field: &iceberg::spec::NestedFieldRef) -> Result<()> {
+ self.depth -= 1;
+ Ok(())
+ }
+
+ fn field(
+ &mut self,
+ field: &iceberg::spec::NestedFieldRef,
+ value: String,
+ ) -> iceberg::Result<String> {
+ if self.is_inside_struct() {
+ return Ok(format!("{}:{}", field.name, &value));
+ }
+
+ let parameters = HashMap::from([
+ (ICEBERG_FIELD_ID.to_string(), format!("{}", field.id)),
+ (
+ ICEBERG_FIELD_OPTIONAL.to_string(),
+ format!("{}", field.required).to_lowercase(),
+ ),
+ (
+ ICEBERG_FIELD_CURRENT.to_string(),
+ format!("{}", self.is_current).to_lowercase(),
+ ),
+ ]);
+
+ let mut builder = Column::builder()
+ .name(field.name.clone())
+ .r#type(&value)
+ .set_parameters(Some(parameters));
+
+ if let Some(comment) = field.doc.as_ref() {
+ builder = builder.comment(comment);
+ }
+
+ let column = builder.build().map_err(from_aws_build_error)?;
+
+ self.schema.push(column);
+
+ Ok(value)
+ }
+
+ fn list(&mut self, _list: &iceberg::spec::ListType, value: String) -> iceberg::Result<String> {
+ Ok(format!("array<{}>", value))
+ }
+
+ fn map(
+ &mut self,
+ _map: &iceberg::spec::MapType,
+ key_value: String,
+ value: String,
+ ) -> iceberg::Result<String> {
+ Ok(format!("map<{},{}>", key_value, value))
+ }
+
+ fn primitive(&mut self, p: &iceberg::spec::PrimitiveType) -> iceberg::Result<String> {
+ let glue_type = match p {
+ PrimitiveType::Boolean => "boolean".to_string(),
+ PrimitiveType::Int => "int".to_string(),
+ PrimitiveType::Long => "bigint".to_string(),
+ PrimitiveType::Float => "float".to_string(),
+ PrimitiveType::Double => "double".to_string(),
+ PrimitiveType::Date => "date".to_string(),
+ PrimitiveType::Timestamp => "timestamp".to_string(),
+ PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
+ PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
+ PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => {
+ "string".to_string()
+ }
+ PrimitiveType::Binary | PrimitiveType::Fixed(_) => "binary".to_string(),
+ PrimitiveType::Decimal { precision, scale } => {
+ format!("decimal({},{})", precision, scale)
+ }
+ _ => {
+ return Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "Conversion from 'Timestamptz' is not supported",
+ ))
+ }
+ };
+
+ Ok(glue_type)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use iceberg::spec::{Schema, TableMetadataBuilder};
+ use iceberg::TableCreation;
+
+ use super::*;
+
+ fn create_metadata(schema: Schema) -> Result<TableMetadata> {
+ let table_creation = TableCreation::builder()
+ .name("my_table".to_string())
+ .location("my_location".to_string())
+ .schema(schema)
+ .build();
+ let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
+
+ Ok(metadata)
+ }
+
+ fn create_column(
+ name: impl Into<String>,
+ r#type: impl Into<String>,
+ id: impl Into<String>,
+ ) -> Result<Column> {
+ let parameters = HashMap::from([
+ (ICEBERG_FIELD_ID.to_string(), id.into()),
+ (ICEBERG_FIELD_OPTIONAL.to_string(), "true".to_string()),
+ (ICEBERG_FIELD_CURRENT.to_string(), "true".to_string()),
+ ]);
+
+ Column::builder()
+ .name(name)
+ .r#type(r#type)
+ .set_comment(None)
+ .set_parameters(Some(parameters))
+ .build()
+ .map_err(from_aws_build_error)
+ }
+
+ #[test]
+ fn test_schema_with_simple_fields() -> Result<()> {
+ let record = r#"{
+ "type": "struct",
+ "schema-id": 1,
+ "fields": [
+ {
+ "id": 1,
+ "name": "c1",
+ "required": true,
+ "type": "boolean"
+ },
+ {
+ "id": 2,
+ "name": "c2",
+ "required": true,
+ "type": "int"
+ },
+ {
+ "id": 3,
+ "name": "c3",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 4,
+ "name": "c4",
+ "required": true,
+ "type": "float"
+ },
+ {
+ "id": 5,
+ "name": "c5",
+ "required": true,
+ "type": "double"
+ },
+ {
+ "id": 6,
+ "name": "c6",
+ "required": true,
+ "type": "decimal(2,2)"
+ },
+ {
+ "id": 7,
+ "name": "c7",
+ "required": true,
+ "type": "date"
+ },
+ {
+ "id": 8,
+ "name": "c8",
+ "required": true,
+ "type": "time"
+ },
+ {
+ "id": 9,
+ "name": "c9",
+ "required": true,
+ "type": "timestamp"
+ },
+ {
+ "id": 10,
+ "name": "c10",
+ "required": true,
+ "type": "string"
+ },
+ {
+ "id": 11,
+ "name": "c11",
+ "required": true,
+ "type": "uuid"
+ },
+ {
+ "id": 12,
+ "name": "c12",
+ "required": true,
+ "type": "fixed[4]"
+ },
+ {
+ "id": 13,
+ "name": "c13",
+ "required": true,
+ "type": "binary"
+ }
+ ]
+ }"#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+ let metadata = create_metadata(schema)?;
+
+ let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
+
+ let expected = vec![
+ create_column("c1", "boolean", "1")?,
+ create_column("c2", "int", "2")?,
+ create_column("c3", "bigint", "3")?,
+ create_column("c4", "float", "4")?,
+ create_column("c5", "double", "5")?,
+ create_column("c6", "decimal(2,2)", "6")?,
+ create_column("c7", "date", "7")?,
+ create_column("c8", "string", "8")?,
+ create_column("c9", "timestamp", "9")?,
+ create_column("c10", "string", "10")?,
+ create_column("c11", "string", "11")?,
+ create_column("c12", "binary", "12")?,
+ create_column("c13", "binary", "13")?,
+ ];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_schema_with_structs() -> Result<()> {
+ let record = r#"{
+ "type": "struct",
+ "schema-id": 1,
+ "fields": [
+ {
+ "id": 1,
+ "name": "person",
+ "required": true,
+ "type": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 2,
+ "name": "name",
+ "required": true,
+ "type": "string"
+ },
+ {
+ "id": 3,
+ "name": "age",
+ "required": false,
+ "type": "int"
+ }
+ ]
+ }
+ }
+ ]
+ }"#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+ let metadata = create_metadata(schema)?;
+
+ let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
+
+ let expected = vec![create_column(
+ "person",
+ "struct",
+ "1",
+ )?];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_schema_with_struct_inside_list() -> Result<()> {
+ let record = r#"
+ {
+ "schema-id": 1,
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "location",
+ "required": true,
+ "type": {
+ "type": "list",
+ "element-id": 2,
+ "element-required": true,
+ "element": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 3,
+ "name": "latitude",
+ "required": false,
+ "type": "float"
+ },
+ {
+ "id": 4,
+ "name": "longitude",
+ "required": false,
+ "type": "float"
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ "#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+ let metadata = create_metadata(schema)?;
+
+ let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
+
+ let expected = vec![create_column(
+ "location",
+ "array>",
+ "1",
+ )?];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_schema_with_nested_maps() -> Result<()> {
+ let record = r#"
+ {
+ "schema-id": 1,
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "quux",
+ "required": true,
+ "type": {
+ "type": "map",
+ "key-id": 2,
+ "key": "string",
+ "value-id": 3,
+ "value-required": true,
+ "value": {
+ "type": "map",
+ "key-id": 4,
+ "key": "string",
+ "value-id": 5,
+ "value-required": true,
+ "value": "int"
+ }
+ }
+ }
+ ]
+ }
+ "#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+ let metadata = create_metadata(schema)?;
+
+ let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
+
+ let expected = vec![create_column("quux", "map>", "1")?];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+}
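
The nested-type expectations above follow Glue's Hive-style type grammar: struct fields render as `name:type` joined with `", "`, lists wrap their element as `array<...>`, and maps render as `map<key,value>` with no space after the comma. A minimal free-standing sketch of that formatting, using a hypothetical `glue_struct` helper that is not part of this crate:

```rust
// Illustrative only: mirrors the type strings asserted in the tests above.
fn glue_struct(fields: &[(&str, &str)]) -> String {
    let rendered: Vec<String> = fields
        .iter()
        .map(|(name, ty)| format!("{}:{}", name, ty))
        .collect();
    format!("struct<{}>", rendered.join(", "))
}

fn main() {
    let person = glue_struct(&[("name", "string"), ("age", "int")]);
    assert_eq!(person, "struct<name:string, age:int>");

    let element = glue_struct(&[("latitude", "float"), ("longitude", "float")]);
    assert_eq!(
        format!("array<{}>", element),
        "array<struct<latitude:float, longitude:float>>"
    );

    // Map keys and values are joined without a trailing space.
    assert_eq!(
        format!("map<{},{}>", "string", "map<string,int>"),
        "map<string,map<string,int>>"
    );
}
```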
diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs
new file mode 100644
index 000000000..a99fb19c7
--- /dev/null
+++ b/crates/catalog/glue/src/utils.rs
@@ -0,0 +1,518 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::HashMap;
+
+use aws_config::{BehaviorVersion, Region, SdkConfig};
+use aws_sdk_glue::config::Credentials;
+use aws_sdk_glue::types::{Database, DatabaseInput, StorageDescriptor, TableInput};
+use iceberg::spec::TableMetadata;
+use iceberg::{Error, ErrorKind, Namespace, NamespaceIdent, Result};
+use uuid::Uuid;
+
+use crate::error::from_aws_build_error;
+use crate::schema::GlueSchemaBuilder;
+
+/// Property aws profile name
+pub const AWS_PROFILE_NAME: &str = "profile_name";
+/// Property aws region
+pub const AWS_REGION_NAME: &str = "region_name";
+/// Property aws access key
+pub const AWS_ACCESS_KEY_ID: &str = "aws_access_key_id";
+/// Property aws secret access key
+pub const AWS_SECRET_ACCESS_KEY: &str = "aws_secret_access_key";
+/// Property aws session token
+pub const AWS_SESSION_TOKEN: &str = "aws_session_token";
+/// Parameter namespace description
+const DESCRIPTION: &str = "description";
+/// Parameter namespace location uri
+const LOCATION: &str = "location_uri";
+/// Property `metadata_location` for `TableInput`
+const METADATA_LOCATION: &str = "metadata_location";
+/// Property `previous_metadata_location` for `TableInput`
+const PREV_METADATA_LOCATION: &str = "previous_metadata_location";
+/// Property external table for `TableInput`
+const EXTERNAL_TABLE: &str = "EXTERNAL_TABLE";
+/// Parameter key `table_type` for `TableInput`
+const TABLE_TYPE: &str = "table_type";
+/// Parameter value `table_type` for `TableInput`
+const ICEBERG: &str = "ICEBERG";
+
+/// Creates an aws sdk configuration based on
+/// provided properties and an optional endpoint URL.
+pub(crate) async fn create_sdk_config(
+ properties: &HashMap<String, String>,
+ endpoint_uri: Option<&String>,
+) -> SdkConfig {
+ let mut config = aws_config::defaults(BehaviorVersion::latest());
+
+ if let Some(endpoint) = endpoint_uri {
+ config = config.endpoint_url(endpoint)
+ };
+
+ if properties.is_empty() {
+ return config.load().await;
+ }
+
+ if let (Some(access_key), Some(secret_key)) = (
+ properties.get(AWS_ACCESS_KEY_ID),
+ properties.get(AWS_SECRET_ACCESS_KEY),
+ ) {
+ let session_token = properties.get(AWS_SESSION_TOKEN).cloned();
+ let credentials_provider =
+ Credentials::new(access_key, secret_key, session_token, None, "properties");
+
+ config = config.credentials_provider(credentials_provider)
+ };
+
+ if let Some(profile_name) = properties.get(AWS_PROFILE_NAME) {
+ config = config.profile_name(profile_name);
+ }
+
+ if let Some(region_name) = properties.get(AWS_REGION_NAME) {
+ let region = Region::new(region_name.clone());
+ config = config.region(region);
+ }
+
+ config.load().await
+}
+
+/// Create `DatabaseInput` from `NamespaceIdent` and properties
+pub(crate) fn convert_to_database(
+ namespace: &NamespaceIdent,
+ properties: &HashMap<String, String>,
+) -> Result<DatabaseInput> {
+ let db_name = validate_namespace(namespace)?;
+ let mut builder = DatabaseInput::builder().name(db_name);
+
+ for (k, v) in properties.iter() {
+ match k.as_ref() {
+ DESCRIPTION => {
+ builder = builder.description(v);
+ }
+ LOCATION => {
+ builder = builder.location_uri(v);
+ }
+ _ => {
+ builder = builder.parameters(k, v);
+ }
+ }
+ }
+
+ builder.build().map_err(from_aws_build_error)
+}
+
+/// Create `Namespace` from aws sdk glue `Database`
+pub(crate) fn convert_to_namespace(database: &Database) -> Namespace {
+ let db_name = database.name().to_string();
+ let mut properties = database
+ .parameters()
+ .map_or_else(HashMap::new, |p| p.clone());
+
+ if let Some(location_uri) = database.location_uri() {
+ properties.insert(LOCATION.to_string(), location_uri.to_string());
+ };
+
+ if let Some(description) = database.description() {
+ properties.insert(DESCRIPTION.to_string(), description.to_string());
+ }
+
+ Namespace::with_properties(NamespaceIdent::new(db_name), properties)
+}
+
+/// Converts Iceberg table metadata into an
+/// AWS Glue `TableInput` representation.
+///
+/// This function facilitates the integration of Iceberg tables with AWS Glue
+/// by converting Iceberg table metadata into a Glue-compatible `TableInput`
+/// structure.
+pub(crate) fn convert_to_glue_table(
+ table_name: impl Into<String>,
+ metadata_location: String,
+ metadata: &TableMetadata,
+ properties: &HashMap<String, String>,
+ prev_metadata_location: Option,
+) -> Result<TableInput> {
+ let glue_schema = GlueSchemaBuilder::from_iceberg(metadata)?.build();
+
+ let storage_descriptor = StorageDescriptor::builder()
+ .set_columns(Some(glue_schema))
+ .location(&metadata_location)
+ .build();
+
+ let mut parameters = HashMap::from([
+ (TABLE_TYPE.to_string(), ICEBERG.to_string()),
+ (METADATA_LOCATION.to_string(), metadata_location),
+ ]);
+
+ if let Some(prev) = prev_metadata_location {
+ parameters.insert(PREV_METADATA_LOCATION.to_string(), prev);
+ }
+
+ let mut table_input_builder = TableInput::builder()
+ .name(table_name)
+ .set_parameters(Some(parameters))
+ .storage_descriptor(storage_descriptor)
+ .table_type(EXTERNAL_TABLE);
+
+ if let Some(description) = properties.get(DESCRIPTION) {
+ table_input_builder = table_input_builder.description(description);
+ }
+
+ let table_input = table_input_builder.build().map_err(from_aws_build_error)?;
+
+ Ok(table_input)
+}
+
+/// Checks if provided `NamespaceIdent` is valid
+pub(crate) fn validate_namespace(namespace: &NamespaceIdent) -> Result<String> {
+ let name = namespace.as_ref();
+
+ if name.len() != 1 {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "Invalid database name: {:?}, hierarchical namespaces are not supported",
+ namespace
+ ),
+ ));
+ }
+
+ let name = name[0].clone();
+
+ if name.is_empty() {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Invalid database, provided namespace is empty.",
+ ));
+ }
+
+ Ok(name)
+}
+
+/// Get default table location from `Namespace` properties
+pub(crate) fn get_default_table_location(
+ namespace: &Namespace,
+ db_name: impl AsRef<str>,
+ table_name: impl AsRef<str>,
+ warehouse: impl AsRef<str>,
+) -> String {
+ let properties = namespace.properties();
+
+ match properties.get(LOCATION) {
+ Some(location) => format!("{}/{}", location, table_name.as_ref()),
+ None => {
+ let warehouse_location = warehouse.as_ref().trim_end_matches('/');
+
+ format!(
+ "{}/{}.db/{}",
+ warehouse_location,
+ db_name.as_ref(),
+ table_name.as_ref()
+ )
+ }
+ }
+}
+
+/// Create metadata location from `location` and `version`
+pub(crate) fn create_metadata_location(location: impl AsRef<str>, version: i32) -> Result<String> {
+ if version < 0 {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "Table metadata version: '{}' must be a non-negative integer",
+ version
+ ),
+ ));
+ };
+
+ let version = format!("{:0>5}", version);
+ let id = Uuid::new_v4();
+ let metadata_location = format!(
+ "{}/metadata/{}-{}.metadata.json",
+ location.as_ref(),
+ version,
+ id
+ );
+
+ Ok(metadata_location)
+}
+
+/// Get metadata location from `GlueTable` parameters
+pub(crate) fn get_metadata_location(
+ parameters: &Option<HashMap<String, String>>,
+) -> Result<String> {
+ match parameters {
+ Some(properties) => match properties.get(METADATA_LOCATION) {
+ Some(location) => Ok(location.to_string()),
+ None => Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("No '{}' set on table", METADATA_LOCATION),
+ )),
+ },
+ None => Err(Error::new(
+ ErrorKind::DataInvalid,
+ "No 'parameters' set on table. Location of metadata is undefined",
+ )),
+ }
+}
+
+#[macro_export]
+/// Extends aws sdk builder with `catalog_id` if present
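+///
+/// Illustrative usage (assumes `builder` is an AWS SDK fluent request builder
+/// and the config exposes an `Option<String>` field named `catalog_id`):
+/// `let builder = with_catalog_id!(builder, config);`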
+macro_rules! with_catalog_id {
+ ($builder:expr, $config:expr) => {{
+ if let Some(catalog_id) = &$config.catalog_id {
+ $builder.catalog_id(catalog_id)
+ } else {
+ $builder
+ }
+ }};
+}
+
+#[cfg(test)]
+mod tests {
+ use aws_sdk_glue::config::ProvideCredentials;
+ use aws_sdk_glue::types::Column;
+ use iceberg::spec::{NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type};
+ use iceberg::{Namespace, Result, TableCreation};
+
+ use super::*;
+ use crate::schema::{ICEBERG_FIELD_CURRENT, ICEBERG_FIELD_ID, ICEBERG_FIELD_OPTIONAL};
+
+ fn create_metadata(schema: Schema) -> Result<TableMetadata> {
+ let table_creation = TableCreation::builder()
+ .name("my_table".to_string())
+ .location("my_location".to_string())
+ .schema(schema)
+ .build();
+ let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
+
+ Ok(metadata)
+ }
+
+ #[test]
+ fn test_get_metadata_location() -> Result<()> {
+ let params_valid = Some(HashMap::from([(
+ METADATA_LOCATION.to_string(),
+ "my_location".to_string(),
+ )]));
+ let params_missing_key = Some(HashMap::from([(
+ "not_here".to_string(),
+ "my_location".to_string(),
+ )]));
+
+ let result_valid = get_metadata_location(¶ms_valid)?;
+ let result_missing_key = get_metadata_location(¶ms_missing_key);
+ let result_no_params = get_metadata_location(&None);
+
+ assert_eq!(result_valid, "my_location");
+ assert!(result_missing_key.is_err());
+ assert!(result_no_params.is_err());
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_convert_to_glue_table() -> Result<()> {
+ let table_name = "my_table".to_string();
+ let location = "s3a://warehouse/hive".to_string();
+ let metadata_location = create_metadata_location(location.clone(), 0)?;
+ let properties = HashMap::new();
+ let schema = Schema::builder()
+ .with_schema_id(1)
+ .with_fields(vec![NestedField::required(
+ 1,
+ "foo",
+ Type::Primitive(PrimitiveType::Int),
+ )
+ .into()])
+ .build()?;
+
+ let metadata = create_metadata(schema)?;
+
+ let parameters = HashMap::from([
+ (ICEBERG_FIELD_ID.to_string(), "1".to_string()),
+ (ICEBERG_FIELD_OPTIONAL.to_string(), "true".to_string()),
+ (ICEBERG_FIELD_CURRENT.to_string(), "true".to_string()),
+ ]);
+
+ let column = Column::builder()
+ .name("foo")
+ .r#type("int")
+ .set_parameters(Some(parameters))
+ .set_comment(None)
+ .build()
+ .map_err(from_aws_build_error)?;
+
+ let storage_descriptor = StorageDescriptor::builder()
+ .set_columns(Some(vec![column]))
+ .location(&metadata_location)
+ .build();
+
+ let result =
+ convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?;
+
+ assert_eq!(result.name(), &table_name);
+ assert_eq!(result.description(), None);
+ assert_eq!(result.storage_descriptor, Some(storage_descriptor));
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_create_metadata_location() -> Result<()> {
+ let location = "my_base_location";
+ let valid_version = 0;
+ let invalid_version = -1;
+
+ let valid_result = create_metadata_location(location, valid_version)?;
+ let invalid_result = create_metadata_location(location, invalid_version);
+
+ assert!(valid_result.starts_with("my_base_location/metadata/00000-"));
+ assert!(valid_result.ends_with(".metadata.json"));
+ assert!(invalid_result.is_err());
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_get_default_table_location() -> Result<()> {
+ let properties = HashMap::from([(LOCATION.to_string(), "db_location".to_string())]);
+
+ let namespace =
+ Namespace::with_properties(NamespaceIdent::new("default".into()), properties);
+ let db_name = validate_namespace(namespace.name())?;
+ let table_name = "my_table";
+
+ let expected = "db_location/my_table";
+ let result =
+ get_default_table_location(&namespace, db_name, table_name, "warehouse_location");
+
+ assert_eq!(expected, result);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_get_default_table_location_warehouse() -> Result<()> {
+ let namespace = Namespace::new(NamespaceIdent::new("default".into()));
+ let db_name = validate_namespace(namespace.name())?;
+ let table_name = "my_table";
+
+ let expected = "warehouse_location/default.db/my_table";
+ let result =
+ get_default_table_location(&namespace, db_name, table_name, "warehouse_location");
+
+ assert_eq!(expected, result);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_convert_to_namespace() -> Result<()> {
+ let db = Database::builder()
+ .name("my_db")
+ .location_uri("my_location")
+ .description("my_description")
+ .build()
+ .map_err(from_aws_build_error)?;
+
+ let properties = HashMap::from([
+ (DESCRIPTION.to_string(), "my_description".to_string()),
+ (LOCATION.to_string(), "my_location".to_string()),
+ ]);
+
+ let expected =
+ Namespace::with_properties(NamespaceIdent::new("my_db".to_string()), properties);
+ let result = convert_to_namespace(&db);
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_convert_to_database() -> Result<()> {
+ let namespace = NamespaceIdent::new("my_database".to_string());
+ let properties = HashMap::from([(LOCATION.to_string(), "my_location".to_string())]);
+
+ let result = convert_to_database(&namespace, &properties)?;
+
+ assert_eq!("my_database", result.name());
+ assert_eq!(Some("my_location".to_string()), result.location_uri);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_validate_namespace() {
+ let valid_ns = Namespace::new(NamespaceIdent::new("ns".to_string()));
+ let empty_ns = Namespace::new(NamespaceIdent::new("".to_string()));
+ let hierarchical_ns = Namespace::new(
+ NamespaceIdent::from_vec(vec!["level1".to_string(), "level2".to_string()]).unwrap(),
+ );
+
+ let valid = validate_namespace(valid_ns.name());
+ let empty = validate_namespace(empty_ns.name());
+ let hierarchical = validate_namespace(hierarchical_ns.name());
+
+ assert!(valid.is_ok());
+ assert!(empty.is_err());
+ assert!(hierarchical.is_err());
+ }
+
+ #[tokio::test]
+ async fn test_config_with_custom_endpoint() {
+ let properties = HashMap::new();
+ let endpoint_url = "http://custom_url:5000";
+
+ let sdk_config = create_sdk_config(&properties, Some(&endpoint_url.to_string())).await;
+
+ let result = sdk_config.endpoint_url().unwrap();
+
+ assert_eq!(result, endpoint_url);
+ }
+
+ #[tokio::test]
+ async fn test_config_with_properties() {
+ let properties = HashMap::from([
+ (AWS_PROFILE_NAME.to_string(), "my_profile".to_string()),
+ (AWS_REGION_NAME.to_string(), "us-east-1".to_string()),
+ (AWS_ACCESS_KEY_ID.to_string(), "my-access-id".to_string()),
+ (
+ AWS_SECRET_ACCESS_KEY.to_string(),
+ "my-secret-key".to_string(),
+ ),
+ (AWS_SESSION_TOKEN.to_string(), "my-token".to_string()),
+ ]);
+
+ let sdk_config = create_sdk_config(&properties, None).await;
+
+ let region = sdk_config.region().unwrap().as_ref();
+ let credentials = sdk_config
+ .credentials_provider()
+ .unwrap()
+ .provide_credentials()
+ .await
+ .unwrap();
+
+ assert_eq!("us-east-1", region);
+ assert_eq!("my-access-id", credentials.access_key_id());
+ assert_eq!("my-secret-key", credentials.secret_access_key());
+ assert_eq!("my-token", credentials.session_token().unwrap());
+ }
+}
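
Two layout conventions from the helpers above are worth spelling out: default table locations resolve to `<warehouse>/<db>.db/<table>` unless the namespace sets a `location_uri` property, and metadata files are written as zero-padded `<version>-<uuid>.metadata.json` under `<location>/metadata/`. A free-standing sketch of both conventions, assuming only the `uuid` crate (the crate's own helpers stay private in `utils.rs`):

```rust
use uuid::Uuid;

// Sketch of `get_default_table_location`: used when a namespace carries no
// `location_uri` property.
fn default_table_location(warehouse: &str, db: &str, table: &str) -> String {
    format!("{}/{}.db/{}", warehouse.trim_end_matches('/'), db, table)
}

// Sketch of `create_metadata_location`: the version is zero-padded to five
// digits and paired with a fresh UUID.
fn metadata_location(location: &str, version: i32) -> String {
    format!(
        "{}/metadata/{:0>5}-{}.metadata.json",
        location,
        version,
        Uuid::new_v4()
    )
}

fn main() {
    assert_eq!(
        default_table_location("s3a://warehouse/", "default", "my_table"),
        "s3a://warehouse/default.db/my_table"
    );
    let loc = metadata_location("s3a://warehouse/default.db/my_table", 0);
    assert!(loc.starts_with("s3a://warehouse/default.db/my_table/metadata/00000-"));
    assert!(loc.ends_with(".metadata.json"));
}
```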
diff --git a/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml b/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml
new file mode 100644
index 000000000..0a2c938a7
--- /dev/null
+++ b/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+services:
+ minio:
+ image: minio/minio:RELEASE.2024-03-07T00-43-48Z
+ expose:
+ - 9000
+ - 9001
+ environment:
+ - MINIO_ROOT_USER=admin
+ - MINIO_ROOT_PASSWORD=password
+ - MINIO_DOMAIN=minio
+ command: [ "server", "/data", "--console-address", ":9001" ]
+
+ mc:
+ depends_on:
+ - minio
+ image: minio/mc:RELEASE.2024-03-07T00-31-49Z
+ environment:
+ - AWS_ACCESS_KEY_ID=admin
+ - AWS_SECRET_ACCESS_KEY=password
+ - AWS_REGION=us-east-1
+ entrypoint: >
+ /bin/sh -c " until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null "
+
+ moto:
+ image: motoserver/moto:5.0.3
+ expose:
+ - 5000
diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs
new file mode 100644
index 000000000..d9c5b4e0b
--- /dev/null
+++ b/crates/catalog/glue/tests/glue_catalog_test.rs
@@ -0,0 +1,367 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Integration tests for glue catalog.
+
+use std::collections::HashMap;
+use std::net::SocketAddr;
+use std::sync::RwLock;
+
+use ctor::{ctor, dtor};
+use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
+use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
+use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCreation, TableIdent};
+use iceberg_catalog_glue::{
+ GlueCatalog, GlueCatalogConfig, AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY,
+};
+use iceberg_test_utils::docker::DockerCompose;
+use iceberg_test_utils::{normalize_test_name, set_up};
+use port_scanner::scan_port_addr;
+use tokio::time::sleep;
+
+const GLUE_CATALOG_PORT: u16 = 5000;
+const MINIO_PORT: u16 = 9000;
+static DOCKER_COMPOSE_ENV: RwLock<Option<DockerCompose>> = RwLock::new(None);
+
+#[ctor]
+fn before_all() {
+ let mut guard = DOCKER_COMPOSE_ENV.write().unwrap();
+ let docker_compose = DockerCompose::new(
+ normalize_test_name(module_path!()),
+ format!("{}/testdata/glue_catalog", env!("CARGO_MANIFEST_DIR")),
+ );
+ docker_compose.run();
+ guard.replace(docker_compose);
+}
+
+#[dtor]
+fn after_all() {
+ let mut guard = DOCKER_COMPOSE_ENV.write().unwrap();
+ guard.take();
+}
+
+async fn get_catalog() -> GlueCatalog {
+ set_up();
+
+ let (glue_catalog_ip, minio_ip) = {
+ let guard = DOCKER_COMPOSE_ENV.read().unwrap();
+ let docker_compose = guard.as_ref().unwrap();
+ (
+ docker_compose.get_container_ip("moto"),
+ docker_compose.get_container_ip("minio"),
+ )
+ };
+ let glue_socket_addr = SocketAddr::new(glue_catalog_ip, GLUE_CATALOG_PORT);
+ let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT);
+ while !scan_port_addr(glue_socket_addr) {
+ log::info!("Waiting for 1s glue catalog to ready...");
+ sleep(std::time::Duration::from_millis(1000)).await;
+ }
+
+ let props = HashMap::from([
+ (AWS_ACCESS_KEY_ID.to_string(), "my_access_id".to_string()),
+ (
+ AWS_SECRET_ACCESS_KEY.to_string(),
+ "my_secret_key".to_string(),
+ ),
+ (AWS_REGION_NAME.to_string(), "us-east-1".to_string()),
+ (
+ S3_ENDPOINT.to_string(),
+ format!("http://{}", minio_socket_addr),
+ ),
+ (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
+ (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
+ (S3_REGION.to_string(), "us-east-1".to_string()),
+ ]);
+
+ let config = GlueCatalogConfig::builder()
+ .uri(format!("http://{}", glue_socket_addr))
+ .warehouse("s3a://warehouse/hive".to_string())
+ .props(props.clone())
+ .build();
+
+ GlueCatalog::new(config).await.unwrap()
+}
+
+async fn set_test_namespace(catalog: &GlueCatalog, namespace: &NamespaceIdent) -> Result<()> {
+ let properties = HashMap::new();
+ catalog.create_namespace(namespace, properties).await?;
+
+ Ok(())
+}
+
+fn set_table_creation(location: impl ToString, name: impl ToString) -> Result<TableCreation> {
+ let schema = Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![
+ NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::required(2, "bar", Type::Primitive(PrimitiveType::String)).into(),
+ ])
+ .build()?;
+
+ let creation = TableCreation::builder()
+ .location(location.to_string())
+ .name(name.to_string())
+ .properties(HashMap::new())
+ .schema(schema)
+ .build();
+
+ Ok(creation)
+}
+
+#[tokio::test]
+async fn test_rename_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_rename_table".into()));
+
+ catalog
+ .create_namespace(namespace.name(), HashMap::new())
+ .await?;
+
+ let table = catalog.create_table(namespace.name(), creation).await?;
+
+ let dest = TableIdent::new(namespace.name().clone(), "my_table_rename".to_string());
+
+ catalog.rename_table(table.identifier(), &dest).await?;
+
+ let table = catalog.load_table(&dest).await?;
+ assert_eq!(table.identifier(), &dest);
+
+ let src = TableIdent::new(namespace.name().clone(), "my_table".to_string());
+
+ let src_table_exists = catalog.table_exists(&src).await?;
+ assert!(!src_table_exists);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_table_exists() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_table_exists".into()));
+
+ catalog
+ .create_namespace(namespace.name(), HashMap::new())
+ .await?;
+
+ let ident = TableIdent::new(namespace.name().clone(), "my_table".to_string());
+
+ let exists = catalog.table_exists(&ident).await?;
+ assert!(!exists);
+
+ let table = catalog.create_table(namespace.name(), creation).await?;
+
+ let exists = catalog.table_exists(table.identifier()).await?;
+
+ assert!(exists);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_drop_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_drop_table".into()));
+
+ catalog
+ .create_namespace(namespace.name(), HashMap::new())
+ .await?;
+
+ let table = catalog.create_table(namespace.name(), creation).await?;
+
+ catalog.drop_table(table.identifier()).await?;
+
+ let result = catalog.table_exists(table.identifier()).await?;
+
+ assert!(!result);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_load_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_load_table".into()));
+
+ catalog
+ .create_namespace(namespace.name(), HashMap::new())
+ .await?;
+
+ let expected = catalog.create_table(namespace.name(), creation).await?;
+
+ let result = catalog
+ .load_table(&TableIdent::new(
+ namespace.name().clone(),
+ "my_table".to_string(),
+ ))
+ .await?;
+
+ assert_eq!(result.identifier(), expected.identifier());
+ assert_eq!(result.metadata_location(), expected.metadata_location());
+ assert_eq!(result.metadata(), expected.metadata());
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_create_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let namespace = NamespaceIdent::new("test_create_table".to_string());
+ set_test_namespace(&catalog, &namespace).await?;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+
+ let result = catalog.create_table(&namespace, creation).await?;
+
+ assert_eq!(result.identifier().name(), "my_table");
+ assert!(result
+ .metadata_location()
+ .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")));
+ assert!(
+ catalog
+ .file_io()
+ .is_exist("s3a://warehouse/hive/metadata/")
+ .await?
+ );
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_list_tables() -> Result<()> {
+ let catalog = get_catalog().await;
+ let namespace = NamespaceIdent::new("test_list_tables".to_string());
+ set_test_namespace(&catalog, &namespace).await?;
+
+ let expected = vec![];
+ let result = catalog.list_tables(&namespace).await?;
+
+ assert_eq!(result, expected);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_drop_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+ let namespace = NamespaceIdent::new("test_drop_namespace".to_string());
+ set_test_namespace(&catalog, &namespace).await?;
+
+ let exists = catalog.namespace_exists(&namespace).await?;
+ assert!(exists);
+
+ catalog.drop_namespace(&namespace).await?;
+
+ let exists = catalog.namespace_exists(&namespace).await?;
+ assert!(!exists);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_update_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+ let namespace = NamespaceIdent::new("test_update_namespace".into());
+ set_test_namespace(&catalog, &namespace).await?;
+
+ let before_update = catalog.get_namespace(&namespace).await?;
+ let before_update = before_update.properties().get("description");
+
+ assert_eq!(before_update, None);
+
+ let properties = HashMap::from([("description".to_string(), "my_update".to_string())]);
+
+ catalog.update_namespace(&namespace, properties).await?;
+
+ let after_update = catalog.get_namespace(&namespace).await?;
+ let after_update = after_update.properties().get("description");
+
+ assert_eq!(after_update, Some("my_update".to_string()).as_ref());
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_namespace_exists() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let namespace = NamespaceIdent::new("test_namespace_exists".into());
+
+ let exists = catalog.namespace_exists(&namespace).await?;
+ assert!(!exists);
+
+ set_test_namespace(&catalog, &namespace).await?;
+
+ let exists = catalog.namespace_exists(&namespace).await?;
+ assert!(exists);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_get_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let namespace = NamespaceIdent::new("test_get_namespace".into());
+
+ let does_not_exist = catalog.get_namespace(&namespace).await;
+ assert!(does_not_exist.is_err());
+
+ set_test_namespace(&catalog, &namespace).await?;
+
+ let result = catalog.get_namespace(&namespace).await?;
+ let expected = Namespace::new(namespace);
+
+ assert_eq!(result, expected);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_create_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let properties = HashMap::new();
+ let namespace = NamespaceIdent::new("test_create_namespace".into());
+
+ let expected = Namespace::new(namespace.clone());
+
+ let result = catalog.create_namespace(&namespace, properties).await?;
+
+ assert_eq!(result, expected);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_list_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let namespace = NamespaceIdent::new("test_list_namespace".to_string());
+ set_test_namespace(&catalog, &namespace).await?;
+
+ let result = catalog.list_namespaces(None).await?;
+ assert!(result.contains(&namespace));
+
+ let empty_result = catalog.list_namespaces(Some(&namespace)).await?;
+ assert!(empty_result.is_empty());
+
+ Ok(())
+}
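
Distilled from the `get_catalog` helper above, end-user setup against a reachable Glue endpoint looks roughly like the sketch below; the URI, warehouse, and namespace values are placeholders rather than fixtures from this patch:

```rust
use std::collections::HashMap;

use iceberg::{Catalog, NamespaceIdent};
use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig};

#[tokio::main]
async fn main() -> iceberg::Result<()> {
    // Placeholder endpoint and warehouse; mirror the integration-test setup.
    let config = GlueCatalogConfig::builder()
        .uri("http://localhost:5000".to_string())
        .warehouse("s3a://warehouse/hive".to_string())
        .props(HashMap::new())
        .build();

    let catalog = GlueCatalog::new(config).await?;

    let ns = NamespaceIdent::new("example_ns".to_string());
    catalog.create_namespace(&ns, HashMap::new()).await?;
    println!("namespaces: {:?}", catalog.list_namespaces(None).await?);
    Ok(())
}
```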
diff --git a/crates/catalog/hms/Cargo.toml b/crates/catalog/hms/Cargo.toml
index 61c03fddf..e7d4ec2f3 100644
--- a/crates/catalog/hms/Cargo.toml
+++ b/crates/catalog/hms/Cargo.toml
@@ -17,23 +17,32 @@
[package]
name = "iceberg-catalog-hms"
-version = "0.1.0"
-edition = "2021"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+rust-version = { workspace = true }
categories = ["database"]
description = "Apache Iceberg Hive Metastore Catalog Support"
-repository = "https://github.com/apache/iceberg-rust"
-license = "Apache-2.0"
+repository = { workspace = true }
+license = { workspace = true }
keywords = ["iceberg", "hive", "catalog"]
[dependencies]
+anyhow = { workspace = true }
async-trait = { workspace = true }
-hive_metastore = "0.0.1"
+chrono = { workspace = true }
+hive_metastore = { workspace = true }
iceberg = { workspace = true }
-# the thrift upstream suffered from no regular rust release.
-#
-# [test-rs](https://github.com/tent-rs) is an organization that helps resolves this
-# issue. And [tent-thrift](https://github.com/tent-rs/thrift) is a fork of the thrift
-# crate, built from the thrift upstream with only version bumped.
-thrift = { package = "tent-thrift", version = "0.18.1" }
+log = { workspace = true }
+pilota = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
typed-builder = { workspace = true }
+uuid = { workspace = true }
+volo-thrift = { workspace = true }
+
+[dev-dependencies]
+ctor = { workspace = true }
+iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
+port_scanner = { workspace = true }
diff --git a/crates/catalog/hms/DEPENDENCIES.rust.tsv b/crates/catalog/hms/DEPENDENCIES.rust.tsv
new file mode 100644
index 000000000..f54295ca5
--- /dev/null
+++ b/crates/catalog/hms/DEPENDENCIES.rust.tsv
@@ -0,0 +1,328 @@
+crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC MIT MPL-2.0 OpenSSL Unicode-DFS-2016 Unlicense Zlib
+addr2line@0.22.0 X X
+adler@1.0.2 X X X
+adler32@1.2.0 X
+ahash@0.8.11 X X
+aho-corasick@1.1.3 X X
+alloc-no-stdlib@2.0.4 X
+alloc-stdlib@0.2.2 X
+allocator-api2@0.2.18 X X
+android-tzdata@0.1.1 X X
+android_system_properties@0.1.5 X X
+anstream@0.6.15 X X
+anstyle@1.0.8 X X
+anstyle-parse@0.2.5 X X
+anstyle-query@1.1.1 X X
+anstyle-wincon@3.0.4 X X
+anyhow@1.0.86 X X
+apache-avro@0.17.0 X
+array-init@2.1.0 X X
+arrayref@0.3.8 X
+arrayvec@0.7.4 X X
+arrow-arith@52.2.0 X
+arrow-array@52.2.0 X
+arrow-buffer@52.2.0 X
+arrow-cast@52.2.0 X
+arrow-data@52.2.0 X
+arrow-ipc@52.2.0 X
+arrow-ord@52.2.0 X
+arrow-schema@52.2.0 X
+arrow-select@52.2.0 X
+arrow-string@52.2.0 X
+async-broadcast@0.7.1 X X
+async-recursion@1.1.1 X X
+async-trait@0.1.81 X X
+atoi@2.0.0 X
+autocfg@1.3.0 X X
+backon@0.4.4 X
+backtrace@0.3.73 X X
+base64@0.22.1 X X
+bigdecimal@0.4.5 X X
+bimap@0.6.3 X X
+bitflags@1.3.2 X X
+bitflags@2.6.0 X X
+bitvec@1.0.1 X
+block-buffer@0.10.4 X X
+brotli@6.0.0 X X
+brotli-decompressor@4.0.1 X X
+bumpalo@3.16.0 X X
+byteorder@1.5.0 X X
+bytes@1.7.1 X
+cc@1.1.11 X X
+cfg-if@1.0.0 X X
+cfg_aliases@0.1.1 X
+chrono@0.4.38 X X
+colorchoice@1.0.2 X X
+concurrent-queue@2.5.0 X X
+const-oid@0.9.6 X X
+const-random@0.1.18 X X
+const-random-macro@0.1.16 X X
+core-foundation-sys@0.8.7 X X
+core2@0.4.0 X X
+cpufeatures@0.2.13 X X
+crc32c@0.6.8 X X
+crc32fast@1.4.2 X X
+crossbeam-utils@0.8.20 X X
+crunchy@0.2.2 X
+crypto-common@0.1.6 X X
+darling@0.20.10 X
+darling_core@0.20.10 X
+darling_macro@0.20.10 X
+dary_heap@0.3.6 X X
+dashmap@5.5.3 X
+derivative@2.2.0 X X
+derive_builder@0.20.0 X X
+derive_builder_core@0.20.0 X X
+derive_builder_macro@0.20.0 X X
+digest@0.10.7 X X
+either@1.13.0 X X
+env_filter@0.1.2 X X
+env_logger@0.11.5 X X
+equivalent@1.0.1 X X
+event-listener@5.3.1 X X
+event-listener-strategy@0.5.2 X X
+fastrand@2.1.0 X X
+faststr@0.2.21 X X
+flagset@0.4.6 X
+flatbuffers@24.3.25 X
+flate2@1.0.31 X X
+fnv@1.0.7 X X
+form_urlencoded@1.2.1 X X
+funty@2.0.0 X
+futures@0.3.30 X X
+futures-channel@0.3.30 X X
+futures-core@0.3.30 X X
+futures-executor@0.3.30 X X
+futures-io@0.3.30 X X
+futures-macro@0.3.30 X X
+futures-sink@0.3.30 X X
+futures-task@0.3.30 X X
+futures-util@0.3.30 X X
+generic-array@0.14.7 X
+getrandom@0.2.15 X X
+gimli@0.29.0 X X
+half@2.4.1 X X
+hashbrown@0.14.5 X X
+heck@0.5.0 X X
+hermit-abi@0.3.9 X X
+hex@0.4.3 X X
+hive_metastore@0.1.0 X
+hmac@0.12.1 X X
+home@0.5.9 X X
+http@1.1.0 X X
+http-body@1.0.1 X
+http-body-util@0.1.2 X
+httparse@1.9.4 X X
+humantime@2.1.0 X X
+hyper@1.4.1 X
+hyper-rustls@0.27.2 X X X
+hyper-util@0.1.7 X
+iana-time-zone@0.1.60 X X
+iana-time-zone-haiku@0.1.2 X X
+iceberg@0.3.0 X
+iceberg-catalog-hms@0.3.0 X
+iceberg-catalog-memory@0.3.0 X
+iceberg_test_utils@0.3.0 X
+ident_case@1.0.1 X X
+idna@0.5.0 X X
+indexmap@2.4.0 X X
+integer-encoding@3.0.4 X
+integer-encoding@4.0.2 X
+ipnet@2.9.0 X X
+is_terminal_polyfill@1.70.1 X X
+itertools@0.13.0 X X
+itoa@1.0.11 X X
+jobserver@0.1.32 X X
+js-sys@0.3.70 X X
+lazy_static@1.5.0 X X
+lexical-core@0.8.5 X X
+lexical-parse-float@0.8.5 X X
+lexical-parse-integer@0.8.6 X X
+lexical-util@0.8.5 X X
+lexical-write-float@0.8.5 X X
+lexical-write-integer@0.8.5 X X
+libc@0.2.155 X X
+libflate@2.1.0 X
+libflate_lz77@2.1.0 X
+libm@0.2.8 X X
+linked-hash-map@0.5.6 X X
+linkedbytes@0.1.8 X X
+lock_api@0.4.12 X X
+log@0.4.22 X X
+lz4_flex@0.11.3 X
+md-5@0.10.6 X X
+memchr@2.7.4 X X
+memoffset@0.9.1 X
+metainfo@0.7.12 X X
+mime@0.3.17 X X
+miniz_oxide@0.7.4 X X X
+mio@1.0.2 X
+motore@0.4.1 X X
+motore-macros@0.4.1 X X
+mur3@0.1.0 X
+murmur3@0.5.2 X X
+nix@0.28.0 X
+num@0.4.3 X X
+num-bigint@0.4.6 X X
+num-complex@0.4.6 X X
+num-integer@0.1.46 X X
+num-iter@0.1.45 X X
+num-rational@0.4.2 X X
+num-traits@0.2.19 X X
+num_enum@0.7.3 X X X
+num_enum_derive@0.7.3 X X X
+object@0.36.3 X X
+once_cell@1.19.0 X X
+opendal@0.49.0 X
+ordered-float@2.10.1 X
+ordered-float@4.2.2 X
+page_size@0.6.0 X X
+parking@2.2.0 X X
+parking_lot@0.12.3 X X
+parking_lot_core@0.9.10 X X
+parquet@52.2.0 X
+paste@1.0.15 X X
+percent-encoding@2.3.1 X X
+pilota@0.11.3 X X
+pin-project@1.1.5 X X
+pin-project-internal@1.1.5 X X
+pin-project-lite@0.2.14 X X
+pin-utils@0.1.0 X X
+pkg-config@0.3.30 X X
+ppv-lite86@0.2.20 X X
+proc-macro-crate@3.1.0 X X
+proc-macro2@1.0.86 X X
+quad-rand@0.2.1 X
+quick-xml@0.36.1 X
+quote@1.0.36 X X
+radium@0.7.0 X
+rand@0.8.5 X X
+rand_chacha@0.3.1 X X
+rand_core@0.6.4 X X
+redox_syscall@0.5.3 X
+regex@1.10.6 X X
+regex-automata@0.4.7 X X
+regex-lite@0.1.6 X X
+regex-syntax@0.8.4 X X
+reqsign@0.16.0 X
+reqwest@0.12.5 X X
+ring@0.17.8 X
+rle-decode-fast@1.0.3 X X
+rust_decimal@1.35.0 X
+rustc-demangle@0.1.24 X X
+rustc-hash@2.0.0 X X
+rustc_version@0.4.0 X X
+rustls@0.23.12 X X X
+rustls-pemfile@2.1.3 X X X
+rustls-pki-types@1.8.0 X X
+rustls-webpki@0.102.6 X
+rustversion@1.0.17 X X
+ryu@1.0.18 X X
+scopeguard@1.2.0 X X
+semver@1.0.23 X X
+seq-macro@0.3.5 X X
+serde@1.0.207 X X
+serde_bytes@0.11.15 X X
+serde_derive@1.0.207 X X
+serde_json@1.0.124 X X
+serde_repr@0.1.19 X X
+serde_urlencoded@0.7.1 X X
+serde_with@3.9.0 X X
+serde_with_macros@3.9.0 X X
+sha1@0.10.6 X X
+sha2@0.10.8 X X
+shlex@1.3.0 X X
+signal-hook-registry@1.4.2 X X
+simdutf8@0.1.4 X X
+slab@0.4.9 X
+smallvec@1.13.2 X X
+snap@1.1.1 X
+socket2@0.5.7 X X
+sonic-rs@0.3.10 X
+spin@0.9.8 X
+static_assertions@1.1.0 X X
+strsim@0.11.1 X
+strum@0.26.3 X
+strum_macros@0.26.4 X
+subtle@2.6.1 X
+syn@1.0.109 X X
+syn@2.0.74 X X
+sync_wrapper@1.0.1 X
+tap@1.0.1 X
+thiserror@1.0.63 X X
+thiserror-impl@1.0.63 X X
+thrift@0.17.0 X
+tiny-keccak@2.0.2 X
+tinyvec@1.8.0 X X X
+tinyvec_macros@0.1.1 X X X
+tokio@1.39.2 X
+tokio-macros@2.4.0 X
+tokio-rustls@0.26.0 X X
+tokio-stream@0.1.15 X
+tokio-util@0.7.11 X
+toml_datetime@0.6.8 X X
+toml_edit@0.21.1 X X
+tower@0.4.13 X
+tower-layer@0.3.3 X
+tower-service@0.3.3 X
+tracing@0.1.40 X
+tracing-attributes@0.1.27 X
+tracing-core@0.1.32 X
+try-lock@0.2.5 X
+twox-hash@1.6.3 X
+typed-builder@0.19.1 X X
+typed-builder-macro@0.19.1 X X
+typenum@1.17.0 X X
+unicode-bidi@0.3.15 X X
+unicode-ident@1.0.12 X X X
+unicode-normalization@0.1.23 X X
+untrusted@0.9.0 X
+url@2.5.2 X X
+utf8parse@0.2.2 X X
+uuid@1.10.0 X X
+version_check@0.9.5 X X
+volo@0.10.1 X X
+volo-thrift@0.10.2 X X
+want@0.3.1 X
+wasi@0.11.0+wasi-snapshot-preview1 X X X
+wasm-bindgen@0.2.93 X X
+wasm-bindgen-backend@0.2.93 X X
+wasm-bindgen-futures@0.4.43 X X
+wasm-bindgen-macro@0.2.93 X X
+wasm-bindgen-macro-support@0.2.93 X X
+wasm-bindgen-shared@0.2.93 X X
+wasm-streams@0.4.0 X X
+web-sys@0.3.70 X X
+webpki-roots@0.26.3 X
+winapi@0.3.9 X X
+winapi-i686-pc-windows-gnu@0.4.0 X X
+winapi-x86_64-pc-windows-gnu@0.4.0 X X
+windows-core@0.52.0 X X
+windows-sys@0.48.0 X X
+windows-sys@0.52.0 X X
+windows-targets@0.48.5 X X
+windows-targets@0.52.6 X X
+windows_aarch64_gnullvm@0.48.5 X X
+windows_aarch64_gnullvm@0.52.6 X X
+windows_aarch64_msvc@0.48.5 X X
+windows_aarch64_msvc@0.52.6 X X
+windows_i686_gnu@0.48.5 X X
+windows_i686_gnu@0.52.6 X X
+windows_i686_gnullvm@0.52.6 X X
+windows_i686_msvc@0.48.5 X X
+windows_i686_msvc@0.52.6 X X
+windows_x86_64_gnu@0.48.5 X X
+windows_x86_64_gnu@0.52.6 X X
+windows_x86_64_gnullvm@0.48.5 X X
+windows_x86_64_gnullvm@0.52.6 X X
+windows_x86_64_msvc@0.48.5 X X
+windows_x86_64_msvc@0.52.6 X X
+winnow@0.5.40 X
+winreg@0.52.0 X
+wyz@0.5.1 X
+zerocopy@0.7.35 X X X
+zerocopy-derive@0.7.35 X X X
+zeroize@1.8.1 X X
+zstd@0.13.2 X
+zstd-safe@7.2.1 X X
+zstd-sys@2.0.12+zstd.1.5.6 X X
diff --git a/crates/catalog/hms/README.md b/crates/catalog/hms/README.md
new file mode 100644
index 000000000..bebb2200a
--- /dev/null
+++ b/crates/catalog/hms/README.md
@@ -0,0 +1,27 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+# Apache Iceberg HiveMetaStore Catalog Official Native Rust Implementation
+
+[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-hms.svg)](https://crates.io/crates/iceberg-catalog-hms)
+[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-hms.svg)](https://docs.rs/iceberg-catalog-hms/latest/)
+
+This crate contains the official Native Rust implementation of Apache Iceberg HiveMetaStore Catalog.
+
+See the [API documentation](https://docs.rs/iceberg-catalog-hms/latest) for examples and the full API.
diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs
index 2b1fe2cc4..6e5db1968 100644
--- a/crates/catalog/hms/src/catalog.rs
+++ b/crates/catalog/hms/src/catalog.rs
@@ -15,49 +15,57 @@
// specific language governing permissions and limitations
// under the License.
-use super::utils::*;
-use async_trait::async_trait;
-use hive_metastore::{TThriftHiveMetastoreSyncClient, ThriftHiveMetastoreSyncClient};
-use iceberg::table::Table;
-use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent};
use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
-use std::sync::{Arc, Mutex};
-use thrift::protocol::{TBinaryInputProtocol, TBinaryOutputProtocol};
-use thrift::transport::{
- ReadHalf, TBufferedReadTransport, TBufferedWriteTransport, TIoChannel, WriteHalf,
+use std::net::ToSocketAddrs;
+
+use anyhow::anyhow;
+use async_trait::async_trait;
+use hive_metastore::{
+ ThriftHiveMetastoreClient, ThriftHiveMetastoreClientBuilder,
+ ThriftHiveMetastoreGetDatabaseException, ThriftHiveMetastoreGetTableException,
+};
+use iceberg::io::FileIO;
+use iceberg::spec::{TableMetadata, TableMetadataBuilder};
+use iceberg::table::Table;
+use iceberg::{
+ Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation,
+ TableIdent,
};
use typed_builder::TypedBuilder;
+use volo_thrift::MaybeException;
+
+use super::utils::*;
+use crate::error::{from_io_error, from_thrift_error, from_thrift_exception};
+
+/// Which variant of the thrift transport to use when communicating with HMS.
+#[derive(Debug, Default)]
+pub enum HmsThriftTransport {
+ /// Use the framed transport
+ Framed,
+ /// Use the buffered transport (default)
+ #[default]
+ Buffered,
+}
/// Hive metastore Catalog configuration.
#[derive(Debug, TypedBuilder)]
pub struct HmsCatalogConfig {
address: String,
+ thrift_transport: HmsThriftTransport,
+ warehouse: String,
+ #[builder(default)]
+ props: HashMap<String, String>,
}
-/// TODO: We only support binary protocol for now.
-type HmsClientType = ThriftHiveMetastoreSyncClient<
- TBinaryInputProtocol<TBufferedReadTransport<ReadHalf<TTcpChannel>>>,
- TBinaryOutputProtocol<TBufferedWriteTransport<WriteHalf<TTcpChannel>>>,
->;
-
-/// # TODO
-///
-/// we are using the same connection everytime, we should support connection
-/// pool in the future.
-struct HmsClient(Arc>);
-
-impl HmsClient {
- fn call<T>(&self, f: impl FnOnce(&mut HmsClientType) -> thrift::Result<T>) -> Result<T> {
- let mut client = self.0.lock().unwrap();
- f(&mut client).map_err(from_thrift_error)
- }
-}
+struct HmsClient(ThriftHiveMetastoreClient);
/// Hive metastore Catalog.
pub struct HmsCatalog {
config: HmsCatalogConfig,
client: HmsClient,
+ file_io: FileIO,
}
impl Debug for HmsCatalog {
@@ -71,24 +79,46 @@ impl Debug for HmsCatalog {
impl HmsCatalog {
/// Create a new hms catalog.
pub fn new(config: HmsCatalogConfig) -> Result {
- let mut channel = thrift::transport::TTcpChannel::new();
- channel
- .open(config.address.as_str())
- .map_err(from_thrift_error)?;
- let (i_chan, o_chan) = channel.split().map_err(from_thrift_error)?;
- let i_chan = TBufferedReadTransport::new(i_chan);
- let o_chan = TBufferedWriteTransport::new(o_chan);
- let i_proto = TBinaryInputProtocol::new(i_chan, true);
- let o_proto = TBinaryOutputProtocol::new(o_chan, true);
- let client = ThriftHiveMetastoreSyncClient::new(i_proto, o_proto);
+ let address = config
+ .address
+ .as_str()
+ .to_socket_addrs()
+ .map_err(from_io_error)?
+ .next()
+ .ok_or_else(|| {
+ Error::new(
+ ErrorKind::Unexpected,
+ format!("invalid address: {}", config.address),
+ )
+ })?;
+
+ let builder = ThriftHiveMetastoreClientBuilder::new("hms").address(address);
+
+ let client = match &config.thrift_transport {
+ HmsThriftTransport::Framed => builder
+ .make_codec(volo_thrift::codec::default::DefaultMakeCodec::framed())
+ .build(),
+ HmsThriftTransport::Buffered => builder
+ .make_codec(volo_thrift::codec::default::DefaultMakeCodec::buffered())
+ .build(),
+ };
+
+ let file_io = FileIO::from_path(&config.warehouse)?
+ .with_props(&config.props)
+ .build()?;
+
Ok(Self {
config,
- client: HmsClient(Arc::new(Mutex::new(client))),
+ client: HmsClient(client),
+ file_io,
})
}
+ /// Get the catalogs `FileIO`
+ pub fn file_io(&self) -> FileIO {
+ self.file_io.clone()
+ }
}
-/// Refer to for implementation details.
#[async_trait]
impl Catalog for HmsCatalog {
/// HMS doesn't support nested namespaces.
@@ -103,69 +133,377 @@ impl Catalog for HmsCatalog {
let dbs = if parent.is_some() {
return Ok(vec![]);
} else {
- self.client.call(|client| client.get_all_databases())?
+ self.client
+ .0
+ .get_all_databases()
+ .await
+ .map(from_thrift_exception)
+ .map_err(from_thrift_error)??
};
- Ok(dbs.into_iter().map(NamespaceIdent::new).collect())
+ Ok(dbs
+ .into_iter()
+ .map(|v| NamespaceIdent::new(v.into()))
+ .collect())
}
+ /// Creates a new namespace with the given identifier and properties.
+ ///
+ /// Attempts to create a namespace defined by the `namespace`
+ /// parameter and configured with the specified `properties`.
+ ///
+ /// This function can return an error in the following situations:
+ ///
+ /// - If `hive.metastore.database.owner-type` is specified without
+ /// `hive.metastore.database.owner`,
+ /// - Errors from `validate_namespace` if the namespace identifier does not
+ /// meet validation criteria.
+ /// - Errors from `convert_to_database` if the properties cannot be
+ /// successfully converted into a database configuration.
+ /// - Errors from the underlying database creation process, converted using
+ /// `from_thrift_error`.
async fn create_namespace(
&self,
- _namespace: &NamespaceIdent,
- _properties: HashMap<String, String>,
+ namespace: &NamespaceIdent,
+ properties: HashMap<String, String>,
) -> Result<Namespace> {
- todo!()
+ let database = convert_to_database(namespace, &properties)?;
+
+ self.client
+ .0
+ .create_database(database)
+ .await
+ .map_err(from_thrift_error)?;
+
+ Ok(Namespace::with_properties(namespace.clone(), properties))
}
- async fn get_namespace(&self, _namespace: &NamespaceIdent) -> Result<Namespace> {
- todo!()
+ /// Retrieves a namespace by its identifier.
+ ///
+ /// Validates the given namespace identifier and then queries the
+ /// underlying database client to fetch the corresponding namespace data.
+ /// Constructs a `Namespace` object with the retrieved data and returns it.
+ ///
+ /// This function can return an error in any of the following situations:
+ /// - If the provided namespace identifier fails validation checks
+ /// - If there is an error querying the database, returned by
+ /// `from_thrift_error`.
+ async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result<Namespace> {
+ let name = validate_namespace(namespace)?;
+
+ let db = self
+ .client
+ .0
+ .get_database(name.into())
+ .await
+ .map(from_thrift_exception)
+ .map_err(from_thrift_error)??;
+
+ let ns = convert_to_namespace(&db)?;
+
+ Ok(ns)
}
- async fn namespace_exists(&self, _namespace: &NamespaceIdent) -> Result<bool> {
- todo!()
+ /// Checks if a namespace exists within the Hive Metastore.
+ ///
+ /// Validates the namespace identifier by querying the Hive Metastore
+ /// to determine if the specified namespace (database) exists.
+ ///
+ /// # Returns
+ /// A `Result` indicating the outcome of the check:
+ /// - `Ok(true)` if the namespace exists.
+ /// - `Ok(false)` if the namespace does not exist, identified by a specific
+ /// `UserException` variant.
+ /// - `Err(...)` if an error occurs during validation or the Hive Metastore
+ /// query, with the error encapsulating the issue.
+ async fn namespace_exists(&self, namespace: &NamespaceIdent) -> Result<bool> {
+ let name = validate_namespace(namespace)?;
+
+ let resp = self.client.0.get_database(name.into()).await;
+
+ match resp {
+ Ok(MaybeException::Ok(_)) => Ok(true),
+ Ok(MaybeException::Exception(ThriftHiveMetastoreGetDatabaseException::O1(_))) => {
+ Ok(false)
+ }
+ Ok(MaybeException::Exception(exception)) => Err(Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting thrift error".to_string(),
+ )
+ .with_source(anyhow!("thrift error: {:?}", exception))),
+ Err(err) => Err(from_thrift_error(err)),
+ }
}
+ /// Asynchronously updates properties of an existing namespace.
+ ///
+ /// Converts the given namespace identifier and properties into a database
+ /// representation and then attempts to update the corresponding namespace
+ /// in the Hive Metastore.
+ ///
+ /// # Returns
+ /// Returns `Ok(())` if the namespace update is successful. If the
+ /// namespace cannot be updated due to missing information or an error
+ /// during the update process, an `Err(...)` is returned.
async fn update_namespace(
&self,
- _namespace: &NamespaceIdent,
- _properties: HashMap<String, String>,
+ namespace: &NamespaceIdent,
+ properties: HashMap<String, String>,
) -> Result<()> {
- todo!()
+ let db = convert_to_database(namespace, &properties)?;
+
+ let name = match &db.name {
+ Some(name) => name,
+ None => {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Database name must be specified",
+ ))
+ }
+ };
+
+ self.client
+ .0
+ .alter_database(name.clone(), db)
+ .await
+ .map_err(from_thrift_error)?;
+
+ Ok(())
}
- async fn drop_namespace(&self, _namespace: &NamespaceIdent) -> Result<()> {
- todo!()
+ /// Asynchronously drops a namespace from the Hive Metastore.
+ ///
+ /// # Returns
+ /// A `Result<()>` indicating the outcome:
+ /// - `Ok(())` signifies successful namespace deletion.
+ /// - `Err(...)` signifies failure to drop the namespace due to validation
+ /// errors, connectivity issues, or Hive Metastore constraints.
+ async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
+ let name = validate_namespace(namespace)?;
+
+ self.client
+ .0
+ .drop_database(name.into(), false, false)
+ .await
+ .map_err(from_thrift_error)?;
+
+ Ok(())
}
- async fn list_tables(&self, _namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
- todo!()
+ /// Asynchronously lists all tables within a specified namespace.
+ ///
+ /// # Returns
+ ///
+ /// A `Result<Vec<TableIdent>>`, which is:
+ /// - `Ok(vec![...])` containing a vector of `TableIdent` instances, each
+ /// representing a table within the specified namespace.
+ /// - `Err(...)` if an error occurs during namespace validation or while
+ /// querying the database.
+ async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
+ let name = validate_namespace(namespace)?;
+
+ let tables = self
+ .client
+ .0
+ .get_all_tables(name.into())
+ .await
+ .map(from_thrift_exception)
+ .map_err(from_thrift_error)??;
+
+ let tables = tables
+ .iter()
+ .map(|table| TableIdent::new(namespace.clone(), table.to_string()))
+ .collect();
+
+ Ok(tables)
}
+ /// Creates a new table within a specified namespace using the provided
+ /// table creation settings.
+ ///
+ /// # Returns
+ /// A `Result` wrapping a `Table` object representing the newly created
+ /// table.
+ ///
+ /// # Errors
+ /// This function may return an error in several cases, including invalid
+ /// namespace identifiers, failure to determine a default storage location,
+ /// issues generating or writing table metadata, and errors communicating
+ /// with the Hive Metastore.
async fn create_table(
&self,
- _namespace: &NamespaceIdent,
- _creation: TableCreation,
+ namespace: &NamespaceIdent,
+ creation: TableCreation,
) -> Result<Table> {
- todo!()
+ /// Loads a table from the Hive Metastore and constructs a `Table` object
+ /// based on its metadata.
+ ///
+ /// # Returns
+ /// A `Result` wrapping a `Table` object that represents the loaded table.
+ ///
+ /// # Errors
+ /// This function may return an error in several scenarios, including:
+ /// - Failure to validate the namespace.
+ /// - Failure to retrieve the table from the Hive Metastore.
+ /// - Absence of metadata location information in the table's properties.
+ /// - Issues reading or deserializing the table's metadata file.
+ async fn load_table(&self, table: &TableIdent) -> Result<Table> {
+ let db_name = validate_namespace(table.namespace())?;
+
+ let hive_table = self
+ .client
+ .0
+ .get_table(db_name.clone().into(), table.name.clone().into())
+ .await
+ .map(from_thrift_exception)
+ .map_err(from_thrift_error)??;
+
+ let metadata_location = get_metadata_location(&hive_table.parameters)?;
+
+ let metadata_content = self.file_io.new_input(&metadata_location)?.read().await?;
+ let metadata = serde_json::from_slice::(&metadata_content)?;
+
+ Table::builder()
+ .file_io(self.file_io())
+ .metadata_location(metadata_location)
+ .metadata(metadata)
+ .identifier(TableIdent::new(
+ NamespaceIdent::new(db_name),
+ table.name.clone(),
+ ))
+ .build()
}
- async fn drop_table(&self, _table: &TableIdent) -> Result<()> {
- todo!()
+ /// Asynchronously drops a table from the database.
+ ///
+ /// # Errors
+ /// Returns an error if:
+ /// - The namespace provided in `table` cannot be validated
+ /// or does not exist.
+ /// - The underlying database client encounters an error while
+ /// attempting to drop the table. This includes scenarios where
+ /// the table does not exist.
+ /// - Any network or communication error occurs with the database backend.
+ async fn drop_table(&self, table: &TableIdent) -> Result<()> {
+ let db_name = validate_namespace(table.namespace())?;
+
+ self.client
+ .0
+ .drop_table(db_name.into(), table.name.clone().into(), false)
+ .await
+ .map_err(from_thrift_error)?;
+
+ Ok(())
}
- async fn stat_table(&self, _table: &TableIdent) -> Result<bool> {
- todo!()
+ /// Asynchronously checks the existence of a specified table
+ /// in the database.
+ ///
+ /// # Returns
+ /// - `Ok(true)` if the table exists in the database.
+ /// - `Ok(false)` if the table does not exist in the database.
+ /// - `Err(...)` if an error occurs during the process
+ async fn table_exists(&self, table: &TableIdent) -> Result<bool> {
+ let db_name = validate_namespace(table.namespace())?;
+ let table_name = table.name.clone();
+
+ let resp = self
+ .client
+ .0
+ .get_table(db_name.into(), table_name.into())
+ .await;
+
+ match resp {
+ Ok(MaybeException::Ok(_)) => Ok(true),
+ Ok(MaybeException::Exception(ThriftHiveMetastoreGetTableException::O2(_))) => Ok(false),
+ Ok(MaybeException::Exception(exception)) => Err(Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting thrift error".to_string(),
+ )
+ .with_source(anyhow!("thrift error: {:?}", exception))),
+ Err(err) => Err(from_thrift_error(err)),
+ }
}
- async fn rename_table(&self, _src: &TableIdent, _dest: &TableIdent) -> Result<()> {
- todo!()
+ /// Asynchronously renames a table within the database
+ /// or moves it between namespaces (databases).
+ ///
+ /// # Returns
+ /// - `Ok(())` on successful rename or move of the table.
+ /// - `Err(...)` if an error occurs during the process.
+ async fn rename_table(&self, src: &TableIdent, dest: &TableIdent) -> Result<()> {
+ let src_dbname = validate_namespace(src.namespace())?;
+ let dest_dbname = validate_namespace(dest.namespace())?;
+
+ let src_tbl_name = src.name.clone();
+ let dest_tbl_name = dest.name.clone();
+
+ let mut tbl = self
+ .client
+ .0
+ .get_table(src_dbname.clone().into(), src_tbl_name.clone().into())
+ .await
+ .map(from_thrift_exception)
+ .map_err(from_thrift_error)??;
+
+ tbl.db_name = Some(dest_dbname.into());
+ tbl.table_name = Some(dest_tbl_name.into());
+
+ self.client
+ .0
+ .alter_table(src_dbname.into(), src_tbl_name.into(), tbl)
+ .await
+ .map_err(from_thrift_error)?;
+
+ Ok(())
}
async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
- todo!()
+ Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "Updating a table is not supported yet",
+ ))
}
}
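
With the rework above, constructing the catalog now takes a thrift transport variant and a warehouse for `FileIO`. A minimal construction sketch, with placeholder address and warehouse values:

```rust
use std::collections::HashMap;

use iceberg_catalog_hms::{HmsCatalog, HmsCatalogConfig, HmsThriftTransport};

fn main() -> iceberg::Result<()> {
    // Placeholder HMS address and warehouse location.
    let config = HmsCatalogConfig::builder()
        .address("127.0.0.1:9083".to_string())
        .thrift_transport(HmsThriftTransport::Buffered)
        .warehouse("s3a://warehouse/hms".to_string())
        .props(HashMap::new())
        .build();

    let _catalog = HmsCatalog::new(config)?;
    Ok(())
}
```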
diff --git a/crates/catalog/hms/src/error.rs b/crates/catalog/hms/src/error.rs
new file mode 100644
index 000000000..15da3eaf6
--- /dev/null
+++ b/crates/catalog/hms/src/error.rs
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fmt::Debug;
+use std::io;
+
+use anyhow::anyhow;
+use iceberg::{Error, ErrorKind};
+use volo_thrift::MaybeException;
+
+/// Format a thrift error into iceberg error.
+///
+/// Please only throw this error when you are sure that the error is caused by thrift.
+pub fn from_thrift_error(error: impl std::error::Error) -> Error {
+ Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting thrift error".to_string(),
+ )
+ .with_source(anyhow!("thrift error: {:?}", error))
+}
+
+/// Format a thrift exception into iceberg error.
+pub fn from_thrift_exception<T, E: Debug>(value: MaybeException<T, E>) -> Result<T> {
+ match value {
+ MaybeException::Ok(v) => Ok(v),
+ MaybeException::Exception(err) => Err(Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting thrift error".to_string(),
+ )
+ .with_source(anyhow!("thrift error: {:?}", err))),
+ }
+}
+
+/// Format an io error into iceberg error.
+pub fn from_io_error(error: io::Error) -> Error {
+ Error::new(
+ ErrorKind::Unexpected,
+ "Operation failed for hitting io error".to_string(),
+ )
+ .with_source(error)
+}
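
The split between `from_thrift_error` and `from_thrift_exception` is what makes the `??` pattern in `catalog.rs` work: the outer `Result` carries transport-level failures, while `MaybeException` carries the exceptions declared in the thrift IDL. A toy model of that shape, using stand-in types rather than the real volo-thrift API:

```rust
use iceberg::{Error, ErrorKind, Result};

// Stand-in for `volo_thrift::MaybeException`, for illustration only.
enum MaybeException<T, E> {
    Ok(T),
    Exception(E),
}

// Same flattening idea as `from_thrift_exception` above.
fn flatten<T, E: std::fmt::Debug>(v: MaybeException<T, E>) -> Result<T> {
    match v {
        MaybeException::Ok(v) => Ok(v),
        MaybeException::Exception(e) => Err(Error::new(
            ErrorKind::Unexpected,
            format!("thrift exception: {:?}", e),
        )),
    }
}

// Stand-in RPC: transport errors on the outside, declared exceptions inside.
fn rpc() -> std::io::Result<MaybeException<Vec<String>, String>> {
    Ok(MaybeException::Ok(vec!["default".to_string()]))
}

fn list_databases() -> Result<Vec<String>> {
    // The first `?` unwraps the transport-level Result, the second the exception.
    let dbs = rpc()
        .map(flatten)
        .map_err(|e| Error::new(ErrorKind::Unexpected, e.to_string()))??;
    Ok(dbs)
}

fn main() {
    assert_eq!(list_databases().unwrap(), vec!["default".to_string()]);
}
```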
diff --git a/crates/catalog/hms/src/lib.rs b/crates/catalog/hms/src/lib.rs
index b75e74977..db0034d46 100644
--- a/crates/catalog/hms/src/lib.rs
+++ b/crates/catalog/hms/src/lib.rs
@@ -22,4 +22,6 @@
mod catalog;
pub use catalog::*;
+mod error;
+mod schema;
mod utils;
diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs
new file mode 100644
index 000000000..4012098c2
--- /dev/null
+++ b/crates/catalog/hms/src/schema.rs
@@ -0,0 +1,460 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use hive_metastore::FieldSchema;
+use iceberg::spec::{visit_schema, PrimitiveType, Schema, SchemaVisitor};
+use iceberg::{Error, ErrorKind, Result};
+
+type HiveSchema = Vec<FieldSchema>;
+
+#[derive(Debug, Default)]
+pub(crate) struct HiveSchemaBuilder {
+ schema: HiveSchema,
+ depth: usize,
+}
+
+impl HiveSchemaBuilder {
+ /// Creates a new `HiveSchemaBuilder` from an iceberg `Schema`
+ pub fn from_iceberg(schema: &Schema) -> Result<HiveSchemaBuilder> {
+ let mut builder = Self::default();
+ visit_schema(schema, &mut builder)?;
+ Ok(builder)
+ }
+
+ /// Returns the newly converted `HiveSchema`
+ pub fn build(self) -> HiveSchema {
+ self.schema
+ }
+
+ /// Check whether the visitor is currently inside a `StructType` while traversing the schema
+ fn is_inside_struct(&self) -> bool {
+ self.depth > 0
+ }
+}
+
+impl SchemaVisitor for HiveSchemaBuilder {
+ type T = String;
+
+ fn schema(
+ &mut self,
+ _schema: &iceberg::spec::Schema,
+ value: String,
+ ) -> iceberg::Result<String> {
+ Ok(value)
+ }
+
+ fn before_struct_field(
+ &mut self,
+ _field: &iceberg::spec::NestedFieldRef,
+ ) -> iceberg::Result<()> {
+ self.depth += 1;
+ Ok(())
+ }
+
+ fn r#struct(
+ &mut self,
+ r#_struct: &iceberg::spec::StructType,
+ results: Vec<String>,
+ ) -> iceberg::Result<String> {
+ Ok(format!("struct<{}>", results.join(", ")))
+ }
+
+ fn after_struct_field(
+ &mut self,
+ _field: &iceberg::spec::NestedFieldRef,
+ ) -> iceberg::Result<()> {
+ self.depth -= 1;
+ Ok(())
+ }
+
+ fn field(
+ &mut self,
+ field: &iceberg::spec::NestedFieldRef,
+ value: String,
+ ) -> iceberg::Result<String> {
+ if self.is_inside_struct() {
+ return Ok(format!("{}:{}", field.name, value));
+ }
+
+ self.schema.push(FieldSchema {
+ name: Some(field.name.clone().into()),
+ r#type: Some(value.clone().into()),
+ comment: field.doc.clone().map(|doc| doc.into()),
+ });
+
+ Ok(value)
+ }
+
+ fn list(&mut self, _list: &iceberg::spec::ListType, value: String) -> iceberg::Result<String> {
+ Ok(format!("array<{}>", value))
+ }
+
+ fn map(
+ &mut self,
+ _map: &iceberg::spec::MapType,
+ key_value: String,
+ value: String,
+ ) -> iceberg::Result<String> {
+ Ok(format!("map<{},{}>", key_value, value))
+ }
+
+ fn primitive(&mut self, p: &iceberg::spec::PrimitiveType) -> iceberg::Result<String> {
+ let hive_type = match p {
+ PrimitiveType::Boolean => "boolean".to_string(),
+ PrimitiveType::Int => "int".to_string(),
+ PrimitiveType::Long => "bigint".to_string(),
+ PrimitiveType::Float => "float".to_string(),
+ PrimitiveType::Double => "double".to_string(),
+ PrimitiveType::Date => "date".to_string(),
+ PrimitiveType::Timestamp => "timestamp".to_string(),
+ PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
+ PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
+ PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => {
+ "string".to_string()
+ }
+ PrimitiveType::Binary | PrimitiveType::Fixed(_) => "binary".to_string(),
+ PrimitiveType::Decimal { precision, scale } => {
+ format!("decimal({},{})", precision, scale)
+ }
+ _ => {
+ return Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "Conversion from 'Timestamptz' is not supported",
+ ))
+ }
+ };
+
+ Ok(hive_type)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use iceberg::spec::Schema;
+ use iceberg::Result;
+
+ use super::*;
+
+ #[test]
+ fn test_schema_with_nested_maps() -> Result<()> {
+ let record = r#"
+ {
+ "schema-id": 1,
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "quux",
+ "required": true,
+ "type": {
+ "type": "map",
+ "key-id": 2,
+ "key": "string",
+ "value-id": 3,
+ "value-required": true,
+ "value": {
+ "type": "map",
+ "key-id": 4,
+ "key": "string",
+ "value-id": 5,
+ "value-required": true,
+ "value": "int"
+ }
+ }
+ }
+ ]
+ }
+ "#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+
+ let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
+
+ let expected = vec![FieldSchema {
+ name: Some("quux".into()),
+ r#type: Some("map>".into()),
+ comment: None,
+ }];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_schema_with_struct_inside_list() -> Result<()> {
+ let record = r#"
+ {
+ "schema-id": 1,
+ "type": "struct",
+ "fields": [
+ {
+ "id": 1,
+ "name": "location",
+ "required": true,
+ "type": {
+ "type": "list",
+ "element-id": 2,
+ "element-required": true,
+ "element": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 3,
+ "name": "latitude",
+ "required": false,
+ "type": "float"
+ },
+ {
+ "id": 4,
+ "name": "longitude",
+ "required": false,
+ "type": "float"
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ "#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+
+ let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
+
+ let expected = vec![FieldSchema {
+ name: Some("location".into()),
+ r#type: Some("array>".into()),
+ comment: None,
+ }];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_schema_with_structs() -> Result<()> {
+ let record = r#"{
+ "type": "struct",
+ "schema-id": 1,
+ "fields": [
+ {
+ "id": 1,
+ "name": "person",
+ "required": true,
+ "type": {
+ "type": "struct",
+ "fields": [
+ {
+ "id": 2,
+ "name": "name",
+ "required": true,
+ "type": "string"
+ },
+ {
+ "id": 3,
+ "name": "age",
+ "required": false,
+ "type": "int"
+ }
+ ]
+ }
+ }
+ ]
+ }"#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+
+ let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
+
+ let expected = vec![FieldSchema {
+ name: Some("person".into()),
+ r#type: Some("struct".into()),
+ comment: None,
+ }];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_schema_with_simple_fields() -> Result<()> {
+ let record = r#"{
+ "type": "struct",
+ "schema-id": 1,
+ "fields": [
+ {
+ "id": 1,
+ "name": "c1",
+ "required": true,
+ "type": "boolean"
+ },
+ {
+ "id": 2,
+ "name": "c2",
+ "required": true,
+ "type": "int"
+ },
+ {
+ "id": 3,
+ "name": "c3",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 4,
+ "name": "c4",
+ "required": true,
+ "type": "float"
+ },
+ {
+ "id": 5,
+ "name": "c5",
+ "required": true,
+ "type": "double"
+ },
+ {
+ "id": 6,
+ "name": "c6",
+ "required": true,
+ "type": "decimal(2,2)"
+ },
+ {
+ "id": 7,
+ "name": "c7",
+ "required": true,
+ "type": "date"
+ },
+ {
+ "id": 8,
+ "name": "c8",
+ "required": true,
+ "type": "time"
+ },
+ {
+ "id": 9,
+ "name": "c9",
+ "required": true,
+ "type": "timestamp"
+ },
+ {
+ "id": 10,
+ "name": "c10",
+ "required": true,
+ "type": "string"
+ },
+ {
+ "id": 11,
+ "name": "c11",
+ "required": true,
+ "type": "uuid"
+ },
+ {
+ "id": 12,
+ "name": "c12",
+ "required": true,
+ "type": "fixed[4]"
+ },
+ {
+ "id": 13,
+ "name": "c13",
+ "required": true,
+ "type": "binary"
+ }
+ ]
+ }"#;
+
+ let schema = serde_json::from_str::<Schema>(record)?;
+
+ let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
+
+ let expected = vec![
+ FieldSchema {
+ name: Some("c1".into()),
+ r#type: Some("boolean".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c2".into()),
+ r#type: Some("int".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c3".into()),
+ r#type: Some("bigint".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c4".into()),
+ r#type: Some("float".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c5".into()),
+ r#type: Some("double".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c6".into()),
+ r#type: Some("decimal(2,2)".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c7".into()),
+ r#type: Some("date".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c8".into()),
+ r#type: Some("string".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c9".into()),
+ r#type: Some("timestamp".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c10".into()),
+ r#type: Some("string".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c11".into()),
+ r#type: Some("string".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c12".into()),
+ r#type: Some("binary".into()),
+ comment: None,
+ },
+ FieldSchema {
+ name: Some("c13".into()),
+ r#type: Some("binary".into()),
+ comment: None,
+ },
+ ];
+
+ assert_eq!(result, expected);
+
+ Ok(())
+ }
+}
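
Tying the visitor together: `HiveSchemaBuilder` emits one `FieldSchema` per top-level field and serializes any nesting into Hive's inline type strings (`struct<...>`, `array<...>`, `map<...>`). The builder is `pub(crate)`, so the following sketch only compiles inside this crate; the schema mirrors the one used in the `utils.rs` tests further down:

```rust
use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
use iceberg::Result;

use crate::schema::HiveSchemaBuilder;

// Crate-internal sketch: convert a two-column Iceberg schema into Hive columns.
fn to_hive_columns() -> Result<()> {
    let schema = Schema::builder()
        .with_schema_id(1)
        .with_fields(vec![
            NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(),
            NestedField::required(2, "bar", Type::Primitive(PrimitiveType::String)).into(),
        ])
        .build()?;

    // Expected: [("foo", "int"), ("bar", "string")] as `FieldSchema` entries.
    let columns = HiveSchemaBuilder::from_iceberg(&schema)?.build();
    assert_eq!(columns.len(), 2);
    Ok(())
}
```
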
diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs
index 0daa52aa1..1e48d3fbd 100644
--- a/crates/catalog/hms/src/utils.rs
+++ b/crates/catalog/hms/src/utils.rs
@@ -15,13 +15,547 @@
// specific language governing permissions and limitations
// under the License.
-use iceberg::{Error, ErrorKind};
-
-/// Format a thrift error into iceberg error.
-pub fn from_thrift_error(error: thrift::Error) -> Error {
- Error::new(
- ErrorKind::Unexpected,
- "operation failed for hitting thrift error".to_string(),
- )
- .with_source(error)
+use std::collections::HashMap;
+
+use chrono::Utc;
+use hive_metastore::{Database, PrincipalType, SerDeInfo, StorageDescriptor};
+use iceberg::spec::Schema;
+use iceberg::{Error, ErrorKind, Namespace, NamespaceIdent, Result};
+use pilota::{AHashMap, FastStr};
+use uuid::Uuid;
+
+use crate::schema::HiveSchemaBuilder;
+
+/// hive.metastore.database.owner setting
+const HMS_DB_OWNER: &str = "hive.metastore.database.owner";
+/// hive.metastore.database.owner default setting
+const HMS_DEFAULT_DB_OWNER: &str = "user.name";
+/// hive.metastore.database.owner-type setting
+const HMS_DB_OWNER_TYPE: &str = "hive.metastore.database.owner-type";
+/// hive metastore `owner` property
+const OWNER: &str = "owner";
+/// hive metastore `description` property
+const COMMENT: &str = "comment";
+/// hive metastore `location` property
+const LOCATION: &str = "location";
+/// hive metastore `metadata_location` property
+const METADATA_LOCATION: &str = "metadata_location";
+/// hive metastore `external` property
+const EXTERNAL: &str = "EXTERNAL";
+/// hive metastore `external_table` property
+const EXTERNAL_TABLE: &str = "EXTERNAL_TABLE";
+/// hive metastore `table_type` property
+const TABLE_TYPE: &str = "table_type";
+/// hive metastore `SerDeInfo` serialization_lib parameter
+const SERIALIZATION_LIB: &str = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
+/// hive metastore input format
+const INPUT_FORMAT: &str = "org.apache.hadoop.mapred.FileInputFormat";
+/// hive metastore output format
+const OUTPUT_FORMAT: &str = "org.apache.hadoop.mapred.FileOutputFormat";
+
+/// Returns a `Namespace` by extracting database name and properties
+/// from `hive_metastore::hms::Database`
+pub(crate) fn convert_to_namespace(database: &Database) -> Result<Namespace> {
+ let mut properties = HashMap::new();
+
+ let name = database
+ .name
+ .as_ref()
+ .ok_or_else(|| Error::new(ErrorKind::DataInvalid, "Database name must be specified"))?
+ .to_string();
+
+ if let Some(description) = &database.description {
+ properties.insert(COMMENT.to_string(), description.to_string());
+ };
+
+ if let Some(location) = &database.location_uri {
+ properties.insert(LOCATION.to_string(), location.to_string());
+ };
+
+ if let Some(owner) = &database.owner_name {
+ properties.insert(HMS_DB_OWNER.to_string(), owner.to_string());
+ };
+
+ if let Some(owner_type) = database.owner_type {
+ let value = if owner_type == PrincipalType::USER {
+ "User"
+ } else if owner_type == PrincipalType::GROUP {
+ "Group"
+ } else if owner_type == PrincipalType::ROLE {
+ "Role"
+ } else {
+ unreachable!("Invalid owner type")
+ };
+
+ properties.insert(HMS_DB_OWNER_TYPE.to_string(), value.to_string());
+ };
+
+ if let Some(params) = &database.parameters {
+ params.iter().for_each(|(k, v)| {
+ properties.insert(k.clone().into(), v.clone().into());
+ });
+ };
+
+ Ok(Namespace::with_properties(
+ NamespaceIdent::new(name),
+ properties,
+ ))
+}
+
+/// Converts name and properties into `hive_metastore::hms::Database`
+/// after validating the `namespace` and `owner-settings`.
+pub(crate) fn convert_to_database(
+ namespace: &NamespaceIdent,
+ properties: &HashMap<String, String>,
+) -> Result<Database> {
+ let name = validate_namespace(namespace)?;
+ validate_owner_settings(properties)?;
+
+ let mut db = Database::default();
+ let mut parameters = AHashMap::new();
+
+ db.name = Some(name.into());
+
+ for (k, v) in properties {
+ match k.as_str() {
+ COMMENT => db.description = Some(v.clone().into()),
+ LOCATION => db.location_uri = Some(format_location_uri(v.clone()).into()),
+ HMS_DB_OWNER => db.owner_name = Some(v.clone().into()),
+ HMS_DB_OWNER_TYPE => {
+ let owner_type = match v.to_lowercase().as_str() {
+ "user" => PrincipalType::USER,
+ "group" => PrincipalType::GROUP,
+ "role" => PrincipalType::ROLE,
+ _ => {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("Invalid value for setting 'owner_type': {}", v),
+ ))
+ }
+ };
+ db.owner_type = Some(owner_type);
+ }
+ _ => {
+ parameters.insert(
+ FastStr::from_string(k.clone()),
+ FastStr::from_string(v.clone()),
+ );
+ }
+ }
+ }
+
+ db.parameters = Some(parameters);
+
+ // Set default owner, if none provided
+ // https://github.com/apache/iceberg/blob/main/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java#L44
+ if db.owner_name.is_none() {
+ db.owner_name = Some(HMS_DEFAULT_DB_OWNER.into());
+ db.owner_type = Some(PrincipalType::USER);
+ }
+
+ Ok(db)
+}
+
+pub(crate) fn convert_to_hive_table(
+ db_name: String,
+ schema: &Schema,
+ table_name: String,
+ location: String,
+ metadata_location: String,
+ properties: &HashMap<String, String>,
+) -> Result<hive_metastore::Table> {
+ let serde_info = SerDeInfo {
+ serialization_lib: Some(SERIALIZATION_LIB.into()),
+ ..Default::default()
+ };
+
+ let hive_schema = HiveSchemaBuilder::from_iceberg(schema)?.build();
+
+ let storage_descriptor = StorageDescriptor {
+ location: Some(location.into()),
+ cols: Some(hive_schema),
+ input_format: Some(INPUT_FORMAT.into()),
+ output_format: Some(OUTPUT_FORMAT.into()),
+ serde_info: Some(serde_info),
+ ..Default::default()
+ };
+
+ let parameters = AHashMap::from([
+ (FastStr::from(EXTERNAL), FastStr::from("TRUE")),
+ (FastStr::from(TABLE_TYPE), FastStr::from("ICEBERG")),
+ (
+ FastStr::from(METADATA_LOCATION),
+ FastStr::from(metadata_location),
+ ),
+ ]);
+
+ let current_time_ms = get_current_time()?;
+ let owner = properties
+ .get(OWNER)
+ .map_or(HMS_DEFAULT_DB_OWNER.to_string(), |v| v.into());
+
+ Ok(hive_metastore::Table {
+ table_name: Some(table_name.into()),
+ db_name: Some(db_name.into()),
+ table_type: Some(EXTERNAL_TABLE.into()),
+ owner: Some(owner.into()),
+ create_time: Some(current_time_ms),
+ last_access_time: Some(current_time_ms),
+ sd: Some(storage_descriptor),
+ parameters: Some(parameters),
+ ..Default::default()
+ })
+}
+
+/// Checks if provided `NamespaceIdent` is valid.
+pub(crate) fn validate_namespace(namespace: &NamespaceIdent) -> Result<String> {
+ let name = namespace.as_ref();
+
+ if name.len() != 1 {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "Invalid database name: {:?}, hierarchical namespaces are not supported",
+ namespace
+ ),
+ ));
+ }
+
+ let name = name[0].clone();
+
+ if name.is_empty() {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Invalid database, provided namespace is empty.",
+ ));
+ }
+
+ Ok(name)
+}
+
+/// Get default table location from `Namespace` properties
+pub(crate) fn get_default_table_location(
+ namespace: &Namespace,
+ table_name: impl AsRef<str>,
+ warehouse: impl AsRef<str>,
+) -> String {
+ let properties = namespace.properties();
+
+ let location = match properties.get(LOCATION) {
+ Some(location) => location,
+ None => warehouse.as_ref(),
+ };
+
+ format!("{}/{}", location, table_name.as_ref())
+}
+
+/// Create metadata location from `location` and `version`
+pub(crate) fn create_metadata_location(location: impl AsRef<str>, version: i32) -> Result<String> {
+ if version < 0 {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "Table metadata version: '{}' must be a non-negative integer",
+ version
+ ),
+ ));
+ };
+
+ let version = format!("{:0>5}", version);
+ let id = Uuid::new_v4();
+ let metadata_location = format!(
+ "{}/metadata/{}-{}.metadata.json",
+ location.as_ref(),
+ version,
+ id
+ );
+
+ Ok(metadata_location)
+}
+
+/// Get metadata location from `HiveTable` parameters
+pub(crate) fn get_metadata_location(
+ parameters: &Option<AHashMap<FastStr, FastStr>>,
+) -> Result<String> {
+ match parameters {
+ Some(properties) => match properties.get(METADATA_LOCATION) {
+ Some(location) => Ok(location.to_string()),
+ None => Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!("No '{}' set on table", METADATA_LOCATION),
+ )),
+ },
+ None => Err(Error::new(
+ ErrorKind::DataInvalid,
+ "No 'parameters' set on table. Location of metadata is undefined",
+ )),
+ }
+}
+
+/// Formats a location URI, e.g. by ensuring a leading slash and removing a trailing one.
+fn format_location_uri(location: String) -> String {
+ let mut location = location;
+
+ if !location.starts_with('/') {
+ location = format!("/{}", location);
+ }
+
+ if location.ends_with('/') && location.len() > 1 {
+ location.pop();
+ }
+
+ location
+}
+
+/// Checks if `owner-settings` are valid.
+/// If `owner_type` is set, then `owner` must also be set.
+fn validate_owner_settings(properties: &HashMap) -> Result<()> {
+ let owner_is_set = properties.get(HMS_DB_OWNER).is_some();
+ let owner_type_is_set = properties.get(HMS_DB_OWNER_TYPE).is_some();
+
+ if owner_type_is_set && !owner_is_set {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ format!(
+ "Setting '{}' without setting '{}' is not allowed",
+ HMS_DB_OWNER_TYPE, HMS_DB_OWNER
+ ),
+ ));
+ }
+
+ Ok(())
+}
+
+fn get_current_time() -> Result<i32> {
+ let now = Utc::now();
+ now.timestamp().try_into().map_err(|_| {
+ Error::new(
+ ErrorKind::Unexpected,
+ "Current time is out of range for i32",
+ )
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use iceberg::spec::{NestedField, PrimitiveType, Type};
+ use iceberg::{Namespace, NamespaceIdent};
+
+ use super::*;
+
+ #[test]
+ fn test_get_metadata_location() -> Result<()> {
+ let params_valid = Some(AHashMap::from([(
+ FastStr::new(METADATA_LOCATION),
+ FastStr::new("my_location"),
+ )]));
+ let params_missing_key = Some(AHashMap::from([(
+ FastStr::new("not_here"),
+ FastStr::new("my_location"),
+ )]));
+
+ let result_valid = get_metadata_location(¶ms_valid)?;
+ let result_missing_key = get_metadata_location(¶ms_missing_key);
+ let result_no_params = get_metadata_location(&None);
+
+ assert_eq!(result_valid, "my_location");
+ assert!(result_missing_key.is_err());
+ assert!(result_no_params.is_err());
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_convert_to_hive_table() -> Result<()> {
+ let db_name = "my_db".to_string();
+ let table_name = "my_table".to_string();
+ let location = "s3a://warehouse/hms".to_string();
+ let metadata_location = create_metadata_location(location.clone(), 0)?;
+ let properties = HashMap::new();
+ let schema = Schema::builder()
+ .with_schema_id(1)
+ .with_fields(vec![
+ NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
+ ])
+ .build()?;
+
+ let result = convert_to_hive_table(
+ db_name.clone(),
+ &schema,
+ table_name.clone(),
+ location.clone(),
+ metadata_location,
+ &properties,
+ )?;
+
+ let serde_info = SerDeInfo {
+ serialization_lib: Some(SERIALIZATION_LIB.into()),
+ ..Default::default()
+ };
+
+ let hive_schema = HiveSchemaBuilder::from_iceberg(&schema)?.build();
+
+ let sd = StorageDescriptor {
+ location: Some(location.into()),
+ cols: Some(hive_schema),
+ input_format: Some(INPUT_FORMAT.into()),
+ output_format: Some(OUTPUT_FORMAT.into()),
+ serde_info: Some(serde_info),
+ ..Default::default()
+ };
+
+ assert_eq!(result.db_name, Some(db_name.into()));
+ assert_eq!(result.table_name, Some(table_name.into()));
+ assert_eq!(result.table_type, Some(EXTERNAL_TABLE.into()));
+ assert_eq!(result.owner, Some(HMS_DEFAULT_DB_OWNER.into()));
+ assert_eq!(result.sd, Some(sd));
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_create_metadata_location() -> Result<()> {
+ let location = "my_base_location";
+ let valid_version = 0;
+ let invalid_version = -1;
+
+ let valid_result = create_metadata_location(location, valid_version)?;
+ let invalid_result = create_metadata_location(location, invalid_version);
+
+ assert!(valid_result.starts_with("my_base_location/metadata/00000-"));
+ assert!(valid_result.ends_with(".metadata.json"));
+ assert!(invalid_result.is_err());
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_get_default_table_location() -> Result<()> {
+ let properties = HashMap::from([(LOCATION.to_string(), "db_location".to_string())]);
+
+ let namespace =
+ Namespace::with_properties(NamespaceIdent::new("default".into()), properties);
+ let table_name = "my_table";
+
+ let expected = "db_location/my_table";
+ let result = get_default_table_location(&namespace, table_name, "warehouse_location");
+
+ assert_eq!(expected, result);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_get_default_table_location_warehouse() -> Result<()> {
+ let namespace = Namespace::new(NamespaceIdent::new("default".into()));
+ let table_name = "my_table";
+
+ let expected = "warehouse_location/my_table";
+ let result = get_default_table_location(&namespace, table_name, "warehouse_location");
+
+ assert_eq!(expected, result);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_convert_to_namespace() -> Result<()> {
+ let properties = HashMap::from([
+ (COMMENT.to_string(), "my_description".to_string()),
+ (LOCATION.to_string(), "/my_location".to_string()),
+ (HMS_DB_OWNER.to_string(), "apache".to_string()),
+ (HMS_DB_OWNER_TYPE.to_string(), "User".to_string()),
+ ("key1".to_string(), "value1".to_string()),
+ ]);
+
+ let ident = NamespaceIdent::new("my_namespace".into());
+ let db = convert_to_database(&ident, &properties)?;
+
+ let expected_ns = Namespace::with_properties(ident, properties);
+ let result_ns = convert_to_namespace(&db)?;
+
+ assert_eq!(expected_ns, result_ns);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_validate_owner_settings() {
+ let valid = HashMap::from([
+ (HMS_DB_OWNER.to_string(), "apache".to_string()),
+ (HMS_DB_OWNER_TYPE.to_string(), "user".to_string()),
+ ]);
+ let invalid = HashMap::from([(HMS_DB_OWNER_TYPE.to_string(), "user".to_string())]);
+
+ assert!(validate_owner_settings(&valid).is_ok());
+ assert!(validate_owner_settings(&invalid).is_err());
+ }
+
+ #[test]
+ fn test_convert_to_database() -> Result<()> {
+ let ns = NamespaceIdent::new("my_namespace".into());
+ let properties = HashMap::from([
+ (COMMENT.to_string(), "my_description".to_string()),
+ (LOCATION.to_string(), "my_location".to_string()),
+ (HMS_DB_OWNER.to_string(), "apache".to_string()),
+ (HMS_DB_OWNER_TYPE.to_string(), "user".to_string()),
+ ("key1".to_string(), "value1".to_string()),
+ ]);
+
+ let db = convert_to_database(&ns, &properties)?;
+
+ assert_eq!(db.name, Some(FastStr::from("my_namespace")));
+ assert_eq!(db.description, Some(FastStr::from("my_description")));
+ assert_eq!(db.owner_name, Some(FastStr::from("apache")));
+ assert_eq!(db.owner_type, Some(PrincipalType::USER));
+
+ if let Some(params) = db.parameters {
+ assert_eq!(params.get("key1"), Some(&FastStr::from("value1")));
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_convert_to_database_with_default_user() -> Result<()> {
+ let ns = NamespaceIdent::new("my_namespace".into());
+ let properties = HashMap::new();
+
+ let db = convert_to_database(&ns, &properties)?;
+
+ assert_eq!(db.name, Some(FastStr::from("my_namespace")));
+ assert_eq!(db.owner_name, Some(FastStr::from(HMS_DEFAULT_DB_OWNER)));
+ assert_eq!(db.owner_type, Some(PrincipalType::USER));
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_validate_namespace() {
+ let valid_ns = Namespace::new(NamespaceIdent::new("ns".to_string()));
+ let empty_ns = Namespace::new(NamespaceIdent::new("".to_string()));
+ let hierarchical_ns = Namespace::new(
+ NamespaceIdent::from_vec(vec!["level1".to_string(), "level2".to_string()]).unwrap(),
+ );
+
+ let valid = validate_namespace(valid_ns.name());
+ let empty = validate_namespace(empty_ns.name());
+ let hierarchical = validate_namespace(hierarchical_ns.name());
+
+ assert!(valid.is_ok());
+ assert!(empty.is_err());
+ assert!(hierarchical.is_err());
+ }
+
+ #[test]
+ fn test_format_location_uri() {
+ let inputs = vec!["iceberg", "is/", "/nice/", "really/nice/", "/"];
+ let outputs = vec!["/iceberg", "/is", "/nice", "/really/nice", "/"];
+
+ inputs.into_iter().zip(outputs).for_each(|(inp, out)| {
+ let location = format_location_uri(inp.to_string());
+ assert_eq!(location, out);
+ })
+ }
}
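
A small detail in `create_metadata_location` worth spelling out: `{:0>5}` zero-pads the version to five digits, which is exactly the `00000-<uuid>.metadata.json` shape the tests assert on. A minimal sketch of the formatting (the `"uuid"` placeholder stands in for `Uuid::new_v4()`):

```rust
fn main() {
    // `{:0>5}` right-aligns the value and left-pads it with '0' to width 5.
    assert_eq!(format!("{:0>5}", 0), "00000");
    assert_eq!(format!("{:0>5}", 42), "00042");

    // The resulting layout, for a hypothetical base location:
    let location = "s3a://warehouse/hms";
    let name = format!("{}/metadata/{:0>5}-{}.metadata.json", location, 3, "uuid");
    assert_eq!(name, "s3a://warehouse/hms/metadata/00003-uuid.metadata.json");
}
```
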
diff --git a/crates/catalog/hms/testdata/hms_catalog/Dockerfile b/crates/catalog/hms/testdata/hms_catalog/Dockerfile
new file mode 100644
index 000000000..8392e174a
--- /dev/null
+++ b/crates/catalog/hms/testdata/hms_catalog/Dockerfile
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM --platform=$BUILDPLATFORM openjdk:8-jre-slim AS build
+
+ARG BUILDPLATFORM
+
+RUN apt-get update -qq && apt-get -qq -y install curl
+
+ENV AWSSDK_VERSION=2.20.18
+ENV HADOOP_VERSION=3.1.0
+
+RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar -Lo /tmp/aws-java-sdk-bundle-1.11.271.jar
+RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar
+
+
+FROM apache/hive:3.1.3
+
+ENV AWSSDK_VERSION=2.20.18
+ENV HADOOP_VERSION=3.1.0
+
+COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar
+COPY --from=build /tmp/aws-java-sdk-bundle-1.11.271.jar /opt/hive/lib/aws-java-sdk-bundle-1.11.271.jar
+COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml
\ No newline at end of file
diff --git a/crates/catalog/hms/testdata/hms_catalog/core-site.xml b/crates/catalog/hms/testdata/hms_catalog/core-site.xml
new file mode 100644
index 000000000..f0583a0bc
--- /dev/null
+++ b/crates/catalog/hms/testdata/hms_catalog/core-site.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>s3a://warehouse/hive</value>
+  </property>
+  <property>
+    <name>fs.s3a.impl</name>
+    <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+  </property>
+  <property>
+    <name>fs.s3a.fast.upload</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>fs.s3a.endpoint</name>
+    <value>http://minio:9000</value>
+  </property>
+  <property>
+    <name>fs.s3a.access.key</name>
+    <value>admin</value>
+  </property>
+  <property>
+    <name>fs.s3a.secret.key</name>
+    <value>password</value>
+  </property>
+  <property>
+    <name>fs.s3a.connection.ssl.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>fs.s3a.path.style.access</name>
+    <value>true</value>
+  </property>
+</configuration>
\ No newline at end of file
diff --git a/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml b/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml
new file mode 100644
index 000000000..181fac149
--- /dev/null
+++ b/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+services:
+ minio:
+ image: minio/minio:RELEASE.2024-03-07T00-43-48Z
+ expose:
+ - 9000
+ - 9001
+ environment:
+ - MINIO_ROOT_USER=admin
+ - MINIO_ROOT_PASSWORD=password
+ - MINIO_DOMAIN=minio
+ command: [ "server", "/data", "--console-address", ":9001" ]
+
+ mc:
+ depends_on:
+ - minio
+ image: minio/mc:RELEASE.2024-03-07T00-31-49Z
+ environment:
+ - AWS_ACCESS_KEY_ID=admin
+ - AWS_SECRET_ACCESS_KEY=password
+ - AWS_REGION=us-east-1
+ entrypoint: >
+ /bin/sh -c " until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null "
+
+ hive-metastore:
+ image: iceberg-hive-metastore
+ build: ./
+ platform: ${DOCKER_DEFAULT_PLATFORM}
+ expose:
+ - 9083
+ environment:
+ SERVICE_NAME: "metastore"
+ SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"
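
The integration tests below bring this stack up and tear it down automatically through the `DockerCompose` test helper (see `before_all`/`after_all` in `hms_catalog_test.rs`). To experiment with it manually, something like `docker compose up -d` from `crates/catalog/hms/testdata/hms_catalog` should work, with `DOCKER_DEFAULT_PLATFORM` exported (e.g. `linux/amd64`), since the compose file references that variable for the metastore image.
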
diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs
new file mode 100644
index 000000000..5b8004439
--- /dev/null
+++ b/crates/catalog/hms/tests/hms_catalog_test.rs
@@ -0,0 +1,369 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Integration tests for hms catalog.
+
+use std::collections::HashMap;
+use std::net::SocketAddr;
+use std::sync::RwLock;
+
+use ctor::{ctor, dtor};
+use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
+use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
+use iceberg::{Catalog, Namespace, NamespaceIdent, TableCreation, TableIdent};
+use iceberg_catalog_hms::{HmsCatalog, HmsCatalogConfig, HmsThriftTransport};
+use iceberg_test_utils::docker::DockerCompose;
+use iceberg_test_utils::{normalize_test_name, set_up};
+use port_scanner::scan_port_addr;
+use tokio::time::sleep;
+
+const HMS_CATALOG_PORT: u16 = 9083;
+const MINIO_PORT: u16 = 9000;
+static DOCKER_COMPOSE_ENV: RwLock<Option<DockerCompose>> = RwLock::new(None);
+type Result<T> = std::result::Result<T, iceberg::Error>;
+
+#[ctor]
+fn before_all() {
+ let mut guard = DOCKER_COMPOSE_ENV.write().unwrap();
+ let docker_compose = DockerCompose::new(
+ normalize_test_name(module_path!()),
+ format!("{}/testdata/hms_catalog", env!("CARGO_MANIFEST_DIR")),
+ );
+ docker_compose.run();
+ guard.replace(docker_compose);
+}
+
+#[dtor]
+fn after_all() {
+ let mut guard = DOCKER_COMPOSE_ENV.write().unwrap();
+ guard.take();
+}
+
+async fn get_catalog() -> HmsCatalog {
+ set_up();
+
+ let (hms_catalog_ip, minio_ip) = {
+ let guard = DOCKER_COMPOSE_ENV.read().unwrap();
+ let docker_compose = guard.as_ref().unwrap();
+ (
+ docker_compose.get_container_ip("hive-metastore"),
+ docker_compose.get_container_ip("minio"),
+ )
+ };
+ let hms_socket_addr = SocketAddr::new(hms_catalog_ip, HMS_CATALOG_PORT);
+ let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT);
+ while !scan_port_addr(hms_socket_addr) {
+ log::info!("scan hms_socket_addr {} check", hms_socket_addr);
+ log::info!("Waiting for 1s hms catalog to ready...");
+ sleep(std::time::Duration::from_millis(1000)).await;
+ }
+
+ let props = HashMap::from([
+ (
+ S3_ENDPOINT.to_string(),
+ format!("http://{}", minio_socket_addr),
+ ),
+ (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
+ (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
+ (S3_REGION.to_string(), "us-east-1".to_string()),
+ ]);
+
+ let config = HmsCatalogConfig::builder()
+ .address(hms_socket_addr.to_string())
+ .thrift_transport(HmsThriftTransport::Buffered)
+ .warehouse("s3a://warehouse/hive".to_string())
+ .props(props)
+ .build();
+
+ HmsCatalog::new(config).unwrap()
+}
+
+async fn set_test_namespace(catalog: &HmsCatalog, namespace: &NamespaceIdent) -> Result<()> {
+ let properties = HashMap::new();
+
+ catalog.create_namespace(namespace, properties).await?;
+
+ Ok(())
+}
+
+fn set_table_creation(location: impl ToString, name: impl ToString) -> Result<TableCreation> {
+ let schema = Schema::builder()
+ .with_schema_id(0)
+ .with_fields(vec![
+ NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::required(2, "bar", Type::Primitive(PrimitiveType::String)).into(),
+ ])
+ .build()?;
+
+ let creation = TableCreation::builder()
+ .location(location.to_string())
+ .name(name.to_string())
+ .properties(HashMap::new())
+ .schema(schema)
+ .build();
+
+ Ok(creation)
+}
+
+#[tokio::test]
+async fn test_rename_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation: TableCreation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_rename_table".into()));
+ set_test_namespace(&catalog, namespace.name()).await?;
+
+ let table: iceberg::table::Table = catalog.create_table(namespace.name(), creation).await?;
+
+ let dest = TableIdent::new(namespace.name().clone(), "my_table_rename".to_string());
+
+ catalog.rename_table(table.identifier(), &dest).await?;
+
+ let result = catalog.table_exists(&dest).await?;
+
+ assert!(result);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_table_exists() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_table_exists".into()));
+ set_test_namespace(&catalog, namespace.name()).await?;
+
+ let table = catalog.create_table(namespace.name(), creation).await?;
+
+ let result = catalog.table_exists(table.identifier()).await?;
+
+ assert!(result);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_drop_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_drop_table".into()));
+ set_test_namespace(&catalog, namespace.name()).await?;
+
+ let table = catalog.create_table(namespace.name(), creation).await?;
+
+ catalog.drop_table(table.identifier()).await?;
+
+ let result = catalog.table_exists(table.identifier()).await?;
+
+ assert!(!result);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_load_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_load_table".into()));
+ set_test_namespace(&catalog, namespace.name()).await?;
+
+ let expected = catalog.create_table(namespace.name(), creation).await?;
+
+ let result = catalog
+ .load_table(&TableIdent::new(
+ namespace.name().clone(),
+ "my_table".to_string(),
+ ))
+ .await?;
+
+ assert_eq!(result.identifier(), expected.identifier());
+ assert_eq!(result.metadata_location(), expected.metadata_location());
+ assert_eq!(result.metadata(), expected.metadata());
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_create_table() -> Result<()> {
+ let catalog = get_catalog().await;
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ let namespace = Namespace::new(NamespaceIdent::new("test_create_table".into()));
+ set_test_namespace(&catalog, namespace.name()).await?;
+
+ let result = catalog.create_table(namespace.name(), creation).await?;
+
+ assert_eq!(result.identifier().name(), "my_table");
+ assert!(result
+ .metadata_location()
+ .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")));
+ assert!(
+ catalog
+ .file_io()
+ .is_exist("s3a://warehouse/hive/metadata/")
+ .await?
+ );
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_list_tables() -> Result<()> {
+ let catalog = get_catalog().await;
+ let ns = Namespace::new(NamespaceIdent::new("test_list_tables".into()));
+ let result = catalog.list_tables(ns.name()).await?;
+ set_test_namespace(&catalog, ns.name()).await?;
+
+ assert_eq!(result, vec![]);
+
+ let creation = set_table_creation("s3a://warehouse/hive", "my_table")?;
+ catalog.create_table(ns.name(), creation).await?;
+ let result = catalog.list_tables(ns.name()).await?;
+
+ assert_eq!(result, vec![TableIdent::new(
+ ns.name().clone(),
+ "my_table".to_string()
+ )]);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_list_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let result_no_parent = catalog.list_namespaces(None).await?;
+
+ let result_with_parent = catalog
+ .list_namespaces(Some(&NamespaceIdent::new("parent".into())))
+ .await?;
+
+ assert!(result_no_parent.contains(&NamespaceIdent::new("default".into())));
+ assert!(result_with_parent.is_empty());
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_create_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let properties = HashMap::from([
+ ("comment".to_string(), "my_description".to_string()),
+ ("location".to_string(), "my_location".to_string()),
+ (
+ "hive.metastore.database.owner".to_string(),
+ "apache".to_string(),
+ ),
+ (
+ "hive.metastore.database.owner-type".to_string(),
+ "user".to_string(),
+ ),
+ ("key1".to_string(), "value1".to_string()),
+ ]);
+
+ let ns = Namespace::with_properties(
+ NamespaceIdent::new("test_create_namespace".into()),
+ properties.clone(),
+ );
+
+ let result = catalog.create_namespace(ns.name(), properties).await?;
+
+ assert_eq!(result, ns);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_get_default_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let ns = Namespace::new(NamespaceIdent::new("default".into()));
+ let properties = HashMap::from([
+ ("location".to_string(), "s3a://warehouse/hive".to_string()),
+ (
+ "hive.metastore.database.owner-type".to_string(),
+ "Role".to_string(),
+ ),
+ ("comment".to_string(), "Default Hive database".to_string()),
+ (
+ "hive.metastore.database.owner".to_string(),
+ "public".to_string(),
+ ),
+ ]);
+
+ let expected = Namespace::with_properties(NamespaceIdent::new("default".into()), properties);
+
+ let result = catalog.get_namespace(ns.name()).await?;
+
+ assert_eq!(expected, result);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_namespace_exists() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let ns_exists = Namespace::new(NamespaceIdent::new("default".into()));
+ let ns_not_exists = Namespace::new(NamespaceIdent::new("test_namespace_exists".into()));
+
+ let result_exists = catalog.namespace_exists(ns_exists.name()).await?;
+ let result_not_exists = catalog.namespace_exists(ns_not_exists.name()).await?;
+
+ assert!(result_exists);
+ assert!(!result_not_exists);
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_update_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let ns = NamespaceIdent::new("test_update_namespace".into());
+ set_test_namespace(&catalog, &ns).await?;
+ let properties = HashMap::from([("comment".to_string(), "my_update".to_string())]);
+
+ catalog.update_namespace(&ns, properties).await?;
+
+ let db = catalog.get_namespace(&ns).await?;
+
+ assert_eq!(
+ db.properties().get("comment"),
+ Some(&"my_update".to_string())
+ );
+
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_drop_namespace() -> Result<()> {
+ let catalog = get_catalog().await;
+
+ let ns = Namespace::new(NamespaceIdent::new("delete_me".into()));
+
+ catalog.create_namespace(ns.name(), HashMap::new()).await?;
+
+ let result = catalog.namespace_exists(ns.name()).await?;
+ assert!(result);
+
+ catalog.drop_namespace(ns.name()).await?;
+
+ let result = catalog.namespace_exists(ns.name()).await?;
+ assert!(!result);
+
+ Ok(())
+}
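
The `get_catalog` helper above doubles as a usage reference for the public API. Stripped of the docker plumbing, connecting to an existing metastore reduces to the sketch below; the address and warehouse values are placeholders, and S3 credentials would go into `props` exactly as in `get_catalog`:

```rust
use std::collections::HashMap;

use iceberg_catalog_hms::{HmsCatalog, HmsCatalogConfig, HmsThriftTransport};

// Placeholder endpoint and warehouse; adjust to your deployment.
fn build_catalog() -> iceberg::Result<HmsCatalog> {
    let config = HmsCatalogConfig::builder()
        .address("localhost:9083".to_string())
        .thrift_transport(HmsThriftTransport::Buffered)
        .warehouse("s3a://warehouse/hive".to_string())
        .props(HashMap::new())
        .build();

    HmsCatalog::new(config)
}
```
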
diff --git a/crates/catalog/memory/Cargo.toml b/crates/catalog/memory/Cargo.toml
new file mode 100644
index 000000000..011479efc
--- /dev/null
+++ b/crates/catalog/memory/Cargo.toml
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "iceberg-catalog-memory"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+rust-version = { workspace = true }
+
+categories = ["database"]
+description = "Apache Iceberg Rust Memory Catalog API"
+repository = { workspace = true }
+license = { workspace = true }
+keywords = ["iceberg", "memory", "catalog"]
+
+[dependencies]
+async-trait = { workspace = true }
+futures = { workspace = true }
+iceberg = { workspace = true }
+itertools = { workspace = true }
+serde_json = { workspace = true }
+uuid = { workspace = true, features = ["v4"] }
+
+[dev-dependencies]
+regex = { workspace = true }
+tempfile = { workspace = true }
+tokio = { workspace = true }
diff --git a/crates/catalog/memory/DEPENDENCIES.rust.tsv b/crates/catalog/memory/DEPENDENCIES.rust.tsv
new file mode 100644
index 000000000..b4617eedb
--- /dev/null
+++ b/crates/catalog/memory/DEPENDENCIES.rust.tsv
@@ -0,0 +1,276 @@
+crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC MIT MPL-2.0 OpenSSL Unicode-DFS-2016 Unlicense Zlib
+addr2line@0.22.0 X X
+adler@1.0.2 X X X
+adler32@1.2.0 X
+ahash@0.8.11 X X
+aho-corasick@1.1.3 X X
+alloc-no-stdlib@2.0.4 X
+alloc-stdlib@0.2.2 X
+allocator-api2@0.2.18 X X
+android-tzdata@0.1.1 X X
+android_system_properties@0.1.5 X X
+anstream@0.6.15 X X
+anstyle@1.0.8 X X
+anstyle-parse@0.2.5 X X
+anstyle-query@1.1.1 X X
+anstyle-wincon@3.0.4 X X
+anyhow@1.0.86 X X
+apache-avro@0.17.0 X
+array-init@2.1.0 X X
+arrayvec@0.7.4 X X
+arrow-arith@52.2.0 X
+arrow-array@52.2.0 X
+arrow-buffer@52.2.0 X
+arrow-cast@52.2.0 X
+arrow-data@52.2.0 X
+arrow-ipc@52.2.0 X
+arrow-ord@52.2.0 X
+arrow-schema@52.2.0 X
+arrow-select@52.2.0 X
+arrow-string@52.2.0 X
+async-trait@0.1.81 X X
+atoi@2.0.0 X
+autocfg@1.3.0 X X
+backon@0.4.4 X
+backtrace@0.3.73 X X
+base64@0.22.1 X X
+bigdecimal@0.4.5 X X
+bimap@0.6.3 X X
+bitflags@1.3.2 X X
+bitvec@1.0.1 X
+block-buffer@0.10.4 X X
+brotli@6.0.0 X X
+brotli-decompressor@4.0.1 X X
+bumpalo@3.16.0 X X
+byteorder@1.5.0 X X
+bytes@1.7.1 X
+cc@1.1.11 X X
+cfg-if@1.0.0 X X
+chrono@0.4.38 X X
+colorchoice@1.0.2 X X
+const-oid@0.9.6 X X
+const-random@0.1.18 X X
+const-random-macro@0.1.16 X X
+core-foundation-sys@0.8.7 X X
+core2@0.4.0 X X
+cpufeatures@0.2.13 X X
+crc32c@0.6.8 X X
+crc32fast@1.4.2 X X
+crunchy@0.2.2 X
+crypto-common@0.1.6 X X
+darling@0.20.10 X
+darling_core@0.20.10 X
+darling_macro@0.20.10 X
+dary_heap@0.3.6 X X
+derive_builder@0.20.0 X X
+derive_builder_core@0.20.0 X X
+derive_builder_macro@0.20.0 X X
+digest@0.10.7 X X
+either@1.13.0 X X
+env_filter@0.1.2 X X
+env_logger@0.11.5 X X
+fastrand@2.1.0 X X
+flagset@0.4.6 X
+flatbuffers@24.3.25 X
+flate2@1.0.31 X X
+fnv@1.0.7 X X
+form_urlencoded@1.2.1 X X
+funty@2.0.0 X
+futures@0.3.30 X X
+futures-channel@0.3.30 X X
+futures-core@0.3.30 X X
+futures-executor@0.3.30 X X
+futures-io@0.3.30 X X
+futures-macro@0.3.30 X X
+futures-sink@0.3.30 X X
+futures-task@0.3.30 X X
+futures-util@0.3.30 X X
+generic-array@0.14.7 X
+getrandom@0.2.15 X X
+gimli@0.29.0 X X
+half@2.4.1 X X
+hashbrown@0.14.5 X X
+heck@0.5.0 X X
+hermit-abi@0.3.9 X X
+hex@0.4.3 X X
+hmac@0.12.1 X X
+home@0.5.9 X X
+http@1.1.0 X X
+http-body@1.0.1 X
+http-body-util@0.1.2 X
+httparse@1.9.4 X X
+humantime@2.1.0 X X
+hyper@1.4.1 X
+hyper-rustls@0.27.2 X X X
+hyper-util@0.1.7 X
+iana-time-zone@0.1.60 X X
+iana-time-zone-haiku@0.1.2 X X
+iceberg@0.3.0 X
+iceberg-catalog-memory@0.3.0 X
+iceberg_test_utils@0.3.0 X
+ident_case@1.0.1 X X
+idna@0.5.0 X X
+integer-encoding@3.0.4 X
+ipnet@2.9.0 X X
+is_terminal_polyfill@1.70.1 X X
+itertools@0.13.0 X X
+itoa@1.0.11 X X
+jobserver@0.1.32 X X
+js-sys@0.3.70 X X
+lexical-core@0.8.5 X X
+lexical-parse-float@0.8.5 X X
+lexical-parse-integer@0.8.6 X X
+lexical-util@0.8.5 X X
+lexical-write-float@0.8.5 X X
+lexical-write-integer@0.8.5 X X
+libc@0.2.155 X X
+libflate@2.1.0 X
+libflate_lz77@2.1.0 X
+libm@0.2.8 X X
+log@0.4.22 X X
+lz4_flex@0.11.3 X
+md-5@0.10.6 X X
+memchr@2.7.4 X X
+mime@0.3.17 X X
+miniz_oxide@0.7.4 X X X
+mio@1.0.2 X
+murmur3@0.5.2 X X
+num@0.4.3 X X
+num-bigint@0.4.6 X X
+num-complex@0.4.6 X X
+num-integer@0.1.46 X X
+num-iter@0.1.45 X X
+num-rational@0.4.2 X X
+num-traits@0.2.19 X X
+object@0.36.3 X X
+once_cell@1.19.0 X X
+opendal@0.49.0 X
+ordered-float@2.10.1 X
+ordered-float@4.2.2 X
+parquet@52.2.0 X
+paste@1.0.15 X X
+percent-encoding@2.3.1 X X
+pin-project@1.1.5 X X
+pin-project-internal@1.1.5 X X
+pin-project-lite@0.2.14 X X
+pin-utils@0.1.0 X X
+pkg-config@0.3.30 X X
+ppv-lite86@0.2.20 X X
+proc-macro2@1.0.86 X X
+quad-rand@0.2.1 X
+quick-xml@0.36.1 X
+quote@1.0.36 X X
+radium@0.7.0 X
+rand@0.8.5 X X
+rand_chacha@0.3.1 X X
+rand_core@0.6.4 X X
+regex@1.10.6 X X
+regex-automata@0.4.7 X X
+regex-lite@0.1.6 X X
+regex-syntax@0.8.4 X X
+reqsign@0.16.0 X
+reqwest@0.12.5 X X
+ring@0.17.8 X
+rle-decode-fast@1.0.3 X X
+rust_decimal@1.35.0 X
+rustc-demangle@0.1.24 X X
+rustc_version@0.4.0 X X
+rustls@0.23.12 X X X
+rustls-pemfile@2.1.3 X X X
+rustls-pki-types@1.8.0 X X
+rustls-webpki@0.102.6 X
+rustversion@1.0.17 X X
+ryu@1.0.18 X X
+semver@1.0.23 X X
+seq-macro@0.3.5 X X
+serde@1.0.207 X X
+serde_bytes@0.11.15 X X
+serde_derive@1.0.207 X X
+serde_json@1.0.124 X X
+serde_repr@0.1.19 X X
+serde_urlencoded@0.7.1 X X
+serde_with@3.9.0 X X
+serde_with_macros@3.9.0 X X
+sha1@0.10.6 X X
+sha2@0.10.8 X X
+shlex@1.3.0 X X
+slab@0.4.9 X
+smallvec@1.13.2 X X
+snap@1.1.1 X
+socket2@0.5.7 X X
+spin@0.9.8 X
+static_assertions@1.1.0 X X
+strsim@0.11.1 X
+strum@0.26.3 X
+strum_macros@0.26.4 X
+subtle@2.6.1 X
+syn@2.0.74 X X
+sync_wrapper@1.0.1 X
+tap@1.0.1 X
+thiserror@1.0.63 X X
+thiserror-impl@1.0.63 X X
+thrift@0.17.0 X
+tiny-keccak@2.0.2 X
+tinyvec@1.8.0 X X X
+tinyvec_macros@0.1.1 X X X
+tokio@1.39.2 X
+tokio-macros@2.4.0 X
+tokio-rustls@0.26.0 X X
+tokio-util@0.7.11 X
+tower@0.4.13 X
+tower-layer@0.3.3 X
+tower-service@0.3.3 X
+tracing@0.1.40 X
+tracing-core@0.1.32 X
+try-lock@0.2.5 X
+twox-hash@1.6.3 X
+typed-builder@0.19.1 X X
+typed-builder-macro@0.19.1 X X
+typenum@1.17.0 X X
+unicode-bidi@0.3.15 X X
+unicode-ident@1.0.12 X X X
+unicode-normalization@0.1.23 X X
+untrusted@0.9.0 X
+url@2.5.2 X X
+utf8parse@0.2.2 X X
+uuid@1.10.0 X X
+version_check@0.9.5 X X
+want@0.3.1 X
+wasi@0.11.0+wasi-snapshot-preview1 X X X
+wasm-bindgen@0.2.93 X X
+wasm-bindgen-backend@0.2.93 X X
+wasm-bindgen-futures@0.4.43 X X
+wasm-bindgen-macro@0.2.93 X X
+wasm-bindgen-macro-support@0.2.93 X X
+wasm-bindgen-shared@0.2.93 X X
+wasm-streams@0.4.0 X X
+web-sys@0.3.70 X X
+webpki-roots@0.26.3 X
+windows-core@0.52.0 X X
+windows-sys@0.48.0 X X
+windows-sys@0.52.0 X X
+windows-targets@0.48.5 X X
+windows-targets@0.52.6 X X
+windows_aarch64_gnullvm@0.48.5 X X
+windows_aarch64_gnullvm@0.52.6 X X
+windows_aarch64_msvc@0.48.5 X X
+windows_aarch64_msvc@0.52.6 X X
+windows_i686_gnu@0.48.5 X X
+windows_i686_gnu@0.52.6 X X
+windows_i686_gnullvm@0.52.6 X X
+windows_i686_msvc@0.48.5 X X
+windows_i686_msvc@0.52.6 X X
+windows_x86_64_gnu@0.48.5 X X
+windows_x86_64_gnu@0.52.6 X X
+windows_x86_64_gnullvm@0.48.5 X X
+windows_x86_64_gnullvm@0.52.6 X X
+windows_x86_64_msvc@0.48.5 X X
+windows_x86_64_msvc@0.52.6 X X
+winreg@0.52.0 X
+wyz@0.5.1 X
+zerocopy@0.7.35 X X X
+zerocopy-derive@0.7.35 X X X
+zeroize@1.8.1 X X
+zstd@0.13.2 X
+zstd-safe@7.2.1 X X
+zstd-sys@2.0.12+zstd.1.5.6 X X
diff --git a/crates/catalog/memory/README.md b/crates/catalog/memory/README.md
new file mode 100644
index 000000000..5b04f78ab
--- /dev/null
+++ b/crates/catalog/memory/README.md
@@ -0,0 +1,27 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements. See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership. The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License. You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied. See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+
+# Apache Iceberg Memory Catalog Official Native Rust Implementation
+
+[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-memory.svg)](https://crates.io/crates/iceberg-catalog-memory)
+[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-memory.svg)](https://docs.rs/iceberg-catalog-memory/latest/iceberg_catalog_memory/)
+
+This crate contains the official Native Rust implementation of Apache Iceberg Memory Catalog.
+
+See the [API documentation](https://docs.rs/iceberg-catalog-memory/latest) for examples and the full API.
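
Since the README defers to the API docs for examples, here is a minimal usage sketch based on the constructor and `Catalog` impl added in `catalog.rs` below. The warehouse path is a placeholder, and the async runtime is assumed to be tokio (which this crate only lists as a dev-dependency):

```rust
use std::collections::HashMap;

use iceberg::io::FileIOBuilder;
use iceberg::{Catalog, NamespaceIdent};
use iceberg_catalog_memory::MemoryCatalog;

#[tokio::main]
async fn main() -> iceberg::Result<()> {
    // Local-filesystem FileIO; "/tmp/warehouse" is a placeholder root.
    let file_io = FileIOBuilder::new_fs_io().build()?;
    let catalog = MemoryCatalog::new(file_io, Some("/tmp/warehouse".to_string()));

    let ns = NamespaceIdent::new("ns".to_string());
    catalog.create_namespace(&ns, HashMap::new()).await?;
    assert!(catalog.namespace_exists(&ns).await?);

    Ok(())
}
```
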
diff --git a/crates/catalog/memory/src/catalog.rs b/crates/catalog/memory/src/catalog.rs
new file mode 100644
index 000000000..1da044821
--- /dev/null
+++ b/crates/catalog/memory/src/catalog.rs
@@ -0,0 +1,1678 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module contains the memory catalog implementation.
+
+use std::collections::HashMap;
+
+use async_trait::async_trait;
+use futures::lock::Mutex;
+use iceberg::io::FileIO;
+use iceberg::spec::{TableMetadata, TableMetadataBuilder};
+use iceberg::table::Table;
+use iceberg::{
+ Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation,
+ TableIdent,
+};
+use itertools::Itertools;
+use uuid::Uuid;
+
+use crate::namespace_state::NamespaceState;
+
+/// namespace `location` property
+const LOCATION: &str = "location";
+
+/// Memory catalog implementation.
+#[derive(Debug)]
+pub struct MemoryCatalog {
+ root_namespace_state: Mutex<NamespaceState>,
+ file_io: FileIO,
+ warehouse_location: Option<String>,
+}
+
+impl MemoryCatalog {
+ /// Creates a new memory catalog.
+ pub fn new(file_io: FileIO, warehouse_location: Option<String>) -> Self {
+ Self {
+ root_namespace_state: Mutex::new(NamespaceState::default()),
+ file_io,
+ warehouse_location,
+ }
+ }
+}
+
+#[async_trait]
+impl Catalog for MemoryCatalog {
+ /// List namespaces inside the catalog.
+ async fn list_namespaces(
+ &self,
+ maybe_parent: Option<&NamespaceIdent>,
+ ) -> Result<Vec<NamespaceIdent>> {
+ let root_namespace_state = self.root_namespace_state.lock().await;
+
+ match maybe_parent {
+ None => {
+ let namespaces = root_namespace_state
+ .list_top_level_namespaces()
+ .into_iter()
+ .map(|str| NamespaceIdent::new(str.to_string()))
+ .collect_vec();
+
+ Ok(namespaces)
+ }
+ Some(parent_namespace_ident) => {
+ let namespaces = root_namespace_state
+ .list_namespaces_under(parent_namespace_ident)?
+ .into_iter()
+ .map(|name| NamespaceIdent::new(name.to_string()))
+ .collect_vec();
+
+ Ok(namespaces)
+ }
+ }
+ }
+
+ /// Create a new namespace inside the catalog.
+ async fn create_namespace(
+ &self,
+ namespace_ident: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<Namespace> {
+ let mut root_namespace_state = self.root_namespace_state.lock().await;
+
+ root_namespace_state.insert_new_namespace(namespace_ident, properties.clone())?;
+ let namespace = Namespace::with_properties(namespace_ident.clone(), properties);
+
+ Ok(namespace)
+ }
+
+ /// Get a namespace information from the catalog.
+ async fn get_namespace(&self, namespace_ident: &NamespaceIdent) -> Result {
+ let root_namespace_state = self.root_namespace_state.lock().await;
+
+ let namespace = Namespace::with_properties(
+ namespace_ident.clone(),
+ root_namespace_state
+ .get_properties(namespace_ident)?
+ .clone(),
+ );
+
+ Ok(namespace)
+ }
+
+ /// Check if namespace exists in catalog.
+ async fn namespace_exists(&self, namespace_ident: &NamespaceIdent) -> Result<bool> {
+ let guarded_namespaces = self.root_namespace_state.lock().await;
+
+ Ok(guarded_namespaces.namespace_exists(namespace_ident))
+ }
+
+ /// Update a namespace inside the catalog.
+ ///
+ /// # Behavior
+ ///
+ /// The given properties must be the full property set for the namespace; they replace any existing properties.
+ async fn update_namespace(
+ &self,
+ namespace_ident: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<()> {
+ let mut root_namespace_state = self.root_namespace_state.lock().await;
+
+ root_namespace_state.replace_properties(namespace_ident, properties)
+ }
+
+ /// Drop a namespace from the catalog.
+ async fn drop_namespace(&self, namespace_ident: &NamespaceIdent) -> Result<()> {
+ let mut root_namespace_state = self.root_namespace_state.lock().await;
+
+ root_namespace_state.remove_existing_namespace(namespace_ident)
+ }
+
+ /// List tables from namespace.
+ async fn list_tables(&self, namespace_ident: &NamespaceIdent) -> Result<Vec<TableIdent>> {
+ let root_namespace_state = self.root_namespace_state.lock().await;
+
+ let table_names = root_namespace_state.list_tables(namespace_ident)?;
+ let table_idents = table_names
+ .into_iter()
+ .map(|table_name| TableIdent::new(namespace_ident.clone(), table_name.clone()))
+ .collect_vec();
+
+ Ok(table_idents)
+ }
+
+ /// Create a new table inside the namespace.
+ async fn create_table(
+ &self,
+ namespace_ident: &NamespaceIdent,
+ table_creation: TableCreation,
+ ) -> Result<Table> {
+ let mut root_namespace_state = self.root_namespace_state.lock().await;
+
+ let table_name = table_creation.name.clone();
+ let table_ident = TableIdent::new(namespace_ident.clone(), table_name);
+
+ let (table_creation, location) = match table_creation.location.clone() {
+ Some(location) => (table_creation, location),
+ None => {
+ let namespace_properties = root_namespace_state.get_properties(namespace_ident)?;
+ let location_prefix = match namespace_properties.get(LOCATION) {
+ Some(namespace_location) => Ok(namespace_location.clone()),
+ None => match self.warehouse_location.clone() {
+ Some(warehouse_location) => Ok(format!("{}/{}", warehouse_location, namespace_ident.join("/"))),
+ None => Err(Error::new(ErrorKind::Unexpected,
+ format!(
+ "Cannot create table {:?}. No default path is set, please specify a location when creating a table.",
+ &table_ident
+ )))
+ },
+ }?;
+
+ let location = format!("{}/{}", location_prefix, table_ident.name());
+
+ let new_table_creation = TableCreation {
+ location: Some(location.clone()),
+ ..table_creation
+ };
+
+ (new_table_creation, location)
+ }
+ };
+
+ let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
+ let metadata_location = format!(
+ "{}/metadata/{}-{}.metadata.json",
+ &location,
+ 0,
+ Uuid::new_v4()
+ );
+
+ self.file_io
+ .new_output(&metadata_location)?
+ .write(serde_json::to_vec(&metadata)?.into())
+ .await?;
+
+ root_namespace_state.insert_new_table(&table_ident, metadata_location.clone())?;
+
+ Table::builder()
+ .file_io(self.file_io.clone())
+ .metadata_location(metadata_location)
+ .metadata(metadata)
+ .identifier(table_ident)
+ .build()
+ }
+
+ /// Load table from the catalog.
+ async fn load_table(&self, table_ident: &TableIdent) -> Result<Table> {
+ let root_namespace_state = self.root_namespace_state.lock().await;
+
+ let metadata_location = root_namespace_state.get_existing_table_location(table_ident)?;
+ let input_file = self.file_io.new_input(metadata_location)?;
+ let metadata_content = input_file.read().await?;
+ let metadata = serde_json::from_slice::<TableMetadata>(&metadata_content)?;
+
+ Table::builder()
+ .file_io(self.file_io.clone())
+ .metadata_location(metadata_location.clone())
+ .metadata(metadata)
+ .identifier(table_ident.clone())
+ .build()
+ }
+
+ /// Drop a table from the catalog.
+ async fn drop_table(&self, table_ident: &TableIdent) -> Result<()> {
+ let mut root_namespace_state = self.root_namespace_state.lock().await;
+
+ root_namespace_state.remove_existing_table(table_ident)
+ }
+
+ /// Check if a table exists in the catalog.
+ async fn table_exists(&self, table_ident: &TableIdent) -> Result {
+ let root_namespace_state = self.root_namespace_state.lock().await;
+
+ root_namespace_state.table_exists(table_ident)
+ }
+
+ /// Rename a table in the catalog.
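+ ///
+ /// The rename is staged on a clone of the namespace state and swapped in
+ /// only after both the removal of the source table and the insertion of the
+ /// destination table succeed, so a failed rename leaves the catalog unchanged.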
+ async fn rename_table(
+ &self,
+ src_table_ident: &TableIdent,
+ dst_table_ident: &TableIdent,
+ ) -> Result<()> {
+ let mut root_namespace_state = self.root_namespace_state.lock().await;
+
+ let mut new_root_namespace_state = root_namespace_state.clone();
+ let metadata_location = new_root_namespace_state
+ .get_existing_table_location(src_table_ident)?
+ .clone();
+ new_root_namespace_state.remove_existing_table(src_table_ident)?;
+ new_root_namespace_state.insert_new_table(dst_table_ident, metadata_location)?;
+ *root_namespace_state = new_root_namespace_state;
+
+ Ok(())
+ }
+
+ /// Update a table in the catalog.
+ async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
+ Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "MemoryCatalog does not currently support updating tables.",
+ ))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::collections::HashSet;
+ use std::hash::Hash;
+ use std::iter::FromIterator;
+
+ use iceberg::io::FileIOBuilder;
+ use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type};
+ use regex::Regex;
+ use tempfile::TempDir;
+
+ use super::*;
+
+ fn temp_path() -> String {
+ let temp_dir = TempDir::new().unwrap();
+ temp_dir.path().to_str().unwrap().to_string()
+ }
+
+ fn new_memory_catalog() -> impl Catalog {
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let warehouse_location = temp_path();
+ MemoryCatalog::new(file_io, Some(warehouse_location))
+ }
+
+ async fn create_namespace<C: Catalog>(catalog: &C, namespace_ident: &NamespaceIdent) {
+ let _ = catalog
+ .create_namespace(namespace_ident, HashMap::new())
+ .await
+ .unwrap();
+ }
+
+ async fn create_namespaces<C: Catalog>(catalog: &C, namespace_idents: &Vec<&NamespaceIdent>) {
+ for namespace_ident in namespace_idents {
+ let _ = create_namespace(catalog, namespace_ident).await;
+ }
+ }
+
+ fn to_set<T: Hash + Eq>(vec: Vec<T>) -> HashSet<T> {
+ HashSet::from_iter(vec)
+ }
+
+ fn simple_table_schema() -> Schema {
+ Schema::builder()
+ .with_fields(vec![NestedField::required(
+ 1,
+ "foo",
+ Type::Primitive(PrimitiveType::Int),
+ )
+ .into()])
+ .build()
+ .unwrap()
+ }
+
+ async fn create_table<C: Catalog>(catalog: &C, table_ident: &TableIdent) {
+ let _ = catalog
+ .create_table(
+ &table_ident.namespace,
+ TableCreation::builder()
+ .name(table_ident.name().into())
+ .schema(simple_table_schema())
+ .build(),
+ )
+ .await
+ .unwrap();
+ }
+
+ async fn create_tables<C: Catalog>(catalog: &C, table_idents: Vec<&TableIdent>) {
+ for table_ident in table_idents {
+ create_table(catalog, table_ident).await;
+ }
+ }
+
+ fn assert_table_eq(table: &Table, expected_table_ident: &TableIdent, expected_schema: &Schema) {
+ assert_eq!(table.identifier(), expected_table_ident);
+
+ let metadata = table.metadata();
+
+ assert_eq!(metadata.current_schema().as_ref(), expected_schema);
+
+ let expected_partition_spec = PartitionSpec::builder(expected_schema)
+ .with_spec_id(0)
+ .build()
+ .unwrap();
+
+ assert_eq!(
+ metadata
+ .partition_specs_iter()
+ .map(|p| p.as_ref())
+ .collect_vec(),
+ vec![&expected_partition_spec]
+ );
+
+ let expected_sorted_order = SortOrder::builder()
+ .with_order_id(0)
+ .with_fields(vec![])
+ .build(expected_schema)
+ .unwrap();
+
+ assert_eq!(
+ metadata
+ .sort_orders_iter()
+ .map(|s| s.as_ref())
+ .collect_vec(),
+ vec![&expected_sorted_order]
+ );
+
+ assert_eq!(metadata.properties(), &HashMap::new());
+
+ assert!(!table.readonly());
+ }
+
+ const UUID_REGEX_STR: &str = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
+
+ fn assert_table_metadata_location_matches(table: &Table, regex_str: &str) {
+ let actual = table.metadata_location().unwrap().to_string();
+ let regex = Regex::new(regex_str).unwrap();
+ assert!(regex.is_match(&actual))
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_empty_vector() {
+ let catalog = new_memory_catalog();
+
+ assert_eq!(catalog.list_namespaces(None).await.unwrap(), vec![]);
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_single_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("abc".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ assert_eq!(catalog.list_namespaces(None).await.unwrap(), vec![
+ namespace_ident
+ ]);
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_multiple_namespaces() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_1 = NamespaceIdent::new("a".into());
+ let namespace_ident_2 = NamespaceIdent::new("b".into());
+ create_namespaces(&catalog, &vec![&namespace_ident_1, &namespace_ident_2]).await;
+
+ assert_eq!(
+ to_set(catalog.list_namespaces(None).await.unwrap()),
+ to_set(vec![namespace_ident_1, namespace_ident_2])
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_only_top_level_namespaces() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_1 = NamespaceIdent::new("a".into());
+ let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_3 = NamespaceIdent::new("b".into());
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_1,
+ &namespace_ident_2,
+ &namespace_ident_3,
+ ])
+ .await;
+
+ assert_eq!(
+ to_set(catalog.list_namespaces(None).await.unwrap()),
+ to_set(vec![namespace_ident_1, namespace_ident_3])
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_no_namespaces_under_parent() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_1 = NamespaceIdent::new("a".into());
+ let namespace_ident_2 = NamespaceIdent::new("b".into());
+ create_namespaces(&catalog, &vec![&namespace_ident_1, &namespace_ident_2]).await;
+
+ assert_eq!(
+ catalog
+ .list_namespaces(Some(&namespace_ident_1))
+ .await
+ .unwrap(),
+ vec![]
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_namespace_under_parent() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_1 = NamespaceIdent::new("a".into());
+ let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_3 = NamespaceIdent::new("c".into());
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_1,
+ &namespace_ident_2,
+ &namespace_ident_3,
+ ])
+ .await;
+
+ assert_eq!(
+ to_set(catalog.list_namespaces(None).await.unwrap()),
+ to_set(vec![namespace_ident_1.clone(), namespace_ident_3])
+ );
+
+ assert_eq!(
+ catalog
+ .list_namespaces(Some(&namespace_ident_1))
+ .await
+ .unwrap(),
+ vec![NamespaceIdent::new("b".into())]
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_namespaces_returns_multiple_namespaces_under_parent() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_1 = NamespaceIdent::new("a".to_string());
+ let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "a"]).unwrap();
+ let namespace_ident_3 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_4 = NamespaceIdent::from_strs(vec!["a", "c"]).unwrap();
+ let namespace_ident_5 = NamespaceIdent::new("b".into());
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_1,
+ &namespace_ident_2,
+ &namespace_ident_3,
+ &namespace_ident_4,
+ &namespace_ident_5,
+ ])
+ .await;
+
+ assert_eq!(
+ to_set(
+ catalog
+ .list_namespaces(Some(&namespace_ident_1))
+ .await
+ .unwrap()
+ ),
+ to_set(vec![
+ NamespaceIdent::new("a".into()),
+ NamespaceIdent::new("b".into()),
+ NamespaceIdent::new("c".into()),
+ ])
+ );
+ }
+
+ #[tokio::test]
+ async fn test_namespace_exists_returns_false() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ assert!(!catalog
+ .namespace_exists(&NamespaceIdent::new("b".into()))
+ .await
+ .unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_namespace_exists_returns_true() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ assert!(catalog.namespace_exists(&namespace_ident).await.unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_create_namespace_with_empty_properties() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+
+ assert_eq!(
+ catalog
+ .create_namespace(&namespace_ident, HashMap::new())
+ .await
+ .unwrap(),
+ Namespace::new(namespace_ident.clone())
+ );
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident).await.unwrap(),
+ Namespace::with_properties(namespace_ident, HashMap::new())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_create_namespace_with_properties() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("abc".into());
+
+ let mut properties: HashMap<String, String> = HashMap::new();
+ properties.insert("k".into(), "v".into());
+
+ assert_eq!(
+ catalog
+ .create_namespace(&namespace_ident, properties.clone())
+ .await
+ .unwrap(),
+ Namespace::with_properties(namespace_ident.clone(), properties.clone())
+ );
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident).await.unwrap(),
+ Namespace::with_properties(namespace_ident, properties)
+ );
+ }
+
+ #[tokio::test]
+ async fn test_create_namespace_throws_error_if_namespace_already_exists() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ assert_eq!(
+ catalog
+ .create_namespace(&namespace_ident, HashMap::new())
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => Cannot create namespace {:?}. Namespace already exists.",
+ &namespace_ident
+ )
+ );
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident).await.unwrap(),
+ Namespace::with_properties(namespace_ident, HashMap::new())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_create_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let parent_namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &parent_namespace_ident).await;
+
+ let child_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+
+ assert_eq!(
+ catalog
+ .create_namespace(&child_namespace_ident, HashMap::new())
+ .await
+ .unwrap(),
+ Namespace::new(child_namespace_ident.clone())
+ );
+
+ assert_eq!(
+ catalog.get_namespace(&child_namespace_ident).await.unwrap(),
+ Namespace::with_properties(child_namespace_ident, HashMap::new())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_create_deeply_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
+
+ assert_eq!(
+ catalog
+ .create_namespace(&namespace_ident_a_b_c, HashMap::new())
+ .await
+ .unwrap(),
+ Namespace::new(namespace_ident_a_b_c.clone())
+ );
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident_a_b_c).await.unwrap(),
+ Namespace::with_properties(namespace_ident_a_b_c, HashMap::new())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_create_nested_namespace_throws_error_if_top_level_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+
+ let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+
+ assert_eq!(
+ catalog
+ .create_namespace(&nested_namespace_ident, HashMap::new())
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ NamespaceIdent::new("a".into())
+ )
+ );
+
+ assert_eq!(catalog.list_namespaces(None).await.unwrap(), vec![]);
+ }
+
+ #[tokio::test]
+ async fn test_create_deeply_nested_namespace_throws_error_if_intermediate_namespace_doesnt_exist(
+ ) {
+ let catalog = new_memory_catalog();
+
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident_a).await;
+
+ let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
+
+ assert_eq!(
+ catalog
+ .create_namespace(&namespace_ident_a_b_c, HashMap::new())
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ NamespaceIdent::from_strs(vec!["a", "b"]).unwrap()
+ )
+ );
+
+ assert_eq!(catalog.list_namespaces(None).await.unwrap(), vec![
+ namespace_ident_a.clone()
+ ]);
+
+ assert_eq!(
+ catalog
+ .list_namespaces(Some(&namespace_ident_a))
+ .await
+ .unwrap(),
+ vec![]
+ );
+ }
+
+ #[tokio::test]
+ async fn test_get_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("abc".into());
+
+ let mut properties: HashMap<String, String> = HashMap::new();
+ properties.insert("k".into(), "v".into());
+ let _ = catalog
+ .create_namespace(&namespace_ident, properties.clone())
+ .await
+ .unwrap();
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident).await.unwrap(),
+ Namespace::with_properties(namespace_ident, properties)
+ )
+ }
+
+ #[tokio::test]
+ async fn test_get_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident_a_b).await.unwrap(),
+ Namespace::with_properties(namespace_ident_a_b, HashMap::new())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_get_deeply_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_a,
+ &namespace_ident_a_b,
+ &namespace_ident_a_b_c,
+ ])
+ .await;
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident_a_b_c).await.unwrap(),
+ Namespace::with_properties(namespace_ident_a_b_c, HashMap::new())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_get_namespace_throws_error_if_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+ create_namespace(&catalog, &NamespaceIdent::new("a".into())).await;
+
+ let non_existent_namespace_ident = NamespaceIdent::new("b".into());
+ assert_eq!(
+ catalog
+ .get_namespace(&non_existent_namespace_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ )
+ )
+ }
+
+ #[tokio::test]
+ async fn test_update_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("abc".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ let mut new_properties: HashMap<String, String> = HashMap::new();
+ new_properties.insert("k".into(), "v".into());
+
+ catalog
+ .update_namespace(&namespace_ident, new_properties.clone())
+ .await
+ .unwrap();
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident).await.unwrap(),
+ Namespace::with_properties(namespace_ident, new_properties)
+ )
+ }
+
+ #[tokio::test]
+ async fn test_update_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ let mut new_properties = HashMap::new();
+ new_properties.insert("k".into(), "v".into());
+
+ catalog
+ .update_namespace(&namespace_ident_a_b, new_properties.clone())
+ .await
+ .unwrap();
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident_a_b).await.unwrap(),
+ Namespace::with_properties(namespace_ident_a_b, new_properties)
+ );
+ }
+
+ #[tokio::test]
+ async fn test_update_deeply_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_a,
+ &namespace_ident_a_b,
+ &namespace_ident_a_b_c,
+ ])
+ .await;
+
+ let mut new_properties = HashMap::new();
+ new_properties.insert("k".into(), "v".into());
+
+ catalog
+ .update_namespace(&namespace_ident_a_b_c, new_properties.clone())
+ .await
+ .unwrap();
+
+ assert_eq!(
+ catalog.get_namespace(&namespace_ident_a_b_c).await.unwrap(),
+ Namespace::with_properties(namespace_ident_a_b_c, new_properties)
+ );
+ }
+
+ #[tokio::test]
+ async fn test_update_namespace_throws_error_if_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+ create_namespace(&catalog, &NamespaceIdent::new("abc".into())).await;
+
+ let non_existent_namespace_ident = NamespaceIdent::new("def".into());
+ assert_eq!(
+ catalog
+ .update_namespace(&non_existent_namespace_ident, HashMap::new())
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ )
+ )
+ }
+
+ #[tokio::test]
+ async fn test_drop_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("abc".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ catalog.drop_namespace(&namespace_ident).await.unwrap();
+
+ assert!(!catalog.namespace_exists(&namespace_ident).await.unwrap())
+ }
+
+ #[tokio::test]
+ async fn test_drop_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ catalog.drop_namespace(&namespace_ident_a_b).await.unwrap();
+
+ assert!(!catalog
+ .namespace_exists(&namespace_ident_a_b)
+ .await
+ .unwrap());
+
+ assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_drop_deeply_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_a,
+ &namespace_ident_a_b,
+ &namespace_ident_a_b_c,
+ ])
+ .await;
+
+ catalog
+ .drop_namespace(&namespace_ident_a_b_c)
+ .await
+ .unwrap();
+
+ assert!(!catalog
+ .namespace_exists(&namespace_ident_a_b_c)
+ .await
+ .unwrap());
+
+ assert!(catalog
+ .namespace_exists(&namespace_ident_a_b)
+ .await
+ .unwrap());
+
+ assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_drop_namespace_throws_error_if_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+
+ let non_existent_namespace_ident = NamespaceIdent::new("abc".into());
+ assert_eq!(
+ catalog
+ .drop_namespace(&non_existent_namespace_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ )
+ )
+ }
+
+ #[tokio::test]
+ async fn test_drop_namespace_throws_error_if_nested_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+ create_namespace(&catalog, &NamespaceIdent::new("a".into())).await;
+
+ let non_existent_namespace_ident =
+ NamespaceIdent::from_vec(vec!["a".into(), "b".into()]).unwrap();
+ assert_eq!(
+ catalog
+ .drop_namespace(&non_existent_namespace_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ )
+ )
+ }
+
+ #[tokio::test]
+ async fn test_dropping_a_namespace_also_drops_namespaces_nested_under_that_one() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ catalog.drop_namespace(&namespace_ident_a).await.unwrap();
+
+ assert!(!catalog.namespace_exists(&namespace_ident_a).await.unwrap());
+
+ assert!(!catalog
+ .namespace_exists(&namespace_ident_a_b)
+ .await
+ .unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_create_table_with_location() {
+ let tmp_dir = TempDir::new().unwrap();
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ let table_name = "abc";
+ let location = tmp_dir.path().to_str().unwrap().to_string();
+ let table_creation = TableCreation::builder()
+ .name(table_name.into())
+ .location(location.clone())
+ .schema(simple_table_schema())
+ .build();
+
+ let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
+
+ assert_table_eq(
+ &catalog
+ .create_table(&namespace_ident, table_creation)
+ .await
+ .unwrap(),
+ &expected_table_ident,
+ &simple_table_schema(),
+ );
+
+ let table = catalog.load_table(&expected_table_ident).await.unwrap();
+
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+
+ assert!(table
+ .metadata_location()
+ .unwrap()
+ .to_string()
+ .starts_with(&location))
+ }
+
+ #[tokio::test]
+ async fn test_create_table_falls_back_to_namespace_location_if_table_location_is_missing() {
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let warehouse_location = temp_path();
+ let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
+
+ let namespace_ident = NamespaceIdent::new("a".into());
+ let mut namespace_properties = HashMap::new();
+ let namespace_location = temp_path();
+ namespace_properties.insert(LOCATION.to_string(), namespace_location.to_string());
+ catalog
+ .create_namespace(&namespace_ident, namespace_properties)
+ .await
+ .unwrap();
+
+ let table_name = "tbl1";
+ let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
+ let expected_table_metadata_location_regex = format!(
+ "^{}/tbl1/metadata/0-{}.metadata.json$",
+ namespace_location, UUID_REGEX_STR,
+ );
+
+ let table = catalog
+ .create_table(
+ &namespace_ident,
+ TableCreation::builder()
+ .name(table_name.into())
+ .schema(simple_table_schema())
+ // no location specified for table
+ .build(),
+ )
+ .await
+ .unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+
+ let table = catalog.load_table(&expected_table_ident).await.unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+ }
+
+ #[tokio::test]
+ async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing(
+ ) {
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let warehouse_location = temp_path();
+ let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
+
+ let namespace_ident = NamespaceIdent::new("a".into());
+ let mut namespace_properties = HashMap::new();
+ let namespace_location = temp_path();
+ namespace_properties.insert(LOCATION.to_string(), namespace_location.to_string());
+ catalog
+ .create_namespace(&namespace_ident, namespace_properties)
+ .await
+ .unwrap();
+
+ let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let mut nested_namespace_properties = HashMap::new();
+ let nested_namespace_location = temp_path();
+ nested_namespace_properties
+ .insert(LOCATION.to_string(), nested_namespace_location.to_string());
+ catalog
+ .create_namespace(&nested_namespace_ident, nested_namespace_properties)
+ .await
+ .unwrap();
+
+ let table_name = "tbl1";
+ let expected_table_ident =
+ TableIdent::new(nested_namespace_ident.clone(), table_name.into());
+ let expected_table_metadata_location_regex = format!(
+ "^{}/tbl1/metadata/0-{}.metadata.json$",
+ nested_namespace_location, UUID_REGEX_STR,
+ );
+
+ let table = catalog
+ .create_table(
+ &nested_namespace_ident,
+ TableCreation::builder()
+ .name(table_name.into())
+ .schema(simple_table_schema())
+ // no location specified for table
+ .build(),
+ )
+ .await
+ .unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+
+ let table = catalog.load_table(&expected_table_ident).await.unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+ }
+
+ #[tokio::test]
+ async fn test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing(
+ ) {
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let warehouse_location = temp_path();
+ let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
+
+ let namespace_ident = NamespaceIdent::new("a".into());
+ // note: no location specified in namespace_properties
+ let namespace_properties = HashMap::new();
+ catalog
+ .create_namespace(&namespace_ident, namespace_properties)
+ .await
+ .unwrap();
+
+ let table_name = "tbl1";
+ let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
+ let expected_table_metadata_location_regex = format!(
+ "^{}/a/tbl1/metadata/0-{}.metadata.json$",
+ warehouse_location, UUID_REGEX_STR
+ );
+
+ let table = catalog
+ .create_table(
+ &namespace_ident,
+ TableCreation::builder()
+ .name(table_name.into())
+ .schema(simple_table_schema())
+ // no location specified for table
+ .build(),
+ )
+ .await
+ .unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+
+ let table = catalog.load_table(&expected_table_ident).await.unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+ }
+
+ #[tokio::test]
+ async fn test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing(
+ ) {
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let warehouse_location = temp_path();
+ let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone()));
+
+ let namespace_ident = NamespaceIdent::new("a".into());
+ catalog
+ // note: no location specified in namespace_properties
+ .create_namespace(&namespace_ident, HashMap::new())
+ .await
+ .unwrap();
+
+ let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ catalog
+ // note: no location specified in namespace_properties
+ .create_namespace(&nested_namespace_ident, HashMap::new())
+ .await
+ .unwrap();
+
+ let table_name = "tbl1";
+ let expected_table_ident =
+ TableIdent::new(nested_namespace_ident.clone(), table_name.into());
+ let expected_table_metadata_location_regex = format!(
+ "^{}/a/b/tbl1/metadata/0-{}.metadata.json$",
+ warehouse_location, UUID_REGEX_STR
+ );
+
+ let table = catalog
+ .create_table(
+ &nested_namespace_ident,
+ TableCreation::builder()
+ .name(table_name.into())
+ .schema(simple_table_schema())
+ // no location specified for table
+ .build(),
+ )
+ .await
+ .unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+
+ let table = catalog.load_table(&expected_table_ident).await.unwrap();
+ assert_table_eq(&table, &expected_table_ident, &simple_table_schema());
+ assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex);
+ }
+
+ #[tokio::test]
+ async fn test_create_table_throws_error_if_table_location_and_namespace_location_and_warehouse_location_are_missing(
+ ) {
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let catalog = MemoryCatalog::new(file_io, None);
+
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ let table_name = "tbl1";
+ let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
+
+ assert_eq!(
+ catalog
+ .create_table(
+ &namespace_ident,
+ TableCreation::builder()
+ .name(table_name.into())
+ .schema(simple_table_schema())
+ .build(),
+ )
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => Cannot create table {:?}. No default path is set, please specify a location when creating a table.",
+ &expected_table_ident
+ )
+ )
+ }
+
+ #[tokio::test]
+ async fn test_create_table_throws_error_if_table_with_same_name_already_exists() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let table_name = "tbl1";
+ let table_ident = TableIdent::new(namespace_ident.clone(), table_name.into());
+ create_table(&catalog, &table_ident).await;
+
+ let tmp_dir = TempDir::new().unwrap();
+ let location = tmp_dir.path().to_str().unwrap().to_string();
+
+ assert_eq!(
+ catalog
+ .create_table(
+ &namespace_ident,
+ TableCreation::builder()
+ .name(table_name.into())
+ .schema(simple_table_schema())
+ .location(location)
+ .build()
+ )
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => Cannot create table {:?}. Table already exists.",
+ &table_ident
+ )
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_tables_returns_empty_vector() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("a".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![]);
+ }
+
+ #[tokio::test]
+ async fn test_list_tables_returns_a_single_table() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ create_table(&catalog, &table_ident).await;
+
+ assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![
+ table_ident
+ ]);
+ }
+
+ #[tokio::test]
+ async fn test_list_tables_returns_multiple_tables() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ let table_ident_1 = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ let table_ident_2 = TableIdent::new(namespace_ident.clone(), "tbl2".into());
+ let _ = create_tables(&catalog, vec![&table_ident_1, &table_ident_2]).await;
+
+ assert_eq!(
+ to_set(catalog.list_tables(&namespace_ident).await.unwrap()),
+ to_set(vec![table_ident_1, table_ident_2])
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_tables_returns_tables_from_correct_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_1 = NamespaceIdent::new("n1".into());
+ let namespace_ident_2 = NamespaceIdent::new("n2".into());
+ create_namespaces(&catalog, &vec![&namespace_ident_1, &namespace_ident_2]).await;
+
+ let table_ident_1 = TableIdent::new(namespace_ident_1.clone(), "tbl1".into());
+ let table_ident_2 = TableIdent::new(namespace_ident_1.clone(), "tbl2".into());
+ let table_ident_3 = TableIdent::new(namespace_ident_2.clone(), "tbl1".into());
+ let _ = create_tables(&catalog, vec![
+ &table_ident_1,
+ &table_ident_2,
+ &table_ident_3,
+ ])
+ .await;
+
+ assert_eq!(
+ to_set(catalog.list_tables(&namespace_ident_1).await.unwrap()),
+ to_set(vec![table_ident_1, table_ident_2])
+ );
+
+ assert_eq!(
+ to_set(catalog.list_tables(&namespace_ident_2).await.unwrap()),
+ to_set(vec![table_ident_3])
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_tables_returns_table_under_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ let table_ident = TableIdent::new(namespace_ident_a_b.clone(), "tbl1".into());
+ create_table(&catalog, &table_ident).await;
+
+ assert_eq!(
+ catalog.list_tables(&namespace_ident_a_b).await.unwrap(),
+ vec![table_ident]
+ );
+ }
+
+ #[tokio::test]
+ async fn test_list_tables_throws_error_if_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+
+ let non_existent_namespace_ident = NamespaceIdent::new("n1".into());
+
+ assert_eq!(
+ catalog
+ .list_tables(&non_existent_namespace_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ ),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_drop_table() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ create_table(&catalog, &table_ident).await;
+
+ catalog.drop_table(&table_ident).await.unwrap();
+ }
+
+ #[tokio::test]
+ async fn test_drop_table_drops_table_under_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ let table_ident = TableIdent::new(namespace_ident_a_b.clone(), "tbl1".into());
+ create_table(&catalog, &table_ident).await;
+
+ catalog.drop_table(&table_ident).await.unwrap();
+
+ assert_eq!(
+ catalog.list_tables(&namespace_ident_a_b).await.unwrap(),
+ vec![]
+ );
+ }
+
+ #[tokio::test]
+ async fn test_drop_table_throws_error_if_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+
+ let non_existent_namespace_ident = NamespaceIdent::new("n1".into());
+ let non_existent_table_ident =
+ TableIdent::new(non_existent_namespace_ident.clone(), "tbl1".into());
+
+ assert_eq!(
+ catalog
+ .drop_table(&non_existent_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ ),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_drop_table_throws_error_if_table_doesnt_exist() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+
+ let non_existent_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+
+ assert_eq!(
+ catalog
+ .drop_table(&non_existent_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such table: {:?}",
+ non_existent_table_ident
+ ),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_table_exists_returns_true() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ create_table(&catalog, &table_ident).await;
+
+ assert!(catalog.table_exists(&table_ident).await.unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_table_exists_returns_false() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let non_existent_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+
+ assert!(!catalog
+ .table_exists(&non_existent_table_ident)
+ .await
+ .unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_table_exists_under_nested_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ create_namespaces(&catalog, &vec![&namespace_ident_a, &namespace_ident_a_b]).await;
+
+ let table_ident = TableIdent::new(namespace_ident_a_b.clone(), "tbl1".into());
+ create_table(&catalog, &table_ident).await;
+
+ assert!(catalog.table_exists(&table_ident).await.unwrap());
+
+ let non_existent_table_ident = TableIdent::new(namespace_ident_a_b.clone(), "tbl2".into());
+ assert!(!catalog
+ .table_exists(&non_existent_table_ident)
+ .await
+ .unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_table_exists_throws_error_if_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+
+ let non_existent_namespace_ident = NamespaceIdent::new("n1".into());
+ let non_existent_table_ident =
+ TableIdent::new(non_existent_namespace_ident.clone(), "tbl1".into());
+
+ assert_eq!(
+ catalog
+ .table_exists(&non_existent_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_namespace_ident
+ ),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_in_same_namespace() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let src_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ let dst_table_ident = TableIdent::new(namespace_ident.clone(), "tbl2".into());
+ create_table(&catalog, &src_table_ident).await;
+
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap();
+
+ assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![
+ dst_table_ident
+ ],);
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_across_namespaces() {
+ let catalog = new_memory_catalog();
+ let src_namespace_ident = NamespaceIdent::new("a".into());
+ let dst_namespace_ident = NamespaceIdent::new("b".into());
+ create_namespaces(&catalog, &vec![&src_namespace_ident, &dst_namespace_ident]).await;
+ let src_table_ident = TableIdent::new(src_namespace_ident.clone(), "tbl1".into());
+ let dst_table_ident = TableIdent::new(dst_namespace_ident.clone(), "tbl2".into());
+ create_table(&catalog, &src_table_ident).await;
+
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap();
+
+ assert_eq!(
+ catalog.list_tables(&src_namespace_ident).await.unwrap(),
+ vec![],
+ );
+
+ assert_eq!(
+ catalog.list_tables(&dst_namespace_ident).await.unwrap(),
+ vec![dst_table_ident],
+ );
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_src_table_is_same_as_dst_table() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let table_ident = TableIdent::new(namespace_ident.clone(), "tbl".into());
+ create_table(&catalog, &table_ident).await;
+
+ catalog
+ .rename_table(&table_ident, &table_ident)
+ .await
+ .unwrap();
+
+ assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![
+ table_ident
+ ],);
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_across_nested_namespaces() {
+ let catalog = new_memory_catalog();
+ let namespace_ident_a = NamespaceIdent::new("a".into());
+ let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
+ let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
+ create_namespaces(&catalog, &vec![
+ &namespace_ident_a,
+ &namespace_ident_a_b,
+ &namespace_ident_a_b_c,
+ ])
+ .await;
+
+ let src_table_ident = TableIdent::new(namespace_ident_a_b_c.clone(), "tbl1".into());
+ create_tables(&catalog, vec![&src_table_ident]).await;
+
+ let dst_table_ident = TableIdent::new(namespace_ident_a_b.clone(), "tbl1".into());
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap();
+
+ assert!(!catalog.table_exists(&src_table_ident).await.unwrap());
+
+ assert!(catalog.table_exists(&dst_table_ident).await.unwrap());
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_throws_error_if_src_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+
+ let non_existent_src_namespace_ident = NamespaceIdent::new("n1".into());
+ let src_table_ident =
+ TableIdent::new(non_existent_src_namespace_ident.clone(), "tbl1".into());
+
+ let dst_namespace_ident = NamespaceIdent::new("n2".into());
+ create_namespace(&catalog, &dst_namespace_ident).await;
+ let dst_table_ident = TableIdent::new(dst_namespace_ident.clone(), "tbl1".into());
+
+ assert_eq!(
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_src_namespace_ident
+ ),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_throws_error_if_dst_namespace_doesnt_exist() {
+ let catalog = new_memory_catalog();
+ let src_namespace_ident = NamespaceIdent::new("n1".into());
+ let src_table_ident = TableIdent::new(src_namespace_ident.clone(), "tbl1".into());
+ create_namespace(&catalog, &src_namespace_ident).await;
+ create_table(&catalog, &src_table_ident).await;
+
+ let non_existent_dst_namespace_ident = NamespaceIdent::new("n2".into());
+ let dst_table_ident =
+ TableIdent::new(non_existent_dst_namespace_ident.clone(), "tbl1".into());
+ assert_eq!(
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => No such namespace: {:?}",
+ non_existent_dst_namespace_ident
+ ),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_throws_error_if_src_table_doesnt_exist() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let src_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ let dst_table_ident = TableIdent::new(namespace_ident.clone(), "tbl2".into());
+
+ assert_eq!(
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!("Unexpected => No such table: {:?}", src_table_ident),
+ );
+ }
+
+ #[tokio::test]
+ async fn test_rename_table_throws_error_if_dst_table_already_exists() {
+ let catalog = new_memory_catalog();
+ let namespace_ident = NamespaceIdent::new("n1".into());
+ create_namespace(&catalog, &namespace_ident).await;
+ let src_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+ let dst_table_ident = TableIdent::new(namespace_ident.clone(), "tbl2".into());
+ create_tables(&catalog, vec![&src_table_ident, &dst_table_ident]).await;
+
+ assert_eq!(
+ catalog
+ .rename_table(&src_table_ident, &dst_table_ident)
+ .await
+ .unwrap_err()
+ .to_string(),
+ format!(
+ "Unexpected => Cannot create table {:? }. Table already exists.",
+ &dst_table_ident
+ ),
+ );
+ }
+}
diff --git a/crates/catalog/memory/src/lib.rs b/crates/catalog/memory/src/lib.rs
new file mode 100644
index 000000000..8988ac7b2
--- /dev/null
+++ b/crates/catalog/memory/src/lib.rs
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Iceberg memory Catalog API implementation.
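+//!
+//! A minimal usage sketch (illustrative only; it assumes a local-filesystem
+//! `FileIO` and a writable `/tmp/warehouse` path):
+//!
+//! ```no_run
+//! use std::collections::HashMap;
+//!
+//! use iceberg::io::FileIOBuilder;
+//! use iceberg::{Catalog, NamespaceIdent};
+//! use iceberg_catalog_memory::MemoryCatalog;
+//!
+//! async fn demo() -> iceberg::Result<()> {
+//!     // Catalog state itself lives in memory and is lost when the process
+//!     // exits; only table metadata files are written through `file_io`.
+//!     let file_io = FileIOBuilder::new_fs_io().build()?;
+//!     let catalog = MemoryCatalog::new(file_io, Some("/tmp/warehouse".to_string()));
+//!
+//!     let ns = NamespaceIdent::new("ns".to_string());
+//!     catalog.create_namespace(&ns, HashMap::new()).await?;
+//!     assert!(catalog.namespace_exists(&ns).await?);
+//!     Ok(())
+//! }
+//! ```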
+
+#![deny(missing_docs)]
+
+mod catalog;
+mod namespace_state;
+
+pub use catalog::*;
diff --git a/crates/catalog/memory/src/namespace_state.rs b/crates/catalog/memory/src/namespace_state.rs
new file mode 100644
index 000000000..a65319568
--- /dev/null
+++ b/crates/catalog/memory/src/namespace_state.rs
@@ -0,0 +1,298 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::{hash_map, HashMap};
+
+use iceberg::{Error, ErrorKind, NamespaceIdent, Result, TableIdent};
+use itertools::Itertools;
+
+// Represents the state of a namespace
+#[derive(Debug, Clone, Default)]
+pub(crate) struct NamespaceState {
+ // Properties of this namespace
+ properties: HashMap<String, String>,
+ // Namespaces nested inside this namespace
+ namespaces: HashMap<String, NamespaceState>,
+ // Mapping of tables to metadata locations in this namespace
+ table_metadata_locations: HashMap<String, String>,
+}
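+
+// Illustrative shape: the namespace `a.b` containing table `t` is stored as
+// root.namespaces["a"].namespaces["b"].table_metadata_locations["t"].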
+
+fn no_such_namespace_err<T>(namespace_ident: &NamespaceIdent) -> Result<T> {
+ Err(Error::new(
+ ErrorKind::Unexpected,
+ format!("No such namespace: {:?}", namespace_ident),
+ ))
+}
+
+fn no_such_table_err<T>(table_ident: &TableIdent) -> Result<T> {
+ Err(Error::new(
+ ErrorKind::Unexpected,
+ format!("No such table: {:?}", table_ident),
+ ))
+}
+
+fn namespace_already_exists_err<T>(namespace_ident: &NamespaceIdent) -> Result<T> {
+ Err(Error::new(
+ ErrorKind::Unexpected,
+ format!(
+ "Cannot create namespace {:?}. Namespace already exists.",
+ namespace_ident
+ ),
+ ))
+}
+
+fn table_already_exists_err<T>(table_ident: &TableIdent) -> Result<T> {
+ Err(Error::new(
+ ErrorKind::Unexpected,
+ format!(
+ "Cannot create table {:?}. Table already exists.",
+ table_ident
+ ),
+ ))
+}
+
+impl NamespaceState {
+ // Returns the state of the given namespace or an error if it doesn't exist
+ fn get_namespace(&self, namespace_ident: &NamespaceIdent) -> Result<&NamespaceState> {
+ let mut acc_name_parts = vec![];
+ let mut namespace_state = self;
+ for next_name in namespace_ident.iter() {
+ acc_name_parts.push(next_name);
+ match namespace_state.namespaces.get(next_name) {
+ None => {
+ let namespace_ident = NamespaceIdent::from_strs(acc_name_parts)?;
+ return no_such_namespace_err(&namespace_ident);
+ }
+ Some(intermediate_namespace) => {
+ namespace_state = intermediate_namespace;
+ }
+ }
+ }
+
+ Ok(namespace_state)
+ }
+
+ // Returns a mutable reference to the state of the given namespace or an error if it doesn't exist
+ fn get_mut_namespace(
+ &mut self,
+ namespace_ident: &NamespaceIdent,
+ ) -> Result<&mut NamespaceState> {
+ let mut acc_name_parts = vec![];
+ let mut namespace_state = self;
+ for next_name in namespace_ident.iter() {
+ acc_name_parts.push(next_name);
+ match namespace_state.namespaces.get_mut(next_name) {
+ None => {
+ let namespace_ident = NamespaceIdent::from_strs(acc_name_parts)?;
+ return no_such_namespace_err(&namespace_ident);
+ }
+ Some(intermediate_namespace) => {
+ namespace_state = intermediate_namespace;
+ }
+ }
+ }
+
+ Ok(namespace_state)
+ }
+
+ // Returns the state of the parent of the given namespace or an error if it doesn't exist
+ fn get_mut_parent_namespace_of(
+ &mut self,
+ namespace_ident: &NamespaceIdent,
+ ) -> Result<(&mut NamespaceState, String)> {
+ match namespace_ident.split_last() {
+ None => Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Namespace identifier can't be empty!",
+ )),
+ Some((child_namespace_name, parent_name_parts)) => {
+ let parent_namespace_state = if parent_name_parts.is_empty() {
+ Ok(self)
+ } else {
+ let parent_namespace_ident = NamespaceIdent::from_strs(parent_name_parts)?;
+ self.get_mut_namespace(&parent_namespace_ident)
+ }?;
+
+ Ok((parent_namespace_state, child_namespace_name.clone()))
+ }
+ }
+ }
+
+ // Returns all top-level namespaces
+ pub(crate) fn list_top_level_namespaces(&self) -> Vec<&String> {
+ self.namespaces.keys().collect_vec()
+ }
+
+ // Returns any namespaces nested under the given namespace or an error if the given namespace does not exist
+ pub(crate) fn list_namespaces_under(
+ &self,
+ namespace_ident: &NamespaceIdent,
+ ) -> Result<Vec<&String>> {
+ let nested_namespace_names = self
+ .get_namespace(namespace_ident)?
+ .namespaces
+ .keys()
+ .collect_vec();
+
+ Ok(nested_namespace_names)
+ }
+
+ // Returns true if the given namespace exists, otherwise false
+ pub(crate) fn namespace_exists(&self, namespace_ident: &NamespaceIdent) -> bool {
+ self.get_namespace(namespace_ident).is_ok()
+ }
+
+ // Inserts the given namespace or returns an error if it already exists
+ pub(crate) fn insert_new_namespace(
+ &mut self,
+ namespace_ident: &NamespaceIdent,
+ properties: HashMap<String, String>,
+ ) -> Result<()> {
+ let (parent_namespace_state, child_namespace_name) =
+ self.get_mut_parent_namespace_of(namespace_ident)?;
+
+ match parent_namespace_state
+ .namespaces
+ .entry(child_namespace_name)
+ {
+ hash_map::Entry::Occupied(_) => namespace_already_exists_err(namespace_ident),
+ hash_map::Entry::Vacant(entry) => {
+ let _ = entry.insert(NamespaceState {
+ properties,
+ namespaces: HashMap::new(),
+ table_metadata_locations: HashMap::new(),
+ });
+
+ Ok(())
+ }
+ }
+ }
+
+ // Removes the given namespace or returns an error if it doesn't exist
+ pub(crate) fn remove_existing_namespace(
+ &mut self,
+ namespace_ident: &NamespaceIdent,
+ ) -> Result<()> {
+ let (parent_namespace_state, child_namespace_name) =
+ self.get_mut_parent_namespace_of(namespace_ident)?;
+
+ match parent_namespace_state
+ .namespaces
+ .remove(&child_namespace_name)
+ {
+ None => no_such_namespace_err(namespace_ident),
+ Some(_) => Ok(()),
+ }
+ }
+
+ // Returns the properties of the given namespace or an error if it doesn't exist
+ pub(crate) fn get_properties(
+ &self,
+ namespace_ident: &NamespaceIdent,
+ ) -> Result<&HashMap<String, String>> {
+ let properties = &self.get_namespace(namespace_ident)?.properties;
+
+ Ok(properties)
+ }
+
+ // Returns a mutable reference to the properties of the given namespace or an error if it doesn't exist
+ fn get_mut_properties(
+ &mut self,
+ namespace_ident: &NamespaceIdent,
+ ) -> Result<&mut HashMap<String, String>> {
+ let properties = &mut self.get_mut_namespace(namespace_ident)?.properties;
+
+ Ok(properties)
+ }
+
+ // Replaces the properties of the given namespace or returns an error if it doesn't exist
+ pub(crate) fn replace_properties(
+ &mut self,
+ namespace_ident: &NamespaceIdent,
+ new_properties: HashMap<String, String>,
+ ) -> Result<()> {
+ let properties = self.get_mut_properties(namespace_ident)?;
+ *properties = new_properties;
+
+ Ok(())
+ }
+
+ // Returns the list of table names under the given namespace
+ pub(crate) fn list_tables(&self, namespace_ident: &NamespaceIdent) -> Result<Vec<&String>> {
+ let table_names = self
+ .get_namespace(namespace_ident)?
+ .table_metadata_locations
+ .keys()
+ .collect_vec();
+
+ Ok(table_names)
+ }
+
+ // Returns true if the given table exists, otherwise false
+ pub(crate) fn table_exists(&self, table_ident: &TableIdent) -> Result<bool> {
+ let namespace_state = self.get_namespace(table_ident.namespace())?;
+ let table_exists = namespace_state
+ .table_metadata_locations
+ .contains_key(&table_ident.name);
+
+ Ok(table_exists)
+ }
+
+ // Returns the metadata location of the given table or an error if it doesn't exist
+ pub(crate) fn get_existing_table_location(&self, table_ident: &TableIdent) -> Result<&String> {
+ let namespace = self.get_namespace(table_ident.namespace())?;
+
+ match namespace.table_metadata_locations.get(table_ident.name()) {
+ None => no_such_table_err(table_ident),
+ Some(table_metadata_location) => Ok(table_metadata_location),
+ }
+ }
+
+ // Inserts the given table or returns an error if it already exists
+ pub(crate) fn insert_new_table(
+ &mut self,
+ table_ident: &TableIdent,
+ metadata_location: String,
+ ) -> Result<()> {
+ let namespace = self.get_mut_namespace(table_ident.namespace())?;
+
+ match namespace
+ .table_metadata_locations
+ .entry(table_ident.name().to_string())
+ {
+ hash_map::Entry::Occupied(_) => table_already_exists_err(table_ident),
+ hash_map::Entry::Vacant(entry) => {
+ let _ = entry.insert(metadata_location);
+
+ Ok(())
+ }
+ }
+ }
+
+ // Removes the given table or returns an error if it doesn't exist
+ pub(crate) fn remove_existing_table(&mut self, table_ident: &TableIdent) -> Result<()> {
+ let namespace = self.get_mut_namespace(table_ident.namespace())?;
+
+ match namespace
+ .table_metadata_locations
+ .remove(table_ident.name())
+ {
+ None => no_such_table_err(table_ident),
+ Some(_) => Ok(()),
+ }
+ }
+}
diff --git a/crates/catalog/rest/Cargo.toml b/crates/catalog/rest/Cargo.toml
index 883f55c02..add57183b 100644
--- a/crates/catalog/rest/Cargo.toml
+++ b/crates/catalog/rest/Cargo.toml
@@ -17,30 +17,35 @@
[package]
name = "iceberg-catalog-rest"
-version = "0.1.0"
-edition = "2021"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+rust-version = { workspace = true }
categories = ["database"]
description = "Apache Iceberg Rust REST API"
-repository = "https://github.com/apache/iceberg-rust"
-license = "Apache-2.0"
+repository = { workspace = true }
+license = { workspace = true }
keywords = ["iceberg", "rest", "catalog"]
[dependencies]
async-trait = { workspace = true }
chrono = { workspace = true }
+http = "1.1.0"
iceberg = { workspace = true }
+itertools = { workspace = true }
log = "0.4.20"
reqwest = { workspace = true }
serde = { workspace = true }
serde_derive = { workspace = true }
serde_json = { workspace = true }
+tokio = { workspace = true, features = ["sync"] }
typed-builder = { workspace = true }
-urlencoding = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
[dev-dependencies]
+ctor = { workspace = true }
iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
mockito = { workspace = true }
port_scanner = { workspace = true }
diff --git a/crates/catalog/rest/DEPENDENCIES.rust.tsv b/crates/catalog/rest/DEPENDENCIES.rust.tsv
new file mode 100644
index 000000000..43b4ed3d3
--- /dev/null
+++ b/crates/catalog/rest/DEPENDENCIES.rust.tsv
@@ -0,0 +1,288 @@
+crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC MIT MPL-2.0 OpenSSL Unicode-DFS-2016 Unlicense Zlib
+addr2line@0.22.0 X X
+adler@1.0.2 X X X
+adler32@1.2.0 X
+ahash@0.8.11 X X
+aho-corasick@1.1.3 X X
+alloc-no-stdlib@2.0.4 X
+alloc-stdlib@0.2.2 X
+allocator-api2@0.2.18 X X
+android-tzdata@0.1.1 X X
+android_system_properties@0.1.5 X X
+anstream@0.6.15 X X
+anstyle@1.0.8 X X
+anstyle-parse@0.2.5 X X
+anstyle-query@1.1.1 X X
+anstyle-wincon@3.0.4 X X
+anyhow@1.0.86 X X
+apache-avro@0.17.0 X
+array-init@2.1.0 X X
+arrayvec@0.7.4 X X
+arrow-arith@52.2.0 X
+arrow-array@52.2.0 X
+arrow-buffer@52.2.0 X
+arrow-cast@52.2.0 X
+arrow-data@52.2.0 X
+arrow-ipc@52.2.0 X
+arrow-ord@52.2.0 X
+arrow-schema@52.2.0 X
+arrow-select@52.2.0 X
+arrow-string@52.2.0 X
+async-trait@0.1.81 X X
+atoi@2.0.0 X
+atomic-waker@1.1.2 X X
+autocfg@1.3.0 X X
+backon@0.4.4 X
+backtrace@0.3.73 X X
+base64@0.22.1 X X
+bigdecimal@0.4.5 X X
+bimap@0.6.3 X X
+bitflags@1.3.2 X X
+bitflags@2.6.0 X X
+bitvec@1.0.1 X
+block-buffer@0.10.4 X X
+brotli@6.0.0 X X
+brotli-decompressor@4.0.1 X X
+bumpalo@3.16.0 X X
+byteorder@1.5.0 X X
+bytes@1.7.1 X
+cc@1.1.11 X X
+cfg-if@1.0.0 X X
+chrono@0.4.38 X X
+colorchoice@1.0.2 X X
+const-oid@0.9.6 X X
+const-random@0.1.18 X X
+const-random-macro@0.1.16 X X
+core-foundation-sys@0.8.7 X X
+core2@0.4.0 X X
+cpufeatures@0.2.13 X X
+crc32c@0.6.8 X X
+crc32fast@1.4.2 X X
+crunchy@0.2.2 X
+crypto-common@0.1.6 X X
+darling@0.20.10 X
+darling_core@0.20.10 X
+darling_macro@0.20.10 X
+dary_heap@0.3.6 X X
+derive_builder@0.20.0 X X
+derive_builder_core@0.20.0 X X
+derive_builder_macro@0.20.0 X X
+digest@0.10.7 X X
+either@1.13.0 X X
+env_filter@0.1.2 X X
+env_logger@0.11.5 X X
+equivalent@1.0.1 X X
+fastrand@2.1.0 X X
+flagset@0.4.6 X
+flatbuffers@24.3.25 X
+flate2@1.0.31 X X
+fnv@1.0.7 X X
+form_urlencoded@1.2.1 X X
+funty@2.0.0 X
+futures@0.3.30 X X
+futures-channel@0.3.30 X X
+futures-core@0.3.30 X X
+futures-executor@0.3.30 X X
+futures-io@0.3.30 X X
+futures-macro@0.3.30 X X
+futures-sink@0.3.30 X X
+futures-task@0.3.30 X X
+futures-util@0.3.30 X X
+generic-array@0.14.7 X
+getrandom@0.2.15 X X
+gimli@0.29.0 X X
+h2@0.4.5 X
+half@2.4.1 X X
+hashbrown@0.14.5 X X
+heck@0.5.0 X X
+hermit-abi@0.3.9 X X
+hex@0.4.3 X X
+hmac@0.12.1 X X
+home@0.5.9 X X
+http@1.1.0 X X
+http-body@1.0.1 X
+http-body-util@0.1.2 X
+httparse@1.9.4 X X
+httpdate@1.0.3 X X
+humantime@2.1.0 X X
+hyper@1.4.1 X
+hyper-rustls@0.27.2 X X X
+hyper-util@0.1.7 X
+iana-time-zone@0.1.60 X X
+iana-time-zone-haiku@0.1.2 X X
+iceberg@0.3.0 X
+iceberg-catalog-memory@0.3.0 X
+iceberg-catalog-rest@0.3.0 X
+iceberg_test_utils@0.3.0 X
+ident_case@1.0.1 X X
+idna@0.5.0 X X
+indexmap@2.4.0 X X
+integer-encoding@3.0.4 X
+ipnet@2.9.0 X X
+is_terminal_polyfill@1.70.1 X X
+itertools@0.13.0 X X
+itoa@1.0.11 X X
+jobserver@0.1.32 X X
+js-sys@0.3.70 X X
+lexical-core@0.8.5 X X
+lexical-parse-float@0.8.5 X X
+lexical-parse-integer@0.8.6 X X
+lexical-util@0.8.5 X X
+lexical-write-float@0.8.5 X X
+lexical-write-integer@0.8.5 X X
+libc@0.2.155 X X
+libflate@2.1.0 X
+libflate_lz77@2.1.0 X
+libm@0.2.8 X X
+lock_api@0.4.12 X X
+log@0.4.22 X X
+lz4_flex@0.11.3 X
+md-5@0.10.6 X X
+memchr@2.7.4 X X
+mime@0.3.17 X X
+miniz_oxide@0.7.4 X X X
+mio@1.0.2 X
+murmur3@0.5.2 X X
+num@0.4.3 X X
+num-bigint@0.4.6 X X
+num-complex@0.4.6 X X
+num-integer@0.1.46 X X
+num-iter@0.1.45 X X
+num-rational@0.4.2 X X
+num-traits@0.2.19 X X
+object@0.36.3 X X
+once_cell@1.19.0 X X
+opendal@0.49.0 X
+ordered-float@2.10.1 X
+ordered-float@4.2.2 X
+parking_lot@0.12.3 X X
+parking_lot_core@0.9.10 X X
+parquet@52.2.0 X
+paste@1.0.15 X X
+percent-encoding@2.3.1 X X
+pin-project@1.1.5 X X
+pin-project-internal@1.1.5 X X
+pin-project-lite@0.2.14 X X
+pin-utils@0.1.0 X X
+pkg-config@0.3.30 X X
+ppv-lite86@0.2.20 X X
+proc-macro2@1.0.86 X X
+quad-rand@0.2.1 X
+quick-xml@0.36.1 X
+quote@1.0.36 X X
+radium@0.7.0 X
+rand@0.8.5 X X
+rand_chacha@0.3.1 X X
+rand_core@0.6.4 X X
+redox_syscall@0.5.3 X
+regex@1.10.6 X X
+regex-automata@0.4.7 X X
+regex-lite@0.1.6 X X
+regex-syntax@0.8.4 X X
+reqsign@0.16.0 X
+reqwest@0.12.5 X X
+ring@0.17.8 X
+rle-decode-fast@1.0.3 X X
+rust_decimal@1.35.0 X
+rustc-demangle@0.1.24 X X
+rustc_version@0.4.0 X X
+rustls@0.23.12 X X X
+rustls-pemfile@2.1.3 X X X
+rustls-pki-types@1.8.0 X X
+rustls-webpki@0.102.6 X
+rustversion@1.0.17 X X
+ryu@1.0.18 X X
+scopeguard@1.2.0 X X
+semver@1.0.23 X X
+seq-macro@0.3.5 X X
+serde@1.0.207 X X
+serde_bytes@0.11.15 X X
+serde_derive@1.0.207 X X
+serde_json@1.0.124 X X
+serde_repr@0.1.19 X X
+serde_urlencoded@0.7.1 X X
+serde_with@3.9.0 X X
+serde_with_macros@3.9.0 X X
+sha1@0.10.6 X X
+sha2@0.10.8 X X
+shlex@1.3.0 X X
+slab@0.4.9 X
+smallvec@1.13.2 X X
+snap@1.1.1 X
+socket2@0.5.7 X X
+spin@0.9.8 X
+static_assertions@1.1.0 X X
+strsim@0.11.1 X
+strum@0.26.3 X
+strum_macros@0.26.4 X
+subtle@2.6.1 X
+syn@2.0.74 X X
+sync_wrapper@1.0.1 X
+tap@1.0.1 X
+thiserror@1.0.63 X X
+thiserror-impl@1.0.63 X X
+thrift@0.17.0 X
+tiny-keccak@2.0.2 X
+tinyvec@1.8.0 X X X
+tinyvec_macros@0.1.1 X X X
+tokio@1.39.2 X
+tokio-macros@2.4.0 X
+tokio-rustls@0.26.0 X X
+tokio-util@0.7.11 X
+tower@0.4.13 X
+tower-layer@0.3.3 X
+tower-service@0.3.3 X
+tracing@0.1.40 X
+tracing-core@0.1.32 X
+try-lock@0.2.5 X
+twox-hash@1.6.3 X
+typed-builder@0.19.1 X X
+typed-builder-macro@0.19.1 X X
+typenum@1.17.0 X X
+unicode-bidi@0.3.15 X X
+unicode-ident@1.0.12 X X X
+unicode-normalization@0.1.23 X X
+untrusted@0.9.0 X
+url@2.5.2 X X
+utf8parse@0.2.2 X X
+uuid@1.10.0 X X
+version_check@0.9.5 X X
+want@0.3.1 X
+wasi@0.11.0+wasi-snapshot-preview1 X X X
+wasm-bindgen@0.2.93 X X
+wasm-bindgen-backend@0.2.93 X X
+wasm-bindgen-futures@0.4.43 X X
+wasm-bindgen-macro@0.2.93 X X
+wasm-bindgen-macro-support@0.2.93 X X
+wasm-bindgen-shared@0.2.93 X X
+wasm-streams@0.4.0 X X
+web-sys@0.3.70 X X
+webpki-roots@0.26.3 X
+windows-core@0.52.0 X X
+windows-sys@0.48.0 X X
+windows-sys@0.52.0 X X
+windows-targets@0.48.5 X X
+windows-targets@0.52.6 X X
+windows_aarch64_gnullvm@0.48.5 X X
+windows_aarch64_gnullvm@0.52.6 X X
+windows_aarch64_msvc@0.48.5 X X
+windows_aarch64_msvc@0.52.6 X X
+windows_i686_gnu@0.48.5 X X
+windows_i686_gnu@0.52.6 X X
+windows_i686_gnullvm@0.52.6 X X
+windows_i686_msvc@0.48.5 X X
+windows_i686_msvc@0.52.6 X X
+windows_x86_64_gnu@0.48.5 X X
+windows_x86_64_gnu@0.52.6 X X
+windows_x86_64_gnullvm@0.48.5 X X
+windows_x86_64_gnullvm@0.52.6 X X
+windows_x86_64_msvc@0.48.5 X X
+windows_x86_64_msvc@0.52.6 X X
+winreg@0.52.0 X
+wyz@0.5.1 X
+zerocopy@0.7.35 X X X
+zerocopy-derive@0.7.35 X X X
+zeroize@1.8.1 X X
+zstd@0.13.2 X
+zstd-safe@7.2.1 X X
+zstd-sys@2.0.12+zstd.1.5.6 X X
diff --git a/crates/catalog/rest/README.md b/crates/catalog/rest/README.md
new file mode 100644
index 000000000..e3bb70e94
--- /dev/null
+++ b/crates/catalog/rest/README.md
@@ -0,0 +1,27 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+
+# Apache Iceberg REST Catalog Official Native Rust Implementation
+
+[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-rest.svg)](https://crates.io/crates/iceberg-catalog-rest)
+[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-rest.svg)](https://docs.rs/iceberg-catalog-rest/latest/iceberg_catalog_rest/)
+
+This crate contains the official Native Rust implementation of the Apache Iceberg REST Catalog.
+
+See the [API documentation](https://docs.rs/iceberg-catalog-rest/latest) for examples and the full API.
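A minimal usage sketch, assuming the `RestCatalogConfig` builder and `RestCatalog::new` constructor introduced later in this diff (the URI is illustrative, and builder defaults are assumed for the remaining fields):

```rust
use iceberg::Catalog;
use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};

#[tokio::main]
async fn main() -> iceberg::Result<()> {
    // Point the catalog at a REST server; auth props such as "token" or
    // "credential" can be supplied through the builder's props as well.
    let config = RestCatalogConfig::builder()
        .uri("http://localhost:8181".to_string())
        .build();
    let catalog = RestCatalog::new(config);

    // The first call lazily fetches the server config and builds the HTTP client.
    let namespaces = catalog.list_namespaces(None).await?;
    println!("{namespaces:?}");
    Ok(())
}
```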
diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs
index 7ccd108b6..1181c3cc1 100644
--- a/crates/catalog/rest/src/catalog.rs
+++ b/crates/catalog/rest/src/catalog.rs
@@ -18,35 +18,36 @@
//! This module contains rest catalog implementation.
use std::collections::HashMap;
+use std::str::FromStr;
use async_trait::async_trait;
-use reqwest::header::{self, HeaderMap, HeaderName, HeaderValue};
-use reqwest::{Client, Request, Response, StatusCode};
-use serde::de::DeserializeOwned;
-use typed_builder::TypedBuilder;
-use urlencoding::encode;
-
-use crate::catalog::_serde::{
- CommitTableRequest, CommitTableResponse, CreateTableRequest, LoadTableResponse,
-};
use iceberg::io::FileIO;
use iceberg::table::Table;
-use iceberg::Result;
use iceberg::{
- Catalog, Error, ErrorKind, Namespace, NamespaceIdent, TableCommit, TableCreation, TableIdent,
+ Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation,
+ TableIdent,
+};
+use itertools::Itertools;
+use reqwest::header::{
+ HeaderMap, HeaderName, HeaderValue, {self},
};
+use reqwest::{Method, StatusCode, Url};
+use tokio::sync::OnceCell;
+use typed_builder::TypedBuilder;
-use self::_serde::{
- CatalogConfig, ErrorResponse, ListNamespaceResponse, ListTableResponse, NamespaceSerde,
+use crate::client::HttpClient;
+use crate::types::{
+ CatalogConfig, CommitTableRequest, CommitTableResponse, CreateTableRequest, ErrorResponse,
+ ListNamespaceResponse, ListTableResponse, LoadTableResponse, NamespaceSerde,
RenameTableRequest, NO_CONTENT, OK,
};
-const ICEBERG_REST_SPEC_VERSION: &str = "1.14";
+const ICEBERG_REST_SPEC_VERSION: &str = "0.14.1";
const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
const PATH_V1: &str = "v1";
/// Rest catalog configuration.
-#[derive(Debug, TypedBuilder)]
+#[derive(Clone, Debug, TypedBuilder)]
pub struct RestCatalogConfig {
uri: String,
#[builder(default, setter(strip_option))]
@@ -57,48 +58,93 @@ pub struct RestCatalogConfig {
}
impl RestCatalogConfig {
+ fn url_prefixed(&self, parts: &[&str]) -> String {
+ [&self.uri, PATH_V1]
+ .into_iter()
+ .chain(self.props.get("prefix").map(|s| &**s))
+ .chain(parts.iter().cloned())
+ .join("/")
+ }
+
fn config_endpoint(&self) -> String {
[&self.uri, PATH_V1, "config"].join("/")
}
+ pub(crate) fn get_token_endpoint(&self) -> String {
+ if let Some(oauth2_uri) = self.props.get("oauth2-server-uri") {
+ oauth2_uri.to_string()
+ } else if let Some(auth_url) = self.props.get("rest.authorization-url") {
+ log::warn!(
+ "'rest.authorization-url' is deprecated and will be removed in version 0.4.0. \
+ Please use 'oauth2-server-uri' instead."
+ );
+ auth_url.to_string()
+ } else {
+ [&self.uri, PATH_V1, "oauth", "tokens"].join("/")
+ }
+ }
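The precedence above, restated as a standalone sketch (not the crate's code; the property names are the ones read above, the URI is illustrative):

```rust
use std::collections::HashMap;

// Resolution order: explicit OAuth2 server URI, then the deprecated
// authorization-url alias, then the default {uri}/v1/oauth/tokens path.
fn token_endpoint(uri: &str, props: &HashMap<String, String>) -> String {
    props
        .get("oauth2-server-uri")
        .or_else(|| props.get("rest.authorization-url")) // deprecated alias
        .cloned()
        .unwrap_or_else(|| format!("{uri}/v1/oauth/tokens"))
}

fn main() {
    let props = HashMap::new();
    assert_eq!(
        token_endpoint("http://localhost:8181", &props),
        "http://localhost:8181/v1/oauth/tokens"
    );
}
```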
+
fn namespaces_endpoint(&self) -> String {
- [&self.uri, PATH_V1, "namespaces"].join("/")
+ self.url_prefixed(&["namespaces"])
}
fn namespace_endpoint(&self, ns: &NamespaceIdent) -> String {
- [&self.uri, PATH_V1, "namespaces", &ns.encode_in_url()].join("/")
+ self.url_prefixed(&["namespaces", &ns.to_url_string()])
}
fn tables_endpoint(&self, ns: &NamespaceIdent) -> String {
- [
- &self.uri,
- PATH_V1,
- "namespaces",
- &ns.encode_in_url(),
- "tables",
- ]
- .join("/")
+ self.url_prefixed(&["namespaces", &ns.to_url_string(), "tables"])
}
fn rename_table_endpoint(&self) -> String {
- [&self.uri, PATH_V1, "tables", "rename"].join("/")
+ self.url_prefixed(&["tables", "rename"])
}
fn table_endpoint(&self, table: &TableIdent) -> String {
- [
- &self.uri,
- PATH_V1,
+ self.url_prefixed(&[
"namespaces",
- &table.namespace.encode_in_url(),
+ &table.namespace.to_url_string(),
"tables",
- encode(&table.name).as_ref(),
- ]
- .join("/")
+ &table.name,
+ ])
}
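What `url_prefixed` yields for these endpoints, reproduced as a standalone sketch (not the crate's code; it swaps `itertools::join` for a plain `Vec` join, and the values are illustrative):

```rust
// Build {uri}/v1[/{prefix}]/{parts...}, skipping the prefix when absent.
fn url_prefixed(uri: &str, prefix: Option<&str>, parts: &[&str]) -> String {
    std::iter::once(uri)
        .chain(std::iter::once("v1"))
        .chain(prefix) // Option<&str> chains as zero-or-one segment
        .chain(parts.iter().copied())
        .collect::<Vec<_>>()
        .join("/")
}

fn main() {
    assert_eq!(
        url_prefixed("http://localhost:8181", None, &["namespaces", "ns", "tables"]),
        "http://localhost:8181/v1/namespaces/ns/tables"
    );
    // A server-supplied "prefix" prop is spliced in right after /v1.
    assert_eq!(
        url_prefixed("http://localhost:8181", Some("warehouse-a"), &["tables", "rename"]),
        "http://localhost:8181/v1/warehouse-a/tables/rename"
    );
}
```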
- fn try_create_rest_client(&self) -> Result<HttpClient> {
- //TODO: We will add oauth, ssl config, sigv4 later
- let headers = HeaderMap::from_iter([
+ /// Get the token from the config.
+ ///
+ /// Client will use `token` to send requests if it exists.
+ pub(crate) fn token(&self) -> Option<String> {
+ self.props.get("token").cloned()
+ }
+
+ /// Get the credentials from the config. Client will use `credential`
+ /// to fetch a new token if it exists.
+ ///
+ /// ## Output
+ ///
+ /// - `None`: No credential is set.
+ /// - `Some(None, client_secret)`: No client_id is set, use client_secret directly.
+ /// - `Some(Some(client_id), client_secret)`: Both client_id and client_secret are set.
+ pub(crate) fn credential(&self) -> Option<(Option<String>, String)> {
+ let cred = self.props.get("credential")?;
+
+ match cred.split_once(':') {
+ Some((client_id, client_secret)) => {
+ Some((Some(client_id.to_string()), client_secret.to_string()))
+ }
+ None => Some((None, cred.to_string())),
+ }
+ }
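The split rule in the doc comment, exercised directly (a sketch with illustrative values, not the crate's code):

```rust
// "client_id:client_secret" -> (Some(client_id), client_secret)
// "client_secret"           -> (None, client_secret)
fn parse_credential(cred: &str) -> (Option<String>, String) {
    match cred.split_once(':') {
        Some((id, secret)) => (Some(id.to_string()), secret.to_string()),
        None => (None, cred.to_string()),
    }
}

fn main() {
    assert_eq!(
        parse_credential("my-id:my-secret"),
        (Some("my-id".to_string()), "my-secret".to_string())
    );
    assert_eq!(parse_credential("my-secret"), (None, "my-secret".to_string()));
}
```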
+
+ /// Get the extra headers from config.
+ ///
+ /// We will include:
+ ///
+ /// - `content-type`
+ /// - `x-client-version`
+ /// - `user-agent`
+ /// - all headers specified by `header.xxx` in props.
+ pub(crate) fn extra_headers(&self) -> Result<HeaderMap> {
+ let mut headers = HeaderMap::from_iter([
(
header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
@@ -113,106 +159,160 @@ impl RestCatalogConfig {
),
]);
- Ok(HttpClient(
- Client::builder().default_headers(headers).build()?,
- ))
+ for (key, value) in self
+ .props
+ .iter()
+ .filter(|(k, _)| k.starts_with("header."))
+ // The unwrap here is safe since we are filtering the keys
+ .map(|(k, v)| (k.strip_prefix("header.").unwrap(), v))
+ {
+ headers.insert(
+ HeaderName::from_str(key).map_err(|e| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!("Invalid header name: {key}"),
+ )
+ .with_source(e)
+ })?,
+ HeaderValue::from_str(value).map_err(|e| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ format!("Invalid header value: {value}"),
+ )
+ .with_source(e)
+ })?,
+ );
+ }
+
+ Ok(headers)
}
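So a props entry like `header.X-Custom-Header` becomes an `X-Custom-Header` header on every request. A sketch of the key filtering (illustrative names, not the crate's code):

```rust
use std::collections::HashMap;

fn main() {
    let props = HashMap::from([
        ("header.X-Custom-Header".to_string(), "some-value".to_string()),
        ("token".to_string(), "not-a-header".to_string()),
    ]);

    // Only "header."-prefixed keys are forwarded, with the prefix stripped.
    for (k, v) in props.iter().filter(|(k, _)| k.starts_with("header.")) {
        let name = k.strip_prefix("header.").unwrap(); // safe: just filtered
        println!("{name}: {v}"); // -> X-Custom-Header: some-value
    }
}
```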
-}
-#[derive(Debug)]
-struct HttpClient(Client);
-
-impl HttpClient {
- async fn query<
- R: DeserializeOwned,
- E: DeserializeOwned + Into<Error>,
- const SUCCESS_CODE: u16,
- >(
- &self,
- request: Request,
- ) -> Result<R> {
- let resp = self.0.execute(request).await?;
+ /// Get the optional oauth headers from the config.
+ pub(crate) fn extra_oauth_params(&self) -> HashMap<String, String> {
+ let mut params = HashMap::new();
- if resp.status().as_u16() == SUCCESS_CODE {
- let text = resp.bytes().await?;
- Ok(serde_json::from_slice::<R>(&text).map_err(|e| {
- Error::new(
- ErrorKind::Unexpected,
- "Failed to parse response from rest catalog server!",
- )
- .with_context("json", String::from_utf8_lossy(&text))
- .with_source(e)
- })?)
+ if let Some(scope) = self.props.get("scope") {
+ params.insert("scope".to_string(), scope.to_string());
} else {
- let text = resp.bytes().await?;
- let e = serde_json::from_slice::<E>(&text).map_err(|e| {
- Error::new(
- ErrorKind::Unexpected,
- "Failed to parse response from rest catalog server!",
- )
- .with_context("json", String::from_utf8_lossy(&text))
- .with_source(e)
- })?;
- Err(e.into())
+ params.insert("scope".to_string(), "catalog".to_string());
}
- }
-
- async fn execute<E: DeserializeOwned + Into<Error>, const SUCCESS_CODE: u16>(
- &self,
- request: Request,
- ) -> Result<()> {
- let resp = self.0.execute(request).await?;
- if resp.status().as_u16() == SUCCESS_CODE {
- Ok(())
- } else {
- let code = resp.status();
- let text = resp.bytes().await?;
- let e = serde_json::from_slice::<E>(&text).map_err(|e| {
- Error::new(
- ErrorKind::Unexpected,
- "Failed to parse response from rest catalog server!",
- )
- .with_context("json", String::from_utf8_lossy(&text))
- .with_context("code", code.to_string())
- .with_source(e)
- })?;
- Err(e.into())
+ let optional_params = ["audience", "resource"];
+ for param_name in optional_params {
+ if let Some(value) = self.props.get(param_name) {
+ params.insert(param_name.to_string(), value.to_string());
+ }
}
+ params
}
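In effect `scope` always ends up set (defaulting to `catalog`), while `audience` and `resource` are forwarded only when present. A standalone sketch (not the crate's code):

```rust
use std::collections::HashMap;

fn oauth_params(props: &HashMap<String, String>) -> HashMap<String, String> {
    let mut params = HashMap::new();
    // scope defaults to "catalog" when the user did not set one.
    let scope = props.get("scope").cloned().unwrap_or_else(|| "catalog".to_string());
    params.insert("scope".to_string(), scope);
    // audience/resource are strictly optional pass-throughs.
    for key in ["audience", "resource"] {
        if let Some(v) = props.get(key) {
            params.insert(key.to_string(), v.clone());
        }
    }
    params
}

fn main() {
    let params = oauth_params(&HashMap::new());
    assert_eq!(params.get("scope").map(String::as_str), Some("catalog"));
}
```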
- /// More generic logic handling for special cases like head.
- async fn do_execute<R, E: DeserializeOwned + Into<Error>>(
- &self,
- request: Request,
- handler: impl FnOnce(&Response) -> Option<R>,
- ) -> Result<R> {
- let resp = self.0.execute(request).await?;
-
- if let Some(ret) = handler(&resp) {
- Ok(ret)
- } else {
- let code = resp.status();
- let text = resp.bytes().await?;
- let e = serde_json::from_slice::<E>(&text).map_err(|e| {
- Error::new(
- ErrorKind::Unexpected,
- "Failed to parse response from rest catalog server!",
- )
- .with_context("code", code.to_string())
- .with_context("json", String::from_utf8_lossy(&text))
- .with_source(e)
- })?;
- Err(e.into())
+ /// Merge the config with the given config fetched from rest server.
+ pub(crate) fn merge_with_config(mut self, mut config: CatalogConfig) -> Self {
+ if let Some(uri) = config.overrides.remove("uri") {
+ self.uri = uri;
}
+
+ let mut props = config.defaults;
+ props.extend(self.props);
+ props.extend(config.overrides);
+
+ self.props = props;
+ self
}
}
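The two `extend` calls give a clear precedence: server `defaults` are the base, user props override defaults, and server `overrides` win over both (with an overridden `uri` applied up front). A worked sketch of just the map layering (illustrative values):

```rust
use std::collections::HashMap;

fn main() {
    // Base layer: server-provided defaults.
    let mut props: HashMap<String, String> =
        HashMap::from([("a".into(), "default".into()), ("b".into(), "default".into())]);
    // User props replace defaults...
    props.extend(HashMap::from([("a".to_string(), "user".to_string())]));
    // ...and server overrides replace everything else.
    props.extend(HashMap::from([("b".to_string(), "override".to_string())]));

    assert_eq!(props.get("a").map(String::as_str), Some("user"));
    assert_eq!(props.get("b").map(String::as_str), Some("override"));
}
```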
+#[derive(Debug)]
+struct RestContext {
+ client: HttpClient,
+
+ /// Runtime config is fetched from the REST server and stored here.
+ ///
+ /// It can differ from the user config.
+ config: RestCatalogConfig,
+}
+
+impl RestContext {}
+
/// Rest catalog implementation.
#[derive(Debug)]
pub struct RestCatalog {
- config: RestCatalogConfig,
- client: HttpClient,
+ /// User config is stored as-is and never changed.
+ ///
+ /// It can differ from the config fetched from the server and used at runtime.
+ user_config: RestCatalogConfig,
+ ctx: OnceCell,
+}
+
+impl RestCatalog {
+ /// Creates a rest catalog from config.
+ pub fn new(config: RestCatalogConfig) -> Self {
+ Self {
+ user_config: config,
+ ctx: OnceCell::new(),
+ }
+ }
+
+ /// Get the context from the catalog.
+ async fn context(&self) -> Result<&RestContext> {
+ self.ctx
+ .get_or_try_init(|| async {
+ let catalog_config = RestCatalog::load_config(&self.user_config).await?;
+ let config = self.user_config.clone().merge_with_config(catalog_config);
+ let client = HttpClient::new(&config)?;
+
+ Ok(RestContext { config, client })
+ })
+ .await
+ }
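`tokio::sync::OnceCell::get_or_try_init` runs the async initializer at most once per catalog: concurrent callers await the same initialization, and a failed attempt leaves the cell empty so a later call can retry. A minimal sketch of the pattern:

```rust
use tokio::sync::OnceCell;

#[tokio::main]
async fn main() {
    let cell: OnceCell<String> = OnceCell::new();

    // The closure runs only on the first call; subsequent calls return the
    // cached value without re-running it.
    let v = cell
        .get_or_try_init(|| async { Ok::<_, std::io::Error>("config".to_string()) })
        .await
        .unwrap();
    assert_eq!(v, "config");
    assert_eq!(cell.get(), Some(&"config".to_string()));
}
```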
+
+ /// Load the runtime config from the server using the user config.
+ ///
+ /// It's required for a rest catalog to update its config after creation.
+ async fn load_config(user_config: &RestCatalogConfig) -> Result<CatalogConfig> {
+ let client = HttpClient::new(user_config)?;
+
+ let mut request = client.request(Method::GET, user_config.config_endpoint());
+
+ if let Some(warehouse_location) = &user_config.warehouse {
+ request = request.query(&[("warehouse", warehouse_location)]);
+ }
+
+ let config = client
+ .query::<CatalogConfig, ErrorResponse, OK>(request.build()?)
+ .await?;
+ Ok(config)
+ }
+
+ async fn load_file_io(
+ &self,
+ metadata_location: Option<&str>,
+ extra_config: Option<HashMap<String, String>>,
+ ) -> Result<FileIO> {
+ let mut props = self.context().await?.config.props.clone();
+ if let Some(config) = extra_config {
+ props.extend(config);
+ }
+
+ // If the warehouse is a logical identifier instead of a URL, we don't
+ // want to fail here; fall back to the metadata location instead.
+ let warehouse_path = match self.context().await?.config.warehouse.as_deref() {
+ Some(url) if Url::parse(url).is_ok() => Some(url),
+ Some(_) => None,
+ None => None,
+ };
+
+ let file_io = match warehouse_path.or(metadata_location) {
+ Some(url) => FileIO::from_path(url)?.with_props(props).build()?,
+ None => {
+ return Err(Error::new(
+ ErrorKind::Unexpected,
+ "Unable to load file io, neither warehouse nor metadata location is set!",
+ ))?
+ }
+ };
+
+ Ok(file_io)
+ }
}
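The `Url::parse` guard is what lets a logical warehouse name (one that is not a URL) fall through to the metadata location. A standalone sketch of that selection (not the crate's code):

```rust
use url::Url;

fn pick_io_path<'a>(warehouse: Option<&'a str>, metadata: Option<&'a str>) -> Option<&'a str> {
    // Keep the warehouse only when it parses as a URL...
    let warehouse_path = warehouse.filter(|w| Url::parse(w).is_ok());
    // ...otherwise fall back to the metadata location.
    warehouse_path.or(metadata)
}

fn main() {
    // A URL-shaped warehouse wins over the metadata location.
    assert_eq!(
        pick_io_path(Some("s3://bucket/wh"), Some("s3://bucket/meta.json")),
        Some("s3://bucket/wh")
    );
    // A logical identifier is skipped in favor of the metadata location.
    assert_eq!(
        pick_io_path(Some("my_warehouse"), Some("s3://bucket/meta.json")),
        Some("s3://bucket/meta.json")
    );
}
```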
#[async_trait]
@@ -222,12 +322,17 @@ impl Catalog for RestCatalog {
&self,
parent: Option<&NamespaceIdent>,
) -> Result<Vec<NamespaceIdent>> {
- let mut request = self.client.0.get(self.config.namespaces_endpoint());
+ let mut request = self.context().await?.client.request(
+ Method::GET,
+ self.context().await?.config.namespaces_endpoint(),
+ );
if let Some(ns) = parent {
- request = request.query(&[("parent", ns.encode_in_url())]);
+ request = request.query(&[("parent", ns.to_url_string())]);
}
let resp = self
+ .context()
+ .await?
.client
.query::<ListNamespaceResponse, ErrorResponse, OK>(request.build()?)
.await?;
@@ -245,9 +350,13 @@ impl Catalog for RestCatalog {
properties: HashMap<String, String>,
) -> Result {
let request = self
+ .context()
+ .await?
.client
- .0
- .post(self.config.namespaces_endpoint())
+ .request(
+ Method::POST,
+ self.context().await?.config.namespaces_endpoint(),
+ )
.json(&NamespaceSerde {
namespace: namespace.as_ref().clone(),
properties: Some(properties),
@@ -255,6 +364,8 @@ impl Catalog for RestCatalog {
.build()?;
let resp = self
+ .context()
+ .await?
.client
.query::<NamespaceSerde, ErrorResponse, OK>(request)
.await?;
@@ -265,12 +376,18 @@ impl Catalog for RestCatalog {
/// Get a namespace information from the catalog.
async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result {
let request = self
+ .context()
+ .await?
.client
- .0
- .get(self.config.namespace_endpoint(namespace))
+ .request(
+ Method::GET,
+ self.context().await?.config.namespace_endpoint(namespace),
+ )
.build()?;
let resp = self
+ .context()
+ .await?
.client
.query::<NamespaceSerde, ErrorResponse, OK>(request)
.await?;
@@ -295,12 +412,18 @@ impl Catalog for RestCatalog {
async fn namespace_exists(&self, ns: &NamespaceIdent) -> Result {
let request = self
+ .context()
+ .await?
.client
- .0
- .head(self.config.namespace_endpoint(ns))
+ .request(
+ Method::HEAD,
+ self.context().await?.config.namespace_endpoint(ns),
+ )
.build()?;
- self.client
+ self.context()
+ .await?
+ .client
.do_execute::<bool, ErrorResponse>(request, |resp| match resp.status() {
StatusCode::NO_CONTENT => Some(true),
StatusCode::NOT_FOUND => Some(false),
@@ -312,12 +435,18 @@ impl Catalog for RestCatalog {
/// Drop a namespace from the catalog.
async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
let request = self
+ .context()
+ .await?
.client
- .0
- .delete(self.config.namespace_endpoint(namespace))
+ .request(
+ Method::DELETE,
+ self.context().await?.config.namespace_endpoint(namespace),
+ )
.build()?;
- self.client
+ self.context()
+ .await?
+ .client
.execute::<ErrorResponse, NO_CONTENT>(request)
.await
}
@@ -325,12 +454,18 @@ impl Catalog for RestCatalog {
/// List tables from namespace.
async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
let request = self
+ .context()
+ .await?
.client
- .0
- .get(self.config.tables_endpoint(namespace))
+ .request(
+ Method::GET,
+ self.context().await?.config.tables_endpoint(namespace),
+ )
.build()?;
let resp = self
+ .context()
+ .await?
.client
.query::<ListTableResponse, ErrorResponse, OK>(request)
.await?;
@@ -339,6 +474,11 @@ impl Catalog for RestCatalog {
}
/// Create a new table inside the namespace.
+ ///
+ /// In the resulting table, if there are any config properties that
+ /// are present in both the response from the REST server and the
+ /// config provided when creating this `RestCatalog` instance, then
+ /// the value provided locally to the `RestCatalog` will take precedence.
async fn create_table(
&self,
namespace: &NamespaceIdent,
@@ -347,9 +487,13 @@ impl Catalog for RestCatalog {
let table_ident = TableIdent::new(namespace.clone(), creation.name.clone());
let request = self
+ .context()
+ .await?
.client
- .0
- .post(self.config.tables_endpoint(namespace))
+ .request(
+ Method::POST,
+ self.context().await?.config.tables_endpoint(namespace),
+ )
.json(&CreateTableRequest {
name: creation.name,
location: creation.location,
@@ -367,13 +511,24 @@ impl Catalog for RestCatalog {
.build()?;
let resp = self
+ .context()
+ .await?
.client
.query::<LoadTableResponse, ErrorResponse, OK>(request)
.await?;
- let file_io = self.load_file_io(resp.metadata_location.as_deref(), resp.config)?;
+ let config = resp
+ .config
+ .unwrap_or_default()
+ .into_iter()
+ .chain(self.user_config.props.clone().into_iter())
+ .collect();
+
+ let file_io = self
+ .load_file_io(resp.metadata_location.as_deref(), Some(config))
+ .await?;
- let table = Table::builder()
+ Table::builder()
.identifier(table_ident)
.file_io(file_io)
.metadata(resp.metadata)
@@ -383,25 +538,43 @@ impl Catalog for RestCatalog {
"Metadata location missing in create table response!",
)
})?)
- .build();
-
- Ok(table)
+ .build()
}
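The merge above relies on `collect` into a `HashMap` keeping the *last* occurrence of a duplicate key, so the locally supplied props (chained second) take precedence over the server's response config, as the doc comment promises. A sketch:

```rust
use std::collections::HashMap;

fn main() {
    let from_server = HashMap::from([("k".to_string(), "server".to_string())]);
    let local = HashMap::from([("k".to_string(), "local".to_string())]);

    // FromIterator for HashMap inserts pairs in order, so a later pair with
    // the same key overwrites an earlier one.
    let merged: HashMap<_, _> = from_server.into_iter().chain(local).collect();
    assert_eq!(merged.get("k").map(String::as_str), Some("local"));
}
```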
/// Load table from the catalog.
+ ///
+ /// If there are any config properties that are present in
+ /// both the response from the REST server and the config provided
+ /// when creating this `RestCatalog` instance, then the value
+ /// provided locally to the `RestCatalog` will take precedence.
async fn load_table(&self, table: &TableIdent) -> Result<Table> {