Skip to content

Commit

Permalink
doc: full iceberg-rest example
Browse files Browse the repository at this point in the history
  • Loading branch information
raphaelauv committed Jun 18, 2024
1 parent 85068b0 commit f82d872
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 0 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ export CATALOG_CATALOG__IMPL=org.apache.iceberg.aws.glue.GlueCatalog
java -jar ./build/libs/iceberg-rest-image-all.jar
```

## Example

To run an iceberg-rest service backed by a centralized external database, see the [example](./example/README.md).

## Browse

To browse the catalog, you can use `pyiceberg`:
Expand Down
15 changes: 15 additions & 0 deletions example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# iceberg-rest example

To start the example stack:

```shell
docker compose up -d
```

Then install the Python client, download the sample dataset, and run the example script:

```shell
pip install "pyiceberg[s3fs,pyarrow]"
curl https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet -o /tmp/yellow_tripdata_2023-01.parquet
python3 iceberg_s3_example.py
```
58 changes: 58 additions & 0 deletions example/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Example stack: MinIO (S3-compatible object store), a one-shot job that
# creates the warehouse bucket, the iceberg-rest catalog service, and the
# PostgreSQL database that backs the catalog.
services:
  minio:
    image: minio/minio:RELEASE.2024-05-27T19-17-46Z
    command: server /data --console-address ":9001"
    ports:
      # Host 9020 -> S3 API (9000), host 9021 -> web console (9001).
      - "9020:9000"
      - "9021:9001"
    environment:
      MINIO_ROOT_USER: admin
      MINIO_ROOT_PASSWORD: adminadmin
      MINIO_SITE_REGION: eu-west-3

  # One-shot job that creates the bucket referenced by CATALOG_WAREHOUSE.
  # NOTE(review): the image is unpinned, so the latest `mc` client is pulled.
  createbuckets:
    image: minio/mc
    depends_on:
      - minio
    # `mc alias set` replaces the deprecated `mc config host add`. It is
    # retried until MinIO accepts the connection instead of sleeping for a
    # fixed 10 seconds, and `--ignore-existing` keeps re-runs successful, so
    # the exit code no longer has to be forced to 0.
    entrypoint: >
      /bin/sh -c "
      until /usr/bin/mc alias set myminio http://minio:9000 admin adminadmin;
      do echo 'waiting for minio'; sleep 1; done;
      /usr/bin/mc mb --ignore-existing myminio/test-bucket;
      "

  iceberg_rest:
    image: tabulario/iceberg-rest
    depends_on:
      # Start only once PostgreSQL reports ready (see its healthcheck below).
      iceberg_rest-db:
        condition: service_healthy
    ports:
      - "8181:8181"
    environment:
      # Same credentials and region as the MinIO service above.
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=adminadmin
      - AWS_REGION=eu-west-3
      - CATALOG_WAREHOUSE=s3://test-bucket/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000
      - CATALOG_S3_PATH__STYLE__ACCESS=true
      # Catalog metadata is stored in the PostgreSQL service through JDBC.
      - CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
      - CATALOG_URI=jdbc:postgresql://iceberg_rest-db:5432/iceberg_restdb
      - CATALOG_JDBC_USER=iceberg_rest
      - CATALOG_JDBC_PASSWORD=password

  iceberg_rest-db:
    image: postgres:15.2
    init: true
    environment:
      POSTGRES_DB: iceberg_restdb
      POSTGRES_USER: iceberg_rest
      POSTGRES_PASSWORD: password
    ports:
      - "5436:5432"
    healthcheck:
      # Gates the start of iceberg_rest via `condition: service_healthy`.
      test: ["CMD", "pg_isready", "-U", "iceberg_rest", "-d", "iceberg_restdb"]
      interval: 5s
      retries: 5
42 changes: 42 additions & 0 deletions example/iceberg_s3_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# pip install "pyiceberg[s3fs,pyarrow]"
# curl https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet -o /tmp/yellow_tripdata_2023-01.parquet

import os

# Region and credentials for the local object store, set once at import time.
# NOTE(review): values mirror the MinIO settings in docker-compose.yaml;
# presumably they are read by the S3 client used by pyiceberg - confirm.
os.environ.update(
    {
        "AWS_DEFAULT_REGION": "eu-west-3",
        "AWS_REGION": "eu-west-3",
        "AWS_ACCESS_KEY_ID": "admin",
        "AWS_SECRET_ACCESS_KEY": "adminadmin",
    }
)


def run_iceberg():
    """Load the sample taxi dataset into an Iceberg table and print its row count.

    Connects to the REST catalog at http://localhost:8181, makes sure the
    ``default`` namespace exists, creates ``default.taxi_dataset`` from the
    parquet file at /tmp/yellow_tripdata_2023-01.parquet, appends the data,
    then reloads the table and prints the number of rows it contains.

    The function is safe to run more than once: an already existing namespace
    is tolerated, and when the table already exists the load is skipped so the
    data is not appended a second time.
    """
    # Third-party imports stay local to the function, as in the original.
    import pyarrow.parquet as pq
    from pyiceberg.catalog.rest import RestCatalog
    from pyiceberg.exceptions import (
        NamespaceAlreadyExistsError,
        TableAlreadyExistsError,
    )

    catalog = RestCatalog(
        "default",
        **{
            "uri": "http://localhost:8181",
            "warehouse": "s3://test-bucket/",
            # Host-side port mapped to the MinIO S3 API.
            "s3.endpoint": "http://localhost:9020",
        },
    )

    trips = pq.read_table("/tmp/yellow_tripdata_2023-01.parquet")

    try:
        catalog.create_namespace("default")
    except NamespaceAlreadyExistsError:
        # A previous run already created it; nothing to do.
        pass

    try:
        table = catalog.create_table(
            "default.taxi_dataset",
            schema=trips.schema,
        )
    except TableAlreadyExistsError:
        # Appending again would duplicate every row of the dataset.
        print("table default.taxi_dataset already exists, skipping load")
    else:
        table.append(trips)

    # Read the table back through the catalog to verify the round trip.
    table = catalog.load_table("default.taxi_dataset")
    loaded = table.scan().to_arrow()
    print(len(loaded))


if __name__ == "__main__":
    # Run the example only when executed as a script, not when imported.
    run_iceberg()

0 comments on commit f82d872

Please sign in to comment.