Skip to content

Commit

Permalink
feat(build): remove base-requirements.txt (datahub-project#11238)
Browse files Browse the repository at this point in the history
Co-authored-by: David Leifker <[email protected]>
  • Loading branch information
hsheth2 and david-leifker authored Aug 30, 2024
1 parent 9d84872 commit c4bc34f
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 481 deletions.
3 changes: 2 additions & 1 deletion .github/actions/ci-optimization/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ runs:
- "metadata-ingestion-modules/**"
- "metadata-ingestion/**"
- "metadata-models/**"
- "docker/datahub-ingestion**"
- "docker/datahub-ingestion-base/**"
- "docker/datahub-ingestion/**"
ingestion-base:
- "docker/datahub-ingestion-base/**"
docker:
Expand Down
81 changes: 58 additions & 23 deletions .github/actions/docker-custom-build-and-push/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,21 @@ inputs:
description: "Main tag to use for the Docker image"
required: true
flavor:
description: 'Image flavor (e.g., slim, full)'
description: "Image flavor (e.g., slim, full)"
required: false
target:
description: "Sets the target stage to build"
required: false
depot-project:
# Setting this will use native arm64 docker builds instead of QEMU emulation.
# This speeds up builds by 2-3x.
description: "Depot project id"
required: false

outputs:
image_tag:
description: "Docker image tags"
value: ${{ steps.docker_meta.outputs.tags }}
# image_name: ${{ env.DATAHUB_GMS_IMAGE }}

runs:
using: "composite"
Expand All @@ -58,9 +63,22 @@ runs:
type=raw,value=head,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }},enable={{is_default_branch}}
type=sha,prefix=,format=short,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
- name: Single Tag
id: single_tag
shell: bash
run: |
IMAGES="""
${{ inputs.images }}
"""
TAGS="""
${{ inputs.image_tag }}
"""
echo "SINGLE_IMAGE=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
# Code for testing the build when not pushing to Docker Hub.
- name: Build and Load image for testing (if not publishing)
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
if: ${{ inputs.publish != 'true' }}
with:
context: ${{ inputs.context }}
Expand All @@ -73,20 +91,11 @@ runs:
target: ${{ inputs.target }}
load: true
push: false
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
- name: Single Tag
if: ${{ inputs.publish != 'true' }}
shell: bash
run: |
IMAGES="""
${{ inputs.images }}
"""
TAGS="""
${{ inputs.image_tag }}
"""
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> $GITHUB_OUTPUT
id: single_tag
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
- name: Upload image locally for testing (if not publishing)
uses: ishworkh/docker-image-artifact-upload@v1
if: ${{ inputs.publish != 'true' }}
Expand All @@ -96,19 +105,42 @@ runs:
# Code for building multi-platform images and pushing to Docker Hub.
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
if: ${{ inputs.publish == 'true' }}
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
if: ${{ inputs.publish == 'true' }}
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
- name: Setup Depot CLI
uses: depot/setup-action@v1
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
- name: Login to DockerHub
uses: docker/login-action@v3
if: ${{ inputs.publish == 'true' }}
with:
username: ${{ inputs.username }}
password: ${{ inputs.password }}

# Depot variant.
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v5
if: ${{ inputs.publish == 'true' }}
uses: depot/build-push-action@v1
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
with:
project: ${{ inputs.depot-project }}
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
build-args: ${{ inputs.build-args }}
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
- name: Build and Push Multi-Platform image
uses: docker/build-push-action@v6
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
Expand All @@ -117,7 +149,10 @@ runs:
tags: ${{ steps.docker_meta.outputs.tags }}
target: ${{ inputs.target }}
push: true
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
cache-from: |
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: |
type=inline
# TODO add code for vuln scanning?
54 changes: 32 additions & 22 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ env:
DATAHUB_INGESTION_BASE_IMAGE: "acryldata/datahub-ingestion-base"
DATAHUB_INGESTION_IMAGE: "acryldata/datahub-ingestion"

permissions:
contents: read
id-token: write

jobs:
setup:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -68,23 +72,23 @@ jobs:
id: tag
run: |
source .github/scripts/docker_helpers.sh
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "tag=$(get_tag)" >> $GITHUB_OUTPUT
echo "slim_tag=$(get_tag_slim)" >> $GITHUB_OUTPUT
echo "full_tag=$(get_tag_full)" >> $GITHUB_OUTPUT
echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "unique_slim_tag=$(get_unique_tag_slim)" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag_full)" >> $GITHUB_OUTPUT
echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
echo "repository_name=${GITHUB_REPOSITORY#*/}" >> $GITHUB_OUTPUT
echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"
echo "tag=$(get_tag)" >> "$GITHUB_OUTPUT"
echo "slim_tag=$(get_tag_slim)" >> "$GITHUB_OUTPUT"
echo "full_tag=$(get_tag_full)" >> "$GITHUB_OUTPUT"
echo "unique_tag=$(get_unique_tag)" >> "$GITHUB_OUTPUT"
echo "unique_slim_tag=$(get_unique_tag_slim)" >> "$GITHUB_OUTPUT"
echo "unique_full_tag=$(get_unique_tag_full)" >> "$GITHUB_OUTPUT"
echo "python_release_version=$(get_python_docker_release_v)" >> "$GITHUB_OUTPUT"
echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"
echo "repository_name=${GITHUB_REPOSITORY#*/}" >> "$GITHUB_OUTPUT"
- name: Check whether docker login is possible
id: docker-login
env:
ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
run: |
echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> $GITHUB_OUTPUT
echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"
- name: Check whether publishing enabled
id: publish
env:
Expand All @@ -95,7 +99,7 @@ jobs:
}}
run: |
echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
echo "publish=${{ env.ENABLE_PUBLISH }}" >> $GITHUB_OUTPUT
echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
- name: Check whether PR publishing enabled
id: pr-publish
env:
Expand All @@ -106,7 +110,7 @@ jobs:
}}
run: |
echo "Enable PR publish: ${{ env.ENABLE_PUBLISH }}"
echo "publish=${{ env.ENABLE_PUBLISH }}" >> $GITHUB_OUTPUT
echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
- uses: ./.github/actions/ci-optimization
id: ci-optimize
- uses: actions/setup-python@v5
Expand Down Expand Up @@ -543,9 +547,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion-base/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute DataHub Ingestion (Base) Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_base_slim_build:
name: Build and Push DataHub Ingestion (Base-Slim) Docker Image
runs-on: ubuntu-latest
Expand Down Expand Up @@ -585,9 +590,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion-base/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute DataHub Ingestion (Base-Slim) Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_base_full_build:
name: Build and Push DataHub Ingestion (Base-Full) Docker Image
runs-on: ubuntu-latest
Expand Down Expand Up @@ -628,7 +634,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute DataHub Ingestion (Base-Full) Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_base_change == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> "$GITHUB_OUTPUT"

datahub_ingestion_slim_build:
name: Build and Push DataHub Ingestion Docker Images
Expand Down Expand Up @@ -681,9 +687,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_slim_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -713,6 +720,7 @@ jobs:
severity: "CRITICAL,HIGH"
ignore-unfixed: true
vuln-type: "os,library"
timeout: 15m
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
Expand Down Expand Up @@ -767,9 +775,10 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
- name: Compute Tag (Full)
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
datahub_ingestion_full_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -799,6 +808,7 @@ jobs:
severity: "CRITICAL,HIGH"
ignore-unfixed: true
vuln-type: "os,library"
timeout: 15m
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
Expand All @@ -813,13 +823,13 @@ jobs:
- id: set-matrix
run: |
if [ '${{ needs.setup.outputs.frontend_only }}' == 'true' ]; then
echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
echo 'matrix=["cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT"
elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> "$GITHUB_OUTPUT"
elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT"
else
echo 'matrix=[]' >> $GITHUB_OUTPUT
echo 'matrix=[]' >> "$GITHUB_OUTPUT"
fi
smoke_test:
Expand Down
12 changes: 9 additions & 3 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ RUN apt-get update && apt-get upgrade -y \
&& apt-get clean \
&& rm -rf /var/lib/{apt,dpkg,cache,log}/

COPY --from=dockerize-binary /usr/local/bin/dockerize /usr/local/bin
COPY --from=powerman/dockerize:0.19 /usr/local/bin/dockerize /usr/local/bin

COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt
COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh

RUN addgroup --gid 1000 datahub && \
Expand All @@ -67,7 +66,14 @@ ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
ENV VIRTUAL_ENV=/datahub-ingestion/.venv
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
RUN python3 -m venv $VIRTUAL_ENV && \
uv pip install --no-cache -r requirements.txt
uv pip install --no-cache --upgrade pip setuptools wheel

# Note: Normally uv will create hardlinks from the cache directory to the venv.
# In our docker files, we normally use `RUN --mount=type=cache,... uv pip install ...`,
# which means the cache directory is on a separate filesystem. uv will emit a warning:
# Failed to hardlink files; falling back to full copy. This may lead to degraded performance.
# If the cache and target directories are on different filesystems, hardlinking may not be supported.
# If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.

ENTRYPOINT [ "/entrypoint.sh" ]

Expand Down
Loading

0 comments on commit c4bc34f

Please sign in to comment.