-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Deploy ctr-jailer as a reusable build stage
Change-type: minor Signed-off-by: Kyle Harding <[email protected]>
- Loading branch information
Showing
16 changed files
with
767 additions
and
206 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.env | ||
.balena |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,146 @@ | ||
FROM golang:1.20.3-alpine3.17 AS eget | ||
FROM debian:bullseye-slim AS kernel | ||
|
||
# hadolint ignore=DL3018 | ||
RUN apk add --no-cache \ | ||
build-base \ | ||
WORKDIR /src | ||
|
||
ARG DEBIAN_FRONTEND=noninteractive | ||
|
||
# hadolint ignore=DL3008 | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends \ | ||
awscli \ | ||
ca-certificates \ | ||
git | ||
curl \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
WORKDIR /app | ||
SHELL ["/bin/bash", "-o", "pipefail", "-c"] | ||
|
||
ARG CGO=0 | ||
# Print the available kernels in S3 in case the filenames change | ||
RUN aws s3 ls --no-sign-request "s3://spec.ccfc.min/firecracker-ci/v1.6/$(uname -m)/" | ||
|
||
RUN git clone -n https://github.com/zyedidia/eget . && \ | ||
git checkout --quiet 760f5151eb17fbd1bb592bce7cce57cf9657ce7d && \ | ||
make build | ||
# RUN curl -fsSL "https://s3.amazonaws.com/spec.ccfc.min/img/quickstart_guide/$(uname -m)/kernels/vmlinux.bin" -o vmlinux.bin | ||
# RUN curl -fsSL "http://mirror.archlinuxarm.org/aarch64/core/linux-aarch64-6.2.10-1-aarch64.pkg.tar.xz" -o vmlinux.bin | ||
# RUN curl -fsSL "https://s3.amazonaws.com/spec.ccfc.min/img/hello/kernel/hello-vmlinux.bin" -o vmlinux.bin | ||
RUN curl -fsSL "https://s3.amazonaws.com/spec.ccfc.min/firecracker-ci/v1.6/$(uname -m)/vmlinux-5.10.197" -o vmlinux.bin | ||
|
||
FROM alpine:3.17 | ||
############################################### | ||
|
||
# hadolint ignore=DL3018 | ||
RUN apk add --no-cache \ | ||
FROM debian:bullseye-slim AS firecracker | ||
|
||
WORKDIR /src | ||
|
||
ARG DEBIAN_FRONTEND=noninteractive | ||
|
||
# hadolint ignore=DL3008 | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends \ | ||
ca-certificates \ | ||
curl \ | ||
docker-cli \ | ||
e2fsprogs \ | ||
file \ | ||
git | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
WORKDIR /app | ||
# renovate: datasource=github-releases depName=firecracker-microvm/firecracker | ||
ARG FIRECRACKER_VERSION=v1.4.1 | ||
ARG FIRECRACKER_URL=https://github.com/firecracker-microvm/firecracker/releases/download/${FIRECRACKER_VERSION} | ||
|
||
COPY --from=eget /app/eget /usr/local/bin/eget | ||
SHELL ["/bin/bash", "-o", "pipefail", "-c"] | ||
|
||
COPY x86_64/ ./x86_64/ | ||
COPY aarch64/ ./aarch64/ | ||
RUN curl -fsSL -O "${FIRECRACKER_URL}/firecracker-${FIRECRACKER_VERSION}-$(uname -m).tgz" \ | ||
&& curl -fsSL "${FIRECRACKER_URL}/firecracker-${FIRECRACKER_VERSION}-$(uname -m).tgz.sha256.txt" | sha256sum -c - \ | ||
&& tar -xzf "firecracker-${FIRECRACKER_VERSION}-$(uname -m).tgz" --strip-components=1 \ | ||
&& for bin in *-"$(uname -m)" ; do install -v "${bin}" "/usr/local/bin/$(echo "${bin}" | sed -rn 's/(.+)-.+-.+/\1/p')" ; done \ | ||
&& rm "firecracker-${FIRECRACKER_VERSION}-$(uname -m).tgz" | ||
|
||
RUN ln -sf "$(uname -m)"/config.json config.json \ | ||
&& ln -sf "$(uname -m)"/eget.toml eget.toml | ||
############################################### | ||
|
||
SHELL ["/bin/ash", "-eo", "pipefail", "-c"] | ||
FROM debian:bullseye-slim AS jailer | ||
|
||
ARG FIRECRACKER_TAG=v1.3.1 | ||
WORKDIR /usr/src/app | ||
|
||
RUN eget firecracker-microvm/firecracker --tag ${FIRECRACKER_TAG} && \ | ||
for bin in /usr/local/bin/*-*-* ; \ | ||
do ln -sf "$(basename "${bin}")" "/usr/local/bin/$(basename "${bin}" | rev | cut -d'-' -f3- | rev)" ; \ | ||
done | ||
ARG DEBIAN_FRONTEND=noninteractive | ||
|
||
# hadolint ignore=DL3008 | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends \ | ||
bridge-utils \ | ||
ca-certificates \ | ||
curl \ | ||
e2fsprogs \ | ||
file \ | ||
gettext \ | ||
ipcalc \ | ||
iproute2 \ | ||
iptables \ | ||
jq \ | ||
procps \ | ||
rsync \ | ||
tcpdump \ | ||
uuid-runtime \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
COPY --from=firecracker /usr/local/bin/* /usr/local/bin/ | ||
COPY --from=kernel /src/vmlinux.bin /jail/boot/vmlinux.bin | ||
|
||
RUN addgroup --system firecracker \ | ||
&& adduser --system firecracker --ingroup firecracker \ | ||
&& chown -R firecracker:firecracker ./ | ||
|
||
RUN firecracker --version \ | ||
&& jailer --version | ||
|
||
COPY entry.sh ./ | ||
COPY overlay ./overlay | ||
COPY start.sh config.json ./ | ||
|
||
RUN chmod +x start.sh overlay/sbin/* overlay/usr/local/bin/* | ||
|
||
ENTRYPOINT [ "/usr/src/app/start.sh" ] | ||
|
||
CMD [ "/usr/local/bin/usage.sh" ] | ||
|
||
############################################### | ||
|
||
# Example alpine rootfs for testing, with some debug utilities | ||
FROM alpine:3.18 AS alpine-rootfs | ||
|
||
# hadolint ignore=DL3018 | ||
RUN apk add --no-cache bash ca-certificates ca-certificates curl iproute2 iputils-ping lsblk | ||
|
||
FROM jailer AS alpine-test | ||
|
||
COPY --from=alpine-rootfs / /usr/src/app/rootfs/ | ||
|
||
# Use livepush directives to conditionally run this test stage | ||
# for livepush, but not for default builds used in publishing. | ||
#dev-cmd-live="/usr/local/bin/healthcheck.sh && sleep infinity" | ||
|
||
############################################### | ||
|
||
# Example debian rootfs for testing, with some debug utilities | ||
FROM debian:bookworm AS debian-rootfs | ||
|
||
# hadolint ignore=DL3008 | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends curl iproute2 iputils-ping ca-certificates util-linux \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
FROM jailer AS debian-test | ||
|
||
COPY --from=debian-rootfs / /usr/src/app/rootfs/ | ||
|
||
############################################### | ||
|
||
# Example ubuntu rootfs for testing, with some debug utilities | ||
FROM ubuntu:jammy AS ubuntu-rootfs | ||
|
||
# hadolint ignore=DL3008 | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends ca-certificates curl iproute2 iputils-ping util-linux \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
FROM jailer AS ubuntu-test | ||
|
||
COPY --from=ubuntu-rootfs / /usr/src/app/rootfs/ | ||
|
||
RUN chmod +x entry.sh | ||
############################################### | ||
|
||
CMD [ "/app/entry.sh" ] | ||
# This is the stage we want to publish, but it has no rootfs | ||
# so we can't use it for livepush testing. | ||
FROM jailer AS default |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,137 @@ | ||
# balena-firecracker | ||
# Container Jailer | ||
|
||
Run Docker container images as Firecracker virtual machines on balenaOS | ||
Append this build stage to your existing container image to automatically run as a microVM with Firecracker! | ||
|
||
## What is Firecracker? | ||
|
||
[Firecracker](https://github.com/firecracker-microvm/firecracker) is an open source virtualization technology that is purpose-built for creating and managing secure, multi-tenant container and function-based services that provide serverless operational models. Firecracker runs workloads in lightweight virtual machines, called microVMs, which combine the security and isolation properties provided by hardware virtualization technology with the speed and flexibility of containers. | ||
[Firecracker](https://firecracker-microvm.github.io/) is an open source virtualization technology that is purpose-built for creating and managing secure, multi-tenant container and function-based services that provide serverless operational models. Firecracker runs workloads in lightweight virtual machines, called microVMs, which combine the security and isolation properties provided by hardware virtualization technology with the speed and flexibility of containers. | ||
|
||
## Goals | ||
## Requirements | ||
|
||
The main goal of this project is to create Firecracker virtual machines on a balenaOS host | ||
from inside a privileged service container. | ||
### Kernel Modules | ||
|
||
Additionally, the rootfs for the VM should be created from an existing Docker container image, | ||
downloaded and converted to raw format. | ||
Firecracker supports x86_64 and AARCH64 Linux, see [specific supported kernels](https://github.com/firecracker-microvm/firecracker/blob/main/docs/kernel-policy.md). | ||
|
||
## Architecture & OS | ||
Firecracker also requires [the KVM Linux kernel module](https://www.linux-kvm.org/). | ||
|
||
Firecracker supports x86_64 and aarch64 Linux, see [specific supported kernels](https://github.com/firecracker-microvm/firecracker/blob/main/docs/kernel-policy.md). | ||
The presence of the KVM module can be checked with: | ||
|
||
The provided Docker image must have an init system in place in order to serve as a rootfs. | ||
```bash | ||
lsmod | grep kvm | ||
``` | ||
|
||
> The minimal init system would be just an ELF binary, placed at `/sbin/init`. The final step in the Linux boot process executes `/sbin/init` and expects it to never exit. More complex init systems build on top of this, providing service configuration files, startup / shutdown scripts for various services, and many other features. | ||
### balenaOS Devices | ||
|
||
### KVM | ||
balenaOS is not a requirement of this project, but the project is well suited to container-based operating systems like it. | ||
|
||
Firecracker requires [the KVM Linux kernel module](https://www.linux-kvm.org/). | ||
The following device types have been tested with balenaOS as they have the required kernel modules. | ||
|
||
The presence of the KVM module can be checked with: | ||
- Generic x86_64 (GPT) | ||
- Generic AARCH64 | ||
|
||
```bash | ||
lsmod | grep kvm | ||
``` | ||
### Guest Container | ||
|
||
An example output where it is enabled: | ||
Guest containers based on Alpine, Debian, and Ubuntu have been tested. Guest containers must make the following commands | ||
available from a shell. | ||
|
||
```bash | ||
kvm_intel 348160 0 | ||
kvm 970752 1 kvm_intel | ||
irqbypass 16384 1 kvm | ||
- `sh` | ||
- `ip` via `iproute2` | ||
- `mount` | ||
- `awk` | ||
|
||
Distroless containers are not expected to work as the kernel init binary is a shell script. | ||
|
||
## Getting Started | ||
|
||
Add the following lines to the end of your existing Dockerfile for publishing. | ||
|
||
```Dockerfile | ||
# The rest of your docker instructions up here AS my-rootfs | ||
|
||
# Include firecracker wrapper and scripts | ||
FROM ghcr.io/balena-io/ctr-jailer AS runtime | ||
|
||
# Copy the root file system from your container final stage | ||
COPY --from=my-rootfs / /usr/src/app/rootfs | ||
|
||
# Provide your desired command to exec after init. | ||
# Setting your own ENTRYPOINT is unsupported, use the CMD field only. | ||
CMD /start.sh | ||
``` | ||
|
||
Then you can publish your container image as you normally would via container registries | ||
or deploy it directly via Docker Compose. | ||
|
||
```yml | ||
version: "2" | ||
|
||
services: | ||
my-app: | ||
build: . | ||
# Privileged is required to setup the rootfs and jailer | ||
# but permissions are dropped to a chroot in order to start your VM | ||
privileged: true | ||
network_mode: host | ||
# Optionally run the VM rootfs and kernel in-memory to save storage wear | ||
tmpfs: | ||
- /tmp | ||
- /run | ||
- /srv | ||
# Optionally mount a persistent data volume where a data drive will be created for the VM | ||
volumes: | ||
- persistent-data:/data | ||
|
||
volumes: | ||
persistent-data: {} | ||
``` | ||
## Resources | ||
That's it! The Firecracker runtime image will execute your rootfs as a microVM.
Reference: <https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md> | ||
## Usage | ||
### Environment Variables | ||
Since traditional container environment variables are not available in the VM, this wrapper will | ||
inject them into the VM rootfs and export them at runtime. | ||
Provide environment variables or secrets with the `CTR_` prefix, like `CTR_SECRET_KEY=secretvalue`. | ||
|
||
If the values have spaces or special characters, it is recommended to encode your secret values | ||
with `base64` and have your init service decode them. | ||
|
||
After being exported to the running process, the files are removed so they can safely | ||
be used for secrets as long as the init stage of your service calls `unset <SECRET_KEY>` after using them. | ||
|
||
### Networking | ||
|
||
A TAP/TUN device will be automatically created for the guest to have network access. | ||
|
||
The IP address/netmask can be configured via `TAP_IP`; otherwise, a random address in the 10.x.x.1/30 range will be assigned. | ||
|
||
The host interface for routing can be configured via `INTERFACE`; otherwise, the default route interface will be used. | ||
|
||
In order to create the TAP device and update iptables rules, the container jailer must be run in host networking mode. | ||
|
||
Reference: <https://github.com/firecracker-microvm/firecracker/blob/main/docs/network-setup.md> | ||
|
||
Exposing ports is TBD. | ||
|
||
### Resources | ||
|
||
Resources like virtual CPUs and Memory can be overprovisioned and adjusted via the env vars `VCPU_COUNT` and `MEM_SIZE_MIB`. | ||
|
||
The default is the maximum available on the host. | ||
|
||
### Persistent Storage | ||
|
||
The root filesystem is recreated on every run, so anything written to the root partition will not persist across restarts and | ||
is considered ephemeral, similar to container layers. | ||
|
||
However, an optional data drive `/dev/vdb` will be created and can be made persistent by mounting a volume | ||
or host path to `/jail/data`. | ||
|
||
## Contributing | ||
|
||
- <https://actuated.dev/blog/kvm-in-github-actions> | ||
- <https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md> | ||
- <https://github.com/firecracker-microvm/firecracker/blob/main/docs/rootfs-and-kernel-setup.md> | ||
- <https://github.com/skatolo/nested-firecracker> | ||
Please open an issue or submit a pull request with any features, fixes, or changes. |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.