forked from ggerganov/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into dev-refactoring
# Conflicts: # ggml/src/ggml-backend-reg.cpp
- Loading branch information
Showing
129 changed files
with
10,756 additions
and
3,062 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
ARG UBUNTU_VERSION=22.04

# Build stage: compiles the project with CMake inside an Ubuntu image and
# stages the resulting artifacts under /app/lib and /app/full for later stages.
FROM ubuntu:${UBUNTU_VERSION} AS build

# Compiler toolchain, git, CMake and the libcurl development package
# (the configure step below enables -DLLAMA_CURL=ON).
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        git \
        cmake \
        libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Configure a Release build, then compile with one job per available core.
RUN cmake -S . -B build \
        -DGGML_BACKEND_DL=ON \
        -DGGML_NATIVE=OFF \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DLLAMA_CURL=ON \
        -DCMAKE_BUILD_TYPE=Release && \
    cmake --build build -j $(nproc)

# Collect every shared object from the build tree into /app/lib, and the
# binaries, Python scripts, requirements and tools.sh into /app/full.
RUN mkdir -p /app/lib /app/full && \
    find build -name "*.so" -exec cp {} /app/lib \; && \
    cp build/bin/* /app/full && \
    cp *.py /app/full && \
    cp -r gguf-py /app/full && \
    cp -r requirements /app/full && \
    cp requirements.txt /app/full && \
    cp .devops/tools.sh /app/full/tools.sh
|
||
## Base image
# Runtime base that the full, light and server stages build on.
FROM ubuntu:$UBUNTU_VERSION AS base

# Install the runtime packages and remove apt/cache residue in the same layer.
# - apt-get is used for every step: the `apt` front end is intended for
#   interactive use and warns that its CLI is not stable for scripts.
# - --no-install-recommends keeps optional packages out of the runtime image.
#   ca-certificates is therefore listed explicitly so HTTPS keeps working for
#   curl/libcurl (presumably it was previously pulled in only as a
#   recommended package - verify if the base image changes).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries collected by the build stage are placed directly in /app.
COPY --from=build /app/lib/ /app
|
||
### Full
# Full image: everything the build stage staged in /app/full (binaries,
# Python scripts, requirements, tools.sh) plus a Python environment.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# Install git and Python, then the Python requirements, and clean up in the
# same layer so the removed files never land in the image.
# - apt-get replaces the `apt` front end, whose CLI is not stable for scripts.
# - --no-cache-dir stops pip from leaving its download cache in the image; the
#   cleanup below only touches /tmp, /var/tmp, /var/cache and the apt lists.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
    && pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
|
||
### Light, CLI only
# Adds just the llama-cli binary on top of the shared base stage and runs it
# as the container entrypoint.
FROM base AS light

WORKDIR /app

COPY --from=build /app/full/llama-cli /app

ENTRYPOINT ["/app/llama-cli"]
|
||
### Server, Server only
# Adds just the llama-server binary on top of the shared base stage.
FROM base AS server

WORKDIR /app

COPY --from=build /app/full/llama-server /app

# Exported into the container environment; presumably read by llama-server as
# its bind address so the service is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0

# Probe the /health endpoint on port 8080 with the curl installed in the base stage.
HEALTHCHECK CMD ["curl", "-f", "http://localhost:8080/health"]

ENTRYPOINT ["/app/llama-server"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

# Target the CUDA runtime image (used by the base stage)
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

# Build stage: compiles the project with CUDA enabled and stages the resulting
# artifacts under /app/lib and /app/full for later stages.
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

WORKDIR /app

COPY . .

# -DCMAKE_CUDA_ARCHITECTURES is only passed when CUDA_DOCKER_ARCH was overridden.
# The build type is set at configure time with -DCMAKE_BUILD_TYPE=Release:
# `cmake --build --config` is only honoured by multi-config generators, so on
# its own it does not select Release for a single-config (Makefile) build.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

# Collect every shared object found in the build tree.
RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

# Stage binaries, Python scripts, requirements and tools.sh for the full image.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
|
||
## Base image
# Runtime base (CUDA runtime image) that the full, light and server stages build on.
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

# Install the runtime packages and remove apt/cache residue in the same layer.
# - apt-get is used for every step: the `apt` front end is intended for
#   interactive use and warns that its CLI is not stable for scripts.
# - --no-install-recommends keeps optional packages out of the runtime image.
#   ca-certificates is therefore listed explicitly so HTTPS keeps working for
#   curl/libcurl (presumably it was previously pulled in only as a
#   recommended package - verify if the base image changes).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries collected by the build stage are placed directly in /app.
COPY --from=build /app/lib/ /app
|
||
### Full
# Full image: everything the build stage staged in /app/full (binaries,
# Python scripts, requirements, tools.sh) plus a Python environment.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# Install git and Python, then the Python requirements, and clean up in the
# same layer so the removed files never land in the image.
# - apt-get replaces the `apt` front end, whose CLI is not stable for scripts.
# - --no-cache-dir stops pip from leaving its download cache in the image; the
#   cleanup below only touches /tmp, /var/tmp, /var/cache and the apt lists.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
    && pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
|
||
### Light, CLI only
# Adds just the llama-cli binary on top of the shared CUDA runtime base stage
# and runs it as the container entrypoint.
FROM base AS light

WORKDIR /app

COPY --from=build /app/full/llama-cli /app

ENTRYPOINT ["/app/llama-cli"]
|
||
### Server, Server only
# Adds just the llama-server binary on top of the shared CUDA runtime base stage.
FROM base AS server

WORKDIR /app

COPY --from=build /app/full/llama-server /app

# Exported into the container environment; presumably read by llama-server as
# its bind address so the service is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0

# Probe the /health endpoint on port 8080 with the curl installed in the base stage.
HEALTHCHECK CMD ["curl", "-f", "http://localhost:8080/health"]

ENTRYPOINT ["/app/llama-server"]
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.