server : fix token duplication when streaming with stop strings #9389

Workflow file for this run

	# Server build and tests
	name: Server

	on:
	workflow_dispatch: # allows manual triggering
	inputs:
	sha:
	description: 'Commit SHA1 to build'
	required: false
	type: string
	slow_tests:
	description: 'Run slow tests'
	required: true
	type: boolean
	push:
	branches:
	- master
	paths: ['.github/workflows/server.yml', '/CMakeLists.txt', '/Makefile', '*/.h', '*/.hpp', '*/.c', '*/.cpp', '*/.cu', '*/.swift', '*/.m', 'examples/server/*.']
	pull_request:
	types: [opened, synchronize, reopened]
	paths: ['.github/workflows/server.yml', '/CMakeLists.txt', '/Makefile', '*/.h', '*/.hpp', '*/.c', '*/.cpp', '*/.cu', '*/.swift', '*/.m', 'examples/server/*.']

	env:
	LLAMA_LOG_COLORS: 1
	LLAMA_LOG_PREFIX: 1
	LLAMA_LOG_TIMESTAMPS: 1
	LLAMA_LOG_VERBOSITY: 10

	concurrency:
	group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	jobs:
	server:
	runs-on: ubuntu-latest

	strategy:
	matrix:
	sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
	build_type: [RelWithDebInfo]
	include:
	- build_type: Release
	sanitizer: ""
	fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

	steps:
	- name: Dependencies
	id: depends
	run: \|
	sudo apt-get update
	sudo apt-get -y install \
	build-essential \
	xxd \
	git \
	cmake \
	curl \
	wget \
	language-pack-en \
	libcurl4-openssl-dev

	- name: Clone
	id: checkout
	uses: actions/checkout@v4
	with:
	fetch-depth: 0
	ref: ${{ github.event.inputs.sha \|\| github.event.pull_request.head.sha \|\| github.sha \|\| github.head_ref \|\| github.ref_name }}

	- name: Python setup
	id: setup_python
	uses: actions/setup-python@v5
	with:
	python-version: '3.11'

	- name: Tests dependencies
	id: test_dependencies
	run: \|
	pip install -r examples/server/tests/requirements.txt

	# Setup nodejs (to be used for verifying bundled index.html)
	- uses: actions/setup-node@v4
	with:
	node-version: '22.11.0'

	- name: Verify bundled index.html
	id: verify_server_index_html
	run: \|
	git config --global --add safe.directory $(realpath .)
	cd examples/server/webui
	git status
	npm ci
	npm run build
	git status
	modified_files="$(git status -s)"
	echo "Modified files: ${modified_files}"
	if [ -n "${modified_files}" ]; then
	echo "Repository is dirty or server/webui is not built as expected"
	echo "Hint: You may need to follow Web UI build guide in server/README.md"
	echo "${modified_files}"
	exit 1
	fi

	- name: Build (no OpenMP)
	id: cmake_build_no_openmp
	if: ${{ matrix.sanitizer == 'THREAD' }}
	run: \|
	cmake -B build \
	-DGGML_NATIVE=OFF \
	-DLLAMA_BUILD_SERVER=ON \
	-DLLAMA_CURL=ON \
	-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
	-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
	-DGGML_OPENMP=OFF ;
	cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

	- name: Build
	id: cmake_build
	if: ${{ matrix.sanitizer != 'THREAD' }}
	run: \|
	cmake -B build \
	-DGGML_NATIVE=OFF \
	-DLLAMA_BUILD_SERVER=ON \
	-DLLAMA_CURL=ON \
	-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
	-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
	cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

	- name: Tests
	id: server_integration_tests
	run: \|
	cd examples/server/tests
	./tests.sh

	- name: Slow tests
	id: server_integration_tests_slow
	if: ${{ (github.event.schedule \|\| github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
	run: \|
	cd examples/server/tests
	SLOW_TESTS=1 ./tests.sh


	server-windows:
	runs-on: windows-2019

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v4
	with:
	fetch-depth: 0
	ref: ${{ github.event.inputs.sha \|\| github.event.pull_request.head.sha \|\| github.sha \|\| github.head_ref \|\| github.ref_name }}

	- name: libCURL
	id: get_libcurl
	env:
	CURL_VERSION: 8.6.0_6
	run: \|
	curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
	mkdir $env:RUNNER_TEMP/libcurl
	tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

	- name: Build
	id: cmake_build
	run: \|
	cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
	cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

	- name: Python setup
	id: setup_python
	uses: actions/setup-python@v5
	with:
	python-version: '3.11'

	- name: Tests dependencies
	id: test_dependencies
	run: \|
	pip install -r examples/server/tests/requirements.txt

	- name: Copy Libcurl
	id: prepare_libcurl
	run: \|
	cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

	- name: Tests
	id: server_integration_tests
	if: ${{ !matrix.disabled_on_pr \|\| !github.event.pull_request }}
	run: \|
	cd examples/server/tests
	$env:PYTHONIOENCODING = ":replace"
	pytest -v -x

	- name: Slow tests
	id: server_integration_tests_slow
	if: ${{ (github.event.schedule \|\| github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
	run: \|
	cd examples/server/tests
	$env:SLOW_TESTS = "1"
	pytest -v -x

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

server : fix token duplication when streaming with stop strings #9389

Workflow file

server : fix token duplication when streaming with stop strings #9389

Jobs

Run details

Workflow file for this run