# CI workflow: tracks the latest upstream llama.cpp release, repackages
# llama-server binaries and GPU backends per platform, and publishes them
# as a release in this repository.
name: Build
on:
  push:
    branches:
      - master
    paths:
      # Only re-run on pushes that modify this workflow file itself.
      - '.github/workflows/build.yml'
  # Allow manual runs from the Actions tab.
  workflow_dispatch: {}
  schedule:
    # Daily at midnight UTC, to pick up new upstream releases.
    - cron: '0 0 * * *'
# Cancel an in-flight run when a newer one starts for the same ref;
# scheduled/dispatch runs (no head_ref) fall back to the unique run_id,
# so they never cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
jobs:
find:
runs-on: ubuntu-latest
permissions:
packages: read
outputs:
build: ${{ steps.get_latest.outputs.tag }}
steps:
- name: Get latest build
id: get_latest
run: |
build=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | jq -r .tag_name)
echo "llama.cpp latest build: $build"
echo "tag=$build" >> "$GITHUB_OUTPUT"
linux-cuda:
needs: find
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: ggml-org/free-disk-space@v1.3.1
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Clone llama.cpp@${{ needs.find.outputs.build }}
run: git clone https://github.com/ggerganov/llama.cpp --branch=${{ needs.find.outputs.build }} --depth=1
- name: Build `llama-server`
run: docker build -f llama.cpp/.devops/cuda.Dockerfile --target server --tag llama-server-cuda llama.cpp
- name: Create llama-server container
run: docker create --name llama-server llama-server-cuda
- name: Copy build artifacts from container
run: docker cp llama-server:/app .
- name: Copy CUDA runtime from container
run: |
CUDA_VERSION=$(docker run --rm --entrypoint "" llama-server-cuda \
bash -c "ls /usr/local | grep '^cuda-[0-9]\+\.[0-9]\+$' | sort -V | tail -n1 | sed 's/cuda-//'")
CUDA_MAJOR_VERSION=${CUDA_VERSION%%.*}
docker cp -L llama-server:/usr/local/cuda-$CUDA_VERSION/targets/x86_64-linux/lib/libcudart.so.$CUDA_MAJOR_VERSION app/.
docker cp -L llama-server:/usr/local/cuda-$CUDA_VERSION/targets/x86_64-linux/lib/libcublas.so.$CUDA_MAJOR_VERSION app/.
docker cp -L llama-server:/usr/local/cuda-$CUDA_VERSION/targets/x86_64-linux/lib/libcublasLt.so.$CUDA_MAJOR_VERSION app/.
- name: Set $ORIGIN as RPATH for `llama-server` and shared libraries
run: patchelf --set-rpath '$ORIGIN' app/llama-server app/*.so*
- name: Separate CUDA backend
run: |
mkdir cuda
mv app/libcudart* app/libcublas* app/libggml-cuda.so cuda/.
- name: Pack llama-server
run: |
cp llama.cpp/LICENSE app/.
cd app
zip -r ../llama-server-${{ needs.find.outputs.build }}-linux-x64.zip *
- name: Pack CUDA backend
run: |
cp llama.cpp/LICENSE cuda/.
cd cuda
zip -r ../backend-cuda-${{ needs.find.outputs.build }}-linux-x64.zip *
- name: Upload llama-server
uses: actions/upload-artifact@v4
with:
path: llama-server-${{ needs.find.outputs.build }}-linux-x64.zip
name: llama-server-linux-x64
- name: Upload CUDA backend
uses: actions/upload-artifact@v4
with:
path: backend-cuda-${{ needs.find.outputs.build }}-linux-x64.zip
name: backend-cuda-linux-x64
linux-hip:
needs: find
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: ggml-org/free-disk-space@v1.3.1
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Clone llama.cpp@${{ needs.find.outputs.build }}
run: git clone https://github.com/ggerganov/llama.cpp --branch=${{ needs.find.outputs.build }} --depth=1
- name: Build `llama-server`
run: docker build -f llama.cpp/.devops/rocm.Dockerfile --target server --tag llama-server llama.cpp
- name: Copy HIP shared libraries
run: |
docker run --name hip --entrypoint "" llama-server \
bash -c "ldd /app/libggml-hip.so | awk '/=>/ {print \$3}' | grep -E 'rocm' | xargs -I{} cp -v {} /app/."
- name: Extract `llama-server` from container
run: docker cp hip:/app .
- name: Copy libnuma.so library
run: docker cp -L hip:/usr/lib/x86_64-linux-gnu/libnuma.so.1 app/.
- name: Delete container and image
run: |
docker rm hip
docker rmi llama-server
- name: Set $ORIGIN as RPATH for `llama-server` and shared libraries
run: patchelf --set-rpath '$ORIGIN' app/llama-server app/*.so*
- name: Pack HIP backend
run: |
mkdir hip
mv app/libggml-hip.so app/*.so.* llama.cpp/LICENSE hip/.
cd hip
zip -r ../backend-hip-${{ needs.find.outputs.build }}-linux-x64.zip *
- name: Upload HIP backend
uses: actions/upload-artifact@v4
with:
path: backend-hip-${{ needs.find.outputs.build }}-linux-x64.zip
name: backend-hip-linux-x64
macos:
needs: find
runs-on: ubuntu-latest
strategy:
matrix:
arch: [arm64, x64]
steps:
- name: Download macOS release
run: curl -LO https://github.com/ggml-org/llama.cpp/releases/download/${{ needs.find.outputs.build }}/llama-${{ needs.find.outputs.build }}-bin-macos-${{ matrix.arch }}.tar.gz
- name: Extract macOS release
run: tar -xzf llama-${{ needs.find.outputs.build }}-bin-macos-${{ matrix.arch }}.tar.gz
- name: Pack llama-server
run: |
mkdir app
mv llama-${{ needs.find.outputs.build }}/llama-server llama-${{ needs.find.outputs.build }}/*.dylib llama-${{ needs.find.outputs.build }}/LICENSE* app/.
cd app
zip -r ../llama-server-${{ needs.find.outputs.build }}-macos-${{ matrix.arch }}.zip *
- name: Upload llama-server
uses: actions/upload-artifact@v4
with:
path: llama-server-${{ needs.find.outputs.build }}-macos-${{ matrix.arch }}.zip
name: llama-server-macos-${{ matrix.arch }}
windows:
needs: find
runs-on: ubuntu-latest
steps:
- name: Download Windows release
run: curl -LO https://github.com/ggml-org/llama.cpp/releases/download/${{ needs.find.outputs.build }}/llama-${{ needs.find.outputs.build }}-bin-win-cpu-x64.zip
- name: Unzip Windows release
run: unzip llama-${{ needs.find.outputs.build }}-bin-win-cpu-x64.zip
- name: Pack llama-server
run: |
mkdir app
mv llama-server.exe *.dll app/.
cd app
zip -r ../llama-server-${{ needs.find.outputs.build }}-windows-x64.zip *
- name: Upload llama-server
uses: actions/upload-artifact@v4
with:
path: llama-server-${{ needs.find.outputs.build }}-windows-x64.zip
name: llama-server-windows-x64
windows-cuda:
needs: find
runs-on: ubuntu-latest
steps:
- name: Download Windows CUDA release
run: |
version=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | \
jq -r '.assets | map(.name | capture("cudart-.*-cuda-(?<version>[^-]+)-")) | .[0] | .version')
curl -L -o llama-server-windows-x64.zip \
https://github.com/ggml-org/llama.cpp/releases/download/${{ needs.find.outputs.build }}/llama-${{ needs.find.outputs.build }}-bin-win-cuda-$version-x64.zip
curl -L -o cudart-windows-x64.zip \
https://github.com/ggml-org/llama.cpp/releases/download/${{ needs.find.outputs.build }}/cudart-llama-bin-win-cuda-$version-x64.zip
- name: Unzip Windows CUDA release
run: |
unzip llama-server-windows-x64.zip
unzip cudart-windows-x64.zip
- name: Pack Windows CUDA backend
run: |
mkdir cuda
mv ggml-cuda.dll cudart*.dll cublas*.dll cuda/.
cd cuda
zip -r ../backend-cuda-${{ needs.find.outputs.build }}-windows-x64.zip *
- name: Upload CUDA backend
uses: actions/upload-artifact@v4
with:
path: backend-cuda-${{ needs.find.outputs.build }}-windows-x64.zip
name: backend-cuda-windows-x64
windows-hip:
needs: find
runs-on: ubuntu-latest
steps:
- name: Download Windows HIP release
run: curl -LO https://github.com/ggml-org/llama.cpp/releases/download/${{ needs.find.outputs.build }}/llama-${{ needs.find.outputs.build }}-bin-win-hip-radeon-x64.zip
- name: Unzip Windows HIP release
run: unzip llama-${{ needs.find.outputs.build }}-bin-win-hip-radeon-x64.zip
- name: Pack Windows HIP backend
run: |
mkdir hip
mv ggml-hip.dll hipblas*.dll rocblas.dll hipblaslt rocblas hip/.
cd hip
zip -r ../backend-hip-${{ needs.find.outputs.build }}-windows-x64.zip *
- name: Upload HIP backend
uses: actions/upload-artifact@v4
with:
path: backend-hip-${{ needs.find.outputs.build }}-windows-x64.zip
name: backend-hip-windows-x64
release:
needs: [find, linux-cuda, linux-hip, macos, windows, windows-cuda, windows-hip]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: ./release
merge-multiple: true
- name: Create release
uses: actions/github-script@v3
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
const path = require('path');
const fs = require('fs');
const tag = '${{ needs.find.outputs.build }}';
try {
await github.repos.getReleaseByTag({
owner: context.repo.owner,
repo: context.repo.repo,
tag,
});
console.log(`Release for build ${tag} already exists. Skipping...`);
return;
} catch (error) {
if (error.status !== '404') throw error;
}
const release = await github.repos.createRelease({
owner: context.repo.owner,
repo: context.repo.repo,
tag_name: tag,
});
for (let file of fs.readdirSync('./release')) {
if (path.extname(file) === '.zip') {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
owner: context.repo.owner,
repo: context.repo.repo,
release_id: release.data.id,
name: file,
data: fs.readFileSync(`./release/${file}`)
});
}
}