Skip to content

Build Release (CUDA) #64

Build Release (CUDA)

Build Release (CUDA) #64

name: Build Release (CUDA)
on: workflow_dispatch
permissions:
contents: write
jobs:
define_matrix:
name: Define Build Matrix
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
defaults:
run:
shell: pwsh
steps:
- name: Define Job Output
id: set-matrix
run: |
$matrix = @{
'os' = @('ubuntu-24.04')
'pyver' = @("3.13")
'cuda' = @("12.8.1") #12.9.0 >= is not supported (12.8.1 works)
'releasetag' = @("basic")
}
$matrixOut = ConvertTo-Json $matrix -Compress
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
build_wheels:
name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
needs: define_matrix
runs-on: ${{ matrix.os }}
permissions:
id-token: write
contents: write
attestations: write
strategy:
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
defaults:
run:
shell: pwsh
env:
CUDAVER: ${{ matrix.cuda }}
AVXVER: ${{ matrix.releasetag }}
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
with:
submodules: "recursive"
- name: Verify GLIBC version
run: ldd --version
- name: Install the latest version of uv
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7
with:
version: "latest"
activate-environment: true
- name: Setup Mamba
uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0
with:
activate-environment: "build"
python-version: ${{ matrix.pyver }}
miniforge-version: latest
add-pip-as-python-dependency: true
auto-activate-base: false
- name: Install Dependencies
env:
MAMBA_DOWNLOAD_FAILFAST: "0"
MAMBA_NO_LOW_SPEED_LIMIT: "1"
run: |
$cudaVersion = $env:CUDAVER
mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion
- name: Build Wheel
working-directory: vendor/llama-cpp-python
run: |
$cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
$env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
if ($IsLinux) {
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
}
$env:VERBOSE = '1'
$env:CMAKE_ARGS = '-DGGML_CUDA=on -DLLAVA_BUILD=off -DCMAKE_CUDA_ARCHITECTURES=80;75;'
$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=OFF $env:CMAKE_ARGS"
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
# Rename the release version with +cu{cuda_ver} build tag
(Get-Content llama_cpp/__init__.py) -replace '__version__ = "([^"]*)"', ('__version__ = "' + '$1' + '+cu' + $cudaVersion + '"') | Set-Content llama_cpp/__init__.py
#Build wheel
uv -v build --no-create-gitignore --python cpython@${{ matrix.pyver }} --wheel
# write the build tag to the output
Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
- uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
with:
files: vendor/llama-cpp-python/dist/*.whl
tag_name: ${{ github.ref_name }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
with:
subject-path: 'vendor/llama-cpp-python/dist/*.whl'