diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..76dedeb --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,383 @@ +name: release + +# Pre-built parakeet-cli bundles for every release (issue #21). +# +# One self-contained binary per (platform, backend) pair, packaged with the +# LICENSE and README. BUILD_SHARED_LIBS=OFF folds the ggml backends into the +# binary (the docker images keep them shared; a download-and-run bundle should +# not need a lib directory). GGML_NATIVE=OFF keeps the binaries portable +# across CPUs, same as the docker images and ci.yml. +# +# Variants: +# linux x64: cpu, vulkan, cuda arm64: cpu +# macos arm64: metal x64: cpu (cross-compiled on the arm +# runner; GitHub is retiring Intel ones) +# windows x64: cpu, vulkan, cuda +# +# CUDA notes (same reasoning as docker.yml): GGML_CUDA_NO_VMM=ON because the +# build runners have no GPU driver to link libcuda against. Linux uses the +# CUDA 13 apt repo so Blackwell (sm_120) is covered; the cudart/cublas +# runtime libraries are bundled into the tarball next to the binary, which +# carries an $ORIGIN rpath. Windows ships them as a separate cudart zip +# (llama.cpp convention) so users who already have the toolkit skip a large +# download. +# +# Triggers: pushing a v* tag builds everything and attaches the bundles to +# the GitHub release for that tag (creating a draft release if none exists +# yet, so creating the release before or after pushing the tag both work). +# workflow_dispatch builds the same bundles and leaves them as workflow +# artifacts, useful for testing changes to this file. 
+
+on:
+  push:
+    tags: ['v*']
+  workflow_dispatch:
+
+# Least privilege for GITHUB_TOKEN: the build jobs only read the repository.
+# The release job declares contents: write for itself.
+permissions:
+  contents: read
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  # ---------------------------------------------------------------------------
+  # linux: cpu (x64 + arm64, native runners), vulkan (LunarG SDK), cuda
+  # (CUDA 13 apt repo). cpu/vulkan build on ubuntu-22.04 for a wider glibc
+  # range; cuda needs the ubuntu2404 CUDA repo.
+  # ---------------------------------------------------------------------------
+  build-linux:
+    runs-on: ${{ matrix.runner }}
+    timeout-minutes: 180
+    strategy:
+      # One failing variant must not cancel the other bundles.
+      fail-fast: false
+      matrix:
+        include:
+          - backend: cpu
+            arch: x64
+            runner: ubuntu-22.04
+            cmake_args: ""
+            cuda_archs: ""
+          - backend: cpu
+            arch: arm64
+            runner: ubuntu-22.04-arm
+            cmake_args: ""
+            cuda_archs: ""
+          - backend: vulkan
+            arch: x64
+            runner: ubuntu-22.04
+            cmake_args: "-DPARAKEET_GGML_VULKAN=ON"
+            cuda_archs: ""
+          - backend: cuda
+            arch: x64
+            runner: ubuntu-24.04
+            # No CMAKE_CUDA_ARCHITECTURES: ggml's curated default applies,
+            # the same list llama.cpp releases ship (PTX for the datacenter
+            # gens, real code for 86/89/120a, plus 121a for GB10 on CUDA 13).
+            cmake_args: "-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON"
+            cuda_archs: ""
+    steps:
+      - name: Checkout (with submodules)
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Determine version
+        id: ver
+        # Tag builds are named after the tag; every other ref uses the
+        # 7-character commit SHA prefix.
+        run: |
+          if [ "${GITHUB_REF_TYPE}" = "tag" ]; then
+            echo "version=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
+          else
+            echo "version=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Install Vulkan SDK (LunarG)
+        if: matrix.backend == 'vulkan'
+        run: |
+          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc \
+            | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc >/dev/null
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list \
+            https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt-get update
+          sudo apt-get install -y vulkan-sdk
+
+      - name: Install CUDA toolkit 13
+        if: matrix.backend == 'cuda'
+        run: |
+          wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-toolkit-13-0
+          echo "/usr/local/cuda/bin" >> "$GITHUB_PATH"
+
+      - name: Configure
+        env:
+          CUDA_ARCHS: ${{ matrix.cuda_archs }}
+        run: |
+          EXTRA=""
+          if [ "${{ matrix.backend }}" = "cuda" ]; then
+            # $ORIGIN rpath so the bundled cudart/cublas next to the binary
+            # are found without LD_LIBRARY_PATH.
+            EXTRA="-DCMAKE_BUILD_RPATH=\$ORIGIN"
+          fi
+          cmake -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_NATIVE=OFF \
+            -DBUILD_SHARED_LIBS=OFF \
+            -DPARAKEET_BUILD_CLI=ON \
+            -DPARAKEET_BUILD_TESTS=OFF \
+            ${{ matrix.cmake_args }} \
+            ${CUDA_ARCHS:+"-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}"} \
+            ${EXTRA:+"$EXTRA"}
+
+      - name: Build
+        run: cmake --build build -j"$(getconf _NPROCESSORS_ONLN)"
+
+      - name: Smoke test (usage banner)
+        # The CLI exits nonzero when invoked bare; capture first so set -e
+        # only judges the grep.
+        run: |
+          out=$(./build/examples/cli/parakeet-cli 2>&1 || true)
+          grep -qi usage <<<"$out"
+
+      - name: Package
+        id: pack
+        env:
+          # The version is derived from the tag name, so it reaches the script
+          # through the environment instead of being spliced into the script
+          # text before bash parses it (same pattern as TAG in the release job).
+          VERSION: ${{ steps.ver.outputs.version }}
+          BACKEND: ${{ matrix.backend }}
+          ARCH: ${{ matrix.arch }}
+        run: |
+          BUNDLE="parakeet-${VERSION}-bin-linux-${BACKEND}-${ARCH}"
+          mkdir "$BUNDLE"
+          cp build/examples/cli/parakeet-cli LICENSE README.md "$BUNDLE"/
+          if [ "$BACKEND" = "cuda" ]; then
+            # -P keeps the .so symlinks as symlinks inside the bundle.
+            cp -P /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so* \
+                  /usr/local/cuda/targets/x86_64-linux/lib/libcublas.so* \
+                  /usr/local/cuda/targets/x86_64-linux/lib/libcublasLt.so* \
+                  "$BUNDLE"/
+          fi
+          tar -czf "$BUNDLE.tar.gz" "$BUNDLE"
+          echo "bundle=$BUNDLE" >> "$GITHUB_OUTPUT"
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.pack.outputs.bundle }}
+          path: ${{ steps.pack.outputs.bundle }}.tar.gz
+          if-no-files-found: error
+
+  # ---------------------------------------------------------------------------
+  # macos: metal on arm64 (the metallib is embedded in the binary, nothing to
+  # ship alongside), plus a cpu-only x64 build cross-compiled on the same
+  # arm64 runner.
+  # ---------------------------------------------------------------------------
+  build-macos:
+    runs-on: macos-14
+    timeout-minutes: 120
+    strategy:
+      # One failing variant must not cancel the other bundle.
+      fail-fast: false
+      matrix:
+        include:
+          - backend: metal
+            arch: arm64
+            cmake_args: "-DPARAKEET_GGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON"
+          - backend: cpu
+            arch: x64
+            cmake_args: "-DCMAKE_OSX_ARCHITECTURES=x86_64"
+    steps:
+      - name: Checkout (with submodules)
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Determine version
+        id: ver
+        # Tag builds are named after the tag; every other ref uses the
+        # 7-character commit SHA prefix.
+        run: |
+          if [ "${GITHUB_REF_TYPE}" = "tag" ]; then
+            echo "version=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
+          else
+            echo "version=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Configure
+        run: |
+          cmake -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_NATIVE=OFF \
+            -DBUILD_SHARED_LIBS=OFF \
+            -DPARAKEET_BUILD_CLI=ON \
+            -DPARAKEET_BUILD_TESTS=OFF \
+            ${{ matrix.cmake_args }}
+
+      - name: Build
+        run: cmake --build build -j"$(getconf _NPROCESSORS_ONLN)"
+
+      - name: Smoke test (usage banner)
+        # The x64 binary needs Rosetta; only smoke test the native build.
+        if: matrix.arch == 'arm64'
+        run: |
+          out=$(./build/examples/cli/parakeet-cli 2>&1 || true)
+          grep -qi usage <<<"$out"
+
+      - name: Package
+        id: pack
+        env:
+          # The version is derived from the tag name, so it reaches the script
+          # through the environment instead of being spliced into the script
+          # text before bash parses it (same pattern as TAG in the release job).
+          VERSION: ${{ steps.ver.outputs.version }}
+          BACKEND: ${{ matrix.backend }}
+          ARCH: ${{ matrix.arch }}
+        run: |
+          BUNDLE="parakeet-${VERSION}-bin-macos-${BACKEND}-${ARCH}"
+          mkdir "$BUNDLE"
+          cp build/examples/cli/parakeet-cli LICENSE README.md "$BUNDLE"/
+          tar -czf "$BUNDLE.tar.gz" "$BUNDLE"
+          echo "bundle=$BUNDLE" >> "$GITHUB_OUTPUT"
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.pack.outputs.bundle }}
+          path: ${{ steps.pack.outputs.bundle }}.tar.gz
+          if-no-files-found: error
+
+  # ---------------------------------------------------------------------------
+  # windows: MSVC via Ninja (ilammy/msvc-dev-cmd provides the cl environment,
+  # which also lets nvcc use cl without the Visual Studio CUDA integration).
+  # The ggml patches are applied explicitly under Git Bash first: CMake's
+  # find_program(bash) can pick up the stub WSL bash.exe in System32, which
+  # would skip the patches with only a warning.
+  # ---------------------------------------------------------------------------
+  build-windows:
+    runs-on: windows-2022
+    timeout-minutes: 300
+    strategy:
+      # One failing variant must not cancel the other bundles.
+      fail-fast: false
+      matrix:
+        include:
+          - backend: cpu
+            cmake_args: ""
+            cuda_archs: ""
+          - backend: vulkan
+            cmake_args: "-DPARAKEET_GGML_VULKAN=ON"
+            cuda_archs: ""
+          - backend: cuda
+            # CUDA 12.8: sm_120 support starts there (CUDA 13 is not in the
+            # cuda-toolkit action yet). No CMAKE_CUDA_ARCHITECTURES: ggml's
+            # curated default applies, same as the llama.cpp releases.
+            cmake_args: "-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_NO_VMM=ON"
+            cuda_archs: ""
+    env:
+      VULKAN_VERSION: 1.4.321.1
+    steps:
+      - name: Checkout (with submodules)
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Determine version
+        id: ver
+        # Tag builds are named after the tag; every other ref uses the
+        # 7-character commit SHA prefix.
+        run: |
+          if [ "${GITHUB_REF_TYPE}" = "tag" ]; then
+            echo "version=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
+          else
+            echo "version=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Apply ggml patches (Git Bash)
+        run: bash scripts/apply_ggml_patches.sh
+
+      - name: Install Vulkan SDK
+        if: matrix.backend == 'vulkan'
+        shell: pwsh
+        run: |
+          curl.exe -L -o vulkan-sdk.exe "https://sdk.lunarg.com/sdk/download/$env:VULKAN_VERSION/windows/vulkansdk-windows-X64-$env:VULKAN_VERSION.exe"
+          Start-Process -Wait -FilePath .\vulkan-sdk.exe -ArgumentList '--accept-licenses','--default-answer','--confirm-command','install'
+          "VULKAN_SDK=C:\VulkanSDK\$env:VULKAN_VERSION" | Out-File -Append $env:GITHUB_ENV
+          "C:\VulkanSDK\$env:VULKAN_VERSION\Bin" | Out-File -Append $env:GITHUB_PATH
+
+      - name: Install CUDA toolkit
+        if: matrix.backend == 'cuda'
+        uses: Jimver/cuda-toolkit@v0.2.21
+        with:
+          cuda: '12.8.0'
+          method: network
+          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust"]'
+          use-github-cache: true
+
+      - name: MSVC environment
+        uses: ilammy/msvc-dev-cmd@v1
+
+      - name: Configure
+        env:
+          CUDA_ARCHS: ${{ matrix.cuda_archs }}
+        run: |
+          cmake -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_NATIVE=OFF \
+            -DBUILD_SHARED_LIBS=OFF \
+            -DPARAKEET_BUILD_CLI=ON \
+            -DPARAKEET_BUILD_TESTS=OFF \
+            ${{ matrix.cmake_args }} \
+            ${CUDA_ARCHS:+"-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}"}
+
+      - name: Build
+        run: cmake --build build -j
+
+      - name: Smoke test (usage banner)
+        # The vulkan binary needs vulkan-1.dll, which the SDK install provides
+        # on the runner; cpu and cuda load on a bare machine.
+        run: |
+          out=$(./build/examples/cli/parakeet-cli.exe 2>&1 || true)
+          grep -qi usage <<<"$out"
+
+      - name: Package
+        id: pack
+        shell: pwsh
+        env:
+          # The version is derived from the tag name, so it reaches the script
+          # through the environment instead of being spliced into the script
+          # text before PowerShell parses it (same pattern as TAG in the
+          # release job).
+          VERSION: ${{ steps.ver.outputs.version }}
+          BACKEND: ${{ matrix.backend }}
+        run: |
+          $bundle = "parakeet-${env:VERSION}-bin-win-${env:BACKEND}-x64"
+          New-Item -ItemType Directory -Path $bundle | Out-Null
+          Copy-Item build/examples/cli/parakeet-cli.exe,LICENSE,README.md $bundle/
+          Compress-Archive -Path $bundle -DestinationPath "$bundle.zip"
+          "bundle=$bundle" | Out-File -Append $env:GITHUB_OUTPUT
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.pack.outputs.bundle }}
+          path: ${{ steps.pack.outputs.bundle }}.zip
+          if-no-files-found: error
+
+      # The CUDA runtime ships as its own zip, separate from the binary bundle.
+      - name: Package CUDA runtime DLLs
+        if: matrix.backend == 'cuda'
+        shell: pwsh
+        run: |
+          $name = "cudart-parakeet-bin-win-cuda-x64"
+          New-Item -ItemType Directory -Path $name | Out-Null
+          Copy-Item "$env:CUDA_PATH\bin\cudart64*.dll","$env:CUDA_PATH\bin\cublas64*.dll","$env:CUDA_PATH\bin\cublasLt64*.dll" $name/
+          Compress-Archive -Path $name -DestinationPath "$name.zip"
+
+      - name: Upload CUDA runtime artifact
+        if: matrix.backend == 'cuda'
+        uses: actions/upload-artifact@v4
+        with:
+          name: cudart-parakeet-bin-win-cuda-x64
+          path: cudart-parakeet-bin-win-cuda-x64.zip
+          if-no-files-found: error
+
+  # ---------------------------------------------------------------------------
+  # 
release: attach every bundle to the GitHub release for the tag. Creates a
+  # draft release if none exists yet, so the usual create-release-with-notes
+  # flow keeps working whether it happens before or after the tag push.
+  # ---------------------------------------------------------------------------
+  release:
+    runs-on: ubuntu-latest
+    # Runs only when the workflow ref is a tag, and only after every build
+    # job has succeeded.
+    if: github.ref_type == 'tag'
+    needs:
+      - build-linux
+      - build-macos
+      - build-windows
+    permissions:
+      contents: write
+    steps:
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          # No artifact name is given, so all artifacts of the run are
+          # fetched; merge-multiple places their files together under dist/.
+          merge-multiple: true
+          path: dist
+
+      - name: Upload assets to the release
+        env:
+          TAG: ${{ github.ref_name }}
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          # Reuse the release when it already exists; otherwise open a draft.
+          gh release view "$TAG" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1 \
+            || gh release create "$TAG" --repo "$GITHUB_REPOSITORY" --draft --verify-tag \
+                 --title "$TAG" --notes "Pre-built binaries for $TAG"
+          # --clobber replaces assets that were already uploaded under the same name.
+          gh release upload "$TAG" --repo "$GITHUB_REPOSITORY" --clobber dist/*
diff --git a/README.md b/README.md
index b0d17fc..a88cc27 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,20 @@ GPU numbers (NVIDIA GB10, Grace-Blackwell, vs NeMo-GPU in the `nvcr.io/nvidia/ne
 
 ---
 
+## Pre-built binaries
+
+Every [release](https://github.com/mudler/parakeet.cpp/releases) ships pre-built `parakeet-cli` bundles, so there is no need to compile from source:
+
+| Platform | Variants |
+| -------- | -------- |
+| Linux x64 | cpu, vulkan, cuda |
+| Linux arm64 | cpu |
+| macOS arm64 | metal |
+| macOS x64 | cpu |
+| Windows x64 | cpu, vulkan, cuda |
+
+The cuda bundles target Turing (sm_75) and newer, including Blackwell. On Linux the CUDA runtime libraries are bundled in the tarball; on Windows download the `cudart-parakeet-bin-win-cuda-x64.zip` asset alongside the binary zip unless you already have the CUDA toolkit installed. The vulkan binaries need the Vulkan loader on the system (`libvulkan1` on Debian/Ubuntu; on Windows the GPU driver provides it).
+ ## Build Clone with submodules (ggml is vendored at `third_party/ggml`):