# llama-rs 0.17.0
#
# A high-performance Rust implementation of llama.cpp: an LLM inference engine with full GGUF support.
# Documentation
name: CI

# Triggers: pushes to main, develop and any feature/** branch, plus pull
# requests that target main or develop.
on:
  push:
    branches: [main, develop, "feature/**"]
  pull_request:
    branches: [main, develop]

# Least privilege for GITHUB_TOKEN: the jobs in this workflow only check out
# the repository, so read access to its contents is sufficient.
permissions:
  contents: read

env:
  # Ask cargo for colored output even though the CI log is not a terminal.
  CARGO_TERM_COLOR: always

jobs:
  # Formatting and lint gate; the build/test jobs in this workflow list it
  # under `needs`, so they only start once it has passed.
  check:
    name: Format & Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Stable toolchain plus the two components the final steps invoke.
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
      - name: Install protoc
        run: |
          sudo apt-get update
          sudo apt-get install -y protobuf-compiler
      # Fails when any file differs from rustfmt's output.
      - run: cargo fmt --check
      # `-D warnings` turns every lint warning into a hard error.
      - run: cargo clippy -- -D warnings

  # Builds and tests the crate on each runner listed in the matrix.
  # Each entry supplies: a display name, the runner label, the cargo feature
  # list, and a per-entry suffix for the rust-cache key.
  test:
    name: Test (${{ matrix.name }})
    needs: check
    runs-on: ${{ matrix.runner }}
    strategy:
      # Let the remaining platforms finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - name: Linux
            runner: ubuntu-latest
            features: default
            cache_key: ubuntu-default
          - name: macOS ARM
            runner: macos-latest
            features: default,metal
            cache_key: macos-arm-metal
          - name: Windows
            runner: windows-latest
            features: default
            cache_key: windows-default
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        with:
          key: ${{ matrix.cache_key }}

      # protoc is installed with each platform's own package manager.
      - name: Install protoc (Linux)
        if: runner.os == 'Linux'
        run: sudo apt-get update && sudo apt-get install -y protobuf-compiler

      - name: Install protoc (macOS)
        if: runner.os == 'macOS'
        run: brew install protobuf

      - name: Install protoc (Windows)
        if: runner.os == 'Windows'
        run: choco install protoc -y

      # Only runs for matrix entries whose feature list mentions `metal`.
      - name: Verify Metal toolchain
        if: contains(matrix.features, 'metal')
        run: xcrun --sdk macosx metal --version

      # The feature list is passed through an environment variable instead of
      # being interpolated into the script text. `"$FEATURES"` is POSIX-shell
      # syntax, so bash is selected explicitly: the default shell on Windows
      # runners is PowerShell, where `$FEATURES` would be an unset PowerShell
      # variable and cargo would receive an empty feature list.
      - name: Build
        shell: bash
        env:
          FEATURES: ${{ matrix.features }}
        run: cargo build --features "$FEATURES"

      - name: Test
        shell: bash
        env:
          FEATURES: ${{ matrix.features }}
        run: cargo test --features "$FEATURES"

      # Runs the `metal_integration` test target on Metal-enabled entries only.
      - name: Metal integration tests
        if: contains(matrix.features, 'metal')
        run: cargo test --features metal --test metal_integration

  # Builds and tests a series of feature combinations on Linux.
  # Each entry supplies: a display name, the extra cargo arguments, and a
  # per-entry suffix for the rust-cache key.
  #
  # `cargo_args` is exported as $CARGO_ARGS and expanded UNQUOTED in the Build
  # and Test steps so the shell splits it into separate arguments. The values
  # must therefore not contain quote characters: quotes that come from a
  # variable are not removed by the shell and would reach cargo as part of the
  # feature names. Comma-separated feature lists contain no spaces, so they
  # need no quoting.
  feature-matrix:
    name: Features (${{ matrix.name }})
    needs: check
    runs-on: ubuntu-latest
    strategy:
      # Report every failing combination, not just the first one.
      fail-fast: false
      matrix:
        include:
          - name: minimal
            cargo_args: --no-default-features
            cache_key: ubuntu-minimal
          - name: cpu-only
            cargo_args: --no-default-features --features cpu
            cache_key: ubuntu-cpu
          - name: onnx
            cargo_args: --no-default-features --features cpu,onnx
            cache_key: ubuntu-onnx
          - name: server
            cargo_args: --no-default-features --features cpu,server
            cache_key: ubuntu-server
          - name: rag
            cargo_args: --features rag
            cache_key: ubuntu-rag
          - name: rag-sqlite
            cargo_args: --features rag-sqlite
            cache_key: ubuntu-rag-sqlite
          - name: rag-both
            cargo_args: --features rag,rag-sqlite
            cache_key: ubuntu-rag-both
          - name: distributed
            cargo_args: --features distributed
            cache_key: ubuntu-distributed
          - name: all-portable
            cargo_args: --features cpu,huggingface,cli,client,onnx,server,rag,rag-sqlite,distributed
            cache_key: ubuntu-all
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        with:
          key: ${{ matrix.cache_key }}

      - name: Install protoc
        run: sudo apt-get update && sudo apt-get install -y protobuf-compiler

      # $CARGO_ARGS is intentionally left unquoted: word splitting is what
      # turns the single string into individual cargo arguments.
      - name: Build
        env:
          CARGO_ARGS: ${{ matrix.cargo_args }}
        run: cargo build $CARGO_ARGS

      - name: Test
        env:
          CARGO_ARGS: ${{ matrix.cargo_args }}
        run: cargo test $CARGO_ARGS

  # Builds and tests the crate with the `vulkan` feature on Linux.
  vulkan:
    name: Vulkan (Linux)
    needs: check
    # Pinned rather than `ubuntu-latest`: the apt source installed below is the
    # LunarG list for Ubuntu "noble" (24.04), so the runner image has to stay
    # on that release for the packages to remain installable.
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        with:
          key: ubuntu-vulkan

      # Registers the LunarG signing key and apt source, then installs the SDK
      # together with protoc. `shell: bash` is explicit so the script runs with
      # `-o pipefail`: a failed key download then fails the step instead of
      # being masked by `tee` succeeding. tee's copy of the key on stdout is
      # discarded to keep it out of the job log.
      - name: Install Vulkan SDK and protoc
        shell: bash
        run: |
          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc > /dev/null
          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list
          sudo apt-get update
          sudo apt-get install -y vulkan-sdk protobuf-compiler

      # Fails early if glslc is not on PATH after the SDK install.
      - name: Verify glslc
        run: glslc --version

      - name: Build
        run: cargo build --features vulkan

      - name: Test
        run: cargo test --features vulkan

  # Builds and tests the crate with the `dx12` feature on a Windows runner.
  dx12:
    name: DX12 (Windows)
    runs-on: windows-latest
    needs: check
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        with:
          key: windows-dx12

      - name: Install protoc
        run: choco install protoc -y

      # Informational probe: `continue-on-error` keeps the job going when the
      # dxc command is missing or fails.
      - name: Verify dxc
        continue-on-error: true
        run: dxc --version

      - name: Build
        run: cargo build --features dx12

      # Both test invocations pass `--test-threads=1`, so tests run one at a
      # time.
      - name: Test
        run: cargo test --features dx12 -- --test-threads=1

      # Runs the `dx12_integration` test target on its own.
      - name: DX12 integration tests
        run: cargo test --features dx12 --test dx12_integration -- --test-threads=1

  # Compile-only check for the benchmark targets; nothing is measured.
  bench:
    name: Benchmarks (compile check)
    runs-on: ubuntu-latest
    needs: check
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        with:
          key: ubuntu-bench

      - name: Install protoc
        run: |
          sudo apt-get update
          sudo apt-get install -y protobuf-compiler

      # `--no-run` builds the benchmarks without executing them.
      - name: Build benchmarks
        run: cargo bench --no-run