# riptoken 0.1.0
#
# Fast BPE tokenizer for LLMs — a faster, drop-in compatible reimplementation of tiktoken.
# Documentation
# Continuous-integration workflow: runs on pushes to, and pull requests
# targeting, the main branch.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the jobs in this workflow only check out, build and test
# the code, so the workflow token needs nothing beyond read access to the
# repository contents.
permissions:
  contents: read

# At most one active run per workflow and ref; a newer run cancels the one
# that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Rust checks: formatting, lints (with and without the `python` feature),
  # tests and the documentation build, on every OS in the matrix.
  rust:
    name: Rust ${{ matrix.rust }} on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        rust: [stable]
    steps:
      - uses: actions/checkout@v4

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: ${{ matrix.rust }}
          components: rustfmt, clippy

      # Cache the cargo registry, git checkouts and build output, keyed on the
      # runner OS and the hash of every Cargo.lock in the repository.
      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: cargo fmt
        run: cargo fmt --all -- --check

      # Lint twice so code behind the `python` feature gate is covered too;
      # `-D warnings` turns every warning into an error.
      - name: cargo clippy
        run: cargo clippy --all-targets -- -D warnings

      - name: cargo clippy (python feature)
        run: cargo clippy --all-targets --features python -- -D warnings

      - name: cargo test
        run: cargo test --all-targets

      # `--all-targets` selects lib/bins/tests/benches/examples only, so the
      # documentation tests need their own invocation.
      - name: cargo test (doctests)
        run: cargo test --doc

      # Fail the build on rustdoc warnings (e.g. broken intra-doc links).
      - name: cargo doc
        run: cargo doc --no-deps
        env:
          RUSTDOCFLAGS: "-D warnings"

  # Python bindings: build the extension with maturin and run the pytest
  # suite on every OS / Python version combination in the matrix.
  python:
    name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        # Quoted so the versions stay strings rather than floats.
        python-version: ["3.9", "3.11", "3.13"]
    steps:
      - uses: actions/checkout@v4

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install maturin & test deps
        run: |
          python -m pip install --upgrade pip
          pip install "maturin>=1.5,<2.0" pytest tiktoken

      # `maturin develop` refuses to run without a virtualenv or conda
      # environment, and setup-python does not activate one. Build a wheel for
      # the selected interpreter and install it with pip instead.
      - name: Build extension
        shell: bash
        run: |
          maturin build --release --features python --interpreter python --out dist
          python -m pip install dist/*.whl

      # Export tiktoken's o200k_base merge table to ./o200k_base.tiktoken, one
      # "<base64 token> <rank>" line per entry, ordered by rank. The first
      # command only loads the encoding and prints its vocabulary size.
      # NOTE(review): presumably the test suite reads this file — confirm.
      - name: Download o200k_base vocab
        shell: bash
        run: |
          python -c "import tiktoken; enc=tiktoken.get_encoding('o200k_base'); print(enc.n_vocab)"
          python -c "
          import base64, tiktoken
          enc = tiktoken.get_encoding('o200k_base')
          with open('o200k_base.tiktoken', 'wb') as f:
              for b, r in sorted(enc._mergeable_ranks.items(), key=lambda x: x[1]):
                  f.write(base64.b64encode(b) + b' ' + str(r).encode() + b'\n')
          "

      - name: Run pytest
        run: pytest -q