# chunk 0.10.2
#
# The fastest semantic text chunking library — up to 1TB/s chunking throughput
# Documentation
# Workflow-level settings shared by every job below.
name: Pyodide compatibility

# Triggers: pushes to main and pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# The workflow token is limited to reading repository contents.
permissions:
  contents: read

# Sets cargo's terminal color mode to `always` for all jobs.
env:
  CARGO_TERM_COLOR: always

jobs:
  # Builds the Python bindings as a wasm32-unknown-emscripten wheel, using the
  # Rust toolchain, Emscripten version, ABI version and rustflags reported by
  # `pyodide config get`, then uploads the wheel as a workflow artifact.
  build-pyemscripten:
    name: Build PyEmscripten wheel (${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: '3.14'
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install pyodide-build
        run: pip install pyodide-build

      # Each value is assigned to a variable before being written out. With
      # `echo "key=$(cmd)"` the exit status is that of `echo`, so a failing
      # `pyodide config get` would silently produce an empty output; a plain
      # assignment propagates the failure and `bash -e` stops the step.
      - name: Get pyodide config
        id: pyodide_config
        run: |
          rust_toolchain="$(pyodide config get rust_toolchain)"
          emscripten_version="$(pyodide config get emscripten_version)"
          pyodide_abi_version="$(pyodide config get pyodide_abi_version)"
          rustflags="$(pyodide config get rustflags)"
          {
            echo "rust_toolchain=${rust_toolchain}"
            echo "emscripten_version=${emscripten_version}"
            echo "pyodide_abi_version=${pyodide_abi_version}"
            echo "rustflags=${rustflags}"
          } >> "$GITHUB_OUTPUT"

      # Install the Emscripten SDK version that pyodide-build reported.
      - uses: emscripten-core/setup-emsdk@v16
        with:
          version: ${{ steps.pyodide_config.outputs.emscripten_version }}

      # `--no-default-features` is what the test job relies on when it asserts
      # that the numpy-backed functions are absent from the wasm build.
      - name: Build PyEmscripten wheel
        uses: PyO3/maturin-action@v1
        env:
          CARGO_TARGET_WASM32_UNKNOWN_EMSCRIPTEN_RUSTFLAGS: ${{ steps.pyodide_config.outputs.rustflags }}
          MATURIN_PYEMSCRIPTEN_PLATFORM_VERSION: ${{ steps.pyodide_config.outputs.pyodide_abi_version }}
        with:
          target: wasm32-unknown-emscripten
          args: --release --out dist -m packages/python/Cargo.toml --interpreter ${{ matrix.python-version }} --no-default-features
          rust-toolchain: ${{ steps.pyodide_config.outputs.rust_toolchain }}

      # The artifact name must match the download step in test-pyemscripten.
      # Fail here if no wheel was produced instead of uploading nothing.
      - uses: actions/upload-artifact@v4
        with:
          name: wheels-pyemscripten-${{ matrix.python-version }}
          path: dist
          if-no-files-found: error

  # Installs the wheel uploaded by build-pyemscripten into a Pyodide virtual
  # environment and runs an inline Python smoke test against it.
  test-pyemscripten:
    name: Test in Pyodide (${{ matrix.python-version }})
    # The wheel artifact is produced by the build job, so it must finish first.
    needs: [build-pyemscripten]
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      # Same matrix as the build job, so the artifact name computed in the
      # download step matches the one that was uploaded.
      matrix:
        include:
          - python-version: '3.14'
    steps:
      # NOTE(review): no later step in this job visibly reads files from the
      # repository checkout — confirm whether this step is still required.
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Provides the `pyodide` CLI used below to create the virtual environment.
      - name: Install pyodide-build
        run: pip install pyodide-build

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: wheels-pyemscripten-${{ matrix.python-version }}
          path: dist

      # pip is restricted to the downloaded `dist` directory (`--no-index`
      # plus `--find-links dist`) and skips dependencies (`--no-deps`), so the
      # wheel under test is the one built by this workflow.
      # The inline script checks, in order: the expected module attributes
      # exist, the two numpy-backed functions are absent, Chunker yields three
      # specific chunks for a small input, chunk_offsets returns three
      # entries, and merge_splits returns a result with a non-empty `merged`.
      - name: Test in Pyodide venv
        run: |
          pyodide venv .venv-pyodide
          source .venv-pyodide/bin/activate
          pip install --upgrade pip
          pip install chonkie-core --no-index --no-deps --find-links dist
          python -c "
          import chonkie_core

          # Verify module exports
          assert hasattr(chonkie_core, 'Chunker'), 'Missing Chunker class'
          assert hasattr(chonkie_core, 'PatternSplitter'), 'Missing PatternSplitter class'
          assert hasattr(chonkie_core, 'MergeResult'), 'Missing MergeResult class'
          assert hasattr(chonkie_core, 'chunk_offsets'), 'Missing chunk_offsets function'
          assert hasattr(chonkie_core, 'split_offsets'), 'Missing split_offsets function'
          assert hasattr(chonkie_core, 'split_pattern_offsets'), 'Missing split_pattern_offsets function'
          assert hasattr(chonkie_core, 'find_merge_indices'), 'Missing find_merge_indices function'
          assert hasattr(chonkie_core, 'merge_splits'), 'Missing merge_splits function'
          assert hasattr(chonkie_core, 'DEFAULT_TARGET_SIZE'), 'Missing DEFAULT_TARGET_SIZE'
          assert hasattr(chonkie_core, 'DEFAULT_DELIMITERS'), 'Missing DEFAULT_DELIMITERS'

          # Verify numpy functions are NOT available (numpy-support feature disabled)
          assert not hasattr(chonkie_core, 'savgol_filter'), 'savgol_filter should be absent in wasm build'
          assert not hasattr(chonkie_core, 'windowed_cross_similarity'), 'windowed_cross_similarity should be absent in wasm build'

          # Basic functionality test
          text = b'Hello. World. Test.'
          chunks = list(chonkie_core.Chunker(text, size=10, delimiters=b'.'))
          assert len(chunks) == 3, f'Expected 3 chunks, got {len(chunks)}'
          assert chunks[0] == b'Hello.'
          assert chunks[1] == b' World.'
          assert chunks[2] == b' Test.'

          # Test chunk_offsets
          offsets = chonkie_core.chunk_offsets(text, size=10, delimiters=b'.')
          assert len(offsets) == 3

          # Test merge functions
          result = chonkie_core.merge_splits(['Hello', 'world', '!'], [1, 1, 1], 2)
          assert len(result.merged) >= 1

          print('All Pyodide compatibility checks passed!')
          "