libdictenstein 0.1.0

High-performance dictionary data structures (trie, DAWG, double-array trie, suffix automaton, lock-free durable persistent ART) behind one trait API; pairs with liblevenshtein for fuzzy matching
name: CI

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
  workflow_dispatch:
  schedule:
    # Weekly deep formal run. The regular PR path runs the no-TLC harness.
    - cron: "37 5 * * 0"

env:
  CARGO_TERM_COLOR: always
  # Avoid the unconditional `target-cpu=native` from .cargo/config.toml so CI
  # builds remain portable. AES + SSE2 are required by PathMap's gxhash.
  RUSTFLAGS: "-C target-feature=+aes,+sse2"

jobs:
  # -----------------------------------------------------------------
  # Feature-matrix build. Catches feature-flag bugs that hide behind
  # the default `cargo build --all-features` smoke-test.
  # -----------------------------------------------------------------
  build-matrix:
    name: Build (${{ matrix.features-name }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        include:
          - features-name: default
            features: ""
            os: ubuntu-latest
          - features-name: no-default
            features: --no-default-features
            os: ubuntu-latest
          - features-name: persistent-artrie
            features: --no-default-features --features persistent-artrie
            os: ubuntu-latest
          - features-name: pathmap-backend
            features: --no-default-features --features pathmap-backend
            os: ubuntu-latest
          - features-name: io-uring-backend
            features: --no-default-features --features io-uring-backend
            os: ubuntu-latest
          - features-name: parallel-merge
            features: --no-default-features --features parallel-merge
            os: ubuntu-latest
          - features-name: serialization
            features: --no-default-features --features serialization
            os: ubuntu-latest
          - features-name: protobuf
            features: --no-default-features --features protobuf
            os: ubuntu-latest
          # `lling-llang` and `union_zipper_lling_llang_tests.rs` —
          # the latter is cfg-gated on `lling-llang AND NOT persistent-artrie`,
          # so we explicitly exclude persistent-artrie in this row.
          - features-name: lling-llang
            features: --no-default-features --features lling-llang
            os: ubuntu-latest
          - features-name: all-features
            features: --all-features
            os: ubuntu-latest
          # Cross-OS sanity on the default feature set.
          - features-name: macos-default
            features: ""
            os: macos-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - name: cargo build
        run: cargo build ${{ matrix.features }} --verbose
      - name: cargo test
        run: cargo test ${{ matrix.features }} --verbose --no-fail-fast

  # -----------------------------------------------------------------
  # `cargo clippy` with `-D warnings`. Catches all the lint regressions
  # that the default `cargo build` would let pass.
  # -----------------------------------------------------------------
  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy
      - name: cargo clippy --all-features -- -D warnings
        run: cargo clippy --all-features --all-targets -- -D warnings

  # -----------------------------------------------------------------
  # `cargo doc` with `-D warnings` so broken intra-doc links fail CI.
  # -----------------------------------------------------------------
  doc:
    name: Doc
    runs-on: ubuntu-latest
    env:
      RUSTDOCFLAGS: "-D warnings"
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - name: cargo doc --all-features --no-deps
        run: cargo doc --all-features --no-deps

  # -----------------------------------------------------------------
  # rustfmt --check. Prevents formatting drift.
  # -----------------------------------------------------------------
  fmt:
    name: Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - name: cargo fmt --check
        run: cargo fmt --all -- --check

  # -----------------------------------------------------------------
  # MSRV check. `Cargo.toml` declares `rust-version = "1.70"` —
  # enforce it.
  # -----------------------------------------------------------------
  msrv:
    name: MSRV (1.70)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: "1.70"
      - name: cargo build --all-features
        run: cargo build --all-features

  # -----------------------------------------------------------------
  # Coverage on nightly (branch coverage requires nightly llvm-cov).
  # The previous coverage.yml ran on stable, which silently degraded
  # branch coverage.
  # -----------------------------------------------------------------
  coverage:
    name: Coverage
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
        with:
          components: llvm-tools-preview
      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Run coverage
        run: |
          cargo llvm-cov --all-features --branch \
            --lcov --output-path lcov.info \
            --cobertura --output-path cobertura.xml
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          files: lcov.info,cobertura.xml
          fail_ci_if_error: false
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  # -----------------------------------------------------------------
  # Sanitizer suite. Pinned at nightly for `-Z sanitizer=`. Each
  # sanitizer is its own job so failures are localised.
  # -----------------------------------------------------------------
  sanitizers:
    name: Sanitizer (${{ matrix.sanitizer }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        sanitizer: [address, thread]
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
        with:
          components: rust-src
      - name: Run ${{ matrix.sanitizer }}sanitizer
        env:
          RUSTFLAGS: "-Z sanitizer=${{ matrix.sanitizer }}"
        run: |
          cargo +nightly test \
            --target x86_64-unknown-linux-gnu \
            --all-features \
            --no-fail-fast

  # -----------------------------------------------------------------
  # Rocq proofs. Requires `coqc` >= 9.1 on the runner.
  # -----------------------------------------------------------------
  rocq:
    name: Rocq proofs
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install Rocq (Coq)
        run: |
          sudo apt-get update
          sudo apt-get install -y coq
      - name: Build proofs
        working-directory: formal-verification/rocq
        run: make

  # -----------------------------------------------------------------
  # Formal correspondence harness. This ties Rust tests, Rocq, unsafe
  # inventory drift, and TLA+ SANY checks into one PR gate.
  # -----------------------------------------------------------------
  formal-correspondence:
    name: Formal correspondence
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: "21"
      - name: Install Rocq and TLA+ tools
        run: |
          sudo apt-get update
          sudo apt-get install -y coq curl
          curl -L \
            https://github.com/tlaplus/tlaplus/releases/latest/download/tla2tools.jar \
            -o tla2tools.jar
          sudo mkdir -p /opt/tla
          sudo mv tla2tools.jar /opt/tla/tla2tools.jar
          sudo tee /usr/local/bin/tla2sany >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-3g}" -cp /opt/tla/tla2tools.jar tla2sany.SANY "$@"
          EOF
          sudo tee /usr/local/bin/tlc >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-3g}" -cp /opt/tla/tla2tools.jar tlc2.TLC "$@"
          EOF
          sudo chmod +x /usr/local/bin/tla2sany /usr/local/bin/tlc
      - name: Run no-TLC correspondence harness
        env:
          CARGO_BUILD_JOBS: "2"
          FORMAL_MEM_LIMIT_BYTES: "8589934592"
          TLA_JAVA_HEAP: "3g"
        run: prlimit --as="$FORMAL_MEM_LIMIT_BYTES" --rss="$FORMAL_MEM_LIMIT_BYTES" -- bash scripts/verify-formal-correspondence.sh

  # -----------------------------------------------------------------
  # Miri unsafe-boundary checks. Runs on nightly because `cargo miri`
  # is a nightly component.
  # -----------------------------------------------------------------
  formal-miri:
    name: Formal unsafe boundary (Miri)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
        with:
          components: miri,rust-src
      - uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: "21"
      - name: Install Rocq and TLA+ tools
        run: |
          sudo apt-get update
          sudo apt-get install -y coq curl
          curl -L \
            https://github.com/tlaplus/tlaplus/releases/latest/download/tla2tools.jar \
            -o tla2tools.jar
          sudo mkdir -p /opt/tla
          sudo mv tla2tools.jar /opt/tla/tla2tools.jar
          sudo tee /usr/local/bin/tla2sany >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-3g}" -cp /opt/tla/tla2tools.jar tla2sany.SANY "$@"
          EOF
          sudo tee /usr/local/bin/tlc >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-3g}" -cp /opt/tla/tla2tools.jar tlc2.TLC "$@"
          EOF
          sudo chmod +x /usr/local/bin/tla2sany /usr/local/bin/tlc
      - name: Prepare Miri
        env:
          CARGO_BUILD_JOBS: "2"
          FORMAL_MEM_LIMIT_BYTES: "8589934592"
        run: prlimit --as="$FORMAL_MEM_LIMIT_BYTES" --rss="$FORMAL_MEM_LIMIT_BYTES" -- cargo miri setup
      - name: Run Miri-gated correspondence harness
        env:
          CARGO_BUILD_JOBS: "2"
          FORMAL_MEM_LIMIT_BYTES: "8589934592"
          RUN_MIRI: "1"
          TLA_JAVA_HEAP: "3g"
        run: prlimit --as="$FORMAL_MEM_LIMIT_BYTES" --rss="$FORMAL_MEM_LIMIT_BYTES" -- bash scripts/verify-formal-correspondence.sh

  # -----------------------------------------------------------------
  # Linux io_uring backend correspondence. Tests skip internally if
  # the runner kernel/filesystem rejects io_uring + O_DIRECT setup.
  # -----------------------------------------------------------------
  formal-io-uring:
    name: Formal storage backend (io_uring)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: "21"
      - name: Install Rocq and TLA+ tools
        run: |
          sudo apt-get update
          sudo apt-get install -y coq curl
          curl -L \
            https://github.com/tlaplus/tlaplus/releases/latest/download/tla2tools.jar \
            -o tla2tools.jar
          sudo mkdir -p /opt/tla
          sudo mv tla2tools.jar /opt/tla/tla2tools.jar
          sudo tee /usr/local/bin/tla2sany >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-3g}" -cp /opt/tla/tla2tools.jar tla2sany.SANY "$@"
          EOF
          sudo tee /usr/local/bin/tlc >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-3g}" -cp /opt/tla/tla2tools.jar tlc2.TLC "$@"
          EOF
          sudo chmod +x /usr/local/bin/tla2sany /usr/local/bin/tlc
      - name: Run io_uring-gated correspondence harness
        env:
          CARGO_BUILD_JOBS: "2"
          FORMAL_MEM_LIMIT_BYTES: "8589934592"
          RUN_IO_URING: "1"
          TLA_JAVA_HEAP: "3g"
        run: prlimit --as="$FORMAL_MEM_LIMIT_BYTES" --rss="$FORMAL_MEM_LIMIT_BYTES" -- bash scripts/verify-formal-correspondence.sh

  # -----------------------------------------------------------------
  # Scheduled/manual deep bounded model check. This is intentionally
  # not a PR job because several focused TLC models are large.
  # -----------------------------------------------------------------
  formal-tlc:
    name: Formal bounded models (TLC)
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: "21"
      - name: Install Rocq and TLA+ tools
        run: |
          sudo apt-get update
          sudo apt-get install -y coq curl
          curl -L \
            https://github.com/tlaplus/tlaplus/releases/latest/download/tla2tools.jar \
            -o tla2tools.jar
          sudo mkdir -p /opt/tla
          sudo mv tla2tools.jar /opt/tla/tla2tools.jar
          sudo tee /usr/local/bin/tla2sany >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-5g}" -cp /opt/tla/tla2tools.jar tla2sany.SANY "$@"
          EOF
          sudo tee /usr/local/bin/tlc >/dev/null <<'EOF'
          #!/usr/bin/env bash
          exec java -Xmx"${TLA_JAVA_HEAP:-5g}" -cp /opt/tla/tla2tools.jar tlc2.TLC "$@"
          EOF
          sudo chmod +x /usr/local/bin/tla2sany /usr/local/bin/tlc
      - name: Run TLC-gated correspondence harness
        env:
          CARGO_BUILD_JOBS: "2"
          FORMAL_MEM_LIMIT_BYTES: "10737418240"
          RUN_TLC: "1"
          TLA_JAVA_HEAP: "5g"
        run: prlimit --as="$FORMAL_MEM_LIMIT_BYTES" --rss="$FORMAL_MEM_LIMIT_BYTES" -- bash scripts/verify-formal-correspondence.sh