# sqlite-graphrag 1.0.68

# Continuous-integration workflow: triggered by pushes to `main` and by pull
# requests that target `main`.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the jobs below only need to read the repository contents,
# so the GITHUB_TOKEN is restricted to read-only instead of inheriting the
# repository default.
permissions:
  contents: read

# Environment shared by every job: coloured cargo output and backtraces on
# panic.
env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: "1"

jobs:
  # Formatting gate: fails when any file differs from rustfmt's output.
  fmt:
    runs-on: ubuntu-latest
    name: Formatting
    steps:
      - uses: actions/checkout@v4
      # Stable toolchain with the rustfmt component added.
      - with:
          components: rustfmt
        uses: dtolnay/rust-toolchain@stable
      # `--check` reports differences without rewriting files.
      - run: >-
          cargo fmt --all --check

  # Lint gate: runs clippy on every OS in the matrix and treats warnings as
  # errors.
  clippy:
    name: Clippy (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining OS legs finish even when one of them fails.
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy
      - uses: Swatinem/rust-cache@v2
      - run: >-
          cargo clippy --all-targets --all-features -- -D warnings

  # Test gate: runs the nextest suite and the doctests on Linux, macOS and
  # Windows.
  test:
    name: Tests (${{ matrix.os }})
    strategy:
      # Let the remaining OS legs finish even when one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      # Three candidate model directories are listed (Linux-, macOS- and
      # Windows-style paths); the cache key is scoped per runner OS.
      - name: Cache embedding ONNX models
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/sqlite-graphrag/models
            ~/Library/Caches/sqlite-graphrag/models
            ~\AppData\Local\sqlite-graphrag\cache\models
          key: embedding-model-multilingual-e5-small-${{ runner.os }}-v2
      # Best-effort warm-up. The time limit is enforced by the runner
      # (`timeout-minutes`) rather than the GNU `timeout` binary, which is not
      # guaranteed to exist on every OS in the matrix; `continue-on-error`
      # keeps a timed-out warm-up from failing the job.
      - name: Pre-download embedding model
        shell: bash
        timeout-minutes: 2
        continue-on-error: true
        run: |
          cargo run -- init --json 2>/dev/null && echo "Model ready" || echo "Model download skipped or failed (tests will skip gracefully)"
        env:
          SQLITE_GRAPHRAG_HOME: ${{ runner.temp }}/sgr-model-init
      - name: Install cargo-nextest
        run: cargo install cargo-nextest --version 0.9.114 --locked
      # Up to three attempts with a 30-second pause between them; the step
      # fails only when the third attempt fails.
      - name: Run tests with nextest (retry on network flake)
        shell: bash
        env:
          RUST_TEST_THREADS: "2"
          RUSTFLAGS: -D warnings
        run: |
          for attempt in 1 2 3; do
            echo "=== Test attempt $attempt ==="
            if cargo nextest run --profile ci; then
              break
            fi
            if [ "$attempt" -eq 3 ]; then exit 1; fi
            echo "Attempt $attempt failed, retrying in 30s..."
            sleep 30
          done
      - name: Run doctests
        env:
          RUST_TEST_THREADS: "2"
          RUSTFLAGS: -D warnings
        run: cargo test --doc --all-features

  # Long-running contract suites, each run single-threaded under a 5400-second
  # `timeout`.
  slow-contracts:
    runs-on: ubuntu-latest
    name: Slow Contract Suites
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Cache embedding ONNX models
        uses: actions/cache@v4
        with:
          key: embedding-model-multilingual-e5-small-${{ runner.os }}-v2
          path: |
            ~/.cache/sqlite-graphrag/models
            ~/Library/Caches/sqlite-graphrag/models
            ~\AppData\Local\sqlite-graphrag\cache\models
      - name: Run slow JSON contract suite
        run: >-
          timeout 5400 cargo test --features slow-tests
          --test doc_contract_integration -- --nocapture
        env:
          RUST_TEST_THREADS: "1"
      - name: Run slow PRD compliance suite
        run: >-
          timeout 5400 cargo test --features slow-tests
          --test prd_compliance -- --nocapture
        env:
          RUST_TEST_THREADS: "1"

  # Documentation gate: builds rustdoc for this crate only (`--no-deps`) with
  # rustdoc warnings promoted to errors.
  doc:
    runs-on: ubuntu-latest
    name: Documentation
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - env:
          RUSTDOCFLAGS: "-D warnings"
        run: >-
          cargo doc --no-deps --all-features

  # Minimum-supported-Rust-version gate: type-checks the crate with the 1.88
  # toolchain.
  msrv:
    runs-on: ubuntu-latest
    name: MSRV (1.88)
    steps:
      - uses: actions/checkout@v4
      # The toolchain version is selected through the action ref.
      - uses: dtolnay/rust-toolchain@1.88
      - uses: Swatinem/rust-cache@v2
      - run: >-
          cargo check --all-features

  # G29 (v1.0.68): cross-compile check that catches HANDLE-type regressions
  # on Windows before publishing. The runner's host architecture is x86_64,
  # so targeting x86_64-pc-windows-msvc exercises the type check without
  # requiring the full Windows linker (lib.exe) to be installed.
  windows-build-check:
    runs-on: ubuntu-latest
    name: Windows MSVC cross-compile (G29)
    steps:
      - uses: actions/checkout@v4
      # Stable toolchain with the Windows MSVC target added.
      - with:
          targets: x86_64-pc-windows-msvc
        uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      # `cargo check` on the library only, capped at 600 seconds.
      - name: cargo check --target x86_64-pc-windows-msvc
        run: >-
          timeout 600 cargo check --target x86_64-pc-windows-msvc
          --lib --all-features

  # Security gate: installs cargo-audit and runs it with default settings.
  # NOTE(review): the `cargo-audit` job in this workflow also runs
  # `cargo audit`, with explicit ignores and `--deny warnings` - confirm that
  # both jobs are still wanted.
  audit:
    runs-on: ubuntu-latest
    name: Security Audit
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - run: >-
          cargo install cargo-audit --locked
      - run: >-
          cargo audit

  # Dependency-policy gate: cargo-deny checks advisories, licenses, bans and
  # sources.
  deny:
    runs-on: ubuntu-latest
    name: Dependency Policy
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - run: >-
          cargo install cargo-deny --locked
      - run: >-
          cargo deny check advisories licenses bans sources

  # Informational unsafe-code report. Both commands end in `|| true`, so
  # neither an install failure nor a cargo-geiger failure fails the job.
  geiger:
    runs-on: ubuntu-latest
    name: Unsafe Audit
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Install cargo-geiger
        run: >-
          cargo install cargo-geiger --locked || true
      # The report is echoed to the log and also written to geiger-report.txt.
      - name: Audit unsafe density
        run: |
          cargo geiger --all-features --output-format ascii 2>&1 | tee geiger-report.txt || true
          echo "::notice::Unsafe audit report generated"

  # Coverage report: runs the nextest suite under cargo-llvm-cov and uploads
  # the resulting lcov file as an artifact.
  coverage:
    runs-on: ubuntu-latest
    name: Coverage
    steps:
      - uses: actions/checkout@v4
      # Stable toolchain with the llvm-tools-preview component added.
      - with:
          components: llvm-tools-preview
        uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Install cargo-nextest and llvm-cov
        run: |
          cargo install cargo-nextest --version 0.9.114 --locked
          cargo install cargo-llvm-cov --locked
      - name: Coverage with nextest
        run: >-
          cargo llvm-cov nextest --profile heavy --features slow-tests
          --lcov --output-path lcov.info
        env:
          RUST_TEST_THREADS: "2"
      - uses: actions/upload-artifact@v4
        with:
          path: lcov.info
          name: coverage-lcov

  # Pull-request-only gate: rejects commits whose message carries a
  # Co-authored-by trailer naming an AI agent or bot.
  commit-check:
    name: Block Agent Co-Authorship
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    steps:
      # Full history so the base..head range can be resolved.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Check commit messages for agent co-authors
        # The SHAs reach the script through environment variables instead of
        # being expanded into the script text, so expression values are never
        # parsed as shell code.
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          range="${BASE_SHA}..${HEAD_SHA}"
          pattern='(?i)Co-authored-by:.*(Claude|Opus|Sonnet|Haiku|GPT-|Copilot|Cursor|Gemini|Anthropic|OpenAI|dependabot\[bot\]|renovate\[bot\])'
          if git log --format='%B' "$range" | grep -Pq "$pattern"; then
            echo "::error::Commit messages must not include Co-authored-by from AI agents or bots"
            # Print the offending trailers for the log; never fail on this line.
            git log --format='%B' "$range" | grep -P "$pattern" || true
            exit 1
          fi
          echo "No forbidden co-authors detected"

  # Advisory gate: cargo-audit with warnings denied, minus two explicitly
  # ignored advisories.
  cargo-audit:
    runs-on: ubuntu-latest
    name: Cargo audit (vulnerabilities + unmaintained advisories)
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Install cargo-audit
        run: >-
          cargo install --locked cargo-audit
      - name: Run cargo audit
        # v1.0.37 (H9): cargo audit complements `cargo deny check` by
        # surfacing RUSTSEC advisories on transitive dependencies. CI used to
        # run only `cargo deny check`, which missed `RUSTSEC-2025-0119`
        # (number_prefix unmaintained) and `RUSTSEC-2024-0436` (paste
        # unmaintained), both pulled in transitively via fastembed/tokenizers.
        # Those two advisories are ignored until the upstream crates publish
        # maintained replacements; every OTHER advisory (especially
        # HIGH/CRITICAL vulnerabilities) still fails the build. Re-evaluate
        # the ignores quarterly.
        run: >-
          timeout 120 cargo audit
          --ignore RUSTSEC-2025-0119
          --ignore RUSTSEC-2024-0436
          --deny warnings

  # Language-policy gate: greps the Rust sources with ripgrep and fails when
  # Portuguese text appears in doc comments, tracing macros, #[error(...)]
  # attributes, or panic/assert/expect-style calls.
  language-check:
    name: Language policy (English-only outside i18n)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install ripgrep
        run: sudo apt-get update && sudo apt-get install -y ripgrep
      # Accent-based gates: each one fails the job when ripgrep finds a line
      # matching the surface prefix followed by an accented character from the
      # listed set.
      - name: Forbid Portuguese in /// doc comments
        run: |
          if rg '///.*[áéíóúâêôãõçÁÉÍÓÚÂÊÔÃÕÇ]' src/ tests/; then
            echo "::error::Portuguese characters found in /// doc comments. Policy: English-only outside i18n.rs."
            exit 1
          fi
      - name: Forbid Portuguese in //! crate-level docs
        run: |
          if rg '//!.*[áéíóúâêôãõçÁÉÍÓÚÂÊÔÃÕÇ]' src/ tests/; then
            echo "::error::Portuguese characters found in //! crate-level docs."
            exit 1
          fi
      # The two gates below scan src/ only.
      - name: Forbid Portuguese in tracing logs
        run: |
          if rg 'tracing::(info|warn|error|debug|trace)!.*[áéíóúâêôãõçÁÉÍÓÚÂÊÔÃÕÇ]' src/; then
            echo "::error::Portuguese in tracing logs. Use English keys/messages."
            exit 1
          fi
      - name: Forbid Portuguese in #[error(...)] attributes
        run: |
          if rg '#\[error.*[áéíóúâêôãõçÁÉÍÓÚÂÊÔÃÕÇ]' src/; then
            echo "::error::Portuguese in thiserror attributes. Use English."
            exit 1
          fi
      - name: Forbid Portuguese-without-accent words in tracing/error/doc/assert/expect/panic
        # v1.0.36 (M1): the accent-only audit gates above missed several PT
        # strings such as `tracing::warn!("NER falhou, usando apenas regex")`,
        # `"batch NER falhou (chunk de N janelas)..."`, and `"falha ao persistir
        # url '...'"`. This gate complements the accent gate by scanning the
        # specific surfaces where PT/EN drift hurts most: log lines, error
        # attributes, doc comments, and runtime assertions.
        # Plain string literals are NOT scanned because intentional fixtures
        # (PT test inputs that exercise multilingual extraction) live there.
        #
        # Every search below is case-insensitive (`-i`), suppresses ripgrep's
        # file-access error messages (`--no-messages`), and excludes files
        # named i18n.rs (`-g '!i18n.rs'`). The first matching gate fails the
        # step.
        run: |
          # v1.0.42 (HIGH 3): extended PT_PATTERN to detect non-accented prepositions,
          # adjectives, and nouns that escaped the previous verb-focused regex.
          # Previously, "Alias de X para contrato documentado" passed the gate.
          PT_PATTERN='\b(falhou|falha|falham|falhar|falhas|janelas?|invalido|invalida|invalidos|invalidas|inicializando|inicializar|inicializa|inicializou|persistir|persistencia|persistido|persistida|configuracao|autenticacao|validacao|verificacao|compactacao|substituicao|usando apenas|nao foi|nao pode|nao tem|nao deve|nao precisa|nao consegue|nao possui|nao encontrou|nao existe|nao sera|ja foi|ja existe|ja tem|ja esta|esperado|requerido|obrigatorio|obrigatoria|obrigatorios|obrigatorias|memoria|memorias|criando|criada|criadas|criados|criou|atualizado|atualizada|atualizados|atualizadas|deletado|deletada|deletados|deletadas|inserido|inserida|inseridos|inseridas|removido|removida|removidos|removidas|busca|propaga|retorna|alias de|contrato documentado|migrado de|liberar|fonte do match|adicionado em v|quando omitido|usuarios?|para list|para related|para history|para graph|para batch|para DbBusy|para parsers|para parsers ISO|paralelo a)\b'
          # 1. Tracing macros
          if rg --no-messages -i "tracing::(info|warn|error|debug|trace)!.*${PT_PATTERN}" src/ -g '!i18n.rs'; then
            echo "::error::PT-no-accent word detected in tracing log (src/). Translate or move to i18n.rs."
            exit 1
          fi
          # 2. thiserror attributes
          if rg --no-messages -i "#\[error.*${PT_PATTERN}" src/ -g '!i18n.rs'; then
            echo "::error::PT-no-accent word detected in #[error(...)] (src/). Translate or move to i18n.rs."
            exit 1
          fi
          # 3. /// and //! doc comments
          if rg --no-messages -i "(///|//!).*${PT_PATTERN}" src/ tests/ -g '!i18n.rs'; then
            echo "::error::PT-no-accent word detected in doc comment. Translate to English."
            exit 1
          fi
          # 4. assert/panic/unreachable/bail/ensure/expect macros and methods
          if rg --no-messages -i "(panic|assert|assert_eq|assert_ne|unreachable|todo|unimplemented|bail|ensure|expect)!?\([^)]*${PT_PATTERN}" src/ tests/ -g '!i18n.rs'; then
            echo "::error::PT-no-accent word detected in panic/assert/expect (src/ or tests/). Translate to English."
            exit 1
          fi

  # Concurrency gate: runs the `loom_lock_slots` test binary in release mode,
  # single-threaded, with the `sqlite_graphrag_loom` cfg enabled and a
  # 600-second cap.
  loom:
    runs-on: ubuntu-latest
    name: Loom Concurrency
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: Run loom tests
        run: >-
          timeout 600 cargo test --test loom_lock_slots --release
          -- --test-threads=1
        env:
          RUST_TEST_THREADS: "1"
          LOOM_MAX_BRANCHES: "100"
          LOOM_MAX_PREEMPTIONS: "1"
          RUSTFLAGS: "--cfg sqlite_graphrag_loom"

  # Miri gate: runs the tests selected by the two name filters under the
  # nightly Miri interpreter.
  miri:
    runs-on: ubuntu-latest
    name: Miri Unsafe Validation
    steps:
      - uses: actions/checkout@v4
      # Nightly toolchain with the miri component added.
      - with:
          components: miri
        uses: dtolnay/rust-toolchain@nightly
      - uses: Swatinem/rust-cache@v2
      - name: Run miri on pure-Rust unsafe tests
        run: >-
          cargo +nightly miri test --
          f32_to_bytes
          controlled_batch_plan

  # Non-blocking benchmark job (`continue-on-error` at job level): tries to
  # fetch a stored baseline, runs the quick benchmark, then uploads the
  # results under the baseline's artifact name.
  bench-regression:
    runs-on: ubuntu-latest
    name: Benchmark Regression
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      # A missing baseline is tolerated through step-level continue-on-error.
      # NOTE(review): actions/download-artifact@v4 resolves artifacts from the
      # current workflow run unless `run-id` and `github-token` are supplied,
      # so this step presumably never finds a baseline from an earlier run -
      # confirm.
      - name: Download baseline artifact
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          path: target/criterion
          name: bench-baseline
      - name: Run benchmarks
        run: >-
          timeout 900 cargo bench --bench regression_baseline -- --quick
        env:
          RUST_TEST_THREADS: "2"
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          path: target/criterion
          name: bench-baseline

  # Non-blocking sanity run (`continue-on-error` at job level): runs the test
  # suite through cargo-careful on nightly with a 600-second cap.
  careful:
    runs-on: ubuntu-latest
    name: cargo-careful sanity
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
      - run: >-
          cargo install cargo-careful
      - run: >-
          timeout 600 cargo +nightly careful test -- --test-threads=2

  # Release-hygiene gate: the version in Cargo.toml must have a matching
  # `## [<version>]` heading in both CHANGELOG.md and CHANGELOG.pt-BR.md.
  changelog-version-check:
    name: CHANGELOG version sync
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Verify CHANGELOG matches Cargo.toml version
        run: |
          # First `version = "..."` line in Cargo.toml.
          CARGO_VERSION=$(grep -m 1 '^version = ' Cargo.toml | cut -d '"' -f 2)
          echo "Cargo.toml version: $CARGO_VERSION"
          if [ -z "$CARGO_VERSION" ]; then
            echo "ERROR: could not read a version from Cargo.toml"
            exit 1
          fi
          HEADING="## [${CARGO_VERSION}]"
          # Literal prefix match: succeeds when some line of file $1 starts
          # with the heading. No regex is involved, so the dots in the version
          # cannot match arbitrary characters.
          has_heading() {
            awk -v h="$HEADING" 'index($0, h) == 1 { found = 1 } END { exit !found }' "$1"
          }
          if ! has_heading CHANGELOG.md; then
            echo "ERROR: CHANGELOG.md missing entry ${HEADING}"
            echo "Did you forget to bump CHANGELOG.md before tagging?"
            exit 1
          fi
          if ! has_heading CHANGELOG.pt-BR.md; then
            echo "ERROR: CHANGELOG.pt-BR.md missing entry ${HEADING}"
            exit 1
          fi
          echo "OK: both CHANGELOGs have [$CARGO_VERSION] entry"

  # Coverage gate: fails when total line coverage drops below 75%.
  # The job id is `coverage-threshold` because `coverage` is already used by
  # the lcov-report job in this workflow and job ids must be unique.
  coverage-threshold:
    name: Coverage threshold
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: llvm-tools-preview
      - uses: taiki-e/install-action@cargo-llvm-cov
      # cargo-llvm-cov enforces the threshold itself via `--fail-under-lines`,
      # so the result does not depend on parsing the layout of the text
      # report. The text report is still written to coverage.txt.
      - name: Generate coverage and enforce 75% line threshold
        run: cargo llvm-cov --text --output-path coverage.txt --fail-under-lines 75