1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# CI workflow definition: triggers, run concurrency, shared environment, then the jobs.
name: ci
# Events that start this workflow: push, pull_request, a cron schedule, and manual dispatch.
on:
push:
# NOTE(review): no branch names are visible under either `branches:` key in this view —
# confirm the intended branch filters.
branches:
pull_request:
branches:
# Nightly run of the hardening harness — gated by the `hardening` job's `if:` below.
schedule:
# Fires at minute 0 of hour 4 every day (GitHub evaluates schedule crons in UTC).
- cron: "0 4 * * *"
workflow_dispatch:
# Cancel superseded runs on the same ref — fresher pushes win.
# Runs are grouped per workflow and ref; in-progress runs are cancelled for every ref
# except refs/heads/main.
concurrency:
group: ci-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
# Environment variables: always colorize cargo output and print a backtrace on Rust panics.
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
jobs:
# Build-and-test job: formatting check, clippy, tests, and a release build of the
# `basemind` binary, run once per `os` x `features` matrix entry.
test:
name: test / ${{ matrix.os }} / ${{ matrix.features }}
runs-on: ${{ matrix.os }}
strategy:
# Keep the remaining matrix entries running when one of them fails.
fail-fast: false
matrix:
# NOTE(review): neither `os` nor `features` shows any entries in this view — confirm the
# matrix values.
os:
# Default-features only on per-PR CI. The opt-in intelligence features
# (`memory`, `documents`, `crawl`, `full`) pull in kreuzberg's heavy
# system-dep stack (libheif source build, Tesseract, ONNX runtime,
# downloadable embedding models) — too expensive for fast PR feedback.
# The nightly `hardening` job runs the full feature surface against
# real OSS repos and is the authoritative signal for those builds.
features:
steps:
- uses: actions/checkout@v4
# Stable toolchain plus the two components used by the fmt and clippy steps below.
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
# protoc is required by lance-encoding's build script (transitive via the
# `memory`, `documents`, `crawl`, and `full` features). Install on every
# matrix point — the default-features build doesn't need it but the cost
# is negligible and keeps the steps uniform across matrix entries.
- name: Install protoc (Linux)
if: runner.os == 'Linux'
run: sudo apt-get update && sudo apt-get install -y protobuf-compiler
- name: Install protoc (macOS)
if: runner.os == 'macOS'
run: brew install protobuf
- uses: Swatinem/rust-cache@v2
with:
# Differentiate caches per-OS and features so the cache key is stable across runs.
key: ${{ matrix.os }}-${{ matrix.features }}
# `--check` reports unformatted files and fails instead of rewriting them.
- name: cargo fmt
run: cargo fmt --all --check
# `-D warnings` makes any clippy warning fail the step.
- name: cargo clippy
run: cargo clippy --workspace --all-targets --tests --features "${{ matrix.features }}" -- -D warnings
- name: cargo test
run: cargo test --workspace --features "${{ matrix.features }}" --quiet
# Release-profile build of only the `basemind` binary.
- name: cargo build --release
run: cargo build --release --quiet --bin basemind --features "${{ matrix.features }}"
# Dependency policy check: runs cargo-deny against the workspace on an Ubuntu runner.
deny:
  runs-on: ubuntu-latest
  name: cargo-deny
  steps:
    # Check out the repository, then set up the stable Rust toolchain and the build cache.
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2
    # Invoke the `check` command; configuration comes from deny.toml at the repo root.
    - uses: EmbarkStudios/cargo-deny-action@v2
      with:
        command: check
# Real-OSS hardening harness. Clones large upstream repos and exercises every MCP tool
# against them — too heavy for per-PR but ideal as a nightly canary. Runs on manual
# dispatch and once a day.
hardening:
  name: hardening harness (nightly)
  runs-on: ubuntu-latest
  # Skip this job for push / pull_request events; only scheduled and manually
  # dispatched runs execute it.
  if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
    # protoc is required by lance-encoding's build script (transitive via the
    # `full` feature surface that harden.sh builds). Mirror the `test` job.
    - name: Install protoc
      run: sudo apt-get update && sudo apt-get install -y protobuf-compiler
    # System libraries for the `full` feature surface that harden.sh builds.
    # kreuzberg-tesseract needs the tesseract CLI + English data; libheif's
    # codec backends (libde265 / libaom / libx265 / libdav1d) must be present
    # at link + runtime even when the libheif build is restored from cache, so
    # this install is UNCONDITIONAL — gating it on the cache hit would leave a
    # cached libheif unable to load its codecs.
    # The package lists were already refreshed by the protoc step above.
    - name: Install system dependencies
      run: |
        sudo apt-get install -y \
          tesseract-ocr tesseract-ocr-eng \
          libssl-dev pkg-config build-essential cmake \
          libmagic-dev libde265-dev libaom-dev \
          libx265-dev libdav1d-dev libnuma-dev
    # Ubuntu Noble ships libheif 1.17.6 but libheif-sys requires >=1.21, so
    # build 1.23.0 from source. The cache below skips the rebuild on later runs.
    - name: Cache libheif build (Linux)
      id: cache-libheif
      uses: actions/cache@v4
      with:
        path: |
          /usr/local/lib/libheif*
          /usr/local/lib/pkgconfig/libheif.pc
          /usr/local/include/libheif/
          /usr/local/share/libheif/
        # Exact-match key only: the build step below is skipped solely when
        # `cache-hit` is 'true', which requires an exact key match, so a
        # prefix fallback identical to the key would add nothing.
        key: libheif-${{ runner.arch }}-v1.23.0
    - name: Build and install libheif from source
      if: steps.cache-libheif.outputs.cache-hit != 'true'
      run: |
        set -euo pipefail
        # Download and build libheif 1.23.0 from source.
        LIBHEIF_VERSION="1.23.0"
        LIBHEIF_PREFIX="/usr/local"
        build_dir="$(mktemp -d)"
        # Single quotes: expand $build_dir when the trap fires, not when it is set.
        trap 'rm -rf "$build_dir"' EXIT
        cd "$build_dir"
        # --retry absorbs transient download failures on the nightly run.
        curl -fsSL --retry 3 -o libheif.tar.gz \
          "https://github.com/strukturag/libheif/releases/download/v${LIBHEIF_VERSION}/libheif-${LIBHEIF_VERSION}.tar.gz"
        tar xzf libheif.tar.gz
        cd "libheif-${LIBHEIF_VERSION}"
        mkdir build && cd build
        cmake .. \
          -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_INSTALL_PREFIX="$LIBHEIF_PREFIX" \
          -DCMAKE_INSTALL_LIBDIR=lib \
          -DWITH_EXAMPLES=OFF \
          -DWITH_GDK_PIXBUF=OFF \
          -DBUILD_TESTING=OFF
        make -j"$(nproc)"
        sudo make install
        sudo ldconfig
    # Runs on cache hit and miss alike so later steps can locate the libheif
    # installed under /usr/local.
    - name: Export libheif environment variables
      run: |
        # Append the previous value only when it is non-empty: a trailing ':'
        # leaves an empty path entry, which the dynamic loader treats as the
        # current working directory.
        echo "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"
    - uses: Swatinem/rust-cache@v2
    - name: run harden.sh
      run: ./scripts/harden.sh
    # `always()` uploads the results file even when harden.sh fails.
    - name: upload results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: harden-results
        path: /tmp/basemind-harden/results.ndjson