embellama 0.3.0

High-performance Rust library for generating text embeddings using llama-cpp
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
# Embellama Build & Test Automation
# Run `just` to see available commands

# Default recipe - show the list of available recipes
default:
    @just --list

# Model cache directory layout:
#   models/test  - small model used by integration tests
#   models/bench - larger model used by benchmarks
models_dir := "models"
test_model_dir := models_dir + "/test"
bench_model_dir := models_dir + "/bench"

# Platform-specific feature selection (server plus backend).
# Metal is the only macOS-specific backend here; every other OS —
# including the Linux case the original spelled out separately —
# falls back to OpenMP, so a two-way conditional is sufficient.
platform_features := if os() == "macos" { "server,metal" } else { "server,openmp" }

# Backend-only features (no server).
# Mirrors platform_features: Metal on macOS, OpenMP everywhere else.
# The Linux branch and the fallback branch were identical, so they
# are folded into a single else.
backend_features := if os() == "macos" { "metal" } else { "openmp" }

# Model download URLs (Hugging Face, Q4_K_M-quantized GGUF files)
test_model_url := "https://huggingface.co/Jarbas/all-MiniLM-L6-v2-Q4_K_M-GGUF/resolve/main/all-minilm-l6-v2-q4_k_m.gguf"
bench_model_url := "https://huggingface.co/gaianet/jina-embeddings-v2-base-code-GGUF/resolve/main/jina-embeddings-v2-base-code-Q4_K_M.gguf"

# Local paths where downloaded models are cached
test_model_file := test_model_dir + "/all-minilm-l6-v2-q4_k_m.gguf"
bench_model_file := bench_model_dir + "/jina-embeddings-v2-base-code-Q4_K_M.gguf"

# Download small test model (MiniLM ~15MB) for integration tests.
# --fail makes curl exit non-zero on HTTP errors instead of saving the
# error page as the model file (which would then look "cached" forever);
# a failed/partial download is removed so the next run retries it.
download-test-model:
    @echo "Setting up test model..."
    @mkdir -p {{test_model_dir}}
    @if [ ! -f {{test_model_file}} ]; then \
        echo "Downloading MiniLM-L6-v2 model (~15MB)..."; \
        curl -L --fail --progress-bar -o {{test_model_file}} {{test_model_url}} \
            || { rm -f {{test_model_file}}; echo "✗ Test model download failed"; exit 1; }; \
        echo "✓ Test model (MiniLM) downloaded successfully"; \
    else \
        echo "✓ Test model (MiniLM) already cached"; \
    fi

# Download benchmark model (Jina ~110MB) for performance testing.
# --fail makes curl exit non-zero on HTTP errors instead of saving the
# error page as the model file (which would then look "cached" forever);
# a failed/partial download is removed so the next run retries it.
download-bench-model:
    @echo "Setting up benchmark model..."
    @mkdir -p {{bench_model_dir}}
    @if [ ! -f {{bench_model_file}} ]; then \
        echo "Downloading Jina Embeddings v2 Base Code model (~110MB)..."; \
        curl -L --fail --progress-bar -o {{bench_model_file}} {{bench_model_url}} \
            || { rm -f {{bench_model_file}}; echo "✗ Benchmark model download failed"; exit 1; }; \
        echo "✓ Benchmark model (Jina) downloaded successfully"; \
    else \
        echo "✓ Benchmark model (Jina) already cached"; \
    fi

# Download both the test and benchmark models
download-all: download-test-model download-bench-model
    @echo "✓ All models ready"

# Run unit tests (no models required)
test-unit:
    @echo "Running unit tests with backend features ({{backend_features}})..."
    cargo test --lib --features "{{backend_features}}"

# Run integration tests with real model.
# EMBELLAMA_TEST_MODEL points the test binary at the cached GGUF file.
test-integration: download-test-model
    @echo "Running integration tests with backend features ({{backend_features}})..."
    RUST_BACKTRACE=1 EMBELLAMA_TEST_MODEL={{test_model_file}} \
    cargo test --test integration_tests --features "{{backend_features}}" -- --nocapture

# Run concurrency tests
test-concurrency: download-test-model
    @echo "Running concurrency tests with backend features ({{backend_features}})..."
    EMBELLAMA_TEST_MODEL={{test_model_file}} \
    cargo test --test concurrency_tests --features "{{backend_features}}" -- --nocapture

# Run property-based tests
test-property: download-test-model
    @echo "Running property-based tests with backend features ({{backend_features}})..."
    EMBELLAMA_TEST_MODEL={{test_model_file}} \
    cargo test --test property_tests --features "{{backend_features}}" -- --nocapture

# Run property-based tests with fewer cases (faster).
# PROPTEST_CASES=10 caps proptest iterations for a quick smoke run.
test-property-quick: download-test-model
    @echo "Running property-based tests (quick mode) with backend features ({{backend_features}})..."
    EMBELLAMA_TEST_MODEL={{test_model_file}} \
    PROPTEST_CASES=10 \
    cargo test --test property_tests --features "{{backend_features}}" -- --nocapture

# Run all tests: unit, integration, concurrency, property
test: test-unit test-integration test-concurrency test-property
    @echo "✓ All tests completed"

# Run benchmarks with real model.
# EMBELLAMA_BENCH_MODEL points the benches at the cached Jina model.
bench: download-bench-model
    @echo "Running benchmarks with real model..."
    EMBELLAMA_BENCH_MODEL={{bench_model_file}} \
    cargo bench

# Quick benchmark (subset of benchmarks for faster testing).
# The filter regex selects a single case from each benchmark group.
bench-quick: download-bench-model
    @echo "Running quick benchmarks (subset only)..."
    EMBELLAMA_BENCH_MODEL={{bench_model_file}} \
    cargo bench -- "single_embedding/text_length/11$|batch_embeddings/batch_size/1$|thread_scaling/threads/1$"

# Run an example with the test model (defaults to the `simple` example)
example NAME="simple": download-test-model
    @echo "Running example: {{NAME}}..."
    EMBELLAMA_MODEL={{test_model_file}} \
    cargo run --example {{NAME}}

# Check for compilation warnings across all targets
check:
    @echo "Checking for warnings with platform features ({{platform_features}})..."
    cargo check --all-targets --features "{{platform_features}}"

# Check with all features (may fail due to backend conflicts)
check-all-features:
    @echo "Checking with all features (may fail on some platforms)..."
    cargo check --all-targets --all-features

# Fix common issues: auto-apply rustc suggestions, then clippy suggestions
fix:
    @echo "Running cargo fix..."
    cargo fix --all-targets --allow-dirty --allow-staged
    cargo clippy --fix --all-targets --allow-dirty --allow-staged

# Format code in place with rustfmt
fmt:
    @echo "Formatting code..."
    cargo fmt

# Check formatting without modifying files (CI-friendly).
# `cargo fmt -- --check` exits non-zero if any file is mis-formatted.
# Fixed the echo: it said "Formatting code..." (copy-pasted from `fmt`)
# although this recipe never rewrites anything.
fmtcheck:
    @echo "Checking code formatting..."
    cargo fmt -- --check

# Check different backend features compilation.
# OpenMP/Native are checked everywhere; Metal only on macOS,
# CUDA/Vulkan only on Linux (compile check, no GPU required).
check-backends:
    @echo "Checking backend features compilation..."
    @echo "✓ Checking OpenMP backend..."
    @cargo check --no-default-features --features openmp
    @echo "✓ Checking Native backend..."
    @cargo check --no-default-features --features native
    @if [ "$(uname)" = "Darwin" ]; then \
        echo "✓ Checking Metal backend (macOS)..."; \
        cargo check --no-default-features --features metal; \
    fi
    @if [ "$(uname)" = "Linux" ]; then \
        echo "✓ Checking CUDA backend (compile only)..."; \
        cargo check --no-default-features --features cuda; \
        echo "✓ Checking Vulkan backend (compile only)..."; \
        cargo check --no-default-features --features vulkan; \
    fi
    @echo "✓ All backend features compile successfully"

# Test with platform-specific backend: Metal on macOS, OpenMP elsewhere
test-backend: download-test-model
    @echo "Testing with platform-specific backend..."
    @if [ "$(uname)" = "Darwin" ]; then \
        echo "Testing with Metal backend on macOS..."; \
        EMBELLAMA_TEST_MODEL={{test_model_file}} \
        cargo test --features metal -- --nocapture; \
    elif [ "$(uname)" = "Linux" ]; then \
        echo "Testing with OpenMP backend on Linux..."; \
        EMBELLAMA_TEST_MODEL={{test_model_file}} \
        cargo test --features openmp -- --nocapture; \
    else \
        echo "Testing with OpenMP backend..."; \
        EMBELLAMA_TEST_MODEL={{test_model_file}} \
        cargo test --features openmp -- --nocapture; \
    fi

# Run clippy with platform-appropriate features.
# Library/binaries are held to deny-warnings; tests, examples and
# benches only warn so pedantic lints don't block development.
clippy:
    @echo "Running clippy with platform features ({{platform_features}})..."
    @echo "Library and binaries (strict)..."
    cargo clippy --lib --bins --features "{{platform_features}}" -- -D warnings -D clippy::pedantic
    @echo "Tests, examples, and benches (lenient)..."
    cargo clippy --tests --examples --benches --features "{{platform_features}}" -- -W warnings -W clippy::pedantic

# Run clippy with all features (may fail due to backend conflicts)
clippy-all-features:
    @echo "Running clippy with all features (may fail on some platforms)..."
    cargo clippy --lib --bins --all-features -- -D warnings -D clippy::pedantic
    cargo clippy --tests --examples --benches --all-features -- -W warnings -W clippy::pedantic

# Compile library tests without running them (used by pre-commit hooks)
test-compile:
    @echo "Compiling tests with platform features ({{platform_features}})..."
    cargo test --no-run --lib --features "{{platform_features}}"

# Build documentation with platform features
doc:
    @echo "Building documentation with platform features ({{platform_features}})..."
    cargo doc --no-deps --features "{{platform_features}}"

# Build documentation with all features (for docs.rs)
doc-all-features:
    @echo "Building documentation with all features..."
    cargo doc --no-deps --all-features

# Clean build artifacts (cargo target directory)
clean:
    @echo "Cleaning build artifacts..."
    cargo clean

# Clean downloaded models (removes the entire models/ cache directory)
clean-models:
    @echo "Removing cached models..."
    rm -rf {{models_dir}}

# Clean everything: build artifacts and the model cache
clean-all: clean clean-models
    @echo "✓ All cleaned"

# Show model cache status: per-model presence and size, plus total cache size.
# Fixed a make-ism: unlike make, just passes `$` through to the shell
# unchanged, so shell variables are `$size`/`$total`. The original `$$size`
# reached the shell as `$$` (the shell's PID) followed by the literal text
# "size", printing e.g. "(12345size)" instead of the file size.
models-status:
    @echo "Model cache status:"
    @echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    @if [ -f {{test_model_file}} ]; then \
        size=`du -h {{test_model_file}} | cut -f1`; \
        echo "✓ Test model: {{test_model_file}} ($size)"; \
    else \
        echo "✗ Test model: Not downloaded"; \
    fi
    @if [ -f {{bench_model_file}} ]; then \
        size=`du -h {{bench_model_file}} | cut -f1`; \
        echo "✓ Bench model: {{bench_model_file}} ($size)"; \
    else \
        echo "✗ Bench model: Not downloaded"; \
    fi
    @echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    @if [ -d {{models_dir}} ]; then \
        total=`du -sh {{models_dir}} | cut -f1`; \
        echo "Total cache size: $total"; \
    fi

# Development workflow - auto-fix, format, lint, then run unit tests
dev: fix fmt clippy test-unit
    @echo "✓ Ready for integration testing"

# Install pre-commit hooks.
# Resolution order for the pre-commit tool: uvx, then pipx,
# then a globally installed `pre-commit`; fails with install hints otherwise.
install-hooks:
    @echo "Installing pre-commit hooks..."
    @if command -v uvx >/dev/null 2>&1; then \
        uvx pre-commit install; \
        echo "✓ Pre-commit hooks installed using uvx"; \
    elif command -v pipx >/dev/null 2>&1; then \
        pipx run pre-commit install; \
        echo "✓ Pre-commit hooks installed using pipx"; \
    elif command -v pre-commit >/dev/null 2>&1; then \
        pre-commit install; \
        echo "✓ Pre-commit hooks installed"; \
    else \
        echo "pre-commit not found. Install uv with: curl -LsSf https://astral.sh/uv/install.sh | sh"; \
        echo "Or install pipx with: pip install --user pipx"; \
        exit 1; \
    fi

# Run pre-commit hooks on all files (same uvx/pipx/global fallback chain)
pre-commit-all:
    @echo "Running pre-commit on all files..."
    @if command -v uvx >/dev/null 2>&1; then \
        uvx pre-commit run --all-files; \
    elif command -v pipx >/dev/null 2>&1; then \
        pipx run pre-commit run --all-files; \
    elif command -v pre-commit >/dev/null 2>&1; then \
        pre-commit run --all-files; \
    else \
        echo "pre-commit not found. Install uv with: curl -LsSf https://astral.sh/uv/install.sh | sh"; \
        echo "Or install pipx with: pip install --user pipx"; \
        exit 1; \
    fi

# Run pre-commit hooks on staged files only
pre-commit:
    @echo "Running pre-commit on staged files..."
    @if command -v uvx >/dev/null 2>&1; then \
        uvx pre-commit run; \
    elif command -v pipx >/dev/null 2>&1; then \
        pipx run pre-commit run; \
    elif command -v pre-commit >/dev/null 2>&1; then \
        pre-commit run; \
    else \
        echo "pre-commit not found. Install uv with: curl -LsSf https://astral.sh/uv/install.sh | sh"; \
        echo "Or install pipx with: pip install --user pipx"; \
        exit 1; \
    fi

# Update pinned pre-commit hook versions to the latest releases
update-hooks:
    @echo "Updating pre-commit hooks..."
    @if command -v uvx >/dev/null 2>&1; then \
        uvx pre-commit autoupdate; \
        echo "✓ Pre-commit hooks updated using uvx"; \
    elif command -v pipx >/dev/null 2>&1; then \
        pipx run pre-commit autoupdate; \
        echo "✓ Pre-commit hooks updated using pipx"; \
    elif command -v pre-commit >/dev/null 2>&1; then \
        pre-commit autoupdate; \
        echo "✓ Pre-commit hooks updated"; \
    else \
        echo "pre-commit not found. Install uv with: curl -LsSf https://astral.sh/uv/install.sh | sh"; \
        echo "Or install pipx with: pip install --user pipx"; \
        exit 1; \
    fi

# Full CI simulation: clean build, warning check, lints, all tests, benchmarks
ci: clean check clippy test bench
    @echo "✓ CI checks completed successfully"

# Build the server binary (requires the `server` feature)
build-server:
    @echo "Building server binary..."
    cargo build --features server --bin embellama-server
    @echo "✓ Server binary built"

# Run the server in the foreground with the test model (blocks until stopped)
run-server: download-test-model build-server
    @echo "Starting server with test model..."
    cargo run --features server --bin embellama-server -- \
        --model-path {{test_model_file}} \
        --model-name test-model \
        --workers 2 \
        --log-level info

# Run server in background for testing (returns immediately).
# Output goes to server.log; the background process PID is recorded in
# server.pid for `stop-server`. Fixed a make-ism: in just, `$` reaches the
# shell unchanged, so the background PID is `$!`. The original `$$!`
# expanded in the shell to `$$` (the shell's own PID) plus a literal `!`,
# writing e.g. "12345!" to server.pid and breaking `stop-server`'s kill.
start-server: download-test-model build-server
    @echo "Starting server in background..."
    @cargo run --features server --bin embellama-server -- \
        --model-path {{test_model_file}} \
        --model-name test-model \
        --workers 2 \
        --log-level info > server.log 2>&1 & \
        echo $! > server.pid
    @sleep 2
    @echo "✓ Server started (PID: `cat server.pid`)"

# Stop the background server started by `start-server`.
# Kills the PID recorded in server.pid and removes the pid file; the
# `|| true` keeps this recipe from failing when the process already exited.
# NOTE(review): assumes the recorded cargo process and the server share a
# lifetime — confirm if the server ever daemonizes/forks.
stop-server:
    @if [ -f server.pid ]; then \
        echo "Stopping server (PID: `cat server.pid`)..."; \
        kill `cat server.pid` 2>/dev/null || true; \
        rm -f server.pid; \
        echo "✓ Server stopped"; \
    else \
        echo "No server running"; \
    fi

# Test server API endpoints with curl + jq.
# Requires a server already listening on localhost:8080 (see `start-server`);
# each check prints a hint instead of failing hard when the server is down.
test-server-api:
    @echo "Testing server API endpoints..."
    @echo "================================"
    @echo
    @echo "1. Testing /health endpoint:"
    @curl -s "http://localhost:8080/health" | jq . || echo "Failed - is server running?"
    @echo
    @echo "2. Testing /v1/models endpoint:"
    @curl -s "http://localhost:8080/v1/models" | jq . || echo "Failed - is server running?"
    @echo
    @echo "3. Testing /v1/embeddings with single text:"
    @curl -s -X POST "http://localhost:8080/v1/embeddings" \
        -H "Content-Type: application/json" \
        -d '{"model": "test-model", "input": "Hello, world!"}' \
        | jq '.object, .model, .usage' || echo "Failed - is server running?"
    @echo
    @echo "4. Testing /v1/embeddings with batch:"
    @curl -s -X POST "http://localhost:8080/v1/embeddings" \
        -H "Content-Type: application/json" \
        -d '{"model": "test-model", "input": ["Hello", "World"]}' \
        | jq '.object, .model, (.data | length)' || echo "Failed - is server running?"
    @echo
    @echo "================================"
    @echo "✓ API tests complete"

# Run server integration tests
test-server-integration: download-test-model
    @echo "Running server integration tests..."
    cargo test --features server --test server_api_tests -- --nocapture

# Run OpenAI compatibility tests
test-server-compat: download-test-model
    @echo "Running OpenAI compatibility tests..."
    cargo test --features server --test openai_compat_tests -- --nocapture

# Run server load tests (excluding slow #[ignore]d tests)
test-server-load: download-test-model
    @echo "Running server load tests..."
    cargo test --features server --test server_load_tests -- --nocapture

# Run ALL server load tests (including slow/ignored tests)
test-server-load-all: download-test-model
    @echo "Running all server load tests (including slow tests)..."
    cargo test --features server --test server_load_tests -- --nocapture --ignored

# Run all server tests: integration, compatibility, load
test-server-all: test-server-integration test-server-compat test-server-load
    @echo "✓ All server tests completed"

# Test with Python OpenAI SDK (best-effort: the `|| echo` reports a failure
# without aborting, so the server is always stopped afterwards)
test-server-python: start-server
    @echo "Testing with Python OpenAI SDK..."
    @python3 scripts/test-openai-python.py || echo "Python SDK test failed - is openai package installed?"
    @just stop-server

# Test with JavaScript OpenAI SDK (best-effort, same pattern as above)
test-server-js: start-server
    @echo "Testing with JavaScript OpenAI SDK..."
    @node scripts/test-openai-js.mjs || echo "JS SDK test failed - is openai package installed?"
    @just stop-server

# Full server test workflow (old compatibility): start, curl-based API checks, stop
test-server: start-server test-server-api stop-server
    @echo "✓ Server tests completed"

# Check server compilation (binary and test targets, no run)
check-server:
    @echo "Checking server compilation..."
    cargo check --features server --bin embellama-server
    cargo check --features server --tests

# Clean server artifacts: stops the server, then removes log and pid files
clean-server: stop-server
    @echo "Cleaning server artifacts..."
    @rm -f server.log server.pid
    @echo "✓ Server artifacts cleaned"