1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Multi-stage Dockerfile for ruvector-postgres extension
# Builds the extension and creates a PostgreSQL image with it installed
# Build stage
# Using nightly Rust to support edition2024 crates in the registry.
# NOTE(review): "nightly-bookworm-slim" is a moving tag, so a rebuild can pick
# up a different compiler; pin by digest if reproducible builds are required.
FROM rustlang/rust:nightly-bookworm-slim AS builder

# Install the build toolchain plus the tools needed to register the PGDG apt
# repository (curl, ca-certificates, gnupg). PostgreSQL itself is installed in
# a later step, once that repository has been added.
# --no-install-recommends keeps optional packages out of this layer; the list
# is sorted alphabetically so future changes diff cleanly.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bison \
        build-essential \
        ca-certificates \
        clang \
        curl \
        flex \
        gnupg \
        libclang-dev \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Add PostgreSQL official apt repository
# The signing key is downloaded to a file first instead of being piped into
# gpg: a pipeline's exit status is that of its last command, so with the
# default /bin/sh a failed curl would not by itself fail this step.
# The repository is referenced over HTTPS (ca-certificates is installed above).
RUN curl -fsSL -o /tmp/postgresql-keyring.asc https://www.postgresql.org/media/keys/ACCC4CF8.asc \
    && gpg -o /usr/share/keyrings/postgresql-keyring.gpg --dearmor /tmp/postgresql-keyring.asc \
    && rm -f /tmp/postgresql-keyring.asc \
    && echo "deb [signed-by=/usr/share/keyrings/postgresql-keyring.gpg] https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list
# Install PostgreSQL 17 and its server development package from the PGDG
# repository configured above. The pg_config path passed to `cargo pgrx init`
# later in this stage lives under /usr/lib/postgresql/17/bin.
# --no-install-recommends avoids pulling optional packages into the builder.
RUN apt-get update && apt-get install -y --no-install-recommends \
        postgresql-17 \
        postgresql-server-dev-17 \
    && rm -rf /var/lib/apt/lists/*
# Install the cargo-pgrx subcommand, pinned to release 0.12.9.
# --locked makes cargo use the Cargo.lock published with that release.
RUN cargo install --locked --version 0.12.9 cargo-pgrx
# Set up workspace: all following relative paths resolve against /build
WORKDIR /build
# Copy the ruvector-postgres crate directory into /build so the crate is built
# on its own, from its own directory, rather than from the workspace root.
# Nothing is generated here; the files come straight from the build context.
COPY crates/ruvector-postgres/ ./
# Copy the ruvector-mincut-gated-transformer dependency (required for gated-transformer feature).
# The destination /build/../ruvector-mincut-gated-transformer/ resolves to
# /ruvector-mincut-gated-transformer/, i.e. a sibling of /build — presumably to
# satisfy a "../ruvector-mincut-gated-transformer" path dependency (TODO confirm
# against the crate's Cargo.toml, which is not visible here).
COPY crates/ruvector-mincut-gated-transformer /build/../ruvector-mincut-gated-transformer/
# Use the workspace Cargo.lock to pin dependencies and avoid registry parsing issues
COPY Cargo.lock ./
# Initialize pgrx with system PostgreSQL
RUN cargo pgrx init --pg17=/usr/lib/postgresql/17/bin/pg_config

# Configure cargo to use the git registry protocol instead of sparse, to avoid
# sparse registry issues with edition2024 crates.
# This is set BEFORE `cargo fetch` so that the fetch and the builds below talk
# to the same registry source; setting it afterwards would leave the prefetched
# crates under the sparse registry's cache, where the git-protocol builds do
# not look for them.
ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=git

# Pre-fetch dependencies to lock versions and warm the registry cache
RUN cargo fetch

# Build the extension with all features including embeddings and gated-transformer
RUN cargo pgrx package --features "pg17 index-all quant-all embeddings gated-transformer"

# Build the model downloader binary
RUN cargo build --release --bin download-models --features "embeddings"
# Download the embedding models at build time.
# FASTEMBED_CACHE_DIR is the env var fastembed-rs uses for its cache location;
# it is pointed at /opt/ruvector/models before the downloader binary built in
# the previous step is run. The resulting cache size and directory listing are
# printed to the build log.
ENV FASTEMBED_CACHE_DIR=/opt/ruvector/models
RUN mkdir -p /opt/ruvector/models \
    && ./target/release/download-models \
    && echo "Model cache size: $(du -sh /opt/ruvector/models)" \
    && ls -la /opt/ruvector/models/
# Install the hand-crafted SQL schema (sparse functions removed) into the
# packaged extension directory. `cargo pgrx schema` doesn't work reliably in
# Docker, so the checked-in sql/ruvector--0.1.0.sql is used instead.
# The number of CREATE FUNCTION statements is echoed to the build log.
RUN ext_sql=/build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/ruvector--0.1.0.sql \
    && cp /build/sql/ruvector--0.1.0.sql "$ext_sql" \
    && echo "SQL schema copied with $(grep -c 'CREATE FUNCTION\|CREATE OR REPLACE FUNCTION' "$ext_sql") functions"
# Sanity-check the packaged extension: list the extension directory, show the
# head of the SQL schema, and count its function definitions.
# The final `grep -c` exits non-zero when it finds no match, so a schema
# without any CREATE FUNCTION statement fails the build here.
RUN ext_dir=/build/target/release/ruvector-pg17/usr/share/postgresql/17/extension \
    && ls -la "$ext_dir/" \
    && echo "=== First 20 lines of SQL ===" \
    && head -20 "$ext_dir/ruvector--0.1.0.sql" \
    && echo "=== CREATE FUNCTION count ===" \
    && grep -c "CREATE FUNCTION\|CREATE OR REPLACE FUNCTION" "$ext_dir/ruvector--0.1.0.sql"
# Runtime stage
FROM postgres:17-bookworm

# Image metadata
LABEL maintainer="ruvector team" \
      description="PostgreSQL with ruvector extension - high-performance vector similarity search with local embeddings" \
      version="2.0.1"

# Embedding models are downloaded in the builder stage and shipped in the
# image; FASTEMBED_CACHE_DIR (the env var fastembed-rs uses for its cache
# location) points at the directory they are copied into.
ENV FASTEMBED_CACHE_DIR=/opt/ruvector/models
COPY --from=builder /opt/ruvector/models /opt/ruvector/models
# Install the packaged extension produced in the builder stage.
# From .../usr/share/postgresql/17/extension/ this brings in every file in
# that directory, which includes:
# - ruvector.control (version info)
# - ruvector--*.sql (the schema file; the builder stage places its
#   sql/ruvector--0.1.0.sql there)
# - any additional SQL migration files
# From .../usr/lib/postgresql/17/lib/ it brings in the compiled library files.
COPY --from=builder /build/target/release/ruvector-pg17/usr/share/postgresql/17/extension/* /usr/share/postgresql/17/extension/
COPY --from=builder /build/target/release/ruvector-pg17/usr/lib/postgresql/17/lib/* /usr/lib/postgresql/17/lib/
# Add initialization scripts
# init.sql reaches the builder stage as part of the crate source copy
# (docker/init.sql under /build) and is installed as 01-init.sql in the
# entrypoint's init directory. Per the original note it performs extension
# creation, role setup, and tests — its contents are not visible here.
# No separate `mkdir` layer is needed: COPY creates any missing directories in
# the destination path.
COPY --from=builder /build/docker/init.sql /docker-entrypoint-initdb.d/01-init.sql
# Health check
# Shell form is used on purpose so the variable is expanded inside the running
# container: the probe uses POSTGRES_USER when the container was started with
# one and falls back to "postgres" otherwise (the previous hard-coded value).
# `|| exit 1` maps every pg_isready failure code to 1, the status Docker
# treats as "unhealthy".
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD pg_isready -U "${POSTGRES_USER:-postgres}" || exit 1

# Expose PostgreSQL port (documentation only; does not publish the port)
EXPOSE 5432

# Use the default PostgreSQL entrypoint inherited from the base image
CMD ["postgres"]