// daimon_plugin_pgvector/migrations.rs

//! SQL migration strings for manual schema setup.
//!
//! When [`PgVectorStoreBuilder::auto_migrate(false)`](crate::PgVectorStoreBuilder::auto_migrate)
//! is set, run these SQL statements against your database before using the store.

/// SQL statement that creates the `vector` extension if it does not exist.
///
/// The statement uses `IF NOT EXISTS`, so running it against a database that
/// already has the extension is a no-op.
pub const CREATE_EXTENSION: &str = "CREATE EXTENSION IF NOT EXISTS vector";

/// Returns the `CREATE TABLE` statement for a given table name and dimension count.
///
/// The table stores document IDs, embedding vectors, text content, and
/// arbitrary JSONB metadata. The statement uses `IF NOT EXISTS`.
///
/// # Parameters
///
/// - `table`: table name. It is interpolated into the SQL verbatim (no quoting
///   or escaping is applied), so it must be a trusted, valid SQL identifier.
/// - `dimensions`: number of dimensions of the `embedding` column, emitted as
///   `vector({dimensions})`.
///
/// # Example
///
/// ```
/// let sql = daimon_plugin_pgvector::migrations::create_table_sql("documents", 1536);
/// assert!(sql.contains("vector(1536)"));
/// ```
pub fn create_table_sql(table: &str, dimensions: usize) -> String {
    // `{{}}` is the escaped form of a literal `{}`: the default metadata value
    // is an empty JSON object.
    format!(
        "CREATE TABLE IF NOT EXISTS {table} (\
         id TEXT PRIMARY KEY, \
         embedding vector({dimensions}), \
         content TEXT NOT NULL, \
         metadata JSONB NOT NULL DEFAULT '{{}}'::jsonb\
         )"
    )
}

/// Returns the `CREATE INDEX` statement for an HNSW index on the `embedding` column.
///
/// The operator class is chosen based on the distance metric:
/// - Cosine → `vector_cosine_ops`
/// - L2 → `vector_l2_ops`
/// - InnerProduct → `vector_ip_ops`
///
/// The index is named `<table>_embedding_hnsw_idx`. When `table` is
/// schema-qualified (e.g. `public.docs`), only the part after the last `.` is
/// used for the index name, because PostgreSQL does not accept a schema name
/// in the index name of `CREATE INDEX`; the `ON` clause still uses the full
/// `table` as given. Quoted identifiers are not specially handled.
///
/// # Parameters
///
/// - `table`: table name, interpolated verbatim (no quoting or escaping).
/// - `ops_class`: one of `vector_cosine_ops`, `vector_l2_ops`, `vector_ip_ops`;
///   interpolated verbatim.
/// - `m`: HNSW `m` parameter (max connections per layer). `None` uses the PG default (16).
/// - `ef_construction`: HNSW build-time search width. `None` uses the PG default (64).
///
/// When both `m` and `ef_construction` are `None`, no `WITH (...)` clause is emitted.
pub fn create_hnsw_index_sql(
    table: &str,
    ops_class: &str,
    m: Option<usize>,
    ef_construction: Option<usize>,
) -> String {
    // An index name may not be schema-qualified, so drop any `schema.` prefix
    // when deriving it. `rsplit` always yields at least one item, so the
    // fallback is never taken in practice; it just avoids an `unwrap`.
    let index_base = table.rsplit('.').next().unwrap_or(table);

    // Collect only the storage parameters that were explicitly provided,
    // keeping the `m`, `ef_construction` order.
    let with_parts: Vec<String> = m
        .map(|m| format!("m = {m}"))
        .into_iter()
        .chain(ef_construction.map(|ef| format!("ef_construction = {ef}")))
        .collect();

    let with_clause = if with_parts.is_empty() {
        String::new()
    } else {
        format!(" WITH ({})", with_parts.join(", "))
    };

    format!(
        "CREATE INDEX IF NOT EXISTS {index_base}_embedding_hnsw_idx \
         ON {table} USING hnsw (embedding {ops_class}){with_clause}"
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The generated `CREATE TABLE` statement contains every column definition.
    #[test]
    fn test_create_table_sql() {
        let sql = create_table_sql("docs", 1536);
        assert!(sql.starts_with("CREATE TABLE IF NOT EXISTS docs ("));
        assert!(sql.contains("vector(1536)"));
        assert!(sql.contains("id TEXT PRIMARY KEY"));
        assert!(sql.contains("content TEXT NOT NULL"));
        assert!(sql.contains("metadata JSONB NOT NULL DEFAULT '{}'::jsonb"));
    }

    /// With no HNSW parameters the statement has no `WITH (...)` clause.
    #[test]
    fn test_create_hnsw_index_defaults() {
        let sql = create_hnsw_index_sql("docs", "vector_cosine_ops", None, None);
        assert_eq!(
            sql,
            "CREATE INDEX IF NOT EXISTS docs_embedding_hnsw_idx \
             ON docs USING hnsw (embedding vector_cosine_ops)"
        );
        assert!(!sql.contains("WITH"));
    }

    /// Both parameters appear, in `m`, `ef_construction` order.
    #[test]
    fn test_create_hnsw_index_custom_params() {
        let sql = create_hnsw_index_sql("docs", "vector_l2_ops", Some(32), Some(128));
        assert!(sql.contains("USING hnsw (embedding vector_l2_ops)"));
        assert!(sql.ends_with(" WITH (m = 32, ef_construction = 128)"));
    }

    /// Supplying only one parameter emits only that parameter.
    #[test]
    fn test_create_hnsw_index_partial_params() {
        let only_m = create_hnsw_index_sql("docs", "vector_ip_ops", Some(8), None);
        assert!(only_m.ends_with(" WITH (m = 8)"));
        assert!(!only_m.contains("ef_construction"));

        let only_ef = create_hnsw_index_sql("docs", "vector_ip_ops", None, Some(200));
        assert!(only_ef.ends_with(" WITH (ef_construction = 200)"));
        assert!(!only_ef.contains("m = "));
    }
}