Skip to main content

uni_db/api/
schema.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::api::Uni;
5use std::path::Path;
6use uni_common::core::schema::{
7    DataType, DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition,
8    ScalarIndexConfig, ScalarIndexType, TokenizerConfig, VectorIndexConfig, VectorIndexType,
9};
10use uni_common::{Result, UniError};
11
/// Builder for defining and modifying the graph schema.
///
/// Use this builder to define labels, edge types, properties, and indexes.
/// Changes are batched and only take effect when `.apply()` is called;
/// `.apply()` hands them to the schema manager one at a time and stops at the
/// first error.
///
/// # Example
///
/// ```no_run
/// # async fn example(db: &uni_db::Uni) -> uni_db::Result<()> {
/// db.schema()
///     .label("Person")
///         .property("name", uni_db::DataType::String)
///         .property("age", uni_db::DataType::Int64)
///         .vector("embedding", 1536) // Adds a vector-typed property (no index; use `.index(..)` for that)
///         .index("name", uni_db::IndexType::Scalar(uni_db::ScalarType::BTree))
///     .edge_type("KNOWS", &["Person"], &["Person"])
///         .property("since", uni_db::DataType::Date)
///     .apply()
///     .await?;
/// # Ok(())
/// # }
/// ```
#[must_use = "schema builders do nothing until .apply() is called"]
pub struct SchemaBuilder<'a> {
    /// Database whose schema manager receives the changes on `.apply()`.
    db: &'a Uni,
    /// Changes queued so far, applied front to back by `.apply()`.
    pending: Vec<SchemaChange>,
}
39
/// A single queued schema modification.
///
/// Values are collected by [`SchemaBuilder`] and its label / edge-type
/// sub-builders and forwarded to the schema manager by `SchemaBuilder::apply`.
pub enum SchemaChange {
    /// Register a label (node type) called `name`.
    AddLabel {
        name: String,
    },
    /// Register property `name` on the label or edge type `label_or_type`.
    AddProperty {
        /// Name of the owning label or edge type.
        label_or_type: String,
        /// Property name.
        name: String,
        /// Declared type of the property.
        data_type: DataType,
        /// `true` when queued via `property_nullable`, `false` via `property`.
        nullable: bool,
    },
    /// Register an index; `apply` also rebuilds indexes for the index's label.
    AddIndex(IndexDefinition),
    /// Register an edge type called `name` with its source and target labels.
    AddEdgeType {
        name: String,
        /// Labels passed as `from` to `edge_type(..)`.
        from_labels: Vec<String>,
        /// Labels passed as `to` to `edge_type(..)`.
        to_labels: Vec<String>,
    },
}
57
58impl<'a> SchemaBuilder<'a> {
59    pub fn new(db: &'a Uni) -> Self {
60        Self {
61            db,
62            pending: Vec::new(),
63        }
64    }
65
66    /// Create a label (node type) in the schema.
67    ///
68    /// Labels can be **schemaless** (no properties defined) or **typed** (with properties).
69    ///
70    /// # Schemaless Labels
71    ///
72    /// Labels without property definitions support flexible, dynamic properties:
73    /// - Properties not in schema are stored in `overflow_json` (JSONB binary)
74    /// - Queries on overflow properties are automatically rewritten to JSONB functions
75    /// - No schema migration needed to add new properties
76    ///
77    /// # Example: Schemaless Label
78    ///
79    /// ```ignore
80    /// // Create label with no properties
81    /// db.schema().label("Document").apply().await?;
82    ///
83    /// // Create with arbitrary properties
84    /// db.execute("CREATE (:Document {title: 'Article', author: 'Alice', year: 2024})").await?;
85    ///
86    /// // Query works transparently (automatic query rewriting)
87    /// db.query("MATCH (d:Document) WHERE d.author = 'Alice' RETURN d.title, d.year").await?;
88    /// ```
89    ///
90    /// # Example: Typed Label with Overflow
91    ///
92    /// ```ignore
93    /// // Define core properties in schema
94    /// db.schema()
95    ///     .label("Person")
96    ///     .property("name", DataType::String)
97    ///     .property("age", DataType::Int)
98    ///     .apply().await?;
99    ///
100    /// // Can still add overflow properties dynamically
101    /// db.execute("CREATE (:Person {name: 'Bob', age: 25, city: 'NYC'})").await?;
102    /// //                                                   ^^^^^^^^^^^
103    /// //                                                   overflow property
104    ///
105    /// // Query mixing schema and overflow properties
106    /// db.query("MATCH (p:Person) WHERE p.name = 'Bob' AND p.city = 'NYC' RETURN p.age").await?;
107    /// ```
108    ///
109    /// **Performance Note**: Schema properties use typed columns (faster filtering/sorting),
110    /// while overflow properties use JSONB (flexible but slower). Use schema properties
111    /// for core, frequently-queried fields.
112    pub fn label(self, name: &str) -> LabelBuilder<'a> {
113        LabelBuilder::new(self, name.to_string())
114    }
115
116    pub fn edge_type(self, name: &str, from: &[&str], to: &[&str]) -> EdgeTypeBuilder<'a> {
117        EdgeTypeBuilder::new(
118            self,
119            name.to_string(),
120            from.iter().map(|s| s.to_string()).collect(),
121            to.iter().map(|s| s.to_string()).collect(),
122        )
123    }
124
125    pub async fn apply(self) -> Result<()> {
126        let manager = &self.db.schema;
127        let mut indexes_to_build = Vec::new();
128
129        for change in self.pending {
130            match change {
131                SchemaChange::AddLabel { name } => {
132                    manager.add_label(&name).map_err(|e| UniError::Schema {
133                        message: e.to_string(),
134                    })?;
135                }
136                SchemaChange::AddProperty {
137                    label_or_type,
138                    name,
139                    data_type,
140                    nullable,
141                } => {
142                    manager
143                        .add_property(&label_or_type, &name, data_type, nullable)
144                        .map_err(|e| UniError::Schema {
145                            message: e.to_string(),
146                        })?;
147                }
148                SchemaChange::AddIndex(idx) => {
149                    manager
150                        .add_index(idx.clone())
151                        .map_err(|e| UniError::Schema {
152                            message: e.to_string(),
153                        })?;
154                    // Track index to trigger build after saving schema
155                    indexes_to_build.push(idx.label().to_string());
156                }
157                SchemaChange::AddEdgeType {
158                    name,
159                    from_labels,
160                    to_labels,
161                } => {
162                    manager
163                        .add_edge_type(&name, from_labels, to_labels)
164                        .map_err(|e| UniError::Schema {
165                            message: e.to_string(),
166                        })?;
167                }
168            }
169        }
170
171        manager.save().await.map_err(UniError::Internal)?;
172
173        // Trigger index builds for affected labels
174        // We use a set to avoid rebuilding same label multiple times if multiple indexes added
175        indexes_to_build.sort();
176        indexes_to_build.dedup();
177        for label in indexes_to_build {
178            // Trigger async rebuild
179            // Note: If synchronous behavior is desired, pass false.
180            // But usually schema changes should be fast, so async build is better?
181            // The prompt says "Indexes Not Built During Schema Changes", implying they should be.
182            // Let's do it synchronously to ensure they are ready, matching user expectation.
183            self.db.rebuild_indexes(&label, false).await?;
184        }
185
186        Ok(())
187    }
188}
189
/// Sub-builder that collects the properties and indexes of a single label.
///
/// Obtained from `SchemaBuilder::label`; hand control back with `.done()`,
/// start another definition with `.label(..)` / `.edge_type(..)`, or finish
/// with `.apply()`.
#[must_use = "builders do nothing until .done() or .apply() is called"]
pub struct LabelBuilder<'a> {
    /// Parent builder whose pending queue receives this label's changes.
    builder: SchemaBuilder<'a>,
    /// Name of the label being defined.
    name: String,
}
195
196impl<'a> LabelBuilder<'a> {
197    fn new(builder: SchemaBuilder<'a>, name: String) -> Self {
198        Self { builder, name }
199    }
200
201    pub fn property(mut self, name: &str, data_type: DataType) -> Self {
202        self.builder.pending.push(SchemaChange::AddProperty {
203            label_or_type: self.name.clone(),
204            name: name.to_string(),
205            data_type,
206            nullable: false,
207        });
208        self
209    }
210
211    pub fn property_nullable(mut self, name: &str, data_type: DataType) -> Self {
212        self.builder.pending.push(SchemaChange::AddProperty {
213            label_or_type: self.name.clone(),
214            name: name.to_string(),
215            data_type,
216            nullable: true,
217        });
218        self
219    }
220
221    pub fn vector(self, name: &str, dimensions: usize) -> Self {
222        self.property(name, DataType::Vector { dimensions })
223    }
224
225    pub fn index(mut self, property: &str, index_type: IndexType) -> Self {
226        let idx = match index_type {
227            IndexType::Vector(cfg) => IndexDefinition::Vector(VectorIndexConfig {
228                name: format!("idx_{}_{}", self.name, property),
229                label: self.name.clone(),
230                property: property.to_string(),
231                index_type: cfg.algorithm.into_internal(),
232                metric: cfg.metric.into_internal(),
233                embedding_config: cfg.embedding.map(|e| e.into_internal()),
234                metadata: Default::default(),
235            }),
236            IndexType::FullText => IndexDefinition::FullText(FullTextIndexConfig {
237                name: format!("fts_{}_{}", self.name, property),
238                label: self.name.clone(),
239                properties: vec![property.to_string()],
240                tokenizer: TokenizerConfig::Standard,
241                with_positions: true,
242                metadata: Default::default(),
243            }),
244            IndexType::Scalar(stype) => IndexDefinition::Scalar(ScalarIndexConfig {
245                name: format!("idx_{}_{}", self.name, property),
246                label: self.name.clone(),
247                properties: vec![property.to_string()],
248                index_type: stype.into_internal(),
249                where_clause: None,
250                metadata: Default::default(),
251            }),
252            IndexType::Inverted(config) => IndexDefinition::Inverted(config),
253        };
254        self.builder.pending.push(SchemaChange::AddIndex(idx));
255        self
256    }
257
258    pub fn done(mut self) -> SchemaBuilder<'a> {
259        self.builder
260            .pending
261            .insert(0, SchemaChange::AddLabel { name: self.name });
262        self.builder
263    }
264
265    // Chaining
266    pub fn label(self, name: &str) -> LabelBuilder<'a> {
267        self.done().label(name)
268    }
269
270    pub fn edge_type(self, name: &str, from: &[&str], to: &[&str]) -> EdgeTypeBuilder<'a> {
271        self.done().edge_type(name, from, to)
272    }
273
274    pub async fn apply(self) -> Result<()> {
275        self.done().apply().await
276    }
277}
278
/// Sub-builder that collects the properties of a single edge type.
///
/// Obtained from `SchemaBuilder::edge_type`; hand control back with `.done()`,
/// start another definition with `.label(..)` / `.edge_type(..)`, or finish
/// with `.apply()`.
#[must_use = "builders do nothing until .done() or .apply() is called"]
pub struct EdgeTypeBuilder<'a> {
    /// Parent builder whose pending queue receives this edge type's changes.
    builder: SchemaBuilder<'a>,
    /// Name of the edge type being defined.
    name: String,
    /// Labels given as `from` when the edge type was declared.
    from_labels: Vec<String>,
    /// Labels given as `to` when the edge type was declared.
    to_labels: Vec<String>,
}
286
287impl<'a> EdgeTypeBuilder<'a> {
288    fn new(
289        builder: SchemaBuilder<'a>,
290        name: String,
291        from_labels: Vec<String>,
292        to_labels: Vec<String>,
293    ) -> Self {
294        Self {
295            builder,
296            name,
297            from_labels,
298            to_labels,
299        }
300    }
301
302    pub fn property(mut self, name: &str, data_type: DataType) -> Self {
303        self.builder.pending.push(SchemaChange::AddProperty {
304            label_or_type: self.name.clone(),
305            name: name.to_string(),
306            data_type,
307            nullable: false,
308        });
309        self
310    }
311
312    pub fn property_nullable(mut self, name: &str, data_type: DataType) -> Self {
313        self.builder.pending.push(SchemaChange::AddProperty {
314            label_or_type: self.name.clone(),
315            name: name.to_string(),
316            data_type,
317            nullable: true,
318        });
319        self
320    }
321
322    pub fn done(mut self) -> SchemaBuilder<'a> {
323        self.builder.pending.insert(
324            0,
325            SchemaChange::AddEdgeType {
326                name: self.name,
327                from_labels: self.from_labels,
328                to_labels: self.to_labels,
329            },
330        );
331        self.builder
332    }
333
334    pub fn label(self, name: &str) -> LabelBuilder<'a> {
335        self.done().label(name)
336    }
337
338    pub fn edge_type(self, name: &str, from: &[&str], to: &[&str]) -> EdgeTypeBuilder<'a> {
339        self.done().edge_type(name, from, to)
340    }
341
342    pub async fn apply(self) -> Result<()> {
343        self.done().apply().await
344    }
345}
346
/// Serializable description of a label: its name, a count, and its
/// properties, indexes and constraints.
// NOTE(review): nothing in this file constructs this type; the field notes
// below are inferred from names and types — confirm against the producer.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LabelInfo {
    /// Label name.
    pub name: String,
    /// Presumably the number of nodes carrying this label — TODO confirm.
    pub count: usize,
    /// Properties reported for the label.
    pub properties: Vec<PropertyInfo>,
    /// Indexes reported for the label.
    pub indexes: Vec<IndexInfo>,
    /// Constraints reported for the label.
    pub constraints: Vec<ConstraintInfo>,
}
355
/// Serializable description of a single property.
// NOTE(review): not constructed in this file; field meanings are inferred
// from names — confirm against the producer.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PropertyInfo {
    /// Property name.
    pub name: String,
    /// Property type rendered as text (exact format not visible here).
    pub data_type: String,
    /// Whether the property is declared nullable.
    pub nullable: bool,
    /// Presumably `true` when at least one index covers the property — TODO confirm.
    pub is_indexed: bool,
}
363
/// Serializable description of a single index.
// NOTE(review): not constructed in this file; field meanings are inferred
// from names — confirm against the producer.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexInfo {
    /// Index name.
    pub name: String,
    /// Index kind rendered as text (set of possible values not visible here).
    pub index_type: String,
    /// Names of the properties the index covers.
    pub properties: Vec<String>,
    /// Index status rendered as text (set of possible values not visible here).
    pub status: String,
}
371
/// Serializable description of a single constraint.
// NOTE(review): not constructed in this file; field meanings are inferred
// from names — confirm against the producer.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ConstraintInfo {
    /// Constraint name.
    pub name: String,
    /// Constraint kind rendered as text (set of possible values not visible here).
    pub constraint_type: String,
    /// Names of the properties the constraint applies to.
    pub properties: Vec<String>,
    /// Whether the constraint is reported as enabled.
    pub enabled: bool,
}
379
/// Kind of index requested through `LabelBuilder::index`.
#[non_exhaustive]
pub enum IndexType {
    /// Vector index built from the given algorithm, metric and optional
    /// embedding settings.
    Vector(VectorIndexCfg),
    /// Full-text index; `LabelBuilder::index` configures it with the standard
    /// tokenizer and positions enabled.
    FullText,
    /// Scalar index of the given kind.
    Scalar(ScalarType),
    /// Inverted index; the supplied configuration is queued unchanged.
    Inverted(uni_common::core::schema::InvertedIndexConfig),
}
387
/// Settings for a vector index requested via [`IndexType::Vector`].
pub struct VectorIndexCfg {
    /// Index algorithm and its tunable parameters.
    pub algorithm: VectorAlgo,
    /// Distance metric for the index.
    pub metric: VectorMetric,
    /// Optional embedding settings; when present they are converted and
    /// attached to the index definition as its `embedding_config`.
    pub embedding: Option<EmbeddingCfg>,
}
393
/// Embedding configuration for auto-embedding during index writes.
pub struct EmbeddingCfg {
    /// Model alias from the Uni-Xervo catalog (for example: "embed/default").
    pub alias: String,
    /// Presumably the properties whose values are fed to the embedding model —
    /// TODO confirm against the embedding pipeline.
    pub source_properties: Vec<String>,
    /// Presumably the number of items embedded per model call — TODO confirm.
    pub batch_size: usize,
}
401
402impl EmbeddingCfg {
403    fn into_internal(self) -> EmbeddingConfig {
404        EmbeddingConfig {
405            alias: self.alias,
406            source_properties: self.source_properties,
407            batch_size: self.batch_size,
408        }
409    }
410}
411
/// Vector index algorithm selectable through [`VectorIndexCfg`].
#[non_exhaustive]
pub enum VectorAlgo {
    /// HNSW index with the given `m` and `ef_construction`; the internal
    /// `ef_search` value is fixed at 50 during conversion.
    Hnsw { m: u32, ef_construction: u32 },
    /// IVF-PQ index with the given partition and sub-vector counts; the
    /// internal `bits_per_subvector` value is fixed at 8 during conversion.
    IvfPq { partitions: u32, sub_vectors: u32 },
    /// Flat index with no tunable parameters.
    Flat,
}
418
419impl VectorAlgo {
420    fn into_internal(self) -> VectorIndexType {
421        match self {
422            VectorAlgo::Hnsw { m, ef_construction } => VectorIndexType::Hnsw {
423                m,
424                ef_construction,
425                ef_search: 50,
426            },
427            VectorAlgo::IvfPq {
428                partitions,
429                sub_vectors,
430            } => VectorIndexType::IvfPq {
431                num_partitions: partitions,
432                num_sub_vectors: sub_vectors,
433                bits_per_subvector: 8,
434            },
435            VectorAlgo::Flat => VectorIndexType::Flat,
436        }
437    }
438}
439
/// Distance metric selectable for a vector index.
///
/// Each variant converts to the internal `DistanceMetric` variant of the same
/// name.
#[non_exhaustive]
pub enum VectorMetric {
    /// Converts to `DistanceMetric::Cosine`.
    Cosine,
    /// Converts to `DistanceMetric::L2`.
    L2,
    /// Converts to `DistanceMetric::Dot`.
    Dot,
}
446
447impl VectorMetric {
448    fn into_internal(self) -> DistanceMetric {
449        match self {
450            VectorMetric::Cosine => DistanceMetric::Cosine,
451            VectorMetric::L2 => DistanceMetric::L2,
452            VectorMetric::Dot => DistanceMetric::Dot,
453        }
454    }
455}
456
/// Scalar index kind selectable through [`IndexType::Scalar`].
///
/// Each variant converts to the internal `ScalarIndexType` variant of the same
/// name.
#[non_exhaustive]
pub enum ScalarType {
    /// Converts to `ScalarIndexType::BTree`.
    BTree,
    /// Converts to `ScalarIndexType::Hash`.
    Hash,
    /// Converts to `ScalarIndexType::Bitmap`.
    Bitmap,
}
463
464impl ScalarType {
465    fn into_internal(self) -> ScalarIndexType {
466        match self {
467            ScalarType::BTree => ScalarIndexType::BTree,
468            ScalarType::Hash => ScalarIndexType::Hash,
469            ScalarType::Bitmap => ScalarIndexType::Bitmap,
470        }
471    }
472}
473
474impl Uni {
475    pub fn schema(&self) -> SchemaBuilder<'_> {
476        SchemaBuilder::new(self)
477    }
478
479    pub async fn load_schema(&self, path: impl AsRef<Path>) -> Result<()> {
480        // We can't easily "replace" the SchemaManager's schema in-place if it's already Arc-ed around.
481        // But SchemaManager has internal RwLock<Schema>.
482        // Let's check if we can add a method to SchemaManager to reload.
483        let content = tokio::fs::read_to_string(path)
484            .await
485            .map_err(UniError::Io)?;
486        let schema: uni_common::core::schema::Schema =
487            serde_json::from_str(&content).map_err(|e| UniError::Schema {
488                message: e.to_string(),
489            })?;
490
491        // We need a way to update the schema in SchemaManager.
492        // I'll add a `replace_schema` or similar to SchemaManager.
493        self.schema.replace_schema(schema);
494        Ok(())
495    }
496
497    pub async fn save_schema(&self, path: impl AsRef<Path>) -> Result<()> {
498        let content =
499            serde_json::to_string_pretty(&self.schema.schema()).map_err(|e| UniError::Schema {
500                message: e.to_string(),
501            })?;
502        tokio::fs::write(path, content)
503            .await
504            .map_err(UniError::Io)?;
505        Ok(())
506    }
507}