Skip to main content

uni_db/api/
schema.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::api::Uni;
5use std::path::Path;
6use uni_common::core::schema::{
7    DataType, DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition,
8    ScalarIndexConfig, ScalarIndexType, TokenizerConfig, VectorIndexConfig, VectorIndexType,
9};
10use uni_common::{Result, UniError};
11
/// Builder for defining and modifying the graph schema.
///
/// Use this builder to define labels, edge types, properties, and indexes.
/// Changes are queued on the builder and are only sent to the database's
/// schema manager when `.apply()` is called.
///
/// NOTE(review): `apply` stops at the first failing change; whether changes
/// made before the failure are rolled back depends on the schema manager, so
/// confirm before relying on all-or-nothing behavior.
///
/// # Example
///
/// ```no_run
/// # async fn example(db: &uni_db::Uni) -> uni_db::Result<()> {
/// db.schema()
///     .label("Person")
///         .property("name", uni_db::DataType::String)
///         .property("age", uni_db::DataType::Int64)
///         .vector("embedding", 1536) // Adds a vector property; use `.index(...)` to index it
///         .index("name", uni_db::IndexType::Scalar(uni_db::ScalarType::BTree))
///     .edge_type("KNOWS", &["Person"], &["Person"])
///         .property("since", uni_db::DataType::Date)
///     .apply()
///     .await?;
/// # Ok(())
/// # }
/// ```
#[must_use = "schema builders do nothing until .apply() is called"]
pub struct SchemaBuilder<'a> {
    /// Database whose schema manager receives the queued changes.
    db: &'a Uni,
    /// Changes queued so far, in the order `apply` will process them.
    pending: Vec<SchemaChange>,
}
39
/// A single queued schema modification.
///
/// The label and edge-type builders push these onto the schema builder's
/// pending list; `SchemaBuilder::apply` forwards each one to the schema manager.
pub enum SchemaChange {
    /// Register a label; forwarded to the manager's `add_label`.
    AddLabel {
        /// Name of the label to add.
        name: String,
    },
    /// Register a property; forwarded to the manager's `add_property`.
    AddProperty {
        /// Owner of the property: a label name when queued by `LabelBuilder`,
        /// an edge type name when queued by `EdgeTypeBuilder`.
        label_or_type: String,
        /// Name of the property.
        name: String,
        /// Declared type of the property.
        data_type: DataType,
        /// `true` when queued through `property_nullable`, `false` for `property`.
        nullable: bool,
    },
    /// Register an index; forwarded to the manager's `add_index`, after which
    /// `apply` rebuilds the indexes of the definition's label.
    AddIndex(IndexDefinition),
    /// Register an edge type; forwarded to the manager's `add_edge_type`.
    AddEdgeType {
        /// Name of the edge type.
        name: String,
        /// Label names given as the `from` argument of `edge_type`.
        from_labels: Vec<String>,
        /// Label names given as the `to` argument of `edge_type`.
        to_labels: Vec<String>,
    },
}
57
58impl<'a> SchemaBuilder<'a> {
59    pub fn new(db: &'a Uni) -> Self {
60        Self {
61            db,
62            pending: Vec::new(),
63        }
64    }
65
66    /// Create a label (node type) in the schema.
67    ///
68    /// Labels can be **schemaless** (no properties defined) or **typed** (with properties).
69    ///
70    /// # Schemaless Labels
71    ///
72    /// Labels without property definitions support flexible, dynamic properties:
73    /// - Properties not in schema are stored in `overflow_json` (JSONB binary)
74    /// - Queries on overflow properties are automatically rewritten to JSONB functions
75    /// - No schema migration needed to add new properties
76    ///
77    /// # Example: Schemaless Label
78    ///
79    /// ```ignore
80    /// // Create label with no properties
81    /// db.schema().label("Document").apply().await?;
82    ///
83    /// // Create with arbitrary properties
84    /// db.execute("CREATE (:Document {title: 'Article', author: 'Alice', year: 2024})").await?;
85    ///
86    /// // Query works transparently (automatic query rewriting)
87    /// db.query("MATCH (d:Document) WHERE d.author = 'Alice' RETURN d.title, d.year").await?;
88    /// ```
89    ///
90    /// # Example: Typed Label with Overflow
91    ///
92    /// ```ignore
93    /// // Define core properties in schema
94    /// db.schema()
95    ///     .label("Person")
96    ///     .property("name", DataType::String)
97    ///     .property("age", DataType::Int)
98    ///     .apply().await?;
99    ///
100    /// // Can still add overflow properties dynamically
101    /// db.execute("CREATE (:Person {name: 'Bob', age: 25, city: 'NYC'})").await?;
102    /// //                                                   ^^^^^^^^^^^
103    /// //                                                   overflow property
104    ///
105    /// // Query mixing schema and overflow properties
106    /// db.query("MATCH (p:Person) WHERE p.name = 'Bob' AND p.city = 'NYC' RETURN p.age").await?;
107    /// ```
108    ///
109    /// **Performance Note**: Schema properties use typed columns (faster filtering/sorting),
110    /// while overflow properties use JSONB (flexible but slower). Use schema properties
111    /// for core, frequently-queried fields.
112    pub fn label(self, name: &str) -> LabelBuilder<'a> {
113        LabelBuilder::new(self, name.to_string())
114    }
115
116    pub fn edge_type(self, name: &str, from: &[&str], to: &[&str]) -> EdgeTypeBuilder<'a> {
117        EdgeTypeBuilder::new(
118            self,
119            name.to_string(),
120            from.iter().map(|s| s.to_string()).collect(),
121            to.iter().map(|s| s.to_string()).collect(),
122        )
123    }
124
125    pub async fn apply(self) -> Result<()> {
126        let manager = &self.db.schema;
127        let mut indexes_to_build = Vec::new();
128
129        for change in self.pending {
130            match change {
131                SchemaChange::AddLabel { name } => {
132                    manager.add_label(&name).map_err(|e| UniError::Schema {
133                        message: e.to_string(),
134                    })?;
135                }
136                SchemaChange::AddProperty {
137                    label_or_type,
138                    name,
139                    data_type,
140                    nullable,
141                } => {
142                    manager
143                        .add_property(&label_or_type, &name, data_type, nullable)
144                        .map_err(|e| UniError::Schema {
145                            message: e.to_string(),
146                        })?;
147                }
148                SchemaChange::AddIndex(idx) => {
149                    manager
150                        .add_index(idx.clone())
151                        .map_err(|e| UniError::Schema {
152                            message: e.to_string(),
153                        })?;
154                    // Track index to trigger build after saving schema
155                    indexes_to_build.push(idx.label().to_string());
156                }
157                SchemaChange::AddEdgeType {
158                    name,
159                    from_labels,
160                    to_labels,
161                } => {
162                    manager
163                        .add_edge_type(&name, from_labels, to_labels)
164                        .map_err(|e| UniError::Schema {
165                            message: e.to_string(),
166                        })?;
167                }
168            }
169        }
170
171        manager.save().await.map_err(UniError::Internal)?;
172
173        // Trigger index builds for affected labels
174        // We use a set to avoid rebuilding same label multiple times if multiple indexes added
175        indexes_to_build.sort();
176        indexes_to_build.dedup();
177        for label in indexes_to_build {
178            // Trigger async rebuild
179            // Note: If synchronous behavior is desired, pass false.
180            // But usually schema changes should be fast, so async build is better?
181            // The prompt says "Indexes Not Built During Schema Changes", implying they should be.
182            // Let's do it synchronously to ensure they are ready, matching user expectation.
183            self.db.rebuild_indexes(&label, false).await?;
184        }
185
186        Ok(())
187    }
188}
189
/// Builder for a single label, obtained from `SchemaBuilder::label`.
///
/// Properties and indexes are queued on the wrapped schema builder as they
/// are declared; the label itself is queued when `done` runs (directly or
/// through `apply` / a chained `label` / `edge_type` call).
#[must_use = "builders do nothing until .done() or .apply() is called"]
pub struct LabelBuilder<'a> {
    /// Schema builder that owns the queue of pending changes.
    builder: SchemaBuilder<'a>,
    /// Name of the label being defined.
    name: String,
}
195
196impl<'a> LabelBuilder<'a> {
197    fn new(builder: SchemaBuilder<'a>, name: String) -> Self {
198        Self { builder, name }
199    }
200
201    pub fn property(mut self, name: &str, data_type: DataType) -> Self {
202        self.builder.pending.push(SchemaChange::AddProperty {
203            label_or_type: self.name.clone(),
204            name: name.to_string(),
205            data_type,
206            nullable: false,
207        });
208        self
209    }
210
211    pub fn property_nullable(mut self, name: &str, data_type: DataType) -> Self {
212        self.builder.pending.push(SchemaChange::AddProperty {
213            label_or_type: self.name.clone(),
214            name: name.to_string(),
215            data_type,
216            nullable: true,
217        });
218        self
219    }
220
221    pub fn vector(self, name: &str, dimensions: usize) -> Self {
222        self.property(name, DataType::Vector { dimensions })
223    }
224
225    pub fn index(mut self, property: &str, index_type: IndexType) -> Self {
226        let idx = match index_type {
227            IndexType::Vector(cfg) => IndexDefinition::Vector(VectorIndexConfig {
228                name: format!("idx_{}_{}", self.name, property),
229                label: self.name.clone(),
230                property: property.to_string(),
231                index_type: cfg.algorithm.into_internal(),
232                metric: cfg.metric.into_internal(),
233                embedding_config: cfg.embedding.map(|e| e.into_internal()),
234            }),
235            IndexType::FullText => IndexDefinition::FullText(FullTextIndexConfig {
236                name: format!("fts_{}_{}", self.name, property),
237                label: self.name.clone(),
238                properties: vec![property.to_string()],
239                tokenizer: TokenizerConfig::Standard,
240                with_positions: true,
241            }),
242            IndexType::Scalar(stype) => IndexDefinition::Scalar(ScalarIndexConfig {
243                name: format!("idx_{}_{}", self.name, property),
244                label: self.name.clone(),
245                properties: vec![property.to_string()],
246                index_type: stype.into_internal(),
247                where_clause: None,
248            }),
249            IndexType::Inverted(config) => IndexDefinition::Inverted(config),
250        };
251        self.builder.pending.push(SchemaChange::AddIndex(idx));
252        self
253    }
254
255    pub fn done(mut self) -> SchemaBuilder<'a> {
256        self.builder
257            .pending
258            .insert(0, SchemaChange::AddLabel { name: self.name });
259        self.builder
260    }
261
262    // Chaining
263    pub fn label(self, name: &str) -> LabelBuilder<'a> {
264        self.done().label(name)
265    }
266
267    pub fn edge_type(self, name: &str, from: &[&str], to: &[&str]) -> EdgeTypeBuilder<'a> {
268        self.done().edge_type(name, from, to)
269    }
270
271    pub async fn apply(self) -> Result<()> {
272        self.done().apply().await
273    }
274}
275
/// Builder for a single edge type, obtained from `SchemaBuilder::edge_type`.
///
/// Properties are queued on the wrapped schema builder as they are declared;
/// the edge type itself is queued when `done` runs (directly or through
/// `apply` / a chained `label` / `edge_type` call).
#[must_use = "builders do nothing until .done() or .apply() is called"]
pub struct EdgeTypeBuilder<'a> {
    /// Schema builder that owns the queue of pending changes.
    builder: SchemaBuilder<'a>,
    /// Name of the edge type being defined.
    name: String,
    /// Label names passed as `from` when the builder was created.
    from_labels: Vec<String>,
    /// Label names passed as `to` when the builder was created.
    to_labels: Vec<String>,
}
283
284impl<'a> EdgeTypeBuilder<'a> {
285    fn new(
286        builder: SchemaBuilder<'a>,
287        name: String,
288        from_labels: Vec<String>,
289        to_labels: Vec<String>,
290    ) -> Self {
291        Self {
292            builder,
293            name,
294            from_labels,
295            to_labels,
296        }
297    }
298
299    pub fn property(mut self, name: &str, data_type: DataType) -> Self {
300        self.builder.pending.push(SchemaChange::AddProperty {
301            label_or_type: self.name.clone(),
302            name: name.to_string(),
303            data_type,
304            nullable: false,
305        });
306        self
307    }
308
309    pub fn property_nullable(mut self, name: &str, data_type: DataType) -> Self {
310        self.builder.pending.push(SchemaChange::AddProperty {
311            label_or_type: self.name.clone(),
312            name: name.to_string(),
313            data_type,
314            nullable: true,
315        });
316        self
317    }
318
319    pub fn done(mut self) -> SchemaBuilder<'a> {
320        self.builder.pending.insert(
321            0,
322            SchemaChange::AddEdgeType {
323                name: self.name,
324                from_labels: self.from_labels,
325                to_labels: self.to_labels,
326            },
327        );
328        self.builder
329    }
330
331    pub fn label(self, name: &str) -> LabelBuilder<'a> {
332        self.done().label(name)
333    }
334
335    pub fn edge_type(self, name: &str, from: &[&str], to: &[&str]) -> EdgeTypeBuilder<'a> {
336        self.done().edge_type(name, from, to)
337    }
338
339    pub async fn apply(self) -> Result<()> {
340        self.done().apply().await
341    }
342}
343
/// Serializable description of one label: its name, a count, and the
/// properties, indexes and constraints attached to it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LabelInfo {
    /// Label name.
    pub name: String,
    // NOTE(review): presumably the number of nodes carrying this label —
    // confirm against the code that fills this struct.
    pub count: usize,
    /// Properties reported for the label.
    pub properties: Vec<PropertyInfo>,
    /// Indexes reported for the label.
    pub indexes: Vec<IndexInfo>,
    /// Constraints reported for the label.
    pub constraints: Vec<ConstraintInfo>,
}
352
/// Serializable description of one property.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PropertyInfo {
    /// Property name.
    pub name: String,
    /// Property type rendered as text.
    // NOTE(review): the exact string format is decided by the producer — confirm.
    pub data_type: String,
    /// Whether the property is declared nullable.
    pub nullable: bool,
    /// Whether the property is reported as indexed.
    pub is_indexed: bool,
}
360
/// Serializable description of one index.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexInfo {
    /// Index name.
    pub name: String,
    /// Index kind rendered as text.
    pub index_type: String,
    /// Names of the properties reported for the index.
    pub properties: Vec<String>,
    /// Index status rendered as text.
    // NOTE(review): the set of possible status values is not visible here — confirm.
    pub status: String,
}
368
/// Serializable description of one constraint.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ConstraintInfo {
    /// Constraint name.
    pub name: String,
    /// Constraint kind rendered as text.
    pub constraint_type: String,
    /// Names of the properties reported for the constraint.
    pub properties: Vec<String>,
    /// Whether the constraint is reported as enabled.
    pub enabled: bool,
}
376
/// Kind of index requested through `LabelBuilder::index`.
#[non_exhaustive]
pub enum IndexType {
    /// Vector index built from the given algorithm, metric and optional
    /// embedding settings; named `idx_<label>_<property>`.
    Vector(VectorIndexCfg),
    /// Full-text index on the property using the standard tokenizer with
    /// positions; named `fts_<label>_<property>`.
    FullText,
    /// Scalar index of the given kind on the property; named
    /// `idx_<label>_<property>` with no `where` clause.
    Scalar(ScalarType),
    /// Inverted index; the supplied configuration is queued unchanged, so the
    /// `property` argument of `LabelBuilder::index` is not applied to it.
    Inverted(uni_common::core::schema::InvertedIndexConfig),
}
384
/// Settings for a vector index requested with `IndexType::Vector`.
pub struct VectorIndexCfg {
    /// Index algorithm and its tuning parameters.
    pub algorithm: VectorAlgo,
    /// Distance metric used by the index.
    pub metric: VectorMetric,
    /// Optional embedding settings; `None` leaves the index definition
    /// without an embedding configuration.
    pub embedding: Option<EmbeddingCfg>,
}
390
/// Embedding configuration for auto-embedding during index writes.
///
/// Each field is copied verbatim into the internal embedding configuration
/// when the owning vector index is queued.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingCfg {
    /// Model alias from the Uni-Xervo catalog (for example: "embed/default").
    pub alias: String,
    /// Names of the properties used as embedding input.
    // NOTE(review): how several properties are combined is decided downstream — confirm.
    pub source_properties: Vec<String>,
    /// Batch size passed through to the embedding configuration.
    pub batch_size: usize,
}
398
399impl EmbeddingCfg {
400    fn into_internal(self) -> EmbeddingConfig {
401        EmbeddingConfig {
402            alias: self.alias,
403            source_properties: self.source_properties,
404            batch_size: self.batch_size,
405        }
406    }
407}
408
/// Vector index algorithm selectable through `VectorIndexCfg::algorithm`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum VectorAlgo {
    /// HNSW index with the given `m` and `ef_construction` parameters.
    Hnsw { m: u32, ef_construction: u32 },
    /// IVF-PQ index with the given partition and sub-vector counts.
    IvfPq { partitions: u32, sub_vectors: u32 },
    /// Flat index; takes no parameters.
    Flat,
}
415
416impl VectorAlgo {
417    fn into_internal(self) -> VectorIndexType {
418        match self {
419            VectorAlgo::Hnsw { m, ef_construction } => VectorIndexType::Hnsw {
420                m,
421                ef_construction,
422                ef_search: 50,
423            },
424            VectorAlgo::IvfPq {
425                partitions,
426                sub_vectors,
427            } => VectorIndexType::IvfPq {
428                num_partitions: partitions,
429                num_sub_vectors: sub_vectors,
430                bits_per_subvector: 8,
431            },
432            VectorAlgo::Flat => VectorIndexType::Flat,
433        }
434    }
435}
436
/// Distance metric selectable through `VectorIndexCfg::metric`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum VectorMetric {
    /// Maps to the internal cosine metric.
    Cosine,
    /// Maps to the internal L2 metric.
    L2,
    /// Maps to the internal dot-product metric.
    Dot,
}
443
444impl VectorMetric {
445    fn into_internal(self) -> DistanceMetric {
446        match self {
447            VectorMetric::Cosine => DistanceMetric::Cosine,
448            VectorMetric::L2 => DistanceMetric::L2,
449            VectorMetric::Dot => DistanceMetric::Dot,
450        }
451    }
452}
453
/// Scalar index kind selectable through `IndexType::Scalar`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ScalarType {
    /// Maps to the internal B-tree scalar index type.
    BTree,
    /// Maps to the internal hash scalar index type.
    Hash,
    /// Maps to the internal bitmap scalar index type.
    Bitmap,
}
460
461impl ScalarType {
462    fn into_internal(self) -> ScalarIndexType {
463        match self {
464            ScalarType::BTree => ScalarIndexType::BTree,
465            ScalarType::Hash => ScalarIndexType::Hash,
466            ScalarType::Bitmap => ScalarIndexType::Bitmap,
467        }
468    }
469}
470
471impl Uni {
472    pub fn schema(&self) -> SchemaBuilder<'_> {
473        SchemaBuilder::new(self)
474    }
475
476    pub async fn load_schema(&self, path: impl AsRef<Path>) -> Result<()> {
477        // We can't easily "replace" the SchemaManager's schema in-place if it's already Arc-ed around.
478        // But SchemaManager has internal RwLock<Schema>.
479        // Let's check if we can add a method to SchemaManager to reload.
480        let content = tokio::fs::read_to_string(path)
481            .await
482            .map_err(UniError::Io)?;
483        let schema: uni_common::core::schema::Schema =
484            serde_json::from_str(&content).map_err(|e| UniError::Schema {
485                message: e.to_string(),
486            })?;
487
488        // We need a way to update the schema in SchemaManager.
489        // I'll add a `replace_schema` or similar to SchemaManager.
490        self.schema.replace_schema(schema);
491        Ok(())
492    }
493
494    pub async fn save_schema(&self, path: impl AsRef<Path>) -> Result<()> {
495        let content =
496            serde_json::to_string_pretty(&self.schema.schema()).map_err(|e| UniError::Schema {
497                message: e.to_string(),
498            })?;
499        tokio::fs::write(path, content)
500            .await
501            .map_err(UniError::Io)?;
502        Ok(())
503    }
504}