Skip to main content

omnigraph_compiler/catalog/
mod.rs

1pub mod schema_ir;
2pub mod schema_plan;
3
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use arrow_schema::{DataType, Field, Schema, SchemaRef};
8
9use crate::error::{NanoError, Result};
10use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
11use crate::types::{PropType, ScalarType};
12
13#[derive(Debug, Clone)]
14pub struct Catalog {
15    pub node_types: HashMap<String, NodeType>,
16    pub edge_types: HashMap<String, EdgeType>,
17    /// Maps normalized lowercase edge name -> EdgeType key (e.g. "knows" -> "Knows")
18    pub edge_name_index: HashMap<String, String>,
19    /// Interface declarations (for Phase 2 polymorphic queries)
20    pub interfaces: HashMap<String, InterfaceType>,
21}
22
23#[derive(Debug, Clone)]
24pub struct InterfaceType {
25    pub name: String,
26    pub properties: HashMap<String, PropType>,
27}
28
29#[derive(Debug, Clone)]
30pub struct NodeType {
31    pub name: String,
32    /// Interface names this type implements
33    pub implements: Vec<String>,
34    pub properties: HashMap<String, PropType>,
35    /// Key property names (from `@key` or `@key(name)`). Usually 0 or 1 element.
36    pub key: Option<Vec<String>>,
37    /// Uniqueness constraints (each entry is a list of column names)
38    pub unique_constraints: Vec<Vec<String>>,
39    /// Index declarations (each entry is a list of column names)
40    pub indices: Vec<Vec<String>>,
41    /// Value range constraints
42    pub range_constraints: Vec<RangeConstraint>,
43    /// Regex check constraints
44    pub check_constraints: Vec<CheckConstraint>,
45    /// Maps @embed target property -> source text property
46    pub embed_sources: HashMap<String, String>,
47    pub blob_properties: HashSet<String>,
48    pub arrow_schema: SchemaRef,
49}
50
51impl NodeType {
52    /// Backward-compatible accessor: returns the first (and typically only) key property name.
53    pub fn key_property(&self) -> Option<&str> {
54        self.key
55            .as_ref()
56            .and_then(|v| v.first())
57            .map(|s| s.as_str())
58    }
59}
60
61#[derive(Debug, Clone)]
62pub struct RangeConstraint {
63    pub property: String,
64    pub min: Option<LiteralValue>,
65    pub max: Option<LiteralValue>,
66}
67
/// A numeric literal used as a bound in a [`RangeConstraint`].
#[derive(Clone, Debug)]
pub enum LiteralValue {
    /// Signed 64-bit integer literal.
    Integer(i64),
    /// 64-bit floating point literal.
    Float(f64),
}
73
/// A regex check constraint on a single property.
#[derive(Clone, Debug)]
pub struct CheckConstraint {
    /// Name of the constrained property.
    pub property: String,
    /// Pattern text, stored exactly as declared (not compiled here).
    pub pattern: String,
}
79
80#[derive(Debug, Clone)]
81pub struct EdgeType {
82    pub name: String,
83    pub from_type: String,
84    pub to_type: String,
85    pub cardinality: Cardinality,
86    pub properties: HashMap<String, PropType>,
87    /// Uniqueness constraints on edge columns (e.g. `@unique(src, dst)`)
88    pub unique_constraints: Vec<Vec<String>>,
89    /// Index declarations on edge properties
90    pub indices: Vec<Vec<String>>,
91    pub blob_properties: HashSet<String>,
92    pub arrow_schema: SchemaRef,
93}
94
95impl Catalog {
96    pub fn lookup_edge_by_name(&self, name: &str) -> Option<&EdgeType> {
97        if let Some(et) = self.edge_types.get(name) {
98            return Some(et);
99        }
100        if let Some(key) = self.edge_name_index.get(&normalize_edge_name(name)) {
101            return self.edge_types.get(key);
102        }
103        None
104    }
105}
106
/// Case-folds an edge name into the form used as a key of the edge name index.
fn normalize_edge_name(name: &str) -> String {
    // Whole-string lowercasing (not per-char) so context-sensitive mappings
    // stay exactly as `str::to_lowercase` defines them.
    str::to_lowercase(name)
}
110
111fn bound_to_literal(b: &ConstraintBound) -> LiteralValue {
112    match b {
113        ConstraintBound::Integer(n) => LiteralValue::Integer(*n),
114        ConstraintBound::Float(f) => LiteralValue::Float(*f),
115    }
116}
117
118pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
119    let mut node_types = HashMap::new();
120    let mut edge_types = HashMap::new();
121    let mut edge_name_index = HashMap::new();
122    let mut interfaces = HashMap::new();
123
124    // Pass 0: collect interfaces
125    for decl in &schema.declarations {
126        if let SchemaDecl::Interface(iface) = decl {
127            let mut properties = HashMap::new();
128            for prop in &iface.properties {
129                properties.insert(prop.name.clone(), prop.prop_type.clone());
130            }
131            interfaces.insert(
132                iface.name.clone(),
133                InterfaceType {
134                    name: iface.name.clone(),
135                    properties,
136                },
137            );
138        }
139    }
140
141    // Pass 1: collect node types
142    for decl in &schema.declarations {
143        if let SchemaDecl::Node(node) = decl {
144            if node_types.contains_key(&node.name) {
145                return Err(NanoError::Catalog(format!(
146                    "duplicate node type: {}",
147                    node.name
148                )));
149            }
150
151            let mut properties = HashMap::new();
152            let mut embed_sources = HashMap::new();
153            let mut blob_properties = HashSet::new();
154            for prop in &node.properties {
155                properties.insert(prop.name.clone(), prop.prop_type.clone());
156                if matches!(prop.prop_type.scalar, ScalarType::Blob) {
157                    blob_properties.insert(prop.name.clone());
158                }
159                // Extract @embed from property annotations (stays as annotation)
160                if let Some(source_prop) = prop
161                    .annotations
162                    .iter()
163                    .find(|ann| ann.name == "embed")
164                    .and_then(|ann| ann.value.clone())
165                {
166                    embed_sources.insert(prop.name.clone(), source_prop);
167                }
168            }
169
170            // Extract constraints from the typed Constraint enum
171            let mut key: Option<Vec<String>> = None;
172            let mut unique_constraints = Vec::new();
173            let mut indices = Vec::new();
174            let mut range_constraints = Vec::new();
175            let mut check_constraints = Vec::new();
176
177            for constraint in &node.constraints {
178                match constraint {
179                    Constraint::Key(cols) => {
180                        key = Some(cols.clone());
181                        // @key implies index on key columns
182                        indices.push(cols.clone());
183                    }
184                    Constraint::Unique(cols) => {
185                        unique_constraints.push(cols.clone());
186                    }
187                    Constraint::Index(cols) => {
188                        indices.push(cols.clone());
189                    }
190                    Constraint::Range { property, min, max } => {
191                        range_constraints.push(RangeConstraint {
192                            property: property.clone(),
193                            min: min.as_ref().map(bound_to_literal),
194                            max: max.as_ref().map(bound_to_literal),
195                        });
196                    }
197                    Constraint::Check { property, pattern } => {
198                        check_constraints.push(CheckConstraint {
199                            property: property.clone(),
200                            pattern: pattern.clone(),
201                        });
202                    }
203                }
204            }
205
206            // Build Arrow schema: id: Utf8 + all properties
207            let mut fields = vec![Field::new("id", DataType::Utf8, false)];
208            for prop in &node.properties {
209                fields.push(Field::new(
210                    &prop.name,
211                    prop.prop_type.to_arrow(),
212                    prop.prop_type.nullable,
213                ));
214            }
215            let arrow_schema = Arc::new(Schema::new(fields));
216
217            node_types.insert(
218                node.name.clone(),
219                NodeType {
220                    name: node.name.clone(),
221                    implements: node.implements.clone(),
222                    properties,
223                    key,
224                    unique_constraints,
225                    indices,
226                    range_constraints,
227                    check_constraints,
228                    embed_sources,
229                    blob_properties,
230                    arrow_schema,
231                },
232            );
233        }
234    }
235
236    // Pass 2: collect edge types, validate endpoints
237    for decl in &schema.declarations {
238        if let SchemaDecl::Edge(edge) = decl {
239            if edge_types.contains_key(&edge.name) {
240                return Err(NanoError::Catalog(format!(
241                    "duplicate edge type: {}",
242                    edge.name
243                )));
244            }
245            if !node_types.contains_key(&edge.from_type) {
246                return Err(NanoError::Catalog(format!(
247                    "edge {} references unknown source type: {}",
248                    edge.name, edge.from_type
249                )));
250            }
251            if !node_types.contains_key(&edge.to_type) {
252                return Err(NanoError::Catalog(format!(
253                    "edge {} references unknown target type: {}",
254                    edge.name, edge.to_type
255                )));
256            }
257
258            let mut properties = HashMap::new();
259            let mut blob_properties = HashSet::new();
260            let mut fields = vec![
261                Field::new("id", DataType::Utf8, false),
262                Field::new("src", DataType::Utf8, false),
263                Field::new("dst", DataType::Utf8, false),
264            ];
265            for prop in &edge.properties {
266                properties.insert(prop.name.clone(), prop.prop_type.clone());
267                if matches!(prop.prop_type.scalar, ScalarType::Blob) {
268                    blob_properties.insert(prop.name.clone());
269                }
270                fields.push(Field::new(
271                    &prop.name,
272                    prop.prop_type.to_arrow(),
273                    prop.prop_type.nullable,
274                ));
275            }
276
277            // Extract edge constraints
278            let mut unique_constraints = Vec::new();
279            let mut edge_indices = Vec::new();
280            for constraint in &edge.constraints {
281                match constraint {
282                    Constraint::Unique(cols) => unique_constraints.push(cols.clone()),
283                    Constraint::Index(cols) => edge_indices.push(cols.clone()),
284                    _ => {} // Key/Range/Check validated at parse time to not appear on edges
285                }
286            }
287
288            let normalized_name = normalize_edge_name(&edge.name);
289            if let Some(existing) = edge_name_index.get(&normalized_name)
290                && existing != &edge.name
291            {
292                return Err(NanoError::Catalog(format!(
293                    "edge name collision after case folding: '{}' conflicts with '{}'",
294                    edge.name, existing
295                )));
296            }
297            edge_name_index.insert(normalized_name, edge.name.clone());
298
299            edge_types.insert(
300                edge.name.clone(),
301                EdgeType {
302                    name: edge.name.clone(),
303                    from_type: edge.from_type.clone(),
304                    to_type: edge.to_type.clone(),
305                    cardinality: edge.cardinality.clone(),
306                    properties,
307                    unique_constraints,
308                    indices: edge_indices,
309                    blob_properties,
310                    arrow_schema: Arc::new(Schema::new(fields)),
311                },
312            );
313        }
314    }
315
316    Ok(Catalog {
317        node_types,
318        edge_types,
319        edge_name_index,
320        interfaces,
321    })
322}
323
324#[cfg(test)]
325#[path = "tests.rs"]
326mod tests;