omnigraph_compiler/catalog/
mod.rs1pub mod schema_ir;
2pub mod schema_plan;
3
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use arrow_schema::{DataType, Field, Schema, SchemaRef};
8
9use crate::error::{NanoError, Result};
10use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
11use crate::types::{PropType, ScalarType};
12
13#[derive(Debug, Clone)]
14pub struct Catalog {
15 pub node_types: HashMap<String, NodeType>,
16 pub edge_types: HashMap<String, EdgeType>,
17 pub edge_name_index: HashMap<String, String>,
19 pub interfaces: HashMap<String, InterfaceType>,
21}
22
23#[derive(Debug, Clone)]
24pub struct InterfaceType {
25 pub name: String,
26 pub properties: HashMap<String, PropType>,
27}
28
29#[derive(Debug, Clone)]
30pub struct NodeType {
31 pub name: String,
32 pub implements: Vec<String>,
34 pub properties: HashMap<String, PropType>,
35 pub key: Option<Vec<String>>,
37 pub unique_constraints: Vec<Vec<String>>,
39 pub indices: Vec<Vec<String>>,
41 pub range_constraints: Vec<RangeConstraint>,
43 pub check_constraints: Vec<CheckConstraint>,
45 pub embed_sources: HashMap<String, String>,
47 pub blob_properties: HashSet<String>,
48 pub arrow_schema: SchemaRef,
49}
50
51impl NodeType {
52 pub fn key_property(&self) -> Option<&str> {
54 self.key
55 .as_ref()
56 .and_then(|v| v.first())
57 .map(|s| s.as_str())
58 }
59}
60
61#[derive(Debug, Clone)]
62pub struct RangeConstraint {
63 pub property: String,
64 pub min: Option<LiteralValue>,
65 pub max: Option<LiteralValue>,
66}
67
/// A numeric literal used as a range-constraint bound.
#[derive(Clone, Debug)]
pub enum LiteralValue {
    /// A signed 64-bit integer bound.
    Integer(i64),
    /// A 64-bit floating-point bound.
    Float(f64),
}
73
/// A pattern constraint on a single property.
#[derive(Clone, Debug)]
pub struct CheckConstraint {
    /// Name of the constrained property.
    pub property: String,
    /// Pattern text, stored exactly as written in the schema.
    pub pattern: String,
}
79
80#[derive(Debug, Clone)]
81pub struct EdgeType {
82 pub name: String,
83 pub from_type: String,
84 pub to_type: String,
85 pub cardinality: Cardinality,
86 pub properties: HashMap<String, PropType>,
87 pub unique_constraints: Vec<Vec<String>>,
89 pub indices: Vec<Vec<String>>,
91 pub blob_properties: HashSet<String>,
92 pub arrow_schema: SchemaRef,
93}
94
95impl Catalog {
96 pub fn lookup_edge_by_name(&self, name: &str) -> Option<&EdgeType> {
97 if let Some(et) = self.edge_types.get(name) {
98 return Some(et);
99 }
100 if let Some(key) = self.edge_name_index.get(&normalize_edge_name(name)) {
101 return self.edge_types.get(key);
102 }
103 None
104 }
105}
106
/// Produces the case-folded form of an edge name used as the key of the
/// case-insensitive edge index.
fn normalize_edge_name(name: &str) -> String {
    str::to_lowercase(name)
}
110
111fn bound_to_literal(b: &ConstraintBound) -> LiteralValue {
112 match b {
113 ConstraintBound::Integer(n) => LiteralValue::Integer(*n),
114 ConstraintBound::Float(f) => LiteralValue::Float(*f),
115 }
116}
117
118pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
119 let mut node_types = HashMap::new();
120 let mut edge_types = HashMap::new();
121 let mut edge_name_index = HashMap::new();
122 let mut interfaces = HashMap::new();
123
124 for decl in &schema.declarations {
126 if let SchemaDecl::Interface(iface) = decl {
127 let mut properties = HashMap::new();
128 for prop in &iface.properties {
129 properties.insert(prop.name.clone(), prop.prop_type.clone());
130 }
131 interfaces.insert(
132 iface.name.clone(),
133 InterfaceType {
134 name: iface.name.clone(),
135 properties,
136 },
137 );
138 }
139 }
140
141 for decl in &schema.declarations {
143 if let SchemaDecl::Node(node) = decl {
144 if node_types.contains_key(&node.name) {
145 return Err(NanoError::Catalog(format!(
146 "duplicate node type: {}",
147 node.name
148 )));
149 }
150
151 let mut properties = HashMap::new();
152 let mut embed_sources = HashMap::new();
153 let mut blob_properties = HashSet::new();
154 for prop in &node.properties {
155 properties.insert(prop.name.clone(), prop.prop_type.clone());
156 if matches!(prop.prop_type.scalar, ScalarType::Blob) {
157 blob_properties.insert(prop.name.clone());
158 }
159 if let Some(source_prop) = prop
161 .annotations
162 .iter()
163 .find(|ann| ann.name == "embed")
164 .and_then(|ann| ann.value.clone())
165 {
166 embed_sources.insert(prop.name.clone(), source_prop);
167 }
168 }
169
170 let mut key: Option<Vec<String>> = None;
172 let mut unique_constraints = Vec::new();
173 let mut indices = Vec::new();
174 let mut range_constraints = Vec::new();
175 let mut check_constraints = Vec::new();
176
177 for constraint in &node.constraints {
178 match constraint {
179 Constraint::Key(cols) => {
180 key = Some(cols.clone());
181 indices.push(cols.clone());
183 }
184 Constraint::Unique(cols) => {
185 unique_constraints.push(cols.clone());
186 }
187 Constraint::Index(cols) => {
188 indices.push(cols.clone());
189 }
190 Constraint::Range { property, min, max } => {
191 range_constraints.push(RangeConstraint {
192 property: property.clone(),
193 min: min.as_ref().map(bound_to_literal),
194 max: max.as_ref().map(bound_to_literal),
195 });
196 }
197 Constraint::Check { property, pattern } => {
198 check_constraints.push(CheckConstraint {
199 property: property.clone(),
200 pattern: pattern.clone(),
201 });
202 }
203 }
204 }
205
206 let mut fields = vec![Field::new("id", DataType::Utf8, false)];
208 for prop in &node.properties {
209 fields.push(Field::new(
210 &prop.name,
211 prop.prop_type.to_arrow(),
212 prop.prop_type.nullable,
213 ));
214 }
215 let arrow_schema = Arc::new(Schema::new(fields));
216
217 node_types.insert(
218 node.name.clone(),
219 NodeType {
220 name: node.name.clone(),
221 implements: node.implements.clone(),
222 properties,
223 key,
224 unique_constraints,
225 indices,
226 range_constraints,
227 check_constraints,
228 embed_sources,
229 blob_properties,
230 arrow_schema,
231 },
232 );
233 }
234 }
235
236 for decl in &schema.declarations {
238 if let SchemaDecl::Edge(edge) = decl {
239 if edge_types.contains_key(&edge.name) {
240 return Err(NanoError::Catalog(format!(
241 "duplicate edge type: {}",
242 edge.name
243 )));
244 }
245 if !node_types.contains_key(&edge.from_type) {
246 return Err(NanoError::Catalog(format!(
247 "edge {} references unknown source type: {}",
248 edge.name, edge.from_type
249 )));
250 }
251 if !node_types.contains_key(&edge.to_type) {
252 return Err(NanoError::Catalog(format!(
253 "edge {} references unknown target type: {}",
254 edge.name, edge.to_type
255 )));
256 }
257
258 let mut properties = HashMap::new();
259 let mut blob_properties = HashSet::new();
260 let mut fields = vec![
261 Field::new("id", DataType::Utf8, false),
262 Field::new("src", DataType::Utf8, false),
263 Field::new("dst", DataType::Utf8, false),
264 ];
265 for prop in &edge.properties {
266 properties.insert(prop.name.clone(), prop.prop_type.clone());
267 if matches!(prop.prop_type.scalar, ScalarType::Blob) {
268 blob_properties.insert(prop.name.clone());
269 }
270 fields.push(Field::new(
271 &prop.name,
272 prop.prop_type.to_arrow(),
273 prop.prop_type.nullable,
274 ));
275 }
276
277 let mut unique_constraints = Vec::new();
279 let mut edge_indices = Vec::new();
280 for constraint in &edge.constraints {
281 match constraint {
282 Constraint::Unique(cols) => unique_constraints.push(cols.clone()),
283 Constraint::Index(cols) => edge_indices.push(cols.clone()),
284 _ => {} }
286 }
287
288 let normalized_name = normalize_edge_name(&edge.name);
289 if let Some(existing) = edge_name_index.get(&normalized_name)
290 && existing != &edge.name
291 {
292 return Err(NanoError::Catalog(format!(
293 "edge name collision after case folding: '{}' conflicts with '{}'",
294 edge.name, existing
295 )));
296 }
297 edge_name_index.insert(normalized_name, edge.name.clone());
298
299 edge_types.insert(
300 edge.name.clone(),
301 EdgeType {
302 name: edge.name.clone(),
303 from_type: edge.from_type.clone(),
304 to_type: edge.to_type.clone(),
305 cardinality: edge.cardinality.clone(),
306 properties,
307 unique_constraints,
308 indices: edge_indices,
309 blob_properties,
310 arrow_schema: Arc::new(Schema::new(fields)),
311 },
312 );
313 }
314 }
315
316 Ok(Catalog {
317 node_types,
318 edge_types,
319 edge_name_index,
320 interfaces,
321 })
322}
323
324#[cfg(test)]
325#[path = "tests.rs"]
326mod tests;