1pub mod schema_ir;
2pub mod schema_plan;
3
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use arrow_schema::{DataType, Field, Schema, SchemaRef};
8
9use crate::error::{NanoError, Result};
10use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
11use crate::types::{PropType, ScalarType};
12
13#[derive(Debug, Clone)]
14pub struct Catalog {
15 pub node_types: HashMap<String, NodeType>,
16 pub edge_types: HashMap<String, EdgeType>,
17 pub edge_name_index: HashMap<String, String>,
19 pub interfaces: HashMap<String, InterfaceType>,
21}
22
23#[derive(Debug, Clone)]
24pub struct InterfaceType {
25 pub name: String,
26 pub properties: HashMap<String, PropType>,
27}
28
29#[derive(Debug, Clone)]
30pub struct NodeType {
31 pub name: String,
32 pub implements: Vec<String>,
34 pub properties: HashMap<String, PropType>,
35 pub key: Option<Vec<String>>,
37 pub unique_constraints: Vec<Vec<String>>,
39 pub indices: Vec<Vec<String>>,
41 pub range_constraints: Vec<RangeConstraint>,
43 pub check_constraints: Vec<CheckConstraint>,
45 pub embed_sources: HashMap<String, String>,
47 pub blob_properties: HashSet<String>,
48 pub arrow_schema: SchemaRef,
49}
50
51impl NodeType {
52 pub fn key_property(&self) -> Option<&str> {
54 self.key
55 .as_ref()
56 .and_then(|v| v.first())
57 .map(|s| s.as_str())
58 }
59}
60
61#[derive(Debug, Clone)]
62pub struct RangeConstraint {
63 pub property: String,
64 pub min: Option<LiteralValue>,
65 pub max: Option<LiteralValue>,
66}
67
/// A numeric literal used as a range-constraint bound.
#[derive(Debug, Clone)]
pub enum LiteralValue {
    /// A signed 64-bit integer bound.
    Integer(i64),
    /// A 64-bit floating-point bound.
    Float(f64),
}
73
/// A pattern check attached to one property.
#[derive(Debug, Clone)]
pub struct CheckConstraint {
    /// Name of the constrained property.
    pub property: String,
    /// Pattern text copied from the schema's check constraint.
    // NOTE(review): presumably a regular expression; how it is interpreted
    // is not decided in this module — confirm against the validator.
    pub pattern: String,
}
79
80#[derive(Debug, Clone)]
81pub struct EdgeType {
82 pub name: String,
83 pub from_type: String,
84 pub to_type: String,
85 pub cardinality: Cardinality,
86 pub properties: HashMap<String, PropType>,
87 pub unique_constraints: Vec<Vec<String>>,
89 pub indices: Vec<Vec<String>>,
91 pub blob_properties: HashSet<String>,
92 pub arrow_schema: SchemaRef,
93}
94
95impl Catalog {
96 pub fn lookup_edge_by_name(&self, name: &str) -> Option<&EdgeType> {
97 if let Some(et) = self.edge_types.get(name) {
98 return Some(et);
99 }
100 if let Some(key) = self.edge_name_index.get(&normalize_edge_name(name)) {
101 return self.edge_types.get(key);
102 }
103 None
104 }
105}
106
/// Produces the key under which an edge name is stored in the
/// case-insensitive index: the Unicode-aware lowercase form of `name`.
fn normalize_edge_name(name: &str) -> String {
    str::to_lowercase(name)
}
110
111fn bound_to_literal(b: &ConstraintBound) -> LiteralValue {
112 match b {
113 ConstraintBound::Integer(n) => LiteralValue::Integer(*n),
114 ConstraintBound::Float(f) => LiteralValue::Float(*f),
115 }
116}
117
118pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
119 let mut node_types = HashMap::new();
120 let mut edge_types = HashMap::new();
121 let mut edge_name_index = HashMap::new();
122 let mut interfaces = HashMap::new();
123
124 for decl in &schema.declarations {
126 if let SchemaDecl::Interface(iface) = decl {
127 let mut properties = HashMap::new();
128 for prop in &iface.properties {
129 properties.insert(prop.name.clone(), prop.prop_type.clone());
130 }
131 interfaces.insert(
132 iface.name.clone(),
133 InterfaceType {
134 name: iface.name.clone(),
135 properties,
136 },
137 );
138 }
139 }
140
141 for decl in &schema.declarations {
143 if let SchemaDecl::Node(node) = decl {
144 if node_types.contains_key(&node.name) {
145 return Err(NanoError::Catalog(format!(
146 "duplicate node type: {}",
147 node.name
148 )));
149 }
150
151 let mut properties = HashMap::new();
152 let mut embed_sources = HashMap::new();
153 let mut blob_properties = HashSet::new();
154 for prop in &node.properties {
155 properties.insert(prop.name.clone(), prop.prop_type.clone());
156 if matches!(prop.prop_type.scalar, ScalarType::Blob) {
157 blob_properties.insert(prop.name.clone());
158 }
159 if let Some(source_prop) = prop
161 .annotations
162 .iter()
163 .find(|ann| ann.name == "embed")
164 .and_then(|ann| ann.value.clone())
165 {
166 embed_sources.insert(prop.name.clone(), source_prop);
167 }
168 }
169
170 let mut key: Option<Vec<String>> = None;
172 let mut unique_constraints = Vec::new();
173 let mut indices = Vec::new();
174 let mut range_constraints = Vec::new();
175 let mut check_constraints = Vec::new();
176
177 for constraint in &node.constraints {
178 match constraint {
179 Constraint::Key(cols) => {
180 key = Some(cols.clone());
181 indices.push(cols.clone());
183 }
184 Constraint::Unique(cols) => {
185 unique_constraints.push(cols.clone());
186 }
187 Constraint::Index(cols) => {
188 indices.push(cols.clone());
189 }
190 Constraint::Range { property, min, max } => {
191 range_constraints.push(RangeConstraint {
192 property: property.clone(),
193 min: min.as_ref().map(bound_to_literal),
194 max: max.as_ref().map(bound_to_literal),
195 });
196 }
197 Constraint::Check { property, pattern } => {
198 check_constraints.push(CheckConstraint {
199 property: property.clone(),
200 pattern: pattern.clone(),
201 });
202 }
203 }
204 }
205
206 let mut fields = vec![Field::new("id", DataType::Utf8, false)];
208 for prop in &node.properties {
209 fields.push(Field::new(
210 &prop.name,
211 prop.prop_type.to_arrow(),
212 prop.prop_type.nullable,
213 ));
214 }
215 let arrow_schema = Arc::new(Schema::new(fields));
216
217 node_types.insert(
218 node.name.clone(),
219 NodeType {
220 name: node.name.clone(),
221 implements: node.implements.clone(),
222 properties,
223 key,
224 unique_constraints,
225 indices,
226 range_constraints,
227 check_constraints,
228 embed_sources,
229 blob_properties,
230 arrow_schema,
231 },
232 );
233 }
234 }
235
236 for decl in &schema.declarations {
238 if let SchemaDecl::Edge(edge) = decl {
239 if edge_types.contains_key(&edge.name) {
240 return Err(NanoError::Catalog(format!(
241 "duplicate edge type: {}",
242 edge.name
243 )));
244 }
245 if !node_types.contains_key(&edge.from_type) {
246 return Err(NanoError::Catalog(format!(
247 "edge {} references unknown source type: {}",
248 edge.name, edge.from_type
249 )));
250 }
251 if !node_types.contains_key(&edge.to_type) {
252 return Err(NanoError::Catalog(format!(
253 "edge {} references unknown target type: {}",
254 edge.name, edge.to_type
255 )));
256 }
257
258 let mut properties = HashMap::new();
259 let mut blob_properties = HashSet::new();
260 let mut fields = vec![
261 Field::new("id", DataType::Utf8, false),
262 Field::new("src", DataType::Utf8, false),
263 Field::new("dst", DataType::Utf8, false),
264 ];
265 for prop in &edge.properties {
266 properties.insert(prop.name.clone(), prop.prop_type.clone());
267 if matches!(prop.prop_type.scalar, ScalarType::Blob) {
268 blob_properties.insert(prop.name.clone());
269 }
270 fields.push(Field::new(
271 &prop.name,
272 prop.prop_type.to_arrow(),
273 prop.prop_type.nullable,
274 ));
275 }
276
277 let mut unique_constraints = Vec::new();
279 let mut edge_indices = Vec::new();
280 for constraint in &edge.constraints {
281 match constraint {
282 Constraint::Unique(cols) => unique_constraints.push(cols.clone()),
283 Constraint::Index(cols) => edge_indices.push(cols.clone()),
284 _ => {} }
286 }
287
288 let normalized_name = normalize_edge_name(&edge.name);
289 if let Some(existing) = edge_name_index.get(&normalized_name)
290 && existing != &edge.name
291 {
292 return Err(NanoError::Catalog(format!(
293 "edge name collision after case folding: '{}' conflicts with '{}'",
294 edge.name, existing
295 )));
296 }
297 edge_name_index.insert(normalized_name, edge.name.clone());
298
299 edge_types.insert(
300 edge.name.clone(),
301 EdgeType {
302 name: edge.name.clone(),
303 from_type: edge.from_type.clone(),
304 to_type: edge.to_type.clone(),
305 cardinality: edge.cardinality.clone(),
306 properties,
307 unique_constraints,
308 indices: edge_indices,
309 blob_properties,
310 arrow_schema: Arc::new(Schema::new(fields)),
311 },
312 );
313 }
314 }
315
316 Ok(Catalog {
317 node_types,
318 edge_types,
319 edge_name_index,
320 interfaces,
321 })
322}
323
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::ast::{EdgeDecl, NodeDecl};
    use crate::schema::parser::parse_schema;
    use crate::types::PropType;

    /// Two node types and two edge types shared by the basic tests.
    fn test_schema() -> &'static str {
        r#"
node Person {
    name: String
    age: I32?
}
node Company {
    name: String
}
edge Knows: Person -> Person {
    since: Date?
}
edge WorksAt: Person -> Company {
    title: String?
}
"#
    }

    /// Parses `input` and builds its catalog, panicking if either step fails.
    fn catalog_for(input: &str) -> Catalog {
        let parsed = parse_schema(input).unwrap();
        build_catalog(&parsed).unwrap()
    }

    #[test]
    fn test_build_catalog() {
        let catalog = catalog_for(test_schema());
        assert_eq!(catalog.node_types.len(), 2);
        assert_eq!(catalog.edge_types.len(), 2);
        for node_name in ["Person", "Company"] {
            assert!(catalog.node_types.contains_key(node_name));
        }
    }

    #[test]
    fn test_edge_lookup() {
        let catalog = catalog_for(test_schema());

        let lower = catalog.lookup_edge_by_name("knows").unwrap();
        assert_eq!(lower.from_type, "Person");
        assert_eq!(lower.to_type, "Person");

        let upper = catalog.lookup_edge_by_name("KNOWS").unwrap();
        assert_eq!(upper.name, "Knows");
    }

    #[test]
    fn test_node_arrow_schema() {
        let catalog = catalog_for(test_schema());
        let person = &catalog.node_types["Person"];
        // id + name + age
        assert_eq!(person.arrow_schema.fields().len(), 3);
    }

    #[test]
    fn test_duplicate_node_error() {
        let input = r#"
node Person { name: String }
node Person { age: I32 }
"#;
        let parsed = parse_schema(input).unwrap();
        assert!(build_catalog(&parsed).is_err());
    }

    #[test]
    fn test_bad_edge_endpoint() {
        let input = r#"
node Person { name: String }
edge Knows: Person -> Alien
"#;
        let parsed = parse_schema(input).unwrap();
        assert!(build_catalog(&parsed).is_err());
    }

    #[test]
    fn test_id_fields_are_utf8() {
        let catalog = catalog_for(test_schema());

        let person_id = catalog.node_types["Person"]
            .arrow_schema
            .field_with_name("id")
            .unwrap();
        assert_eq!(person_id.data_type(), &DataType::Utf8);

        let knows = &catalog.edge_types["Knows"];
        for column in ["id", "src", "dst"] {
            let field = knows.arrow_schema.field_with_name(column).unwrap();
            assert_eq!(field.data_type(), &DataType::Utf8);
        }
    }

    #[test]
    fn test_key_property_tracking() {
        let input = r#"
node Signal {
    slug: String @key
    title: String
}
node Person {
    name: String
}
edge Emits: Person -> Signal
"#;
        let catalog = catalog_for(input);
        assert_eq!(catalog.node_types["Signal"].key_property(), Some("slug"));
        assert_eq!(catalog.node_types["Person"].key_property(), None);
    }

    #[test]
    fn test_edge_lookup_handles_non_ascii_leading_character() {
        // Built by hand rather than parsed, so the edge name is used as-is.
        let person = SchemaDecl::Node(NodeDecl {
            name: "Person".to_string(),
            annotations: vec![],
            implements: vec![],
            properties: vec![crate::schema::ast::PropDecl {
                name: "name".to_string(),
                prop_type: PropType::scalar(ScalarType::String, false),
                annotations: vec![],
            }],
            constraints: vec![],
        });
        let accented_edge = SchemaDecl::Edge(EdgeDecl {
            name: "Édges".to_string(),
            from_type: "Person".to_string(),
            to_type: "Person".to_string(),
            cardinality: Default::default(),
            annotations: vec![],
            properties: vec![],
            constraints: vec![],
        });
        let schema = SchemaFile {
            declarations: vec![person, accented_edge],
        };

        let catalog = build_catalog(&schema).unwrap();
        assert!(catalog.lookup_edge_by_name("édges").is_some());
    }

    #[test]
    fn test_edge_lookup_rejects_case_fold_collisions() {
        let input = r#"
node Person { name: String }
edge Knows: Person -> Person
edge KNOWS: Person -> Person
"#;
        let parsed = parse_schema(input).unwrap();
        let message = build_catalog(&parsed).unwrap_err().to_string();
        assert!(message.contains("case folding"));
    }

    #[test]
    fn test_catalog_composite_unique() {
        let input = r#"
node Person {
    first: String
    last: String
    @unique(first, last)
}
"#;
        let catalog = catalog_for(input);
        let expected = vec!["first".to_string(), "last".to_string()];
        assert!(
            catalog.node_types["Person"]
                .unique_constraints
                .contains(&expected)
        );
    }

    #[test]
    fn test_catalog_composite_index() {
        let input = r#"
node Event {
    category: String
    date: Date
    @index(category, date)
}
"#;
        let catalog = catalog_for(input);
        let expected = vec!["category".to_string(), "date".to_string()];
        assert!(catalog.node_types["Event"].indices.contains(&expected));
    }

    #[test]
    fn test_catalog_edge_cardinality() {
        let input = r#"
node Person { name: String }
node Company { name: String }
edge WorksAt: Person -> Company @card(0..1)
"#;
        let catalog = catalog_for(input);
        let works_at = &catalog.edge_types["WorksAt"];
        assert_eq!(works_at.cardinality.min, 0);
        assert_eq!(works_at.cardinality.max, Some(1));
    }

    #[test]
    fn test_catalog_interfaces_stored() {
        let input = r#"
interface Named {
    name: String
}
node Person implements Named {
    age: I32?
}
"#;
        let catalog = catalog_for(input);
        assert!(catalog.interfaces.contains_key("Named"));
        assert!(catalog.interfaces["Named"].properties.contains_key("name"));
    }

    #[test]
    fn test_catalog_node_implements() {
        let input = r#"
interface Named {
    name: String
}
node Person implements Named {
    age: I32?
}
"#;
        let catalog = catalog_for(input);
        assert_eq!(catalog.node_types["Person"].implements, vec!["Named"]);
    }

    #[test]
    fn test_key_implies_index() {
        let input = r#"
node Signal {
    slug: String @key
    title: String
}
"#;
        let catalog = catalog_for(input);
        let signal = &catalog.node_types["Signal"];
        assert!(signal.indices.contains(&vec!["slug".to_string()]));
    }
}