pub mod schema_ir;
pub mod schema_plan;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use crate::error::{NanoError, Result};
use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
use crate::types::{PropType, ScalarType};
/// Resolved view of a schema: every interface, node type, and edge type that
/// `build_catalog` collected, each keyed by its declared name.
#[derive(Debug, Clone)]
pub struct Catalog {
/// Node types keyed by declared node name.
pub node_types: HashMap<String, NodeType>,
/// Edge types keyed by declared edge name.
pub edge_types: HashMap<String, EdgeType>,
/// Lowercased edge name -> declared edge name. Consulted by
/// `lookup_edge_by_name` when the exact name is not found in `edge_types`.
pub edge_name_index: HashMap<String, String>,
/// Interfaces keyed by declared interface name.
pub interfaces: HashMap<String, InterfaceType>,
}
/// An interface declaration as stored in the catalog.
#[derive(Debug, Clone)]
pub struct InterfaceType {
/// Declared interface name.
pub name: String,
/// Property name -> property type, as declared on the interface.
pub properties: HashMap<String, PropType>,
}
/// A node type as stored in the catalog, including its constraints and the
/// Arrow schema derived from its properties.
#[derive(Debug, Clone)]
pub struct NodeType {
/// Declared node type name.
pub name: String,
/// Interface names copied from the node declaration's `implements` list.
pub implements: Vec<String>,
/// Property name -> property type, as declared on the node.
pub properties: HashMap<String, PropType>,
/// Columns of the key constraint, if one was declared. When several key
/// constraints are declared, `build_catalog` keeps the last one.
pub key: Option<Vec<String>>,
/// One column list per declared unique constraint.
pub unique_constraints: Vec<Vec<String>>,
/// One column list per declared index constraint, plus one entry for each
/// key constraint (a key also registers its columns here).
pub indices: Vec<Vec<String>>,
/// Range constraints declared on the node.
pub range_constraints: Vec<RangeConstraint>,
/// Check constraints declared on the node.
pub check_constraints: Vec<CheckConstraint>,
/// Property name -> value of that property's `embed` annotation, for
/// properties that carry such an annotation with a value.
pub embed_sources: HashMap<String, String>,
/// Names of properties whose scalar type is `ScalarType::Blob`.
pub blob_properties: HashSet<String>,
/// Arrow schema: a non-nullable Utf8 `id` field followed by one field per
/// declared property, in declaration order.
pub arrow_schema: SchemaRef,
}
impl NodeType {
    /// Returns the first column of the key constraint.
    ///
    /// Yields `None` when the node type has no key, or when the key's column
    /// list is empty.
    pub fn key_property(&self) -> Option<&str> {
        let columns = self.key.as_deref()?;
        columns.first().map(String::as_str)
    }
}
/// A numeric range constraint on a single property.
#[derive(Debug, Clone)]
pub struct RangeConstraint {
/// Name of the constrained property.
pub property: String,
/// Lower bound, if one was declared.
pub min: Option<LiteralValue>,
/// Upper bound, if one was declared.
pub max: Option<LiteralValue>,
}
/// Numeric literal used for the bounds of a `RangeConstraint`.
#[derive(Debug, Clone)]
pub enum LiteralValue {
/// Signed 64-bit integer literal.
Integer(i64),
/// 64-bit floating-point literal.
Float(f64),
}
/// A check constraint on a single property.
#[derive(Debug, Clone)]
pub struct CheckConstraint {
/// Name of the constrained property.
pub property: String,
/// Pattern text copied verbatim from the schema constraint.
/// NOTE(review): presumably a regular expression; confirm against the
/// code that enforces check constraints.
pub pattern: String,
}
/// An edge type as stored in the catalog, including its endpoints and the
/// Arrow schema derived from its properties.
#[derive(Debug, Clone)]
pub struct EdgeType {
/// Declared edge type name.
pub name: String,
/// Name of the source node type; `build_catalog` verifies that it exists.
pub from_type: String,
/// Name of the target node type; `build_catalog` verifies that it exists.
pub to_type: String,
/// Cardinality copied from the edge declaration.
pub cardinality: Cardinality,
/// Property name -> property type, as declared on the edge.
pub properties: HashMap<String, PropType>,
/// One column list per declared unique constraint.
pub unique_constraints: Vec<Vec<String>>,
/// One column list per declared index constraint.
pub indices: Vec<Vec<String>>,
/// Names of properties whose scalar type is `ScalarType::Blob`.
pub blob_properties: HashSet<String>,
/// Arrow schema: non-nullable Utf8 `id`, `src`, and `dst` fields followed
/// by one field per declared property, in declaration order.
pub arrow_schema: SchemaRef,
}
impl Catalog {
    /// Finds an edge type by name.
    ///
    /// An exact match against the declared name is tried first. If that
    /// misses, the name is normalized with `normalize_edge_name` and resolved
    /// through `edge_name_index`, which makes the lookup case-insensitive.
    /// Returns `None` when neither attempt finds an edge.
    pub fn lookup_edge_by_name(&self, name: &str) -> Option<&EdgeType> {
        self.edge_types.get(name).or_else(|| {
            let declared = self.edge_name_index.get(&normalize_edge_name(name))?;
            self.edge_types.get(declared)
        })
    }
}
/// Produces the lookup key used for case-insensitive edge name matching:
/// the Unicode-aware lowercase form of `name`.
fn normalize_edge_name(name: &str) -> String {
    str::to_lowercase(name)
}
/// Converts a schema-level constraint bound into the catalog's literal
/// representation, preserving the numeric kind and value.
fn bound_to_literal(b: &ConstraintBound) -> LiteralValue {
    match *b {
        ConstraintBound::Integer(value) => LiteralValue::Integer(value),
        ConstraintBound::Float(value) => LiteralValue::Float(value),
    }
}
pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
let mut node_types = HashMap::new();
let mut edge_types = HashMap::new();
let mut edge_name_index = HashMap::new();
let mut interfaces = HashMap::new();
for decl in &schema.declarations {
if let SchemaDecl::Interface(iface) = decl {
let mut properties = HashMap::new();
for prop in &iface.properties {
properties.insert(prop.name.clone(), prop.prop_type.clone());
}
interfaces.insert(
iface.name.clone(),
InterfaceType {
name: iface.name.clone(),
properties,
},
);
}
}
for decl in &schema.declarations {
if let SchemaDecl::Node(node) = decl {
if node_types.contains_key(&node.name) {
return Err(NanoError::Catalog(format!(
"duplicate node type: {}",
node.name
)));
}
let mut properties = HashMap::new();
let mut embed_sources = HashMap::new();
let mut blob_properties = HashSet::new();
for prop in &node.properties {
properties.insert(prop.name.clone(), prop.prop_type.clone());
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
blob_properties.insert(prop.name.clone());
}
if let Some(source_prop) = prop
.annotations
.iter()
.find(|ann| ann.name == "embed")
.and_then(|ann| ann.value.clone())
{
embed_sources.insert(prop.name.clone(), source_prop);
}
}
let mut key: Option<Vec<String>> = None;
let mut unique_constraints = Vec::new();
let mut indices = Vec::new();
let mut range_constraints = Vec::new();
let mut check_constraints = Vec::new();
for constraint in &node.constraints {
match constraint {
Constraint::Key(cols) => {
key = Some(cols.clone());
indices.push(cols.clone());
}
Constraint::Unique(cols) => {
unique_constraints.push(cols.clone());
}
Constraint::Index(cols) => {
indices.push(cols.clone());
}
Constraint::Range { property, min, max } => {
range_constraints.push(RangeConstraint {
property: property.clone(),
min: min.as_ref().map(bound_to_literal),
max: max.as_ref().map(bound_to_literal),
});
}
Constraint::Check { property, pattern } => {
check_constraints.push(CheckConstraint {
property: property.clone(),
pattern: pattern.clone(),
});
}
}
}
let mut fields = vec![Field::new("id", DataType::Utf8, false)];
for prop in &node.properties {
fields.push(Field::new(
&prop.name,
prop.prop_type.to_arrow(),
prop.prop_type.nullable,
));
}
let arrow_schema = Arc::new(Schema::new(fields));
node_types.insert(
node.name.clone(),
NodeType {
name: node.name.clone(),
implements: node.implements.clone(),
properties,
key,
unique_constraints,
indices,
range_constraints,
check_constraints,
embed_sources,
blob_properties,
arrow_schema,
},
);
}
}
for decl in &schema.declarations {
if let SchemaDecl::Edge(edge) = decl {
if edge_types.contains_key(&edge.name) {
return Err(NanoError::Catalog(format!(
"duplicate edge type: {}",
edge.name
)));
}
if !node_types.contains_key(&edge.from_type) {
return Err(NanoError::Catalog(format!(
"edge {} references unknown source type: {}",
edge.name, edge.from_type
)));
}
if !node_types.contains_key(&edge.to_type) {
return Err(NanoError::Catalog(format!(
"edge {} references unknown target type: {}",
edge.name, edge.to_type
)));
}
let mut properties = HashMap::new();
let mut blob_properties = HashSet::new();
let mut fields = vec![
Field::new("id", DataType::Utf8, false),
Field::new("src", DataType::Utf8, false),
Field::new("dst", DataType::Utf8, false),
];
for prop in &edge.properties {
properties.insert(prop.name.clone(), prop.prop_type.clone());
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
blob_properties.insert(prop.name.clone());
}
fields.push(Field::new(
&prop.name,
prop.prop_type.to_arrow(),
prop.prop_type.nullable,
));
}
let mut unique_constraints = Vec::new();
let mut edge_indices = Vec::new();
for constraint in &edge.constraints {
match constraint {
Constraint::Unique(cols) => unique_constraints.push(cols.clone()),
Constraint::Index(cols) => edge_indices.push(cols.clone()),
_ => {} }
}
let normalized_name = normalize_edge_name(&edge.name);
if let Some(existing) = edge_name_index.get(&normalized_name)
&& existing != &edge.name
{
return Err(NanoError::Catalog(format!(
"edge name collision after case folding: '{}' conflicts with '{}'",
edge.name, existing
)));
}
edge_name_index.insert(normalized_name, edge.name.clone());
edge_types.insert(
edge.name.clone(),
EdgeType {
name: edge.name.clone(),
from_type: edge.from_type.clone(),
to_type: edge.to_type.clone(),
cardinality: edge.cardinality.clone(),
properties,
unique_constraints,
indices: edge_indices,
blob_properties,
arrow_schema: Arc::new(Schema::new(fields)),
},
);
}
}
Ok(Catalog {
node_types,
edge_types,
edge_name_index,
interfaces,
})
}
// Unit tests for `build_catalog` and `Catalog::lookup_edge_by_name`.
// Most tests parse a small schema text with `parse_schema`; one builds the
// `SchemaFile` by hand.
#[cfg(test)]
mod tests {
use super::*;
use crate::schema::ast::{EdgeDecl, NodeDecl};
use crate::schema::parser::parse_schema;
use crate::types::PropType;
// Shared fixture: two node types (Person, Company) and two edge types
// (Knows, WorksAt), each edge with one optional property.
fn test_schema() -> &'static str {
r#"
node Person {
name: String
age: I32?
}
node Company {
name: String
}
edge Knows: Person -> Person {
since: Date?
}
edge WorksAt: Person -> Company {
title: String?
}
"#
}
// The fixture yields two node types and two edge types, keyed by name.
#[test]
fn test_build_catalog() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert_eq!(catalog.node_types.len(), 2);
assert_eq!(catalog.edge_types.len(), 2);
assert!(catalog.node_types.contains_key("Person"));
assert!(catalog.node_types.contains_key("Company"));
}
// Edge lookup is case-insensitive: "knows" and "KNOWS" both resolve to the
// edge declared as "Knows".
#[test]
fn test_edge_lookup() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
let edge = catalog.lookup_edge_by_name("knows").unwrap();
assert_eq!(edge.from_type, "Person");
assert_eq!(edge.to_type, "Person");
let upper = catalog.lookup_edge_by_name("KNOWS").unwrap();
assert_eq!(upper.name, "Knows");
}
// A node's Arrow schema is the `id` field plus one field per declared
// property; Person declares `name` and `age`, hence three fields.
#[test]
fn test_node_arrow_schema() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
let person = &catalog.node_types["Person"];
assert_eq!(person.arrow_schema.fields().len(), 3); }
// Declaring the same node type twice is an error.
#[test]
fn test_duplicate_node_error() {
let input = r#"
node Person { name: String }
node Person { age: I32 }
"#;
let schema = parse_schema(input).unwrap();
assert!(build_catalog(&schema).is_err());
}
// An edge whose target names an undeclared node type is an error.
#[test]
fn test_bad_edge_endpoint() {
let input = r#"
node Person { name: String }
edge Knows: Person -> Alien
"#;
let schema = parse_schema(input).unwrap();
assert!(build_catalog(&schema).is_err());
}
// The built-in columns (`id` on nodes; `id`, `src`, `dst` on edges) are all
// Utf8 in the derived Arrow schemas.
#[test]
fn test_id_fields_are_utf8() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
let person = &catalog.node_types["Person"];
assert_eq!(
person
.arrow_schema
.field_with_name("id")
.unwrap()
.data_type(),
&DataType::Utf8
);
let knows = &catalog.edge_types["Knows"];
assert_eq!(
knows
.arrow_schema
.field_with_name("id")
.unwrap()
.data_type(),
&DataType::Utf8
);
assert_eq!(
knows
.arrow_schema
.field_with_name("src")
.unwrap()
.data_type(),
&DataType::Utf8
);
assert_eq!(
knows
.arrow_schema
.field_with_name("dst")
.unwrap()
.data_type(),
&DataType::Utf8
);
}
// A property marked `@key` in the schema text is reported by
// `key_property()`; a node type without a key reports `None`.
#[test]
fn test_key_property_tracking() {
let input = r#"
node Signal {
slug: String @key
title: String
}
node Person {
name: String
}
edge Emits: Person -> Signal
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert_eq!(catalog.node_types["Signal"].key_property(), Some("slug"));
assert_eq!(catalog.node_types["Person"].key_property(), None);
}
// Builds the `SchemaFile` by hand (bypassing the parser) with an edge named
// "Édges" and checks that the lowercase form "édges" still resolves, i.e.
// name normalization handles a non-ASCII first character.
#[test]
fn test_edge_lookup_handles_non_ascii_leading_character() {
let schema = SchemaFile {
declarations: vec![
SchemaDecl::Node(NodeDecl {
name: "Person".to_string(),
annotations: vec![],
implements: vec![],
properties: vec![crate::schema::ast::PropDecl {
name: "name".to_string(),
prop_type: PropType::scalar(ScalarType::String, false),
annotations: vec![],
}],
constraints: vec![],
}),
SchemaDecl::Edge(EdgeDecl {
name: "Édges".to_string(),
from_type: "Person".to_string(),
to_type: "Person".to_string(),
cardinality: Default::default(),
annotations: vec![],
properties: vec![],
constraints: vec![],
}),
],
};
let catalog = build_catalog(&schema).unwrap();
assert!(catalog.lookup_edge_by_name("édges").is_some());
}
// Two edge names that differ only by case ("Knows" / "KNOWS") are rejected
// with an error message mentioning "case folding".
#[test]
fn test_edge_lookup_rejects_case_fold_collisions() {
let input = r#"
node Person { name: String }
edge Knows: Person -> Person
edge KNOWS: Person -> Person
"#;
let schema = parse_schema(input).unwrap();
let err = build_catalog(&schema).unwrap_err();
assert!(err.to_string().contains("case folding"));
}
// A multi-column `@unique(...)` is stored as a single column list.
#[test]
fn test_catalog_composite_unique() {
let input = r#"
node Person {
first: String
last: String
@unique(first, last)
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let person = &catalog.node_types["Person"];
assert!(
person
.unique_constraints
.contains(&vec!["first".to_string(), "last".to_string()])
);
}
// A multi-column `@index(...)` is stored as a single column list.
#[test]
fn test_catalog_composite_index() {
let input = r#"
node Event {
category: String
date: Date
@index(category, date)
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let event = &catalog.node_types["Event"];
assert!(
event
.indices
.contains(&vec!["category".to_string(), "date".to_string()])
);
}
// `@card(0..1)` on an edge is carried into the catalog as min 0, max Some(1).
#[test]
fn test_catalog_edge_cardinality() {
let input = r#"
node Person { name: String }
node Company { name: String }
edge WorksAt: Person -> Company @card(0..1)
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let edge = &catalog.edge_types["WorksAt"];
assert_eq!(edge.cardinality.min, 0);
assert_eq!(edge.cardinality.max, Some(1));
}
// Interfaces are stored in the catalog together with their properties.
#[test]
fn test_catalog_interfaces_stored() {
let input = r#"
interface Named {
name: String
}
node Person implements Named {
age: I32?
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert!(catalog.interfaces.contains_key("Named"));
assert!(catalog.interfaces["Named"].properties.contains_key("name"));
}
// A node's `implements` list is carried into the catalog.
#[test]
fn test_catalog_node_implements() {
let input = r#"
interface Named {
name: String
}
node Person implements Named {
age: I32?
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert_eq!(catalog.node_types["Person"].implements, vec!["Named"]);
}
// Declaring a key also registers the key columns in `indices`.
#[test]
fn test_key_implies_index() {
let input = r#"
node Signal {
slug: String @key
title: String
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let signal = &catalog.node_types["Signal"];
assert!(signal.indices.contains(&vec!["slug".to_string()]));
}
}