omnigraph-compiler 0.4.2

Schema/query compiler for Omnigraph. Zero Lance dependency.
Documentation
use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use crate::catalog::{Catalog, build_catalog};
use crate::error::{NanoError, Result};
use crate::schema::ast::{Annotation, Cardinality, Constraint, PropDecl, SchemaDecl, SchemaFile};
use crate::types::PropType;

/// Version number that `build_schema_ir` writes into `SchemaIR::ir_version`.
/// `build_catalog_from_ir` rejects an IR carrying any other value.
const SCHEMA_IR_VERSION: u32 = 1;

/// Serializable intermediate representation of a whole schema.
///
/// When produced by `build_schema_ir`, each of the three lists is sorted by
/// `name`, so the value does not depend on declaration order in the source.
/// `schema_ir_hash` hashes the JSON serialization of this struct.
// NOTE(review): the derived serializer presumably emits fields in declaration
// order, so reordering fields here would change the JSON and the hash — confirm
// before touching the layout.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaIR {
    /// IR format version; `build_schema_ir` sets this to `SCHEMA_IR_VERSION`.
    pub ir_version: u32,
    /// Interface declarations.
    pub interfaces: Vec<InterfaceIR>,
    /// Node declarations.
    pub nodes: Vec<NodeIR>,
    /// Edge declarations.
    pub edges: Vec<EdgeIR>,
}

/// IR form of one interface declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceIR {
    /// Interface name as written in the schema.
    pub name: String,
    /// Id that `build_schema_ir` derives by hashing `"interface:<name>"`.
    pub type_id: u32,
    /// Properties of the interface; `build_schema_ir` sorts them by name.
    pub properties: Vec<PropertyIR>,
}

/// IR form of one node declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodeIR {
    /// Node type name as written in the schema.
    pub name: String,
    /// Id that `build_schema_ir` derives by hashing `"node:<name>"`.
    pub type_id: u32,
    /// Node-level annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
    /// Names listed in the node's `implements` field; `build_schema_ir` sorts
    /// and de-duplicates them.
    pub implements: Vec<String>,
    /// Properties of the node; `build_schema_ir` sorts them by name.
    pub properties: Vec<PropertyIR>,
    /// Node constraints; `build_schema_ir` normalizes and sorts them.
    pub constraints: Vec<Constraint>,
}

/// IR form of one edge declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EdgeIR {
    /// Edge type name as written in the schema.
    pub name: String,
    /// Id that `build_schema_ir` derives by hashing `"edge:<name>"`.
    pub type_id: u32,
    /// Source endpoint type name, copied unchanged from the declaration.
    pub from_type: String,
    /// Target endpoint type name, copied unchanged from the declaration.
    pub to_type: String,
    /// Cardinality, copied unchanged from the declaration.
    pub cardinality: Cardinality,
    /// Edge-level annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
    /// Properties of the edge; `build_schema_ir` sorts them by name.
    pub properties: Vec<PropertyIR>,
    /// Edge constraints; `build_schema_ir` normalizes and sorts them.
    pub constraints: Vec<Constraint>,
}

/// IR form of one property of an interface, node or edge.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyIR {
    /// Property name as written in the schema.
    pub name: String,
    /// Id that `canonical_properties` derives by hashing
    /// `"<kind>:<owner name>:<property name>"`.
    pub prop_id: u32,
    /// Property type; `canonical_properties` sorts and de-duplicates any enum
    /// values it carries.
    pub prop_type: PropType,
    /// Property annotations; `canonical_properties` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
}

/// Lowers a parsed [`SchemaFile`] into its canonical [`SchemaIR`].
///
/// Each declaration receives a hashed `type_id`. Its annotations, `implements`
/// list, properties and constraints are canonicalized, and the resulting
/// interface, node and edge lists are sorted by name so that declaration order
/// in the source does not show up in the IR.
///
/// # Errors
///
/// Returns `NanoError::Catalog` when two declarations hash to the same type id,
/// or when two properties of one declaration hash to the same property id.
pub fn build_schema_ir(schema: &SchemaFile) -> Result<SchemaIR> {
    // One id namespace is shared by interfaces, nodes and edges.
    let mut type_ids_in_use: HashMap<u32, String> = HashMap::new();
    let mut ir = SchemaIR {
        ir_version: SCHEMA_IR_VERSION,
        interfaces: Vec::new(),
        nodes: Vec::new(),
        edges: Vec::new(),
    };

    for declaration in &schema.declarations {
        match declaration {
            SchemaDecl::Interface(decl) => {
                let type_id = stable_type_id("interface", &decl.name);
                check_type_id_collision(&mut type_ids_in_use, type_id, &decl.name)?;
                let properties = canonical_properties("interface", &decl.name, &decl.properties)?;
                ir.interfaces.push(InterfaceIR {
                    name: decl.name.clone(),
                    type_id,
                    properties,
                });
            }
            SchemaDecl::Node(decl) => {
                let type_id = stable_type_id("node", &decl.name);
                check_type_id_collision(&mut type_ids_in_use, type_id, &decl.name)?;
                let properties = canonical_properties("node", &decl.name, &decl.properties)?;
                ir.nodes.push(NodeIR {
                    name: decl.name.clone(),
                    type_id,
                    annotations: canonical_annotations(&decl.annotations),
                    implements: canonical_strings(&decl.implements),
                    properties,
                    constraints: canonical_constraints(&decl.constraints),
                });
            }
            SchemaDecl::Edge(decl) => {
                let type_id = stable_type_id("edge", &decl.name);
                check_type_id_collision(&mut type_ids_in_use, type_id, &decl.name)?;
                let properties = canonical_properties("edge", &decl.name, &decl.properties)?;
                ir.edges.push(EdgeIR {
                    name: decl.name.clone(),
                    type_id,
                    from_type: decl.from_type.clone(),
                    to_type: decl.to_type.clone(),
                    cardinality: decl.cardinality.clone(),
                    annotations: canonical_annotations(&decl.annotations),
                    properties,
                    constraints: canonical_constraints(&decl.constraints),
                });
            }
        }
    }

    // Name order makes the IR independent of declaration order.
    ir.interfaces.sort_by(|left, right| left.name.cmp(&right.name));
    ir.nodes.sort_by(|left, right| left.name.cmp(&right.name));
    ir.edges.sort_by(|left, right| left.name.cmp(&right.name));

    Ok(ir)
}

/// Rebuilds a [`Catalog`] from a previously produced [`SchemaIR`].
///
/// The IR is turned back into a `SchemaFile` whose declarations are all
/// interfaces, then all nodes, then all edges, and that file is handed to
/// `build_catalog`. The `type_id` and `prop_id` fields of the IR are not
/// carried over into the rebuilt declarations.
///
/// # Errors
///
/// Returns `NanoError::Catalog` when `ir.ir_version` differs from
/// `SCHEMA_IR_VERSION`; otherwise returns whatever `build_catalog` returns.
pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
    if ir.ir_version != SCHEMA_IR_VERSION {
        let message = format!(
            "unsupported schema ir_version {} (expected {})",
            ir.ir_version, SCHEMA_IR_VERSION
        );
        return Err(NanoError::Catalog(message));
    }

    let interface_decls = ir.interfaces.iter().map(|interface| {
        SchemaDecl::Interface(crate::schema::ast::InterfaceDecl {
            name: interface.name.clone(),
            properties: interface
                .properties
                .iter()
                .map(property_decl_from_ir)
                .collect(),
        })
    });

    let node_decls = ir.nodes.iter().map(|node| {
        SchemaDecl::Node(crate::schema::ast::NodeDecl {
            name: node.name.clone(),
            annotations: node.annotations.clone(),
            implements: node.implements.clone(),
            properties: node.properties.iter().map(property_decl_from_ir).collect(),
            constraints: node.constraints.clone(),
        })
    });

    let edge_decls = ir.edges.iter().map(|edge| {
        SchemaDecl::Edge(crate::schema::ast::EdgeDecl {
            name: edge.name.clone(),
            from_type: edge.from_type.clone(),
            to_type: edge.to_type.clone(),
            cardinality: edge.cardinality.clone(),
            annotations: edge.annotations.clone(),
            properties: edge.properties.iter().map(property_decl_from_ir).collect(),
            constraints: edge.constraints.clone(),
        })
    });

    let schema = SchemaFile {
        declarations: interface_decls.chain(node_decls).chain(edge_decls).collect(),
    };
    build_catalog(&schema)
}

/// Serializes the IR to compact JSON.
///
/// # Errors
///
/// Returns `NanoError::Catalog` wrapping the serializer's message if
/// serialization fails.
pub fn schema_ir_json(ir: &SchemaIR) -> Result<String> {
    match serde_json::to_string(ir) {
        Ok(json) => Ok(json),
        Err(err) => Err(NanoError::Catalog(format!(
            "serialize schema ir error: {}",
            err
        ))),
    }
}

/// Serializes the IR to pretty-printed JSON.
///
/// # Errors
///
/// Returns `NanoError::Catalog` wrapping the serializer's message if
/// serialization fails.
pub fn schema_ir_pretty_json(ir: &SchemaIR) -> Result<String> {
    match serde_json::to_string_pretty(ir) {
        Ok(json) => Ok(json),
        Err(err) => Err(NanoError::Catalog(format!(
            "serialize schema ir error: {}",
            err
        ))),
    }
}

/// Returns the SHA-256 digest of the IR's compact JSON form, as lowercase hex
/// prefixed with `sha256:`.
///
/// # Errors
///
/// Propagates the error from `schema_ir_json` if serialization fails.
pub fn schema_ir_hash(ir: &SchemaIR) -> Result<String> {
    let digest = Sha256::digest(schema_ir_json(ir)?.as_bytes());
    Ok(format!("sha256:{:x}", digest))
}

fn property_decl_from_ir(property: &PropertyIR) -> PropDecl {
    PropDecl {
        name: property.name.clone(),
        prop_type: property.prop_type.clone(),
        annotations: property.annotations.clone(),
    }
}

/// Returns a sorted copy of `values` with duplicates removed.
///
/// The input slice is left untouched.
fn canonical_strings(values: &[String]) -> Vec<String> {
    let mut unique: Vec<String> = values.iter().cloned().collect();
    // Equal strings are indistinguishable, so an unstable sort yields the same result.
    unique.sort_unstable();
    unique.dedup();
    unique
}

/// Returns a copy of `annotations` ordered by name, with ties broken by value.
///
/// The sort is stable and no entries are removed.
fn canonical_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    let mut ordered: Vec<Annotation> = annotations.iter().cloned().collect();
    ordered.sort_by(|lhs, rhs| match lhs.name.cmp(&rhs.name) {
        std::cmp::Ordering::Equal => lhs.value.cmp(&rhs.value),
        by_name => by_name,
    });
    ordered
}

/// Returns a copy of `prop_type` whose enum values, if any, are sorted and
/// de-duplicated. Everything else is cloned unchanged.
fn canonical_prop_type(prop_type: &PropType) -> PropType {
    let mut canonical = prop_type.clone();
    if let Some(variants) = canonical.enum_values.as_mut() {
        variants.sort();
        variants.dedup();
    }
    canonical
}

/// Builds the canonical property list for one declaration.
///
/// `kind` and `owner_name` are joined as `"<kind>:<owner_name>"` to form the
/// hashing scope for property ids. Each property gets its hashed `prop_id`, a
/// canonicalized type and sorted annotations. The result is sorted by
/// property name.
///
/// # Errors
///
/// Returns `NanoError::Catalog` as soon as two properties of this owner
/// produce the same `prop_id`.
fn canonical_properties(
    kind: &str,
    owner_name: &str,
    properties: &[PropDecl],
) -> Result<Vec<PropertyIR>> {
    let owner_key = format!("{}:{}", kind, owner_name);
    // Maps each id handed out so far to the property name that claimed it.
    let mut ids_in_use: HashMap<u32, String> = HashMap::new();
    let mut canonical = Vec::with_capacity(properties.len());

    for decl in properties {
        let prop_id = stable_prop_id(&owner_key, &decl.name);
        if let Some(previous) = ids_in_use.insert(prop_id, decl.name.clone()) {
            return Err(NanoError::Catalog(format!(
                "property id collision on {}: '{}' and '{}' both hash to {}",
                owner_name, previous, decl.name, prop_id
            )));
        }
        canonical.push(PropertyIR {
            name: decl.name.clone(),
            prop_id,
            prop_type: canonical_prop_type(&decl.prop_type),
            annotations: canonical_annotations(&decl.annotations),
        });
    }

    canonical.sort_by(|left, right| left.name.cmp(&right.name));
    Ok(canonical)
}

/// Returns the constraints in canonical form: each one is passed through
/// `normalize_constraint`, then the list is ordered by `constraint_sort_key`.
///
/// The sort key is a freshly formatted `String`, so `sort_by_cached_key` is
/// used to build it once per constraint instead of once per comparison. The
/// sort is stable, so constraints with equal keys keep their input order.
fn canonical_constraints(constraints: &[Constraint]) -> Vec<Constraint> {
    let mut canonical: Vec<Constraint> = constraints
        .iter()
        .cloned()
        .map(normalize_constraint)
        .collect();
    canonical.sort_by_cached_key(constraint_sort_key);
    canonical
}

/// Puts a single constraint into canonical form.
///
/// The column lists of `Key`, `Unique` and `Index` are sorted in place;
/// `Range` and `Check` are returned unchanged.
// NOTE(review): sorting treats column order as insignificant for all three
// list constraints — confirm that holds for multi-column `Index`/`Key`.
fn normalize_constraint(mut constraint: Constraint) -> Constraint {
    match &mut constraint {
        Constraint::Key(columns) => columns.sort(),
        Constraint::Unique(columns) => columns.sort(),
        Constraint::Index(columns) => columns.sort(),
        Constraint::Range { .. } | Constraint::Check { .. } => {}
    }
    constraint
}

/// Builds the string used to order constraints within a declaration.
///
/// List constraints yield `"<tag>:<columns joined by ','>"` with the tag
/// `key`, `unique` or `index`. `Range` yields `"range:<property>:<min>:<max>"`
/// using the `Debug` form of the bounds, and `Check` yields
/// `"check:<property>:<pattern>"`.
fn constraint_sort_key(constraint: &Constraint) -> String {
    let (tag, columns) = match constraint {
        Constraint::Range { property, min, max } => {
            return format!("range:{}:{:?}:{:?}", property, min, max);
        }
        Constraint::Check { property, pattern } => {
            return format!("check:{}:{}", property, pattern);
        }
        Constraint::Key(columns) => ("key", columns),
        Constraint::Unique(columns) => ("unique", columns),
        Constraint::Index(columns) => ("index", columns),
    };
    format!("{}:{}", tag, columns.join(","))
}

/// Derives a type id by hashing `"<kind>:<name>"` with `fnv1a_u32`.
fn stable_type_id(kind: &str, name: &str) -> u32 {
    let hash_input = [kind, name].join(":");
    fnv1a_u32(&hash_input)
}

/// Derives a property id by hashing `"<owner>:<name>"` with `fnv1a_u32`.
fn stable_prop_id(owner: &str, name: &str) -> u32 {
    let hash_input = [owner, name].join(":");
    fnv1a_u32(&hash_input)
}

/// Computes the 32-bit FNV-1a hash of the UTF-8 bytes of `value`.
///
/// A raw hash of `0` is remapped to `1`, so the function never returns zero.
fn fnv1a_u32(value: &str) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;

    let hash = value.bytes().fold(OFFSET_BASIS, |acc, byte| {
        (acc ^ u32::from(byte)).wrapping_mul(PRIME)
    });

    match hash {
        0 => 1,
        nonzero => nonzero,
    }
}

/// Records `type_id` as belonging to `name`, failing if the id was already taken.
///
/// The map entry is overwritten with `name` even when a collision is reported.
///
/// # Errors
///
/// Returns `NanoError::Catalog` naming both the earlier owner and `name` when
/// `type_id` is already present in `seen_type_ids`.
fn check_type_id_collision(
    seen_type_ids: &mut HashMap<u32, String>,
    type_id: u32,
    name: &str,
) -> Result<()> {
    match seen_type_ids.insert(type_id, name.to_string()) {
        None => Ok(()),
        Some(previous) => Err(NanoError::Catalog(format!(
            "type id collision: '{}' and '{}' both hash to {}",
            previous, name, type_id
        ))),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::catalog::build_catalog;
    use crate::schema::parser::parse_schema;

    /// Swapping declaration order and property order in the source text must
    /// change neither the IR nor its hash.
    #[test]
    fn schema_ir_hash_is_stable_across_source_ordering_noise() {
        let node_first_source = r#"
node Person {
    age: I32?
    name: String @key
}

edge Knows: Person -> Person {
    since: Date?
}
"#;
        let edge_first_source = r#"
edge Knows: Person -> Person {
    since: Date?
}

node Person {
    name: String @key
    age: I32?
}
"#;

        let node_first_schema = parse_schema(node_first_source).unwrap();
        let edge_first_schema = parse_schema(edge_first_source).unwrap();

        let node_first_ir = build_schema_ir(&node_first_schema).unwrap();
        let edge_first_ir = build_schema_ir(&edge_first_schema).unwrap();

        assert_eq!(node_first_ir, edge_first_ir);

        let node_first_hash = schema_ir_hash(&node_first_ir).unwrap();
        let edge_first_hash = schema_ir_hash(&edge_first_ir).unwrap();
        assert_eq!(node_first_hash, edge_first_hash);
    }

    /// A catalog rebuilt from the IR must agree with one built directly from
    /// the AST on type counts, the node key property and the edge cardinality.
    #[test]
    fn build_catalog_from_ir_round_trips_core_catalog_fields() {
        let source = r#"
node Person @description("person") {
    name: String @key
    age: I32? @description("age")
}

edge Knows: Person -> Person @instruction("friendship") {
    since: Date?
}
"#;
        let schema = parse_schema(source).unwrap();

        let from_ast = build_catalog(&schema).unwrap();
        let ir = build_schema_ir(&schema).unwrap();
        let from_ir = build_catalog_from_ir(&ir).unwrap();

        assert_eq!(from_ast.node_types.len(), from_ir.node_types.len());
        assert_eq!(from_ast.edge_types.len(), from_ir.edge_types.len());
        assert_eq!(
            from_ast.node_types["Person"].key_property(),
            from_ir.node_types["Person"].key_property()
        );
        assert_eq!(
            from_ast.edge_types["Knows"].cardinality,
            from_ir.edge_types["Knows"].cardinality
        );
    }
}