Skip to main content

omnigraph_compiler/catalog/
schema_ir.rs

1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5
6use crate::catalog::{Catalog, build_catalog};
7use crate::error::{NanoError, Result};
8use crate::schema::ast::{Annotation, Cardinality, Constraint, PropDecl, SchemaDecl, SchemaFile};
9use crate::types::PropType;
10
/// Version tag stamped into every `SchemaIR` produced by `build_schema_ir`.
///
/// `build_catalog_from_ir` rejects any IR whose `ir_version` differs from this value.
const SCHEMA_IR_VERSION: u32 = 1;
12
/// Serializable intermediate representation of a whole schema.
///
/// When produced by `build_schema_ir`, each list is sorted by declaration name. The
/// compact JSON serialization of this struct is what `schema_ir_hash` digests, so the
/// field order here is part of the hashed form.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaIR {
    /// IR format version; `build_schema_ir` sets it to `SCHEMA_IR_VERSION`.
    pub ir_version: u32,
    /// Interface declarations.
    pub interfaces: Vec<InterfaceIR>,
    /// Node declarations.
    pub nodes: Vec<NodeIR>,
    /// Edge declarations.
    pub edges: Vec<EdgeIR>,
}
20
/// IR form of a single interface declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceIR {
    /// Interface name as declared.
    pub name: String,
    /// Id assigned by `build_schema_ir`: the 32-bit FNV-1a hash of `interface:<name>`.
    pub type_id: u32,
    /// Properties of the interface; `build_schema_ir` stores them sorted by name.
    pub properties: Vec<PropertyIR>,
}
27
/// IR form of a single node declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodeIR {
    /// Node type name as declared.
    pub name: String,
    /// Id assigned by `build_schema_ir`: the 32-bit FNV-1a hash of `node:<name>`.
    pub type_id: u32,
    /// Node-level annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
    /// Names from the declaration's `implements` list; `build_schema_ir` sorts and
    /// de-duplicates them.
    pub implements: Vec<String>,
    /// Properties of the node; `build_schema_ir` stores them sorted by name.
    pub properties: Vec<PropertyIR>,
    /// Node constraints; `build_schema_ir` sorts the column lists of key/unique/index
    /// constraints and then orders the constraints by a textual sort key.
    pub constraints: Vec<Constraint>,
}
37
/// IR form of a single edge declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EdgeIR {
    /// Edge type name as declared.
    pub name: String,
    /// Id assigned by `build_schema_ir`: the 32-bit FNV-1a hash of `edge:<name>`.
    pub type_id: u32,
    /// Copied unchanged from the declaration's `from_type`.
    pub from_type: String,
    /// Copied unchanged from the declaration's `to_type`.
    pub to_type: String,
    /// Copied unchanged from the declaration's `cardinality`.
    pub cardinality: Cardinality,
    /// Edge-level annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
    /// Properties of the edge; `build_schema_ir` stores them sorted by name.
    pub properties: Vec<PropertyIR>,
    /// Edge constraints; `build_schema_ir` sorts the column lists of key/unique/index
    /// constraints and then orders the constraints by a textual sort key.
    pub constraints: Vec<Constraint>,
}
49
/// IR form of a single property on an interface, node, or edge.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyIR {
    /// Property name as declared.
    pub name: String,
    /// Id assigned by `build_schema_ir`: the 32-bit FNV-1a hash of
    /// `<kind>:<owner name>:<property name>`. Not carried over when the IR is turned
    /// back into a `PropDecl`.
    pub prop_id: u32,
    /// Property type; `build_schema_ir` sorts and de-duplicates its `enum_values`
    /// when present.
    pub prop_type: PropType,
    /// Property annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
}
57
58pub fn build_schema_ir(schema: &SchemaFile) -> Result<SchemaIR> {
59    let mut seen_type_ids = HashMap::<u32, String>::new();
60    let mut interfaces = Vec::new();
61    let mut nodes = Vec::new();
62    let mut edges = Vec::new();
63
64    for decl in &schema.declarations {
65        match decl {
66            SchemaDecl::Interface(interface) => {
67                let type_id = stable_type_id("interface", &interface.name);
68                check_type_id_collision(&mut seen_type_ids, type_id, &interface.name)?;
69                interfaces.push(InterfaceIR {
70                    name: interface.name.clone(),
71                    type_id,
72                    properties: canonical_properties(
73                        "interface",
74                        &interface.name,
75                        &interface.properties,
76                    )?,
77                });
78            }
79            SchemaDecl::Node(node) => {
80                let type_id = stable_type_id("node", &node.name);
81                check_type_id_collision(&mut seen_type_ids, type_id, &node.name)?;
82                nodes.push(NodeIR {
83                    name: node.name.clone(),
84                    type_id,
85                    annotations: canonical_annotations(&node.annotations),
86                    implements: canonical_strings(&node.implements),
87                    properties: canonical_properties("node", &node.name, &node.properties)?,
88                    constraints: canonical_constraints(&node.constraints),
89                });
90            }
91            SchemaDecl::Edge(edge) => {
92                let type_id = stable_type_id("edge", &edge.name);
93                check_type_id_collision(&mut seen_type_ids, type_id, &edge.name)?;
94                edges.push(EdgeIR {
95                    name: edge.name.clone(),
96                    type_id,
97                    from_type: edge.from_type.clone(),
98                    to_type: edge.to_type.clone(),
99                    cardinality: edge.cardinality.clone(),
100                    annotations: canonical_annotations(&edge.annotations),
101                    properties: canonical_properties("edge", &edge.name, &edge.properties)?,
102                    constraints: canonical_constraints(&edge.constraints),
103                });
104            }
105        }
106    }
107
108    interfaces.sort_by(|a, b| a.name.cmp(&b.name));
109    nodes.sort_by(|a, b| a.name.cmp(&b.name));
110    edges.sort_by(|a, b| a.name.cmp(&b.name));
111
112    Ok(SchemaIR {
113        ir_version: SCHEMA_IR_VERSION,
114        interfaces,
115        nodes,
116        edges,
117    })
118}
119
120pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
121    if ir.ir_version != SCHEMA_IR_VERSION {
122        return Err(NanoError::Catalog(format!(
123            "unsupported schema ir_version {} (expected {})",
124            ir.ir_version, SCHEMA_IR_VERSION
125        )));
126    }
127
128    let schema = SchemaFile {
129        declarations: ir
130            .interfaces
131            .iter()
132            .map(|interface| {
133                SchemaDecl::Interface(crate::schema::ast::InterfaceDecl {
134                    name: interface.name.clone(),
135                    properties: interface
136                        .properties
137                        .iter()
138                        .map(property_decl_from_ir)
139                        .collect(),
140                })
141            })
142            .chain(ir.nodes.iter().map(|node| {
143                SchemaDecl::Node(crate::schema::ast::NodeDecl {
144                    name: node.name.clone(),
145                    annotations: node.annotations.clone(),
146                    implements: node.implements.clone(),
147                    properties: node.properties.iter().map(property_decl_from_ir).collect(),
148                    constraints: node.constraints.clone(),
149                })
150            }))
151            .chain(ir.edges.iter().map(|edge| {
152                SchemaDecl::Edge(crate::schema::ast::EdgeDecl {
153                    name: edge.name.clone(),
154                    from_type: edge.from_type.clone(),
155                    to_type: edge.to_type.clone(),
156                    cardinality: edge.cardinality.clone(),
157                    annotations: edge.annotations.clone(),
158                    properties: edge.properties.iter().map(property_decl_from_ir).collect(),
159                    constraints: edge.constraints.clone(),
160                })
161            }))
162            .collect(),
163    };
164
165    build_catalog(&schema)
166}
167
168pub fn schema_ir_json(ir: &SchemaIR) -> Result<String> {
169    serde_json::to_string(ir)
170        .map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
171}
172
173pub fn schema_ir_pretty_json(ir: &SchemaIR) -> Result<String> {
174    serde_json::to_string_pretty(ir)
175        .map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
176}
177
178pub fn schema_ir_hash(ir: &SchemaIR) -> Result<String> {
179    let json = schema_ir_json(ir)?;
180    let mut hasher = Sha256::new();
181    hasher.update(json.as_bytes());
182    Ok(format!("sha256:{:x}", hasher.finalize()))
183}
184
185fn property_decl_from_ir(property: &PropertyIR) -> PropDecl {
186    PropDecl {
187        name: property.name.clone(),
188        prop_type: property.prop_type.clone(),
189        annotations: property.annotations.clone(),
190    }
191}
192
/// Returns the given strings sorted ascending with duplicates removed.
fn canonical_strings(values: &[String]) -> Vec<String> {
    let mut unique: Vec<String> = values.iter().cloned().collect();
    // Equal strings are indistinguishable, so an unstable sort yields the same result.
    unique.sort_unstable();
    unique.dedup();
    unique
}
199
200fn canonical_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
201    let mut annotations = annotations.to_vec();
202    annotations.sort_by(|left, right| {
203        left.name
204            .cmp(&right.name)
205            .then_with(|| left.value.cmp(&right.value))
206    });
207    annotations
208}
209
210fn canonical_prop_type(prop_type: &PropType) -> PropType {
211    let mut normalized = prop_type.clone();
212    if let Some(values) = &mut normalized.enum_values {
213        values.sort();
214        values.dedup();
215    }
216    normalized
217}
218
219fn canonical_properties(
220    kind: &str,
221    owner_name: &str,
222    properties: &[PropDecl],
223) -> Result<Vec<PropertyIR>> {
224    let mut seen_prop_ids = HashMap::<u32, String>::new();
225    let owner_key = format!("{}:{}", kind, owner_name);
226    let mut canonical = properties
227        .iter()
228        .map(|property| {
229            let prop_id = stable_prop_id(&owner_key, &property.name);
230            if let Some(previous) = seen_prop_ids.insert(prop_id, property.name.clone()) {
231                return Err(NanoError::Catalog(format!(
232                    "property id collision on {}: '{}' and '{}' both hash to {}",
233                    owner_name, previous, property.name, prop_id
234                )));
235            }
236            Ok(PropertyIR {
237                name: property.name.clone(),
238                prop_id,
239                prop_type: canonical_prop_type(&property.prop_type),
240                annotations: canonical_annotations(&property.annotations),
241            })
242        })
243        .collect::<Result<Vec<_>>>()?;
244    canonical.sort_by(|a, b| a.name.cmp(&b.name));
245    Ok(canonical)
246}
247
248fn canonical_constraints(constraints: &[Constraint]) -> Vec<Constraint> {
249    let mut constraints = constraints
250        .iter()
251        .cloned()
252        .map(normalize_constraint)
253        .collect::<Vec<_>>();
254    constraints.sort_by_key(constraint_sort_key);
255    constraints
256}
257
258fn normalize_constraint(constraint: Constraint) -> Constraint {
259    match constraint {
260        Constraint::Key(mut columns) => {
261            columns.sort();
262            Constraint::Key(columns)
263        }
264        Constraint::Unique(mut columns) => {
265            columns.sort();
266            Constraint::Unique(columns)
267        }
268        Constraint::Index(mut columns) => {
269            columns.sort();
270            Constraint::Index(columns)
271        }
272        other => other,
273    }
274}
275
276fn constraint_sort_key(constraint: &Constraint) -> String {
277    match constraint {
278        Constraint::Key(columns) => format!("key:{}", columns.join(",")),
279        Constraint::Unique(columns) => format!("unique:{}", columns.join(",")),
280        Constraint::Index(columns) => format!("index:{}", columns.join(",")),
281        Constraint::Range { property, min, max } => {
282            format!("range:{}:{:?}:{:?}", property, min, max)
283        }
284        Constraint::Check { property, pattern } => format!("check:{}:{}", property, pattern),
285    }
286}
287
288fn stable_type_id(kind: &str, name: &str) -> u32 {
289    fnv1a_u32(&format!("{}:{}", kind, name))
290}
291
292fn stable_prop_id(owner: &str, name: &str) -> u32 {
293    fnv1a_u32(&format!("{}:{}", owner, name))
294}
295
/// Hashes the UTF-8 bytes of `value` with 32-bit FNV-1a.
///
/// A raw hash of `0` is mapped to `1`, so this function never returns zero.
fn fnv1a_u32(value: &str) -> u32 {
    const OFFSET_BASIS: u32 = 0x811C_9DC5;
    const PRIME: u32 = 0x0100_0193;

    let raw = value.bytes().fold(OFFSET_BASIS, |state, byte| {
        (state ^ u32::from(byte)).wrapping_mul(PRIME)
    });

    match raw {
        0 => 1,
        nonzero => nonzero,
    }
}
304
305fn check_type_id_collision(
306    seen_type_ids: &mut HashMap<u32, String>,
307    type_id: u32,
308    name: &str,
309) -> Result<()> {
310    if let Some(previous) = seen_type_ids.insert(type_id, name.to_string()) {
311        return Err(NanoError::Catalog(format!(
312            "type id collision: '{}' and '{}' both hash to {}",
313            previous, name, type_id
314        )));
315    }
316    Ok(())
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use crate::catalog::build_catalog;
    use crate::schema::parser::parse_schema;

    /// Reordering declarations and properties in the source must not change the IR
    /// or its hash.
    #[test]
    fn schema_ir_hash_is_stable_across_source_ordering_noise() {
        let node_first = parse_schema(
            r#"
node Person {
    age: I32?
    name: String @key
}

edge Knows: Person -> Person {
    since: Date?
}
"#,
        )
        .unwrap();
        let edge_first = parse_schema(
            r#"
edge Knows: Person -> Person {
    since: Date?
}

node Person {
    name: String @key
    age: I32?
}
"#,
        )
        .unwrap();

        let node_first_ir = build_schema_ir(&node_first).unwrap();
        let edge_first_ir = build_schema_ir(&edge_first).unwrap();
        assert_eq!(node_first_ir, edge_first_ir);

        let node_first_hash = schema_ir_hash(&node_first_ir).unwrap();
        let edge_first_hash = schema_ir_hash(&edge_first_ir).unwrap();
        assert_eq!(node_first_hash, edge_first_hash);
    }

    /// A catalog rebuilt from the IR must agree with one built directly from the
    /// parsed schema on type counts, the key property and edge cardinality.
    #[test]
    fn build_catalog_from_ir_round_trips_core_catalog_fields() {
        let parsed = parse_schema(
            r#"
node Person @description("person") {
    name: String @key
    age: I32? @description("age")
}

edge Knows: Person -> Person @instruction("friendship") {
    since: Date?
}
"#,
        )
        .unwrap();
        let from_schema = build_catalog(&parsed).unwrap();
        let lowered = build_schema_ir(&parsed).unwrap();
        let from_ir = build_catalog_from_ir(&lowered).unwrap();

        assert_eq!(from_schema.node_types.len(), from_ir.node_types.len());
        assert_eq!(from_schema.edge_types.len(), from_ir.edge_types.len());
        assert_eq!(
            from_schema.node_types["Person"].key_property(),
            from_ir.node_types["Person"].key_property()
        );
        assert_eq!(
            from_schema.edge_types["Knows"].cardinality,
            from_ir.edge_types["Knows"].cardinality
        );
    }
}