1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5
6use crate::catalog::{Catalog, build_catalog};
7use crate::error::{NanoError, Result};
8use crate::schema::ast::{Annotation, Cardinality, Constraint, PropDecl, SchemaDecl, SchemaFile};
9use crate::types::PropType;
10
/// Version tag stamped into every [`SchemaIR`] built by this module;
/// [`build_catalog_from_ir`] rejects an IR carrying any other value.
const SCHEMA_IR_VERSION: u32 = 1;
12
/// Canonical intermediate representation of a schema file.
///
/// When produced by [`build_schema_ir`], each declaration list is sorted by
/// declaration name, so the source order of declarations does not affect
/// equality or the serialized form.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaIR {
    /// IR format version; `build_schema_ir` sets this to `SCHEMA_IR_VERSION`.
    pub ir_version: u32,
    /// Interface declarations.
    pub interfaces: Vec<InterfaceIR>,
    /// Node declarations.
    pub nodes: Vec<NodeIR>,
    /// Edge declarations.
    pub edges: Vec<EdgeIR>,
}
20
/// IR form of an interface declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceIR {
    /// Interface name, copied from the declaration.
    pub name: String,
    /// Id derived by `build_schema_ir` from hashing `"interface:<name>"`.
    pub type_id: u32,
    /// Declared properties; `build_schema_ir` stores them sorted by name.
    pub properties: Vec<PropertyIR>,
}
27
/// IR form of a node declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodeIR {
    /// Node name, copied from the declaration.
    pub name: String,
    /// Id derived by `build_schema_ir` from hashing `"node:<name>"`.
    pub type_id: u32,
    /// Node-level annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
    /// Names listed in the declaration's `implements`; `build_schema_ir`
    /// stores them sorted with duplicates removed.
    pub implements: Vec<String>,
    /// Declared properties; `build_schema_ir` stores them sorted by name.
    pub properties: Vec<PropertyIR>,
    /// Declared constraints; `build_schema_ir` normalizes and sorts them.
    pub constraints: Vec<Constraint>,
}
37
/// IR form of an edge declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EdgeIR {
    /// Edge name, copied from the declaration.
    pub name: String,
    /// Id derived by `build_schema_ir` from hashing `"edge:<name>"`.
    pub type_id: u32,
    /// `from_type` of the edge declaration, copied verbatim.
    pub from_type: String,
    /// `to_type` of the edge declaration, copied verbatim.
    pub to_type: String,
    /// Cardinality of the edge declaration, copied verbatim.
    pub cardinality: Cardinality,
    /// Edge-level annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
    /// Declared properties; `build_schema_ir` stores them sorted by name.
    pub properties: Vec<PropertyIR>,
    /// Declared constraints; `build_schema_ir` normalizes and sorts them.
    pub constraints: Vec<Constraint>,
}
49
/// IR form of a single property of an interface, node, or edge.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyIR {
    /// Property name, copied from the declaration.
    pub name: String,
    /// Id derived by `build_schema_ir` from hashing `"<kind>:<owner>:<name>"`,
    /// where `<kind>` is `interface`, `node`, or `edge`.
    pub prop_id: u32,
    /// Property type; `build_schema_ir` sorts and de-duplicates its
    /// `enum_values` when present.
    pub prop_type: PropType,
    /// Property annotations; `build_schema_ir` sorts them by name, then value.
    pub annotations: Vec<Annotation>,
}
57
58pub fn build_schema_ir(schema: &SchemaFile) -> Result<SchemaIR> {
59 let mut seen_type_ids = HashMap::<u32, String>::new();
60 let mut interfaces = Vec::new();
61 let mut nodes = Vec::new();
62 let mut edges = Vec::new();
63
64 for decl in &schema.declarations {
65 match decl {
66 SchemaDecl::Interface(interface) => {
67 let type_id = stable_type_id("interface", &interface.name);
68 check_type_id_collision(&mut seen_type_ids, type_id, &interface.name)?;
69 interfaces.push(InterfaceIR {
70 name: interface.name.clone(),
71 type_id,
72 properties: canonical_properties(
73 "interface",
74 &interface.name,
75 &interface.properties,
76 )?,
77 });
78 }
79 SchemaDecl::Node(node) => {
80 let type_id = stable_type_id("node", &node.name);
81 check_type_id_collision(&mut seen_type_ids, type_id, &node.name)?;
82 nodes.push(NodeIR {
83 name: node.name.clone(),
84 type_id,
85 annotations: canonical_annotations(&node.annotations),
86 implements: canonical_strings(&node.implements),
87 properties: canonical_properties("node", &node.name, &node.properties)?,
88 constraints: canonical_constraints(&node.constraints),
89 });
90 }
91 SchemaDecl::Edge(edge) => {
92 let type_id = stable_type_id("edge", &edge.name);
93 check_type_id_collision(&mut seen_type_ids, type_id, &edge.name)?;
94 edges.push(EdgeIR {
95 name: edge.name.clone(),
96 type_id,
97 from_type: edge.from_type.clone(),
98 to_type: edge.to_type.clone(),
99 cardinality: edge.cardinality.clone(),
100 annotations: canonical_annotations(&edge.annotations),
101 properties: canonical_properties("edge", &edge.name, &edge.properties)?,
102 constraints: canonical_constraints(&edge.constraints),
103 });
104 }
105 }
106 }
107
108 interfaces.sort_by(|a, b| a.name.cmp(&b.name));
109 nodes.sort_by(|a, b| a.name.cmp(&b.name));
110 edges.sort_by(|a, b| a.name.cmp(&b.name));
111
112 Ok(SchemaIR {
113 ir_version: SCHEMA_IR_VERSION,
114 interfaces,
115 nodes,
116 edges,
117 })
118}
119
120pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
121 if ir.ir_version != SCHEMA_IR_VERSION {
122 return Err(NanoError::Catalog(format!(
123 "unsupported schema ir_version {} (expected {})",
124 ir.ir_version, SCHEMA_IR_VERSION
125 )));
126 }
127
128 let schema = SchemaFile {
129 declarations: ir
130 .interfaces
131 .iter()
132 .map(|interface| {
133 SchemaDecl::Interface(crate::schema::ast::InterfaceDecl {
134 name: interface.name.clone(),
135 properties: interface
136 .properties
137 .iter()
138 .map(property_decl_from_ir)
139 .collect(),
140 })
141 })
142 .chain(ir.nodes.iter().map(|node| {
143 SchemaDecl::Node(crate::schema::ast::NodeDecl {
144 name: node.name.clone(),
145 annotations: node.annotations.clone(),
146 implements: node.implements.clone(),
147 properties: node.properties.iter().map(property_decl_from_ir).collect(),
148 constraints: node.constraints.clone(),
149 })
150 }))
151 .chain(ir.edges.iter().map(|edge| {
152 SchemaDecl::Edge(crate::schema::ast::EdgeDecl {
153 name: edge.name.clone(),
154 from_type: edge.from_type.clone(),
155 to_type: edge.to_type.clone(),
156 cardinality: edge.cardinality.clone(),
157 annotations: edge.annotations.clone(),
158 properties: edge.properties.iter().map(property_decl_from_ir).collect(),
159 constraints: edge.constraints.clone(),
160 })
161 }))
162 .collect(),
163 };
164
165 build_catalog(&schema)
166}
167
168pub fn schema_ir_json(ir: &SchemaIR) -> Result<String> {
169 serde_json::to_string(ir)
170 .map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
171}
172
173pub fn schema_ir_pretty_json(ir: &SchemaIR) -> Result<String> {
174 serde_json::to_string_pretty(ir)
175 .map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
176}
177
178pub fn schema_ir_hash(ir: &SchemaIR) -> Result<String> {
179 let json = schema_ir_json(ir)?;
180 let mut hasher = Sha256::new();
181 hasher.update(json.as_bytes());
182 Ok(format!("sha256:{:x}", hasher.finalize()))
183}
184
185fn property_decl_from_ir(property: &PropertyIR) -> PropDecl {
186 PropDecl {
187 name: property.name.clone(),
188 prop_type: property.prop_type.clone(),
189 annotations: property.annotations.clone(),
190 }
191}
192
/// Returns the input strings in ascending order with duplicates removed.
fn canonical_strings(values: &[String]) -> Vec<String> {
    // An ordered set yields each distinct string once, in sorted order.
    let distinct: std::collections::BTreeSet<&String> = values.iter().collect();
    distinct.into_iter().cloned().collect()
}
199
200fn canonical_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
201 let mut annotations = annotations.to_vec();
202 annotations.sort_by(|left, right| {
203 left.name
204 .cmp(&right.name)
205 .then_with(|| left.value.cmp(&right.value))
206 });
207 annotations
208}
209
210fn canonical_prop_type(prop_type: &PropType) -> PropType {
211 let mut normalized = prop_type.clone();
212 if let Some(values) = &mut normalized.enum_values {
213 values.sort();
214 values.dedup();
215 }
216 normalized
217}
218
219fn canonical_properties(
220 kind: &str,
221 owner_name: &str,
222 properties: &[PropDecl],
223) -> Result<Vec<PropertyIR>> {
224 let mut seen_prop_ids = HashMap::<u32, String>::new();
225 let owner_key = format!("{}:{}", kind, owner_name);
226 let mut canonical = properties
227 .iter()
228 .map(|property| {
229 let prop_id = stable_prop_id(&owner_key, &property.name);
230 if let Some(previous) = seen_prop_ids.insert(prop_id, property.name.clone()) {
231 return Err(NanoError::Catalog(format!(
232 "property id collision on {}: '{}' and '{}' both hash to {}",
233 owner_name, previous, property.name, prop_id
234 )));
235 }
236 Ok(PropertyIR {
237 name: property.name.clone(),
238 prop_id,
239 prop_type: canonical_prop_type(&property.prop_type),
240 annotations: canonical_annotations(&property.annotations),
241 })
242 })
243 .collect::<Result<Vec<_>>>()?;
244 canonical.sort_by(|a, b| a.name.cmp(&b.name));
245 Ok(canonical)
246}
247
248fn canonical_constraints(constraints: &[Constraint]) -> Vec<Constraint> {
249 let mut constraints = constraints
250 .iter()
251 .cloned()
252 .map(normalize_constraint)
253 .collect::<Vec<_>>();
254 constraints.sort_by_key(constraint_sort_key);
255 constraints
256}
257
258fn normalize_constraint(constraint: Constraint) -> Constraint {
259 match constraint {
260 Constraint::Key(mut columns) => {
261 columns.sort();
262 Constraint::Key(columns)
263 }
264 Constraint::Unique(mut columns) => {
265 columns.sort();
266 Constraint::Unique(columns)
267 }
268 Constraint::Index(mut columns) => {
269 columns.sort();
270 Constraint::Index(columns)
271 }
272 other => other,
273 }
274}
275
276fn constraint_sort_key(constraint: &Constraint) -> String {
277 match constraint {
278 Constraint::Key(columns) => format!("key:{}", columns.join(",")),
279 Constraint::Unique(columns) => format!("unique:{}", columns.join(",")),
280 Constraint::Index(columns) => format!("index:{}", columns.join(",")),
281 Constraint::Range { property, min, max } => {
282 format!("range:{}:{:?}:{:?}", property, min, max)
283 }
284 Constraint::Check { property, pattern } => format!("check:{}:{}", property, pattern),
285 }
286}
287
288fn stable_type_id(kind: &str, name: &str) -> u32 {
289 fnv1a_u32(&format!("{}:{}", kind, name))
290}
291
292fn stable_prop_id(owner: &str, name: &str) -> u32 {
293 fnv1a_u32(&format!("{}:{}", owner, name))
294}
295
/// Hashes the UTF-8 bytes of `value` with 32-bit FNV-1a.
///
/// A raw hash of `0` is reported as `1`, so the function never returns zero.
fn fnv1a_u32(value: &str) -> u32 {
    const OFFSET_BASIS: u32 = 2_166_136_261;
    const PRIME: u32 = 16_777_619;

    // FNV-1a: xor the byte in first, then multiply by the prime (wrapping).
    let raw = value
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u32::from(byte)).wrapping_mul(PRIME));

    match raw {
        0 => 1,
        nonzero => nonzero,
    }
}
304
305fn check_type_id_collision(
306 seen_type_ids: &mut HashMap<u32, String>,
307 type_id: u32,
308 name: &str,
309) -> Result<()> {
310 if let Some(previous) = seen_type_ids.insert(type_id, name.to_string()) {
311 return Err(NanoError::Catalog(format!(
312 "type id collision: '{}' and '{}' both hash to {}",
313 previous, name, type_id
314 )));
315 }
316 Ok(())
317}
318
// Unit tests for IR canonicalization and the IR -> catalog round trip.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::catalog::build_catalog;
    use crate::schema::parser::parse_schema;

    // Two schemas that differ only in declaration order and property order
    // must lower to equal IRs and therefore to equal hashes.
    #[test]
    fn schema_ir_hash_is_stable_across_source_ordering_noise() {
        let schema_a = parse_schema(
            r#"
node Person {
    age: I32?
    name: String @key
}

edge Knows: Person -> Person {
    since: Date?
}
"#,
        )
        .unwrap();
        // Same declarations as `schema_a`, with the edge first and the node's
        // properties swapped.
        let schema_b = parse_schema(
            r#"
edge Knows: Person -> Person {
    since: Date?
}

node Person {
    name: String @key
    age: I32?
}
"#,
        )
        .unwrap();

        let ir_a = build_schema_ir(&schema_a).unwrap();
        let ir_b = build_schema_ir(&schema_b).unwrap();
        assert_eq!(ir_a, ir_b);
        assert_eq!(
            schema_ir_hash(&ir_a).unwrap(),
            schema_ir_hash(&ir_b).unwrap()
        );
    }

    // A catalog rebuilt from the IR must agree with one built directly from
    // the parsed schema on type counts, the node key property, and the edge
    // cardinality.
    #[test]
    fn build_catalog_from_ir_round_trips_core_catalog_fields() {
        let schema = parse_schema(
            r#"
node Person @description("person") {
    name: String @key
    age: I32? @description("age")
}

edge Knows: Person -> Person @instruction("friendship") {
    since: Date?
}
"#,
        )
        .unwrap();
        let direct = build_catalog(&schema).unwrap();
        let ir = build_schema_ir(&schema).unwrap();
        let rebuilt = build_catalog_from_ir(&ir).unwrap();

        assert_eq!(direct.node_types.len(), rebuilt.node_types.len());
        assert_eq!(direct.edge_types.len(), rebuilt.edge_types.len());
        assert_eq!(
            direct.node_types["Person"].key_property(),
            rebuilt.node_types["Person"].key_property()
        );
        assert_eq!(
            direct.edge_types["Knows"].cardinality,
            rebuilt.edge_types["Knows"].cardinality
        );
    }
}