Skip to main content

infigraph_core/structured/
mod.rs

1mod cozo;
2mod ingest;
3mod schema;
4
5pub use cozo::*;
6pub use ingest::*;
7pub use schema::*;
8
9#[cfg(test)]
10mod tests {
11    use super::*;
12
    /// TOML fixture used by `test_parse_schema` and `test_generate_ddl`.
    ///
    /// Declares the `ears` schema: a `Requirement` node table with four columns
    /// (`title` is the only one marked `required`) and a single `TRACES_TO`
    /// edge from `Requirement` to `Symbol`.
    const SAMPLE_SCHEMA: &str = r#"
[schema]
schema_id = "ears"
name = "EARS Requirements"
node_table = "Requirement"
id_template = "ears_{req_id}"
searchable_fields = ["title", "requirement_text"]

[[schema.columns]]
name = "title"
col_type = "STRING"
required = true

[[schema.columns]]
name = "requirement_text"
col_type = "STRING"

[[schema.columns]]
name = "category"
col_type = "STRING"

[[schema.columns]]
name = "priority"
col_type = "INT64"

[[schema.edges]]
name = "TRACES_TO"
from_table = "Requirement"
to_table = "Symbol"
source_field = "traces_to"
"#;
44
45    #[test]
46    fn test_parse_schema() {
47        let schema: StructuredSchema = toml::from_str(SAMPLE_SCHEMA).unwrap();
48        assert_eq!(schema.schema.schema_id, "ears");
49        assert_eq!(schema.schema.node_table, "Requirement");
50        assert_eq!(schema.schema.columns.len(), 4);
51        assert!(schema.schema.columns[0].required);
52        assert_eq!(schema.schema.edges.len(), 1);
53        assert_eq!(schema.schema.edges[0].name, "TRACES_TO");
54        schema.schema.validate().unwrap();
55    }
56
57    #[test]
58    fn test_generate_ddl() {
59        let schema: StructuredSchema = toml::from_str(SAMPLE_SCHEMA).unwrap();
60        let ddl = schema.schema.generate_ddl();
61        assert_eq!(ddl.len(), 2);
62        assert!(ddl[0].contains("Requirement"));
63        assert!(ddl[0].contains("title STRING"));
64        assert!(ddl[0].contains("priority INT64"));
65        assert!(ddl[1].contains("TRACES_TO"));
66        assert!(ddl[1].contains("FROM Requirement TO Symbol"));
67    }
68
69    #[test]
70    fn test_invalid_schema_id() {
71        let toml_str = r#"
72[schema]
73schema_id = "Bad"
74name = "Bad"
75node_table = "Bad"
76"#;
77        let schema: StructuredSchema = toml::from_str(toml_str).unwrap();
78        assert!(schema.schema.validate().is_err());
79    }
80
81    #[test]
82    fn test_id_template_interpolation() {
83        let mut obj = serde_json::Map::new();
84        obj.insert(
85            "req_id".to_string(),
86            serde_json::Value::String("REQ-001".to_string()),
87        );
88        obj.insert(
89            "category".to_string(),
90            serde_json::Value::String("security".to_string()),
91        );
92        let result = schema::interpolate_template("ears_{req_id}_{category}", &obj);
93        assert_eq!(result, "ears_REQ-001_security");
94    }
95
96    #[test]
97    fn test_format_value() {
98        assert_eq!(
99            schema::format_value("STRING", Some(&serde_json::json!("hello"))),
100            "'hello'"
101        );
102        assert_eq!(
103            schema::format_value("INT64", Some(&serde_json::json!(42))),
104            "42"
105        );
106        assert_eq!(
107            schema::format_value("BOOL", Some(&serde_json::json!(true))),
108            "true"
109        );
110        assert_eq!(schema::format_value("STRING", None), "''");
111        assert_eq!(schema::format_value("INT64", None), "0");
112    }
113
114    fn simple_schema() -> SchemaMeta {
115        SchemaMeta {
116            schema_id: "test_items".to_string(),
117            name: "Test Items".to_string(),
118            node_table: "TestItem".to_string(),
119            columns: vec![
120                ColumnDef {
121                    name: "title".to_string(),
122                    col_type: "STRING".to_string(),
123                    required: true,
124                },
125                ColumnDef {
126                    name: "priority".to_string(),
127                    col_type: "INT64".to_string(),
128                    required: false,
129                },
130            ],
131            edges: vec![],
132            searchable_fields: vec![],
133            id_template: Some("item_{item_id}".to_string()),
134        }
135    }
136
137    fn kuzu_conn() -> (tempfile::TempDir, crate::graph::GraphStore) {
138        let dir = tempfile::TempDir::new().unwrap();
139        let store = crate::graph::GraphStore::open(&dir.path().join("graph")).unwrap();
140        (dir, store)
141    }
142
143    #[test]
144    fn test_ingest_data_with_kuzu() {
145        let (_dir, store) = kuzu_conn();
146        let conn = store.connection().unwrap();
147        let schema = simple_schema();
148
149        let data = vec![
150            serde_json::json!({"item_id": "A1", "title": "First", "priority": 1}),
151            serde_json::json!({"item_id": "A2", "title": "Second", "priority": 2}),
152            serde_json::json!({"item_id": "A3", "title": "Third", "priority": 3}),
153        ];
154
155        let result = ingest_data(&conn, &schema, &data).unwrap();
156        assert_eq!(result.nodes_created, 3);
157
158        let qr = conn
159            .query("MATCH (t:TestItem) RETURN t.id ORDER BY t.id")
160            .unwrap();
161        let mut ids = Vec::new();
162        for row in qr {
163            ids.push(row[0].to_string());
164        }
165        assert_eq!(ids.len(), 3);
166        assert!(ids.iter().any(|id| id.contains("item_A1")));
167    }
168
169    #[test]
170    fn test_ingest_file_json() {
171        let (_dir, store) = kuzu_conn();
172        let conn = store.connection().unwrap();
173        let schema = simple_schema();
174
175        let tmp = tempfile::NamedTempFile::with_suffix(".json").unwrap();
176        std::fs::write(
177            tmp.path(),
178            r#"[{"item_id":"J1","title":"JSON item","priority":5}]"#,
179        )
180        .unwrap();
181
182        let result = ingest_file(&conn, &schema, tmp.path()).unwrap();
183        assert_eq!(result.nodes_created, 1);
184    }
185
186    #[test]
187    fn test_ingest_file_yaml() {
188        let (_dir, store) = kuzu_conn();
189        let conn = store.connection().unwrap();
190        let schema = simple_schema();
191
192        let tmp = tempfile::NamedTempFile::with_suffix(".yaml").unwrap();
193        std::fs::write(
194            tmp.path(),
195            "- item_id: Y1\n  title: YAML item\n  priority: 10\n",
196        )
197        .unwrap();
198
199        let result = ingest_file(&conn, &schema, tmp.path()).unwrap();
200        assert_eq!(result.nodes_created, 1);
201    }
202
203    #[test]
204    fn test_ingest_directory() {
205        let (_dir, store) = kuzu_conn();
206        let conn = store.connection().unwrap();
207        let schema = simple_schema();
208
209        let data_dir = tempfile::TempDir::new().unwrap();
210        std::fs::write(
211            data_dir.path().join("batch1.json"),
212            r#"[{"item_id":"D1","title":"Dir item 1","priority":1}]"#,
213        )
214        .unwrap();
215        std::fs::write(
216            data_dir.path().join("batch2.json"),
217            r#"[{"item_id":"D2","title":"Dir item 2","priority":2}]"#,
218        )
219        .unwrap();
220        std::fs::write(data_dir.path().join("ignore.txt"), "not a data file").unwrap();
221
222        let result = ingest_directory(&conn, &schema, data_dir.path()).unwrap();
223        assert_eq!(result.nodes_created, 2);
224    }
225
226    #[test]
227    fn test_required_field_missing() {
228        let (_dir, store) = kuzu_conn();
229        let conn = store.connection().unwrap();
230        let schema = simple_schema();
231
232        let data = vec![serde_json::json!({"item_id": "X1", "priority": 1})];
233        let err = ingest_data(&conn, &schema, &data).unwrap_err();
234        assert!(
235            err.to_string().contains("title"),
236            "error should mention missing field 'title': {err}"
237        );
238    }
239
240    #[test]
241    fn test_edge_creation_between_nodes() {
242        let (_dir, store) = kuzu_conn();
243        let conn = store.connection().unwrap();
244
245        let schema = SchemaMeta {
246            schema_id: "linked".to_string(),
247            name: "Linked".to_string(),
248            node_table: "LinkedNode".to_string(),
249            columns: vec![ColumnDef {
250                name: "label".to_string(),
251                col_type: "STRING".to_string(),
252                required: false,
253            }],
254            edges: vec![EdgeDef {
255                name: "LINKS_TO".to_string(),
256                from_table: "LinkedNode".to_string(),
257                to_table: "LinkedNode".to_string(),
258                properties: vec![],
259                source_field: "links".to_string(),
260                target_lookup: None,
261            }],
262            searchable_fields: vec![],
263            id_template: None,
264        };
265
266        let data = vec![
267            serde_json::json!({"id": "n2", "label": "Node 2"}),
268            serde_json::json!({"id": "n1", "label": "Node 1", "links": ["n2"]}),
269        ];
270
271        let result = ingest_data(&conn, &schema, &data).unwrap();
272        assert_eq!(result.nodes_created, 2);
273        assert_eq!(result.edges_created, 1);
274    }
275
276    #[test]
277    fn test_id_template_with_missing_field() {
278        let mut obj = serde_json::Map::new();
279        obj.insert(
280            "req_id".to_string(),
281            serde_json::Value::String("REQ-001".to_string()),
282        );
283        let result = schema::interpolate_template("{req_id}_{category}", &obj);
284        assert_eq!(
285            result, "REQ-001_{category}",
286            "missing field should remain as literal placeholder"
287        );
288    }
289
290    #[test]
291    fn test_edge_to_nonexistent_target() {
292        let (_dir, store) = kuzu_conn();
293        let conn = store.connection().unwrap();
294
295        let schema = SchemaMeta {
296            schema_id: "orphan".to_string(),
297            name: "Orphan".to_string(),
298            node_table: "OrphanNode".to_string(),
299            columns: vec![],
300            edges: vec![EdgeDef {
301                name: "REFS".to_string(),
302                from_table: "OrphanNode".to_string(),
303                to_table: "OrphanNode".to_string(),
304                properties: vec![],
305                source_field: "refs".to_string(),
306                target_lookup: None,
307            }],
308            searchable_fields: vec![],
309            id_template: None,
310        };
311
312        let data = vec![serde_json::json!({"id": "exists", "refs": ["does_not_exist"]})];
313
314        let result = ingest_data(&conn, &schema, &data).unwrap();
315        assert_eq!(result.nodes_created, 1);
316        assert_eq!(
317            result.edges_created, 0,
318            "edge to nonexistent target should silently fail"
319        );
320    }
321
322    #[test]
323    fn test_unsupported_file_format() {
324        let (_dir, store) = kuzu_conn();
325        let conn = store.connection().unwrap();
326        let schema = simple_schema();
327
328        let tmp = tempfile::NamedTempFile::with_suffix(".csv").unwrap();
329        std::fs::write(tmp.path(), "a,b\n1,2").unwrap();
330
331        let err = ingest_file(&conn, &schema, tmp.path()).unwrap_err();
332        assert!(
333            err.to_string().contains("Unsupported"),
334            "should mention unsupported format: {err}"
335        );
336    }
337
338    #[test]
339    fn test_schema_discovery_project_dir() {
340        let dir = tempfile::TempDir::new().unwrap();
341        let schema_dir = dir.path().join(".infigraph/structured-schemas");
342        std::fs::create_dir_all(&schema_dir).unwrap();
343        std::fs::write(
344            schema_dir.join("test.toml"),
345            r#"
346[schema]
347schema_id = "found"
348name = "Found"
349node_table = "Found"
350"#,
351        )
352        .unwrap();
353
354        let schemas = discover_schemas(dir.path()).unwrap();
355        assert_eq!(schemas.len(), 1);
356        assert_eq!(schemas[0].1.schema.schema_id, "found");
357    }
358
359    #[test]
360    fn test_schema_discovery_terragraph_dir() {
361        let dir = tempfile::TempDir::new().unwrap();
362        let schema_dir = dir.path().join(".terragraph/schemas");
363        std::fs::create_dir_all(&schema_dir).unwrap();
364        std::fs::write(
365            schema_dir.join("tg.toml"),
366            r#"
367[schema]
368schema_id = "tg_schema"
369name = "TG Schema"
370node_table = "TGNode"
371"#,
372        )
373        .unwrap();
374
375        let schemas = discover_schemas(dir.path()).unwrap();
376        assert_eq!(schemas.len(), 1);
377        assert_eq!(schemas[0].1.schema.schema_id, "tg_schema");
378    }
379
380    // ── Cozo structured ingestion tests ──────────────────────────────
381
382    fn cozo_db() -> (tempfile::TempDir, ::cozo::DbInstance) {
383        let dir = tempfile::TempDir::new().unwrap();
384        let db = ::cozo::DbInstance::new(
385            "sqlite",
386            dir.path().join("cozo.db").to_str().unwrap(),
387            Default::default(),
388        )
389        .unwrap();
390        (dir, db)
391    }
392
393    #[test]
394    fn test_cozo_generate_ddl() {
395        let schema = simple_schema();
396        let ddl = schema.generate_cozo_ddl();
397        assert_eq!(ddl.len(), 1);
398        assert!(
399            ddl[0].contains("testitem"),
400            "table name should be lowercased"
401        );
402        assert!(
403            ddl[0].contains("title: String"),
404            "should have String column"
405        );
406        assert!(ddl[0].contains("priority: Int"), "should have Int column");
407    }
408
409    #[test]
410    fn test_cozo_ingest_data() {
411        let (_dir, db) = cozo_db();
412        let schema = simple_schema();
413
414        let data = vec![
415            serde_json::json!({"item_id": "A1", "title": "First", "priority": 1}),
416            serde_json::json!({"item_id": "A2", "title": "Second", "priority": 2}),
417            serde_json::json!({"item_id": "A3", "title": "Third", "priority": 3}),
418        ];
419
420        let result = ingest_data_cozo(&db, &schema, &data).unwrap();
421        assert_eq!(result.nodes_created, 3);
422
423        let r = db
424            .run_script(
425                "?[id] := *testitem{id}\n:order id",
426                std::collections::BTreeMap::new(),
427                ::cozo::ScriptMutability::Immutable,
428            )
429            .unwrap();
430        assert_eq!(r.rows.len(), 3);
431    }
432
433    #[test]
434    fn test_cozo_ingest_file_json() {
435        let (_dir, db) = cozo_db();
436        let schema = simple_schema();
437
438        let tmp = tempfile::NamedTempFile::with_suffix(".json").unwrap();
439        std::fs::write(
440            tmp.path(),
441            r#"[{"item_id":"J1","title":"JSON item","priority":5}]"#,
442        )
443        .unwrap();
444
445        let result = ingest_file_cozo(&db, &schema, tmp.path()).unwrap();
446        assert_eq!(result.nodes_created, 1);
447    }
448
449    #[test]
450    fn test_cozo_ingest_file_yaml() {
451        let (_dir, db) = cozo_db();
452        let schema = simple_schema();
453
454        let tmp = tempfile::NamedTempFile::with_suffix(".yaml").unwrap();
455        std::fs::write(
456            tmp.path(),
457            "- item_id: Y1\n  title: YAML item\n  priority: 10\n",
458        )
459        .unwrap();
460
461        let result = ingest_file_cozo(&db, &schema, tmp.path()).unwrap();
462        assert_eq!(result.nodes_created, 1);
463    }
464
465    #[test]
466    fn test_cozo_ingest_directory() {
467        let (_dir, db) = cozo_db();
468        let schema = simple_schema();
469
470        let data_dir = tempfile::TempDir::new().unwrap();
471        std::fs::write(
472            data_dir.path().join("batch1.json"),
473            r#"[{"item_id":"D1","title":"Dir item 1","priority":1}]"#,
474        )
475        .unwrap();
476        std::fs::write(
477            data_dir.path().join("batch2.json"),
478            r#"[{"item_id":"D2","title":"Dir item 2","priority":2}]"#,
479        )
480        .unwrap();
481
482        let result = ingest_directory_cozo(&db, &schema, data_dir.path()).unwrap();
483        assert_eq!(result.nodes_created, 2);
484    }
485
486    #[test]
487    fn test_cozo_required_field_missing() {
488        let (_dir, db) = cozo_db();
489        let schema = simple_schema();
490
491        let data = vec![serde_json::json!({"item_id": "X1", "priority": 1})];
492        let err = ingest_data_cozo(&db, &schema, &data).unwrap_err();
493        assert!(
494            err.to_string().contains("title"),
495            "error should mention missing field: {err}"
496        );
497    }
498
499    #[test]
500    fn test_cozo_edge_creation() {
501        let (_dir, db) = cozo_db();
502
503        let schema = SchemaMeta {
504            schema_id: "linked".to_string(),
505            name: "Linked".to_string(),
506            node_table: "LinkedNode".to_string(),
507            columns: vec![ColumnDef {
508                name: "label".to_string(),
509                col_type: "STRING".to_string(),
510                required: false,
511            }],
512            edges: vec![EdgeDef {
513                name: "LINKS_TO".to_string(),
514                from_table: "LinkedNode".to_string(),
515                to_table: "LinkedNode".to_string(),
516                properties: vec![],
517                source_field: "links".to_string(),
518                target_lookup: None,
519            }],
520            searchable_fields: vec![],
521            id_template: None,
522        };
523
524        let data = vec![
525            serde_json::json!({"id": "n2", "label": "Node 2"}),
526            serde_json::json!({"id": "n1", "label": "Node 1", "links": ["n2"]}),
527        ];
528
529        let result = ingest_data_cozo(&db, &schema, &data).unwrap();
530        assert_eq!(result.nodes_created, 2);
531        assert_eq!(result.edges_created, 1);
532    }
533
534    #[test]
535    fn test_cozo_edge_to_nonexistent_target() {
536        let (_dir, db) = cozo_db();
537
538        let schema = SchemaMeta {
539            schema_id: "orphan".to_string(),
540            name: "Orphan".to_string(),
541            node_table: "OrphanNode".to_string(),
542            columns: vec![],
543            edges: vec![EdgeDef {
544                name: "REFS".to_string(),
545                from_table: "OrphanNode".to_string(),
546                to_table: "OrphanNode".to_string(),
547                properties: vec![],
548                source_field: "refs".to_string(),
549                target_lookup: None,
550            }],
551            searchable_fields: vec![],
552            id_template: None,
553        };
554
555        let data = vec![serde_json::json!({"id": "exists", "refs": ["does_not_exist"]})];
556
557        let result = ingest_data_cozo(&db, &schema, &data).unwrap();
558        assert_eq!(result.nodes_created, 1);
559        assert_eq!(result.edges_created, 0);
560    }
561
562    #[test]
563    fn test_cozo_format_value() {
564        use super::cozo as cozo_ingest;
565        assert_eq!(
566            cozo_ingest::format_cozo_value("STRING", Some(&serde_json::json!("hello"))),
567            "\"hello\""
568        );
569        assert_eq!(
570            cozo_ingest::format_cozo_value("INT64", Some(&serde_json::json!(42))),
571            "42"
572        );
573        assert_eq!(
574            cozo_ingest::format_cozo_value("BOOL", Some(&serde_json::json!(true))),
575            "true"
576        );
577        assert_eq!(cozo_ingest::format_cozo_value("STRING", None), "\"\"");
578        assert_eq!(cozo_ingest::format_cozo_value("INT64", None), "0");
579    }
580}