infigraph_core/structured/
schema.rs1use std::path::{Path, PathBuf};
2
3use anyhow::{bail, Context, Result};
4use serde::Deserialize;
5
6#[derive(Debug, Clone, Deserialize)]
7pub struct StructuredSchema {
8 pub schema: SchemaMeta,
9}
10
11#[derive(Debug, Clone, Deserialize)]
12pub struct SchemaMeta {
13 pub schema_id: String,
14 pub name: String,
15 pub node_table: String,
16 #[serde(default)]
17 pub columns: Vec<ColumnDef>,
18 #[serde(default)]
19 pub edges: Vec<EdgeDef>,
20 #[serde(default)]
21 pub searchable_fields: Vec<String>,
22 #[serde(default)]
23 pub id_template: Option<String>,
24}
25
26#[derive(Debug, Clone, Deserialize)]
27pub struct ColumnDef {
28 pub name: String,
29 pub col_type: String,
30 #[serde(default)]
31 pub required: bool,
32}
33
34#[derive(Debug, Clone, Deserialize)]
35pub struct EdgeDef {
36 pub name: String,
37 pub from_table: String,
38 pub to_table: String,
39 #[serde(default)]
40 pub properties: Vec<ColumnDef>,
41 pub source_field: String,
42 #[serde(default)]
43 pub target_lookup: Option<String>,
44}
45
/// Column type names accepted for node-table columns by `SchemaMeta::validate`.
const VALID_COL_TYPES: &[&str] = &[
    "STRING",
    "INT64",
    "BOOL",
    "DOUBLE",
    "STRING[]",
];
47
48impl SchemaMeta {
49 pub fn validate(&self) -> Result<()> {
50 let id_re = regex::Regex::new(r"^[a-z][a-z0-9_]{0,31}$").unwrap();
51 if !id_re.is_match(&self.schema_id) {
52 bail!(
53 "Invalid schema_id '{}': must match ^[a-z][a-z0-9_]{{0,31}}$",
54 self.schema_id
55 );
56 }
57
58 let col_re = regex::Regex::new(r"^[a-z][a-z0-9_]{0,63}$").unwrap();
59 for col in &self.columns {
60 if !col_re.is_match(&col.name) {
61 bail!(
62 "Invalid column name '{}' in schema '{}'",
63 col.name,
64 self.schema_id
65 );
66 }
67 if !VALID_COL_TYPES.contains(&col.col_type.as_str()) {
68 bail!(
69 "Invalid col_type '{}' for column '{}': must be one of {:?}",
70 col.col_type,
71 col.name,
72 VALID_COL_TYPES
73 );
74 }
75 }
76
77 if self.node_table.is_empty() {
78 bail!("node_table must not be empty");
79 }
80
81 Ok(())
82 }
83
84 pub fn generate_ddl(&self) -> Vec<String> {
85 let mut stmts = Vec::new();
86
87 let mut col_defs = vec!["id STRING".to_string()];
88 for col in &self.columns {
89 col_defs.push(format!("{} {}", col.name, col.col_type));
90 }
91 stmts.push(format!(
92 "CREATE NODE TABLE IF NOT EXISTS {}({}, PRIMARY KEY(id))",
93 self.node_table,
94 col_defs.join(", ")
95 ));
96
97 for edge in &self.edges {
98 let mut props = String::new();
99 if !edge.properties.is_empty() {
100 let p: Vec<String> = edge
101 .properties
102 .iter()
103 .map(|c| format!("{} {}", c.name, c.col_type))
104 .collect();
105 props = format!(", {}", p.join(", "));
106 }
107 stmts.push(format!(
108 "CREATE REL TABLE IF NOT EXISTS {}(FROM {} TO {}{})",
109 edge.name, edge.from_table, edge.to_table, props
110 ));
111 }
112
113 stmts
114 }
115}
116
117pub fn discover_schemas(project_root: &Path) -> Result<Vec<(PathBuf, StructuredSchema)>> {
118 let mut schemas = Vec::new();
119
120 let search_dirs = [
121 project_root.join(".infigraph/structured-schemas"),
122 project_root.join(".terragraph/schemas"),
123 dirs_next::home_dir()
124 .unwrap_or_default()
125 .join(".infigraph/structured-schemas"),
126 ];
127
128 for dir in &search_dirs {
129 if !dir.exists() {
130 continue;
131 }
132 for entry in std::fs::read_dir(dir)? {
133 let entry = entry?;
134 let path = entry.path();
135 if path.extension().map(|e| e == "toml").unwrap_or(false) {
136 let content = std::fs::read_to_string(&path)
137 .with_context(|| format!("failed to read schema: {}", path.display()))?;
138 let schema: StructuredSchema = toml::from_str(&content)
139 .with_context(|| format!("invalid schema TOML: {}", path.display()))?;
140 schema
141 .schema
142 .validate()
143 .with_context(|| format!("schema validation failed: {}", path.display()))?;
144 schemas.push((path, schema));
145 }
146 }
147 }
148
149 Ok(schemas)
150}
151
/// Pair of counters for created nodes and created edges.
///
/// The code that fills this in is not visible in this file; presumably it is
/// returned by an ingest run — verify against callers.
#[derive(Debug)]
pub struct IngestResult {
    /// Count of nodes created.
    pub nodes_created: usize,
    /// Count of edges created.
    pub edges_created: usize,
}
157
/// Escapes `s` for embedding inside a single-quoted literal.
///
/// Each backslash is doubled and each single quote is prefixed with a
/// backslash; every other character is copied through unchanged.
pub(crate) fn escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            other => escaped.push(other),
        }
    }
    escaped
}
161
162pub(crate) fn format_value(col_type: &str, val: Option<&serde_json::Value>) -> String {
163 match val {
164 None => match col_type {
165 "STRING" => "''".to_string(),
166 "INT64" => "0".to_string(),
167 "BOOL" => "false".to_string(),
168 "DOUBLE" => "0.0".to_string(),
169 "STRING[]" => "[]".to_string(),
170 _ => "''".to_string(),
171 },
172 Some(v) => match col_type {
173 "STRING" => format!("'{}'", escape(v.to_string().trim_matches('"'))),
174 "INT64" => v.as_i64().unwrap_or(0).to_string(),
175 "BOOL" => v.as_bool().unwrap_or(false).to_string(),
176 "DOUBLE" => v.as_f64().unwrap_or(0.0).to_string(),
177 "STRING[]" => {
178 if let Some(arr) = v.as_array() {
179 let items: Vec<String> = arr
180 .iter()
181 .filter_map(|i| i.as_str())
182 .map(|s| format!("'{}'", escape(s)))
183 .collect();
184 format!("[{}]", items.join(", "))
185 } else {
186 "[]".to_string()
187 }
188 }
189 _ => format!("'{}'", escape(&v.to_string())),
190 },
191 }
192}
193
194pub(crate) fn interpolate_template(
195 tmpl: &str,
196 obj: &serde_json::Map<String, serde_json::Value>,
197) -> String {
198 let mut result = tmpl.to_string();
199 for (key, val) in obj {
200 let placeholder = format!("{{{}}}", key);
201 let replacement = match val {
202 serde_json::Value::String(s) => s.clone(),
203 other => other.to_string().trim_matches('"').to_string(),
204 };
205 result = result.replace(&placeholder, &replacement);
206 }
207 result
208}