Skip to main content

infigraph_core/structured/
schema.rs

1use std::path::{Path, PathBuf};
2
3use anyhow::{bail, Context, Result};
4use serde::Deserialize;
5
6#[derive(Debug, Clone, Deserialize)]
7pub struct StructuredSchema {
8    pub schema: SchemaMeta,
9}
10
11#[derive(Debug, Clone, Deserialize)]
12pub struct SchemaMeta {
13    pub schema_id: String,
14    pub name: String,
15    pub node_table: String,
16    #[serde(default)]
17    pub columns: Vec<ColumnDef>,
18    #[serde(default)]
19    pub edges: Vec<EdgeDef>,
20    #[serde(default)]
21    pub searchable_fields: Vec<String>,
22    #[serde(default)]
23    pub id_template: Option<String>,
24}
25
26#[derive(Debug, Clone, Deserialize)]
27pub struct ColumnDef {
28    pub name: String,
29    pub col_type: String,
30    #[serde(default)]
31    pub required: bool,
32}
33
34#[derive(Debug, Clone, Deserialize)]
35pub struct EdgeDef {
36    pub name: String,
37    pub from_table: String,
38    pub to_table: String,
39    #[serde(default)]
40    pub properties: Vec<ColumnDef>,
41    pub source_field: String,
42    #[serde(default)]
43    pub target_lookup: Option<String>,
44}
45
/// Column types accepted by `SchemaMeta::validate`; they are copied verbatim
/// into the generated DDL.
const VALID_COL_TYPES: &[&str] = &["STRING", "INT64", "BOOL", "DOUBLE", "STRING[]"];
47
48impl SchemaMeta {
49    pub fn validate(&self) -> Result<()> {
50        let id_re = regex::Regex::new(r"^[a-z][a-z0-9_]{0,31}$").unwrap();
51        if !id_re.is_match(&self.schema_id) {
52            bail!(
53                "Invalid schema_id '{}': must match ^[a-z][a-z0-9_]{{0,31}}$",
54                self.schema_id
55            );
56        }
57
58        let col_re = regex::Regex::new(r"^[a-z][a-z0-9_]{0,63}$").unwrap();
59        for col in &self.columns {
60            if !col_re.is_match(&col.name) {
61                bail!(
62                    "Invalid column name '{}' in schema '{}'",
63                    col.name,
64                    self.schema_id
65                );
66            }
67            if !VALID_COL_TYPES.contains(&col.col_type.as_str()) {
68                bail!(
69                    "Invalid col_type '{}' for column '{}': must be one of {:?}",
70                    col.col_type,
71                    col.name,
72                    VALID_COL_TYPES
73                );
74            }
75        }
76
77        if self.node_table.is_empty() {
78            bail!("node_table must not be empty");
79        }
80
81        Ok(())
82    }
83
84    pub fn generate_ddl(&self) -> Vec<String> {
85        let mut stmts = Vec::new();
86
87        let mut col_defs = vec!["id STRING".to_string()];
88        for col in &self.columns {
89            col_defs.push(format!("{} {}", col.name, col.col_type));
90        }
91        stmts.push(format!(
92            "CREATE NODE TABLE IF NOT EXISTS {}({}, PRIMARY KEY(id))",
93            self.node_table,
94            col_defs.join(", ")
95        ));
96
97        for edge in &self.edges {
98            let mut props = String::new();
99            if !edge.properties.is_empty() {
100                let p: Vec<String> = edge
101                    .properties
102                    .iter()
103                    .map(|c| format!("{} {}", c.name, c.col_type))
104                    .collect();
105                props = format!(", {}", p.join(", "));
106            }
107            stmts.push(format!(
108                "CREATE REL TABLE IF NOT EXISTS {}(FROM {} TO {}{})",
109                edge.name, edge.from_table, edge.to_table, props
110            ));
111        }
112
113        stmts
114    }
115}
116
117pub fn discover_schemas(project_root: &Path) -> Result<Vec<(PathBuf, StructuredSchema)>> {
118    let mut schemas = Vec::new();
119
120    let search_dirs = [
121        project_root.join(".infigraph/structured-schemas"),
122        project_root.join(".terragraph/schemas"),
123        dirs_next::home_dir()
124            .unwrap_or_default()
125            .join(".infigraph/structured-schemas"),
126    ];
127
128    for dir in &search_dirs {
129        if !dir.exists() {
130            continue;
131        }
132        for entry in std::fs::read_dir(dir)? {
133            let entry = entry?;
134            let path = entry.path();
135            if path.extension().map(|e| e == "toml").unwrap_or(false) {
136                let content = std::fs::read_to_string(&path)
137                    .with_context(|| format!("failed to read schema: {}", path.display()))?;
138                let schema: StructuredSchema = toml::from_str(&content)
139                    .with_context(|| format!("invalid schema TOML: {}", path.display()))?;
140                schema
141                    .schema
142                    .validate()
143                    .with_context(|| format!("schema validation failed: {}", path.display()))?;
144                schemas.push((path, schema));
145            }
146        }
147    }
148
149    Ok(schemas)
150}
151
/// Counters describing the outcome of an ingest run.
#[derive(Debug)]
pub struct IngestResult {
    /// Number of nodes created.
    pub nodes_created: usize,
    /// Number of edges created.
    pub edges_created: usize,
}
157
/// Escapes `s` for use inside a single-quoted string literal.
///
/// Each backslash becomes `\\` and each single quote becomes `\'`; every
/// other character is copied through unchanged.
pub(crate) fn escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '\'' => out.push_str("\\'"),
            other => out.push(other),
        }
    }
    out
}
161
162pub(crate) fn format_value(col_type: &str, val: Option<&serde_json::Value>) -> String {
163    match val {
164        None => match col_type {
165            "STRING" => "''".to_string(),
166            "INT64" => "0".to_string(),
167            "BOOL" => "false".to_string(),
168            "DOUBLE" => "0.0".to_string(),
169            "STRING[]" => "[]".to_string(),
170            _ => "''".to_string(),
171        },
172        Some(v) => match col_type {
173            "STRING" => format!("'{}'", escape(&v.to_string().trim_matches('"').to_string())),
174            "INT64" => v.as_i64().unwrap_or(0).to_string(),
175            "BOOL" => v.as_bool().unwrap_or(false).to_string(),
176            "DOUBLE" => v.as_f64().unwrap_or(0.0).to_string(),
177            "STRING[]" => {
178                if let Some(arr) = v.as_array() {
179                    let items: Vec<String> = arr
180                        .iter()
181                        .filter_map(|i| i.as_str())
182                        .map(|s| format!("'{}'", escape(s)))
183                        .collect();
184                    format!("[{}]", items.join(", "))
185                } else {
186                    "[]".to_string()
187                }
188            }
189            _ => format!("'{}'", escape(&v.to_string())),
190        },
191    }
192}
193
194pub(crate) fn interpolate_template(
195    tmpl: &str,
196    obj: &serde_json::Map<String, serde_json::Value>,
197) -> String {
198    let mut result = tmpl.to_string();
199    for (key, val) in obj {
200        let placeholder = format!("{{{}}}", key);
201        let replacement = match val {
202            serde_json::Value::String(s) => s.clone(),
203            other => other.to_string().trim_matches('"').to_string(),
204        };
205        result = result.replace(&placeholder, &replacement);
206    }
207    result
208}