Skip to main content

reddb_server/application/
migration_inference.rs

1//! Static analysis for automatic migration dependency inference.
2//!
3//! Scans a migration body (raw SQL string) for collection references and
4//! returns which collection names it touches. The engine uses this to infer
5//! dependency edges between migrations that share a collection — without
6//! requiring the developer to write explicit DEPENDS ON.
7//!
8//! Conservative by design: ambiguous references produce no inferred edge
9//! rather than a wrong one. The developer can always add an explicit
10//! DEPENDS ON to override.
11
12use std::collections::HashSet;
13
14/// Extract collection names referenced by a SQL body.
15///
16/// Recognises patterns:
17/// - `FROM <name>`
18/// - `INTO <name>`
19/// - `TABLE <name>`
20/// - `UPDATE <name>`
21/// - `JOIN <name>`
22///
23/// Returns deduplicated, lowercased names. Names starting with `red_` are
24/// excluded (system collections are not user migrations).
25pub fn referenced_collections(body: &str) -> HashSet<String> {
26    let tokens: Vec<&str> = body.split_ascii_whitespace().collect();
27    let mut result = HashSet::new();
28
29    let triggers = ["from", "into", "table", "update", "join", "on"];
30
31    let mut i = 0;
32    while i < tokens.len() {
33        let tok = tokens[i].to_ascii_lowercase();
34        // Strip trailing punctuation like `;` or `(`
35        let tok = tok.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
36
37        if triggers.contains(&tok) {
38            if let Some(next) = tokens.get(i + 1) {
39                let name = next
40                    .trim_matches(|c: char| !c.is_alphanumeric() && c != '_')
41                    .to_ascii_lowercase();
42                // Exclude SQL keywords and system collections
43                if !name.is_empty()
44                    && !is_sql_keyword(&name)
45                    && !name.starts_with("red_")
46                    && name
47                        .chars()
48                        .next()
49                        .map(|c| c.is_alphabetic())
50                        .unwrap_or(false)
51                {
52                    result.insert(name);
53                }
54            }
55        }
56        i += 1;
57    }
58
59    result
60}
61
62/// Infer dependency edges for a new migration given existing migration metadata.
63///
64/// `new_name`: name of the migration being created
65/// `new_body`: SQL body of the new migration
66/// `existing`: list of (migration_name, body) for all existing migrations
67///
68/// Returns a list of `(new_name, depends_on_name)` edges that should be stored
69/// as inferred=true in `red_migration_deps`.
70///
71/// Ambiguity rule: if multiple existing migrations reference the same collection,
72/// the inference is skipped for that collection (requires explicit DEPENDS ON).
73pub fn infer_dependencies(
74    new_name: &str,
75    new_body: &str,
76    existing: &[(String, String)],
77) -> Vec<(String, String)> {
78    let new_collections = referenced_collections(new_body);
79    if new_collections.is_empty() {
80        return Vec::new();
81    }
82
83    // Map collection → list of migration names that reference it
84    let mut collection_to_migrations: std::collections::HashMap<String, Vec<String>> =
85        std::collections::HashMap::new();
86    for (name, body) in existing {
87        if name == new_name {
88            continue;
89        }
90        for col in referenced_collections(body) {
91            collection_to_migrations
92                .entry(col)
93                .or_default()
94                .push(name.clone());
95        }
96    }
97
98    let mut edges = Vec::new();
99    for col in &new_collections {
100        if let Some(owners) = collection_to_migrations.get(col) {
101            if owners.len() == 1 {
102                // Unambiguous: exactly one prior migration touches this collection
103                let dep = &owners[0];
104                let edge = (new_name.to_string(), dep.clone());
105                if !edges.contains(&edge) {
106                    edges.push(edge);
107                }
108            }
109            // Ambiguous (multiple owners): skip, require explicit DEPENDS ON
110        }
111    }
112
113    edges
114}
115
116fn is_sql_keyword(name: &str) -> bool {
117    matches!(
118        name,
119        "select"
120            | "insert"
121            | "update"
122            | "delete"
123            | "create"
124            | "drop"
125            | "alter"
126            | "table"
127            | "from"
128            | "where"
129            | "set"
130            | "into"
131            | "values"
132            | "join"
133            | "inner"
134            | "outer"
135            | "left"
136            | "right"
137            | "on"
138            | "as"
139            | "and"
140            | "or"
141            | "not"
142            | "null"
143            | "true"
144            | "false"
145            | "if"
146            | "exists"
147            | "column"
148            | "index"
149            | "unique"
150            | "primary"
151            | "key"
152            | "foreign"
153            | "references"
154            | "cascade"
155            | "restrict"
156            | "default"
157            | "constraint"
158            | "add"
159            | "rename"
160            | "to"
161            | "all"
162            | "distinct"
163            | "order"
164            | "by"
165            | "group"
166            | "having"
167            | "limit"
168            | "offset"
169            | "union"
170            | "intersect"
171            | "except"
172            | "with"
173            | "returning"
174            | "in"
175            | "like"
176            | "between"
177            | "is"
178            | "case"
179            | "when"
180            | "then"
181            | "else"
182            | "end"
183    )
184}
185
186#[cfg(test)]
187mod tests {
188    use super::*;
189
190    #[test]
191    fn extracts_from_clause() {
192        let cols = referenced_collections("SELECT * FROM users WHERE id = 1");
193        assert!(cols.contains("users"));
194    }
195
196    #[test]
197    fn extracts_update_target() {
198        let cols = referenced_collections("UPDATE users SET email = lower(email)");
199        assert!(cols.contains("users"));
200    }
201
202    #[test]
203    fn extracts_insert_into() {
204        let cols = referenced_collections("INSERT INTO profiles (user_id) VALUES (1)");
205        assert!(cols.contains("profiles"));
206    }
207
208    #[test]
209    fn excludes_system_collections() {
210        let cols = referenced_collections("SELECT * FROM red_migrations");
211        assert!(!cols.contains("red_migrations"));
212    }
213
214    #[test]
215    fn excludes_sql_keywords() {
216        let cols = referenced_collections("CREATE TABLE users (id INT)");
217        // "users" should be captured (after TABLE keyword), "create" should not
218        assert!(cols.contains("users"));
219        assert!(!cols.contains("create"));
220    }
221
222    #[test]
223    fn infers_unambiguous_dep() {
224        let existing = vec![(
225            "add_email".to_string(),
226            "ALTER TABLE users ADD COLUMN email TEXT".to_string(),
227        )];
228        let edges = infer_dependencies(
229            "add_email_index",
230            "CREATE INDEX idx_email ON users (email)",
231            &existing,
232        );
233        assert!(edges.contains(&("add_email_index".to_string(), "add_email".to_string())));
234    }
235
236    #[test]
237    fn skips_ambiguous_dep() {
238        let existing = vec![
239            (
240                "mig_a".to_string(),
241                "ALTER TABLE users ADD COLUMN a INT".to_string(),
242            ),
243            (
244                "mig_b".to_string(),
245                "ALTER TABLE users ADD COLUMN b INT".to_string(),
246            ),
247        ];
248        // Two migrations touch "users" → ambiguous → no inferred edge
249        let edges = infer_dependencies("mig_c", "UPDATE users SET a = 1", &existing);
250        assert!(edges.is_empty());
251    }
252}