immigrant_schema/
process.rs

1use std::{
2	collections::{BTreeMap, HashSet},
3	mem,
4	ops::{Deref, DerefMut},
5	str::FromStr,
6};
7
8use inflector::{cases::snakecase::to_snake_case, string::pluralize::to_plural};
9use itertools::{Either, Itertools};
10
11use crate::{
12	composite::Composite,
13	diagnostics::Report,
14	ids::{DbIdent, Ident},
15	index::{Check, Index, PrimaryKey, UniqueConstraint},
16	root::{Item, Schema},
17	scalar::{Enum, Scalar, ScalarAnnotation},
18	sql::Sql,
19	table::{Table, TableAnnotation},
20	uid::{RenameExt, RenameMap},
21	view::View,
22	w, HasIdent, SchemaComposite, SchemaEnum, SchemaItem, SchemaTable,
23};
24
/// Maximum length of a database identifier, in bytes. The database can be rebuilt from source with a larger
/// limit (and some installations already do that), but for simplicity the default is assumed here.
27const MAX_IDENTIFIER_LEN: usize = 63;
28
/// Naming convention selectable by its textual name (see the [`FromStr`] implementation).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NamingConvention {
	/// Selected by the string `"postgres"`.
	Postgres,
}
impl FromStr for NamingConvention {
	type Err = &'static str;

	/// Parses a convention from its exact, lowercase name.
	///
	/// # Errors
	/// Returns `"unknown naming convention"` for any input other than `"postgres"`; matching is
	/// case-sensitive and no trimming is performed.
	fn from_str(s: &str) -> Result<Self, Self::Err> {
		match s {
			"postgres" => Ok(Self::Postgres),
			_ => Err("unknown naming convention"),
		}
	}
}
43
/// Newtype wrapper used to attach the naming-convention methods of this module to the wrapped value.
///
/// It dereferences (mutably and immutably) straight to the wrapped value.
pub struct Pgnc<T>(pub T);

impl<T> Deref for Pgnc<T> {
	type Target = T;

	/// Borrows the wrapped value.
	fn deref(&self) -> &T {
		let Self(inner) = self;
		inner
	}
}

impl<T> DerefMut for Pgnc<T> {
	/// Mutably borrows the wrapped value.
	fn deref_mut(&mut self) -> &mut T {
		let Self(inner) = self;
		inner
	}
}
56
57impl Pgnc<&mut Schema> {
58	// TODO: Split into merge and renaming phases, so that renames may work with `SchemaItem` instead of raw `Item`?
59	pub fn process_naming(&mut self, rn: &mut RenameMap) {
60		for item in self.0 .0.iter_mut() {
61			match item {
62				Item::Table(t) => {
63					let mut t = Pgnc(t);
64					t.generate_name(rn);
65					t.generate_column_names(rn);
66					t.merge(rn);
67					t.generate_names(rn);
68				}
69				Item::Scalar(s) => {
70					let mut s = Pgnc(s);
71					s.generate_name(rn);
72					s.generate_names(rn);
73					s.merge_checks(rn);
74				}
75				Item::Enum(e) => {
76					let e = Pgnc(e);
77					e.generate_name(rn);
78					e.generate_item_names(rn);
79				}
80				Item::Composite(c) => {
81					let c = Pgnc(c);
82					c.generate_name(rn);
83					c.generate_item_names(rn);
84				}
85				Item::View(v) => {
86					let c = Pgnc(v);
87					c.generate_name(rn);
88				}
89				Item::Mixin(_) => unreachable!("mixins are assimilated"),
90			}
91		}
92	}
93}
94
95/// First generate names, because only check constraint are mergeable, and there is no problem with merging unrelated
96/// constraints, then merge constraints with the same name.
97impl Pgnc<&mut Scalar> {
98	/// Generate name for the scalar itself
99	pub fn generate_name(&self, rn: &mut RenameMap) {
100		if self.db_assigned(rn) {
101			return;
102		}
103		let id = self.id().name();
104		self.set_db(rn, DbIdent::new(&id));
105	}
106	/// In postgres, first check constraint has autogenerated `table_check` name, second is `table_check1` and so on.
107	/// In immigrant, constraints with the same name are merged, so only name we should save to every constraint is
108	/// `table_check`
109	pub fn generate_names(&mut self, rn: &mut RenameMap) {
110		let name = self.db(rn);
111		for ann in &mut self.annotations {
112			if let ScalarAnnotation::Check(c) = ann {
113				if c.db_assigned(rn) {
114					continue;
115				}
116				c.set_db(rn, DbIdent::new(&format!("{}_check", name.raw())));
117			}
118		}
119	}
120	/// All checks with the same name are merged using AND
121	pub fn merge_checks(&mut self, rn: &RenameMap) {
122		let (checks, mut annotations): (Vec<_>, Vec<_>) = mem::take(&mut self.annotations)
123			.into_iter()
124			.partition_map(|a| match a {
125				ScalarAnnotation::Check(c) => Either::Left(c),
126				a => Either::Right(a),
127			});
128		let merged_checks = checks
129			.into_iter()
130			.map(|c| (c.db(rn), c.check))
131			.into_group_map()
132			.into_iter()
133			.collect::<BTreeMap<_, _>>();
134		for (name, checks) in merged_checks {
135			annotations.push(ScalarAnnotation::Check(Check::new(
136				Some(name),
137				Sql::all(checks),
138			)))
139		}
140		self.annotations = annotations;
141	}
142}
143
144fn truncate_auto_name(name: String, suf: &str) -> String {
145	use blake2::Digest;
146	// FIXME: MAX_IDENTIFIER_LEN should be provided from generator engine.
147	#[allow(clippy::int_plus_one)]
148	if name.len() + suf.len() + 1 <= MAX_IDENTIFIER_LEN {
149		return format!("{name}_{suf}");
150	}
151	let hash = blake2::Blake2s256::digest(name.as_bytes());
152	let hash = base32::encode(base32::Alphabet::Crockford, hash.as_slice());
153	let suf = format!("_{}_{suf}", &hash[0..6]);
154	// format!("", name[])
155	// FIXME: Technically, it is not valid to slice name, because it might be utf-8, but in reality it won't.
156	format!("{}{suf}", &name[..MAX_IDENTIFIER_LEN - suf.len()])
157}
158
159/// First merge constraints, to allow specifying partial primary keys, i.e
160/// ```immigrant
161/// a @primary_key;
162/// b @primary_key;
163/// c @primary_key;
164/// ```
165///
166/// Should be equivalent to
167/// ```immigrant
168/// a;
169/// b;
170/// c;
171/// @primary_key(a, b, c)
172/// ```
173///
/// Then generate names, i.e. for the specified example it will be `<table>_a_b_c_pkey`, where `<table>` is the database name of the table
175impl Pgnc<&mut Table> {
176	/// Generate name for the table itself
177	pub fn generate_name(&self, rn: &mut RenameMap) {
178		if self.db_assigned(rn) {
179			return;
180		}
181		let id = self.id().name();
182		let id = if self.attrlist.get_single("pgnc", "as_is") == Ok(true) {
183			id
184		} else {
185			let id = to_snake_case(&id);
186			to_plural(&id)
187		};
188		self.set_db(rn, DbIdent::new(&id));
189	}
190	pub fn generate_column_names(&self, rn: &mut RenameMap) {
191		for column in self.columns.iter() {
192			if column.db_assigned(rn) {
193				continue;
194			}
195			column.set_db(rn, DbIdent::new(&column.id().name()))
196		}
197	}
198	/// Merge annotations:
199	/// - Primary keys are always merged, it is assumed at most only name will be set. TODO: It gets weird in presence of mixins, e.g mixin can add another primary key, how that should be handled?
200	/// - Checks with the same name (+all unnamed) are merged using AND
201	/// - Unique constraints are merged the same way as the primary key, but unnamed uniques are not merged
202	/// - Indexes are merged the same way as unique constraints, except accounting for the uniqueness flag
203	pub fn merge(&mut self, rn: &RenameMap) {
204		let annotations = mem::take(&mut self.annotations);
205
206		// PK
207		let pk_name = annotations
208			.iter()
209			.filter_map(TableAnnotation::as_primary_key)
210			.filter_map(|pk| pk.try_db(rn))
211			.at_most_one()
212			.expect("at most one pk have name set");
213		let (pks, mut annotations): (Vec<_>, Vec<_>) =
214			annotations.into_iter().partition_map(|a| match a {
215				TableAnnotation::PrimaryKey(pk) => Either::Left(pk),
216				a => Either::Right(a),
217			});
218		if !pks.is_empty() {
219			annotations.push(TableAnnotation::PrimaryKey(PrimaryKey::new(
220				pk_name,
221				pks.into_iter().flat_map(|pk| pk.columns).collect(),
222			)));
223		}
224
225		// Unique
226		let (unqs, mut annotations): (Vec<_>, Vec<_>) =
227			annotations.into_iter().partition_map(|a| match a {
228				TableAnnotation::Unique(u) => Either::Left(u),
229				a => Either::Right(a),
230			});
231		let (named_unqs, unnamed_unqs) = unqs
232			.into_iter()
233			.partition::<Vec<_>, _>(|u| u.db_assigned(rn));
234		let named_unqs = named_unqs
235			.into_iter()
236			.map(|u| (u.db(rn), u.columns))
237			.into_group_map()
238			.into_iter()
239			.collect::<BTreeMap<_, _>>();
240		for (name, cols) in named_unqs {
241			annotations.push(TableAnnotation::Unique(UniqueConstraint::new(
242				Some(name),
243				cols.into_iter().flatten().collect(),
244			)))
245		}
246		for unq in unnamed_unqs {
247			annotations.push(TableAnnotation::Unique(unq));
248		}
249
250		// Check
251		let (checks, mut annotations): (Vec<_>, Vec<_>) =
252			annotations.into_iter().partition_map(|a| match a {
253				TableAnnotation::Check(c) => Either::Left(c),
254				a => Either::Right(a),
255			});
256		let (named_cks, unnamed_cks) = checks
257			.into_iter()
258			.partition::<Vec<_>, _>(|c| c.db_assigned(rn));
259		let named_cks = named_cks
260			.into_iter()
261			.map(|c| (c.db(rn), c.check))
262			.into_group_map()
263			.into_iter()
264			.collect::<BTreeMap<_, _>>();
265		for (name, checks) in named_cks {
266			annotations.push(TableAnnotation::Check(Check::new(
267				Some(name),
268				Sql::all(checks),
269			)))
270		}
271		if !unnamed_cks.is_empty() {
272			annotations.push(TableAnnotation::Check(Check::new(
273				None,
274				Sql::all(unnamed_cks.into_iter().map(|c| c.check)),
275			)));
276		}
277
278		// Index
279		let (indexes, mut annotations): (Vec<_>, Vec<_>) =
280			annotations.into_iter().partition_map(|a| match a {
281				TableAnnotation::Index(i) => Either::Left(i),
282				a => Either::Right(a),
283			});
284		let (named_idxs, unnamed_idxs) = indexes
285			.into_iter()
286			.partition::<Vec<_>, _>(|i| i.db_assigned(rn));
287		let named_idxs = named_idxs
288			.into_iter()
289			.map(|i| {
290				(
291					(
292						i.unique,
293						i.using.clone(),
294						i.default_opclass.clone(),
295						i.with.clone(),
296						i.db(rn),
297					),
298					i.fields().to_vec(),
299				)
300			})
301			.into_group_map()
302			.into_iter()
303			.collect::<BTreeMap<_, _>>();
304		for ((unique, using, default_opclass, with, name), fields) in named_idxs {
305			annotations.push(TableAnnotation::Index(Index::new(
306				Some(name),
307				unique,
308				fields.into_iter().flatten().collect(),
309				using,
310				default_opclass,
311				with,
312			)))
313		}
314		for idx in unnamed_idxs {
315			annotations.push(TableAnnotation::Index(idx))
316		}
317		self.annotations = annotations;
318	}
319	pub fn generate_names(&mut self, rn: &mut RenameMap) {
320		let mut decided_names = Vec::new();
321		for ann in self.annotations.iter() {
322			match ann {
323				TableAnnotation::Index(i) if !i.db_assigned(rn) => {
324					let mut out = self.db(rn).raw().to_string();
325					for column in self.db_names(i.fields().iter().map(|v| &v.0).cloned(), rn) {
326						w!(out, "_{}", column.raw());
327					}
328
329					decided_names.push(Some(truncate_auto_name(
330						out,
331						if i.unique { "key" } else { "idx" },
332					)));
333				}
334				TableAnnotation::Check(c) if !c.db_assigned(rn) => {
335					let mut out = self.db(rn).raw().to_string();
336					for ele in self.db_names(c.check.affected_columns(), rn) {
337						w!(out, "_{}", ele.raw());
338					}
339					decided_names.push(Some(truncate_auto_name(out, "check")));
340				}
341				TableAnnotation::Unique(u) if !u.db_assigned(rn) => {
342					let mut out = self.db(rn).raw().to_string();
343					for ele in self.db_names(u.columns.iter().cloned(), rn) {
344						w!(out, "_{}", ele.raw());
345					}
346					decided_names.push(Some(truncate_auto_name(out, "key")));
347				}
348				TableAnnotation::PrimaryKey(p) if !p.db_assigned(rn) => {
349					let mut out = self.db(rn).raw().to_string();
350					for ele in self.db_names(p.columns.iter().cloned(), rn) {
351						w!(out, "_{}", ele.raw());
352					}
353					decided_names.push(Some(truncate_auto_name(out, "pkey")));
354				}
355				_ => decided_names.push(None),
356			}
357		}
358		assert_eq!(decided_names.len(), self.annotations.len());
359		for (i, ann) in self.annotations.iter_mut().enumerate() {
360			let name = decided_names[i].clone();
361			match ann {
362				TableAnnotation::Index(i) if !i.db_assigned(rn) => {
363					i.set_db(rn, DbIdent::new(&name.unwrap()));
364				}
365				TableAnnotation::PrimaryKey(p) if !p.db_assigned(rn) => {
366					p.set_db(rn, DbIdent::new(&name.unwrap()));
367				}
368				TableAnnotation::Check(c) if !c.db_assigned(rn) => {
369					c.set_db(rn, DbIdent::new(&name.unwrap()));
370				}
371				TableAnnotation::Unique(u) if !u.db_assigned(rn) => {
372					u.set_db(rn, DbIdent::new(&name.unwrap()));
373				}
374				_ => assert!(name.is_none(), "unexpected name for {ann:?}: {name:?}"),
375			}
376		}
377		for fk in self.foreign_keys.iter() {
378			let mut out = self.db(rn).raw().to_string();
379			let fields = fk
380				.source_fields
381				.as_ref()
382				.or(fk.target_fields.as_ref())
383				.expect("source or target should be set");
384			for ele in self.db_names(fields.iter().cloned(), rn) {
385				w!(out, "_{}", ele.raw());
386			}
387			fk.set_db(rn, DbIdent::new(&truncate_auto_name(out, "fk")));
388		}
389	}
390}
391
392impl Pgnc<&mut Enum> {
393	fn generate_name(&self, rn: &mut RenameMap) {
394		if self.db_assigned(rn) {
395			return;
396		}
397		let id = self.id().name();
398		self.set_db(rn, DbIdent::new(&id));
399	}
400	fn generate_item_names(&self, rn: &mut RenameMap) {
401		for ele in self.items.iter() {
402			if ele.db_assigned(rn) {
403				continue;
404			}
405			let id = ele.id().name();
406			ele.set_db(rn, DbIdent::new(&id));
407		}
408	}
409}
410
411impl Pgnc<&mut Composite> {
412	fn generate_name(&self, rn: &mut RenameMap) {
413		if self.db_assigned(rn) {
414			return;
415		}
416		let id = self.id().name();
417		self.set_db(rn, DbIdent::new(&id));
418	}
419	fn generate_item_names(&self, rn: &mut RenameMap) {
420		for ele in self.fields.iter() {
421			if ele.db_assigned(rn) {
422				continue;
423			}
424			let id = ele.id().name();
425			ele.set_db(rn, DbIdent::new(&id));
426		}
427	}
428}
429
430impl Pgnc<&mut View> {
431	/// Generate name for the table itself
432	pub fn generate_name(&self, rn: &mut RenameMap) {
433		if self.db_assigned(rn) {
434			return;
435		}
436		let id = self.id().name();
437		// FIXME: Report error on truncation
438		let id = if self.attrlist.get_single("pgnc", "as_is") == Ok(true) {
439			id
440		} else {
441			let id = to_snake_case(&id);
442			to_plural(&id)
443		};
444		self.set_db(rn, DbIdent::new(&id));
445	}
446}
447
448pub fn check_unique_in_table(table: &SchemaTable, diagnostics: &mut Report) {
449	let seen = &mut HashSet::new();
450	for column in table.columns() {
451		check_unique(seen, column.id().to_unknown(), diagnostics);
452	}
453}
454pub fn check_unique_in_enum(en: &SchemaEnum, diagnostics: &mut Report) {
455	let seen = &mut HashSet::new();
456	for item in en.items() {
457		check_unique(seen, item.id().to_unknown(), diagnostics);
458	}
459}
460pub fn check_unique_in_composite(comp: &SchemaComposite, diagnostics: &mut Report) {
461	let seen = &mut HashSet::new();
462	for field in comp.fields() {
463		check_unique(seen, field.id().to_unknown(), diagnostics);
464	}
465}
466
467pub fn check_unique_mixin_identifiers(schema: &Schema, diagnostics: &mut Report) {
468	let seen = &mut HashSet::new();
469	for mixin in &schema.0 {
470		let Item::Mixin(mixin) = mixin else {
471			continue;
472		};
473		check_unique(seen, mixin.id(), diagnostics);
474	}
475}
476
477fn check_unique<K>(seen: &mut HashSet<Ident<K>>, id: Ident<K>, diagnostics: &mut Report) {
478	if !seen.insert(id) {
479		let old = seen.get(&id).expect("exists");
480
481		// old.span()
482		diagnostics
483			.error("duplicate identifier")
484			.annotate("declared here", id.span())
485			.annotate("previously declared here", old.span());
486	}
487}
488
489/// Basic check if there is any definition with duplicate identifier
490/// Database identifiers are not checked here, because they might be only available after naming convention processing,
491/// and there is some variance between different database implementations.
492/// E.g some databases allow domain and table types to be conflicting, and some not, there is different behavior for
493/// truncation, there might be forbidden system tables, and so on.
494/// Identifiers are unique to immigrant, so checking of them is trivial.
495pub fn check_unique_identifiers(schema: &Schema, diagnostics: &mut Report) {
496	let seen = &mut HashSet::new();
497	for item in &schema.items() {
498		match item {
499			SchemaItem::Table(t) => {
500				check_unique(seen, t.id().to_unknown(), diagnostics);
501				check_unique_in_table(t, diagnostics);
502			}
503			SchemaItem::Enum(e) => {
504				check_unique(seen, e.id().to_unknown(), diagnostics);
505				check_unique_in_enum(e, diagnostics);
506			}
507			SchemaItem::Scalar(s) => {
508				check_unique(seen, s.id().to_unknown(), diagnostics);
509			}
510			SchemaItem::Composite(c) => {
511				check_unique(seen, c.id().to_unknown(), diagnostics);
512				check_unique_in_composite(c, diagnostics);
513			}
514			SchemaItem::View(v) => {
515				check_unique(seen, v.id().to_unknown(), diagnostics);
516			}
517		}
518	}
519}