dbml_rs/analyzer/
mod.rs

1use alloc::collections::BTreeSet;
2use alloc::string::ToString;
3use alloc::vec::Vec;
4use core::str::FromStr;
5
6use self::err::*;
7use crate::ast::*;
8use crate::DEFAULT_SCHEMA;
9
10mod err;
11mod helper;
12mod indexer;
13
14use helper::*;
15use indexer::*;
16
17/// Represents a struct of indexing information after analyzing.
18#[derive(Debug, Clone)]
19pub struct AnalyzedIndexer {
20  pub indexed_refs: Vec<IndexedRef>,
21  pub indexer: Indexer,
22}
23
24/// Represents a reference to a table, indicating relationships between tables.
25#[derive(Debug, Clone)]
26pub struct TableRef {
27  /// References that this table points to, such as foreign keys in its fields.
28  pub ref_to: Vec<IndexedRef>,
29  /// References to this table, indicating other tables that have foreign keys pointing to it.
30  pub ref_by: Vec<IndexedRef>,
31  /// Self-references within this table.
32  pub ref_self: Vec<IndexedRef>,
33}
34
35/// Performs semantic checks of the unsanitized AST and returns an indexed metadata.
36/// This function also mutates the internal structure of the AST by changing column types after validating.
37///
38///
39/// # Arguments
40///
41/// * `schema_block` - A reference to the unsanitized AST representing the parsed DBML text.
42///
43/// # Returns
44///
45/// An `AnalyzerResult<AnalyzedIndexer>`, which is an alias for the result of semantic analysis.
46/// It contains the indexing metadata for the collecting table relations and block names representing the parsed and analyzed DBML.
47///
48/// # Examples
49///
50/// ```rs
51/// use dbml_rs::{parse_dbml, analyze};
52///
53/// let dbml_text = r#"
54///     Table users {
55///         id int
56///         username varchar
57///     }
58/// "#;
59///
60/// let result = parse_dbml(dbml_text);
61/// assert!(result.is_ok());
62/// let ast = result.unwrap();
63///
64/// let analyzing_result = analyze(&ast);
65/// // Now we can guarantee that `ast` is sanitized and semantically checked.
66/// assert!(analyzing_result.is_ok());
67/// let analyzed_indexer = analyzing_result.unwrap();
68/// // of the parsed and analyzed DBML text.
69/// ```
70pub fn analyze(schema_block: &SchemaBlock) -> AnalyzerResult<AnalyzedIndexer> {
71  let input = schema_block.input;
72  let projects = schema_block.projects();
73  let tables = schema_block.tables();
74  let table_groups = schema_block.table_groups();
75  let refs = schema_block.refs();
76  let enums = schema_block.enums();
77
78  // check project block
79  if projects.len() > 1 {
80    throw_err(Err::DuplicateProjectSetting, &schema_block.span_range, input)?;
81  }
82  match projects.first() {
83    Some(project_block) => {
84      check_prop_duplicate_keys(&project_block.properties, input)?;
85    }
86    _ => throw_err(Err::ProjectSettingNotFound, &schema_block.span_range, input)?,
87  }
88
89  // collect tables
90  let mut indexer = Indexer::default();
91  let mut indexed_refs: Vec<_> = refs.into_iter().cloned().map(IndexedRef::from).collect();
92
93  // start indexing the schema
94  indexer.index_table(&tables, input)?;
95  indexer.index_enums(&enums, input)?;
96  indexer.index_table_groups(&table_groups, input)?;
97
98  // index inside the table itself
99  for table in &tables {
100    let mut tmp_table_indexer = TableIndexer::default();
101
102    // validate columns
103    for col in &table.cols {
104      if let Some(settings) = &col.settings {
105        if settings.is_pk {
106          if !tmp_table_indexer.pk_list.is_empty() {
107            throw_err(Err::DuplicatePrimaryKey, &col.span_range, input)?;
108          }
109          if settings.nullable == Some(Nullable::Null) {
110            throw_err(Err::NullablePrimaryKey, &col.span_range, input)?;
111          }
112          if !col.r#type.arrays.is_empty() {
113            throw_err(Err::ArrayPrimaryKey, &col.span_range, input)?;
114          }
115
116          tmp_table_indexer.pk_list.push(col.name.to_string.clone())
117        }
118        if settings.is_unique {
119          tmp_table_indexer
120            .unique_list
121            .push(BTreeSet::from([col.name.to_string.clone()]))
122        }
123
124        let filtered: BTreeSet<_> = settings
125          .attributes
126          .iter()
127          .filter(|&a| ["not null", "null"].contains(&a.key.to_string.as_str()))
128          .map(|a| &a.key.to_string)
129          .collect();
130
131        if filtered.len() == 2 {
132          throw_err(Err::ConflictNullableSetting, &settings.span_range, input)?;
133        }
134      }
135
136      // collect refs from columns
137      let indexed_ref = IndexedRef::from_inline(
138        col.settings.clone().map(|s| s.refs).unwrap_or_default(),
139        &table.ident,
140        &col.name,
141      );
142
143      indexed_refs.extend(indexed_ref);
144    }
145
146    // validate indexes block
147    if let Some(indexes_block) = &table.indexes {
148      for def in &indexes_block.defs {
149        if def.cols.is_empty() {
150          throw_err(Err::EmptyIndexesBlock, &indexes_block.span_range, input)?;
151        }
152
153        let idents: Vec<_> = def
154          .cols
155          .iter()
156          .filter_map(|id| {
157            match id {
158              IndexesColumnType::String(s) => Some(s),
159              _ => None,
160            }
161          })
162          .cloned()
163          .collect();
164        let ident_strings: Vec<_> = idents.iter().map(|s| s.to_string.clone()).collect();
165
166        for ident in &def.cols {
167          if let IndexesColumnType::String(col_name) = ident {
168            if !table.cols.iter().any(|col| col.name.to_string == col_name.to_string) {
169              throw_err(Err::ColumnNotFound, &col_name.span_range, input)?;
170            }
171          }
172        }
173
174        match &def.settings {
175          Some(settings) => {
176            check_attr_duplicate_keys(&settings.attributes, input)?;
177
178            if vec![settings.is_pk, settings.is_unique, settings.r#type.is_some()]
179              .into_iter()
180              .filter(|x| *x)
181              .count()
182              > 1
183            {
184              throw_err(Err::InvalidIndexesSetting, &settings.span_range, input)?;
185            }
186
187            if settings.is_pk {
188              if !tmp_table_indexer.pk_list.is_empty() {
189                throw_err(Err::DuplicatePrimaryKey, &def.span_range, input)?;
190              }
191
192              tmp_table_indexer.pk_list.extend(ident_strings.clone())
193            } else if settings.is_unique {
194              if tmp_table_indexer
195                .unique_list
196                .iter()
197                .any(|uniq_item| idents.iter().all(|id| uniq_item.contains(&id.to_string)))
198              {
199                throw_err(Err::DuplicateUniqueKey, &def.span_range, input)?;
200              }
201
202              tmp_table_indexer
203                .unique_list
204                .push(ident_strings.clone().into_iter().collect())
205            }
206
207            if settings.r#type.is_some() {
208              if tmp_table_indexer
209                .indexed_list
210                .iter()
211                .any(|(idx_item, idx_type)| idx_item == &ident_strings && idx_type == &settings.r#type)
212              {
213                throw_err(Err::DuplicateIndexKey, &def.span_range, input)?;
214              }
215
216              tmp_table_indexer
217                .indexed_list
218                .push((ident_strings, settings.r#type.clone()));
219            }
220          }
221          None => {
222            if tmp_table_indexer
223              .indexed_list
224              .iter()
225              .any(|(idx_item, _)| idx_item == &ident_strings)
226            {
227              throw_err(Err::DuplicateIndexKey, &def.span_range, input)?;
228            }
229
230            tmp_table_indexer.indexed_list.push((ident_strings, None))
231          }
232        };
233      }
234    }
235  }
236
237  // validate table column types
238  let tables = tables
239    .into_iter()
240    .map(|table| {
241      let cols = table.cols
242        .clone()
243        .into_iter()
244        .map(|col| {
245          let type_name = col.r#type.type_name;
246
247          let type_name = match type_name {
248            ColumnTypeName::Undef => {
249              unreachable!("undef field type must not appear");
250            }
251            ColumnTypeName::Raw(raw_type) => {
252              match ColumnTypeName::from_str(&raw_type) {
253                Ok(type_name) => {
254                  if !col.r#type.args.is_empty() {
255                    // TODO: add support for interval
256                    match type_name {
257                      ColumnTypeName::VarChar
258                      | ColumnTypeName::Char
259                      | ColumnTypeName::Time
260                      | ColumnTypeName::Timestamp
261                      | ColumnTypeName::Timetz
262                      | ColumnTypeName::Timestamptz
263                      | ColumnTypeName::Bit
264                      | ColumnTypeName::Varbit => {
265                        if col.r#type.args.len() != 1 {
266                          throw_err(Err::InvalidDataTypeArguments { raw_type, n_arg: 1 }, &col.r#type.span_range, input)?;
267                        }
268                      }
269                      ColumnTypeName::Decimal => {
270                        if col.r#type.args.len() != 2 {
271                          throw_err(Err::InvalidDataTypeArguments { raw_type, n_arg: 2 }, &col.r#type.span_range, input)?;
272                        }
273                      }
274                      _ =>  {
275                        throw_err(Err::InvalidDataTypeArguments { raw_type, n_arg: 0 }, &col.r#type.span_range, input)?
276                      },
277                    };
278
279                    if !col.r#type.args.iter().all(|arg| matches!(arg, Value::Integer(_))) {
280                      throw_err(Err::InvalidArgumentValue, &col.r#type.span_range, input)?;
281                    }
282                  }
283                  
284                  type_name
285                }
286                Err(_) => {
287                  let splited: Vec<_> = raw_type.split('.').collect();
288
289                  let (enum_schema, enum_name) = match splited.len() {
290                    1 => (None, raw_type),
291                    2 => (Some(splited[0].to_string()), splited[1].to_string()),
292                    _ => throw_err(Err::InvalidEnum, &col.r#type.span_range, input)?,
293                  };
294
295                  match &col.settings {
296                    Some(ColumnSettings { attributes, default: Some(default_value), .. }) => {
297                      let default_value_span = attributes.iter()
298                        .find_map(|attr| {
299                          (attr.key.to_string == "default").then(|| attr.value.as_ref().map(|v| &v.span_range))
300                        })
301                        .and_then(|opt_span| opt_span)
302                        .unwrap_or_else(|| unreachable!("default value is missing"));
303
304                      match indexer.lookup_enum_values(&enum_schema, &enum_name, &vec![default_value.to_string()]) {
305                        (false, (_, _)) => throw_err(Err::SchemaNotFound, &col.r#type.span_range, input)?,
306                        (true, (false, _)) => throw_err(Err::EnumNotFound, &col.r#type.span_range, input)?,
307                        (true, (true, f)) if f.iter().any(|f| f == &false) => throw_err(Err::EnumValueNotFound, &default_value_span, input)?,
308                        _ => ColumnTypeName::Enum(enum_name)
309                      }
310                    }
311                    _ => {
312                      ColumnTypeName::Enum(enum_name)
313                    }
314                  }
315                }
316              }
317            }
318            _ => unreachable!("preprocessing data type name is not raw"),
319          };
320
321          // TODO: add more validation
322          if let Some(ColumnSettings { attributes, default: Some(default_value), .. }) = &col.settings {
323            let span_range = attributes.iter()
324              .find_map(|attr| {
325                (attr.key.to_string == "default").then(|| attr.value.as_ref().map(|v| &v.span_range))
326              })
327              .and_then(|opt_span| opt_span)
328              .unwrap_or_else(|| unreachable!("default value is missing"));
329
330            // validate default value association with a col type
331            match default_value {
332              Value::Enum(_) => (),
333              Value::String(val) => {
334                let err = Err::InvalidDefaultValue { raw_value: val.clone(), raw_type: col.r#type.raw.clone() };
335
336                // TODO: validate which type can be strings
337
338                // validate fixed and variable length data type
339                match type_name {
340                  ColumnTypeName::Bit
341                  | ColumnTypeName::Char
342                  if matches!(col.r#type.args[0], Value::Integer(len) if val.len() as i64 != len) => {
343                    throw_err(err.clone(), &span_range, input)?;
344                  }
345                  ColumnTypeName::Varbit
346                  | ColumnTypeName::VarChar
347                  if matches!(col.r#type.args[0], Value::Integer(cap) if val.len() as i64 > cap) => {
348                    throw_err(err.clone(), &span_range, input)?;
349                  }
350                  _ => ()
351                };
352              },
353              Value::Integer(val) => {
354                let err = Err::DataTypeExceeded { raw_type: col.r#type.raw.clone() };
355
356                // TODO: validate which type can be numbers
357
358                match type_name {
359                  ColumnTypeName::SmallInt
360                  if (*val > i16::MAX as i64) || (*val < i16::MIN as i64) => {
361                    throw_err(err.clone(), &span_range, input)?;
362                  }
363                  ColumnTypeName::Integer
364                  if (*val > i32::MAX as i64) || (*val < i32::MIN as i64) => {
365                    throw_err(err.clone(), &span_range, input)?;
366                  }
367                  ColumnTypeName::BigInt
368                  if val.overflowing_add(1).1 || val.overflowing_sub(1).1 => {
369                    throw_err(err.clone(), &span_range, input)?;
370                  }
371                  _ => ()
372                };
373              },
374              Value::Decimal(_) => (),
375              Value::Bool(val) => {
376                if ![ColumnTypeName::Bool].contains(&type_name) {
377                  throw_err(Err::InvalidDefaultValue { raw_value: val.to_string(), raw_type: col.r#type.raw.clone() }, &span_range, input)?;
378                }
379              }
380              Value::HexColor(_) => (),
381              Value::Expr(_) => (),
382              Value::Null => {
383                if !col.settings.as_ref().is_some_and(|s| s.nullable == Some(Nullable::Null)) {
384                  throw_err(Err::DefaultNullInNonNullable, &span_range, input)?;
385                }
386              }
387            }
388          }
389
390          Ok(TableColumn {
391            r#type: ColumnType {
392              type_name,
393              ..col.r#type
394            },
395            ..col
396          })
397        })
398        .collect::<AnalyzerResult<_>>()?;
399
400      Ok(TableBlock { cols, ..table.clone() })
401    })
402    .collect::<AnalyzerResult<_>>()?;
403
404  // validate ref
405  for indexed_ref in &indexed_refs {
406    if let Some(settings) = &indexed_ref.settings {
407      check_attr_duplicate_keys(&settings.attributes, input)?;
408    }
409
410    indexed_ref.validate_ref_type(&tables, &indexer, input)?;
411
412    let count = indexed_refs
413      .iter()
414      .filter(|other_indexed_ref| indexed_ref.occupy_same_column(other_indexed_ref, &indexer))
415      .count();
416
417    if count != 1 {
418      throw_err(Err::ConflictRelation, &indexed_ref.span_range, input)?;
419    }
420  }
421
422  Ok(AnalyzedIndexer { indexed_refs, indexer })
423}