data_modelling_sdk/validation/input.rs
//! Input validation and sanitization utilities.
//!
//! This module provides functions for validating and sanitizing user input
//! before it is processed. These functions are used by import parsers and
//! storage backends to ensure data integrity and security.
//!
//! # Security
//!
//! Input validation helps prevent:
//! - SQL injection via malicious table/column names
//! - Path traversal via malicious file paths
//! - Buffer overflows via excessively long inputs
//! - Unicode normalization attacks
14
15use serde::{Deserialize, Serialize};
16use thiserror::Error;
17use uuid::Uuid;
18
/// Maximum length for table names.
///
/// `validate_table_name` compares this against `str::len`, i.e. the UTF-8
/// byte length of the name.
pub const MAX_TABLE_NAME_LENGTH: usize = 255;

/// Maximum length for column names.
///
/// `validate_column_name` compares this against `str::len`, i.e. the UTF-8
/// byte length of the name (dots of nested names included).
pub const MAX_COLUMN_NAME_LENGTH: usize = 255;

/// Maximum length for identifiers in general.
///
/// Used by `validate_data_type` as the byte-length limit for data type strings.
pub const MAX_IDENTIFIER_LENGTH: usize = 255;

/// Maximum length for descriptions.
///
/// `validate_description` compares this against the UTF-8 byte length.
pub const MAX_DESCRIPTION_LENGTH: usize = 10000;

/// Maximum file size for BPMN/DMN models (10MB), in bytes.
///
/// A size equal to this value is still accepted by `validate_bpmn_dmn_file_size`.
pub const MAX_BPMN_DMN_FILE_SIZE: u64 = 10 * 1024 * 1024;

/// Maximum file size for OpenAPI specifications (5MB), in bytes.
///
/// A size equal to this value is still accepted by `validate_openapi_file_size`.
pub const MAX_OPENAPI_FILE_SIZE: u64 = 5 * 1024 * 1024;

/// Maximum length for model names (filenames).
///
/// `sanitize_model_name` uses this as the truncation threshold for the
/// byte length of the sanitized name.
pub const MAX_MODEL_NAME_LENGTH: usize = 255;
39
/// Errors that can occur during input validation.
///
/// The `Display` text of each variant is produced from its `#[error(...)]`
/// attribute. The `&'static str` payloads are human-readable field labels
/// (for example `"table name"`); in this module every call site passes a
/// string literal for them.
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum ValidationError {
    /// Input is empty when a value is required.
    ///
    /// The payload is the label of the field that was empty.
    #[error("{0} cannot be empty")]
    Empty(&'static str),

    /// Input exceeds maximum allowed length.
    ///
    /// Also used by the file-size validators in this module, in which case
    /// `max` and `actual` are sizes in bytes.
    #[error("{field} exceeds maximum length (max: {max}, got: {actual})")]
    TooLong {
        /// Label of the offending field.
        field: &'static str,
        /// The limit that was exceeded.
        max: usize,
        /// The length (or size) that was actually supplied.
        actual: usize,
    },

    /// Input contains invalid characters.
    ///
    /// `field` is the label of the offending field; `reason` describes the
    /// rejected character or pattern.
    #[error("{field} contains invalid characters: {reason}")]
    InvalidCharacters { field: &'static str, reason: String },

    /// Input has invalid format.
    ///
    /// The first payload is the field label, the second a description of
    /// what is wrong with the format.
    #[error("{0}: {1}")]
    InvalidFormat(&'static str, String),

    /// Input is a reserved word.
    ///
    /// `word` holds the rejected input exactly as it was supplied.
    #[error("{field} cannot be a reserved word: {word}")]
    ReservedWord { field: &'static str, word: String },
}
67
/// Result type for validation operations.
///
/// Shorthand for `Result<T, ValidationError>`; it is the return type of the
/// `validate_*` functions in this module.
pub type ValidationResult<T> = Result<T, ValidationError>;
70
71/// Validate a table name.
72///
73/// # Rules
74///
75/// - Must not be empty
76/// - Must not exceed 255 characters
77/// - Must start with a letter or underscore
78/// - May contain letters, digits, underscores, and hyphens
79/// - Cannot be a SQL reserved word
80///
81/// # Examples
82///
83/// ```
84/// use data_modelling_sdk::validation::input::validate_table_name;
85///
86/// assert!(validate_table_name("users").is_ok());
87/// assert!(validate_table_name("user_orders").is_ok());
88/// assert!(validate_table_name("").is_err());
89/// assert!(validate_table_name("123_invalid").is_err());
90/// ```
91pub fn validate_table_name(name: &str) -> ValidationResult<()> {
92 if name.is_empty() {
93 return Err(ValidationError::Empty("table name"));
94 }
95
96 if name.len() > MAX_TABLE_NAME_LENGTH {
97 return Err(ValidationError::TooLong {
98 field: "table name",
99 max: MAX_TABLE_NAME_LENGTH,
100 actual: name.len(),
101 });
102 }
103
104 // Must start with a letter or underscore
105 let first_char = name.chars().next().unwrap();
106 if !first_char.is_alphabetic() && first_char != '_' {
107 return Err(ValidationError::InvalidFormat(
108 "table name",
109 "must start with a letter or underscore".to_string(),
110 ));
111 }
112
113 // May contain letters, digits, underscores, and hyphens
114 for c in name.chars() {
115 if !c.is_alphanumeric() && c != '_' && c != '-' {
116 return Err(ValidationError::InvalidCharacters {
117 field: "table name",
118 reason: format!("invalid character: '{}'", c),
119 });
120 }
121 }
122
123 // Check for SQL reserved words (basic set)
124 if is_sql_reserved_word(name) {
125 return Err(ValidationError::ReservedWord {
126 field: "table name",
127 word: name.to_string(),
128 });
129 }
130
131 Ok(())
132}
133
134/// Validate a column name.
135///
136/// # Rules
137///
138/// - Must not be empty
139/// - Must not exceed 255 characters
140/// - Must start with a letter or underscore
141/// - May contain letters, digits, underscores, hyphens, and dots (for nested columns)
142/// - Cannot be a SQL reserved word (unless nested)
143///
144/// # Examples
145///
146/// ```
147/// use data_modelling_sdk::validation::input::validate_column_name;
148///
149/// assert!(validate_column_name("id").is_ok());
150/// assert!(validate_column_name("user_name").is_ok());
151/// assert!(validate_column_name("address.street").is_ok()); // nested column
152/// assert!(validate_column_name("").is_err());
153/// ```
154pub fn validate_column_name(name: &str) -> ValidationResult<()> {
155 if name.is_empty() {
156 return Err(ValidationError::Empty("column name"));
157 }
158
159 if name.len() > MAX_COLUMN_NAME_LENGTH {
160 return Err(ValidationError::TooLong {
161 field: "column name",
162 max: MAX_COLUMN_NAME_LENGTH,
163 actual: name.len(),
164 });
165 }
166
167 // Must start with a letter or underscore
168 let first_char = name.chars().next().unwrap();
169 if !first_char.is_alphabetic() && first_char != '_' {
170 return Err(ValidationError::InvalidFormat(
171 "column name",
172 "must start with a letter or underscore".to_string(),
173 ));
174 }
175
176 // May contain letters, digits, underscores, hyphens, and dots (for nested columns)
177 for c in name.chars() {
178 if !c.is_alphanumeric() && c != '_' && c != '-' && c != '.' {
179 return Err(ValidationError::InvalidCharacters {
180 field: "column name",
181 reason: format!("invalid character: '{}'", c),
182 });
183 }
184 }
185
186 // Check for SQL reserved words (only for non-nested column names)
187 if !name.contains('.') && is_sql_reserved_word(name) {
188 return Err(ValidationError::ReservedWord {
189 field: "column name",
190 word: name.to_string(),
191 });
192 }
193
194 Ok(())
195}
196
197/// Validate a UUID string.
198///
199/// # Examples
200///
201/// ```
202/// use data_modelling_sdk::validation::input::validate_uuid;
203///
204/// assert!(validate_uuid("550e8400-e29b-41d4-a716-446655440000").is_ok());
205/// assert!(validate_uuid("not-a-uuid").is_err());
206/// ```
207pub fn validate_uuid(id: &str) -> ValidationResult<Uuid> {
208 Uuid::parse_str(id)
209 .map_err(|e| ValidationError::InvalidFormat("UUID", format!("invalid UUID format: {}", e)))
210}
211
212/// Validate a data type string.
213///
214/// # Rules
215///
216/// - Must not be empty
217/// - Must only contain safe characters (no SQL injection)
218/// - Must match known data type patterns
219///
220/// # Examples
221///
222/// ```
223/// use data_modelling_sdk::validation::input::validate_data_type;
224///
225/// assert!(validate_data_type("VARCHAR(255)").is_ok());
226/// assert!(validate_data_type("INTEGER").is_ok());
227/// assert!(validate_data_type("ARRAY<STRING>").is_ok());
228/// assert!(validate_data_type("'; DROP TABLE users;--").is_err());
229/// ```
230pub fn validate_data_type(data_type: &str) -> ValidationResult<()> {
231 if data_type.is_empty() {
232 return Err(ValidationError::Empty("data type"));
233 }
234
235 if data_type.len() > MAX_IDENTIFIER_LENGTH {
236 return Err(ValidationError::TooLong {
237 field: "data type",
238 max: MAX_IDENTIFIER_LENGTH,
239 actual: data_type.len(),
240 });
241 }
242
243 // Check for dangerous patterns
244 let lower = data_type.to_lowercase();
245 if lower.contains(';') || lower.contains("--") || lower.contains("/*") {
246 return Err(ValidationError::InvalidCharacters {
247 field: "data type",
248 reason: "contains SQL comment or statement separator".to_string(),
249 });
250 }
251
252 // Allow alphanumeric, parentheses, commas, spaces, underscores, angle brackets
253 for c in data_type.chars() {
254 if !c.is_alphanumeric()
255 && c != '('
256 && c != ')'
257 && c != ','
258 && c != ' '
259 && c != '_'
260 && c != '<'
261 && c != '>'
262 && c != '['
263 && c != ']'
264 {
265 return Err(ValidationError::InvalidCharacters {
266 field: "data type",
267 reason: format!("invalid character: '{}'", c),
268 });
269 }
270 }
271
272 Ok(())
273}
274
275/// Validate a description string.
276///
277/// # Rules
278///
279/// - May be empty
280/// - Must not exceed 10000 characters
281/// - Control characters (except whitespace) are stripped
282pub fn validate_description(desc: &str) -> ValidationResult<()> {
283 if desc.len() > MAX_DESCRIPTION_LENGTH {
284 return Err(ValidationError::TooLong {
285 field: "description",
286 max: MAX_DESCRIPTION_LENGTH,
287 actual: desc.len(),
288 });
289 }
290
291 Ok(())
292}
293
/// Quote `name` as a SQL identifier for the given `dialect`.
///
/// The dialect is matched case-insensitively:
///
/// - `mysql` / `mariadb`: wrapped in backticks
/// - `sqlserver` / `mssql`: wrapped in `[` … `]`
/// - anything else: wrapped in double quotes (standard SQL, PostgreSQL, etc.)
///
/// Every occurrence of the closing delimiter inside `name` is doubled so it
/// cannot end the quoted identifier early. No other validation is performed;
/// an empty `name` yields an empty quoted identifier.
///
/// # Examples
///
/// ```
/// use data_modelling_sdk::validation::input::sanitize_sql_identifier;
///
/// assert_eq!(sanitize_sql_identifier("users", "postgres"), "\"users\"");
/// assert_eq!(sanitize_sql_identifier("user-orders", "mysql"), "`user-orders`");
/// ```
pub fn sanitize_sql_identifier(name: &str, dialect: &str) -> String {
    let (open, close) = match dialect.to_lowercase().as_str() {
        "mysql" | "mariadb" => ('`', '`'),
        "sqlserver" | "mssql" => ('[', ']'),
        _ => ('"', '"'),
    };

    // Two delimiters plus the name; embedded delimiters may grow it further.
    let mut quoted = String::with_capacity(name.len() + 2);
    quoted.push(open);
    for ch in name.chars() {
        if ch == close {
            // Escape by doubling the closing delimiter.
            quoted.push(close);
        }
        quoted.push(ch);
    }
    quoted.push(close);
    quoted
}
325
/// Sanitize a string for safe use in descriptions and comments.
///
/// Returns a copy of `desc` with every control character
/// (`char::is_control`) removed, except line feed (`\n`), tab (`\t`), and
/// carriage return (`\r`), which are kept. Nothing is escaped and all other
/// characters pass through unchanged.
pub fn sanitize_description(desc: &str) -> String {
    let mut cleaned = String::with_capacity(desc.len());
    for ch in desc.chars() {
        let keep = matches!(ch, '\n' | '\t' | '\r') || !ch.is_control();
        if keep {
            cleaned.push(ch);
        }
    }
    cleaned
}
335
/// Report whether `word` is one of the SQL reserved words known to this module.
///
/// The comparison is case-insensitive: `word` is lower-cased with
/// `str::to_lowercase` and compared against a fixed list. The list is a
/// basic set of keywords and type names common across SQL dialects, not a
/// complete list for any one dialect.
fn is_sql_reserved_word(word: &str) -> bool {
    #[rustfmt::skip]
    const RESERVED_WORDS: &[&str] = &[
        // statements
        "select", "from", "where", "insert", "update", "delete", "create", "drop", "alter",
        // schema objects
        "table", "index", "view", "database", "schema",
        // permissions and transactions
        "grant", "revoke", "commit", "rollback", "begin", "end", "transaction",
        // constraints
        "primary", "foreign", "key", "references", "constraint", "unique", "check", "default",
        // predicates and logic
        "not", "null", "and", "or", "in", "between", "like", "is",
        // conditional expressions and aliases
        "case", "when", "then", "else", "as", "on",
        // joins
        "join", "inner", "outer", "left", "right", "full", "cross", "natural", "using",
        // grouping, ordering, paging
        "group", "by", "having", "order", "asc", "desc", "limit", "offset",
        // set operations and quantifiers
        "union", "intersect", "except", "all", "distinct", "top",
        // data modification and routines
        "values", "set", "into", "exec", "execute", "procedure", "function", "trigger",
        // literals
        "true", "false",
        // type names
        "int", "integer", "varchar", "char", "text", "boolean", "date", "time", "timestamp",
        "float", "double", "decimal", "numeric",
    ];

    let lowered = word.to_lowercase();
    RESERVED_WORDS.iter().any(|reserved| *reserved == lowered)
}
435
436/// Sanitize a model name for use as a filename.
437///
438/// # Rules
439///
440/// - Removes or replaces invalid filename characters
441/// - Ensures the name is safe for use in file paths
442/// - Preserves alphanumeric characters, hyphens, underscores, and dots
443/// - Replaces invalid characters with underscores
444/// - Truncates to MAX_MODEL_NAME_LENGTH if needed
445///
446/// # Examples
447///
448/// ```
449/// use data_modelling_sdk::validation::input::sanitize_model_name;
450///
451/// assert_eq!(sanitize_model_name("my-model"), "my-model");
452/// assert_eq!(sanitize_model_name("my/model"), "my_model");
453/// assert_eq!(sanitize_model_name("my..model"), "my.model");
454/// ```
455pub fn sanitize_model_name(name: &str) -> String {
456 let mut sanitized = String::with_capacity(name.len());
457 let mut last_was_dot = false;
458
459 for ch in name.chars() {
460 match ch {
461 // Allow alphanumeric, hyphens, underscores
462 ch if ch.is_alphanumeric() || ch == '-' || ch == '_' => {
463 sanitized.push(ch);
464 last_was_dot = false;
465 }
466 // Allow single dots (but not consecutive)
467 '.' if !last_was_dot => {
468 sanitized.push('.');
469 last_was_dot = true;
470 }
471 // Replace invalid characters with underscore
472 _ => {
473 if !last_was_dot {
474 sanitized.push('_');
475 }
476 last_was_dot = false;
477 }
478 }
479
480 // Truncate if too long
481 if sanitized.len() >= MAX_MODEL_NAME_LENGTH {
482 break;
483 }
484 }
485
486 // Remove trailing dots and underscores
487 sanitized = sanitized.trim_end_matches(['.', '_']).to_string();
488
489 // Ensure not empty
490 if sanitized.is_empty() {
491 sanitized = "model".to_string();
492 }
493
494 sanitized
495}
496
497/// Validate file size for BPMN/DMN models.
498///
499/// # Arguments
500///
501/// * `file_size` - File size in bytes
502///
503/// # Returns
504///
505/// `ValidationResult<()>` indicating whether the file size is valid
506pub fn validate_bpmn_dmn_file_size(file_size: u64) -> ValidationResult<()> {
507 if file_size > MAX_BPMN_DMN_FILE_SIZE {
508 return Err(ValidationError::TooLong {
509 field: "BPMN/DMN file size",
510 max: MAX_BPMN_DMN_FILE_SIZE as usize,
511 actual: file_size as usize,
512 });
513 }
514 Ok(())
515}
516
517/// Validate file size for OpenAPI specifications.
518///
519/// # Arguments
520///
521/// * `file_size` - File size in bytes
522///
523/// # Returns
524///
525/// `ValidationResult<()>` indicating whether the file size is valid
526pub fn validate_openapi_file_size(file_size: u64) -> ValidationResult<()> {
527 if file_size > MAX_OPENAPI_FILE_SIZE {
528 return Err(ValidationError::TooLong {
529 field: "OpenAPI file size",
530 max: MAX_OPENAPI_FILE_SIZE as usize,
531 actual: file_size as usize,
532 });
533 }
534 Ok(())
535}