data_modelling_sdk/validation/input.rs
//! Input validation and sanitization utilities.
//!
//! This module provides functions for validating and sanitizing user input
//! before it is processed. These functions are used by import parsers and
//! storage backends to ensure data integrity and security.
//!
//! # Security
//!
//! Input validation prevents:
//! - SQL injection via malicious table/column names
//! - Path traversal via malicious file paths
//! - Buffer overflows via excessively long inputs
//! - Unicode normalization attacks
14
15use serde::{Deserialize, Serialize};
16use thiserror::Error;
17use uuid::Uuid;
18
/// Maximum length for table names.
///
/// Compared against `str::len()`, i.e. the UTF-8 byte length of the name.
pub const MAX_TABLE_NAME_LENGTH: usize = 255;

/// Maximum length for column names.
///
/// Compared against `str::len()`, i.e. the UTF-8 byte length of the name.
pub const MAX_COLUMN_NAME_LENGTH: usize = 255;

/// Maximum length for identifiers in general.
///
/// Used as the byte-length limit for data type strings.
pub const MAX_IDENTIFIER_LENGTH: usize = 255;

/// Maximum length for descriptions.
///
/// Compared against `str::len()`, i.e. the UTF-8 byte length of the text.
pub const MAX_DESCRIPTION_LENGTH: usize = 10000;

/// Maximum file size for BPMN/DMN models (10 MiB), in bytes.
pub const MAX_BPMN_DMN_FILE_SIZE: u64 = 10 * 1024 * 1024;

/// Maximum file size for OpenAPI specifications (5 MiB), in bytes.
pub const MAX_OPENAPI_FILE_SIZE: u64 = 5 * 1024 * 1024;

/// Maximum length for model names (filenames).
///
/// Compared against the UTF-8 byte length of the sanitized name.
pub const MAX_MODEL_NAME_LENGTH: usize = 255;
39
/// Errors that can occur during input validation.
///
/// The `Display` text of every variant is defined by its `#[error(...)]`
/// attribute. Field labels are `&'static str` literals supplied by the
/// validators (for example `"table name"`).
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum ValidationError {
    /// Input is empty when a value is required.
    ///
    /// The payload is the label of the field that was empty.
    #[error("{0} cannot be empty")]
    Empty(&'static str),

    /// Input exceeds maximum allowed length.
    ///
    /// The string validators in this module fill `max` and `actual` with
    /// `str::len()` byte counts; the file-size validators reuse this variant
    /// with sizes in bytes.
    #[error("{field} exceeds maximum length (max: {max}, got: {actual})")]
    TooLong {
        field: &'static str,
        max: usize,
        actual: usize,
    },

    /// Input contains invalid characters.
    ///
    /// `reason` is a human-readable explanation, typically naming the first
    /// offending character.
    #[error("{field} contains invalid characters: {reason}")]
    InvalidCharacters { field: &'static str, reason: String },

    /// Input has invalid format.
    ///
    /// The first payload is the field label, the second describes what is
    /// wrong with the format.
    #[error("{0}: {1}")]
    InvalidFormat(&'static str, String),

    /// Input is a reserved word.
    ///
    /// `word` carries the rejected input exactly as it was supplied.
    #[error("{field} cannot be a reserved word: {word}")]
    ReservedWord { field: &'static str, word: String },
}
67
/// Result type for validation operations.
///
/// Shorthand for `Result<T, ValidationError>`.
pub type ValidationResult<T> = Result<T, ValidationError>;
70
71/// Validate a table name.
72///
73/// # Rules
74///
75/// - Must not be empty
76/// - Must not exceed 255 characters
77/// - Must start with a letter or underscore
78/// - May contain letters, digits, underscores, and hyphens
79/// - Cannot be a SQL reserved word
80///
81/// # Examples
82///
83/// ```
84/// use data_modelling_sdk::validation::input::validate_table_name;
85///
86/// assert!(validate_table_name("users").is_ok());
87/// assert!(validate_table_name("user_orders").is_ok());
88/// assert!(validate_table_name("").is_err());
89/// assert!(validate_table_name("123_invalid").is_err());
90/// ```
91pub fn validate_table_name(name: &str) -> ValidationResult<()> {
92 if name.is_empty() {
93 return Err(ValidationError::Empty("table name"));
94 }
95
96 if name.len() > MAX_TABLE_NAME_LENGTH {
97 return Err(ValidationError::TooLong {
98 field: "table name",
99 max: MAX_TABLE_NAME_LENGTH,
100 actual: name.len(),
101 });
102 }
103
104 // Must start with a letter or underscore
105 // Note: unwrap is safe here due to the empty check above, but we use match for clarity
106 let first_char = match name.chars().next() {
107 Some(c) => c,
108 None => return Err(ValidationError::Empty("table name")),
109 };
110 if !first_char.is_alphabetic() && first_char != '_' {
111 return Err(ValidationError::InvalidFormat(
112 "table name",
113 "must start with a letter or underscore".to_string(),
114 ));
115 }
116
117 // May contain letters, digits, underscores, and hyphens
118 for c in name.chars() {
119 if !c.is_alphanumeric() && c != '_' && c != '-' {
120 return Err(ValidationError::InvalidCharacters {
121 field: "table name",
122 reason: format!("invalid character: '{}'", c),
123 });
124 }
125 }
126
127 // Check for SQL reserved words (basic set)
128 if is_sql_reserved_word(name) {
129 return Err(ValidationError::ReservedWord {
130 field: "table name",
131 word: name.to_string(),
132 });
133 }
134
135 Ok(())
136}
137
138/// Validate a column name.
139///
140/// # Rules
141///
142/// - Must not be empty
143/// - Must not exceed 255 characters
144/// - Must start with a letter or underscore
145/// - May contain letters, digits, underscores, hyphens, and dots (for nested columns)
146/// - Cannot be a SQL reserved word (unless nested)
147///
148/// # Examples
149///
150/// ```
151/// use data_modelling_sdk::validation::input::validate_column_name;
152///
153/// assert!(validate_column_name("id").is_ok());
154/// assert!(validate_column_name("user_name").is_ok());
155/// assert!(validate_column_name("address.street").is_ok()); // nested column
156/// assert!(validate_column_name("").is_err());
157/// ```
158pub fn validate_column_name(name: &str) -> ValidationResult<()> {
159 if name.is_empty() {
160 return Err(ValidationError::Empty("column name"));
161 }
162
163 if name.len() > MAX_COLUMN_NAME_LENGTH {
164 return Err(ValidationError::TooLong {
165 field: "column name",
166 max: MAX_COLUMN_NAME_LENGTH,
167 actual: name.len(),
168 });
169 }
170
171 // Must start with a letter or underscore
172 // Note: unwrap is safe here due to the empty check above, but we use match for clarity
173 let first_char = match name.chars().next() {
174 Some(c) => c,
175 None => return Err(ValidationError::Empty("column name")),
176 };
177 if !first_char.is_alphabetic() && first_char != '_' {
178 return Err(ValidationError::InvalidFormat(
179 "column name",
180 "must start with a letter or underscore".to_string(),
181 ));
182 }
183
184 // May contain letters, digits, underscores, hyphens, and dots (for nested columns)
185 for c in name.chars() {
186 if !c.is_alphanumeric() && c != '_' && c != '-' && c != '.' {
187 return Err(ValidationError::InvalidCharacters {
188 field: "column name",
189 reason: format!("invalid character: '{}'", c),
190 });
191 }
192 }
193
194 // Check for SQL reserved words (only for non-nested column names)
195 if !name.contains('.') && is_sql_reserved_word(name) {
196 return Err(ValidationError::ReservedWord {
197 field: "column name",
198 word: name.to_string(),
199 });
200 }
201
202 Ok(())
203}
204
205/// Validate a UUID string.
206///
207/// # Examples
208///
209/// ```
210/// use data_modelling_sdk::validation::input::validate_uuid;
211///
212/// assert!(validate_uuid("550e8400-e29b-41d4-a716-446655440000").is_ok());
213/// assert!(validate_uuid("not-a-uuid").is_err());
214/// ```
215pub fn validate_uuid(id: &str) -> ValidationResult<Uuid> {
216 Uuid::parse_str(id)
217 .map_err(|e| ValidationError::InvalidFormat("UUID", format!("invalid UUID format: {}", e)))
218}
219
220/// Validate a data type string.
221///
222/// # Rules
223///
224/// - Must not be empty
225/// - Must only contain safe characters (no SQL injection)
226/// - Must match known data type patterns
227///
228/// # Examples
229///
230/// ```
231/// use data_modelling_sdk::validation::input::validate_data_type;
232///
233/// assert!(validate_data_type("VARCHAR(255)").is_ok());
234/// assert!(validate_data_type("INTEGER").is_ok());
235/// assert!(validate_data_type("ARRAY<STRING>").is_ok());
236/// assert!(validate_data_type("'; DROP TABLE users;--").is_err());
237/// ```
238pub fn validate_data_type(data_type: &str) -> ValidationResult<()> {
239 if data_type.is_empty() {
240 return Err(ValidationError::Empty("data type"));
241 }
242
243 if data_type.len() > MAX_IDENTIFIER_LENGTH {
244 return Err(ValidationError::TooLong {
245 field: "data type",
246 max: MAX_IDENTIFIER_LENGTH,
247 actual: data_type.len(),
248 });
249 }
250
251 // Check for dangerous patterns
252 let lower = data_type.to_lowercase();
253 if lower.contains(';') || lower.contains("--") || lower.contains("/*") {
254 return Err(ValidationError::InvalidCharacters {
255 field: "data type",
256 reason: "contains SQL comment or statement separator".to_string(),
257 });
258 }
259
260 // Allow alphanumeric, parentheses, commas, spaces, underscores, angle brackets
261 for c in data_type.chars() {
262 if !c.is_alphanumeric()
263 && c != '('
264 && c != ')'
265 && c != ','
266 && c != ' '
267 && c != '_'
268 && c != '<'
269 && c != '>'
270 && c != '['
271 && c != ']'
272 {
273 return Err(ValidationError::InvalidCharacters {
274 field: "data type",
275 reason: format!("invalid character: '{}'", c),
276 });
277 }
278 }
279
280 Ok(())
281}
282
283/// Validate a description string.
284///
285/// # Rules
286///
287/// - May be empty
288/// - Must not exceed 10000 characters
289/// - Control characters (except whitespace) are stripped
290pub fn validate_description(desc: &str) -> ValidationResult<()> {
291 if desc.len() > MAX_DESCRIPTION_LENGTH {
292 return Err(ValidationError::TooLong {
293 field: "description",
294 max: MAX_DESCRIPTION_LENGTH,
295 actual: desc.len(),
296 });
297 }
298
299 Ok(())
300}
301
/// Sanitize a SQL identifier by quoting it.
///
/// The identifier is wrapped in the quote characters of the requested
/// dialect, and every occurrence of the closing quote character inside the
/// name is doubled so that it cannot terminate the quoted identifier early.
///
/// | dialect (case-insensitive) | quoting   |
/// |----------------------------|-----------|
/// | `mysql`, `mariadb`         | `` `x` `` |
/// | `sqlserver`, `mssql`       | `[x]`     |
/// | anything else              | `"x"`     |
///
/// # Examples
///
/// ```
/// use data_modelling_sdk::validation::input::sanitize_sql_identifier;
///
/// assert_eq!(sanitize_sql_identifier("users", "postgres"), "\"users\"");
/// assert_eq!(sanitize_sql_identifier("user-orders", "mysql"), "`user-orders`");
/// ```
pub fn sanitize_sql_identifier(name: &str, dialect: &str) -> String {
    // Opening and closing delimiters; only SQL Server uses an asymmetric pair.
    let (open, close) = match dialect.to_lowercase().as_str() {
        "mysql" | "mariadb" => ('`', '`'),
        "sqlserver" | "mssql" => ('[', ']'),
        _ => ('"', '"'), // Standard SQL, PostgreSQL, etc.
    };

    let mut quoted = String::with_capacity(name.len() + 2);
    quoted.push(open);
    for ch in name.chars() {
        // Double the closing delimiter so it is read as a literal character.
        if ch == close {
            quoted.push(close);
        }
        quoted.push(ch);
    }
    quoted.push(close);
    quoted
}
333
/// Sanitize a string for safe use in descriptions and comments.
///
/// Returns a copy of `desc` with every control character removed, except
/// for line feed (`\n`), tab (`\t`), and carriage return (`\r`), which are
/// kept. All other characters pass through unchanged.
pub fn sanitize_description(desc: &str) -> String {
    let mut cleaned = String::with_capacity(desc.len());
    for ch in desc.chars() {
        let keep = matches!(ch, '\n' | '\t' | '\r') || !ch.is_control();
        if keep {
            cleaned.push(ch);
        }
    }
    cleaned
}
343
/// Check if a word is a SQL reserved word.
///
/// The comparison is case-insensitive: `word` is lowercased and matched
/// against a fixed list of keywords, literals, and type names that are
/// common across SQL dialects. This is a basic screen, not a complete
/// per-dialect list.
fn is_sql_reserved_word(word: &str) -> bool {
    let lower = word.to_lowercase();
    matches!(
        lower.as_str(),
        // Statements and DDL objects
        "select" | "from" | "where" | "insert" | "update" | "delete" | "create" | "drop"
            | "alter" | "table" | "index" | "view" | "database" | "schema" | "grant" | "revoke"
            // Transactions
            | "commit" | "rollback" | "begin" | "end" | "transaction"
            // Constraints
            | "primary" | "foreign" | "key" | "references" | "constraint" | "unique" | "check"
            | "default"
            // Operators and predicates
            | "not" | "null" | "and" | "or" | "in" | "between" | "like" | "is"
            // CASE expressions and aliases
            | "case" | "when" | "then" | "else" | "as" | "on"
            // Joins
            | "join" | "inner" | "outer" | "left" | "right" | "full" | "cross" | "natural"
            | "using"
            // Grouping, ordering, paging
            | "group" | "by" | "having" | "order" | "asc" | "desc" | "limit" | "offset"
            // Set operations and modifiers
            | "union" | "intersect" | "except" | "all" | "distinct" | "top"
            // DML helpers and routines
            | "values" | "set" | "into" | "exec" | "execute" | "procedure" | "function"
            | "trigger"
            // Literals
            | "true" | "false"
            // Type names
            | "int" | "integer" | "varchar" | "char" | "text" | "boolean" | "date" | "time"
            | "timestamp" | "float" | "double" | "decimal" | "numeric"
    )
}
443
444/// Sanitize a model name for use as a filename.
445///
446/// # Rules
447///
448/// - Removes or replaces invalid filename characters
449/// - Ensures the name is safe for use in file paths
450/// - Preserves alphanumeric characters, hyphens, underscores, and dots
451/// - Replaces invalid characters with underscores
452/// - Truncates to MAX_MODEL_NAME_LENGTH if needed
453///
454/// # Examples
455///
456/// ```
457/// use data_modelling_sdk::validation::input::sanitize_model_name;
458///
459/// assert_eq!(sanitize_model_name("my-model"), "my-model");
460/// assert_eq!(sanitize_model_name("my/model"), "my_model");
461/// assert_eq!(sanitize_model_name("my..model"), "my.model");
462/// ```
463pub fn sanitize_model_name(name: &str) -> String {
464 let mut sanitized = String::with_capacity(name.len());
465 let mut last_was_dot = false;
466
467 for ch in name.chars() {
468 match ch {
469 // Allow alphanumeric, hyphens, underscores
470 ch if ch.is_alphanumeric() || ch == '-' || ch == '_' => {
471 sanitized.push(ch);
472 last_was_dot = false;
473 }
474 // Allow single dots (but not consecutive)
475 '.' if !last_was_dot => {
476 sanitized.push('.');
477 last_was_dot = true;
478 }
479 // Replace invalid characters with underscore
480 _ => {
481 if !last_was_dot {
482 sanitized.push('_');
483 }
484 last_was_dot = false;
485 }
486 }
487
488 // Truncate if too long
489 if sanitized.len() >= MAX_MODEL_NAME_LENGTH {
490 break;
491 }
492 }
493
494 // Remove trailing dots and underscores
495 sanitized = sanitized.trim_end_matches(['.', '_']).to_string();
496
497 // Ensure not empty
498 if sanitized.is_empty() {
499 sanitized = "model".to_string();
500 }
501
502 sanitized
503}
504
505/// Validate file size for BPMN/DMN models.
506///
507/// # Arguments
508///
509/// * `file_size` - File size in bytes
510///
511/// # Returns
512///
513/// `ValidationResult<()>` indicating whether the file size is valid
514pub fn validate_bpmn_dmn_file_size(file_size: u64) -> ValidationResult<()> {
515 if file_size > MAX_BPMN_DMN_FILE_SIZE {
516 return Err(ValidationError::TooLong {
517 field: "BPMN/DMN file size",
518 max: MAX_BPMN_DMN_FILE_SIZE as usize,
519 actual: file_size as usize,
520 });
521 }
522 Ok(())
523}
524
525/// Validate file size for OpenAPI specifications.
526///
527/// # Arguments
528///
529/// * `file_size` - File size in bytes
530///
531/// # Returns
532///
533/// `ValidationResult<()>` indicating whether the file size is valid
534pub fn validate_openapi_file_size(file_size: u64) -> ValidationResult<()> {
535 if file_size > MAX_OPENAPI_FILE_SIZE {
536 return Err(ValidationError::TooLong {
537 field: "OpenAPI file size",
538 max: MAX_OPENAPI_FILE_SIZE as usize,
539 actual: file_size as usize,
540 });
541 }
542 Ok(())
543}