data_modelling_sdk/validation/
input.rs

1//! Input validation and sanitization utilities.
2//!
3//! This module provides functions for validating and sanitizing user input
4//! before processing. These functions are used by import parsers and storage
5//! backends to ensure data integrity and security.
6//!
7//! # Security
8//!
9//! Input validation prevents:
10//! - SQL injection via malicious table/column names
11//! - Path traversal via malicious file paths
12//! - Buffer overflows via excessively long inputs
13//! - Unicode normalization attacks
14
15use serde::{Deserialize, Serialize};
16use thiserror::Error;
17use uuid::Uuid;
18
/// Maximum length for table names.
///
/// `validate_table_name` compares this against `str::len()`, so the limit is
/// counted in UTF-8 bytes, not in characters.
pub const MAX_TABLE_NAME_LENGTH: usize = 255;

/// Maximum length for column names.
///
/// `validate_column_name` compares this against `str::len()`, so the limit is
/// counted in UTF-8 bytes, not in characters.
pub const MAX_COLUMN_NAME_LENGTH: usize = 255;

/// Maximum length for identifiers in general.
///
/// Used by `validate_data_type` as the byte-length cap for data type strings.
pub const MAX_IDENTIFIER_LENGTH: usize = 255;

/// Maximum length for descriptions.
///
/// `validate_description` compares this against `str::len()` (UTF-8 bytes).
pub const MAX_DESCRIPTION_LENGTH: usize = 10000;

/// Maximum file size for BPMN/DMN models (10MB), in bytes.
pub const MAX_BPMN_DMN_FILE_SIZE: u64 = 10 * 1024 * 1024;

/// Maximum file size for OpenAPI specifications (5MB), in bytes.
pub const MAX_OPENAPI_FILE_SIZE: u64 = 5 * 1024 * 1024;

/// Maximum length for model names (filenames).
///
/// `sanitize_model_name` measures the sanitized name with `String::len()`
/// (UTF-8 bytes) when applying this limit.
pub const MAX_MODEL_NAME_LENGTH: usize = 255;
39
/// Errors that can occur during input validation.
///
/// Each variant carries a human-readable label for the input being checked
/// (for example `"table name"`), which is interpolated into the error message.
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum ValidationError {
    /// Input is empty when a value is required.
    ///
    /// The payload is the label of the offending input.
    #[error("{0} cannot be empty")]
    Empty(&'static str),

    /// Input exceeds maximum allowed length.
    ///
    /// `max` is the configured limit and `actual` is the measured size. The
    /// string validators in this file fill these with byte lengths; the
    /// file-size validators reuse this variant with sizes in bytes.
    #[error("{field} exceeds maximum length (max: {max}, got: {actual})")]
    TooLong {
        field: &'static str,
        max: usize,
        actual: usize,
    },

    /// Input contains invalid characters.
    ///
    /// `reason` describes what was rejected (e.g. the first offending character).
    #[error("{field} contains invalid characters: {reason}")]
    InvalidCharacters { field: &'static str, reason: String },

    /// Input has invalid format.
    ///
    /// The first element is the input label, the second explains the violated
    /// rule (e.g. a bad leading character or a UUID parse failure).
    #[error("{0}: {1}")]
    InvalidFormat(&'static str, String),

    /// Input is a reserved word.
    ///
    /// `word` is the rejected input exactly as the caller supplied it.
    #[error("{field} cannot be a reserved word: {word}")]
    ReservedWord { field: &'static str, word: String },
}
67
/// Result type for validation operations.
///
/// Shorthand for `Result<T, ValidationError>`; the `validate_*` functions in
/// this file return it.
pub type ValidationResult<T> = Result<T, ValidationError>;
70
71/// Validate a table name.
72///
73/// # Rules
74///
75/// - Must not be empty
76/// - Must not exceed 255 characters
77/// - Must start with a letter or underscore
78/// - May contain letters, digits, underscores, and hyphens
79/// - Cannot be a SQL reserved word
80///
81/// # Examples
82///
83/// ```
84/// use data_modelling_sdk::validation::input::validate_table_name;
85///
86/// assert!(validate_table_name("users").is_ok());
87/// assert!(validate_table_name("user_orders").is_ok());
88/// assert!(validate_table_name("").is_err());
89/// assert!(validate_table_name("123_invalid").is_err());
90/// ```
91pub fn validate_table_name(name: &str) -> ValidationResult<()> {
92    if name.is_empty() {
93        return Err(ValidationError::Empty("table name"));
94    }
95
96    if name.len() > MAX_TABLE_NAME_LENGTH {
97        return Err(ValidationError::TooLong {
98            field: "table name",
99            max: MAX_TABLE_NAME_LENGTH,
100            actual: name.len(),
101        });
102    }
103
104    // Must start with a letter or underscore
105    // Note: unwrap is safe here due to the empty check above, but we use match for clarity
106    let first_char = match name.chars().next() {
107        Some(c) => c,
108        None => return Err(ValidationError::Empty("table name")),
109    };
110    if !first_char.is_alphabetic() && first_char != '_' {
111        return Err(ValidationError::InvalidFormat(
112            "table name",
113            "must start with a letter or underscore".to_string(),
114        ));
115    }
116
117    // May contain letters, digits, underscores, and hyphens
118    for c in name.chars() {
119        if !c.is_alphanumeric() && c != '_' && c != '-' {
120            return Err(ValidationError::InvalidCharacters {
121                field: "table name",
122                reason: format!("invalid character: '{}'", c),
123            });
124        }
125    }
126
127    // Check for SQL reserved words (basic set)
128    if is_sql_reserved_word(name) {
129        return Err(ValidationError::ReservedWord {
130            field: "table name",
131            word: name.to_string(),
132        });
133    }
134
135    Ok(())
136}
137
138/// Validate a column name.
139///
140/// # Rules
141///
142/// - Must not be empty
143/// - Must not exceed 255 characters
144/// - Must start with a letter or underscore
145/// - May contain letters, digits, underscores, hyphens, and dots (for nested columns)
146/// - Cannot be a SQL reserved word (unless nested)
147///
148/// # Examples
149///
150/// ```
151/// use data_modelling_sdk::validation::input::validate_column_name;
152///
153/// assert!(validate_column_name("id").is_ok());
154/// assert!(validate_column_name("user_name").is_ok());
155/// assert!(validate_column_name("address.street").is_ok()); // nested column
156/// assert!(validate_column_name("").is_err());
157/// ```
158pub fn validate_column_name(name: &str) -> ValidationResult<()> {
159    if name.is_empty() {
160        return Err(ValidationError::Empty("column name"));
161    }
162
163    if name.len() > MAX_COLUMN_NAME_LENGTH {
164        return Err(ValidationError::TooLong {
165            field: "column name",
166            max: MAX_COLUMN_NAME_LENGTH,
167            actual: name.len(),
168        });
169    }
170
171    // Must start with a letter or underscore
172    // Note: unwrap is safe here due to the empty check above, but we use match for clarity
173    let first_char = match name.chars().next() {
174        Some(c) => c,
175        None => return Err(ValidationError::Empty("column name")),
176    };
177    if !first_char.is_alphabetic() && first_char != '_' {
178        return Err(ValidationError::InvalidFormat(
179            "column name",
180            "must start with a letter or underscore".to_string(),
181        ));
182    }
183
184    // May contain letters, digits, underscores, hyphens, and dots (for nested columns)
185    for c in name.chars() {
186        if !c.is_alphanumeric() && c != '_' && c != '-' && c != '.' {
187            return Err(ValidationError::InvalidCharacters {
188                field: "column name",
189                reason: format!("invalid character: '{}'", c),
190            });
191        }
192    }
193
194    // Check for SQL reserved words (only for non-nested column names)
195    if !name.contains('.') && is_sql_reserved_word(name) {
196        return Err(ValidationError::ReservedWord {
197            field: "column name",
198            word: name.to_string(),
199        });
200    }
201
202    Ok(())
203}
204
205/// Validate a UUID string.
206///
207/// # Examples
208///
209/// ```
210/// use data_modelling_sdk::validation::input::validate_uuid;
211///
212/// assert!(validate_uuid("550e8400-e29b-41d4-a716-446655440000").is_ok());
213/// assert!(validate_uuid("not-a-uuid").is_err());
214/// ```
215pub fn validate_uuid(id: &str) -> ValidationResult<Uuid> {
216    Uuid::parse_str(id)
217        .map_err(|e| ValidationError::InvalidFormat("UUID", format!("invalid UUID format: {}", e)))
218}
219
220/// Validate a data type string.
221///
222/// # Rules
223///
224/// - Must not be empty
225/// - Must only contain safe characters (no SQL injection)
226/// - Must match known data type patterns
227///
228/// # Examples
229///
230/// ```
231/// use data_modelling_sdk::validation::input::validate_data_type;
232///
233/// assert!(validate_data_type("VARCHAR(255)").is_ok());
234/// assert!(validate_data_type("INTEGER").is_ok());
235/// assert!(validate_data_type("ARRAY<STRING>").is_ok());
236/// assert!(validate_data_type("'; DROP TABLE users;--").is_err());
237/// ```
238pub fn validate_data_type(data_type: &str) -> ValidationResult<()> {
239    if data_type.is_empty() {
240        return Err(ValidationError::Empty("data type"));
241    }
242
243    if data_type.len() > MAX_IDENTIFIER_LENGTH {
244        return Err(ValidationError::TooLong {
245            field: "data type",
246            max: MAX_IDENTIFIER_LENGTH,
247            actual: data_type.len(),
248        });
249    }
250
251    // Check for dangerous patterns
252    let lower = data_type.to_lowercase();
253    if lower.contains(';') || lower.contains("--") || lower.contains("/*") {
254        return Err(ValidationError::InvalidCharacters {
255            field: "data type",
256            reason: "contains SQL comment or statement separator".to_string(),
257        });
258    }
259
260    // Allow alphanumeric, parentheses, commas, spaces, underscores, angle brackets
261    for c in data_type.chars() {
262        if !c.is_alphanumeric()
263            && c != '('
264            && c != ')'
265            && c != ','
266            && c != ' '
267            && c != '_'
268            && c != '<'
269            && c != '>'
270            && c != '['
271            && c != ']'
272        {
273            return Err(ValidationError::InvalidCharacters {
274                field: "data type",
275                reason: format!("invalid character: '{}'", c),
276            });
277        }
278    }
279
280    Ok(())
281}
282
283/// Validate a description string.
284///
285/// # Rules
286///
287/// - May be empty
288/// - Must not exceed 10000 characters
289/// - Control characters (except whitespace) are stripped
290pub fn validate_description(desc: &str) -> ValidationResult<()> {
291    if desc.len() > MAX_DESCRIPTION_LENGTH {
292        return Err(ValidationError::TooLong {
293            field: "description",
294            max: MAX_DESCRIPTION_LENGTH,
295            actual: desc.len(),
296        });
297    }
298
299    Ok(())
300}
301
/// Quote `name` as a SQL identifier for the given dialect.
///
/// The dialect is matched case-insensitively:
///
/// - `"mysql"` / `"mariadb"` use backticks
/// - `"sqlserver"` / `"mssql"` use square brackets
/// - anything else falls back to standard double quotes (PostgreSQL etc.)
///
/// Any occurrence of the closing delimiter inside `name` is doubled, so the
/// identifier cannot terminate the quoting early.
///
/// # Examples
///
/// ```
/// use data_modelling_sdk::validation::input::sanitize_sql_identifier;
///
/// assert_eq!(sanitize_sql_identifier("users", "postgres"), "\"users\"");
/// assert_eq!(sanitize_sql_identifier("user-orders", "mysql"), "`user-orders`");
/// ```
pub fn sanitize_sql_identifier(name: &str, dialect: &str) -> String {
    let (open, close) = match dialect.to_lowercase().as_str() {
        "mysql" | "mariadb" => ('`', '`'),
        "sqlserver" | "mssql" => ('[', ']'),
        _ => ('"', '"'), // Standard SQL, PostgreSQL, etc.
    };

    let mut quoted = String::with_capacity(name.len() + 2);
    quoted.push(open);
    for ch in name.chars() {
        // Only the closing delimiter needs escaping, by doubling it.
        if ch == close {
            quoted.push(close);
        }
        quoted.push(ch);
    }
    quoted.push(close);
    quoted
}
333
/// Strip control characters from a description or comment string.
///
/// Every character for which `char::is_control` is true is removed, except
/// newline (`\n`), tab (`\t`), and carriage return (`\r`). All other
/// characters are copied through unchanged; nothing is escaped.
pub fn sanitize_description(desc: &str) -> String {
    let mut cleaned = String::with_capacity(desc.len());
    for ch in desc.chars() {
        let keep = matches!(ch, '\n' | '\t' | '\r') || !ch.is_control();
        if keep {
            cleaned.push(ch);
        }
    }
    cleaned
}
343
/// Report whether `word` is one of a basic set of SQL reserved words.
///
/// The comparison is case-insensitive: `word` is lowercased with
/// `str::to_lowercase` and must then equal one of the listed keywords exactly.
/// The list covers reserved words common across SQL dialects and is not
/// exhaustive for any one of them.
fn is_sql_reserved_word(word: &str) -> bool {
    let folded = word.to_lowercase();
    matches!(
        folded.as_str(),
        // Statement verbs
        "select" | "from" | "where" | "insert" | "update" | "delete" | "create" | "drop" | "alter"
        // Schema objects
        | "table" | "index" | "view" | "database" | "schema"
        // Privileges and transactions
        | "grant" | "revoke" | "commit" | "rollback" | "begin" | "end" | "transaction"
        // Constraints
        | "primary" | "foreign" | "key" | "references" | "constraint" | "unique" | "check"
        | "default"
        // Predicates and operators
        | "not" | "null" | "and" | "or" | "in" | "between" | "like" | "is"
        // Conditional expressions and aliases
        | "case" | "when" | "then" | "else" | "as" | "on"
        // Joins
        | "join" | "inner" | "outer" | "left" | "right" | "full" | "cross" | "natural" | "using"
        // Grouping, ordering, paging
        | "group" | "by" | "having" | "order" | "asc" | "desc" | "limit" | "offset"
        // Set operations and row selection
        | "union" | "intersect" | "except" | "all" | "distinct" | "top"
        // Insert/update clauses
        | "values" | "set" | "into"
        // Routines
        | "exec" | "execute" | "procedure" | "function" | "trigger"
        // Literals
        | "true" | "false"
        // Type names
        | "int" | "integer" | "varchar" | "char" | "text" | "boolean" | "date" | "time"
        | "timestamp" | "float" | "double" | "decimal" | "numeric"
    )
}
443
444/// Sanitize a model name for use as a filename.
445///
446/// # Rules
447///
448/// - Removes or replaces invalid filename characters
449/// - Ensures the name is safe for use in file paths
450/// - Preserves alphanumeric characters, hyphens, underscores, and dots
451/// - Replaces invalid characters with underscores
452/// - Truncates to MAX_MODEL_NAME_LENGTH if needed
453///
454/// # Examples
455///
456/// ```
457/// use data_modelling_sdk::validation::input::sanitize_model_name;
458///
459/// assert_eq!(sanitize_model_name("my-model"), "my-model");
460/// assert_eq!(sanitize_model_name("my/model"), "my_model");
461/// assert_eq!(sanitize_model_name("my..model"), "my.model");
462/// ```
463pub fn sanitize_model_name(name: &str) -> String {
464    let mut sanitized = String::with_capacity(name.len());
465    let mut last_was_dot = false;
466
467    for ch in name.chars() {
468        match ch {
469            // Allow alphanumeric, hyphens, underscores
470            ch if ch.is_alphanumeric() || ch == '-' || ch == '_' => {
471                sanitized.push(ch);
472                last_was_dot = false;
473            }
474            // Allow single dots (but not consecutive)
475            '.' if !last_was_dot => {
476                sanitized.push('.');
477                last_was_dot = true;
478            }
479            // Replace invalid characters with underscore
480            _ => {
481                if !last_was_dot {
482                    sanitized.push('_');
483                }
484                last_was_dot = false;
485            }
486        }
487
488        // Truncate if too long
489        if sanitized.len() >= MAX_MODEL_NAME_LENGTH {
490            break;
491        }
492    }
493
494    // Remove trailing dots and underscores
495    sanitized = sanitized.trim_end_matches(['.', '_']).to_string();
496
497    // Ensure not empty
498    if sanitized.is_empty() {
499        sanitized = "model".to_string();
500    }
501
502    sanitized
503}
504
505/// Validate file size for BPMN/DMN models.
506///
507/// # Arguments
508///
509/// * `file_size` - File size in bytes
510///
511/// # Returns
512///
513/// `ValidationResult<()>` indicating whether the file size is valid
514pub fn validate_bpmn_dmn_file_size(file_size: u64) -> ValidationResult<()> {
515    if file_size > MAX_BPMN_DMN_FILE_SIZE {
516        return Err(ValidationError::TooLong {
517            field: "BPMN/DMN file size",
518            max: MAX_BPMN_DMN_FILE_SIZE as usize,
519            actual: file_size as usize,
520        });
521    }
522    Ok(())
523}
524
525/// Validate file size for OpenAPI specifications.
526///
527/// # Arguments
528///
529/// * `file_size` - File size in bytes
530///
531/// # Returns
532///
533/// `ValidationResult<()>` indicating whether the file size is valid
534pub fn validate_openapi_file_size(file_size: u64) -> ValidationResult<()> {
535    if file_size > MAX_OPENAPI_FILE_SIZE {
536        return Err(ValidationError::TooLong {
537            field: "OpenAPI file size",
538            max: MAX_OPENAPI_FILE_SIZE as usize,
539            actual: file_size as usize,
540        });
541    }
542    Ok(())
543}