data_modelling_sdk/validation/
input.rs

1//! Input validation and sanitization utilities.
2//!
3//! This module provides functions for validating and sanitizing user input
4//! before processing. These functions are used by import parsers and storage
5//! backends to ensure data integrity and security.
6//!
7//! # Security
8//!
9//! Input validation prevents:
10//! - SQL injection via malicious table/column names
11//! - Path traversal via malicious file paths
12//! - Buffer overflows via excessively long inputs
13//! - Unicode normalization attacks
14
15use serde::{Deserialize, Serialize};
16use thiserror::Error;
17use uuid::Uuid;
18
/// Upper bound on the length of a table name.
///
/// Validators compare this against `str::len`, so the unit is bytes of UTF-8.
pub const MAX_TABLE_NAME_LENGTH: usize = 255;

/// Upper bound on the length of a column name (bytes of UTF-8).
pub const MAX_COLUMN_NAME_LENGTH: usize = 255;

/// Upper bound on the length of any other identifier, such as a data type
/// string (bytes of UTF-8).
pub const MAX_IDENTIFIER_LENGTH: usize = 255;

/// Upper bound on the length of a free-text description (bytes of UTF-8).
pub const MAX_DESCRIPTION_LENGTH: usize = 10_000;

/// Number of bytes in one mebibyte; used to spell out the file-size limits.
const BYTES_PER_MIB: u64 = 1024 * 1024;

/// Largest accepted BPMN/DMN model file, in bytes (10 MiB).
pub const MAX_BPMN_DMN_FILE_SIZE: u64 = 10 * BYTES_PER_MIB;

/// Largest accepted OpenAPI specification file, in bytes (5 MiB).
pub const MAX_OPENAPI_FILE_SIZE: u64 = 5 * BYTES_PER_MIB;

/// Upper bound on the length of a sanitized model name, which is used as a
/// filename (bytes of UTF-8).
pub const MAX_MODEL_NAME_LENGTH: usize = 255;
39
/// Errors that can occur during input validation.
///
/// Each variant carries the human-readable name of the offending field
/// (for example `"table name"`), which is interpolated into the message
/// produced by the `#[error(...)]` format string.
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum ValidationError {
    /// Input is empty when a value is required.
    ///
    /// The payload is the name of the field that was empty.
    #[error("{0} cannot be empty")]
    Empty(&'static str),

    /// Input exceeds the maximum allowed size.
    ///
    /// Used both for strings (where `max`/`actual` are `str::len` byte
    /// counts) and for the file-size validators (where they are file sizes
    /// in bytes).
    #[error("{field} exceeds maximum length (max: {max}, got: {actual})")]
    TooLong {
        /// Name of the field that was too long.
        field: &'static str,
        /// The limit that was exceeded.
        max: usize,
        /// The size that was actually supplied.
        actual: usize,
    },

    /// Input contains one or more characters that are not permitted for
    /// the field; `reason` describes what was found.
    #[error("{field} contains invalid characters: {reason}")]
    InvalidCharacters { field: &'static str, reason: String },

    /// Input has an invalid format.
    ///
    /// The first element is the field name, the second a description of
    /// the problem.
    #[error("{0}: {1}")]
    InvalidFormat(&'static str, String),

    /// Input is a reserved word; `word` is the input exactly as supplied.
    #[error("{field} cannot be a reserved word: {word}")]
    ReservedWord { field: &'static str, word: String },
}
67
/// Result type for validation operations.
///
/// Shorthand for `Result<T, ValidationError>`, returned by the `validate_*`
/// functions in this module.
pub type ValidationResult<T> = Result<T, ValidationError>;
70
71/// Validate a table name.
72///
73/// # Rules
74///
75/// - Must not be empty
76/// - Must not exceed 255 characters
77/// - Must start with a letter or underscore
78/// - May contain letters, digits, underscores, and hyphens
79/// - Cannot be a SQL reserved word
80///
81/// # Examples
82///
83/// ```
84/// use data_modelling_sdk::validation::input::validate_table_name;
85///
86/// assert!(validate_table_name("users").is_ok());
87/// assert!(validate_table_name("user_orders").is_ok());
88/// assert!(validate_table_name("").is_err());
89/// assert!(validate_table_name("123_invalid").is_err());
90/// ```
91pub fn validate_table_name(name: &str) -> ValidationResult<()> {
92    if name.is_empty() {
93        return Err(ValidationError::Empty("table name"));
94    }
95
96    if name.len() > MAX_TABLE_NAME_LENGTH {
97        return Err(ValidationError::TooLong {
98            field: "table name",
99            max: MAX_TABLE_NAME_LENGTH,
100            actual: name.len(),
101        });
102    }
103
104    // Must start with a letter or underscore
105    let first_char = name.chars().next().unwrap();
106    if !first_char.is_alphabetic() && first_char != '_' {
107        return Err(ValidationError::InvalidFormat(
108            "table name",
109            "must start with a letter or underscore".to_string(),
110        ));
111    }
112
113    // May contain letters, digits, underscores, and hyphens
114    for c in name.chars() {
115        if !c.is_alphanumeric() && c != '_' && c != '-' {
116            return Err(ValidationError::InvalidCharacters {
117                field: "table name",
118                reason: format!("invalid character: '{}'", c),
119            });
120        }
121    }
122
123    // Check for SQL reserved words (basic set)
124    if is_sql_reserved_word(name) {
125        return Err(ValidationError::ReservedWord {
126            field: "table name",
127            word: name.to_string(),
128        });
129    }
130
131    Ok(())
132}
133
134/// Validate a column name.
135///
136/// # Rules
137///
138/// - Must not be empty
139/// - Must not exceed 255 characters
140/// - Must start with a letter or underscore
141/// - May contain letters, digits, underscores, hyphens, and dots (for nested columns)
142/// - Cannot be a SQL reserved word (unless nested)
143///
144/// # Examples
145///
146/// ```
147/// use data_modelling_sdk::validation::input::validate_column_name;
148///
149/// assert!(validate_column_name("id").is_ok());
150/// assert!(validate_column_name("user_name").is_ok());
151/// assert!(validate_column_name("address.street").is_ok()); // nested column
152/// assert!(validate_column_name("").is_err());
153/// ```
154pub fn validate_column_name(name: &str) -> ValidationResult<()> {
155    if name.is_empty() {
156        return Err(ValidationError::Empty("column name"));
157    }
158
159    if name.len() > MAX_COLUMN_NAME_LENGTH {
160        return Err(ValidationError::TooLong {
161            field: "column name",
162            max: MAX_COLUMN_NAME_LENGTH,
163            actual: name.len(),
164        });
165    }
166
167    // Must start with a letter or underscore
168    let first_char = name.chars().next().unwrap();
169    if !first_char.is_alphabetic() && first_char != '_' {
170        return Err(ValidationError::InvalidFormat(
171            "column name",
172            "must start with a letter or underscore".to_string(),
173        ));
174    }
175
176    // May contain letters, digits, underscores, hyphens, and dots (for nested columns)
177    for c in name.chars() {
178        if !c.is_alphanumeric() && c != '_' && c != '-' && c != '.' {
179            return Err(ValidationError::InvalidCharacters {
180                field: "column name",
181                reason: format!("invalid character: '{}'", c),
182            });
183        }
184    }
185
186    // Check for SQL reserved words (only for non-nested column names)
187    if !name.contains('.') && is_sql_reserved_word(name) {
188        return Err(ValidationError::ReservedWord {
189            field: "column name",
190            word: name.to_string(),
191        });
192    }
193
194    Ok(())
195}
196
197/// Validate a UUID string.
198///
199/// # Examples
200///
201/// ```
202/// use data_modelling_sdk::validation::input::validate_uuid;
203///
204/// assert!(validate_uuid("550e8400-e29b-41d4-a716-446655440000").is_ok());
205/// assert!(validate_uuid("not-a-uuid").is_err());
206/// ```
207pub fn validate_uuid(id: &str) -> ValidationResult<Uuid> {
208    Uuid::parse_str(id)
209        .map_err(|e| ValidationError::InvalidFormat("UUID", format!("invalid UUID format: {}", e)))
210}
211
212/// Validate a data type string.
213///
214/// # Rules
215///
216/// - Must not be empty
217/// - Must only contain safe characters (no SQL injection)
218/// - Must match known data type patterns
219///
220/// # Examples
221///
222/// ```
223/// use data_modelling_sdk::validation::input::validate_data_type;
224///
225/// assert!(validate_data_type("VARCHAR(255)").is_ok());
226/// assert!(validate_data_type("INTEGER").is_ok());
227/// assert!(validate_data_type("ARRAY<STRING>").is_ok());
228/// assert!(validate_data_type("'; DROP TABLE users;--").is_err());
229/// ```
230pub fn validate_data_type(data_type: &str) -> ValidationResult<()> {
231    if data_type.is_empty() {
232        return Err(ValidationError::Empty("data type"));
233    }
234
235    if data_type.len() > MAX_IDENTIFIER_LENGTH {
236        return Err(ValidationError::TooLong {
237            field: "data type",
238            max: MAX_IDENTIFIER_LENGTH,
239            actual: data_type.len(),
240        });
241    }
242
243    // Check for dangerous patterns
244    let lower = data_type.to_lowercase();
245    if lower.contains(';') || lower.contains("--") || lower.contains("/*") {
246        return Err(ValidationError::InvalidCharacters {
247            field: "data type",
248            reason: "contains SQL comment or statement separator".to_string(),
249        });
250    }
251
252    // Allow alphanumeric, parentheses, commas, spaces, underscores, angle brackets
253    for c in data_type.chars() {
254        if !c.is_alphanumeric()
255            && c != '('
256            && c != ')'
257            && c != ','
258            && c != ' '
259            && c != '_'
260            && c != '<'
261            && c != '>'
262            && c != '['
263            && c != ']'
264        {
265            return Err(ValidationError::InvalidCharacters {
266                field: "data type",
267                reason: format!("invalid character: '{}'", c),
268            });
269        }
270    }
271
272    Ok(())
273}
274
275/// Validate a description string.
276///
277/// # Rules
278///
279/// - May be empty
280/// - Must not exceed 10000 characters
281/// - Control characters (except whitespace) are stripped
282pub fn validate_description(desc: &str) -> ValidationResult<()> {
283    if desc.len() > MAX_DESCRIPTION_LENGTH {
284        return Err(ValidationError::TooLong {
285            field: "description",
286            max: MAX_DESCRIPTION_LENGTH,
287            actual: desc.len(),
288        });
289    }
290
291    Ok(())
292}
293
/// Sanitize a SQL identifier by quoting it.
///
/// The identifier is wrapped in the quoting characters of `dialect`, and
/// every occurrence of the closing quote character inside `name` is
/// doubled so that it cannot terminate the quoted identifier early.
///
/// `dialect` is matched case-insensitively:
///
/// | dialect                 | quoting        |
/// |-------------------------|----------------|
/// | `mysql`, `mariadb`      | `` `name` ``   |
/// | `sqlserver`, `mssql`    | `[name]`       |
/// | anything else           | `"name"`       |
///
/// # Examples
///
/// ```
/// use data_modelling_sdk::validation::input::sanitize_sql_identifier;
///
/// assert_eq!(sanitize_sql_identifier("users", "postgres"), "\"users\"");
/// assert_eq!(sanitize_sql_identifier("user-orders", "mysql"), "`user-orders`");
/// ```
pub fn sanitize_sql_identifier(name: &str, dialect: &str) -> String {
    let (open, close) = match dialect.to_lowercase().as_str() {
        "mysql" | "mariadb" => ('`', '`'),
        "sqlserver" | "mssql" => ('[', ']'),
        _ => ('"', '"'), // Standard SQL, PostgreSQL, etc.
    };

    // Two bytes for the surrounding quotes; escapes may grow it further.
    let mut quoted = String::with_capacity(name.len() + 2);
    quoted.push(open);
    for ch in name.chars() {
        // Only the closing character can end the identifier, so it is the
        // only one that needs escaping (by doubling).
        if ch == close {
            quoted.push(close);
        }
        quoted.push(ch);
    }
    quoted.push(close);
    quoted
}
325
/// Strip control characters from a description or comment.
///
/// Every character for which `char::is_control` is true is removed,
/// except line feed (`\n`), tab (`\t`) and carriage return (`\r`), which
/// are kept. All other characters pass through unchanged; nothing is
/// escaped.
pub fn sanitize_description(desc: &str) -> String {
    let mut cleaned = String::with_capacity(desc.len());

    for ch in desc.chars() {
        let is_kept_whitespace = matches!(ch, '\n' | '\t' | '\r');
        if is_kept_whitespace || !ch.is_control() {
            cleaned.push(ch);
        }
    }

    cleaned
}
335
/// Report whether `word` is one of a basic set of SQL reserved words.
///
/// The comparison is case-insensitive: `word` is lower-cased with
/// `str::to_lowercase` and then compared for exact equality against each
/// entry. The list covers common keywords across SQL dialects and is not
/// exhaustive for any one of them.
fn is_sql_reserved_word(word: &str) -> bool {
    #[rustfmt::skip]
    const RESERVED_WORDS: &[&str] = &[
        // Statement verbs
        "select", "from", "where", "insert", "update", "delete", "create", "drop", "alter",
        // Schema objects
        "table", "index", "view", "database", "schema",
        // Permissions and transactions
        "grant", "revoke", "commit", "rollback", "begin", "end", "transaction",
        // Constraints
        "primary", "foreign", "key", "references", "constraint", "unique", "check", "default",
        // Operators and predicates
        "not", "null", "and", "or", "in", "between", "like", "is",
        // Conditional expressions and aliases
        "case", "when", "then", "else", "as",
        // Joins
        "on", "join", "inner", "outer", "left", "right", "full", "cross", "natural", "using",
        // Grouping, ordering and paging
        "group", "by", "having", "order", "asc", "desc", "limit", "offset",
        // Set operations and quantifiers
        "union", "intersect", "except", "all", "distinct", "top",
        // Data modification clauses
        "values", "set", "into",
        // Procedural
        "exec", "execute", "procedure", "function", "trigger",
        // Literals
        "true", "false",
        // Common type names
        "int", "integer", "varchar", "char", "text", "boolean", "date", "time", "timestamp",
        "float", "double", "decimal", "numeric",
    ];

    let lowered = word.to_lowercase();
    RESERVED_WORDS
        .iter()
        .any(|&reserved| reserved == lowered.as_str())
}
435
436/// Sanitize a model name for use as a filename.
437///
438/// # Rules
439///
440/// - Removes or replaces invalid filename characters
441/// - Ensures the name is safe for use in file paths
442/// - Preserves alphanumeric characters, hyphens, underscores, and dots
443/// - Replaces invalid characters with underscores
444/// - Truncates to MAX_MODEL_NAME_LENGTH if needed
445///
446/// # Examples
447///
448/// ```
449/// use data_modelling_sdk::validation::input::sanitize_model_name;
450///
451/// assert_eq!(sanitize_model_name("my-model"), "my-model");
452/// assert_eq!(sanitize_model_name("my/model"), "my_model");
453/// assert_eq!(sanitize_model_name("my..model"), "my.model");
454/// ```
455pub fn sanitize_model_name(name: &str) -> String {
456    let mut sanitized = String::with_capacity(name.len());
457    let mut last_was_dot = false;
458
459    for ch in name.chars() {
460        match ch {
461            // Allow alphanumeric, hyphens, underscores
462            ch if ch.is_alphanumeric() || ch == '-' || ch == '_' => {
463                sanitized.push(ch);
464                last_was_dot = false;
465            }
466            // Allow single dots (but not consecutive)
467            '.' if !last_was_dot => {
468                sanitized.push('.');
469                last_was_dot = true;
470            }
471            // Replace invalid characters with underscore
472            _ => {
473                if !last_was_dot {
474                    sanitized.push('_');
475                }
476                last_was_dot = false;
477            }
478        }
479
480        // Truncate if too long
481        if sanitized.len() >= MAX_MODEL_NAME_LENGTH {
482            break;
483        }
484    }
485
486    // Remove trailing dots and underscores
487    sanitized = sanitized.trim_end_matches(['.', '_']).to_string();
488
489    // Ensure not empty
490    if sanitized.is_empty() {
491        sanitized = "model".to_string();
492    }
493
494    sanitized
495}
496
497/// Validate file size for BPMN/DMN models.
498///
499/// # Arguments
500///
501/// * `file_size` - File size in bytes
502///
503/// # Returns
504///
505/// `ValidationResult<()>` indicating whether the file size is valid
506pub fn validate_bpmn_dmn_file_size(file_size: u64) -> ValidationResult<()> {
507    if file_size > MAX_BPMN_DMN_FILE_SIZE {
508        return Err(ValidationError::TooLong {
509            field: "BPMN/DMN file size",
510            max: MAX_BPMN_DMN_FILE_SIZE as usize,
511            actual: file_size as usize,
512        });
513    }
514    Ok(())
515}
516
517/// Validate file size for OpenAPI specifications.
518///
519/// # Arguments
520///
521/// * `file_size` - File size in bytes
522///
523/// # Returns
524///
525/// `ValidationResult<()>` indicating whether the file size is valid
526pub fn validate_openapi_file_size(file_size: u64) -> ValidationResult<()> {
527    if file_size > MAX_OPENAPI_FILE_SIZE {
528        return Err(ValidationError::TooLong {
529            field: "OpenAPI file size",
530            max: MAX_OPENAPI_FILE_SIZE as usize,
531            actual: file_size as usize,
532        });
533    }
534    Ok(())
535}