Skip to main content

fraiseql_cli/schema/validator/
sql_identifier.rs

1//! SQL identifier validation.
2
3use std::sync::LazyLock;
4
5use regex::Regex;
6
7use super::types::{ErrorSeverity, ValidationError};
8
9/// Pattern for safe SQL identifiers: up to three dot-separated segments
10/// (`name`, `schema.name`, or `catalog.schema.name`).
11/// Each segment must start with a letter or underscore, followed by alphanumerics/underscores.
12static SAFE_IDENTIFIER: LazyLock<Regex> = LazyLock::new(|| {
13    Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*){0,2}$")
14        .expect("static regex is valid")
15});
16
17/// PostgreSQL's `NAMEDATALEN - 1`: the maximum byte length of a single identifier segment.
18const PG_MAX_IDENTIFIER_BYTES: usize = 63;
19
20/// Validates that `value` is a safe SQL identifier.
21///
22/// Accepts `[A-Za-z_][A-Za-z0-9_]*` with up to two schema dots
23/// (e.g. `"v_user"`, `"public.v_user"`, or `"catalog.schema.table"`).
24/// Rejects anything that could be SQL injection or cause a runtime syntax error.
25///
26/// Each dot-separated segment is limited to 63 bytes (PostgreSQL `NAMEDATALEN - 1`).
27/// Identifiers exceeding this limit are silently truncated by PostgreSQL, which can
28/// cause confusing "relation not found" errors at runtime.
29///
30/// # Arguments
31/// - `value`: The string to validate (e.g. `"v_user"` or `"public.v_user"`)
32/// - `field`: The TOML/decorator field name (`"sql_source"`, `"function_name"`)
33/// - `path`: Human-readable location for the error (`"Query.users"`, `"Mutation.createPost"`)
34///
35/// # Errors
36///
37/// Returns a `ValidationError` if `value` is empty, exceeds the PostgreSQL identifier
38/// length limit, or does not match the safe identifier pattern.
39pub fn validate_sql_identifier(
40    value: &str,
41    field: &str,
42    path: &str,
43) -> std::result::Result<(), ValidationError> {
44    if value.is_empty() {
45        return Err(ValidationError {
46            message:    format!(
47                "`{field}` at `{path}` must not be empty. \
48                 Provide a view or function name such as \"v_user\" or \"public.v_user\"."
49            ),
50            path:       path.to_string(),
51            severity:   ErrorSeverity::Error,
52            suggestion: None,
53        });
54    }
55
56    // Check each segment against PostgreSQL's NAMEDATALEN limit.
57    for segment in value.split('.') {
58        if segment.len() > PG_MAX_IDENTIFIER_BYTES {
59            return Err(ValidationError {
60                message:    format!(
61                    "`{field}` segment {segment:?} at `{path}` is {} bytes, \
62                     which exceeds the PostgreSQL maximum of {PG_MAX_IDENTIFIER_BYTES} bytes. \
63                     PostgreSQL silently truncates longer identifiers, causing \
64                     \"relation not found\" errors at runtime.",
65                    segment.len(),
66                ),
67                path:       path.to_string(),
68                severity:   ErrorSeverity::Error,
69                suggestion: Some("Shorten the identifier to 63 characters or fewer.".to_string()),
70            });
71        }
72    }
73
74    if !SAFE_IDENTIFIER.is_match(value) {
75        return Err(ValidationError {
76            message:    format!(
77                "`{field}` value {value:?} at `{path}` is not a valid SQL identifier. \
78                 Only ASCII letters, digits, underscores, and up to two schema dots are \
79                 allowed (1-3 segments). Valid examples: \"v_user\", \"public.v_user\", \
80                 \"catalog.schema.table\"."
81            ),
82            path:       path.to_string(),
83            severity:   ErrorSeverity::Error,
84            suggestion: Some(
85                "Remove semicolons, quotes, dashes, spaces, or any SQL syntax \
86                 from the identifier value."
87                    .to_string(),
88            ),
89        });
90    }
91    Ok(())
92}