Skip to main content

fraiseql_cli/schema/validator/
sql_identifier.rs

1//! SQL identifier validation.
2
3use std::sync::LazyLock;
4
5use regex::Regex;
6
7use super::types::{ErrorSeverity, ValidationError};
8
9/// Pattern for safe SQL identifiers: up to three dot-separated segments
10/// (`name`, `schema.name`, or `catalog.schema.name`).
11/// Each segment must start with a letter or underscore, followed by alphanumerics/underscores.
12static SAFE_IDENTIFIER: LazyLock<Regex> = LazyLock::new(|| {
13    Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*){0,2}$")
14        .expect("static regex is valid")
15});
16
17/// PostgreSQL's `NAMEDATALEN - 1`: the maximum byte length of a single identifier segment.
18const PG_MAX_IDENTIFIER_BYTES: usize = 63;
19
20/// Validates that `value` is a safe SQL identifier.
21///
22/// Accepts `[A-Za-z_][A-Za-z0-9_]*` with up to two schema dots
23/// (e.g. `"v_user"`, `"public.v_user"`, or `"catalog.schema.table"`).
24/// Rejects anything that could be SQL injection or cause a runtime syntax error.
25///
26/// Each dot-separated segment is limited to 63 bytes (PostgreSQL `NAMEDATALEN - 1`).
27/// Identifiers exceeding this limit are silently truncated by PostgreSQL, which can
28/// cause confusing "relation not found" errors at runtime.
29///
30/// # Arguments
31/// - `value`: The string to validate (e.g. `"v_user"` or `"public.v_user"`)
32/// - `field`: The TOML/decorator field name (`"sql_source"`, `"function_name"`)
33/// - `path`: Human-readable location for the error (`"Query.users"`, `"Mutation.createPost"`)
34///
35/// # Errors
36///
37/// Returns a `ValidationError` if `value` is empty, exceeds the PostgreSQL identifier
38/// length limit, or does not match the safe identifier pattern.
39pub fn validate_sql_identifier(
40    value: &str,
41    field: &str,
42    path: &str,
43) -> std::result::Result<(), ValidationError> {
44    if value.is_empty() {
45        return Err(ValidationError {
46            message:    format!(
47                "`{field}` at `{path}` must not be empty. \
48                 Provide a view or function name such as \"v_user\" or \"public.v_user\"."
49            ),
50            path:       path.to_string(),
51            severity:   ErrorSeverity::Error,
52            suggestion: None,
53        });
54    }
55
56    // Check each segment against PostgreSQL's NAMEDATALEN limit.
57    for segment in value.split('.') {
58        if segment.len() > PG_MAX_IDENTIFIER_BYTES {
59            return Err(ValidationError {
60                message:    format!(
61                    "`{field}` segment {segment:?} at `{path}` is {} bytes, \
62                     which exceeds the PostgreSQL maximum of {PG_MAX_IDENTIFIER_BYTES} bytes. \
63                     PostgreSQL silently truncates longer identifiers, causing \
64                     \"relation not found\" errors at runtime.",
65                    segment.len(),
66                ),
67                path:       path.to_string(),
68                severity:   ErrorSeverity::Error,
69                suggestion: Some("Shorten the identifier to 63 characters or fewer.".to_string()),
70            });
71        }
72    }
73
74    if !SAFE_IDENTIFIER.is_match(value) {
75        return Err(ValidationError {
76            message:    format!(
77                "`{field}` value {value:?} at `{path}` is not a valid SQL identifier. \
78                 Only ASCII letters, digits, underscores, and up to two schema dots are \
79                 allowed (1-3 segments). Valid examples: \"v_user\", \"public.v_user\", \
80                 \"catalog.schema.table\"."
81            ),
82            path:       path.to_string(),
83            severity:   ErrorSeverity::Error,
84            suggestion: Some(
85                "Remove semicolons, quotes, dashes, spaces, or any SQL syntax \
86                 from the identifier value."
87                    .to_string(),
88            ),
89        });
90    }
91    Ok(())
92}
93
94#[cfg(test)]
95mod tests {
96    #![allow(clippy::unwrap_used)] // Reason: test code, panics acceptable
97
98    use super::*;
99
100    #[test]
101    fn test_valid_simple_identifier() {
102        validate_sql_identifier("v_user", "sql_source", "Query.users")
103            .unwrap_or_else(|e| panic!("expected Ok: {e:?}"));
104    }
105
106    #[test]
107    fn test_valid_schema_qualified_identifier() {
108        validate_sql_identifier("public.v_user", "sql_source", "Query.users")
109            .unwrap_or_else(|e| panic!("expected Ok: {e:?}"));
110    }
111
112    #[test]
113    fn test_empty_identifier_rejected() {
114        let err = validate_sql_identifier("", "sql_source", "Query.users").unwrap_err();
115        assert!(err.message.contains("must not be empty"));
116    }
117
118    #[test]
119    fn test_identifier_exactly_63_bytes_accepted() {
120        let ident = "a".repeat(63);
121        validate_sql_identifier(&ident, "sql_source", "Query.x")
122            .unwrap_or_else(|e| panic!("expected Ok: {e:?}"));
123    }
124
125    #[test]
126    fn test_identifier_64_bytes_rejected() {
127        let ident = "a".repeat(64);
128        let err = validate_sql_identifier(&ident, "sql_source", "Query.x").unwrap_err();
129        assert!(err.message.contains("exceeds the PostgreSQL maximum"));
130        assert!(err.message.contains("63 bytes"));
131    }
132
133    #[test]
134    fn test_schema_segment_64_bytes_rejected() {
135        // The schema part (before the dot) is 64 chars — should fail on that segment.
136        let schema_part = "a".repeat(64);
137        let ident = format!("{schema_part}.v_user");
138        let err = validate_sql_identifier(&ident, "sql_source", "Query.x").unwrap_err();
139        assert!(err.message.contains("exceeds the PostgreSQL maximum"));
140    }
141
142    #[test]
143    fn test_name_segment_64_bytes_rejected() {
144        // The name part (after the dot) is 64 chars — should fail on that segment.
145        let name_part = "a".repeat(64);
146        let ident = format!("public.{name_part}");
147        let err = validate_sql_identifier(&ident, "sql_source", "Query.x").unwrap_err();
148        assert!(err.message.contains("exceeds the PostgreSQL maximum"));
149    }
150
151    #[test]
152    fn test_valid_three_part_identifier() {
153        assert!(validate_sql_identifier("catalog.schema.table", "sql_source", "Query.x").is_ok());
154    }
155
156    #[test]
157    fn test_four_part_identifier_rejected() {
158        let err = validate_sql_identifier("a.b.c.d", "sql_source", "Query.x").unwrap_err();
159        assert!(err.message.contains("is not a valid SQL identifier"));
160    }
161
162    #[test]
163    fn test_leading_dot_rejected() {
164        let err = validate_sql_identifier(".foo", "sql_source", "Query.x").unwrap_err();
165        assert!(err.message.contains("is not a valid SQL identifier"));
166    }
167
168    #[test]
169    fn test_trailing_dot_rejected() {
170        let err = validate_sql_identifier("foo.", "sql_source", "Query.x").unwrap_err();
171        assert!(err.message.contains("is not a valid SQL identifier"));
172    }
173
174    #[test]
175    fn test_double_dot_rejected() {
176        let err = validate_sql_identifier("foo..bar", "sql_source", "Query.x").unwrap_err();
177        assert!(err.message.contains("is not a valid SQL identifier"));
178    }
179
180    #[test]
181    fn test_injection_attempt_rejected() {
182        let err = validate_sql_identifier("v_user; DROP TABLE users", "sql_source", "Query.users")
183            .unwrap_err();
184        assert!(err.message.contains("is not a valid SQL identifier"));
185    }
186}