fraiseql_cli/schema/validator/sql_identifier.rs
1//! SQL identifier validation.
2
3use std::sync::LazyLock;
4
5use regex::Regex;
6
7use super::types::{ErrorSeverity, ValidationError};
8
9/// Pattern for safe SQL identifiers: up to three dot-separated segments
10/// (`name`, `schema.name`, or `catalog.schema.name`).
11/// Each segment must start with a letter or underscore, followed by alphanumerics/underscores.
12static SAFE_IDENTIFIER: LazyLock<Regex> = LazyLock::new(|| {
13 Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*){0,2}$")
14 .expect("static regex is valid")
15});
16
17/// PostgreSQL's `NAMEDATALEN - 1`: the maximum byte length of a single identifier segment.
18const PG_MAX_IDENTIFIER_BYTES: usize = 63;
19
20/// Validates that `value` is a safe SQL identifier.
21///
22/// Accepts `[A-Za-z_][A-Za-z0-9_]*` with up to two schema dots
23/// (e.g. `"v_user"`, `"public.v_user"`, or `"catalog.schema.table"`).
24/// Rejects anything that could be SQL injection or cause a runtime syntax error.
25///
26/// Each dot-separated segment is limited to 63 bytes (PostgreSQL `NAMEDATALEN - 1`).
27/// Identifiers exceeding this limit are silently truncated by PostgreSQL, which can
28/// cause confusing "relation not found" errors at runtime.
29///
30/// # Arguments
31/// - `value`: The string to validate (e.g. `"v_user"` or `"public.v_user"`)
32/// - `field`: The TOML/decorator field name (`"sql_source"`, `"function_name"`)
33/// - `path`: Human-readable location for the error (`"Query.users"`, `"Mutation.createPost"`)
34///
35/// # Errors
36///
37/// Returns a `ValidationError` if `value` is empty, exceeds the PostgreSQL identifier
38/// length limit, or does not match the safe identifier pattern.
39pub fn validate_sql_identifier(
40 value: &str,
41 field: &str,
42 path: &str,
43) -> std::result::Result<(), ValidationError> {
44 if value.is_empty() {
45 return Err(ValidationError {
46 message: format!(
47 "`{field}` at `{path}` must not be empty. \
48 Provide a view or function name such as \"v_user\" or \"public.v_user\"."
49 ),
50 path: path.to_string(),
51 severity: ErrorSeverity::Error,
52 suggestion: None,
53 });
54 }
55
56 // Check each segment against PostgreSQL's NAMEDATALEN limit.
57 for segment in value.split('.') {
58 if segment.len() > PG_MAX_IDENTIFIER_BYTES {
59 return Err(ValidationError {
60 message: format!(
61 "`{field}` segment {segment:?} at `{path}` is {} bytes, \
62 which exceeds the PostgreSQL maximum of {PG_MAX_IDENTIFIER_BYTES} bytes. \
63 PostgreSQL silently truncates longer identifiers, causing \
64 \"relation not found\" errors at runtime.",
65 segment.len(),
66 ),
67 path: path.to_string(),
68 severity: ErrorSeverity::Error,
69 suggestion: Some("Shorten the identifier to 63 characters or fewer.".to_string()),
70 });
71 }
72 }
73
74 if !SAFE_IDENTIFIER.is_match(value) {
75 return Err(ValidationError {
76 message: format!(
77 "`{field}` value {value:?} at `{path}` is not a valid SQL identifier. \
78 Only ASCII letters, digits, underscores, and up to two schema dots are \
79 allowed (1-3 segments). Valid examples: \"v_user\", \"public.v_user\", \
80 \"catalog.schema.table\"."
81 ),
82 path: path.to_string(),
83 severity: ErrorSeverity::Error,
84 suggestion: Some(
85 "Remove semicolons, quotes, dashes, spaces, or any SQL syntax \
86 from the identifier value."
87 .to_string(),
88 ),
89 });
90 }
91 Ok(())
92}