Skip to main content

nodedb_types/
kv_parsing.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Shared KV DDL parsing helpers used by both Origin (pgwire) and Lite.
4//!
5//! Extracts interval parsing, storage-mode detection, and WITH-clause
6//! helpers into one place so both runtimes share identical logic.
7
8/// Errors produced by interval parsing.
9#[derive(Debug, Clone, thiserror::Error)]
10#[non_exhaustive]
11pub enum IntervalParseError {
12    /// The interval expression was empty after stripping the INTERVAL keyword.
13    #[error("empty interval expression")]
14    Empty,
15    /// The numeric amount could not be parsed.
16    #[error("invalid interval amount: '{0}'")]
17    InvalidAmount(String),
18    /// The unit suffix is not recognized.
19    #[error("unknown interval unit: '{0}'")]
20    UnknownUnit(String),
21    /// General parse failure.
22    #[error("cannot parse interval: '{0}'")]
23    Unparseable(String),
24}
25
/// Detect KV storage mode from uppercased SQL.
///
/// Matches `WITH storage = 'kv'` or `WITH STORAGE KV` keyword patterns: the
/// text `STORAGE`, followed by any mix of whitespace, `=` and `'`, followed
/// by `KV` as a complete token (the next character must be whitespace, `'`,
/// `,`, `;`, or the end of the input).
///
/// Every occurrence of `STORAGE` is examined, so an identifier that merely
/// contains the word (for example a collection named `STORAGE_LOGS`) and
/// appears before the real option does not hide it.
/// Must not false-positive on field names containing "kv".
///
/// `upper` must already be uppercased; the comparison is case-sensitive.
pub fn is_kv_storage_mode(upper: &str) -> bool {
    const KEYWORD: &str = "STORAGE";
    const MODE: &str = "KV";

    upper.match_indices(KEYWORD).any(|(pos, _)| {
        // Skip the separator between the option name and its value.
        let value = upper[pos + KEYWORD.len()..]
            .trim_start_matches(|c: char| c.is_whitespace() || c == '=' || c == '\'');

        // `KV` must be the whole value, not the prefix of a longer word.
        match value.strip_prefix(MODE) {
            None => false,
            Some(rest) => match rest.chars().next() {
                None => true,
                Some(c) => c.is_whitespace() || matches!(c, '\'' | ',' | ';'),
            },
        }
    })
}
46
47/// Parse an INTERVAL literal to milliseconds.
48///
49/// Supports: `INTERVAL '15 minutes'`, `INTERVAL '1h'`, `INTERVAL '30s'`,
50/// `INTERVAL '1 hour'`, `INTERVAL '2 days'`, `'15 minutes'` (without INTERVAL keyword).
51pub fn parse_interval_to_ms(s: &str) -> Result<u64, IntervalParseError> {
52    let trimmed = s.trim();
53    let inner = if trimmed.to_uppercase().starts_with("INTERVAL") {
54        trimmed[8..].trim()
55    } else {
56        trimmed
57    };
58    let unquoted = inner.trim_matches('\'').trim();
59
60    if unquoted.is_empty() {
61        return Err(IntervalParseError::Empty);
62    }
63
64    // Short-form: "15m", "1h", "30s", "2d"
65    if let Some(ms) = try_parse_short_interval(unquoted) {
66        return Ok(ms);
67    }
68
69    // Long-form: "15 minutes", "1 hour", "2 days 12 hours", "30 seconds"
70    // Supports compound: "2 hours 30 minutes" by parsing pairs of (number, unit).
71    let parts: Vec<&str> = unquoted.split_whitespace().collect();
72    if parts.len() >= 2 && parts.len().is_multiple_of(2) {
73        let mut total_ms: u64 = 0;
74        for chunk in parts.chunks(2) {
75            let amount: u64 = chunk[0]
76                .parse()
77                .map_err(|_| IntervalParseError::InvalidAmount(chunk[0].to_string()))?;
78            let unit = chunk[1].to_lowercase();
79            let multiplier = unit_to_ms_multiplier(&unit)
80                .ok_or_else(|| IntervalParseError::UnknownUnit(unit.clone()))?;
81            total_ms += amount * multiplier;
82        }
83        return Ok(total_ms);
84    }
85
86    // Bare number: treat as milliseconds.
87    if parts.len() == 1
88        && let Ok(ms) = unquoted.parse::<u64>()
89    {
90        return Ok(ms);
91    }
92
93    Err(IntervalParseError::Unparseable(unquoted.to_string()))
94}
95
96/// Try to parse a short-form interval like "15m", "1h", "30s", "2d".
97pub fn try_parse_short_interval(s: &str) -> Option<u64> {
98    let s = s.trim();
99    if s.is_empty() {
100        return None;
101    }
102    let num_end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
103    if num_end == 0 || num_end == s.len() {
104        return None;
105    }
106    let amount: u64 = s[..num_end].parse().ok()?;
107    let unit = &s[num_end..].to_lowercase();
108    let multiplier = unit_to_ms_multiplier(unit)?;
109    Some(amount * multiplier)
110}
111
/// Look up how many milliseconds one `unit` is worth.
///
/// Recognizes abbreviated units ("ms", "s", "sec", "m", "min", "h", "hr",
/// "d", "w", "y") as well as the singular and plural spelled-out names
/// ("millisecond(s)", "second(s)", "minute(s)", "hour(s)", "day(s)",
/// "week(s)", "year(s)"). The comparison is exact, so `unit` must already be
/// lowercase. A week counts as 7 days and a year as 365 days.
///
/// Returns `None` for anything else.
fn unit_to_ms_multiplier(unit: &str) -> Option<u64> {
    const SECOND_MS: u64 = 1_000;
    const MINUTE_MS: u64 = 60 * SECOND_MS;
    const HOUR_MS: u64 = 60 * MINUTE_MS;
    const DAY_MS: u64 = 24 * HOUR_MS;
    const WEEK_MS: u64 = 7 * DAY_MS;
    const YEAR_MS: u64 = 365 * DAY_MS;

    let per_unit = match unit {
        "ms" | "millisecond" | "milliseconds" => 1,
        "s" | "sec" | "second" | "seconds" => SECOND_MS,
        "m" | "min" | "minute" | "minutes" => MINUTE_MS,
        "h" | "hr" | "hour" | "hours" => HOUR_MS,
        "d" | "day" | "days" => DAY_MS,
        "w" | "week" | "weeks" => WEEK_MS,
        "y" | "year" | "years" => YEAR_MS,
        _ => return None,
    };
    Some(per_unit)
}
128
/// Find the byte position of a named option in the WITH clause.
///
/// Only searches after the WITH keyword to avoid matching column names.
/// `WITH` counts as the keyword only when it stands alone as a word, that is,
/// when it is not directly preceded or followed by a letter, digit or `_`.
/// An identifier such as `WITHDRAWALS_TTL` therefore does not start the
/// search early.
///
/// `upper` is expected to be uppercased SQL and `option` the uppercased
/// option name; matching is case-sensitive. The option itself is located by
/// plain substring search from the keyword onwards, so the first occurrence
/// of `option` wins even if it is part of a longer name.
///
/// Returns the byte offset of `option` within `upper`, or `None` when there
/// is no standalone `WITH` or the option does not occur after it.
pub fn find_with_option(upper: &str, option: &str) -> Option<usize> {
    const KEYWORD: &str = "WITH";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let with_pos = upper
        .match_indices(KEYWORD)
        .map(|(pos, _)| pos)
        .find(|&pos| {
            let glued_left = upper[..pos].chars().next_back().is_some_and(is_ident_char);
            let glued_right = upper[pos + KEYWORD.len()..]
                .chars()
                .next()
                .is_some_and(is_ident_char);
            !glued_left && !glued_right
        })?;

    upper[with_pos..]
        .find(option)
        .map(|offset| with_pos + offset)
}
137
/// Locate where a WITH option value expression stops.
///
/// Returns the byte offset of the first `,` or `;` that is not inside a
/// single-quoted section, or `s.len()` when there is no such delimiter.
/// Every `'` toggles the quoted state, so an unterminated quote hides all
/// delimiters after it.
pub fn find_with_option_end(s: &str) -> usize {
    let mut quoted = false;
    // The quote and both delimiters are ASCII, and ASCII bytes never occur
    // inside a multi-byte UTF-8 sequence, so scanning bytes yields the same
    // offsets as scanning chars.
    s.bytes()
        .position(|byte| match byte {
            b'\'' => {
                quoted = !quoted;
                false
            }
            b',' | b';' => !quoted,
            _ => false,
        })
        .unwrap_or(s.len())
}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each `(input, expected_ms)` pair parses to the expected
    /// number of milliseconds.
    fn assert_parses(cases: &[(&str, u64)]) {
        for &(input, expected) in cases {
            assert_eq!(
                parse_interval_to_ms(input).unwrap(),
                expected,
                "input: {input:?}"
            );
        }
    }

    // Storage-mode detection: quoted, unquoted and compact spellings are
    // accepted; other modes and identifiers containing "KV" are not.
    #[test]
    fn detect_kv_storage_mode() {
        let kv_statements = [
            "WITH STORAGE = 'KV'",
            "WITH STORAGE KV",
            "WITH STORAGE='KV'",
            "WITH STORAGE = 'KV', TTL = INTERVAL '1H'",
        ];
        for sql in kv_statements {
            assert!(is_kv_storage_mode(sql), "expected KV mode: {sql:?}");
        }

        let other_statements = [
            "WITH STORAGE = 'STRICT'",
            "WITH STORAGE = 'COLUMNAR'",
            "CREATE COLLECTION KV_STUFF",
        ];
        for sql in other_statements {
            assert!(!is_kv_storage_mode(sql), "unexpected KV mode: {sql:?}");
        }
    }

    // Digits immediately followed by a unit abbreviation.
    #[test]
    fn interval_parsing_short_form() {
        assert_parses(&[
            ("INTERVAL '15m'", 900_000),
            ("INTERVAL '1h'", 3_600_000),
            ("INTERVAL '30s'", 30_000),
            ("INTERVAL '2d'", 172_800_000),
            ("'500ms'", 500),
        ]);
    }

    // Amount and spelled-out unit separated by whitespace.
    #[test]
    fn interval_parsing_long_form() {
        assert_parses(&[
            ("INTERVAL '15 minutes'", 900_000),
            ("INTERVAL '1 hour'", 3_600_000),
            ("INTERVAL '30 seconds'", 30_000),
            ("INTERVAL '2 days'", 172_800_000),
        ]);
    }

    // A number without a unit is taken as milliseconds.
    #[test]
    fn interval_parsing_bare_number() {
        assert_parses(&[("5000", 5000)]);
    }

    // Empty, non-numeric and unknown-unit expressions are rejected.
    #[test]
    fn interval_parsing_errors() {
        let rejected = ["INTERVAL ''", "INTERVAL 'abc'", "INTERVAL '15 foobar'"];
        for input in rejected {
            assert!(
                parse_interval_to_ms(input).is_err(),
                "expected an error: {input:?}"
            );
        }
    }

    // Delimiters inside single quotes do not end the option value.
    #[test]
    fn with_option_end_respects_quotes() {
        let cases = [
            ("'hello, world', next", 14),
            ("simple, next", 6),
            ("no_comma", 8),
        ];
        for (input, expected) in cases {
            assert_eq!(find_with_option_end(input), expected, "input: {input:?}");
        }
    }
}