Skip to main content

nodedb_types/
kv_parsing.rs

//! Shared KV DDL parsing helpers used by both Origin (pgwire) and Lite.
//!
//! Extracts interval parsing, storage-mode detection, and WITH-clause
//! helpers into one place so both runtimes share identical logic.
5
6/// Errors produced by interval parsing.
7#[derive(Debug, Clone, thiserror::Error)]
8pub enum IntervalParseError {
9    /// The interval expression was empty after stripping the INTERVAL keyword.
10    #[error("empty interval expression")]
11    Empty,
12    /// The numeric amount could not be parsed.
13    #[error("invalid interval amount: '{0}'")]
14    InvalidAmount(String),
15    /// The unit suffix is not recognized.
16    #[error("unknown interval unit: '{0}'")]
17    UnknownUnit(String),
18    /// General parse failure.
19    #[error("cannot parse interval: '{0}'")]
20    Unparseable(String),
21}
22
/// Detect KV storage mode from uppercased SQL.
///
/// Matches `WITH storage = 'kv'` or `WITH STORAGE KV` keyword patterns: the
/// `STORAGE` keyword, optionally followed by whitespace, `=` and `'`, then
/// `KV` as a complete token (followed by end of input, whitespace, `'`, `,`
/// or `;`).
///
/// Every occurrence of `STORAGE` is examined, so an earlier identifier that
/// merely contains the word (for example a collection named `STORAGE_STATS`)
/// does not hide the real option later in the statement.
///
/// Must not false-positive on field names containing "kv". The comparison is
/// case-sensitive, so `upper` has to be uppercased by the caller.
pub fn is_kv_storage_mode(upper: &str) -> bool {
    const KEYWORD: &str = "STORAGE";

    upper.match_indices(KEYWORD).any(|(pos, _)| {
        // Skip the separators allowed between the keyword and its value.
        let value = upper[pos + KEYWORD.len()..]
            .trim_start_matches(|c: char| c.is_whitespace() || c == '=' || c == '\'');

        match value.strip_prefix("KV") {
            // `KV` must be a whole token, not the start of a longer word.
            Some(rest) => match rest.chars().next() {
                None => true,
                Some(c) => c.is_whitespace() || matches!(c, '\'' | ',' | ';'),
            },
            None => false,
        }
    })
}
43
44/// Parse an INTERVAL literal to milliseconds.
45///
46/// Supports: `INTERVAL '15 minutes'`, `INTERVAL '1h'`, `INTERVAL '30s'`,
47/// `INTERVAL '1 hour'`, `INTERVAL '2 days'`, `'15 minutes'` (without INTERVAL keyword).
48pub fn parse_interval_to_ms(s: &str) -> Result<u64, IntervalParseError> {
49    let trimmed = s.trim();
50    let inner = if trimmed.to_uppercase().starts_with("INTERVAL") {
51        trimmed[8..].trim()
52    } else {
53        trimmed
54    };
55    let unquoted = inner.trim_matches('\'').trim();
56
57    if unquoted.is_empty() {
58        return Err(IntervalParseError::Empty);
59    }
60
61    // Short-form: "15m", "1h", "30s", "2d"
62    if let Some(ms) = try_parse_short_interval(unquoted) {
63        return Ok(ms);
64    }
65
66    // Long-form: "15 minutes", "1 hour", "2 days 12 hours", "30 seconds"
67    // Supports compound: "2 hours 30 minutes" by parsing pairs of (number, unit).
68    let parts: Vec<&str> = unquoted.split_whitespace().collect();
69    if parts.len() >= 2 && parts.len().is_multiple_of(2) {
70        let mut total_ms: u64 = 0;
71        for chunk in parts.chunks(2) {
72            let amount: u64 = chunk[0]
73                .parse()
74                .map_err(|_| IntervalParseError::InvalidAmount(chunk[0].to_string()))?;
75            let unit = chunk[1].to_lowercase();
76            let multiplier = unit_to_ms_multiplier(&unit)
77                .ok_or_else(|| IntervalParseError::UnknownUnit(unit.clone()))?;
78            total_ms += amount * multiplier;
79        }
80        return Ok(total_ms);
81    }
82
83    // Bare number: treat as milliseconds.
84    if parts.len() == 1
85        && let Ok(ms) = unquoted.parse::<u64>()
86    {
87        return Ok(ms);
88    }
89
90    Err(IntervalParseError::Unparseable(unquoted.to_string()))
91}
92
/// Try to parse a short-form interval like "15m", "1h", "30s", "2d".
///
/// The input must be a run of ASCII digits immediately followed by a unit
/// suffix; surrounding whitespace is ignored and the unit is matched
/// case-insensitively.
///
/// Returns `None` when there are no leading digits, no unit suffix, the
/// number does not fit in a `u64`, the unit is unknown, or the resulting
/// millisecond count would overflow a `u64`.
pub fn try_parse_short_interval(s: &str) -> Option<u64> {
    let s = s.trim();
    // No non-digit character means there is no unit (this covers "" too).
    let num_end = s.find(|c: char| !c.is_ascii_digit())?;
    if num_end == 0 {
        return None;
    }
    let (digits, unit) = s.split_at(num_end);
    let amount: u64 = digits.parse().ok()?;
    let multiplier = unit_to_ms_multiplier(&unit.to_lowercase())?;
    // Checked so an oversized amount is rejected instead of panicking or wrapping.
    amount.checked_mul(multiplier)
}

/// Map a time unit string to its millisecond multiplier.
///
/// Accepts both short forms ("ms", "s", "m", "h", "d", "w", "y") and
/// long forms ("millisecond", "seconds", "minutes", "hours", "days", etc.).
/// Matching is exact, so callers lowercase the unit first. "m" means minutes
/// and a year is counted as 365 days. Returns `None` for any other string.
fn unit_to_ms_multiplier(unit: &str) -> Option<u64> {
    const SECOND: u64 = 1_000;
    const MINUTE: u64 = 60 * SECOND;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;
    const WEEK: u64 = 7 * DAY;
    const YEAR: u64 = 365 * DAY;

    let multiplier = match unit {
        "ms" | "millisecond" | "milliseconds" => 1,
        "s" | "sec" | "second" | "seconds" => SECOND,
        "m" | "min" | "minute" | "minutes" => MINUTE,
        "h" | "hr" | "hour" | "hours" => HOUR,
        "d" | "day" | "days" => DAY,
        "w" | "week" | "weeks" => WEEK,
        "y" | "year" | "years" => YEAR,
        _ => return None,
    };
    Some(multiplier)
}
125
/// Find the byte position of a named option in the WITH clause.
///
/// Only searches after the WITH keyword to avoid matching column names.
/// The keyword must stand alone as a word: a `WITH` embedded in an identifier
/// (for example a collection named `WITHDRAWALS`) is skipped, so the search
/// does not start early and hit a column name.
///
/// Both `upper` and `option` are compared case-sensitively; callers pass
/// uppercased text. Returns the offset of `option` within `upper`, or `None`
/// if there is no standalone `WITH` or the option does not follow it.
pub fn find_with_option(upper: &str, option: &str) -> Option<usize> {
    const KEYWORD: &str = "WITH";

    /// Characters that can be part of an identifier next to the keyword.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let with_pos = upper
        .match_indices(KEYWORD)
        .map(|(pos, _)| pos)
        .find(|&pos| {
            !upper[..pos].ends_with(is_ident_char)
                && !upper[pos + KEYWORD.len()..].starts_with(is_ident_char)
        })?;

    let search_from = with_pos + KEYWORD.len();
    upper[search_from..]
        .find(option)
        .map(|offset| search_from + offset)
}
134
/// Locate where a WITH option value expression stops.
///
/// Returns the byte offset of the first `,` or `;` that is not enclosed in
/// single quotes, or `s.len()` when no such terminator exists. Each `'`
/// toggles the quoted state.
pub fn find_with_option_end(s: &str) -> usize {
    let mut inside_quotes = false;
    // All characters of interest are ASCII, so scanning bytes yields the same
    // offsets as scanning chars.
    s.bytes()
        .position(|byte| match byte {
            b'\'' => {
                inside_quotes = !inside_quotes;
                false
            }
            b',' | b';' => !inside_quotes,
            _ => false,
        })
        .unwrap_or(s.len())
}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that every `(input, expected_ms)` pair parses to the expected
    /// number of milliseconds.
    fn assert_parses_to(cases: &[(&str, u64)]) {
        for &(input, expected_ms) in cases {
            assert_eq!(
                parse_interval_to_ms(input).unwrap(),
                expected_ms,
                "input: {input}"
            );
        }
    }

    #[test]
    fn detect_kv_storage_mode() {
        let kv_statements = [
            "WITH STORAGE = 'KV'",
            "WITH STORAGE KV",
            "WITH STORAGE='KV'",
            "WITH STORAGE = 'KV', TTL = INTERVAL '1H'",
        ];
        for sql in kv_statements {
            assert!(is_kv_storage_mode(sql), "expected KV mode: {sql}");
        }

        let other_statements = [
            "WITH STORAGE = 'STRICT'",
            "WITH STORAGE = 'COLUMNAR'",
            "CREATE COLLECTION KV_STUFF",
        ];
        for sql in other_statements {
            assert!(!is_kv_storage_mode(sql), "expected non-KV mode: {sql}");
        }
    }

    #[test]
    fn interval_parsing_short_form() {
        assert_parses_to(&[
            ("INTERVAL '15m'", 900_000),
            ("INTERVAL '1h'", 3_600_000),
            ("INTERVAL '30s'", 30_000),
            ("INTERVAL '2d'", 172_800_000),
            ("'500ms'", 500),
        ]);
    }

    #[test]
    fn interval_parsing_long_form() {
        assert_parses_to(&[
            ("INTERVAL '15 minutes'", 900_000),
            ("INTERVAL '1 hour'", 3_600_000),
            ("INTERVAL '30 seconds'", 30_000),
            ("INTERVAL '2 days'", 172_800_000),
        ]);
    }

    #[test]
    fn interval_parsing_bare_number() {
        assert_parses_to(&[("5000", 5000)]);
    }

    #[test]
    fn interval_parsing_errors() {
        let rejected = ["INTERVAL ''", "INTERVAL 'abc'", "INTERVAL '15 foobar'"];
        for input in rejected {
            assert!(parse_interval_to_ms(input).is_err(), "input: {input}");
        }
    }

    #[test]
    fn with_option_end_respects_quotes() {
        let cases = [
            ("'hello, world', next", 14),
            ("simple, next", 6),
            ("no_comma", 8),
        ];
        for (input, expected_end) in cases {
            assert_eq!(find_with_option_end(input), expected_end, "input: {input}");
        }
    }
}
214}