nodedb_sql/parser/preprocess/
temporal.rs1use super::lex::keyword_position_outside_literals;
40use crate::temporal::{TemporalScope, ValidTime};
41
42#[derive(Debug)]
44pub struct Extracted {
45 pub sql: String,
47 pub temporal: TemporalScope,
49}
50
51#[derive(Debug)]
53pub struct TemporalParseError(pub String);
54
55pub fn extract(sql: &str) -> Result<Option<Extracted>, TemporalParseError> {
59 let mut scope = TemporalScope::default();
60 let mut working = sql.to_string();
61 let mut any = false;
62
63 if let Some((rewritten, ms)) = strip_system_time_as_of(&working)? {
65 working = rewritten;
66 scope.system_as_of_ms = Some(ms);
67 any = true;
68 }
69 if let Some((rewritten, ms)) = strip_system_as_of_function(&working)? {
71 if scope.system_as_of_ms.is_some() {
72 return Err(TemporalParseError(
73 "multiple FOR SYSTEM_TIME / __system_as_of__ clauses".into(),
74 ));
75 }
76 working = rewritten;
77 scope.system_as_of_ms = Some(ms);
78 any = true;
79 }
80 if let Some((rewritten, ms)) = strip_as_of_system_time(&working)? {
82 if scope.system_as_of_ms.is_some() {
83 return Err(TemporalParseError(
84 "multiple system-time AS OF clauses in one statement".into(),
85 ));
86 }
87 working = rewritten;
88 scope.system_as_of_ms = Some(ms);
89 any = true;
90 if strip_as_of_system_time(&working)?.is_some() {
91 return Err(TemporalParseError(
92 "multiple system-time AS OF clauses in one statement".into(),
93 ));
94 }
95 }
96 if let Some((rewritten, vt)) = strip_valid_time(&working)? {
98 working = rewritten;
99 scope.valid_time = vt;
100 any = true;
101 }
102 if let Some((rewritten, ms)) = strip_as_of_valid_time(&working)? {
104 if !matches!(scope.valid_time, ValidTime::Any) {
105 return Err(TemporalParseError(
106 "multiple valid-time AS OF clauses in one statement".into(),
107 ));
108 }
109 working = rewritten;
110 scope.valid_time = ValidTime::At(ms);
111 any = true;
112 }
113
114 if any {
115 Ok(Some(Extracted {
116 sql: working,
117 temporal: scope,
118 }))
119 } else {
120 Ok(None)
121 }
122}
123
124fn strip_system_time_as_of(sql: &str) -> Result<Option<(String, i64)>, TemporalParseError> {
126 let Some(start) = keyword_position_outside_literals(sql, "FOR SYSTEM_TIME") else {
127 return Ok(None);
128 };
129 let after_kw = start + "FOR SYSTEM_TIME".len();
131 let tail_upper = sql[after_kw..].to_uppercase();
132 let Some(_as_of_rel) = tail_upper.trim_start().strip_prefix("AS OF") else {
133 return Err(TemporalParseError(
134 "FOR SYSTEM_TIME must be followed by AS OF <ms>".into(),
135 ));
136 };
137 let leading_ws = sql[after_kw..].len() - sql[after_kw..].trim_start().len();
138 let as_of_abs = after_kw + leading_ws + "AS OF".len();
139 let (ms, end_abs) = parse_trailing_i64(sql, as_of_abs)?;
140 let mut out = String::with_capacity(sql.len());
141 out.push_str(&sql[..start]);
142 out.push(' ');
143 out.push_str(&sql[end_abs..]);
144 Ok(Some((out, ms)))
145}
146
147fn strip_system_as_of_function(sql: &str) -> Result<Option<(String, i64)>, TemporalParseError> {
154 let upper = sql.to_uppercase();
155 let Some(start) = upper.find("__SYSTEM_AS_OF__(") else {
156 return Ok(None);
157 };
158 let after_open = start + "__SYSTEM_AS_OF__(".len();
159 let Some(close_rel) = sql[after_open..].find(')') else {
160 return Err(TemporalParseError(
161 "__system_as_of__(...) missing closing paren".into(),
162 ));
163 };
164 let close_abs = after_open + close_rel;
165 let arg = sql[after_open..close_abs].trim();
166 let ms: i64 = arg
167 .parse()
168 .map_err(|_| TemporalParseError(format!("__system_as_of__ arg not i64: {arg}")))?;
169 let mut out = String::with_capacity(sql.len());
170 out.push_str(&sql[..start]);
171 out.push_str("TRUE");
172 out.push_str(&sql[close_abs + 1..]);
173 Ok(Some((out, ms)))
174}
175
176fn strip_valid_time(sql: &str) -> Result<Option<(String, ValidTime)>, TemporalParseError> {
179 let Some(start) = keyword_position_outside_literals(sql, "FOR VALID_TIME") else {
180 return Ok(None);
181 };
182 let after_kw = start + "FOR VALID_TIME".len();
183 let tail_raw = &sql[after_kw..];
184 let tail_upper_owned = tail_raw.to_uppercase();
185 let tail_upper = tail_upper_owned.trim_start();
186 let leading_ws = tail_raw.len() - tail_raw.trim_start().len();
187
188 if let Some(rest) = tail_upper.strip_prefix("CONTAINS") {
189 let arg_abs = after_kw + leading_ws + "CONTAINS".len();
190 let (ms, end_abs) = parse_trailing_i64(sql, arg_abs)?;
191 let _ = rest;
192 let mut out = String::with_capacity(sql.len());
193 out.push_str(&sql[..start]);
194 out.push(' ');
195 out.push_str(&sql[end_abs..]);
196 return Ok(Some((out, ValidTime::At(ms))));
197 }
198 if let Some(rest) = tail_upper.strip_prefix("FROM") {
199 let arg_abs = after_kw + leading_ws + "FROM".len();
200 let (lo, lo_end) = parse_trailing_i64(sql, arg_abs)?;
201 let after_lo_raw = &sql[lo_end..];
202 let after_lo_upper_owned = after_lo_raw.to_uppercase();
203 let after_lo_upper = after_lo_upper_owned.trim_start();
204 let leading_ws2 = after_lo_raw.len() - after_lo_raw.trim_start().len();
205 let Some(_after_to) = after_lo_upper.strip_prefix("TO") else {
206 return Err(TemporalParseError(
207 "FOR VALID_TIME FROM <ms> must be followed by TO <ms>".into(),
208 ));
209 };
210 let hi_arg = lo_end + leading_ws2 + "TO".len();
211 let (hi, hi_end) = parse_trailing_i64(sql, hi_arg)?;
212 let _ = rest;
213 let mut out = String::with_capacity(sql.len());
214 out.push_str(&sql[..start]);
215 out.push(' ');
216 out.push_str(&sql[hi_end..]);
217 if hi <= lo {
218 return Err(TemporalParseError(format!(
219 "FOR VALID_TIME FROM {lo} TO {hi}: hi must be > lo"
220 )));
221 }
222 return Ok(Some((out, ValidTime::Range(lo, hi))));
223 }
224 Err(TemporalParseError(
225 "FOR VALID_TIME must be followed by CONTAINS or FROM".into(),
226 ))
227}
228
229fn parse_temporal_expr(token: &str) -> Result<i64, TemporalParseError> {
242 let t = token.trim();
243
244 if let Ok(v) = t.parse::<i64>() {
246 return Ok(v);
247 }
248
249 if t.to_uppercase() == "NOW()" {
251 let ms = std::time::SystemTime::now()
252 .duration_since(std::time::UNIX_EPOCH)
253 .map(|d| d.as_millis() as i64)
254 .unwrap_or(0);
255 return Ok(ms);
256 }
257
258 if (t.starts_with('\'') && t.ends_with('\'')) || (t.starts_with('"') && t.ends_with('"')) {
260 let inner = &t[1..t.len() - 1];
261 if let Ok(dt) = inner.parse::<chrono::DateTime<chrono::Utc>>() {
263 return Ok(dt.timestamp_millis());
264 }
265 if let Ok(ndt) = chrono::NaiveDateTime::parse_from_str(inner, "%Y-%m-%dT%H:%M:%S") {
267 use chrono::TimeZone as _;
268 return Ok(chrono::Utc.from_utc_datetime(&ndt).timestamp_millis());
269 }
270 return Err(TemporalParseError(format!(
271 "AS OF temporal expression: cannot parse timestamp string '{inner}'; \
272 expected RFC-3339 / ISO-8601 (e.g. '2024-01-15T00:00:00Z')"
273 )));
274 }
275
276 Err(TemporalParseError(format!(
277 "AS OF temporal expression '{t}' is not supported; \
278 use an integer (ms since epoch), NOW(), or an ISO-8601 string literal \
279 (e.g. '2024-01-15T00:00:00Z')"
280 )))
281}
282
283fn strip_as_of_system_time(sql: &str) -> Result<Option<(String, i64)>, TemporalParseError> {
295 let keyword = "AS OF SYSTEM TIME";
296 let Some(start) = keyword_position_outside_literals(sql, keyword) else {
297 return Ok(None);
298 };
299 let after_kw = start + keyword.len();
300 let (ms, end_abs) = parse_as_of_expr(sql, after_kw)?;
301 let mut out = String::with_capacity(sql.len());
302 out.push_str(sql[..start].trim_end());
303 out.push(' ');
304 out.push_str(sql[end_abs..].trim_start());
305 Ok(Some((out.trim().to_string(), ms)))
306}
307
308fn strip_as_of_valid_time(sql: &str) -> Result<Option<(String, i64)>, TemporalParseError> {
312 let keyword = "AS OF VALID TIME";
313 let Some(start) = keyword_position_outside_literals(sql, keyword) else {
314 return Ok(None);
315 };
316 let after_kw = start + keyword.len();
317 let (ms, end_abs) = parse_as_of_expr(sql, after_kw)?;
318 let mut out = String::with_capacity(sql.len());
319 out.push_str(sql[..start].trim_end());
320 out.push(' ');
321 out.push_str(sql[end_abs..].trim_start());
322 Ok(Some((out.trim().to_string(), ms)))
323}
324
325fn parse_as_of_expr(sql: &str, offset: usize) -> Result<(i64, usize), TemporalParseError> {
332 let rest = &sql[offset..];
333 let trimmed = rest.trim_start();
334 let leading = rest.len() - trimmed.len();
335 let abs_start = offset + leading;
336
337 if trimmed.starts_with('\'') || trimmed.starts_with('"') {
340 let quote = trimmed
341 .chars()
342 .next()
343 .expect("invariant: trimmed.starts_with('\\'' | '\"') guarantees at least one char");
344 let inner_start = 1;
345 let close = trimmed[inner_start..].find(quote).ok_or_else(|| {
346 TemporalParseError(format!(
347 "AS OF temporal expression: unterminated string literal at offset {abs_start}"
348 ))
349 })?;
350 let end_rel = inner_start + close + 1; let token = &trimmed[..end_rel];
352 let ms = parse_temporal_expr(token)?;
353 return Ok((ms, abs_start + end_rel));
354 }
355
356 let stop_tokens = [
361 "AS OF",
362 "WHERE",
363 "LIMIT",
364 "ORDER",
365 "GROUP",
366 "HAVING",
367 "UNION",
368 "EXCEPT",
369 "INTERSECT",
370 "FETCH",
371 "OFFSET",
372 ";",
373 ];
374 let upper_trimmed = trimmed.to_uppercase();
375 let mut end_rel = trimmed.len();
376 for stop in &stop_tokens {
377 let mut search_from = 0;
379 while let Some(pos) = upper_trimmed[search_from..].find(stop) {
380 let abs_pos = search_from + pos;
381 let at_boundary = abs_pos == 0
382 || upper_trimmed[..abs_pos].ends_with(|c: char| c.is_ascii_whitespace());
383 if at_boundary {
384 end_rel = end_rel.min(abs_pos);
385 break;
386 }
387 search_from = abs_pos + 1;
388 }
389 }
390 let token = trimmed[..end_rel].trim();
392 if token.is_empty() {
393 return Err(TemporalParseError(format!(
394 "AS OF clause at offset {abs_start} has no expression"
395 )));
396 }
397 let ms = parse_temporal_expr(token)?;
398 let raw_end = abs_start + end_rel;
400 Ok((ms, raw_end))
401}
402
403fn parse_trailing_i64(sql: &str, offset: usize) -> Result<(i64, usize), TemporalParseError> {
406 let rest = &sql[offset..];
407 let trimmed = rest.trim_start();
408 let leading = rest.len() - trimmed.len();
409 let end_rel = trimmed
410 .char_indices()
411 .take_while(|(i, c)| c.is_ascii_digit() || (*i == 0 && *c == '-'))
412 .map(|(i, c)| i + c.len_utf8())
413 .last()
414 .ok_or_else(|| TemporalParseError(format!("expected integer at offset {offset}")))?;
415 let num_str = &trimmed[..end_rel];
416 let v: i64 = num_str
417 .parse()
418 .map_err(|_| TemporalParseError(format!("not an i64: {num_str}")))?;
419 Ok((v, offset + leading + end_rel))
420}
421
422#[cfg(test)]
423mod tests {
424 use super::*;
425
426 #[test]
427 fn passthrough_no_temporal() {
428 assert!(extract("SELECT * FROM t WHERE x = 1").unwrap().is_none());
429 }
430
431 #[test]
432 fn system_time_as_of() {
433 let ex = extract("SELECT * FROM t FOR SYSTEM_TIME AS OF 100 WHERE x = 1")
434 .unwrap()
435 .unwrap();
436 assert_eq!(ex.temporal.system_as_of_ms, Some(100));
437 assert_eq!(ex.temporal.valid_time, ValidTime::Any);
438 assert!(!ex.sql.to_uppercase().contains("FOR SYSTEM_TIME"));
439 assert!(ex.sql.contains("WHERE x = 1"));
440 }
441
442 #[test]
443 fn valid_time_contains() {
444 let ex = extract("SELECT * FROM t FOR VALID_TIME CONTAINS 250")
445 .unwrap()
446 .unwrap();
447 assert_eq!(ex.temporal.valid_time, ValidTime::At(250));
448 assert!(!ex.sql.to_uppercase().contains("FOR VALID_TIME"));
449 }
450
451 #[test]
452 fn valid_time_range() {
453 let ex = extract("SELECT * FROM t FOR VALID_TIME FROM 100 TO 300 LIMIT 10")
454 .unwrap()
455 .unwrap();
456 assert_eq!(ex.temporal.valid_time, ValidTime::Range(100, 300));
457 assert!(ex.sql.contains("LIMIT 10"));
458 }
459
460 #[test]
461 fn combined_system_and_valid() {
462 let ex = extract(
463 "SELECT * FROM t FOR SYSTEM_TIME AS OF 500 FOR VALID_TIME CONTAINS 250 LIMIT 5",
464 )
465 .unwrap()
466 .unwrap();
467 assert_eq!(ex.temporal.system_as_of_ms, Some(500));
468 assert_eq!(ex.temporal.valid_time, ValidTime::At(250));
469 }
470
471 #[test]
472 fn function_form() {
473 let ex = extract("SELECT __system_as_of__(777), x FROM t")
474 .unwrap()
475 .unwrap();
476 assert_eq!(ex.temporal.system_as_of_ms, Some(777));
477 assert!(ex.sql.contains("TRUE"));
478 assert!(!ex.sql.contains("__system_as_of__"));
479 }
480
481 #[test]
482 fn invalid_range_rejects() {
483 assert!(extract("SELECT * FROM t FOR VALID_TIME FROM 500 TO 100").is_err());
484 }
485
486 #[test]
487 fn valid_time_missing_verb() {
488 assert!(extract("SELECT * FROM t FOR VALID_TIME 100").is_err());
489 }
490
491 #[test]
492 fn as_of_system_time_integer() {
493 let ex = extract("SELECT * FROM array_slice('g', '{}') AS OF SYSTEM TIME 1700000000000")
494 .unwrap()
495 .unwrap();
496 assert_eq!(ex.temporal.system_as_of_ms, Some(1_700_000_000_000));
497 assert!(!ex.sql.to_uppercase().contains("AS OF SYSTEM TIME"));
498 }
499
500 #[test]
501 fn as_of_valid_time_integer() {
502 let ex = extract("SELECT * FROM array_slice('g', '{}') AS OF VALID TIME 1700000000001")
503 .unwrap()
504 .unwrap();
505 assert_eq!(ex.temporal.valid_time, ValidTime::At(1_700_000_000_001));
506 assert!(!ex.sql.to_uppercase().contains("AS OF VALID TIME"));
507 }
508
509 #[test]
510 fn as_of_system_time_iso8601() {
511 let ex = extract(
512 "SELECT * FROM array_slice('g', '{}') AS OF SYSTEM TIME '2024-01-15T00:00:00Z'",
513 )
514 .unwrap()
515 .unwrap();
516 assert_eq!(ex.temporal.system_as_of_ms, Some(1_705_276_800_000));
518 assert!(!ex.sql.to_uppercase().contains("AS OF SYSTEM TIME"));
519 }
520
521 #[test]
522 fn as_of_both_clauses() {
523 let ex = extract(
524 "SELECT * FROM array_slice('g', '{}') AS OF SYSTEM TIME 500 AS OF VALID TIME 250",
525 )
526 .unwrap()
527 .unwrap();
528 assert_eq!(ex.temporal.system_as_of_ms, Some(500));
529 assert_eq!(ex.temporal.valid_time, ValidTime::At(250));
530 }
531
532 #[test]
533 fn as_of_system_time_now() {
534 let before = std::time::SystemTime::now()
535 .duration_since(std::time::UNIX_EPOCH)
536 .unwrap()
537 .as_millis() as i64;
538 let ex = extract("SELECT * FROM array_slice('g', '{}') AS OF SYSTEM TIME NOW()")
539 .unwrap()
540 .unwrap();
541 let after = std::time::SystemTime::now()
542 .duration_since(std::time::UNIX_EPOCH)
543 .unwrap()
544 .as_millis() as i64;
545 let ts = ex.temporal.system_as_of_ms.unwrap();
546 assert!(
547 ts >= before && ts <= after,
548 "NOW() ts {ts} not in [{before}, {after}]"
549 );
550 }
551
552 #[test]
553 fn as_of_system_time_unsupported_expr_rejected() {
554 let err = extract(
555 "SELECT * FROM array_slice('g', '{}') AS OF SYSTEM TIME NOW() - INTERVAL '1 day'",
556 );
557 assert!(err.is_err(), "INTERVAL expression should be rejected");
558 }
559
560 #[test]
561 fn as_of_duplicate_system_time_rejected() {
562 assert!(
563 extract("SELECT * FROM t AS OF SYSTEM TIME 100 AS OF SYSTEM TIME 200").is_err(),
564 "duplicate AS OF SYSTEM TIME should be rejected"
565 );
566 }
567
568 #[test]
569 fn system_time_in_string_literal_not_triggered() {
570 let result = extract("SELECT * FROM t WHERE name = 'FOR SYSTEM_TIME'").unwrap();
573 assert!(
574 result.is_none(),
575 "FOR SYSTEM_TIME inside string literal must not trigger, got: {result:?}"
576 );
577 }
578
579 #[test]
580 fn system_time_as_of_outside_literal_triggered() {
581 let ex = extract("SELECT * FROM t FOR SYSTEM_TIME AS OF 100")
582 .unwrap()
583 .unwrap();
584 assert_eq!(ex.temporal.system_as_of_ms, Some(100));
585 }
586}