Skip to main content

surql_parser/upstream/syn/lexer/compound/
number.rs

1use crate::compat::val::DecimalExt;
2use crate::compat::val::duration::{
3	SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK, SECONDS_PER_YEAR,
4};
5use crate::upstream::syn::error::{SyntaxError, bail, syntax_error};
6use crate::upstream::syn::lexer::Lexer;
7use crate::upstream::syn::parser::ParseResult;
8use crate::upstream::syn::token::{Span, Token, TokenKind, t};
9use rust_decimal::Decimal;
10use std::borrow::Cow;
11use std::num::{ParseFloatError, ParseIntError};
12use std::str::FromStr;
13use std::time::Duration;
14#[derive(Debug, Clone, Copy)]
15pub struct ParsedInt {
16	sign: bool,
17	int: u64,
18}
19impl ParsedInt {
20	pub fn from_number_str(s: &str, span: Span) -> ParseResult<Self> {
21		let (sign, number) = match s.strip_prefix("-") {
22			Some(x) => (
23				true,
24				x.parse::<u64>()
25					.ok()
26					.filter(|x| *x <= (i64::MAX as u64) + 1),
27			),
28			None => (
29				false,
30				s.parse::<u64>()
31					.ok()
32					.filter(|x| *x <= (i64::MAX as u64) + 1),
33			),
34		};
35		let Some(n) = number else {
36			bail!(
37				"Failed to parse number: number cannot fit within a 64bit signed integer",
38				@ span
39			)
40		};
41		Ok(ParsedInt { sign, int: n })
42	}
43	pub fn into_int(self, span: Span) -> ParseResult<i64> {
44		let int = if self.int <= i64::MAX as u64 {
45			if self.sign {
46				-(self.int as i64)
47			} else {
48				self.int as i64
49			}
50		} else if self.sign {
51			debug_assert_eq!(self.int, (i64::MAX as u64) + 1);
52			i64::MIN
53		} else {
54			bail!(
55				"Failed to parse number: number cannot fit within a 64bit signed integer",
56				@ span
57			)
58		};
59		Ok(int)
60	}
61	pub fn into_neg_int(self, span: Span) -> ParseResult<i64> {
62		let int = if self.int <= i64::MAX as u64 {
63			if self.sign {
64				self.int as i64
65			} else {
66				-(self.int as i64)
67			}
68		} else if !self.sign {
69			debug_assert_eq!(self.int, (i64::MAX as u64) + 1);
70			i64::MIN
71		} else {
72			bail!(
73				"Failed to parse number: number cannot fit within a 64bit signed integer",
74				@ span
75			)
76		};
77		Ok(int)
78	}
79}
80#[derive(Debug)]
81pub enum Numeric {
82	Float(f64),
83	Integer(ParsedInt),
84	Decimal(Decimal),
85	Duration(Duration),
86}
/// Like [`Numeric`], but holds off on parsing a number into a specific value:
/// number variants only record which flavour was lexed, while durations are
/// still carried as a parsed value.
#[derive(Debug)]
pub enum NumericKind {
	/// The token is a floating point number.
	Float,
	/// The token is an integer.
	Int,
	/// The token is a decimal number.
	Decimal,
	/// The token is a duration, already parsed.
	Duration(Duration),
}
/// The flavour of a lexed number token, without its value.
#[derive(Debug)]
pub enum NumberKind {
	/// Digits only, without fraction, exponent or suffix.
	Integer,
	/// Has a fraction, an exponent or an `f` suffix, or is `NaN`/`Infinity`.
	Float,
	/// Ends in the `dec` suffix.
	Decimal,
}
/// Unit suffix that terminates one segment of a duration literal.
enum DurationSuffix {
	/// `ns`
	Nano,
	/// `us` or `µs`
	Micro,
	/// `ms`
	Milli,
	/// `s`
	Second,
	/// `m`
	Minute,
	/// `h`
	Hour,
	/// `d`
	Day,
	/// `w`
	Week,
	/// `y`
	Year,
}
/// Removes `_` digit separators from a number string.
///
/// Returns the input borrowed when it contains no underscore, so the common
/// case does not allocate.
pub fn prepare_number_str(str: &str) -> Cow<'_, str> {
	if !str.contains('_') {
		return Cow::Borrowed(str);
	}
	Cow::Owned(str.replace('_', ""))
}
119/// Tokens which can start with digits: Number or Duration.
120/// Like numeric but holds off on parsing the a number into a specific value.
121pub fn numeric_kind(lexer: &mut Lexer, start: Token) -> Result<NumericKind, SyntaxError> {
122	match start.kind {
123		t!("-") | t!("+") => match number_kind(lexer, start)? {
124			NumberKind::Integer => Ok(NumericKind::Int),
125			NumberKind::Float => Ok(NumericKind::Float),
126			NumberKind::Decimal => Ok(NumericKind::Decimal),
127		},
128		TokenKind::NaN | TokenKind::Infinity => Ok(NumericKind::Float),
129		TokenKind::Digits => match lexer.reader.peek() {
130			Some(b'n' | b's' | b'm' | b'h' | b'y' | b'w' | b'u') => {
131				duration(lexer, start).map(NumericKind::Duration)
132			}
133			Some(b'd') => {
134				if let Some(b'e') = lexer.reader.peek1() {
135					match number_kind(lexer, start)? {
136						NumberKind::Integer => Ok(NumericKind::Int),
137						NumberKind::Float => Ok(NumericKind::Float),
138						NumberKind::Decimal => Ok(NumericKind::Decimal),
139					}
140				} else {
141					duration(lexer, start).map(NumericKind::Duration)
142				}
143			}
144			Some(x) if !x.is_ascii() => duration(lexer, start).map(NumericKind::Duration),
145			_ => match number_kind(lexer, start)? {
146				NumberKind::Integer => Ok(NumericKind::Int),
147				NumberKind::Float => Ok(NumericKind::Float),
148				NumberKind::Decimal => Ok(NumericKind::Decimal),
149			},
150		},
151		x => {
152			bail!(
153				"Unexpected token `{x}`, expected a numeric value, either a duration or number",@
154				start.span
155			)
156		}
157	}
158}
159/// Tokens which can start with digits: Number or Duration.
160pub fn numeric(lexer: &mut Lexer, start: Token) -> Result<Numeric, SyntaxError> {
161	match start.kind {
162		t!("-") | t!("+") => number(lexer, start),
163		TokenKind::Digits => match lexer.reader.peek() {
164			Some(b'n' | b's' | b'm' | b'h' | b'y' | b'w' | b'u') => {
165				duration(lexer, start).map(Numeric::Duration)
166			}
167			Some(b'd') => {
168				if lexer.reader.peek1() == Some(b'e') {
169					number(lexer, start)
170				} else {
171					duration(lexer, start).map(Numeric::Duration)
172				}
173			}
174			Some(0xC2) => duration(lexer, start).map(Numeric::Duration),
175			_ => number(lexer, start),
176		},
177		x => {
178			bail!(
179				"Unexpected token `{x}`, expected a numeric value, either a duration or number",@
180				start.span
181			)
182		}
183	}
184}
185pub fn number_kind(lexer: &mut Lexer, start: Token) -> Result<NumberKind, SyntaxError> {
186	let offset = start.span.offset;
187	match start.kind {
188		t!("-") | t!("+") => {
189			if eat_infinity(lexer, offset)? {
190				return Ok(NumberKind::Float);
191			}
192			eat_digits1(lexer, offset)?;
193		}
194		TokenKind::Digits => {}
195		TokenKind::Infinity => return Ok(NumberKind::Float),
196		TokenKind::NaN => return Ok(NumberKind::Float),
197		x => bail!("Unexpected start token for integer: {x}",@ start.span),
198	}
199	let mut kind = NumberKind::Integer;
200	let before_mantissa = lexer.reader.offset();
201	if lexer
202		.reader
203		.peek1()
204		.map(|x| x.is_ascii_digit())
205		.unwrap_or(false)
206		&& lexer.eat(b'.')
207	{
208		eat_digits1(lexer, before_mantissa)?;
209		kind = NumberKind::Float;
210	}
211	let before_exponent = lexer.reader.offset();
212	if lexer.eat(b'e') || lexer.eat(b'E') {
213		if !lexer.eat(b'-') {
214			lexer.eat(b'+');
215		}
216		eat_digits1(lexer, before_exponent)?;
217		kind = NumberKind::Float;
218	}
219	if !lexer.eat(b'f') {
220		if lexer.eat(b'd') {
221			lexer.expect('e')?;
222			lexer.expect('c')?;
223			kind = NumberKind::Decimal;
224		}
225	} else {
226		kind = NumberKind::Float;
227	}
228	if has_ident_after(lexer) {
229		let char = lexer.reader.next().expect("lexer validated input");
230		let char = lexer.reader.convert_to_char(char)?;
231		bail!(
232			"Invalid token, found unexpected character `{char}` after number token", @
233			lexer.current_span()
234		)
235	}
236	Ok(kind)
237}
238pub fn number(lexer: &mut Lexer, start: Token) -> Result<Numeric, SyntaxError> {
239	let kind = number_kind(lexer, start)?;
240	let span = lexer.current_span();
241	let number_str = prepare_number_str(lexer.span_str(span));
242	match kind {
243		NumberKind::Integer => Ok(Numeric::Integer(ParsedInt::from_number_str(
244			number_str.as_ref(),
245			span,
246		)?)),
247		NumberKind::Float => {
248			if number_str.as_bytes()[0] == b'N' {
249				Ok(Numeric::Float(f64::NAN))
250			} else if number_str.as_bytes()[0] == b'-' && number_str.as_bytes()[1] == b'I' {
251				Ok(Numeric::Float(f64::NEG_INFINITY))
252			} else if number_str.as_bytes()[0] == b'+' && number_str.as_bytes()[1] == b'I'
253				|| number_str.as_bytes()[0] == b'I'
254			{
255				Ok(Numeric::Float(f64::INFINITY))
256			} else {
257				let number_str = number_str.trim_end_matches('f');
258				number_str.parse().map(Numeric::Float).map_err(|e| {
259					syntax_error!(
260						"Failed to parse number: {e}", @ lexer.current_span()
261					)
262				})
263			}
264		}
265		NumberKind::Decimal => {
266			let number_str = number_str.trim_end_matches("dec");
267			let decimal = if number_str.contains(['e', 'E']) {
268				Decimal::from_scientific(number_str).map_err(|e| {
269					syntax_error!(
270						"Failed to parser decimal: {e}", @ lexer.current_span()
271					)
272				})?
273			} else {
274				crate::compat::decimal_from_str_normalized(number_str).map_err(|e| {
275					syntax_error!(
276						"Failed to parser decimal: {e}", @ lexer.current_span()
277					)
278				})?
279			};
280			Ok(Numeric::Decimal(decimal))
281		}
282	}
283}
284/// Generic integer parsing method,
285/// works for all unsigned integers.
286pub fn integer<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
287where
288	I: FromStr<Err = ParseIntError>,
289{
290	let offset = start.span.offset;
291	match start.kind {
292		t!("-") | t!("+") => {
293			eat_digits1(lexer, offset)?;
294		}
295		TokenKind::Digits => {}
296		x => bail!("Unexpected token {x}, expected integer",@ start.span),
297	};
298	if has_ident_after(lexer) {
299		let char = lexer.reader.next().expect("lexer validated input");
300		let char = lexer.reader.convert_to_char(char)?;
301		bail!(
302			"Invalid token, found unexpected character `{char} after integer token", @
303			lexer.current_span()
304		)
305	}
306	let last_offset = lexer.reader.offset();
307	let peek = lexer.reader.peek();
308	if peek == Some(b'.') {
309		let is_mantissa = lexer
310			.reader
311			.peek1()
312			.map(|x| x.is_ascii_digit())
313			.unwrap_or(false);
314		if is_mantissa {
315			let span = Span {
316				offset: last_offset,
317				len: 1,
318			};
319			bail!(
320				"Unexpected character `.` starting float, only integers are allowed here",
321				@ span
322			)
323		}
324	}
325	if peek == Some(b'e') || peek == Some(b'E') {
326		bail!(
327			"Unexpected character `{}` only integers are allowed here", peek
328			.expect("validated input") as char, @ lexer.current_span()
329		)
330	}
331	let span = lexer.current_span();
332	let str = prepare_number_str(lexer.span_str(span));
333	str.parse()
334		.map_err(|e| syntax_error!("Invalid integer: {e}", @ span))
335}
/// Floating point types that the generic [`float`] lexer function can produce.
pub trait Float: Sized {
	/// The not-a-number value of the type.
	const NAN: Self;
	/// Negative infinity of the type.
	const NEG_INFINITY: Self;
	/// Positive infinity of the type.
	const INFINITY: Self;
	/// Parses a value of the type from its textual representation.
	fn from_str(s: &str) -> Result<Self, ParseFloatError>;
}

/// Implements [`Float`] for primitive float types by forwarding to the
/// type's own constants and its standard string parser.
macro_rules! impl_float {
	($($float:ty),* $(,)?) => {
		$(
			impl Float for $float {
				const NAN: Self = <$float>::NAN;
				const NEG_INFINITY: Self = <$float>::NEG_INFINITY;
				const INFINITY: Self = <$float>::INFINITY;
				fn from_str(s: &str) -> Result<Self, ParseFloatError> {
					s.parse::<$float>()
				}
			}
		)*
	};
}
impl_float!(f64, f32);
358/// Generic integer parsing method,
359/// works for all unsigned integers.
360pub fn float<F>(lexer: &mut Lexer, start: Token) -> Result<F, SyntaxError>
361where
362	F: Float,
363{
364	let offset = start.span.offset;
365	match start.kind {
366		t!("-") => {
367			if eat_infinity(lexer, offset)? {
368				return Ok(F::NEG_INFINITY);
369			}
370			eat_digits1(lexer, offset)?;
371		}
372		t!("+") => {
373			if eat_infinity(lexer, offset)? {
374				return Ok(F::INFINITY);
375			}
376			eat_digits1(lexer, offset)?;
377		}
378		TokenKind::Digits => {}
379		TokenKind::NaN => return Ok(F::NAN),
380		TokenKind::Infinity => return Ok(F::INFINITY),
381		x => bail!("Unexpected token {x}, expected floating point number",@ start.span),
382	};
383	let before_mantissa = lexer.reader.offset();
384	if lexer.eat(b'.') {
385		eat_digits1(lexer, before_mantissa)?;
386	}
387	let before_exponent = lexer.reader.offset();
388	if lexer.eat(b'e') || lexer.eat(b'E') {
389		if !lexer.eat(b'-') {
390			lexer.eat(b'+');
391		}
392		eat_digits1(lexer, before_exponent)?;
393	}
394	let number_span = lexer.current_span();
395	lexer.eat(b'f');
396	if has_ident_after(lexer) {
397		let char = lexer.reader.next().expect("lexer validated input");
398		let char = lexer.reader.convert_to_char(char)?;
399		bail!(
400			"Invalid token, found invalid character `{char}` after number token", @ lexer
401			.current_span()
402		)
403	}
404	let str = prepare_number_str(lexer.span_str(number_span));
405	F::from_str(str.as_ref())
406		.map_err(|e| syntax_error!("Invalid floating point number: {e}", @ lexer.current_span()))
407}
408pub fn duration(lexer: &mut Lexer, start: Token) -> Result<Duration, SyntaxError> {
409	match start.kind {
410		TokenKind::Digits => {}
411		x => bail!("Unexpected token {x}, expected duration", @ start.span),
412	}
413	let mut duration = Duration::ZERO;
414	let mut number_span = start.span;
415	loop {
416		let suffix = lex_duration_suffix(lexer)?;
417		let numeric_string = prepare_number_str(lexer.span_str(number_span));
418		let numeric_value: u64 = numeric_string.parse().map_err(|e| {
419			syntax_error!(
420				"Invalid token, failed to parse duration digits: {e}",@ lexer
421				.current_span()
422			)
423		})?;
424		let addition = match suffix {
425			DurationSuffix::Nano => Duration::from_nanos(numeric_value),
426			DurationSuffix::Micro => Duration::from_micros(numeric_value),
427			DurationSuffix::Milli => Duration::from_millis(numeric_value),
428			DurationSuffix::Second => Duration::from_secs(numeric_value),
429			DurationSuffix::Minute => {
430				let minutes = numeric_value
431					.checked_mul(SECONDS_PER_MINUTE)
432					.ok_or_else(|| {
433						syntax_error!(
434							"Invalid duration, value overflowed maximum allowed value", @
435							lexer.current_span()
436						)
437					})?;
438				Duration::from_secs(minutes)
439			}
440			DurationSuffix::Hour => {
441				let hours = numeric_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(|| {
442					syntax_error!(
443						"Invalid duration, value overflowed maximum allowed value", @
444						lexer.current_span()
445					)
446				})?;
447				Duration::from_secs(hours)
448			}
449			DurationSuffix::Day => {
450				let day = numeric_value.checked_mul(SECONDS_PER_DAY).ok_or_else(|| {
451					syntax_error!(
452						"Invalid duration, value overflowed maximum allowed value", @
453						lexer.current_span()
454					)
455				})?;
456				Duration::from_secs(day)
457			}
458			DurationSuffix::Week => {
459				let week = numeric_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(|| {
460					syntax_error!(
461						"Invalid duration, value overflowed maximum allowed value", @
462						lexer.current_span()
463					)
464				})?;
465				Duration::from_secs(week)
466			}
467			DurationSuffix::Year => {
468				let year = numeric_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(|| {
469					syntax_error!(
470						"Invalid duration, value overflowed maximum allowed value", @
471						lexer.current_span()
472					)
473				})?;
474				Duration::from_secs(year)
475			}
476		};
477		duration = duration.checked_add(addition).ok_or_else(|| {
478			syntax_error!(
479				"Invalid duration, value overflowed maximum allowed value", @ lexer
480				.current_span()
481			)
482		})?;
483		match lexer.reader.peek() {
484			Some(x) if x.is_ascii_digit() => {
485				let before = lexer.reader.offset();
486				eat_digits(lexer);
487				number_span = lexer.span_since(before);
488			}
489			_ => break,
490		}
491	}
492	Ok(duration)
493}
494fn lex_duration_suffix(lexer: &mut Lexer) -> Result<DurationSuffix, SyntaxError> {
495	let suffix = match lexer.reader.next() {
496		Some(b'n') => {
497			lexer.expect('s')?;
498			DurationSuffix::Nano
499		}
500		Some(b'u') => {
501			lexer.expect('s')?;
502			DurationSuffix::Micro
503		}
504		Some(b'm') => {
505			if lexer.eat(b's') {
506				DurationSuffix::Milli
507			} else {
508				DurationSuffix::Minute
509			}
510		}
511		Some(b's') => DurationSuffix::Second,
512		Some(b'h') => DurationSuffix::Hour,
513		Some(b'd') => DurationSuffix::Day,
514		Some(b'w') => DurationSuffix::Week,
515		Some(b'y') => DurationSuffix::Year,
516		Some(0xC2) => {
517			if !lexer.eat(0xB5) {
518				let char = lexer.reader.complete_char(0xC2)?;
519				bail!(
520					"Invalid duration token, expected a duration suffix found `{char}`",@
521					lexer.current_span()
522				)
523			}
524			lexer.expect('s')?;
525			DurationSuffix::Micro
526		}
527		Some(x) => {
528			let char = lexer.reader.convert_to_char(x)?;
529			bail!(
530				"Invalid duration token, expected a duration suffix found `{char}`",@
531				lexer.current_span()
532			)
533		}
534		None => {
535			bail!(
536				"Unexpected end of file, expected a duration suffix",@ lexer
537				.current_span()
538			)
539		}
540	};
541	if has_ident_after(lexer) {
542		let char = lexer.reader.next().expect("lexer validated input");
543		let char = lexer.reader.convert_to_char(char)?;
544		bail!(
545			"Invalid token, found invalid character `{char}` after duration suffix", @
546			lexer.current_span()
547		)
548	}
549	Ok(suffix)
550}
551fn has_ident_after(lexer: &mut Lexer) -> bool {
552	match lexer.reader.peek() {
553		Some(x) => x.is_ascii_alphabetic(),
554		None => false,
555	}
556}
557fn eat_digits1(lexer: &mut Lexer, start: u32) -> Result<(), SyntaxError> {
558	match lexer.reader.peek() {
559		Some(x) if x.is_ascii_digit() => {}
560		Some(x) => {
561			let char = lexer.reader.convert_to_char(x)?;
562			bail!(
563				"Invalid number token, expected a digit, found: {char}", @ lexer
564				.span_since(start)
565			);
566		}
567		None => {
568			bail!(
569				"Unexpected end of file, expected a number token digit", @ lexer
570				.span_since(start)
571			);
572		}
573	}
574	eat_digits(lexer);
575	Ok(())
576}
577fn eat_digits(lexer: &mut Lexer) {
578	while lexer.eat_when(|x| x.is_ascii_digit() || x == b'_') {}
579}
580fn eat_infinity(lexer: &mut Lexer, start: u32) -> Result<bool, SyntaxError> {
581	if lexer.reader.eat(b'I') {
582		for b in b"nfinity" {
583			match lexer.reader.next() {
584				Some(x) if x == *b => {}
585				Some(x) => {
586					bail!(
587						"Invalid number token, expected `{}` found: {x}",* b as char, @
588						lexer.span_since(start)
589					)
590				}
591				None => {
592					bail!(
593						"Unexpected end of file, expected a number token digit", @ lexer
594						.span_since(start)
595					);
596				}
597			}
598		}
599		Ok(true)
600	} else {
601		Ok(false)
602	}
603}