jrsonnet_evaluator/builtin/
format.rs

1//! faster std.format impl
2#![allow(clippy::too_many_arguments)]
3
4use crate::{error::Error::*, throw, LocError, ObjValue, Result, Val};
5use jrsonnet_gc::Trace;
6use jrsonnet_interner::IStr;
7use jrsonnet_types::ValType;
8use thiserror::Error;
9
10#[derive(Debug, Clone, Error, Trace)]
11#[trivially_drop]
12pub enum FormatError {
13	#[error("truncated format code")]
14	TruncatedFormatCode,
15	#[error("unrecognized conversion type: {0}")]
16	UnrecognizedConversionType(char),
17
18	#[error("not enough values")]
19	NotEnoughValues,
20
21	#[error("cannot use * width with object")]
22	CannotUseStarWidthWithObject,
23	#[error("mapping keys required")]
24	MappingKeysRequired,
25	#[error("no such format field: {0}")]
26	NoSuchFormatField(IStr),
27}
28
29impl From<FormatError> for LocError {
30	fn from(e: FormatError) -> Self {
31		Self::new(Format(e))
32	}
33}
34
35use FormatError::*;
36
37type ParseResult<'t, T> = std::result::Result<(T, &'t str), FormatError>;
38
39pub fn try_parse_mapping_key(str: &str) -> ParseResult<&str> {
40	if str.is_empty() {
41		return Err(TruncatedFormatCode);
42	}
43	let bytes = str.as_bytes();
44	if bytes[0] == b'(' {
45		let mut i = 1;
46		while i < bytes.len() {
47			if bytes[i] == b')' {
48				return Ok((&str[1..i as usize], &str[i as usize + 1..]));
49			}
50			i += 1;
51		}
52		Err(TruncatedFormatCode)
53	} else {
54		Ok(("", str))
55	}
56}
57
/// Tests for [`try_parse_mapping_key`].
#[cfg(test)]
pub mod tests_key {
	use super::*;

	#[test]
	fn parse_key() {
		assert_eq!(
			try_parse_mapping_key("(hello ) world").unwrap(),
			("hello ", " world")
		);
		assert_eq!(try_parse_mapping_key("() world").unwrap(), ("", " world"));
		assert_eq!(try_parse_mapping_key(" world").unwrap(), ("", " world"));
		assert_eq!(
			try_parse_mapping_key(" () world").unwrap(),
			("", " () world")
		);
	}

	// The failure cases check for the specific error instead of relying on
	// `#[should_panic]`, which would also pass on any unrelated panic.

	#[test]
	fn parse_key_missing_start() {
		assert!(matches!(
			try_parse_mapping_key(""),
			Err(FormatError::TruncatedFormatCode)
		));
	}

	#[test]
	fn parse_key_missing_end() {
		assert!(matches!(
			try_parse_mapping_key("(   "),
			Err(FormatError::TruncatedFormatCode)
		));
	}
}
88
89#[derive(Default, Debug)]
90pub struct CFlags {
91	pub alt: bool,
92	pub zero: bool,
93	pub left: bool,
94	pub blank: bool,
95	pub sign: bool,
96}
97
98pub fn try_parse_cflags(str: &str) -> ParseResult<CFlags> {
99	if str.is_empty() {
100		return Err(TruncatedFormatCode);
101	}
102	let bytes = str.as_bytes();
103	let mut i = 0;
104	let mut out = CFlags::default();
105	loop {
106		if bytes.len() == i {
107			return Err(TruncatedFormatCode);
108		}
109		match bytes[i] {
110			b'#' => out.alt = true,
111			b'0' => out.zero = true,
112			b'-' => out.left = true,
113			b' ' => out.blank = true,
114			b'+' => out.sign = true,
115			_ => break,
116		}
117		i += 1;
118	}
119	Ok((out, &str[i..]))
120}
121
122#[derive(Debug, PartialEq)]
123pub enum Width {
124	Star,
125	Fixed(usize),
126}
127pub fn try_parse_field_width(str: &str) -> ParseResult<Width> {
128	if str.is_empty() {
129		return Err(TruncatedFormatCode);
130	}
131	let bytes = str.as_bytes();
132	if bytes[0] == b'*' {
133		return Ok((Width::Star, &str[1..]));
134	}
135	let mut out: usize = 0;
136	let mut digits = 0;
137	while let Some(digit) = (bytes[digits] as char).to_digit(10) {
138		out *= 10;
139		out += digit as usize;
140		digits += 1;
141		if digits == bytes.len() {
142			return Err(TruncatedFormatCode);
143		}
144	}
145	Ok((Width::Fixed(out), &str[digits..]))
146}
147
148pub fn try_parse_precision(str: &str) -> ParseResult<Option<Width>> {
149	if str.is_empty() {
150		return Err(TruncatedFormatCode);
151	}
152	let bytes = str.as_bytes();
153	if bytes[0] == b'.' {
154		try_parse_field_width(&str[1..]).map(|(r, s)| (Some(r), s))
155	} else {
156		Ok((None, str))
157	}
158}
159
160// Only skips
161pub fn try_parse_length_modifier(str: &str) -> ParseResult<()> {
162	if str.is_empty() {
163		return Err(TruncatedFormatCode);
164	}
165	let bytes = str.as_bytes();
166	let mut idx = 0;
167	while bytes[idx] == b'h' || bytes[idx] == b'l' || bytes[idx] == b'L' {
168		idx += 1;
169		if bytes.len() == idx {
170			return Err(TruncatedFormatCode);
171		}
172	}
173	Ok(((), &str[idx..]))
174}
175
176#[derive(Debug, PartialEq)]
177pub enum ConvTypeV {
178	Decimal,
179	Octal,
180	Hexadecimal,
181	Scientific,
182	Float,
183	Shorter,
184	Char,
185	String,
186	Percent,
187}
188pub struct ConvType {
189	v: ConvTypeV,
190	caps: bool,
191}
192
193pub fn parse_conversion_type(str: &str) -> ParseResult<ConvType> {
194	if str.is_empty() {
195		return Err(TruncatedFormatCode);
196	}
197
198	let code = str.as_bytes()[0];
199	let v: (ConvTypeV, bool) = match code {
200		b'd' | b'i' | b'u' => (ConvTypeV::Decimal, false),
201		b'o' => (ConvTypeV::Octal, false),
202		b'x' => (ConvTypeV::Hexadecimal, false),
203		b'X' => (ConvTypeV::Hexadecimal, true),
204		b'e' => (ConvTypeV::Scientific, false),
205		b'E' => (ConvTypeV::Scientific, true),
206		b'f' => (ConvTypeV::Float, false),
207		b'F' => (ConvTypeV::Float, true),
208		b'g' => (ConvTypeV::Shorter, false),
209		b'G' => (ConvTypeV::Shorter, true),
210		b'c' => (ConvTypeV::Char, false),
211		b's' => (ConvTypeV::String, false),
212		b'%' => (ConvTypeV::Percent, false),
213		c => return Err(UnrecognizedConversionType(c as char)),
214	};
215
216	Ok((ConvType { v: v.0, caps: v.1 }, &str[1..]))
217}
218
219#[derive(Debug)]
220pub struct Code<'s> {
221	mkey: &'s str,
222	cflags: CFlags,
223	width: Width,
224	precision: Option<Width>,
225	convtype: ConvTypeV,
226	caps: bool,
227}
228pub fn parse_code(str: &str) -> ParseResult<Code> {
229	if str.is_empty() {
230		return Err(TruncatedFormatCode);
231	}
232	let (mkey, str) = try_parse_mapping_key(str)?;
233	let (cflags, str) = try_parse_cflags(str)?;
234	let (width, str) = try_parse_field_width(str)?;
235	let (precision, str) = try_parse_precision(str)?;
236	let (_, str) = try_parse_length_modifier(str)?;
237	let (convtype, str) = parse_conversion_type(str)?;
238
239	Ok((
240		Code {
241			mkey,
242			cflags,
243			width,
244			precision,
245			convtype: convtype.v,
246			caps: convtype.caps,
247		},
248		str,
249	))
250}
251
252#[derive(Debug)]
253pub enum Element<'s> {
254	String(&'s str),
255	Code(Code<'s>),
256}
257pub fn parse_codes(mut str: &str) -> Result<Vec<Element>> {
258	let mut bytes = str.as_bytes();
259	let mut out = vec![];
260	let mut offset = 0;
261
262	loop {
263		while offset != bytes.len() && bytes[offset] != b'%' {
264			offset += 1;
265		}
266		if offset != 0 {
267			out.push(Element::String(&str[0..offset]));
268		}
269		if offset == bytes.len() {
270			return Ok(out);
271		}
272		str = &str[offset + 1..];
273		let (code, nstr) = parse_code(str)?;
274		str = nstr;
275		bytes = str.as_bytes();
276		offset = 0;
277
278		out.push(Element::Code(code))
279	}
280}
281
/// Digit characters indexed by digit value; enough for any radix up to 36.
const NUMBERS: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";

/// Appends `iv` written in the given `radix` to `out`.
///
/// The output is, in order: a sign character (`-` for negative values,
/// otherwise `+` if `sign`, otherwise a space if `blank`), the `prefix`, zero
/// padding, and the digits (upper-cased when `caps`).
///
/// * `padding` - minimal total length including sign and prefix; reached by
///   inserting zeros.
/// * `precision` - alternative lower bound for the combined length of prefix,
///   zeros and digits; the larger of the two bounds applies.
/// * `radix` - must be in `2..=36`, digits are looked up in [`NUMBERS`].
#[inline]
pub fn render_integer(
	out: &mut String,
	iv: i64,
	padding: usize,
	precision: usize,
	blank: bool,
	sign: bool,
	radix: i64,
	prefix: &str,
	caps: bool,
) {
	// Digit values in reverse order, i.e
	// for radix = 16 and n = 12f: [15, 2, 1]
	let digits = if iv == 0 {
		vec![0u8]
	} else {
		// Widened to i128 so that the magnitude of i64::MIN is representable;
		// `i64::abs` would overflow on it.
		let radix = i128::from(radix);
		let mut v = i128::from(iv).abs();
		let mut nums = Vec::new();
		while v > 0 {
			nums.push((v % radix) as u8);
			v /= radix;
		}
		nums
	};
	let neg = iv < 0;
	// One column is taken by the sign character, if there is one.
	let zp = padding.saturating_sub(if neg || blank || sign { 1 } else { 0 });
	let zp2 = zp
		.max(precision)
		.saturating_sub(prefix.len() + digits.len());

	if neg {
		out.push('-')
	} else if sign {
		out.push('+');
	} else if blank {
		out.push(' ');
	}

	// The prefix goes in front of the zero padding: `0x00ff`, not `000xff`.
	out.push_str(prefix);
	out.reserve(zp2 + digits.len());
	for _ in 0..zp2 {
		out.push('0');
	}

	for digit in digits.into_iter().rev() {
		let ch = NUMBERS[digit as usize] as char;
		out.push(if caps { ch.to_ascii_uppercase() } else { ch });
	}
}
334
335pub fn render_decimal(
336	out: &mut String,
337	iv: i64,
338	padding: usize,
339	precision: usize,
340	blank: bool,
341	sign: bool,
342) {
343	render_integer(out, iv, padding, precision, blank, sign, 10, "", false)
344}
345pub fn render_octal(
346	out: &mut String,
347	iv: i64,
348	padding: usize,
349	precision: usize,
350	alt: bool,
351	blank: bool,
352	sign: bool,
353) {
354	render_integer(
355		out,
356		iv,
357		padding,
358		precision,
359		blank,
360		sign,
361		8,
362		if alt && iv != 0 { "0" } else { "" },
363		false,
364	)
365}
366pub fn render_hexadecimal(
367	out: &mut String,
368	iv: i64,
369	padding: usize,
370	precision: usize,
371	alt: bool,
372	blank: bool,
373	sign: bool,
374	caps: bool,
375) {
376	render_integer(
377		out,
378		iv,
379		padding,
380		precision,
381		blank,
382		sign,
383		16,
384		match (alt, caps) {
385			(true, true) => "0X",
386			(true, false) => "0x",
387			(false, _) => "",
388		},
389		caps,
390	)
391}
392
393pub fn render_float(
394	out: &mut String,
395	n: f64,
396	mut padding: usize,
397	precision: usize,
398	blank: bool,
399	sign: bool,
400	ensure_pt: bool,
401	trailing: bool,
402) {
403	let dot_size = if precision == 0 && !ensure_pt { 0 } else { 1 };
404	padding = padding.saturating_sub(dot_size + precision);
405	render_decimal(out, n.floor() as i64, padding, 0, blank, sign);
406	if precision == 0 {
407		if ensure_pt {
408			out.push('.');
409		}
410		return;
411	}
412	let frac = n
413		.fract()
414		.mul_add(10.0_f64.powf(precision as f64), 0.5)
415		.floor();
416	if trailing || frac > 0.0 {
417		out.push('.');
418		let mut frac_str = String::new();
419		render_decimal(&mut frac_str, frac as i64, precision, 0, false, false);
420		let mut trim = frac_str.len();
421		if !trailing {
422			for b in frac_str.as_bytes().iter().rev() {
423				if *b == b'0' {
424					trim -= 1;
425				}
426			}
427		}
428		out.push_str(&frac_str[..trim]);
429	} else if ensure_pt {
430		out.push('.');
431	}
432}
433
434pub fn render_float_sci(
435	out: &mut String,
436	n: f64,
437	mut padding: usize,
438	precision: usize,
439	blank: bool,
440	sign: bool,
441	ensure_pt: bool,
442	trailing: bool,
443	caps: bool,
444) {
445	let exponent = n.log10().floor();
446	let mantissa = if exponent as i16 == -324 {
447		n * 10.0 / 10.0_f64.powf(exponent + 1.0)
448	} else {
449		n / 10.0_f64.powf(exponent)
450	};
451	let mut exponent_str = String::new();
452	render_decimal(&mut exponent_str, exponent as i64, 3, 0, false, true);
453
454	// +1 for e
455	padding = padding.saturating_sub(exponent_str.len() + 1);
456
457	render_float(
458		out, mantissa, padding, precision, blank, sign, ensure_pt, trailing,
459	);
460	out.push(if caps { 'E' } else { 'e' });
461	out.push_str(&exponent_str);
462}
463
464pub fn format_code(
465	out: &mut String,
466	value: &Val,
467	code: &Code,
468	width: usize,
469	precision: Option<usize>,
470) -> Result<()> {
471	let clfags = &code.cflags;
472	let (fpprec, iprec) = match precision {
473		Some(v) => (v, v),
474		None => (6, 0),
475	};
476	let padding = if clfags.zero && !clfags.left {
477		width
478	} else {
479		0
480	};
481
482	// TODO: If left padded, can optimize by writing directly to out
483	let mut tmp_out = String::new();
484
485	match code.convtype {
486		ConvTypeV::String => tmp_out.push_str(&value.clone().to_string()?),
487		ConvTypeV::Decimal => {
488			let value = value.clone().try_cast_num("%d/%u/%i requires number")?;
489			render_decimal(
490				&mut tmp_out,
491				value as i64,
492				padding,
493				iprec,
494				clfags.blank,
495				clfags.sign,
496			);
497		}
498		ConvTypeV::Octal => {
499			let value = value.clone().try_cast_num("%o requires number")?;
500			render_octal(
501				&mut tmp_out,
502				value as i64,
503				padding,
504				iprec,
505				clfags.alt,
506				clfags.blank,
507				clfags.sign,
508			);
509		}
510		ConvTypeV::Hexadecimal => {
511			let value = value.clone().try_cast_num("%x/%X requires number")?;
512			render_hexadecimal(
513				&mut tmp_out,
514				value as i64,
515				padding,
516				iprec,
517				clfags.alt,
518				clfags.blank,
519				clfags.sign,
520				code.caps,
521			);
522		}
523		ConvTypeV::Scientific => {
524			let value = value.clone().try_cast_num("%e/%E requires number")?;
525			render_float_sci(
526				&mut tmp_out,
527				value,
528				padding,
529				fpprec,
530				clfags.blank,
531				clfags.sign,
532				clfags.alt,
533				true,
534				code.caps,
535			);
536		}
537		ConvTypeV::Float => {
538			let value = value.clone().try_cast_num("%e/%E requires number")?;
539			render_float(
540				&mut tmp_out,
541				value,
542				padding,
543				fpprec,
544				clfags.blank,
545				clfags.sign,
546				clfags.alt,
547				true,
548			);
549		}
550		ConvTypeV::Shorter => {
551			let value = value.clone().try_cast_num("%g/%G requires number")?;
552			let exponent = value.log10().floor();
553			if exponent < -4.0 || exponent >= fpprec as f64 {
554				render_float_sci(
555					&mut tmp_out,
556					value,
557					padding,
558					fpprec - 1,
559					clfags.blank,
560					clfags.sign,
561					clfags.alt,
562					clfags.alt,
563					code.caps,
564				);
565			} else {
566				let digits_before_pt = 1.max(exponent as usize + 1);
567				render_float(
568					&mut tmp_out,
569					value,
570					padding,
571					fpprec - digits_before_pt,
572					clfags.blank,
573					clfags.sign,
574					clfags.alt,
575					clfags.alt,
576				);
577			}
578		}
579		ConvTypeV::Char => match value.clone() {
580			Val::Num(n) => tmp_out.push(
581				std::char::from_u32(n as u32)
582					.ok_or_else(|| InvalidUnicodeCodepointGot(n as u32))?,
583			),
584			Val::Str(s) => {
585				if s.chars().count() != 1 {
586					throw!(RuntimeError(
587						format!("%c expected 1 char string, got {}", s.chars().count()).into(),
588					));
589				}
590				tmp_out.push_str(&s);
591			}
592			_ => {
593				throw!(TypeMismatch(
594					"%c requires number/string",
595					vec![ValType::Num, ValType::Str],
596					value.value_type(),
597				));
598			}
599		},
600		ConvTypeV::Percent => tmp_out.push('%'),
601	};
602
603	let padding = width.saturating_sub(tmp_out.len());
604
605	if !clfags.left {
606		for _ in 0..padding {
607			out.push(' ');
608		}
609	}
610	out.push_str(&tmp_out);
611	if clfags.left {
612		for _ in 0..padding {
613			out.push(' ');
614		}
615	}
616
617	Ok(())
618}
619
620pub fn format_arr(str: &str, mut values: &[Val]) -> Result<String> {
621	let codes = parse_codes(str)?;
622	let mut out = String::new();
623
624	for code in codes {
625		match code {
626			Element::String(s) => {
627				out.push_str(s);
628			}
629			Element::Code(c) => {
630				let width = match c.width {
631					Width::Star => {
632						if values.is_empty() {
633							throw!(NotEnoughValues);
634						}
635						let value = &values[0];
636						values = &values[1..];
637						value.clone().try_cast_num("field width")? as usize
638					}
639					Width::Fixed(n) => n,
640				};
641				let precision = match c.precision {
642					Some(Width::Star) => {
643						if values.is_empty() {
644							throw!(NotEnoughValues);
645						}
646						let value = &values[0];
647						values = &values[1..];
648						Some(value.clone().try_cast_num("field precision")? as usize)
649					}
650					Some(Width::Fixed(n)) => Some(n),
651					None => None,
652				};
653
654				// %% should not consume a value
655				let value = if c.convtype == ConvTypeV::Percent {
656					&Val::Null
657				} else {
658					if values.is_empty() {
659						throw!(NotEnoughValues);
660					}
661					let value = &values[0];
662					values = &values[1..];
663					value
664				};
665
666				format_code(&mut out, value, &c, width, precision)?;
667			}
668		}
669	}
670
671	Ok(out)
672}
673
674pub fn format_obj(str: &str, values: &ObjValue) -> Result<String> {
675	let codes = parse_codes(str)?;
676	let mut out = String::new();
677
678	for code in codes {
679		match code {
680			Element::String(s) => {
681				out.push_str(s);
682			}
683			Element::Code(c) => {
684				// TODO: Operate on ref
685				let f: IStr = c.mkey.into();
686				let width = match c.width {
687					Width::Star => {
688						throw!(CannotUseStarWidthWithObject);
689					}
690					Width::Fixed(n) => n,
691				};
692				let precision = match c.precision {
693					Some(Width::Star) => {
694						throw!(CannotUseStarWidthWithObject);
695					}
696					Some(Width::Fixed(n)) => Some(n),
697					None => None,
698				};
699
700				let value = if c.convtype == ConvTypeV::Percent {
701					Val::Null
702				} else {
703					if f.is_empty() {
704						throw!(MappingKeysRequired);
705					}
706					if let Some(v) = values.get(f.clone())? {
707						v
708					} else {
709						throw!(NoSuchFormatField(f));
710					}
711				};
712
713				format_code(&mut out, &value, &c, width, precision)?;
714			}
715		}
716	}
717
718	Ok(out)
719}
720
/// Tests for format string parsing and array formatting.
#[cfg(test)]
pub mod test_format {
	use super::*;

	/// Format string with one value-consuming code followed by `%%`.
	const AVAILABILITY: &str =
		"How much error budget is left looking at our %.3f%% availability gurantees?";

	#[test]
	fn parse() {
		// text, `%.3f`, `%%`, text
		let elements = parse_codes(AVAILABILITY).unwrap();
		assert_eq!(elements.len(), 4);
	}

	#[test]
	fn octals() {
		// (format, expected rendering of 8)
		let cases = [
			("%#o", "010"),
			("%#4o", " 010"),
			("%4o", "  10"),
			("%04o", "0010"),
			("%+4o", " +10"),
			("%+04o", "+010"),
			("%-4o", "10  "),
			("%+-4o", "+10 "),
			("%+-04o", "+10 "),
		];
		for &(format, expected) in cases.iter() {
			assert_eq!(format_arr(format, &[Val::Num(8.0)]).unwrap(), expected);
		}
	}

	#[test]
	fn percent_doesnt_consumes_values() {
		let rendered = format_arr(AVAILABILITY, &[Val::Num(4.0)]).unwrap();
		assert_eq!(
			rendered,
			"How much error budget is left looking at our 4.000% availability gurantees?"
		);
	}
}
761}