#![allow(missing_docs)]
use std::{collections::BTreeMap, result::Result as StdResult};
use pest::{Parser, iterators::Pair};
use pest_derive::Parser;
use super::{Error, Result, indent};
use crate::{Binary, Value};
/// Error type produced by pest when input does not match this grammar's [`Rule`] set.
pub(super) type PestError = pest::error::Error<Rule>;
/// Parser whose implementation is generated by `pest_derive` from `parser/grammar.pest`.
///
/// The derive is also what provides the `Rule` enum referenced throughout this module.
#[derive(Parser)]
#[grammar = "parser/grammar.pest"]
pub(super) struct JamlParser;
/// One content-bearing line of the document, as collected by `parse_lines`.
#[derive(Debug, Clone)]
struct Line {
/// Indentation value returned by `indent::Tracker::validate` for this line's leading whitespace.
indent: usize,
/// What the line contains (list item, map entry or bare value).
content: LineContent,
/// Line counter value at the time the line was read; counting starts at 1.
line_num: usize,
}
/// Parsed content of a single line.
#[derive(Debug, Clone)]
enum LineContent {
/// A list item; `Some` holds the value written inline, `None` means the value is
/// built from the following, more deeply indented lines.
ListItem(Option<Value>),
/// A `key: value` entry; the value follows the same `Some`/`None` convention as list items.
MapEntry(String, Option<Value>),
/// A bare inline value standing on its own line.
Value(Value),
/// A line with nothing to keep (comments and unrecognised rules); `parse_lines` drops these.
Empty,
}
pub(super) fn parse_impl(input: &str) -> Result<Value> {
let pairs = JamlParser::parse(Rule::jaml, input)?;
let lines = parse_lines(pairs)?;
if lines.is_empty() {
return Err(Error::EmptyDocument);
}
let (value, _) = build_value(&lines, 0, 0)?;
Ok(value)
}
/// Flattens the pest parse tree into the list of content-bearing lines.
///
/// Every `non_empty_line` has its leading whitespace validated by the indent
/// tracker and its content parsed; lines whose content is [`LineContent::Empty`]
/// are not stored. The line counter starts at 1 and advances for every
/// `non_empty_line`, `empty_line` and `NEWLINE` pair encountered.
///
/// # Errors
///
/// Returns the first error raised by indentation validation or content parsing.
fn parse_lines(pairs: pest::iterators::Pairs<Rule>) -> Result<Vec<Line>> {
    let mut collected = Vec::new();
    let mut tracker = indent::Tracker::default();
    let mut current_line = 1;

    for root in pairs.filter(|candidate| candidate.as_rule() == Rule::jaml) {
        let document = root.into_inner().next().unwrap();
        for entry in document.into_inner() {
            let rule = entry.as_rule();
            if rule == Rule::non_empty_line {
                let mut parts = entry.into_inner();
                // Indentation is validated before the content is even looked at.
                let indent = tracker.validate(parts.next().unwrap().as_str())?;
                let content = parse_line_content(parts.next().unwrap(), current_line)?;
                if !matches!(content, LineContent::Empty) {
                    collected.push(Line {
                        indent,
                        content,
                        line_num: current_line,
                    });
                }
                current_line += 1;
            } else if matches!(rule, Rule::empty_line | Rule::NEWLINE) {
                current_line += 1;
            }
        }
    }

    Ok(collected)
}
fn parse_line_content(pair: Pair<Rule>, _line_num: usize) -> Result<LineContent> {
match pair.as_rule() {
Rule::content => {
let inner = pair.into_inner().next().unwrap();
parse_line_content(inner, _line_num)
}
Rule::list_item => {
let value = pair
.into_inner()
.find_map(|inner| {
match inner.as_rule() {
Rule::inline_value => Some(parse_inline_value(inner)),
_ => None, }
})
.transpose()?;
Ok(LineContent::ListItem(value))
}
Rule::map_entry => {
let mut inner = pair.into_inner();
let key = parse_key(inner.next().unwrap())?;
let value = inner
.find_map(|pair| {
match pair.as_rule() {
Rule::inline_value => Some(parse_inline_value(pair)),
_ => None, }
})
.transpose()?;
Ok(LineContent::MapEntry(key, value))
}
Rule::inline_value => {
let value = parse_inline_value(pair)?;
Ok(LineContent::Value(value))
}
Rule::comment => Ok(LineContent::Empty),
_ => Ok(LineContent::Empty),
}
}
fn build_value(lines: &[Line], start_idx: usize, expected_indent: usize) -> Result<(Value, usize)> {
if start_idx >= lines.len() {
return Err(Error::EmptyDocument);
}
let first = &lines[start_idx];
if first.indent != expected_indent {
return Err(Error::UnexpectedIndent(expected_indent, first.indent));
}
match &first.content {
LineContent::Value(v) => Ok((v.clone(), start_idx + 1)),
LineContent::ListItem(_) => build_list(lines, start_idx, expected_indent),
LineContent::MapEntry(_, _) => build_map(lines, start_idx, expected_indent),
LineContent::Empty => Err(Error::EmptyDocument),
}
}
fn build_list(lines: &[Line], start_idx: usize, expected_indent: usize) -> Result<(Value, usize)> {
let mut items = Vec::new();
let mut idx = start_idx;
while idx < lines.len() {
let line = &lines[idx];
if line.indent < expected_indent {
break;
}
if line.indent > expected_indent {
return Err(Error::UnexpectedIndent(expected_indent, line.indent));
}
match &line.content {
LineContent::ListItem(maybe_val) => {
if let Some(val) = maybe_val {
items.push(val.clone());
idx += 1;
} else {
idx += 1;
if idx < lines.len() {
let (nested_val, next_idx) = build_value(lines, idx, expected_indent + 1)?;
items.push(nested_val);
idx = next_idx;
} else {
return Err(Error::MissingValue(line.line_num));
}
}
}
_ => break,
}
}
Ok((Value::List(items), idx))
}
fn build_map(lines: &[Line], start_idx: usize, expected_indent: usize) -> Result<(Value, usize)> {
let mut map = BTreeMap::new();
let mut idx = start_idx;
while idx < lines.len() {
let line = &lines[idx];
if line.indent < expected_indent {
break;
}
if line.indent > expected_indent {
return Err(Error::UnexpectedIndent(expected_indent, line.indent));
}
match &line.content {
LineContent::MapEntry(key, maybe_val) => {
if map.contains_key(key) {
return Err(Error::DuplicateKey(key.clone()));
}
if let Some(val) = maybe_val {
map.insert(key.clone(), val.clone());
idx += 1;
} else {
idx += 1;
if idx < lines.len() {
let (nested_val, next_idx) = build_value(lines, idx, expected_indent + 1)?;
map.insert(key.clone(), nested_val);
idx = next_idx;
} else {
return Err(Error::MissingValue(line.line_num));
}
}
}
_ => break,
}
}
Ok((Value::Map(map), idx))
}
/// Extracts a map key as an owned string.
///
/// `key` wrapper pairs are unwrapped first. Identifiers are taken verbatim;
/// string keys go through `parse_string`, so their escapes are decoded.
///
/// # Errors
///
/// Propagates errors from `parse_string`.
///
/// # Panics
///
/// Panics if the pair is neither a `key`, an `identifier` nor a `string`.
fn parse_key(pair: Pair<Rule>) -> Result<String> {
    let mut node = pair;
    while node.as_rule() == Rule::key {
        node = node.into_inner().next().unwrap();
    }

    match node.as_rule() {
        Rule::identifier => Ok(node.as_str().to_owned()),
        Rule::string => match parse_string(node)? {
            Value::String(text) => Ok(text),
            _ => unreachable!(),
        },
        other => unreachable!("Unexpected key rule: {:?}", other),
    }
}
/// Parses an inline value of any supported kind.
///
/// Accepts either an `inline_value` wrapper pair or one of the concrete value
/// pairs directly, and dispatches on the concrete rule.
///
/// # Errors
///
/// Propagates errors from the type-specific parsers.
///
/// # Panics
///
/// Panics if the concrete rule is not one of the value rules handled below.
fn parse_inline_value(pair: Pair<Rule>) -> Result<Value> {
    let node = match pair.as_rule() {
        Rule::inline_value => pair.into_inner().next().unwrap(),
        _ => pair,
    };

    match node.as_rule() {
        Rule::null => Ok(Value::Null),
        Rule::boolean => {
            let is_true = node.as_str() == "true";
            Ok(Value::Bool(is_true))
        }
        Rule::integer => parse_int(node),
        Rule::float => parse_float(node),
        Rule::string => parse_string(node),
        Rule::binary => parse_binary(node),
        Rule::timestamp => parse_timestamp(node),
        Rule::inline_list => parse_inline_list(node),
        Rule::inline_map => parse_inline_map(node),
        other => unreachable!("Unexpected inline value rule: {:?}", other),
    }
}
fn parse_inline_list(pair: Pair<Rule>) -> Result<Value> {
let mut items = Vec::new();
for inner in pair.into_inner() {
if inner.as_rule() == Rule::inline_value {
items.push(parse_inline_value(inner)?);
}
}
Ok(Value::List(items))
}
fn parse_inline_map(pair: Pair<Rule>) -> Result<Value> {
let mut map = BTreeMap::new();
for member in pair.into_inner() {
if member.as_rule() == Rule::inline_member {
let mut inner = member.into_inner();
let key = parse_key(inner.next().unwrap())?;
let value = parse_inline_value(inner.next().unwrap())?;
if map.contains_key(&key) {
return Err(Error::DuplicateKey(key));
}
map.insert(key, value);
}
}
Ok(Value::Map(map))
}
/// Parses an integer literal into a [`Value::Int`].
///
/// Underscore separators are removed, an optional leading `+` or `-` is read,
/// and a `0x`/`0X`, `0b`/`0B` or `0o`/`0O` prefix selects radix 16, 2 or 8;
/// anything else is parsed as decimal.
///
/// The sign and the digits are parsed together as one signed number instead of
/// parsing the magnitude and negating it afterwards. This lets `i64::MIN` be
/// written with a radix prefix (its magnitude alone does not fit in an `i64`),
/// and makes a second sign hidden in the digit text (`0x-5`, `+-5`) an error
/// instead of being silently accepted.
///
/// # Errors
///
/// Returns the integer-parsing error if the digits are empty, invalid for the
/// radix, or out of range for `i64`.
fn parse_int(pair: Pair<Rule>) -> Result<Value> {
    // (prefix, radix) pairs recognised after the optional sign.
    const PREFIXES: [(&str, u32); 6] = [
        ("0x", 16),
        ("0X", 16),
        ("0b", 2),
        ("0B", 2),
        ("0o", 8),
        ("0O", 8),
    ];

    let cleaned = pair.as_str().replace('_', "");
    let (sign, magnitude) = match cleaned.strip_prefix('-') {
        Some(rest) => ('-', rest),
        None => ('+', cleaned.strip_prefix('+').unwrap_or(&cleaned)),
    };
    let (digits, radix) = PREFIXES
        .iter()
        .find_map(|&(prefix, radix)| magnitude.strip_prefix(prefix).map(|rest| (rest, radix)))
        .unwrap_or((magnitude, 10));

    // Re-attaching an explicit sign means the parser sees exactly one sign:
    // a stray sign inside `digits` yields e.g. "+-5", which is rejected.
    let signed = format!("{sign}{digits}");
    parse_int_radix(&signed, radix).map(Value::Int)
}
/// Parses `s` as an `i64` in the given radix.
///
/// This is a thin wrapper over `i64::from_str_radix`, which accepts an optional
/// leading `+` or `-` sign in front of the digits.
///
/// # Errors
///
/// Converts the standard library's integer-parsing error into this module's
/// error type.
fn parse_int_radix(s: &str, radix: u32) -> Result<i64> {
    let parsed = i64::from_str_radix(s, radix)?;
    Ok(parsed)
}
/// Parses a float literal into a [`Value::Float`].
///
/// The spellings `inf`, `+inf`, `-inf`, `nan`, `+nan` and `-nan` are mapped to
/// the corresponding special values (every `nan` spelling yields `f64::NAN`);
/// all other text is handed to the standard `f64` parser.
///
/// # Errors
///
/// Returns the float-parsing error if the text is not a valid `f64`.
fn parse_float(pair: Pair<Rule>) -> Result<Value> {
    let text = pair.as_str();
    let number = if matches!(text, "nan" | "+nan" | "-nan") {
        f64::NAN
    } else if matches!(text, "inf" | "+inf") {
        f64::INFINITY
    } else if text == "-inf" {
        f64::NEG_INFINITY
    } else {
        text.parse::<f64>()?
    };
    Ok(Value::Float(number))
}
fn parse_string(pair: Pair<Rule>) -> Result<Value> {
let mut inner = pair.into_inner();
let quoted = inner.next().unwrap();
let content_pair = quoted.into_inner().next().unwrap();
let content = content_pair.as_str();
let mut result = String::with_capacity(content.len());
let mut chars = content.chars();
while let Some(ch) = chars.next() {
if ch == '\\' {
match chars.next() {
Some('"') => result.push('"'),
Some('\'') => result.push('\''),
Some('\\') => result.push('\\'),
Some('/') => result.push('/'),
Some('b') => result.push('\u{0008}'),
Some('f') => result.push('\u{000C}'),
Some('n') => result.push('\n'),
Some('r') => result.push('\r'),
Some('t') => result.push('\t'),
Some('u') => result.push(parse_unicode_escape(&mut chars)?),
Some(c) => return Err(Error::InvalidEscapeChar(c)),
None => return Err(Error::InvalidEscapeChar('\\')),
}
} else {
result.push(ch);
}
}
Ok(Value::String(result))
}
fn parse_unicode_escape(chars: &mut std::str::Chars) -> Result<char> {
let hex: String = chars.take(4).collect();
if hex.len() < 4 {
return Err(Error::InvalidUnicodeEscape(hex));
}
let code =
u32::from_str_radix(&hex, 16).map_err(|_| Error::InvalidUnicodeEscape(hex.clone()))?;
if (0xD800..=0xDBFF).contains(&code) {
let saved_chars = chars.clone();
if chars.next() == Some('\\') && chars.next() == Some('u') {
let low_hex: String = chars.take(4).collect();
if low_hex.len() == 4
&& let Ok(low_code) = u32::from_str_radix(&low_hex, 16)
&& (0xDC00..=0xDFFF).contains(&low_code)
{
let codepoint = 0x10000 + ((code - 0xD800) << 10) + (low_code - 0xDC00);
return char::from_u32(codepoint).ok_or(Error::InvalidUnicodeCodepoint(codepoint));
}
}
*chars = saved_chars;
}
char::from_u32(code).ok_or(Error::InvalidUnicodeCodepoint(code))
}
/// Parses a binary literal into a [`Value::Binary`].
///
/// Base64 payloads are decoded with the standard base64 engine; hex payloads are
/// decoded two characters per byte.
///
/// # Errors
///
/// * The base64 decoding error for an invalid base64 payload.
/// * [`Error::OddHexDigits`] if a hex payload has an odd length.
/// * The integer-parsing error if a hex character pair is not a valid byte.
///
/// # Panics
///
/// Panics if the inner pair is neither `base64_binary` nor `hex_binary`.
fn parse_binary(pair: Pair<Rule>) -> Result<Value> {
    let encoded = pair.into_inner().next().unwrap();
    let bytes = match encoded.as_rule() {
        Rule::base64_binary => {
            let text = encoded.into_inner().next().unwrap().as_str();
            base64::Engine::decode(&base64::engine::general_purpose::STANDARD, text)?
        }
        Rule::hex_binary => {
            let digits = encoded.into_inner().next().unwrap().as_str();
            if !digits.len().is_multiple_of(2) {
                return Err(Error::OddHexDigits);
            }
            // One output byte per pair of input characters.
            let decoded: StdResult<Vec<u8>, _> = (0..digits.len() / 2)
                .map(|byte_idx| {
                    let start = byte_idx * 2;
                    u8::from_str_radix(&digits[start..start + 2], 16)
                })
                .collect();
            decoded?
        }
        other => unreachable!("Unexpected binary rule: {:?}", other),
    };
    Ok(Value::Binary(Binary(bytes)))
}
fn parse_timestamp(pair: Pair<Rule>) -> Result<Value> {
let content = pair.into_inner().next().unwrap().as_str();
match time::OffsetDateTime::parse(content, &time::format_description::well_known::Rfc3339) {
Ok(dt) => Ok(Value::Timestamp(dt)),
Err(e) => Err(Error::InvalidTimestamp(content.to_string(), e.to_string())),
}
}