#![deny(missing_docs)]
use {
crate::{content::*, error::*},
lazy_static::lazy_static,
regex::{CaptureLocations, Match, Regex},
std::cell::RefCell,
std::rc::Rc,
};
/// Returns `regex` anchored to the start of the haystack by prefixing it with `^`.
fn from_start(regex: &str) -> String {
    format!("^{}", regex)
}
/// Returns `regex` wrapped in `^` and `$` so that it must match the entire haystack.
fn exact_match(regex: &str) -> String {
    format!("^{}$", regex)
}
// Token patterns used by the parser.
//
// Each `*_PATTERN` below contributes exactly one capturing group to the combined `NEXT_TOKEN`
// regex, and the `usize` constant declared next to it is that group's index. The indices must
// therefore stay in the same order as the pattern list used to build `NEXT_TOKEN`.
lazy_static! {
// One or more whitespace characters, excluding newline.
static ref WHITESPACE_PATTERN: &'static str = r#"([\s&&[^\n]]+)"#;
static ref WHITESPACE: usize = 1;
// A single newline character.
static ref NEWLINE_PATTERN: &'static str = r#"(\n)"#;
static ref NEWLINE: usize = 2;
// The two slashes that open a line comment; the comment text is read with `LINE_COMMENT`.
static ref LINE_COMMENT_SLASHES_PATTERN: &'static str = r#"(//)"#;
static ref LINE_COMMENT_SLASHES: usize = 3;
// The opening of a block comment; the comment text is read with `BLOCK_COMMENT`.
static ref OPEN_BLOCK_COMMENT_PATTERN: &'static str = r#"(/\*)"#;
static ref OPEN_BLOCK_COMMENT: usize = 4;
// `null`, `true`, `false`, and the supported number formats. The pattern is written in the
// regex crate's verbose (`(?x)`) mode, so the `#` comments inside the raw string are part of
// the pattern text and document each alternative.
static ref NON_STRING_PRIMITIVE_PATTERN: &'static str =
r#"((?x) # ignore whitespace and allow '#' comments
# Capture null, true, or false (lowercase only, as in the ECMAScript keywords).
# End with a word boundary ('\b' marker) to ensure the pattern does not match if
# it is followed by a word ('\w') character; for example, 'nullify' is a valid
# identifier (depending on the context) and must not match the 'null' value.
(?:(?:null|true|false)\b)|
# Capture all number formats. Every variant is allowed an optional '-' or '+' prefix.
(?:[-+]?(?:
# All of the following variants end in a word character. Use '\b' to prevent
# matching numbers immediately followed by another word character, for example,
# 'NaNo', 'Infinity_', or '0xadef1234ghi'.
(?:(?:
NaN|
Infinity|
(?:0[xX][0-9a-fA-F]+)| # hexadecimal notation
(?:[0-9]+[eE][+-]?[0-9]+)| # exponent notation
(?:[0-9]*\.[0-9]+) # decimal notation
)\b)|
# Capture integers, with an optional trailing decimal point.
# If the value ends in a digit (no trailing decimal point), apply `\b` to prevent
# matching integers immediatly followed by a word character (for example, 1200PDT).
# But if the integer has a trailing decimal, the '\b' does not apply. (Since '.' is
# not itself a '\w' word character, the '\b' would have the opposite affect,
# matching only if the next character is a word character, unless there is no next
# character.)
(?:
[0-9]+(?:\.|\b)
)
))
)"#;
static ref NON_STRING_PRIMITIVE: usize = 5;
// A property name that needs no quotes: `$` or a non-digit word character, followed by any
// number of `$` or word characters.
static ref UNQUOTED_PROPERTY_NAME_PATTERN: &'static str = r#"[\$\w&&[^\d]][\$\w]*"#;
// Whole-string form of the pattern above, used by `matches_unquoted_property_name()`.
static ref UNQUOTED_PROPERTY_NAME_REGEX: Regex =
Regex::new(&exact_match(&*UNQUOTED_PROPERTY_NAME_PATTERN)).unwrap();
// An unquoted property name followed by optional non-newline whitespace and a colon. Only the
// name itself is captured. The pattern is assembled into an owned `String` first, and then
// exposed as a `&'static str` so it can be listed alongside the other patterns.
static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING: String =
r#"(?:("#.to_owned() + *UNQUOTED_PROPERTY_NAME_PATTERN + r#")[\s&&[^\n]]*:)"#;
static ref UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN: &'static str =
&UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN_STRING;
static ref UNQUOTED_PROPERTY_NAME_AND_COLON: usize = 6;
// The opening quote of a string; the string body is read with `SINGLE_QUOTED` or
// `DOUBLE_QUOTED`, depending on which quote character was captured.
static ref OPEN_QUOTE_PATTERN: &'static str = r#"(["'])"#;
static ref OPEN_QUOTE: usize = 7;
// Any one of the four object/array delimiters.
static ref BRACE_PATTERN: &'static str = r#"([{}\[\]])"#;
static ref BRACE: usize = 8;
// A comma.
static ref COMMA_PATTERN: &'static str = r#"(,)"#;
static ref COMMA: usize = 9;
// The combined tokenizer regex: an alternation of all of the patterns above, anchored to the
// start of the remaining input. The order of this list defines the capture group indices.
static ref NEXT_TOKEN: Regex = Regex::new(
&from_start(&(r#"(?:"#.to_owned()
+ &vec![
*WHITESPACE_PATTERN,
*NEWLINE_PATTERN,
*LINE_COMMENT_SLASHES_PATTERN,
*OPEN_BLOCK_COMMENT_PATTERN,
*NON_STRING_PRIMITIVE_PATTERN,
*UNQUOTED_PROPERTY_NAME_AND_COLON_PATTERN,
*OPEN_QUOTE_PATTERN,
*BRACE_PATTERN,
*COMMA_PATTERN,
].join("|")
+ r#")"#))
).unwrap();
// Captures the rest of the current line (possibly empty), not including the newline.
static ref LINE_COMMENT: Regex = Regex::new(
&from_start(r#"([^\n]*)"#)
).unwrap();
// Captures, non-greedily, everything up to the first `*/`; the `*/` itself is matched but
// not captured.
static ref BLOCK_COMMENT: Regex = Regex::new(
&from_start(r#"((?:.|\n)*?)\*/"#)
).unwrap();
// Captures the body of a single-quoted string: escaped backslashes, escaped quotes, escaped
// newlines, and any other character except an unescaped quote or newline. The closing quote is
// matched but not captured.
static ref SINGLE_QUOTED: Regex = Regex::new(
&from_start(r#"((?:(?:\\\\)|(?:\\')|(?:\\\n)|(?:[^'\n]))*)(?:')"#)
).unwrap();
// Same as `SINGLE_QUOTED`, but for double-quoted strings.
static ref DOUBLE_QUOTED: Regex = Regex::new(
&from_start(r#"((?:(?:\\\\)|(?:\\")|(?:\\\n)|(?:[^"\n]))*)(?:")"#)
).unwrap();
// Optional non-newline whitespace followed by a colon; used after a quoted property name.
static ref COLON: Regex = Regex::new(
&from_start(r#"([\s&&[^\n]]*:)"#)
).unwrap();
}
/// Returns true if `strval` can be written as a property name without quotes: the whole string
/// must match `UNQUOTED_PROPERTY_NAME_REGEX`, and it must not be one of the literal keywords
/// `true`, `false`, or `null`.
fn matches_unquoted_property_name(strval: &str) -> bool {
    const KEYWORDS: &[&str] = &["true", "false", "null"];
    if !UNQUOTED_PROPERTY_NAME_REGEX.is_match(strval) {
        return false;
    }
    let is_keyword = KEYWORDS.contains(&strval);
    !is_keyword
}
/// Pairs a compiled regex with reusable capture storage, and remembers the text of the most
/// recent successful match so that individual capture groups can be read back later.
struct Capturer {
/// The regex applied on each call to `capture()`.
regex: &'static Regex,
/// A copy of the input text from its start through the end of the most recent match, or
/// `None` if the most recent `capture()` did not match (or `capture()` was never called).
overall_match: Option<String>,
/// Capture group offsets written by the most recent `capture()` call.
locations: CaptureLocations,
}
impl Capturer {
    /// Creates a `Capturer` for `regex`, with capture storage sized for that regex and no
    /// remembered match.
    fn new(regex: &'static Regex) -> Self {
        let locations = regex.capture_locations();
        Self { regex, overall_match: None, locations }
    }

    /// Applies the regex to `text`, recording the capture group offsets. On success, a copy of
    /// `text` from its start through the end of the match is remembered (so the group offsets
    /// remain valid indices into it) and the overall match is returned. On failure, the
    /// remembered match is cleared and `None` is returned.
    fn capture<'a>(&mut self, text: &'a str) -> Option<Match<'a>> {
        let matched = self.regex.captures_read(&mut self.locations, text);
        self.overall_match = matched.as_ref().map(|found| text[0..found.end()].to_string());
        matched
    }

    /// Returns the remembered text of the most recent successful match, if any.
    fn overall_match<'a>(&'a self) -> Option<&'a str> {
        match &self.overall_match {
            Some(text) => Some(text.as_str()),
            None => None,
        }
    }

    /// Returns the text captured by group `i` in the most recent successful match, or `None` if
    /// there was no match or that group did not participate in it.
    fn captured<'a>(&'a self, i: usize) -> Option<&'a str> {
        let span = self.locations.get(i);
        let text = self.overall_match.as_ref()?;
        let (start, end) = span?;
        Some(&text[start..end])
    }
}
/// Tokenizes a JSON5 document with the regexes declared in this file and builds a tree of
/// `Value`s from the tokens, while tracking line and column positions for error reporting.
pub(crate) struct Parser<'parser> {
/// The portion of the input that has not been consumed yet.
remaining: &'parser str,
/// Optional name of the file being parsed; copied into reported `Location`s.
filename: &'parser Option<String>,
/// Text of the line at `line_number`; shown in parse error messages.
current_line: &'parser str,
/// Text of the line at `next_line_number`; becomes `current_line` when the line advances.
next_line: &'parser str,
/// 1-based line number at which the most recently consumed token started.
line_number: usize,
/// 1-based column number at which the most recently consumed token started.
column_number: usize,
/// 1-based line number of the position just past the most recently consumed token.
next_line_number: usize,
/// 1-based column number of the position just past the most recently consumed token.
next_column_number: usize,
/// Stack of containers currently being populated. The bottom entry is the array created in
/// `new()`, which collects the document's top-level values; the last entry is the current scope.
scope_stack: Vec<Rc<RefCell<Value>>>,
/// Reusable capturer for the `COLON` regex, used after a quoted property name.
colon_capturer: Capturer,
}
impl<'parser> Parser<'parser> {
/// Creates a parser positioned at line 1, column 1 of `remaining`.
///
/// `filename`, if provided, is only used to label locations in reported errors. The scope
/// stack is initialized with a single empty array that will collect the document's top-level
/// values.
pub fn new(remaining: &'parser str, filename: &'parser Option<String>) -> Self {
    // `str::lines()` yields no items for an empty string, so an empty document must fall back
    // to an empty first line rather than panicking.
    let current_line = remaining.lines().next().unwrap_or("");
    Self {
        remaining,
        filename,
        current_line,
        next_line: current_line,
        line_number: 1,
        column_number: 1,
        next_line_number: 1,
        next_column_number: 1,
        scope_stack: vec![Rc::new(RefCell::new(Array::new(vec![])))],
        colon_capturer: Capturer::new(&COLON),
    }
}
/// Returns a new shared handle to the innermost container currently being populated.
///
/// Panics if the scope stack is empty.
fn current_scope(&self) -> Rc<RefCell<Value>> {
    assert!(self.scope_stack.len() > 0);
    let innermost = self.scope_stack.last().unwrap();
    Rc::clone(innermost)
}
/// Runs `f` against the current scope, viewed through the `Container` trait.
///
/// The current scope is mutably borrowed for the duration of the call. Returns an internal
/// error if the current scope is neither an array nor an object.
fn with_container<F, T>(&self, f: F) -> Result<T, Error>
where
    F: FnOnce(&mut dyn Container) -> Result<T, Error>,
{
    let scope = self.current_scope();
    let mut value = scope.borrow_mut();
    match &mut *value {
        Value::Array(array) => f(array),
        Value::Object(object) => f(object),
        unexpected => Err(Error::internal(
            self.location(),
            format!(
                "Current scope should be an Array or Object, but scope was {:?}",
                unexpected
            ),
        )),
    }
}
/// Runs `f` against the current scope, which must be an array.
///
/// The current scope is mutably borrowed for the duration of the call. Returns a parse error
/// if the current scope is not an array.
fn with_array<F, T>(&self, f: F) -> Result<T, Error>
where
    F: FnOnce(&mut Array) -> Result<T, Error>,
{
    let scope = self.current_scope();
    let mut value = scope.borrow_mut();
    if let Value::Array(array) = &mut *value {
        return f(array);
    }
    let unexpected = &*value;
    Err(self.error(format!(
        "Invalid Array token found while parsing an {:?} (mismatched braces?)",
        unexpected
    )))
}
/// Runs `f` against the current scope, which must be an object.
///
/// The current scope is mutably borrowed for the duration of the call. Returns a parse error
/// if the current scope is not an object.
fn with_object<F, T>(&self, f: F) -> Result<T, Error>
where
    F: FnOnce(&mut Object) -> Result<T, Error>,
{
    let scope = self.current_scope();
    let mut value = scope.borrow_mut();
    if let Value::Object(object) = &mut *value {
        return f(object);
    }
    let unexpected = &*value;
    Err(self.error(format!(
        "Invalid Object token found while parsing an {:?} (mismatched braces?)",
        unexpected
    )))
}
/// Returns true if the current scope is an array.
fn is_in_array(&self) -> bool {
    let scope = self.current_scope();
    let value = scope.borrow_mut();
    if let Value::Array(_) = &*value {
        true
    } else {
        false
    }
}
/// Returns true if the current scope is anything other than an array.
fn is_in_object(&self) -> bool {
    let in_array = self.is_in_array();
    !in_array
}
/// Adds `value` to the current scope. If `value` is itself an object or an array, it is also
/// pushed onto the scope stack so that it becomes the new current scope.
fn add_value(&mut self, value: Value) -> Result<(), Error> {
    let opens_scope = value.is_object() || value.is_array();
    let shared = Rc::new(RefCell::new(value));
    self.with_container(|container| container.add_value(Rc::clone(&shared), self))?;
    if opens_scope {
        self.scope_stack.push(shared);
    }
    Ok(())
}
/// Notifies the current scope that a newline token was consumed.
fn on_newline(&mut self) -> Result<(), Error> {
    self.with_container(|scope| scope.on_newline())
}
fn add_line_comment(
&self,
captured: Option<&str>,
pending_new_line_comment_block: bool,
) -> Result<bool, Error> {
match captured {
Some(content) => {
let content = content.trim_end();
self.with_container(|container| {
container.add_line_comment(content, pending_new_line_comment_block)
})
}
None => Err(Error::internal(
self.location(),
"Line comment regex should support empty line comment",
)),
}
}
/// Adds a block comment to the current scope.
///
/// `captured` is the text between `/*` and `*/`; it is expected to be `Some` (possibly empty),
/// and `None` is reported as an internal error.
///
/// If every non-blank line after the first starts with at least as many spaces as the column
/// offset computed below, that common indentation is removed from those lines and the comment
/// is stored with `align: true`. Otherwise the lines are stored unchanged with `align: false`.
fn add_block_comment(&self, captured: Option<&str>) -> Result<(), Error> {
    let content = match captured {
        Some(content) => content,
        None => {
            return Err(Error::internal(
                self.location(),
                "Block comment regex should support empty block comment",
            ))
        }
    };

    // NOTE(review): this relies on `column_number` being the 1-based column just past the
    // opening "/*" when this method is called, making `indent_count` the number of characters
    // that precede the "/*" on its line.
    let indent_count = self.column_number - 3;
    let indent = " ".repeat(indent_count);

    let is_misaligned = content
        .lines()
        .enumerate()
        .any(|(index, line)| index > 0 && !line.starts_with(&indent) && line.trim() != "");

    let lines: Vec<String> = if is_misaligned {
        content.lines().map(|line| line.to_owned()).collect()
    } else {
        let line_count = content.lines().count();
        content
            .lines()
            .enumerate()
            .map(|(index, line)| {
                if index == 0 {
                    line
                } else if index == line_count - 1 && line.trim() == "" {
                    line.trim()
                } else {
                    // Blank lines are exempt from the indentation check above, so a blank line
                    // inside the comment may be shorter than the indentation. Slicing it
                    // directly would panic; treat such a line as empty instead.
                    line.get(indent_count..).unwrap_or("")
                }
            })
            .map(|line| line.to_string())
            .collect()
    };

    self.with_container(|container| {
        container.add_block_comment(Comment::Block { lines, align: !is_misaligned })
    })
}
/// Removes and returns the comments that the current scope is holding as pending.
fn take_pending_comments(&mut self) -> Result<Vec<Comment>, Error> {
    self.with_container(|scope| {
        let pending = scope.take_pending_comments();
        Ok(pending)
    })
}
/// Records `name` as the pending property name of the current scope, which must be an object.
fn set_pending_property(&self, name: &str) -> Result<(), Error> {
    let owned_name = name.to_string();
    self.with_object(|object| object.set_pending_property(owned_name, self))
}
/// Handles a quoted string whose body (`captured`) was just consumed, where `quote` is the
/// quote character that delimited it.
///
/// If the current scope is an object with no pending property name, the string is a property
/// name: it must be followed by a colon, and it is recorded without quotes when it is a valid
/// unquoted property name, or with its original quotes otherwise. In every other case the
/// string, with its quotes, is added as a primitive value carrying any pending comments.
///
/// Returns a parse error if `captured` is `None` (the string was not closed) or if a property
/// name is not followed by a colon.
fn add_quoted_string(&mut self, quote: &str, captured: Option<&str>) -> Result<(), Error> {
    let unquoted = match captured {
        Some(text) => text,
        None => return Err(self.error("Unclosed string")),
    };

    let is_property_name =
        self.is_in_object() && !self.with_object(|object| object.has_pending_property())?;
    if !is_property_name {
        let comments = self.take_pending_comments()?;
        return self
            .add_value(Primitive::new(format!("{}{}{}", quote, &unquoted, quote), comments));
    }

    let colon = self.colon_capturer.capture(self.remaining);
    if !self.consume_if_matched(colon) {
        return Err(self.error("Property name separator (:) missing"));
    }

    if matches_unquoted_property_name(&unquoted) {
        self.set_pending_property(unquoted)
    } else {
        self.set_pending_property(&format!("{}{}{}", quote, &unquoted, quote))
    }
}
/// Adds a non-string primitive token, exactly as written in the input, as a value in the
/// current scope, attaching any pending comments to it.
fn add_non_string_primitive(&mut self, non_string_primitive: &str) -> Result<(), Error> {
    let comments = self.take_pending_comments()?;
    let primitive = Primitive::new(non_string_primitive.to_string(), comments);
    self.add_value(primitive)
}
fn on_brace(&mut self, brace: &str) -> Result<(), Error> {
match brace {
"{" => self.open_object(),
"}" => self.close_object(),
"[" => self.open_array(),
"]" => self.close_array(),
unexpected => Err(Error::internal(
self.location(),
format!("regex returned unexpected brace string: {}", unexpected),
)),
}
}
/// Starts a new object in the current scope, attaching any pending comments to it. The new
/// object becomes the current scope.
fn open_object(&mut self) -> Result<(), Error> {
    let comments = self.take_pending_comments()?;
    let object = Object::new(comments);
    self.add_value(object)
}
/// Leaves the current scope, making its parent the current scope again.
///
/// The bottom entry of the scope stack is the document itself and is never removed here: a
/// closing brace with no matching opening brace is reported as a parse error instead of
/// emptying the stack (which would trip the assertion in `current_scope()` on the next token).
fn exit_scope(&mut self) -> Result<(), Error> {
    if self.scope_stack.len() <= 1 {
        return Err(self.error("Closing brace without a matching opening brace"));
    }
    self.scope_stack.pop();
    Ok(())
}
/// Closes the current scope, which must be an object, and returns to its parent scope.
fn close_object(&mut self) -> Result<(), Error> {
    let closed = self.with_object(|object| object.close(self));
    closed?;
    self.exit_scope()
}
/// Starts a new array in the current scope, attaching any pending comments to it. The new
/// array becomes the current scope.
fn open_array(&mut self) -> Result<(), Error> {
    let comments = self.take_pending_comments()?;
    let array = Array::new(comments);
    self.add_value(array)
}
/// Closes the current scope, which must be an array, and returns to its parent scope.
fn close_array(&mut self) -> Result<(), Error> {
    let closed = self.with_array(|array| array.close(self));
    closed?;
    self.exit_scope()
}
/// Notifies the current scope that a comma was consumed, ending the current value.
fn end_value(&self) -> Result<(), Error> {
    self.with_container(|scope| scope.end_value(self))
}
/// Returns the location (filename, line, and column) at which the most recently consumed
/// token started. This implementation always returns `Some`.
pub fn location(&self) -> Option<Location> {
    let filename = self.filename.clone();
    let position = Location::new(filename, self.line_number, self.column_number);
    Some(position)
}
pub fn error(&self, err: impl std::fmt::Display) -> Error {
let mut indicator = " ".repeat(self.column_number - 1) + "^";
if self.column_number < self.next_column_number - 1 {
indicator += &"~".repeat(if self.line_number == self.next_line_number {
self.next_column_number - self.column_number - 1
} else {
self.current_line.len() - self.column_number
});
}
Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator))
}
/// Advances the parser past `matched`, if there was a match, and returns whether there was one.
///
/// Regardless of the outcome, the "current" position is first moved up to the pending "next"
/// position (the end of the previously consumed token). On a match, the matched text is
/// removed from the front of `remaining`, and the "next" position is advanced past it: every
/// newline increments the next line number and resets the next column to 1, and every other
/// character increments the next column.
///
/// `matched` must be the result of matching against the current value of `self.remaining`.
fn consume_if_matched<'a>(&mut self, matched: Option<Match<'a>>) -> bool {
    self.column_number = self.next_column_number;
    if self.line_number < self.next_line_number {
        self.line_number = self.next_line_number;
        self.current_line = self.next_line;
    }

    let matched = match matched {
        Some(matched) => matched,
        None => return false,
    };

    // Keep the pre-advance text so the line following each newline can be located by byte
    // offset. Offsets within the match are meaningless in the advanced `remaining`, and a
    // character index is not a byte offset, so indexing `remaining` with one could select the
    // wrong text or panic in the middle of a multi-byte character.
    let text = self.remaining;
    self.remaining = &text[matched.end()..];

    for (offset, c) in matched.as_str().char_indices() {
        if c == '\n' {
            self.next_line_number += 1;
            self.next_column_number = 1;
            // '\n' is a single byte, so the following line starts one byte after it. If no
            // text follows the newline, `next_line` is left unchanged.
            let line_start = matched.start() + offset + 1;
            if let Some(line) = text.get(line_start..).and_then(|rest| rest.lines().next()) {
                self.next_line = line;
            }
        } else {
            self.next_column_number += 1;
        }
    }
    true
}
/// Applies `capturer` to the remaining input and, on a match, consumes the matched text.
/// Returns true if there was a match.
fn capture(&mut self, capturer: &mut Capturer) -> bool {
    let matched = capturer.capture(self.remaining);
    self.consume_if_matched(matched)
}
/// Applies `capturer` to the remaining input and, on a match, consumes the matched text and
/// returns the text of capture group 1. Returns `None` if there was no match.
fn consume<'a>(&mut self, capturer: &'a mut Capturer) -> Option<&'a str> {
    if !self.capture(capturer) {
        return None;
    }
    capturer.captured(1)
}
pub fn parse(&mut self, buffer: &'parser str) -> Result<Array, Error> {
self.remaining = buffer;
let mut next_token = Capturer::new(&NEXT_TOKEN);
let mut single_quoted = Capturer::new(&SINGLE_QUOTED);
let mut double_quoted = Capturer::new(&DOUBLE_QUOTED);
let mut line_comment = Capturer::new(&LINE_COMMENT);
let mut block_comment = Capturer::new(&BLOCK_COMMENT);
let mut just_captured_line_comment = false;
let mut pending_blank_line = false;
let mut pending_new_line_comment_block = false;
while self.remaining.len() > 0 {
let mut reset_line_comment_break_check = true;
if self.capture(&mut next_token) {
if let Some(_) = next_token.captured(*WHITESPACE) {
reset_line_comment_break_check = false;
Ok(()) } else if let Some(_) = next_token.captured(*NEWLINE) {
reset_line_comment_break_check = false;
if just_captured_line_comment {
if pending_blank_line {
pending_new_line_comment_block = true;
pending_blank_line = false;
} else if !pending_new_line_comment_block {
pending_blank_line = true;
}
}
self.on_newline()
} else if let Some(_) = next_token.captured(*COMMA) {
self.end_value()
} else if let Some(brace) = next_token.captured(*BRACE) {
self.on_brace(&brace)
} else if let Some(non_string_primitive) =
next_token.captured(*NON_STRING_PRIMITIVE)
{
self.add_non_string_primitive(&non_string_primitive)
} else if let Some(quote) = next_token.captured(*OPEN_QUOTE) {
let quoted_string = if quote == "'" {
self.consume(&mut single_quoted)
} else {
self.consume(&mut double_quoted)
};
self.add_quoted_string("e, quoted_string)
} else if let Some(unquoted_property_name) =
next_token.captured(*UNQUOTED_PROPERTY_NAME_AND_COLON)
{
self.set_pending_property(unquoted_property_name)
} else if let Some(_line_comment_start) = next_token.captured(*LINE_COMMENT_SLASHES)
{
reset_line_comment_break_check = false;
pending_blank_line = false;
let line_comment = self.consume(&mut line_comment);
if self.add_line_comment(line_comment, pending_new_line_comment_block)? {
just_captured_line_comment = true;
if pending_new_line_comment_block {
pending_new_line_comment_block = false;
}
} Ok(())
} else if let Some(_block_comment_start) = next_token.captured(*OPEN_BLOCK_COMMENT)
{
let block_comment = self.consume(&mut block_comment);
self.add_block_comment(block_comment)
} else {
Err(Error::internal(
self.location(),
format!(
"NEXT_TOKEN matched an unexpected capture group: {}",
next_token.overall_match().unwrap_or("")
),
))
}
} else {
Err(self.error("Unexpected token"))
}?;
if reset_line_comment_break_check {
just_captured_line_comment = false;
pending_blank_line = false;
pending_new_line_comment_block = false;
}
}
self.remaining = "";
self.close_document()?;
match Rc::try_unwrap(self.scope_stack.pop().unwrap())
.map_err(|_| Error::internal(None, "Rc<> for document array could not be unwrapped."))?
.into_inner()
{
Value::Array(array) => Ok(array),
unexpected => Err(Error::internal(
self.location(),
format!("Final scope should be an Array, but scope was {:?}", unexpected),
)),
}
}
/// Verifies that every opened object and array was closed, meaning only the document scope
/// remains on the scope stack. Returns a parse error otherwise.
fn close_document(&mut self) -> Result<(), Error> {
    if self.scope_stack.len() != 1 {
        return Err(self.error("Mismatched braces in the document"));
    }
    Ok(())
}
}
#[cfg(test)]
mod tests {
use {super::*, crate::test_error, proptest::prelude::*};
// Shared proptest configurations for the property tests in this module.
lazy_static! {
// Default proptest settings, except that failure persistence is turned off.
static ref NO_PERSIST: ProptestConfig = ProptestConfig {
failure_persistence: None,
.. ProptestConfig::default()
};
// Same as `NO_PERSIST`, but runs 1024 cases per test.
static ref EXTRA_CASES_NO_PERSIST: ProptestConfig = ProptestConfig {
failure_persistence: None,
cases: 1024,
.. ProptestConfig::default()
};
}
/// Describes one regex capture test case. The test input is the concatenation of `prefix`,
/// `matches`, `suffix`, `next_matches`, `next_suffix`, and `trailing`, in that order.
struct RegexTest<'a> {
/// If set, the test is expected to fail with a test failure carrying exactly this message.
error: Option<&'a str>,
/// Text placed before the text that is expected to be captured.
prefix: &'a str,
/// The text the selected capture group is expected to capture.
matches: &'a str,
/// Text placed directly after `matches`.
suffix: &'a str,
/// If set, a second regex to test (using the `next_*` fields and `trailing`) after the first
/// capture succeeds.
next_regex: Option<&'a Regex>,
/// Used as `prefix` in the follow-up test of `next_regex`.
next_prefix: &'a str,
/// Used as `matches` in the follow-up test of `next_regex`.
next_matches: &'a str,
/// Used as `suffix` in the follow-up test of `next_regex`.
next_suffix: &'a str,
/// Text placed at the end of the test input.
trailing: &'a str,
}
impl<'a> Default for RegexTest<'a> {
fn default() -> Self {
RegexTest {
error: None,
prefix: "",
matches: "",
suffix: "",
next_regex: None,
next_prefix: "",
next_matches: "",
next_suffix: "",
trailing: "",
}
}
}
/// Builds the test input described by `test`, applies `regex` to it, and checks the result.
///
/// The capture group `group_id` (group 1 if `None`) must capture exactly `test.matches`, it
/// must be the only group that participated in the match, and the input left after the overall
/// match must be `next_matches + next_suffix + trailing`. If `test.next_regex` is set, a
/// follow-up test of that regex is then run and its result is returned; otherwise the captured
/// text is returned. A regex or group that fails to match is reported as a test failure error,
/// while a mismatch in the checked values panics.
fn try_capture(
    regex: &Regex,
    group_id: Option<usize>,
    test: RegexTest<'_>,
) -> Result<String, Error> {
    const OVERALL_MATCH: usize = 0;

    println!();
    println!("pattern: '{}'", regex.as_str());

    let trailing = format!("{}{}", test.next_suffix, test.trailing);
    let test_string =
        [test.prefix, test.matches, test.suffix, test.next_matches, trailing.as_str()].concat();
    println!("capturing from: '{}'", test_string.escape_debug());
    println!(
        " {}{}{}{}",
        " ".repeat(test.prefix.len()),
        "^".repeat(test.matches.len()),
        " ".repeat(test.suffix.len()),
        "^".repeat(test.next_matches.len())
    );

    let group_id = group_id.unwrap_or(1);
    println!("expected capture id: '{}'", group_id);

    let capture = match regex.captures(&test_string) {
        Some(capture) => capture,
        None => return Err(test_error!("capture failed")),
    };
    let overall_match = match capture.get(OVERALL_MATCH) {
        Some(overall_match) => overall_match,
        None => return Err(test_error!("regex did not match")),
    };
    println!(
        "overall match: '{}', length = {}",
        overall_match.as_str().escape_debug(),
        overall_match.end()
    );

    let remaining = &test_string[overall_match.end()..];
    println!("remaining: '{}'", remaining.escape_debug());

    // Collect the indices of every group, other than the overall match, that participated.
    let capture_ids: Vec<usize> = capture
        .iter()
        .enumerate()
        .filter(|(index, subcapture)| *index != OVERALL_MATCH && subcapture.is_some())
        .map(|(index, _)| index)
        .collect();
    println!("capture ids = {:?}", capture_ids);

    let captured_text = match capture.get(group_id) {
        Some(group) => group.as_str(),
        None => {
            return Err(test_error!(format!("capture group {} did not match", group_id)))
        }
    };
    println!("captured: '{}'", captured_text.escape_debug());

    assert_eq!(captured_text, test.matches);
    assert_eq!(capture_ids.len(), 1);
    assert_eq!(remaining, test.next_matches.to_owned() + &trailing);

    if let Some(next_regex) = test.next_regex {
        return test_capture(
            &*next_regex,
            None,
            RegexTest {
                prefix: test.next_prefix,
                matches: test.next_matches,
                suffix: test.next_suffix,
                trailing: test.trailing,
                ..Default::default()
            },
        );
    }
    Ok(captured_text.to_string())
}
fn test_capture(
regex: &Regex,
group_id: Option<usize>,
test: RegexTest<'_>,
) -> Result<String, Error> {
let expected_error_str = test.error.clone();
match try_capture(regex, group_id, test) {
Ok(captured) => {
println!("SUCCESSFUL CAPTURE! ... '{}'", captured);
Ok(captured)
}
Err(actual_error) => match expected_error_str {
Some(expected_error_str) => match &actual_error {
Error::TestFailure(_location, actual_error_str) => {
if expected_error_str == actual_error_str {
println!("EXPECTED FAILURE (GOOD NEWS)! ... '{}'", actual_error);
Ok(format!("{}", actual_error))
} else {
println!("{}", actual_error);
println!("expected: {}", expected_error_str);
println!(" actual: {}", actual_error_str);
Err(test_error!(
"Actual error string did not match expected error string."
))
}
}
_unexpected_error_type => {
println!("expected: Test failure: {}", expected_error_str);
println!(" actual: {}", actual_error);
Err(test_error!(
"Actual error type did not match expected test failure type."
))
}
},
None => Err(actual_error),
},
}
}
fn test_regex(group_id: usize, test: RegexTest<'_>) -> Result<String, Error> {
test_capture(&NEXT_TOKEN, Some(group_id), test)
}
// Property test: a run of non-newline whitespace is captured as WHITESPACE when it is followed
// by any number of characters that are not non-newline whitespace.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_whitespace_no_newlines(
spaces in r#"[\s&&[^\n]]+"#,
trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
) {
test_regex(
*WHITESPACE,
RegexTest {
matches: &spaces,
trailing: &trailing_non_whitespace,
..Default::default()
}
)
.unwrap();
}
}
// Property test: a run of non-newline whitespace is captured as WHITESPACE, and the capture
// stops at the newline that immediately follows it.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_whitespace_until_newline(
spaces in r#"[\s&&[^\n]]+"#,
trailing_non_whitespace in r#"\n[^\s&&[^\n]]*"#,
) {
test_regex(
*WHITESPACE,
RegexTest {
matches: &spaces,
trailing: &trailing_non_whitespace,
..Default::default()
}
)
.unwrap();
}
}
// Property test: same as the general whitespace test, but restricted to runs of plain ASCII
// spaces and tabs.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_plain_ascii_whitespace_no_newline(
spaces in r#"[ \t]+"#,
trailing_non_whitespace in r#"[^\s&&[^\n]]*"#,
) {
test_regex(
*WHITESPACE,
RegexTest {
matches: &spaces,
trailing: &trailing_non_whitespace,
..Default::default()
}
)
.unwrap();
}
}
// Property test: a single newline is captured as NEWLINE, whatever non-control characters
// follow it.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_newline(
newline in r#"\n"#,
any_chars in r#"\PC*"#,
) {
test_regex(
*NEWLINE,
RegexTest { matches: &newline, trailing: &any_chars, ..Default::default() },
)
.unwrap();
}
}
// Property test: `//` is captured as LINE_COMMENT_SLASHES, and LINE_COMMENT then captures the
// rest of the line (which may be empty), stopping before a newline or at the end of the input.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_line_comment(
        // The prefix and content strategies are declared as two separate parameters so that
        // `line_comment_content` is actually bound for use below.
        line_comment_prefix in r#"//"#,
        line_comment_content in r#"(|[^\n][^\n]*)"#,
        more_lines_or_eof in r#"(\n\PC*)?"#,
    ) {
        test_regex(
            *LINE_COMMENT_SLASHES,
            RegexTest {
                matches: &line_comment_prefix,
                next_regex: Some(&*LINE_COMMENT),
                next_matches: &line_comment_content,
                trailing: &more_lines_or_eof,
                ..Default::default()
            },
        )
        .unwrap();
    }
}
// Property test: `//` followed directly by a newline or the end of the input is captured as
// LINE_COMMENT_SLASHES, and LINE_COMMENT then captures an empty comment.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_empty_line_comment(
        // The prefix strategy and `more_lines_or_eof` are two separate parameters so that
        // `more_lines_or_eof` is actually bound for use below.
        line_comment_prefix in r#"//"#,
        more_lines_or_eof in r#"(\n\PC*)?"#,
    ) {
        test_regex(
            *LINE_COMMENT_SLASHES,
            RegexTest {
                matches: &line_comment_prefix,
                next_regex: Some(&*LINE_COMMENT),
                next_matches: "",
                trailing: &more_lines_or_eof,
                ..Default::default()
            },
        )
        .unwrap();
    }
}
// Property test: `/*` is captured as OPEN_BLOCK_COMMENT, and BLOCK_COMMENT then captures the
// generated comment body up to (but not including) the closing `*/`. The body strategy never
// produces `*/`, so the first `*/` in the input is always the intended terminator.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_block_comment(
        block_comment_content in r#"([^*]|([*][^*/]))*"#,
        optional_trailing_content in r#"\PC*"#,
    ) {
        test_regex(
            *OPEN_BLOCK_COMMENT,
            RegexTest {
                matches: "/*",
                next_regex: Some(&*BLOCK_COMMENT),
                next_matches: &block_comment_content,
                next_suffix: "*/",
                trailing: &optional_trailing_content,
                ..Default::default()
            },
        )
        .unwrap();
    }
}
// Property test: `/*` immediately followed by `*/` is captured as OPEN_BLOCK_COMMENT, and
// BLOCK_COMMENT then captures an empty comment body, whatever follows the comment.
proptest! {
    #![proptest_config(NO_PERSIST)]
    #[test]
    fn test_empty_block_comment(
        optional_trailing_content in r#"\PC*"#,
    ) {
        test_regex(
            *OPEN_BLOCK_COMMENT,
            RegexTest {
                matches: "/*",
                next_regex: Some(&*BLOCK_COMMENT),
                next_matches: "",
                next_suffix: "*/",
                trailing: &optional_trailing_content,
                ..Default::default()
            },
        )
        .unwrap();
    }
}
// Property test: a name made of `$` and word characters that does not start with a digit,
// followed by optional non-newline whitespace and a colon, is captured (name only) as
// UNQUOTED_PROPERTY_NAME_AND_COLON.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_property_name(
propname in r#"[\w$&&[^\d]][\w$]*"#,
whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
trailing_content in r#"\PC+"#,
) {
test_regex(
*UNQUOTED_PROPERTY_NAME_AND_COLON,
RegexTest {
matches: &propname,
suffix: &whitespace_to_colon,
trailing: &trailing_content,
..Default::default()
},
)
.unwrap();
}
}
// Test two variations of invalid unquoted property name error handling, when expecting a match
// against the regex `UNQUOTED_PROPERTY_NAME_AND_COLON` numbered capture group pattern:
//
// 1) No generated test candidates match any `NEXT_TOKEN` pattern.
// 2) The first character is a digit, which does match a `NEXT_TOKEN` capture (as a number),
// but the candidate is an invalid property name.
//
// It's challenging to write a pattern for what does NOT constitute a valid property name since
// the set of things not part of a given set is infinite. Unicode support also can make it hard
// to define exhaustive patterns sometimes. So here are two tests for invalid unquoted property
// names, both of which validate that a property name cannot start with a digit. The difference
// between the two tests is:
//
// * The first test generates candidate property names that will not match any pattern in the
// `NEXT_TOKEN` regex, generating a "capture failed" error.
// * The second test successfully captures a `NEXT_TOKEN`, but it captures a number literal,
// not an `UNQUOTED_PROPERTY_NAME_AND_COLON`, generating a different error message:
// "capture group {n} did not match" (where '{n}' is the capture group number for
// `UNQUOTED_PROPERTY_NAME_AND_COLON`).
////////////////////////////////////////////////////////////////////////////////////////////////
// Excluding 0-9, e & E, and x and X from the allowed pattern set for the second character
// ensures the pattern generator will not generate strings with prefixes such as: `25`, `0X4`,
// `0xf`, and `3E2`.
// Property test: a candidate name that starts with a digit, followed by a word character other
// than a digit, `e`, `E`, `x`, or `X`, is expected to match no `NEXT_TOKEN` alternative at all,
// producing the "capture failed" test failure.
proptest! {
#![proptest_config(EXTRA_CASES_NO_PERSIST)]
#[test]
fn bad_property_name(
propname in r#"[0-9][\w&&[^0-9eExX]][\w$]*"#,
whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
trailing_content in r#"\PC+"#,
) {
test_regex(
*UNQUOTED_PROPERTY_NAME_AND_COLON,
RegexTest {
error: Some("capture failed"),
matches: &propname,
suffix: &whitespace_to_colon,
trailing: &trailing_content,
..Default::default()
},
)
.unwrap();
}
}
// In this case, the second character is a dollar sign, which is legal for a property name,
// but _not_ a "Word" character in the regex `\w` pattern set. The `\b` (word boundary) applies,
// matching the digit as the `NEXT_TOKEN`, generating an error: "capture group {n} did not
// match".
// Property test: a candidate name made of a digit followed by `$` is expected to produce a
// `NEXT_TOKEN` match in which the UNQUOTED_PROPERTY_NAME_AND_COLON group did not participate,
// so the expected test failure names that capture group.
proptest! {
#![proptest_config(EXTRA_CASES_NO_PERSIST)]
#[test]
fn bad_property_name_captures_number_first(
propname in r#"[0-9]\$[\w$]*"#,
whitespace_to_colon in r#"[\s&&[^\n]]*:"#,
trailing_content in r#"\PC+"#,
) {
test_regex(
*UNQUOTED_PROPERTY_NAME_AND_COLON,
RegexTest {
error: Some(
&format!("capture group {} did not match",
*UNQUOTED_PROPERTY_NAME_AND_COLON)
),
matches: &propname,
suffix: &whitespace_to_colon,
trailing: &trailing_content,
..Default::default()
},
)
.unwrap();
}
}
// Property test: a single quote is captured as OPEN_QUOTE, and SINGLE_QUOTED then captures a
// generated string body (plain characters, escaped quotes, escaped newlines, and escaped
// backslashes) up to the closing quote.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_single_quoted_string(
single_quote in r#"'"#,
single_quoted_string in r#"(([^'\\\n])|(\\')|(\\\n)|(\\\\))*"#,
// comment inserted to balance closing braces [ and { for code editors
non_literal_trailing_content in r#"\s*[,:/\]\}]"#,
) {
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: &single_quote,
next_regex: Some(&*SINGLE_QUOTED),
next_matches: &single_quoted_string,
next_suffix: &single_quote,
trailing: &non_literal_trailing_content,
..Default::default()
},
)
.unwrap();
}
}
// Property test: a double quote is captured as OPEN_QUOTE, and DOUBLE_QUOTED then captures a
// generated string body (plain characters, escaped quotes, escaped newlines, and escaped
// backslashes) up to the closing quote.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_double_quoted_string(
double_quote in r#"""#,
double_quoted_string in r#"(([^"\\\n])|(\\")|(\\\n)|(\\\\))*"#,
non_literal_trailing_content in r#"\s*[,:/\]\}]?\PC*"#,
) {
test_regex(
*OPEN_QUOTE,
RegexTest {
matches: &double_quote,
next_regex: Some(&*DOUBLE_QUOTED),
next_matches: &double_quoted_string,
next_suffix: &double_quote,
trailing: &non_literal_trailing_content,
..Default::default()
},
)
.unwrap();
}
}
// Property test: generated keywords (`null`, `true`, `false`) and numbers in each supported
// format are captured as NON_STRING_PRIMITIVE when they are followed by the end of the input,
// or by whitespace, a comma, or a closing brace.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_non_string_primitive(
non_string_primitive in
concat!(
r#"(null|true|false)|([-+]?(NaN|Infinity|(0[xX][0-9a-fA-F]+)"#,
r#"|([0-9]+[eE][+-]?[0-9]+)|([0-9]*\.[0-9]+)|([0-9]+\.?)))"#
),
ends_non_string_primitive in r#"(|([\s,\]\}]\PC*))"#,
) {
test_regex(
*NON_STRING_PRIMITIVE,
RegexTest {
matches: &non_string_primitive,
trailing: &ends_non_string_primitive,
..Default::default()
}
)
.unwrap();
}
}
// Property test: each of the four brace characters is captured as BRACE, whatever non-control
// characters follow it.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_brace(
brace in r#"[\[\{\}\]]"#,
// comment inserted to add a closing " since VSCode thinks prior quote is still open.
any_chars in r#"\PC*"#,
) {
test_regex(
*BRACE,
RegexTest { matches: &brace, trailing: &any_chars, ..Default::default() },
)
.unwrap();
}
}
// Property test: a comma is captured as COMMA, whatever non-control characters follow it.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_comma(
comma in r#","#,
any_chars in r#"\PC*"#,
) {
test_regex(
*COMMA,
RegexTest { matches: &comma, trailing: &any_chars, ..Default::default() },
)
.unwrap();
}
}
// Property test: the standalone COLON regex (not part of NEXT_TOKEN) captures a colon in its
// first capture group, whatever non-control characters follow it.
proptest! {
#![proptest_config(NO_PERSIST)]
#[test]
fn test_colon(
colon in r#":"#,
any_chars in r#"\PC*"#,
) {
test_capture(
&*COLON,
None,
RegexTest { matches: &colon, trailing: &any_chars, ..Default::default() },
)
.unwrap();
}
}
/// Checks hand-written line comment examples: `//` must be captured as LINE_COMMENT_SLASHES,
/// and LINE_COMMENT must then capture the rest of the line.
#[test]
fn test_regex_line_comment() {
    // Each case is (comment text following the slashes, input following the comment).
    let cases = [
        (" some line comment", ""),
        (" some line comment", "\n more lines"),
        ("", "\nan empty line comment"),
        ("/\t some doc comment", "\nmultiple lines\nare here\n"),
    ];
    for (comment_text, following_text) in cases.iter() {
        test_regex(
            *LINE_COMMENT_SLASHES,
            RegexTest {
                matches: "//",
                next_regex: Some(&*LINE_COMMENT),
                next_matches: comment_text,
                trailing: following_text,
                ..Default::default()
            },
        )
        .unwrap();
    }
}
#[test]
fn test_regex_block_comment() {
test_regex(
*OPEN_BLOCK_COMMENT,
RegexTest {
matches: "/*",
next_regex: Some(&*BLOCK_COMMENT),
next_matches: " this is a single line block comment ",
next_suffix: "*/",
trailing: "\n\nproperty: ignored",
..Default::default()
},
)
.unwrap();
test_regex(
*OPEN_BLOCK_COMMENT,
RegexTest {
matches: "/*",
next_regex: Some(&*BLOCK_COMMENT),
next_matches: " this is a
multiline block comment",
next_suffix: "*/",
trailing: "\n\nproperty: ignored",
..Default::default()
},
)
.unwrap();
test_regex(
*OPEN_BLOCK_COMMENT,
RegexTest {
matches: "/*",
next_regex: Some(&*BLOCK_COMMENT),
next_matches: "",
next_suffix: "*/",
trailing: " to test an empty block comment",
..Default::default()
},
)
.unwrap();
}
/// Checks hand-written keyword and number examples against the NON_STRING_PRIMITIVE capture
/// group, including look-alikes that must not be captured by any `NEXT_TOKEN` alternative.
#[test]
fn test_regex_non_string_primitive() {
    /// Asserts that `candidate`, as the entire input, is captured as a non-string primitive.
    fn expect_match(candidate: &str) {
        test_regex(*NON_STRING_PRIMITIVE, RegexTest { matches: candidate, ..Default::default() })
            .unwrap();
    }

    /// Asserts that `candidate`, as the entire input, fails to match `NEXT_TOKEN` at all.
    fn expect_capture_failure(candidate: &str) {
        test_regex(
            *NON_STRING_PRIMITIVE,
            RegexTest { matches: candidate, error: Some("capture failed"), ..Default::default() },
        )
        .unwrap();
    }

    expect_match("null");
    expect_capture_failure("NULL");
    expect_capture_failure("nullify");
    expect_match("true");
    expect_capture_failure("True");
    expect_capture_failure("truest");
    expect_match("false");

    for sign in &["", "-", "+"] {
        // Exponent notation, with each combination of number sign, marker case, and exponent
        // sign.
        for exponent_sign in &["", "-", "+"] {
            for exponent_marker in &["e", "E"] {
                expect_match(&format!("{}123{}{}456", sign, exponent_marker, exponent_sign));
            }
        }

        // Hexadecimal notation, in mixed case.
        for hex in &["0x1a2b3e4f", "0X1a2b3e4f", "0x1A2B3E4F", "0X1a2B3e4F"] {
            expect_match(&format!("{}{}", sign, hex));
        }
        for invalid_hex in &["0x1a2b3e4fg", "0X"] {
            expect_capture_failure(&format!("{}{}", sign, invalid_hex));
        }

        // These cases do not use the sign, so they are repeated unchanged on each iteration.
        expect_match("NaN");
        expect_capture_failure("NAN");
        expect_capture_failure("NaN0");
        expect_match("Infinity");
        expect_capture_failure("infinity");
        expect_capture_failure("Infinity_");

        // Integers and decimals.
        for number in &[
            "0",
            "1234567890123456789012345678901234567890",
            "12345.67890",
            ".67890",
            "12345.",
        ] {
            expect_match(&format!("{}{}", sign, number));
        }
    }
}
/// Checks hand-written unquoted property name examples against the
/// UNQUOTED_PROPERTY_NAME_AND_COLON capture group.
#[test]
fn test_regex_unquoted_property_name() {
    // Each case is (expected error, property name, text through the colon, following input).
    let cases: [(Option<&str>, &str, &str, &str); 3] = [
        (None, "propname", ":", " 'some property value',"),
        (None, "propname", " :", " 'some property value',"),
        (Some("capture failed"), "99propname", ":", " 'property names do not start with digits,"),
    ];
    for (expected_error, name, separator, following_text) in cases.iter() {
        test_regex(
            *UNQUOTED_PROPERTY_NAME_AND_COLON,
            RegexTest {
                error: *expected_error,
                matches: name,
                suffix: separator,
                trailing: following_text,
                ..Default::default()
            },
        )
        .unwrap();
    }
}
/// Runs the `OPEN_QUOTE` capture group against single- and double-quoted
/// inputs. Every case also supplies a follow-up regex (`SINGLE_QUOTED` or
/// `DOUBLE_QUOTED`) together with the string body and the closing quote that
/// regex is given; how `test_regex` chains the two is defined by that helper.
#[test]
fn test_regex_string() {
    let cases = vec![
        // Plain single-quoted string with nothing after the closing quote.
        RegexTest {
            matches: "'",
            next_regex: Some(&*SINGLE_QUOTED),
            next_matches: "this is a simple single-quoted string",
            next_suffix: "'",
            trailing: "",
            ..Default::default()
        },
        // Single-quoted string containing a backslash followed by a raw newline.
        // NOTE: the continuation line must stay at column 0, since any leading
        // whitespace would become part of the string literal.
        RegexTest {
            matches: "'",
            next_regex: Some(&*SINGLE_QUOTED),
            next_matches: " this is a \\
multiline \"text\" string",
            next_suffix: "'",
            trailing: ", end of value",
            ..Default::default()
        },
        // Plain double-quoted string with nothing after the closing quote.
        RegexTest {
            matches: "\"",
            next_regex: Some(&*DOUBLE_QUOTED),
            next_matches: "this is a simple double-quoted string",
            next_suffix: "\"",
            trailing: "",
            ..Default::default()
        },
        // Double-quoted string with a backslash-newline and an escaped double quote.
        // NOTE: the continuation line must stay at column 0 (see above).
        RegexTest {
            matches: "\"",
            next_regex: Some(&*DOUBLE_QUOTED),
            next_matches: " this is a \\
multiline 'text' string with escaped \\\" double-quote",
            next_suffix: "\"",
            trailing: ", end of value",
            ..Default::default()
        },
        // Empty double-quoted string.
        RegexTest {
            matches: "\"",
            next_regex: Some(&*DOUBLE_QUOTED),
            next_matches: "",
            next_suffix: "\"",
            trailing: ", to test empty string",
            ..Default::default()
        },
    ];

    for case in cases {
        test_regex(*OPEN_QUOTE, case).unwrap();
    }
}
/// Runs the `BRACE` capture group against each of `[`, `]`, `{` and `}`,
/// followed by a variety of trailing text (spaces, a bare value, newlines, and
/// further braces).
#[test]
fn test_regex_braces() {
    // Each entry is (text expected to match, text that follows it).
    let cases = [
        ("[", " 1234 ]"),
        ("[", "true]"),
        ("[", "\n 'item',\n 'item2'\n]"),
        ("]", ",[1234],"),
        ("{", " 1234 }"),
        ("{", "true}"),
        ("{", "\n 'item',\n 'item2'\n}"),
        ("}", ",{1234},"),
    ];

    for &(token, rest) in cases.iter() {
        let case = RegexTest { matches: token, trailing: rest, ..Default::default() };
        test_regex(*BRACE, case).unwrap();
    }
}
/// Runs the `COMMA` capture group (through `test_regex`) and the standalone
/// `COLON` regex (through `test_capture`). A colon preceded by spaces and a tab
/// is expected to succeed, while a colon preceded by a newline is expected to
/// report "capture failed".
#[test]
fn test_regex_command_colon() {
    // Comma followed by different kinds of trailing text.
    let comma_trailers = ["\n 'item',\n 'item2'\n}", "{1234},"];
    for &rest in comma_trailers.iter() {
        let case = RegexTest { matches: ",", trailing: rest, ..Default::default() };
        test_regex(*COMMA, case).unwrap();
    }

    // Colon cases: bare, after horizontal whitespace, and after a newline.
    let colon_cases = vec![
        RegexTest { matches: ":", ..Default::default() },
        RegexTest { matches: " \t :", ..Default::default() },
        RegexTest { error: Some("capture failed"), matches: " \n :", ..Default::default() },
    ];
    for case in colon_cases {
        test_capture(&*COLON, None, case).unwrap();
    }
}
/// Checks that each constructed comment or value reports the matching kind
/// through its `is_*` predicate.
#[test]
fn test_enums() {
    // Comment variants.
    assert!(Comment::Line("a line comment".to_owned()).is_line());
    assert!(Comment::Block { lines: vec!["a block".into(), "comment".into()], align: true }
        .is_block());

    // Value constructors.
    assert!(Primitive::new("l33t".to_owned(), vec![]).is_primitive());
    assert!(Array::new(vec![]).is_array());
    assert!(Object::new(vec![]).is_object());
}
}