---- rosie-sys 1.3.1
----
---- A crate to build or link to librosie to access the Rosie Pattern Language
---- Documentation
---- -*- Mode: rpl; -*-                                                                             
----
---- json.rpl    some rpl patterns for processing json input
----
---- © Copyright IBM Corporation 2016, 2017, 2018.
---- LICENSE: MIT License (https://opensource.org/licenses/mit-license.html)
---- AUTHOR: Jamie A. Jennings


---------------------------------------------------------------------------------------------------
-- Note 1:  Why have a JSON pattern for Rosie?
--
-- RATIONALE: A language-integrated parser built specifically to parse JSON is usually the best way
-- to process json-encoded documents.  Occasionally, though, there will be a json-encoded field
-- within non-json data, e.g. log files in which some entries contain json data in the message
-- field.
--
-- This is where having a Rosie parser for JSON is very useful.
--
---------------------------------------------------------------------------------------------------
-- Note 2:  The pattern defined here, json.value, is NOT a validating parser.
--
-- When the pattern 'json.value ~' (which consumes whitespace after the JSON) is used in the tests
-- at https://github.com/nst/JSONTestSuite (see main page at http://seriot.ch/parsing_json.php), the
-- results are:
--   * json.value accepts all valid JSON input
--   * json.value ALSO accepts numbers that begin with + or with a leading 0 (e.g. +1, 02)
--   * json.value ALSO accepts any double quoted string, without validating escape sequences
--
-- Examples of the last category are strings like these:
--   * "\F09F8C80" (should be "\UF09F8C80")
--   * "\u00A" (\u requires exactly 4 hex digits)
--   * "\a" (not a valid escape sequence in JSON)
--
-- RATIONALE: In actual data we have encountered in the field, numbers do sometimes start with + or
-- have a leading 0.  Similarly, not every string of characters constitutes a valid JSON encoding.
-- The RPL pattern json.value returns what is in the JSON input, and the consumer of the data must
-- interpret it.
--   * Most languages will convert numbers +1, +0, 02 to integers correctly.
--   * Modern languages provide routines for interpreting a string as UTF-8, and it is up to the
--     data consumer to decide how to handle an invalid UTF-8 string inside a JSON structure.
--     E.g. json.loads(s) in python will interpret JSON escape sequences and throw an error if there
--     is an invalid sequence.
--
---------------------------------------------------------------------------------------------------

package json

import word, num

-- Package-private aliases that give the grammars below JSON vocabulary.
-- 'key' and 'string' are both bound to the imported pattern word.dq; as Note 2 in the file header
-- says, any double-quoted string is accepted and escape sequences are not validated.
local key = word.dq
local string = word.dq
-- Bound to the imported pattern num.signed_number.  Judging by the accept/reject cases listed
-- further down, it admits an optional leading sign, a fraction, and an exponent (e.g. "+3.3",
-- "1.2e-10").  NOTE(review): the exact definition lives in the num package -- confirm there.
local number = num.signed_number

-- The three JSON literal names, matched as exact text; the reject cases listed further down show
-- that a different spelling or case (e.g. "ture", "NULL") does not match.
local true = "true"
local false = "false"
local null = "null"

-- json.value: matches one JSON value -- a string, number, object, array, or one of the literals
-- true / false / null.
--
-- The rules before 'in' (member, object, array) are helper rules that are mutually recursive with
-- 'value': an object holds members, a member holds a value, and an array holds values.  Only
-- 'value', the rule after 'in', is bound outside the grammar.
--
-- The leading '~' in 'value' consumes any whitespace in front of the value.  This pattern does not
-- consume whitespace after the value, and it is not a validating parser (see Note 2 in the file
-- header).
grammar
   member = key ":" value
   object = "{" ( member ("," member)* )? "}"
   array = "[" ( value ("," value)* )? "]"
in
   value = ~ string / number / object / array / true / false / null
end

-- test value accepts "true", "false", "null"
-- test value rejects "ture", "f", "NULL"
-- test value accepts "0", "123", "-1", "1.1001", "1.2e10", "1.2e-10", "+3.3"
-- test value accepts "123e65", "0e+1", "0e1", "20e1", "1E22", "1E-2", "1E+2", "123e45", "1e-2", "1e+2"
-- test value accepts "\"hello\"", "\"this string has \\\"embedded\\\" double quotes\""
-- test value rejects "hello", "\"this string has no \\\"final quote\\\" "
-- test value rejects "--2", "9.1.", "9.1.2", "++2", "2E02."

-- test value accepts "[]", "[1, 2, 3.14, \"V\", 6.02e23, true]", "[1, 2, 3]", "[1, 2, [7], [[8]]]"
-- test value rejects "[]]", "[", "[[]", "{1, 2}"

-- test value accepts "{\"one\":1}", "{ \"one\" :1}", "{ \"one\" : 1  }"
-- test value accepts "{\"one\":1, \"two\": 2}", "{\"one\":1, \"two\": 2, \"array\":[1,2]}"
-- test value accepts "[{\"v\":1}, {\"v\":2}, {\"v\":3}]"


-- As of RPL 1.2, a grammar only exposes one visible binding, which is the one after the 'in'
-- keyword.  So if we want to match json arrays or json objects, we need to define them.

-- json.array: matches only a JSON array, i.e. "[" followed by zero or more comma-separated values
-- and a closing "]".
--
-- This grammar exists because a grammar exposes just the single rule after 'in'.  The rules
-- member, object and value are therefore declared again here as helpers local to this grammar;
-- they are needed because array elements may themselves be objects or nested arrays.
grammar
   member = key ":" value
   object = "{" (member ("," member)*)? "}"
   value = ~ string / number / object / array / true / false / null
in
   array = "[" (value ("," value)*)? "]"
end

-- test array accepts "[{\"v\":1}, {\"v\":2}, {\"v\":3}]"
-- test array rejects "0", "true", "\"hello, world\"", "{\"one\":1}"

-- json.object: matches only a JSON object, i.e. "{" followed by zero or more comma-separated
-- members (each a key, then ":", then a value) and a closing "}".
--
-- This grammar exists because a grammar exposes just the single rule after 'in'.  The rules
-- member, value and array are therefore declared again here as helpers local to this grammar;
-- they are needed because member values may themselves be arrays or nested objects.
grammar
   member = key ":" value
   value = ~ string / number / object / array / true / false / null
   array = "[" (value ("," value)*)? "]"
in
   object = "{" (member ("," member)*)? "}"
end

-- test object accepts "{\"one\":1}", "{ \"one\" :1}", "{ \"one\" : 1  }"
-- test object accepts "{\"one\":1, \"two\": 2}", "{\"one\":1, \"two\": 2, \"array\":[1,2]}"
-- test object rejects "0", "true", "\"hello, world\"", "[1, 2, 3]"