json_ns/lib.rs
1//! The reference implementation for JSON-NS, a small and basic subset of JSON-LD. See the [blog
2//! post] for what this is and why it exists.
3//!
4//! [blog post]: https://stephank.nl/p/2018-10-20-a-proposal-for-standardising-a-subset-of-json-ld.html
5//!
6//! This implementation uses the `serde_json` crate types to represent JSON values. Doing basic
7//! processing involves creating a `Processor`, which holds some optional configuration, and giving
8//! it a `Value` to process:
9//!
10//! ```rust
11//! #[macro_use]
12//! extern crate serde_json as json;
13//! extern crate json_ns;
14//!
15//! use json_ns::Processor;
16//!
17//! fn main() {
18//! // Some example input.
19//! let input = json!({
20//! "@context": {
21//! "foo": "http://example.com/ns#"
22//! },
23//! "foo:hello": "world"
24//! });
25//!
26//! // Process the document, and use `bar` instead as the output prefix.
27//! let output = Processor::new()
28//! .add_rule("bar", "http://example.com/ns#")
29//! .process_value(&input);
30//!
31//! // Check that the output is what we expected.
32//! assert_eq!(output, json!({
33//! "bar:hello": "world"
34//! }));
35//! }
36//! ```
37//!
38//! Without the processor configuration, this code can be even shorter:
39//!
40//! ```rust,ignore
41//! let output = Processor::new().process_value(&input);
42//! ```
43//!
44//! In this case, the output document contains a property named `http://example.com/ns#hello`.
45//!
46//! Often, the bulk of the properties you expect are in a single namespace. In this case, it may be
47//! useful to set a default namespace on the output, for which properties are not prefixed at all:
48//!
49//! ```rust,ignore
50//! processor.add_rule("", "http://example.com/ns#");
51//! ```
52//!
53//! The output then contains a property named just `hello`. This is especially useful when passing
54//! the value on to `serde_json::from_value` to parse it into a struct that derives `Deserialize`.
55//!
56//! Note that the output should *not* itself be considered a JSON-NS document. Running input
57//! through a processor twice may produce unexpected results.
58//!
59//! That should cover the basics. More details can be found in the documentation of the structs,
60//! fields and functions.
61
62#[macro_use]
63extern crate cfg_if;
64extern crate serde_json as json;
65
66cfg_if! {
67 if #[cfg(test)] {
68 extern crate colored;
69 mod test;
70 }
71}
72
73use json::Value;
74use std::borrow::Cow;
75use std::collections::BTreeMap;
76use std::slice::Iter;
77
78type Map = json::Map<String, Value>;
79
80/// Iterator used to walk a value that may or may not be an array.
81enum OneOrMany<'a> {
82 None,
83 One(&'a Value),
84 Many(Iter<'a, Value>),
85}
86
87impl<'a> From<&'a Value> for OneOrMany<'a> {
88 fn from(value: &'a Value) -> Self {
89 match *value {
90 Value::Array(ref arr) => OneOrMany::Many(arr.iter()),
91 ref value => OneOrMany::One(value),
92 }
93 }
94}
95
96impl<'a> Iterator for OneOrMany<'a> {
97 type Item = &'a Value;
98 fn next(&mut self) -> Option<&'a Value> {
99 match *self {
100 OneOrMany::None => None,
101 OneOrMany::One(value) => {
102 *self = OneOrMany::None;
103 Some(value)
104 },
105 OneOrMany::Many(ref mut iter) => {
106 iter.next()
107 },
108 }
109 }
110}
111
/// The active context a document is interpreted with.
///
/// A `Processor` carries one of these as its external context, and it may be modified to
/// influence how documents are interpreted. A custom context can also be built from JSON via
/// the `From` implementations.
#[derive(Clone, Debug, Default)]
pub struct Context {
    /// Default namespace, applied to properties that are not a keyword, CURIE, or IRI.
    pub ns: Option<String>,
    /// Default language for internationalised properties that don't specify one. The empty
    /// string when not defined.
    pub lang: String,
    /// Defined CURIE prefixes, mapped to their base IRIs.
    pub prefixes: BTreeMap<String, String>,
    /// Defined aliases, keyed by their literal property names.
    pub aliases: BTreeMap<String, String>,
    /// Defined container mappings, keyed by their literal property names.
    pub container: BTreeMap<String, String>,
}
131
132impl Context {
133 /// An alias for `Context::default()`.
134 pub fn new() -> Context {
135 Context::default()
136 }
137
138 /// Merge an `@context` value into this structure.
139 pub fn merge_value(&mut self, value: &Value) {
140 for value in OneOrMany::from(value) {
141 match *value {
142 Value::Null => {
143 // A null clears the context.
144 *self = Context::default();
145 },
146 Value::Object(ref object) => {
147 // An object is merged into the context.
148 self.merge_object(object);
149 },
150 _ => {
151 // Captures remote context references, but also anything else we don't understand.
152 // These are simply ignored.
153 },
154 }
155 }
156 }
157
158 /// Merge an `@context` object into this structure.
159 pub fn merge_object(&mut self, object: &Map) {
160 for (key, value) in object {
161 if is_keyword(key) {
162 match key.as_str() {
163 "@vocab" => {
164 // Set the default namespace. May be null to clear it.
165 if let Some(ns) = value.as_str().filter(|s| is_absolute_iri(s)) {
166 self.ns = Some(ns.to_owned());
167 } else if value.is_null() {
168 self.ns = None;
169 }
170 },
171 "@language" => {
172 // Set the default language. May be null to clear it.
173 if let Some(lang) = value.as_str() {
174 self.lang = lang.to_owned();
175 } else if value.is_null() {
176 self.lang = "".to_owned();
177 }
178 },
179 _ => {},
180 }
181 } else {
182 match *value {
183 Value::String(ref string) => {
184 // Define a namespace.
185 if is_curie_prefix(key) && is_absolute_iri(string) {
186 self.prefixes.insert(key.to_owned(), string.to_owned());
187 }
188 },
189 Value::Object(ref object) => {
190 // Look for an alias.
191 let alias = object.get("@id")
192 .and_then(Value::as_str)
193 .filter(|string| !is_keyword(string));
194 if let Some(alias) = alias {
195 self.aliases.insert(key.to_owned(), alias.to_owned());
196 }
197
198 // Look for a container mapping.
199 let container = object.get("@container")
200 .and_then(Value::as_str);
201 if let Some(container) = container {
202 self.container.insert(key.to_owned(), container.to_owned());
203 }
204 },
205 Value::Null => {
206 // A null value is used to clear whatever was defined.
207 self.prefixes.remove(key);
208 self.aliases.remove(key);
209 self.container.remove(key);
210 },
211 _ => {},
212 }
213 }
214 }
215 }
216
217 /// Expand a name according to this context.
218 ///
219 /// A name may be an absolute IRI, a CURIE within a defined namespace, or a name in the default
220 /// namespace, otherwise `None` is returned (and the property or value should be dropped).
221 pub fn expand_name<'a>(&self, name: &'a str) -> Option<Cow<'a, str>> {
222 if name.starts_with('@') {
223 return None;
224 }
225
226 let mut parts = name.splitn(2, ':');
227 let prefix = parts.next().unwrap();
228 if let Some(suffix) = parts.next() {
229 if let Some(base) = self.prefixes.get(prefix) {
230 // A CURIE within a defined namespace.
231 Some(Cow::from(format!("{}{}", base, suffix)))
232 } else {
233 // An absolute IRI in some other scheme.
234 Some(Cow::from(name))
235 }
236 } else if let Some(ref base) = self.ns {
237 // A term in the default namespace.
238 Some(Cow::from(format!("{}{}", base, name)))
239 } else {
240 None
241 }
242 }
243}
244
245impl<'a> From<&'a Value> for Context {
246 fn from(value: &'a Value) -> Context {
247 let mut context = Context::default();
248 context.merge_value(value);
249 context
250 }
251}
252
253impl<'a> From<&'a Map> for Context {
254 fn from(object: &'a Map) -> Context {
255 let mut context = Context::default();
256 context.merge_object(object);
257 context
258 }
259}
260
/// The target context a document is reworded to.
///
/// A `Processor` carries one of these, and it may be modified to provide the rules according
/// to which the output is reworded.
///
/// By default this context is empty, which results in an output document containing only
/// absolute IRIs.
#[derive(Clone, Debug, Default)]
pub struct TargetContext {
    /// Pairs of CURIE prefixes and their respective base IRIs.
    ///
    /// For each absolute IRI about to be added to the output document, the processor looks for
    /// a rule in this list whose base IRI the IRI starts with; rules are tried in order and the
    /// first match is used to emit a CURIE instead.
    ///
    /// The list may also contain an entry with an empty string prefix, which then represents
    /// the default namespace of the output document.
    pub rules: Vec<(String, String)>,
}

impl TargetContext {
    /// Create an empty target context. Equivalent to `TargetContext::default()`.
    pub fn new() -> TargetContext {
        Default::default()
    }

    /// Append a `(prefix, base)` rule; returns `self` so calls can be chained.
    pub fn add_rule(&mut self, prefix: &str, base: &str) -> &mut Self {
        self.rules.push((String::from(prefix), String::from(base)));
        self
    }

    /// Compact an absolute IRI according to this context.
    ///
    /// The first rule whose base is a prefix of `iri` decides the result: the bare suffix for
    /// the default namespace (empty prefix), or a `prefix:suffix` CURIE otherwise. Without a
    /// matching rule the IRI is returned unchanged.
    pub fn compact_iri<'a>(&self, iri: &'a str) -> Cow<'a, str> {
        let hit = self
            .rules
            .iter()
            .find(|rule| iri.starts_with(rule.1.as_str()));

        match hit {
            // No match, output the absolute IRI.
            None => Cow::Borrowed(iri),
            Some((prefix, base)) => {
                let suffix = &iri[base.len()..];
                if prefix.is_empty() {
                    // Matched the default namespace.
                    Cow::Borrowed(suffix)
                } else {
                    // Matched a prefix, generate a CURIE.
                    Cow::Owned(format!("{}:{}", prefix, suffix))
                }
            }
        }
    }
}
310
311/// A document processor.
312///
313/// This structure holds configuration for processing documents. The defaults are fine if the
314/// output document should contain only absolute IRIs, but usually you want to set some namespaces
315/// for the output document in the `TargetContext` contained within.
316#[derive(Clone,Debug,Default)]
317pub struct Processor {
318 /// External context added to the document. Defaults to an empty context, so only inline
319 /// contexts in the document itself are used.
320 pub context: Context,
321 /// Target context to reword the document to. Defaults to an empty context, so the result will
322 /// contain only absolute IRIs for all properties and types.
323 pub target: TargetContext,
324}
325
326impl Processor {
327 /// Alias for `Processor::default()`.
328 pub fn new() -> Processor {
329 Processor::default()
330 }
331
332 /// A short-hand for adding a rule to the contained `TargetContext`.
333 pub fn add_rule(&mut self, prefix: &str, base: &str) -> &mut Self {
334 self.target.add_rule(prefix, base);
335 self
336 }
337
338 /// Process a value, using the configuration in this struct.
339 pub fn process_value(&self, value: &Value) -> Value {
340 self.process_value_inner(value, &self.context)
341 }
342
343 /// Process an object, using the configuration in this struct.
344 pub fn process_object(&self, object: &Map) -> Map {
345 self.process_object_inner(object, &self.context)
346 }
347
348 /// Process a value with a local context.
349 fn process_value_inner(&self, value: &Value, context: &Context) -> Value {
350 match *value {
351 Value::Array(ref array) => {
352 let array = array.iter()
353 .map(|value| self.process_value_inner(value, context))
354 .collect::<Vec<_>>();
355 Value::Array(array)
356 },
357 Value::Object(ref object) => {
358 Value::Object(self.process_object_inner(object, context))
359 },
360 ref value => value.clone(),
361 }
362 }
363
364 /// Process an object with a local context.
365 fn process_object_inner(&self, object: &Map, context: &Context) -> Map {
366 // Extend the active context with the local context, if present.
367 let local_context = object.get("@context").map(|value| {
368 let mut context = context.clone();
369 context.merge_value(value);
370 context
371 });
372 let context = local_context.as_ref().unwrap_or(context);
373
374 let mut result = Map::with_capacity(object.len());
375 for (key, value) in object {
376 if key.starts_with('@') {
377 // A keyword property.
378 match key.as_str() {
379 "@id" => {
380 // Document ID, must be an absolute IRI.
381 if let Some(iri) = value.as_str().filter(|s| is_absolute_iri(s)) {
382 result.insert(key.clone(), Value::String(iri.to_owned()));
383 }
384 },
385 "@type" => {
386 // Document type, a string or array of strings, each of which expands to an
387 // absolute IRI. (We don't support `@type` on values, like JSON-LD.)
388 let value = OneOrMany::from(value)
389 .filter_map(|value| value.as_str())
390 .filter_map(|string| context.expand_name(string))
391 .map(|iri| self.target.compact_iri(&iri).into_owned())
392 .map(Value::String)
393 .collect::<Vec<_>>();
394 if !value.is_empty() {
395 result.insert(key.clone(), Value::Array(value));
396 }
397 },
398 _ => {
399 // Ignore `@context` (already processed) and other unrecognized keywords.
400 },
401 }
402
403 continue;
404 }
405
406 // Look for an alias.
407 let resolved = context.aliases.get(key).map(String::as_str).unwrap_or(key);
408
409 // Resolve in the current context.
410 let resolved = match context.expand_name(resolved) {
411 Some(iri) => self.target.compact_iri(&iri).into_owned(),
412 None => continue,
413 };
414
415 // Look for a container mapping of the original property name.
416 result.insert(resolved, match context.container.get(key).map(String::as_str) {
417 Some("@language") => {
418 // An internationalised property.
419 match *value {
420 Value::String(_) => {
421 // Normalise a string value to a language map with a single entry for
422 // the context default language.
423 let mut object = Map::with_capacity(1);
424 object.insert(context.lang.clone(), value.clone());
425 Value::Object(object)
426 },
427 Value::Object(ref object) => {
428 // Filter non-string values from the object.
429 let object = object.iter()
430 .filter(|(_, value)| value.is_string())
431 .map(|(key, value)| (key.clone(), value.clone()))
432 .collect();
433 Value::Object(object)
434 },
435 _ => {
436 // Drop unrecognised values.
437 continue;
438 },
439 }
440 },
441 _ => {
442 // No or unrecognized container mapping, which we treat as a normal value.
443 // Expand it by recursing.
444 self.process_value_inner(value, context)
445 },
446 });
447 }
448
449 result
450 }
451}
452
/// Whether the input is a keyword, i.e. its first character is `@`.
fn is_keyword(input: &str) -> bool {
    input.chars().next() == Some('@')
}
457
/// Whether the input is accepted as an absolute IRI.
///
/// The check is deliberately simple: the input must contain a `:` and must not start
/// with `@`.
fn is_absolute_iri(input: &str) -> bool {
    if input.starts_with('@') {
        return false;
    }
    input.find(':').is_some()
}
462
/// Whether the input is accepted as a CURIE prefix.
///
/// A prefix must be non-empty, must not start with `@`, and must not contain a `:`.
fn is_curie_prefix(input: &str) -> bool {
    match input.chars().next() {
        None | Some('@') => false,
        Some(_) => !input.contains(':'),
    }
}