marksonnet/lib.rs
//! This crate provides a `Parser` which wraps a `pulldown_cmark::Parser`, while evaluating
//! embedded Jsonnet code blocks. These code blocks are identified using the `marksonnet` language
//! tag, and in the resulting `Iterator` they are replaced by the result of their evaluation.
4//!
5//! `marksonnet` blocks can be used to run any Jsonnet program and parse Markdown as though the
6//! result of that program were included in the text.
7//!
8//! ```
9//! # use pretty_assertions::assert_eq;
10//! use indoc; // multiline string literals
11//! use pulldown_cmark; // illustrate equivalent sources
12//!
13//! let input: &str = indoc::indoc! {r#"
14//! The first five fibonacci numbers are:
15//!
16//! ```marksonnet
17//! local fib(n) = if (n <= 2) then 1 else (fib(n-1) + fib(n-2));
18//! [fib(n) for n in std.range(1, 5)]
19//! ```
20//! "#};
21//!
22//! let expected: &str = indoc::indoc! {r#"
23//! The first five fibonacci numbers are:
24//!
25//! ```json
26//! [
27//! 1,
28//! 1,
29//! 2,
30//! 3,
31//! 5
32//! ]
33//! ```
34//! "#};
35//!
36//! assert_eq!(
37//! marksonnet::Parser::new(input).collect::<Vec<_>>(),
38//! pulldown_cmark::Parser::new(expected).collect::<Vec<_>>()
39//! )
40//! ```
41//!
42//! If the Jsonnet evaluates to a string, then it is substituted inline, rather than being JSON
43//! encoded and placed inside of a codeblock.
44//!
45//! ```
46//! # use pretty_assertions::assert_eq;
47//! use indoc; // multiline string literals
48//! use pulldown_cmark; // illustrate equivalent sources
49//!
50//! let input: &str = indoc::indoc! {r#"
51//! My favorite greeting is:
52//! ```marksonnet
53//! std.format('> %s, %s!', ['Hello', 'World'])
54//! ```
55//! "#};
56//!
57//! let expected: &str = indoc::indoc! {r#"
58//! My favorite greeting is:
59//!
60//! > Hello, World!"#};
61//!
62//!
63//! assert_eq!(
64//! marksonnet::Parser::new(input).collect::<Vec<_>>(),
65//! pulldown_cmark::Parser::new(expected).collect::<Vec<_>>()
66//! )
67//! ```
68//!
69//! Marksonnet also supports imports relative to the file directory.
70//!
71//! ```
72//! # use pretty_assertions::assert_eq;
73//! use indoc; // multiline string literals
74//! use pulldown_cmark; // illustrate equivalent sources
75//!
76//! let input: &str = indoc::indoc! {r#"
77//! This is the content of `example/sample.json`:
78//!
79//! ```marksonnet
80//! import 'example/sample.json'
81//! ```
82//! "#};
83//!
84//! let expected: &str = indoc::indoc! {r#"
85//! This is the content of `example/sample.json`:
86//!
87//! ```json
88//! {
89//! "bar": "baz",
90//! "foo": "bar"
91//! }
92//! ```
93//! "#};
94//!
95//! assert_eq!(
96//! marksonnet::Parser::new(input).collect::<Vec<_>>(),
97//! pulldown_cmark::Parser::new(expected).collect::<Vec<_>>()
98//! )
99//! ```
100//!
101//! Note that in this case, `example/sample.json` was imported by Jsonnet and parsed, then
102//! re-serialized.
103
104use jrsonnet_evaluator::{self, Val, manifest::JsonFormat, val::StrValue};
105use pulldown_cmark::{self, CodeBlockKind, CowStr, Event, Tag, TagEnd};
106use std::{collections::VecDeque, iter::Peekable, path::PathBuf};
107
/// A Markdown event iterator that wraps a `pulldown_cmark::Parser` and replaces fenced code
/// blocks tagged `marksonnet` with the events produced by evaluating their Jsonnet contents.
///
/// All other events are passed through from the wrapped parser unchanged.
pub struct Parser<'a> {
    /// A `pulldown_cmark::Parser` containing the Markdown source being processed. It is wrapped
    /// in `Peekable` so the upcoming event can be inspected without consuming it.
    source_parser: Peekable<pulldown_cmark::Parser<'a>>,

    /// Contains the currently-emitting evaluation result if any. Events are emitted from the
    /// front of this queue before the source parser is consulted again.
    eval_result: VecDeque<Event<'a>>,

    /// Contains a list of Jsonnet library_paths (JPaths) to use for imports. The current directory
    /// is implicitly included by Jrsonnet.
    library_paths: Vec<PathBuf>,
}
119
120impl<'a> Parser<'a> {
121 pub fn new(text: &'a str) -> Self {
122 Parser {
123 source_parser: pulldown_cmark::Parser::new(text).peekable(),
124 eval_result: VecDeque::new(),
125 library_paths: vec![],
126 }
127 }
128
129 pub fn with_library_paths(mut self, jpaths: &Vec<PathBuf>) -> Self {
130 self.library_paths = jpaths.clone();
131 return self;
132 }
133
134 pub fn add_library_path(&mut self, jpath: PathBuf) {
135 self.library_paths.push(jpath);
136 }
137
138 /// Whether the next Event is from eval_result.
139 fn has_eval_result(&self) -> bool {
140 self.eval_result.len() > 0
141 }
142
143 /// Check whether the next Event in the `source_parser` is a Marksonnet block start.
144 fn peek_marksonnet_block_start(&mut self) -> bool {
145 Self::is_marksonnet_block_start(&self.source_parser.peek())
146 }
147
148 /// Determine whether a given event is a Marksonnet block start.
149 fn is_marksonnet_block_start(event: &Option<&Event>) -> bool {
150 matches!(
151 event,
152 Some(Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(
153 CowStr::Borrowed("marksonnet")
154 ))))
155 )
156 }
157
158 /// eval_block consumes the upcoming marksonnet code block from `source_parser`, and stores the
159 /// resulting events in `eval_result`.
160 ///
161 /// Will panic using `debug_assert!` if:
162 /// - eval_result is nonempty
163 /// - the next event is not a marksonnet code block start
164 fn eval_block(&mut self) -> Result<(), jrsonnet_evaluator::Error> {
165 debug_assert!(
166 !self.has_eval_result(),
167 "eval_block called when eval_result is nonempty; it is {:?}",
168 self.eval_result
169 );
170
171 // Consume and drop the block start marker.
172 let block_start = self.source_parser.next();
173 debug_assert!(
174 Self::is_marksonnet_block_start(&block_start.as_ref()),
175 "eval_block called expecting next event to be a Marksonnet block start; it is {:?}",
176 block_start
177 );
178 drop(block_start);
179
180 let mut contents = String::new();
181
182 loop {
183 let event = self.source_parser.next();
184 match event {
185 // Add text events to the Jsonnet contents buffer.
186 Some(Event::Text(CowStr::Borrowed(text))) => contents.push_str(text),
187
188 // If we reach the end of the code block, break out of the loop.
189 Some(Event::End(TagEnd::CodeBlock)) => break,
190
191 // If we reach anything else, do something else
192 _ => unimplemented!("handle malformatted code block (encountered {:?})", event),
193 }
194 }
195
196 // Initialize a jrsonnet evaluator State using a builder pattern.
197 let mut s = jrsonnet_evaluator::State::builder();
198 s.import_resolver(jrsonnet_evaluator::FileImportResolver::new(
199 self.library_paths.clone(),
200 ));
201 s.context_initializer(jrsonnet_stdlib::ContextInitializer::new(
202 jrsonnet_evaluator::trace::PathResolver::new_cwd_fallback(),
203 ));
204 let s = s.build(); // shadow the builder with an instance
205 let val = Some(s.evaluate_snippet("<marksonnet>", contents));
206
207 match val {
208 Some(Ok(Val::Str(StrValue::Flat(text)))) => {
209 for event in pulldown_cmark::Parser::new(text.as_str()) {
210 // TODO: investigate whether into_static is a reasonable approach here.
211 //
212 // Because we're trying to be as lazily-evaluated an iterator as possible, we
213 // only act when `next()` is called. When we encounter a Marksonnet block, we
214 // must evaluate the whole thing. This gives us a result from jrsonnet in the
215 // form of interned IStr values. The pulldown_cmark Parser operates on an
216 // immutable reference to what we give it, and the Events that it emits may
217 // contain CowStr::Borrowed strings. Ultimately, these are references to a &str
218 // we converted in the scope of this function, so we need them to stay alive
219 // somehow.
220 //
221 // I don't know if into_static is appropriate for copying strings out of the
222 // function scope, or if there's something else to do (considering that
223 // self-referential structs are not allowed by the borrow checker.)
224 self.eval_result.push_back(event.into_static());
225 }
226 }
227 Some(Ok(val)) => {
228 let manifested = val.manifest(JsonFormat::cli(4))? + "\n";
229 self.eval_result = vec![
230 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed(
231 "json",
232 )))),
233 Event::Text(CowStr::Boxed(manifested.into())),
234 Event::End(TagEnd::CodeBlock),
235 ]
236 .into();
237 }
238 _ => todo!("error or unsupported Val variant: {:?}", val),
239 }
240 Ok(())
241 }
242}
243
244impl<'a> Iterator for Parser<'a> {
245 type Item = Event<'a>;
246
247 /// Get the next Event.
248 fn next(&mut self) -> Option<Self::Item> {
249 if self.has_eval_result() {
250 self.eval_result.pop_front()
251 } else if self.peek_marksonnet_block_start() {
252 // TODO: eval_block can currently fail and leave mangled event streams.
253 let _ = self.eval_block();
254 self.next()
255 } else {
256 self.source_parser.next()
257 }
258 }
259}
260
// Unit tests comparing the output of this crate's `Parser` against the output of the upstream
// `pulldown_cmark::Parser` run on hand-written expected Markdown.
#[cfg(test)]
mod test {
    use super::*;
    use indoc::indoc;
    use pretty_assertions::assert_eq;
    use pulldown_cmark_to_cmark::cmark;

    // Generates a `#[test]` function named `$name`.
    //
    // `$input` is parsed with this crate's `Parser`; `$expected` is parsed with the upstream
    // `pulldown_cmark::Parser`. Both event streams are serialized back to CommonMark with
    // `cmark` and compared as strings. The result of `cmark` itself is ignored (`let _ =`).
    macro_rules! test {
        // 3-argument version to check pulldown-cmark-to-cmark equivalence. This dodges some of the
        // issues about non-equivalence of CowStr variants (e.g. Borrowed and Boxed strings).
        ($name:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $name() {
                // Parse the input using our parser and convert to CommonMark.
                let mut input = String::new();
                let _ = cmark(Parser::new($input), &mut input);

                // Parse the expected Event stream using the upstream parser and convert to
                // CommonMark.
                let mut expected = String::new();
                let _ = cmark(pulldown_cmark::Parser::new($expected), &mut expected);

                // Compare equivalence as strings.
                assert_eq!(input, expected);
            }
        };
        // 4-argument version to check cmark and event equivalence
        ($name:ident, $input:expr, $expected:expr, $expected_events:expr) => {
            #[test]
            fn $name() {
                // Parse the input using our parser and collect it into a Vec for later comparison.
                let input_events: Vec<_> = Parser::new($input).collect();
                let mut input_cmark = String::new();
                let _ = cmark(input_events.iter(), &mut input_cmark); // convert cmark using the Vec
                let input = (input_cmark, input_events);

                // Parse the expected Event stream using the upstream parser and convert to
                // CommonMark.
                let mut expected_cmark = String::new();
                let _ = cmark(pulldown_cmark::Parser::new($expected), &mut expected_cmark);
                let expected = (expected_cmark, $expected_events);

                // Compare simultaneously, for easier debugging, both pairs:
                // - our parsed input serialized as cmark with the expected output parsed and
                // re-serialized as cmark
                // - our parsed input Events with the expected raw Events
                assert_eq!(input, expected);
            }
        };
    }

    // A document without any marksonnet block is expected to come out unchanged.
    test!(
        no_marksonnet,
        indoc! {r#"This is a simple markdown document containing no marksonnet."#},
        indoc! {r#"This is a simple markdown document containing no marksonnet."#}
    );
    // A non-string result (an empty object) becomes a `json` code block, with the surrounding
    // text left in place.
    test!(
        empty_marksonnet_object,
        indoc! {r#"
            This is a simple markdown document containing a marksonnet object.

            ```marksonnet
            {}
            ```

            There is also text after it."#},
        // NB: jrsonnet serializes `{}` as `"{ }"` for pretty-printing.
        indoc! {r#"
            This is a simple markdown document containing a marksonnet object.

            ```json
            { }
            ```

            There is also text after it."#}
    );
    // An object containing a computed field; also checks the exact events that are emitted.
    test!(
        simple_marksonnet_object_calculation,
        indoc! {r#"```marksonnet
            {
            "value": 2 + 2
            }
            ```"#},
        // NB: pulldown_cmark_to_cmark seems to prefix a newline when a codeblock is the first
        // event. The Vec<Event> below shows the output insofar as marksonnet is concerned.
        indoc! {r#"

            ```json
            {
            "value": 4
            }
            ```"#},
        vec![
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed(
                "json"
            )))),
            Event::Text(CowStr::Boxed("{\n \"value\": 4\n}\n".into())),
            Event::End(TagEnd::CodeBlock)
        ]
    );
    // A string result is parsed as Markdown and emitted inline rather than as a code block.
    test!(
        simple_marksonnet_string,
        indoc! {r#"```marksonnet
            "Hello, world!"
            ```"#},
        indoc! {r#"Hello, world!"#}
    );
    // `import` of a JSON file yields a `json` code block.
    // NOTE(review): relies on `example/sample.json` being resolvable from the directory the
    // tests run in -- confirm.
    test!(
        sample_import,
        indoc! {r#"```marksonnet
            import 'example/sample.json'
            ```"#},
        indoc! {r#"
            ```json
            {
            "bar": "baz",
            "foo": "bar"
            }
            ```"#}
    );
    // `importstr` yields a string, so the imported file's contents are parsed as Markdown.
    // NOTE(review): relies on `example/sample.md` being resolvable from the directory the tests
    // run in -- confirm.
    test!(
        sample_importstr,
        indoc! {r#"```marksonnet
            importstr 'example/sample.md'
            ```"#},
        indoc! {r#"
            # Sample!

            This file is a sample.
            "#}
    );
    // Same as `sample_importstr`, but with ordinary Markdown preceding the block.
    test!(
        sample_importstr_with_prefix,
        indoc! {r#"
            Here we have a prefix.

            ```marksonnet
            importstr 'example/sample.md'
            ```"#},
        indoc! {r#"
            Here we have a prefix.

            # Sample!

            This file is a sample.
            "#}
    );
}