marksonnet/
lib.rs

1//! This crate provides a `Parser` which wraps a `pulldown_cmark::Parser`, while evaluating
2//! embedded Jsonnet code blocks. These code blocks are identified using the `marksonnet` language
3//! tag, and they are substituted in the resulting `Iterator` for the result of their evaluation.
4//!
5//! `marksonnet` blocks can be used to run any Jsonnet program and parse Markdown as though the
6//! result of that program were included in the text.
7//!
8//! ```
9//!   # use pretty_assertions::assert_eq;
10//!   use indoc;           // multiline string literals
11//!   use pulldown_cmark;  // illustrate equivalent sources
12//!
13//!   let input: &str = indoc::indoc! {r#"
14//!     The first five fibonacci numbers are:
15//!
16//!     ```marksonnet
17//!     local fib(n) = if (n <= 2) then 1 else (fib(n-1) + fib(n-2));
18//!     [fib(n) for n in std.range(1, 5)]
19//!     ```
20//!   "#};
21//!
22//!   let expected: &str = indoc::indoc! {r#"
23//!     The first five fibonacci numbers are:
24//!
25//!     ```json
26//!     [
27//!         1,
28//!         1,
29//!         2,
30//!         3,
31//!         5
32//!     ]
33//!     ```
34//!   "#};
35//!
36//!   assert_eq!(
37//!     marksonnet::Parser::new(input).collect::<Vec<_>>(),
38//!     pulldown_cmark::Parser::new(expected).collect::<Vec<_>>()
39//!   )
40//! ```
41//!
42//! If the Jsonnet evaluates to a string, then it is substituted inline, rather than being JSON
43//! encoded and placed inside of a codeblock.
44//!
45//! ```
46//!   # use pretty_assertions::assert_eq;
47//!   use indoc;           // multiline string literals
48//!   use pulldown_cmark;  // illustrate equivalent sources
49//!
50//!   let input: &str = indoc::indoc! {r#"
51//!     My favorite greeting is:
52//!     ```marksonnet
53//!     std.format('> %s, %s!', ['Hello', 'World'])
54//!     ```
55//!   "#};
56//!
57//!   let expected: &str = indoc::indoc! {r#"
58//!     My favorite greeting is:
59//!
60//!      > Hello, World!"#};
61//!
62//!
63//!   assert_eq!(
64//!     marksonnet::Parser::new(input).collect::<Vec<_>>(),
65//!     pulldown_cmark::Parser::new(expected).collect::<Vec<_>>()
66//!   )
67//! ```
68//!
69//! Marksonnet also supports imports relative to the file directory.
70//!
71//! ```
72//!   # use pretty_assertions::assert_eq;
73//!   use indoc;           // multiline string literals
74//!   use pulldown_cmark;  // illustrate equivalent sources
75//!
76//!   let input: &str = indoc::indoc! {r#"
77//!     This is the content of `example/sample.json`:
78//!
79//!     ```marksonnet
80//!     import 'example/sample.json'
81//!     ```
82//!   "#};
83//!
84//!   let expected: &str = indoc::indoc! {r#"
85//!     This is the content of `example/sample.json`:
86//!
87//!     ```json
88//!     {
89//!         "bar": "baz",
90//!         "foo": "bar"
91//!     }
92//!     ```
93//!     "#};
94//!
95//!   assert_eq!(
96//!     marksonnet::Parser::new(input).collect::<Vec<_>>(),
97//!     pulldown_cmark::Parser::new(expected).collect::<Vec<_>>()
98//!   )
99//! ```
100//!
101//! Note that in this case, `example/sample.json` was imported by Jsonnet and parsed, then
102//! re-serialized.
103
104use jrsonnet_evaluator::{self, Val, manifest::JsonFormat, val::StrValue};
105use pulldown_cmark::{self, CodeBlockKind, CowStr, Event, Tag, TagEnd};
106use std::{collections::VecDeque, iter::Peekable, path::PathBuf};
107
108pub struct Parser<'a> {
109    /// A `pulldown_cmark::Parser` containing the Markdown source being processed.
110    source_parser: Peekable<pulldown_cmark::Parser<'a>>,
111
112    /// Contains the currently-emitting evaluation result if any.
113    eval_result: VecDeque<Event<'a>>,
114
115    /// Contains a list of Jsonnet library_paths (JPaths) to use for imports. The current directory
116    /// is implicitly included by Jrsonnet.
117    library_paths: Vec<PathBuf>,
118}
119
120impl<'a> Parser<'a> {
121    pub fn new(text: &'a str) -> Self {
122        Parser {
123            source_parser: pulldown_cmark::Parser::new(text).peekable(),
124            eval_result: VecDeque::new(),
125            library_paths: vec![],
126        }
127    }
128
129    pub fn with_library_paths(mut self, jpaths: &Vec<PathBuf>) -> Self {
130        self.library_paths = jpaths.clone();
131        return self;
132    }
133
134    pub fn add_library_path(&mut self, jpath: PathBuf) {
135        self.library_paths.push(jpath);
136    }
137
138    /// Whether the next Event is from eval_result.
139    fn has_eval_result(&self) -> bool {
140        self.eval_result.len() > 0
141    }
142
143    /// Check whether the next Event in the `source_parser` is a Marksonnet block start.
144    fn peek_marksonnet_block_start(&mut self) -> bool {
145        Self::is_marksonnet_block_start(&self.source_parser.peek())
146    }
147
148    /// Determine whether a given event is a Marksonnet block start.
149    fn is_marksonnet_block_start(event: &Option<&Event>) -> bool {
150        matches!(
151            event,
152            Some(Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(
153                CowStr::Borrowed("marksonnet")
154            ))))
155        )
156    }
157
158    /// eval_block consumes the upcoming marksonnet code block from `source_parser`, and stores the
159    /// resulting events in `eval_result`.
160    ///
161    /// Will panic using `debug_assert!` if:
162    /// - eval_result is nonempty
163    /// - the next event is not a marksonnet code block start
164    fn eval_block(&mut self) -> Result<(), jrsonnet_evaluator::Error> {
165        debug_assert!(
166            !self.has_eval_result(),
167            "eval_block called when eval_result is nonempty; it is {:?}",
168            self.eval_result
169        );
170
171        // Consume and drop the block start marker.
172        let block_start = self.source_parser.next();
173        debug_assert!(
174            Self::is_marksonnet_block_start(&block_start.as_ref()),
175            "eval_block called expecting next event to be a Marksonnet block start; it is {:?}",
176            block_start
177        );
178        drop(block_start);
179
180        let mut contents = String::new();
181
182        loop {
183            let event = self.source_parser.next();
184            match event {
185                // Add text events to the Jsonnet contents buffer.
186                Some(Event::Text(CowStr::Borrowed(text))) => contents.push_str(text),
187
188                // If we reach the end of the code block, break out of the loop.
189                Some(Event::End(TagEnd::CodeBlock)) => break,
190
191                // If we reach anything else, do something else
192                _ => unimplemented!("handle malformatted code block (encountered {:?})", event),
193            }
194        }
195
196        // Initialize a jrsonnet evaluator State using a builder pattern.
197        let mut s = jrsonnet_evaluator::State::builder();
198        s.import_resolver(jrsonnet_evaluator::FileImportResolver::new(
199            self.library_paths.clone(),
200        ));
201        s.context_initializer(jrsonnet_stdlib::ContextInitializer::new(
202            jrsonnet_evaluator::trace::PathResolver::new_cwd_fallback(),
203        ));
204        let s = s.build(); // shadow the builder with an instance
205        let val = Some(s.evaluate_snippet("<marksonnet>", contents));
206
207        match val {
208            Some(Ok(Val::Str(StrValue::Flat(text)))) => {
209                for event in pulldown_cmark::Parser::new(text.as_str()) {
210                    // TODO: investigate whether into_static is a reasonable approach here.
211                    //
212                    // Because we're trying to be as lazily-evaluated an iterator as possible, we
213                    // only act when `next()` is called. When we encounter a Marksonnet block, we
214                    // must evaluate the whole thing. This gives us a result from jrsonnet in the
215                    // form of interned IStr values. The pulldown_cmark Parser operates on an
216                    // immutable reference to what we give it, and the Events that it emits may
217                    // contain CowStr::Borrowed strings. Ultimately, these are references to a &str
218                    // we converted in the scope of this function, so we need them to stay alive
219                    // somehow.
220                    //
221                    // I don't know if into_static is appropriate for copying strings out of the
222                    // function scope, or if there's something else to do (considering that
223                    // self-referential structs are not allowed by the borrow checker.)
224                    self.eval_result.push_back(event.into_static());
225                }
226            }
227            Some(Ok(val)) => {
228                let manifested = val.manifest(JsonFormat::cli(4))? + "\n";
229                self.eval_result = vec![
230                    Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed(
231                        "json",
232                    )))),
233                    Event::Text(CowStr::Boxed(manifested.into())),
234                    Event::End(TagEnd::CodeBlock),
235                ]
236                .into();
237            }
238            _ => todo!("error or unsupported Val variant: {:?}", val),
239        }
240        Ok(())
241    }
242}
243
244impl<'a> Iterator for Parser<'a> {
245    type Item = Event<'a>;
246
247    /// Get the next Event.
248    fn next(&mut self) -> Option<Self::Item> {
249        if self.has_eval_result() {
250            self.eval_result.pop_front()
251        } else if self.peek_marksonnet_block_start() {
252            // TODO: eval_block can currently fail and leave mangled event streams.
253            let _ = self.eval_block();
254            self.next()
255        } else {
256            self.source_parser.next()
257        }
258    }
259}
260
#[cfg(test)]
mod test {
    use super::*;
    use indoc::indoc;
    use pretty_assertions::assert_eq;
    use pulldown_cmark_to_cmark::cmark;

    /// Generate a `#[test]` function comparing the output of our `Parser` on `$input` with the
    /// output of the upstream parser on `$expected`.
    macro_rules! test {
        // 3-argument version to check pulldown-cmark-to-cmark equivalence. This dodges some of the
        // issues about non-equivalence of CowStr variants (e.g. Borrowed and Boxed strings).
        ($name:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $name() {
                // Parse the input using our parser and convert to CommonMark. A serialization
                // failure fails the test rather than leaving a partial string to compare.
                let mut input = String::new();
                cmark(Parser::new($input), &mut input)
                    .expect("serializing the marksonnet event stream to CommonMark failed");

                // Parse the expected Event stream using the upstream parser and convert to
                // CommonMark.
                let mut expected = String::new();
                cmark(pulldown_cmark::Parser::new($expected), &mut expected)
                    .expect("serializing the expected event stream to CommonMark failed");

                // Compare equivalence as strings.
                assert_eq!(input, expected);
            }
        };
        // 4-argument version to check cmark and event equivalence
        ($name:ident, $input:expr, $expected:expr, $expected_events:expr) => {
            #[test]
            fn $name() {
                // Parse the input using our parser and collect it into a Vec for later comparison.
                let input_events: Vec<_> = Parser::new($input).collect();
                let mut input_cmark = String::new();
                // Convert to cmark using the Vec, so the events stay available for comparison.
                cmark(input_events.iter(), &mut input_cmark)
                    .expect("serializing the marksonnet event stream to CommonMark failed");
                let input = (input_cmark, input_events);

                // Parse the expected Event stream using the upstream parser and convert to
                // CommonMark.
                let mut expected_cmark = String::new();
                cmark(pulldown_cmark::Parser::new($expected), &mut expected_cmark)
                    .expect("serializing the expected event stream to CommonMark failed");
                let expected = (expected_cmark, $expected_events);

                // Compare simultaneously, for easier debugging, both pairs:
                // - our parsed input serialized as cmark with the expected output parsed and
                // re-serialized as cmark
                // - our parsed input Events with the expected raw Events
                assert_eq!(input, expected);
            }
        };
    }

    test!(
        no_marksonnet,
        indoc! {r#"This is a simple markdown document containing no marksonnet."#},
        indoc! {r#"This is a simple markdown document containing no marksonnet."#}
    );
    test!(
        empty_marksonnet_object,
        indoc! {r#"
          This is a simple markdown document containing a marksonnet object.

          ```marksonnet
          {}
          ```

          There is also text after it."#},
        // NB: jrsonnet serializes `{}` as `"{ }"` for pretty-printing.
        indoc! {r#"
            This is a simple markdown document containing a marksonnet object.

            ```json
            { }
            ```

            There is also text after it."#}
    );
    test!(
        simple_marksonnet_object_calculation,
        indoc! {r#"```marksonnet
            {
                "value": 2 + 2
            }
            ```"#},
        // NB: pulldown_cmark_to_cmark seems to prefix a newline when a codeblock is the first
        // event. The Vec<Event> below shows the output insofar as marksonnet is concerned.
        indoc! {r#"

            ```json
            {
                "value": 4
            }
            ```"#},
        vec![
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed(
                "json"
            )))),
            Event::Text(CowStr::Boxed("{\n    \"value\": 4\n}\n".into())),
            Event::End(TagEnd::CodeBlock)
        ]
    );
    test!(
        simple_marksonnet_string,
        indoc! {r#"```marksonnet
            "Hello, world!"
            ```"#},
        indoc! {r#"Hello, world!"#}
    );
    test!(
        sample_import,
        indoc! {r#"```marksonnet
            import 'example/sample.json'
            ```"#},
        indoc! {r#"
            ```json
            {
                "bar": "baz",
                "foo": "bar"
            }
            ```"#}
    );
    test!(
        sample_importstr,
        indoc! {r#"```marksonnet
            importstr 'example/sample.md'
            ```"#},
        indoc! {r#"
            # Sample!

            This file is a sample.
            "#}
    );
    test!(
        sample_importstr_with_prefix,
        indoc! {r#"
            Here we have a prefix.

            ```marksonnet
            importstr 'example/sample.md'
            ```"#},
        indoc! {r#"
            Here we have a prefix.

            # Sample!

            This file is a sample.
            "#}
    );
}