assemblage_view/
markup.rs

1//! A minimal markup language for Assemblage text blocks.
2//!
3//! ## Features
4//!
5//!   - _extremely minimal_: Only 4 block styles and 5 span styles.
6//!   - _simple to parse_: Each style corresponds to a single character.
7//!   - _unambiguous_: Only one way to write each style.
8//!   - _flat_: No nesting, neither for headings nor lists.
9//!
10//! ## Markup Example
11//!
12//! (Note that the following code block is not strictly speaking the markup language
13//! that is parsed by the functions provided in this crate, as these functions
14//! always parse _a single line of markup into a single AssemblageDB block_.)
15//!
16//! ```text
17//! # Headings start with "#".
18//! > Block quotes start with ">".
19//! - Lists...
20//! - ...start...
21//! - ...with...
22//! - ..."-"!
23//! , Oh and by the way, asides start with ",".
24//!
25//! The above 4 block styles are all there is to block styling.
26//! They can be combined in any order:
27//!
28//! #>, A block quote heading aside.
29//! ,>#> Also a block quote heading aside.
30//!
31//! But " " is needed to separate the block markers from the text:
32//!
33//! #This is just regular text, as block styles need to end with a " ".
34//! #>-This is also just regular text...
35//!
36//! There are also 5 different span styles:
37//!
38//! *These three words* are bold.
39//! And _this_ is italic.
40//! Words can be ~struck from a sentence~.
41//! Code can be displayed with a `monospaced typeface`!
42//! Some |parts of a sentence| can be marked and thus highlighted.
43//!
44//! Each span style can be escaped, for example in: 2 \* 2 = 4.
45//!
46//! And that's it!
47//! ```
48//!
49//! ## Why not Markdown?
50//!
51//! Markdown is relatively easy to write, but is far from simple to parse and
52//! process, with many different implementations that do not always follow the
53//! same specification. More importantly however, Markdown provides markup
54//! capabilities for _full documents_ including multiple (nested) hierarchy
55//! levels and the ability to include arbitrary HTML, which ties Markdown to the
56//! web.
57//!
58//! Instead, the ultra-minimal markup language implemented here provides markup
59//! only for text blocks (not full documents) and does not support any nesting,
60//! neither of headings nor of lists or other structures. This is deliberate, as
61//! nested structure and rich hierarchies arise from the _graph structure and
62//! interplay of different AssemblageDB nodes_, not as the result of a single and
63//! complex markup block.
64//!
65//! Minimal markup encourages structure through the combination of different
66//! documents, whereas sophisticated markup encourages siloization into fewer
67//! less richly connected documents.
68//!
69//! ## Specification (as ABNF)
70//!
71//! ```abnf
72//! markup       = [block-markup] span-markup
73//! block-markup = 1*(heading / quote / list / aside) " "
74//! heading      = "#"
75//! quote        = ">"
76//! list         = "-"
77//! aside        = ","
78//! span-markup  = normal / bold / italic / struck / mono / marked
79//! normal       = *(unescaped / escaped)
80//! unescaped    = ; all characters except "\", "*", "_", "~", "`", "|" and newline
//! escaped      = "\\" / "\*" / "\_" / "\~" / "\`" / "\|"
82//! bold         = "*" span-markup "*"
83//! italic       = "_" span-markup "_"
84//! struck       = "~" span-markup "~"
85//! mono         = "`" span-markup "`"
86//! marked       = "|" span-markup "|"
87//! ```
88use std::collections::{BTreeSet, HashSet};
89
90use assemblage_db::data::{BlockStyle, Layout, Node, SpanStyle};
91#[cfg(target_arch = "wasm32")]
92use wasm_bindgen::prelude::*;
93
94use crate::model::{Block, Span};
95
96/// The error type for conversions from markup to blocks.
97#[derive(Debug)]
98pub enum DeserializationError {
99    /// Errors raised while converting to/from JSON using serde.
100    SerdeError(serde_json::Error),
101    /// Markup for a single block must never contain any newlines.
102    FoundNewline,
103}
104
105impl From<serde_json::Error> for DeserializationError {
106    fn from(e: serde_json::Error) -> Self {
107        Self::SerdeError(e)
108    }
109}
110
#[cfg(target_arch = "wasm32")]
impl From<DeserializationError> for JsValue {
    /// Converts the error into a JS string value: the serde error's display
    /// text, or a fixed message if the markup contained a newline.
    fn from(e: DeserializationError) -> Self {
        let message = match e {
            DeserializationError::FoundNewline => String::from("Found newline in block markup"),
            DeserializationError::SerdeError(inner) => inner.to_string(),
        };
        JsValue::from_str(&message)
    }
}
122
123/// The error type for conversions from blocks to markup.
124#[derive(Debug)]
125pub enum SerializationError {
126    /// Block type does not support serialization.
127    InvalidBlockType(Block),
128    /// Span type does not support serialization.
129    InvalidSpanType(Span),
130}
131
#[cfg(target_arch = "wasm32")]
impl From<SerializationError> for JsValue {
    /// Converts the error into a JS string value that names the kind of
    /// failure and includes the debug representation of the rejected value.
    fn from(e: SerializationError) -> Self {
        let message = match e {
            SerializationError::InvalidSpanType(span) => {
                format!("Invalid span type: {:?}", span)
            }
            SerializationError::InvalidBlockType(block) => {
                format!("Invalid block type: {:?}", block)
            }
        };
        JsValue::from_str(&message)
    }
}
145
146/// Parses a single line of markup and converts it into a node tree.
147pub fn markup_to_node(markup: &str) -> Result<Node, DeserializationError> {
148    let block = parse_block(markup)?;
149    Ok(match block {
150        Block::Text { styles, spans } => {
151            let mut spans: Vec<Node> = spans
152                .iter()
153                .map(|s| match s {
154                    Span::Text { styles, text, .. } => {
155                        if styles.is_empty() {
156                            Node::text(text)
157                        } else {
158                            Node::styled(styles.clone(), Node::text(text))
159                        }
160                    }
161                    Span::Link { .. } => {
162                        panic!("Link spans should never be the result of parsing markup")
163                    }
164                })
165                .collect();
166            let span_node = if spans.len() == 1 {
167                spans.pop().unwrap()
168            } else {
169                Node::list(Layout::Chain, spans)
170            };
171            if styles.is_empty() {
172                Node::list(Layout::Page, vec![span_node])
173            } else {
174                Node::styled(styles, span_node)
175            }
176        }
177        Block::Cyclic => panic!("Cyclic blocks should never be the result of parsing markup"),
178    })
179}
180
/// Parses a single line of markup and returns a block as a JSON string.
///
/// # Errors
///
/// Returns a JS string value describing the failure if the markup cannot be
/// parsed or if the parsed block cannot be serialized to JSON. (Serialization
/// failures are reported as errors instead of panicking across the wasm
/// boundary.)
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub fn markup_to_json(markup: &str) -> std::result::Result<String, JsValue> {
    let block = parse_block(markup)?;
    // Route serde failures through `DeserializationError` so that they reach
    // JS with the same formatting as every other error of this function.
    let json = serde_json::to_string(&block).map_err(DeserializationError::from)?;
    Ok(json)
}
187
188/// Parses a single line of markup and returns a block.
189pub fn markup_to_block(markup: &str) -> Result<Block, DeserializationError> {
190    parse_block(markup)
191}
192
/// Converts a block (in form of a JSON string) into its markup string
/// representation.
///
/// # Errors
///
/// Returns a JS string value with the debug representation of the serde error
/// if the JSON cannot be deserialized into a block, or the converted
/// `SerializationError` if the block cannot be written as markup.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub fn json_to_markup(markup: &str) -> std::result::Result<String, JsValue> {
    let block = serde_json::from_str::<Block>(markup)
        .map_err(|e| JsValue::from_str(&format!("{:?}", e)))?;
    let serialized = block_to_markup(&block)?;
    Ok(serialized)
}
204
205/// Converts a block to its markup string representation.
206pub fn block_to_markup(block: &Block) -> Result<String, SerializationError> {
207    match block {
208        Block::Text { styles, spans } => as_markup(styles, spans),
209        Block::Cyclic => Err(SerializationError::InvalidBlockType(block.clone())),
210    }
211}
212
213fn parse_block(markup: &str) -> Result<Block, DeserializationError> {
214    if markup.contains('\n') {
215        return Err(DeserializationError::FoundNewline);
216    }
217    let (index, block_styles) = parse_block_styles_from_prefix(markup);
218    let markup = &markup[index..];
219    Ok(Block::Text {
220        styles: block_styles,
221        spans: parse_spans(markup),
222    })
223}
224
225fn parse_block_styles_from_prefix(markup: &str) -> (usize, BTreeSet<BlockStyle>) {
226    let mut styles = BTreeSet::new();
227    let (markup, is_escaped) = markup
228        .strip_prefix('\\')
229        .map_or((markup, false), |stripped| (stripped, true));
230    for (i, char) in markup.chars().enumerate() {
231        styles.insert(match char {
232            ',' => BlockStyle::Aside,
233            '>' => BlockStyle::Quote,
234            '-' => BlockStyle::List,
235            '#' => BlockStyle::Heading,
236            ' ' if is_escaped => return (1, BTreeSet::new()),
237            ' ' if styles.is_empty() => break,
238            ' ' => return (i + 1, styles),
239            _ => break,
240        });
241    }
242    (0, BTreeSet::new())
243}
244
245fn parse_spans(markup: &str) -> Vec<Span> {
246    let mut spans = Vec::new();
247    let mut buffer = Vec::new();
248    let mut active_styles = HashSet::new();
249    let mut is_escaped = false;
250    for char in markup.chars() {
251        let style = match char {
252            '*' => Some(SpanStyle::Bold),
253            '_' => Some(SpanStyle::Italic),
254            '~' => Some(SpanStyle::Struck),
255            '`' => Some(SpanStyle::Mono),
256            '|' => Some(SpanStyle::Marked),
257            _ => None,
258        };
259        if let Some(style) = style {
260            if is_escaped {
261                buffer.push(char);
262            } else {
263                if !buffer.is_empty() {
264                    spans.push(Span::Text {
265                        styles: active_styles.iter().copied().collect(),
266                        text: buffer.iter().collect(),
267                    });
268                }
269                buffer.clear();
270                if active_styles.contains(&style) {
271                    active_styles.remove(&style);
272                } else {
273                    active_styles.insert(style);
274                }
275            }
276        } else {
277            if is_escaped {
278                buffer.push('\\');
279            }
280            match char {
281                '\\' => {}
282                _ => buffer.push(char),
283            }
284        }
285        is_escaped = match char {
286            '\\' => !is_escaped,
287            _ => false,
288        };
289    }
290    if !buffer.is_empty() {
291        spans.push(Span::Text {
292            styles: active_styles.iter().copied().collect(),
293            text: buffer.iter().collect(),
294        });
295    }
296    spans
297}
298
299fn as_markup(styles: &BTreeSet<BlockStyle>, spans: &[Span]) -> Result<String, SerializationError> {
300    let mut markup = String::new();
301    for block_style in styles.iter().rev() {
302        match block_style {
303            BlockStyle::Aside => markup.push(','),
304            BlockStyle::Quote => markup.push('>'),
305            BlockStyle::List => markup.push('-'),
306            BlockStyle::Heading => markup.push('#'),
307        }
308    }
309    if !markup.is_empty() {
310        markup.push(' ');
311    }
312
313    if let Some(Span::Text { styles: _, text }) = spans.last() {
314        let (_, block_styles_in_prefix) = parse_block_styles_from_prefix(text);
315        if !block_styles_in_prefix.is_empty() {
316            markup.push('\\');
317        }
318    }
319
320    fn add_span_markup<'a>(markup: &mut String, styles: impl Iterator<Item = &'a SpanStyle>) {
321        for s in styles {
322            match s {
323                SpanStyle::Bold => markup.push('*'),
324                SpanStyle::Italic => markup.push('_'),
325                SpanStyle::Struck => markup.push('~'),
326                SpanStyle::Mono => markup.push('`'),
327                SpanStyle::Marked => markup.push('|'),
328            }
329        }
330    }
331
332    let mut active_styles = Vec::new();
333    for span in spans.iter() {
334        match span {
335            Span::Text { styles, text } => {
336                let mut closed_or_opened = Vec::new();
337                for i in (0..active_styles.len()).rev() {
338                    let s = active_styles[i];
339                    if !styles.iter().any(|next| *next == s) {
340                        closed_or_opened.push(s);
341                        active_styles.remove(i);
342                    }
343                }
344                for s in styles.iter().rev() {
345                    if !active_styles.iter().any(|active| active == s) {
346                        closed_or_opened.push(*s);
347                        active_styles.push(*s);
348                    }
349                }
350                add_span_markup(&mut markup, closed_or_opened.iter());
351                markup.push_str(
352                    &text
353                        .replace("\\", "\\\\")
354                        .replace("*", "\\*")
355                        .replace("_", "\\_")
356                        .replace("~", "\\~")
357                        .replace("`", "\\`")
358                        .replace("|", "\\|"),
359                );
360            }
361            _ => return Err(SerializationError::InvalidSpanType(span.clone())),
362        }
363    }
364    if !active_styles.is_empty() {
365        add_span_markup(&mut markup, active_styles.iter().rev());
366    }
367    Ok(markup)
368}