Skip to main content

gemtext/
lib.rs

//! This module implements a simple text/gemini parser, builder and renderer
//! based on the description here:
//! https://gemini.circumlunar.space/docs/specification.html
3use std::io::{self, Write};
4
5/// Build a gemini document up from a series of nodes.
6#[derive(Default)]
7pub struct Builder {
8    nodes: Vec<Node>,
9}
10
11impl Builder {
12    pub fn new() -> Builder {
13        Builder::default()
14    }
15
16    pub fn text<T: Into<String>>(mut self, data: T) -> Builder {
17        self.nodes.push(Node::Text(data.into()));
18        self
19    }
20
21    pub fn link<T: Into<String>>(mut self, to: T, name: Option<String>) -> Builder {
22        self.nodes.push(Node::Link {
23            to: to.into(),
24            name: name,
25        });
26        self
27    }
28
29    pub fn preformatted<T: Into<String>>(mut self, data: T) -> Builder {
30        self.nodes.push(Node::Preformatted(data.into()));
31        self
32    }
33
34    pub fn heading<T: Into<String>>(mut self, level: u8, body: T) -> Builder {
35        self.nodes.push(Node::Heading {
36            level: level,
37            body: body.into(),
38        });
39        self
40    }
41
42    pub fn list_item<T: Into<String>>(mut self, item: T) -> Builder {
43        self.nodes.push(Node::ListItem(item.into()));
44        self
45    }
46
47    pub fn quote<T: Into<String>>(mut self, body: T) -> Builder {
48        self.nodes.push(Node::Quote(body.into()));
49        self
50    }
51
52    pub fn build(self) -> Vec<Node> {
53        self.nodes
54    }
55}
56
57/// Render a set of nodes as a document to a writer.
58pub fn render(nodes: Vec<Node>, out: &mut impl Write) -> io::Result<()> {
59    use Node::*;
60
61    for node in nodes {
62        match node {
63            Text(body) => {
64                let special_prefixes = ["=>", "```", "#", "*", ">"];
65                if special_prefixes.iter().any(|prefix| body.starts_with(prefix)) {
66                    write!(out, " ")?;
67                }
68                write!(out, "{}\n", body)?
69            },
70            Link { to, name } => match name {
71                Some(name) => write!(out, "=> {} {}\n", to, name)?,
72                None => write!(out, "=> {}\n", to)?,
73            },
74            Preformatted(body) => write!(out, "```\n{}\n```\n", body)?,
75            Heading { level, body } => write!(out, "{} {}\n", "#".repeat(level as usize), body)?,
76            ListItem(body) => write!(out, "* {}\n", body)?,
77            Quote(body) => write!(out, "> {}\n", body)?,
78        };
79    }
80
81    Ok(())
82}
83
/// A single element of a text/gemini document.
///
/// Every variant except [`Node::Preformatted`] corresponds to one line of the
/// document; a preformatted node holds the whole block found between two
/// toggle lines.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Node {
    /// A text line, the most fundamental line type: any line that does not
    /// match one of the other line types is a text line. Most lines of a
    /// typical text/gemini document are text lines.
    Text(String),

    /// A link line. Link lines begin with the two characters "=>" and have
    /// the following syntax:
    ///
    /// ```gemini
    /// =>[<whitespace>]<URL>[<whitespace><USER-FRIENDLY LINK NAME>]
    /// ```
    ///
    /// where:
    ///
    /// * `<whitespace>` is any non-zero number of consecutive spaces or tabs.
    /// * Square brackets mark the enclosed content as optional.
    /// * `<URL>` is an absolute or relative URL. When the URL has no scheme,
    ///   `gemini://` is implied.
    ///
    /// `to` holds the URL and `name` the optional user-friendly link name.
    Link { to: String, name: Option<String> },

    /// The contents of a preformatted block.
    ///
    /// A line whose first three characters are three consecutive back ticks
    /// (with no leading whitespace) is a preformatted toggle line. Toggle
    /// lines are NOT part of the output shown to the user; they switch the
    /// parser's preformatted mode "on" or "off". The mode is "off" at the
    /// start of a document and is the only internal state a parser has to
    /// keep. While the mode is "on", the usual line-type rules are suspended
    /// and every line is a preformatted text line (see 5.4.4 of the
    /// specification).
    ///
    /// Preformatted text should be shown in a "neutral", monowidth font with
    /// whitespace untouched and no stylistic enhancements. Graphical clients
    /// should prefer scrolling over wrapping for lines wider than the
    /// viewport. Think of ASCII art and source code: code in languages with
    /// significant whitespace (e.g. Python) should survive being copied out
    /// of the client into a file and then interpreted or compiled.
    Preformatted(String),

    /// A heading line. Heading lines begin with "#": one, two or three
    /// consecutive "#" characters, then optional whitespace, then the heading
    /// text. The number of "#" characters is the heading `level`; #, ## and
    /// ### are analogous to `<h1>`, `<h2>` and `<h3>` in HTML.
    ///
    /// Clients MAY style heading text (e.g. larger or bold font); simple
    /// clients may print the line as-is, leading #s included. The main
    /// purpose of headings is not styling but a machine-readable outline of
    /// the document: advanced clients can build a hierarchical "table of
    /// contents" from it, and CMS-style tools generating menus or Atom/RSS
    /// feeds for a directory of text/gemini files can use the first heading
    /// of a file as its human-friendly title.
    Heading { level: u8, body: String },

    /// An unordered list item. Lines beginning with "* " are list items; the
    /// line type exists purely for stylistic reasons. Advanced clients may
    /// replace the * with a bullet symbol. The text after the "* " should be
    /// presented like a text line, i.e. wrapped to fit the viewport and
    /// formatted "nicely"; clients may account for the width of the bullet so
    /// that all wrapped lines of an item share the same left offset.
    ListItem(String),

    /// A quote line. Lines beginning with ">" are quote lines; the line type
    /// exists so that advanced clients can style text that is quoted from an
    /// external source distinctly. For example, when long lines are wrapped
    /// to the viewport, each resulting line may get a ">" symbol placed at
    /// the front.
    Quote(String),
}
164
165impl Node {
166    pub fn blank() -> Node {
167        Node::Text("".to_string())
168    }
169}
170
171pub fn parse(doc: &str) -> Vec<Node> {
172    let mut result: Vec<Node> = vec![];
173    let mut collect_preformatted: bool = false;
174    let mut preformatted_buffer: Vec<u8> = vec![];
175
176    for line in doc.lines() {
177        if line.starts_with("```") {
178            collect_preformatted = !collect_preformatted;
179            if !collect_preformatted {
180                result.push(Node::Preformatted(
181                    String::from_utf8(preformatted_buffer)
182                        .unwrap()
183                        .trim_end()
184                        .to_string(),
185                ));
186                preformatted_buffer = vec![];
187            }
188            continue;
189        }
190
191        if collect_preformatted && line != "```" {
192            write!(preformatted_buffer, "{}\n", line).unwrap();
193            continue;
194        }
195
196        // Quotes
197        if line.starts_with(">") {
198            result.push(Node::Quote(line[1..].trim().to_string()));
199            continue;
200        }
201
202        // List items
203        if line.starts_with("*") {
204            result.push(Node::ListItem(line[1..].trim().to_string()));
205            continue;
206        }
207
208        // Headings
209        if line.starts_with("###") {
210            result.push(Node::Heading {
211                level: 3,
212                body: line[3..].trim().to_string(),
213            });
214            continue;
215        }
216        if line.starts_with("##") {
217            result.push(Node::Heading {
218                level: 2,
219                body: line[2..].trim().to_string(),
220            });
221            continue;
222        }
223        if line.starts_with("#") {
224            result.push(Node::Heading {
225                level: 1,
226                body: line[1..].trim().to_string(),
227            });
228            continue;
229        }
230
231        // Links
232        if line.starts_with("=>") {
233            let sp = line[2..].split_ascii_whitespace().collect::<Vec<&str>>();
234
235            match sp.len() {
236                1 => result.push(Node::Link {
237                    to: sp[0].trim().to_string(),
238                    name: None,
239                }),
240                _ => result.push(Node::Link {
241                    to: sp[0].trim().to_string(),
242                    name: Some(sp[1..].join(" ").trim().to_string()),
243                }),
244            }
245
246            continue;
247        }
248
249        result.push(Node::Text(line.to_string()));
250    }
251
252    result
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Set up logging for a test; the result of `try_init` is deliberately
    /// ignored.
    fn init_logging() {
        let _ = pretty_env_logger::try_init();
    }

    /// Shorthand for a heading node.
    fn heading_node(level: u8, body: &str) -> Node {
        Node::Heading {
            level,
            body: body.to_string(),
        }
    }

    /// Shorthand for a link node.
    fn link_node(to: &str, name: Option<&str>) -> Node {
        Node::Link {
            to: to.to_string(),
            name: name.map(str::to_string),
        }
    }

    /// The bundled help document parses into at least one node.
    #[test]
    fn basic() {
        init_logging();
        let nodes = parse(include_str!("../../majc/src/help.gmi"));
        assert!(!nodes.is_empty());
    }

    /// A quote marker directly followed by text yields a quote node.
    #[test]
    fn quote() {
        init_logging();
        let expected = vec![Node::Quote("hi there".to_string())];
        assert_eq!(expected, parse(">hi there"));
    }

    /// A list marker directly followed by text yields a list item.
    #[test]
    fn list() {
        init_logging();
        let expected = vec![Node::ListItem("hi there".to_string())];
        assert_eq!(expected, parse("*hi there"));
    }

    /// A preformatted block is collected into one node and normal parsing
    /// resumes after the closing toggle line.
    #[test]
    fn preformatted() {
        init_logging();
        let input = "```\nhi there\n```\n\nTest\n";
        let expected = vec![
            Node::Preformatted("hi there".to_string()),
            Node::Text(String::new()),
            Node::Text("Test".to_string()),
        ];
        assert_eq!(expected, parse(input));
    }

    /// All three heading levels are recognised, with or without a space
    /// after the marker.
    #[test]
    fn header() {
        init_logging();
        let expected = vec![
            heading_node(1, "hi"),
            heading_node(2, "there"),
            heading_node(3, "my friends"),
        ];
        assert_eq!(expected, parse("#hi\n##there\n### my friends"));
    }

    /// Links parse both with and without a user-friendly name.
    #[test]
    fn link() {
        init_logging();
        let expected = vec![link_node("/", None), link_node("/", Some("Go home"))];
        assert_eq!(expected, parse("=>/\n=> / Go home"));
    }

    /// Parses the `ambig_preformatted.gmi` fixture and checks the resulting
    /// preformatted and text nodes.
    #[test]
    fn ambiguous_preformatted() {
        init_logging();
        let input = include_str!("../../testdata/ambig_preformatted.gmi");
        let expected = vec![
            Node::Preformatted("FOO".to_string()),
            Node::Text("Foo bar".to_string()),
        ];
        assert_eq!(expected, parse(input));
    }

    /// A text line that looks like a heading is rendered with a leading
    /// space.
    #[test]
    fn ambiguous_text() {
        init_logging();
        let node = Node::Text("#1 World's Best Coder".to_string());
        let mut buffer: Vec<u8> = Vec::new();
        render(vec![node], &mut buffer).unwrap();
        let rendered = String::from_utf8(buffer).unwrap();
        assert_eq!(" #1 World's Best Coder\n", rendered);
    }
}