gemtext/lib.rs
1/// This module implements a simple text/gemini parser based on the description
2/// here: https://gemini.circumlunar.space/docs/specification.html
3use std::io::{self, Write};
4
5/// Build a gemini document up from a series of nodes.
6#[derive(Default)]
7pub struct Builder {
8 nodes: Vec<Node>,
9}
10
11impl Builder {
12 pub fn new() -> Builder {
13 Builder::default()
14 }
15
16 pub fn text<T: Into<String>>(mut self, data: T) -> Builder {
17 self.nodes.push(Node::Text(data.into()));
18 self
19 }
20
21 pub fn link<T: Into<String>>(mut self, to: T, name: Option<String>) -> Builder {
22 self.nodes.push(Node::Link {
23 to: to.into(),
24 name: name,
25 });
26 self
27 }
28
29 pub fn preformatted<T: Into<String>>(mut self, data: T) -> Builder {
30 self.nodes.push(Node::Preformatted(data.into()));
31 self
32 }
33
34 pub fn heading<T: Into<String>>(mut self, level: u8, body: T) -> Builder {
35 self.nodes.push(Node::Heading {
36 level: level,
37 body: body.into(),
38 });
39 self
40 }
41
42 pub fn list_item<T: Into<String>>(mut self, item: T) -> Builder {
43 self.nodes.push(Node::ListItem(item.into()));
44 self
45 }
46
47 pub fn quote<T: Into<String>>(mut self, body: T) -> Builder {
48 self.nodes.push(Node::Quote(body.into()));
49 self
50 }
51
52 pub fn build(self) -> Vec<Node> {
53 self.nodes
54 }
55}
56
57/// Render a set of nodes as a document to a writer.
58pub fn render(nodes: Vec<Node>, out: &mut impl Write) -> io::Result<()> {
59 use Node::*;
60
61 for node in nodes {
62 match node {
63 Text(body) => {
64 let special_prefixes = ["=>", "```", "#", "*", ">"];
65 if special_prefixes.iter().any(|prefix| body.starts_with(prefix)) {
66 write!(out, " ")?;
67 }
68 write!(out, "{}\n", body)?
69 },
70 Link { to, name } => match name {
71 Some(name) => write!(out, "=> {} {}\n", to, name)?,
72 None => write!(out, "=> {}\n", to)?,
73 },
74 Preformatted(body) => write!(out, "```\n{}\n```\n", body)?,
75 Heading { level, body } => write!(out, "{} {}\n", "#".repeat(level as usize), body)?,
76 ListItem(body) => write!(out, "* {}\n", body)?,
77 Quote(body) => write!(out, "> {}\n", body)?,
78 };
79 }
80
81 Ok(())
82}
83
/// One element of a text/gemini document.
///
/// Every variant except [`Node::Preformatted`] corresponds to a single line
/// of the document; a preformatted node holds all the lines found between a
/// pair of toggle lines.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Node {
    /// A plain text line.
    ///
    /// This is the fallback line type: any line that does not match one of
    /// the other line types is a text line, and most lines of a typical
    /// document are text lines.
    Text(String),

    /// A link line, i.e. a line beginning with the two characters `=>`:
    ///
    /// ```text
    /// =>[<whitespace>]<URL>[<whitespace><USER-FRIENDLY LINK NAME>]
    /// ```
    ///
    /// * `<whitespace>` is any non-zero number of consecutive spaces or tabs.
    /// * Square brackets mark optional parts.
    /// * `<URL>` may be absolute or relative; when it has no scheme,
    ///   `gemini://` is implied.
    ///
    /// `to` holds the URL and `name` the user-friendly link name, if any.
    Link { to: String, name: Option<String> },

    /// The text enclosed by two preformatted toggle lines.
    ///
    /// A toggle line is any line whose first three characters are back ticks,
    /// with no leading whitespace. Toggle lines are not part of the content
    /// shown to the user; they only switch the parser's preformatted mode on
    /// or off. The mode starts out off and is the only state a parser has to
    /// keep. While it is on, the usual line type rules are suspended and
    /// every line is preformatted text.
    ///
    /// Clients should show preformatted text in a neutral, monowidth font,
    /// without changing whitespace or adding styling, and graphical clients
    /// should prefer scrolling over wrapping for long lines. Typical content
    /// is ASCII art or source code; code in whitespace-sensitive languages
    /// (e.g. Python) should survive being copied out of the client.
    Preformatted(String),

    /// A heading line: one, two or three `#` characters, optional
    /// whitespace, then the heading text.
    ///
    /// `level` is the number of `#` characters; `#`, `##` and `###` are
    /// analogous to `<h1>`, `<h2>` and `<h3>` in HTML. Clients MAY style
    /// headings (simple clients can print the line as is, `#`s included), but
    /// headings mainly exist to give the document a machine-readable
    /// structure, e.g. for a generated table of contents, or so that tools
    /// can use the first heading of a file as its human-friendly title.
    Heading { level: u8, body: String },

    /// An unordered list item, i.e. a line beginning with `* `.
    ///
    /// This line type is purely stylistic. Advanced clients may show a bullet
    /// symbol instead of the `*`; the remaining text is presented like a text
    /// line, and wrapped lines can be indented to line up after the bullet.
    ListItem(String),

    /// A quote line, i.e. a line beginning with `>`.
    ///
    /// It lets advanced clients style text that is quoted from an external
    /// source, for example by putting a `>` in front of every line that
    /// results from wrapping a long quote to the viewport.
    Quote(String),
}

impl Node {
    /// Returns an empty text line, i.e. a blank line of the document.
    pub fn blank() -> Node {
        Node::Text(String::new())
    }
}
170
171pub fn parse(doc: &str) -> Vec<Node> {
172 let mut result: Vec<Node> = vec![];
173 let mut collect_preformatted: bool = false;
174 let mut preformatted_buffer: Vec<u8> = vec![];
175
176 for line in doc.lines() {
177 if line.starts_with("```") {
178 collect_preformatted = !collect_preformatted;
179 if !collect_preformatted {
180 result.push(Node::Preformatted(
181 String::from_utf8(preformatted_buffer)
182 .unwrap()
183 .trim_end()
184 .to_string(),
185 ));
186 preformatted_buffer = vec![];
187 }
188 continue;
189 }
190
191 if collect_preformatted && line != "```" {
192 write!(preformatted_buffer, "{}\n", line).unwrap();
193 continue;
194 }
195
196 // Quotes
197 if line.starts_with(">") {
198 result.push(Node::Quote(line[1..].trim().to_string()));
199 continue;
200 }
201
202 // List items
203 if line.starts_with("*") {
204 result.push(Node::ListItem(line[1..].trim().to_string()));
205 continue;
206 }
207
208 // Headings
209 if line.starts_with("###") {
210 result.push(Node::Heading {
211 level: 3,
212 body: line[3..].trim().to_string(),
213 });
214 continue;
215 }
216 if line.starts_with("##") {
217 result.push(Node::Heading {
218 level: 2,
219 body: line[2..].trim().to_string(),
220 });
221 continue;
222 }
223 if line.starts_with("#") {
224 result.push(Node::Heading {
225 level: 1,
226 body: line[1..].trim().to_string(),
227 });
228 continue;
229 }
230
231 // Links
232 if line.starts_with("=>") {
233 let sp = line[2..].split_ascii_whitespace().collect::<Vec<&str>>();
234
235 match sp.len() {
236 1 => result.push(Node::Link {
237 to: sp[0].trim().to_string(),
238 name: None,
239 }),
240 _ => result.push(Node::Link {
241 to: sp[0].trim().to_string(),
242 name: Some(sp[1..].join(" ").trim().to_string()),
243 }),
244 }
245
246 continue;
247 }
248
249 result.push(Node::Text(line.to_string()));
250 }
251
252 result
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Sets up logging for a test. The result of `try_init` is deliberately
    /// discarded (presumably because another test may already have installed
    /// the logger).
    fn init_logger() {
        let _ = pretty_env_logger::try_init();
    }

    /// Shorthand for a heading node.
    fn heading(level: u8, body: &str) -> Node {
        Node::Heading {
            level,
            body: body.to_string(),
        }
    }

    /// Shorthand for a link node.
    fn link_to(to: &str, name: Option<&str>) -> Node {
        Node::Link {
            to: to.to_string(),
            name: name.map(str::to_string),
        }
    }

    /// Parsing the bundled help document yields at least one node.
    #[test]
    fn basic() {
        init_logger();
        let parsed = parse(include_str!("../../majc/src/help.gmi"));
        assert_ne!(parsed.len(), 0);
    }

    /// A quote marker that is not followed by a space is still a quote.
    #[test]
    fn quote() {
        init_logger();
        let expected = vec![Node::Quote(String::from("hi there"))];
        assert_eq!(expected, parse(">hi there"));
    }

    /// A list marker that is not followed by a space is still a list item.
    #[test]
    fn list() {
        init_logger();
        let expected = vec![Node::ListItem(String::from("hi there"))];
        assert_eq!(expected, parse("*hi there"));
    }

    /// A preformatted block is followed by ordinary text lines.
    #[test]
    fn preformatted() {
        init_logger();
        let input = "```\nhi there\n```\n\nTest\n";
        let expected = vec![
            Node::Preformatted(String::from("hi there")),
            Node::Text(String::new()),
            Node::Text(String::from("Test")),
        ];
        assert_eq!(expected, parse(input));
    }

    /// All three heading levels are recognised, with or without a space.
    #[test]
    fn header() {
        init_logger();
        let expected = vec![
            heading(1, "hi"),
            heading(2, "there"),
            heading(3, "my friends"),
        ];
        assert_eq!(expected, parse("#hi\n##there\n### my friends"));
    }

    /// Links are parsed both with and without a user-friendly name.
    #[test]
    fn link() {
        init_logger();
        let expected = vec![link_to("/", None), link_to("/", Some("Go home"))];
        assert_eq!(expected, parse("=>/\n=> / Go home"));
    }

    /// The fixture document parses into one preformatted block followed by
    /// one text line.
    #[test]
    fn ambiguous_preformatted() {
        init_logger();
        let input = include_str!("../../testdata/ambig_preformatted.gmi");
        let expected = vec![
            Node::Preformatted(String::from("FOO")),
            Node::Text(String::from("Foo bar")),
        ];
        assert_eq!(expected, parse(input));
    }

    /// Text that starts with a heading marker is rendered with a leading space.
    #[test]
    fn ambiguous_text() {
        init_logger();
        let node = Node::Text(String::from("#1 World's Best Coder"));
        let mut buffer: Vec<u8> = Vec::new();
        render(vec![node], &mut buffer).unwrap();
        let output = String::from_utf8(buffer).unwrap();
        assert_eq!(" #1 World's Best Coder\n", output)
    }
}
357}