use crate::gemtext::{Document, GemtextContentBlock, HeadingLevel};
use alloc::string::String;
use alloc::{borrow::ToOwned, vec::Vec};
use core::str::FromStr;
/// Parses a whole Gemtext document into its sequence of content blocks.
///
/// Lines are classified one at a time with [`GemtextLine::parse_normal`]:
///
/// * heading, link and quote lines each become one block;
/// * a run of consecutive list items is merged into a single `List` block;
/// * everything between a preformat toggle and the next toggle (or the end of
///   the input) is joined with `"\n"` into one `Pre` block, unless the joined
///   content is empty, in which case no block is produced;
/// * an empty line becomes a `Text` block whose content is `"\n"`.
fn parse_document(document: &str) -> Vec<GemtextContentBlock> {
    use GemtextContentBlock as Block;

    let mut blocks: Vec<Block> = Vec::with_capacity(512);
    let mut remaining = document.lines().peekable();

    while let Some(current) = remaining.next() {
        let block = match GemtextLine::parse_normal(current) {
            GemtextLine::Heading { level, content } => Block::Heading { level, content },
            GemtextLine::Link { target, label } => Block::Link { target, label },
            GemtextLine::Quote { content } => Block::Quote { content },
            GemtextLine::Text { content } => {
                // Blank lines are kept, represented as a lone newline.
                let content = if content.is_empty() {
                    String::from("\n")
                } else {
                    content
                };
                Block::Text { content }
            }
            GemtextLine::ListItem { content } => {
                let mut items: Vec<String> = Vec::with_capacity(8);
                items.push(content);
                // Absorb the following lines for as long as they are list items;
                // the first line that is not stays queued for the outer loop.
                while let Some(&candidate) = remaining.peek() {
                    match GemtextLine::parse_normal(candidate) {
                        GemtextLine::ListItem { content } => {
                            items.push(content);
                            remaining.next();
                        }
                        _ => break,
                    }
                }
                Block::List { items }
            }
            GemtextLine::PreformatToggle { alt_text } => {
                let mut text_lines: Vec<String> = Vec::with_capacity(8);
                // Take lines verbatim up to and including the closing toggle;
                // running out of input closes the section as well.
                for inner in remaining.by_ref() {
                    match GemtextLine::parse_pre_formatted(inner) {
                        GemtextLine::Text { content } => text_lines.push(content),
                        GemtextLine::PreformatToggle { .. } => break,
                        _ => unreachable!("got something other than Text or Pre in preformat mode"),
                    }
                }
                let content = text_lines.join("\n");
                if content.is_empty() {
                    // Nothing to show for this section.
                    continue;
                }
                Block::Pre { alt_text, content }
            }
        };
        blocks.push(block);
    }

    blocks
}
/// A single classified line of Gemtext source, before neighbouring lines are
/// merged into content blocks.
#[derive(Debug)]
enum GemtextLine {
    /// A line starting with `#`, `##` or `###`; `content` is the trimmed remainder.
    Heading {
        level: HeadingLevel,
        content: String,
    },
    /// A `=>` line with a target and an optional human-readable label.
    Link {
        target: String,
        label: Option<String>,
    },
    /// A line starting with `* `; `content` is the trimmed remainder.
    ListItem {
        content: String,
    },
    /// A line starting with `>`; `content` is the trimmed remainder.
    Quote {
        content: String,
    },
    /// A line starting with three backticks, which opens or closes a
    /// preformatted section.
    PreformatToggle {
        alt_text: Option<String>,
    },
    /// Any other line, stored as-is.
    Text {
        content: String,
    },
}
impl GemtextLine {
fn parse_normal(line: &str) -> Self {
if let Some(heading) = line.strip_prefix("###") {
Self::Heading {
level: HeadingLevel::Three,
content: heading.to_owned().trim().to_owned(),
}
} else if let Some(heading) = line.strip_prefix("##") {
Self::Heading {
level: HeadingLevel::Two,
content: heading.to_owned().trim().to_owned(),
}
} else if let Some(heading) = line.strip_prefix("#") {
Self::Heading {
level: HeadingLevel::One,
content: heading.to_owned().trim().to_owned(),
}
} else if let Some(link) = line.strip_prefix("=>") {
let parts = link.trim();
if parts.is_empty() {
Self::Text {
content: line.trim().to_owned(), }
} else {
let target_end_idx = parts.find(char::is_whitespace).unwrap_or(parts.len());
let target = &parts[..target_end_idx];
let label = parts[target_end_idx..].trim();
if label.is_empty() {
Self::Link {
target: target.trim().to_owned(),
label: None,
}
} else {
Self::Link {
target: target.trim().to_owned(),
label: Some(label.trim().to_owned()), }
}
}
} else if let Some(item) = line.strip_prefix("* ") {
Self::ListItem {
content: item.trim().to_owned(),
}
} else if let Some(quote) = line.strip_prefix(">") {
Self::Quote {
content: quote.trim().to_owned(),
}
} else if let Some(alt_text) = line.strip_prefix("```") {
let alt_text = alt_text.trim();
if alt_text.is_empty() {
Self::PreformatToggle { alt_text: None }
} else {
Self::PreformatToggle {
alt_text: Some(alt_text.to_owned()),
}
}
} else {
Self::Text {
content: line.to_owned(),
}
}
}
fn parse_pre_formatted(line: &str) -> Self {
if line.strip_prefix("```").is_some() {
Self::PreformatToggle { alt_text: None }
} else {
Self::Text {
content: line.to_owned(),
}
}
}
}
impl FromStr for Document {
    /// Parsing cannot fail: a line that matches no Gemtext marker is kept as text.
    type Err = core::convert::Infallible;

    /// Parses `s` as Gemtext and wraps the resulting blocks in a `Document`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Self {
            contents: parse_document(s),
        })
    }
}
#[cfg(test)]
mod tests {
use super::*;
use alloc::vec;
/// Every listed spelling of a heading line parses to exactly one `Heading`
/// block with trimmed content.
#[test]
fn test_parses_headings() {
    use GemtextContentBlock::Heading;
    use HeadingLevel::{One, Three, Two};

    // One expected block per level, paired with every input that must produce it.
    let groups: [(GemtextContentBlock, &[&str]); 3] = [
        (
            Heading {
                level: One,
                content: "Heading".into(),
            },
            &["#Heading", "# Heading", "# Heading", "# Heading ", "# Heading "],
        ),
        (
            Heading {
                level: Two,
                content: "Heading".into(),
            },
            &["##Heading", "## Heading", "## Heading"],
        ),
        (
            Heading {
                level: Three,
                content: "Heading".into(),
            },
            &["###Heading", "### Heading", "### Heading", "### Heading"],
        ),
    ];

    for (expected, inputs) in groups {
        for input in inputs {
            let document: Document = input.parse().unwrap();
            let blocks = document.contents;
            assert_eq!(blocks.len(), 1);
            let parsed = blocks.first().expect("single-line document");
            assert_eq!(*parsed, expected);
        }
    }
}
/// Link lines are split into a target and an optional label.
#[test]
fn test_parses_links() {
    use GemtextContentBlock::Link;

    // (input line, expected target, expected label)
    #[rustfmt::skip]
    let cases: [(&str, &str, Option<&str>); 6] = [
        ("=> test",                "test",      None),
        ("=> test link",           "test",      Some("link")),
        ("=> /foo",                "/foo",      None),
        ("=> foo://bar",           "foo://bar", None),
        ("=> foo://bar ext",       "foo://bar", Some("ext")),
        ("=> foo://bar foo://baz", "foo://bar", Some("foo://baz")),
    ];

    for (input, target, label) in cases {
        let expected = Link {
            target: String::from(target),
            label: label.map(String::from),
        };

        let document: Document = input.parse().unwrap();
        let blocks = document.contents;
        assert_eq!(blocks.len(), 1);
        let parsed = blocks.first().expect("single-line document");
        assert_eq!(*parsed, expected);
    }
}
/// Two lines separated by a single line break each become their own `Text` block.
#[test]
fn test_one_newline_same_paragraph() {
    use GemtextContentBlock::Text;

    const FIRST: &str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.";
    const SECOND: &str = "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

    let input = [FIRST, SECOND].join("\n");
    let blocks = parse_document(&input);

    assert_eq!(
        blocks,
        vec![
            Text {
                content: String::from(FIRST)
            },
            Text {
                content: String::from(SECOND)
            },
        ]
    );
}
/// A blank line between two paragraphs is preserved as its own `"\n"` text block.
#[test]
fn test_two_newlines_different_paragraphs() {
    use GemtextContentBlock::Text;

    const FIRST: &str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.";
    const SECOND: &str = "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

    // The blank line is the whole point of this test, so it is written with
    // explicit escapes rather than as an easily-lost empty line inside a
    // multi-line literal.
    let input = [FIRST, SECOND].join("\n\n");
    let blocks = parse_document(&input);

    assert_eq!(
        blocks,
        vec![
            Text {
                content: String::from(FIRST),
            },
            Text {
                content: String::from("\n"),
            },
            Text {
                content: String::from(SECOND),
            },
        ]
    );
}
/// A preformatted section that is never closed still yields a `Pre` block,
/// just as if it had been closed at the end of the input.
#[test]
fn test_parse_unclosed_pre() {
    use GemtextContentBlock::{Heading, Pre, Text};
    use HeadingLevel::One;
    use pretty_assertions::assert_eq;

    let expected = vec![
        Heading {
            level: One,
            content: "Test".into(),
        },
        Text {
            content: "The following is an un-closed pre-formatted block. It should parse the same as if it were properly closed:".into(),
        },
        Pre {
            alt_text: None,
            content: "This is the block.".into(),
        },
    ];

    let parsed = parse_document(include_str!("../tests/samples/no_close_pre.gmi"));
    assert_eq!(parsed, expected);
}
/// Preformatted sections keep their alt text and their content verbatim,
/// including lines that would otherwise be Gemtext markup.
#[test]
fn test_parse_pre_blocks() {
    use GemtextContentBlock::Pre;
    use pretty_assertions::assert_eq;

    // (alt text, content) of each expected block, in document order.
    let expected: Vec<GemtextContentBlock> = [
        (None, "This one has no alt text."),
        (Some("Things and stuff"), "This has alt text."),
        (
            Some("gemtext"),
            "This has Gemtext markup.\n=> gemini://geminiprotocol.net/docs/gemtext-specification.gmi Gemtext Specification",
        ),
    ]
    .iter()
    .map(|&(alt_text, content): &(Option<&str>, &str)| Pre {
        alt_text: alt_text.map(String::from),
        content: String::from(content),
    })
    .collect();

    let parsed = parse_document(include_str!("../tests/samples/pre_blocks.gmi"));
    assert_eq!(parsed, expected);
}
/// Parses the bundled `sample.gmi` and checks the complete sequence of blocks
/// it produces.
#[test]
fn test_parse_gemtext_sample() {
    use GemtextContentBlock::{Heading, Link, List, Pre, Quote, Text};
    use HeadingLevel::{One, Three, Two};
    use pretty_assertions::assert_eq;

    // Small constructors keep the long expectation below readable.
    let blank = || Text {
        content: String::from("\n"),
    };
    let text = |content: &str| Text {
        content: String::from(content),
    };
    let quote = |content: &str| Quote {
        content: String::from(content),
    };
    let heading = |level: HeadingLevel, content: &str| Heading {
        level,
        content: String::from(content),
    };
    let link = |target: &str, label: Option<&str>| Link {
        target: String::from(target),
        label: label.map(String::from),
    };
    let pre = |alt_text: Option<&str>, content: &str| Pre {
        alt_text: alt_text.map(String::from),
        content: String::from(content),
    };

    let expected = vec![
        heading(One, "This is a heading!"),
        blank(),
        heading(One, "This is also a heading!"),
        blank(),
        text("This is some text. It is not special."),
        blank(),
        heading(Two, "Another heading"),
        blank(),
        heading(Two, "yes, leading spaces are optional"),
        blank(),
        link(
            "gemini://geminiprotocol.net/docs/gemtext-specification.gmi",
            None,
        ),
        blank(),
        link(
            "gemini://geminiprotocol.net/docs/gemtext-specification.gmi",
            Some("Here is a friendly link to the Gemtext specification"),
        ),
        blank(),
        link(
            "https://geminiprotocol.net/docs/gemtext-specification.gmi",
            Some("gemini protocol isn't special for links"),
        ),
        blank(),
        link(
            "gemini://geminiprotocol.net/",
            Some("multiple kinds of whitespace"),
        ),
        blank(),
        link(
            "gemini://geminiprotocol.net/",
            Some("multiple whitespace between parts!"),
        ),
        blank(),
        link(
            "gemini://geminiprotocol.net/",
            Some("weird whitespace within tag!"),
        ),
        blank(),
        link("/foo/bar/baz.txt", Some("leading slash still counts")),
        blank(),
        link("/foo/bar.png", Some("Inline image!")),
        blank(),
        text("Links may also be closer together. Here are some examples from the spec:"),
        link("gemini://example.org/", None),
        link("gemini://example.org/", Some("An example link")),
        link(
            "gemini://example.org/foo",
            Some("Another example link at the same host"),
        ),
        link("foo/bar/baz.txt", Some("A relative link")),
        link("gopher://example.org:70/1", Some("A gopher link")),
        blank(),
        link("this", Some("shouldn't be a link, but it is.")),
        link(
            "gemini://example.org/",
            Some("the leading space is optional"),
        ),
        blank(),
        text("Not a link:"),
        text("=>"),
        blank(),
        heading(Three, "Why not try lists?"),
        blank(),
        blank(),
        heading(Three, "spaces are still optional"),
        blank(),
        List {
            items: vec![
                String::from("This is a list item."),
                String::from("Here's another."),
                String::from("Still a list item!"),
            ],
        },
        text("*This is not."),
        text("** This is also not."),
        text("* Neither is this."),
        text("- Not a list item."),
        text(" - Not a list item."),
        blank(),
        quote("Someone said this."),
        blank(),
        quote("Someone also said this."),
        blank(),
        quote("this is not the same quote."),
        quote(""),
        quote("multiline quote! if only."),
        blank(),
        pre(
            None,
            r"This text is preformatted.
# Hello, world!
This isn't to be treated as Gemtext:
=> gemini://example.com
> no one said this",
        ),
        blank(),
        pre(None, "This is also plaintext"),
        blank(),
        blank(),
        pre(Some("This is alt text"), "This text is also preformatted."),
        blank(),
        pre(
            Some(
                r#"Art by Joan Stark of a camp site. A small tent faces a small campfire. There is a log nearby, perfect for sitting on. The initials "jgs" can be seen."#,
            ),
            r#" ______
jgs / /\
/ / \
/_____/----\_ (
" " ).
_ ___ o (:') o
(@))_)) o ~/~~\~ o
o o o"#,
        ),
        blank(),
        text("Syntax highlighting may be applied to preformatted blocks:"),
        pre(Some("javascript"), " column.substring(0,num)"),
        blank(),
        text(
            r#"The spec says, "Any text following the leading "```" of a preformat toggle line MUST be ignored by clients." So..."#,
        ),
        text("wow, what WAS that??"),
    ];

    let parsed = parse_document(include_str!("../tests/samples/sample.gmi"));
    assert_eq!(parsed, expected);
}
/// Coarse performance guard: the mean wall-clock time to parse `sample.gmi`
/// over `ITERS` runs must stay below `LIMIT` nanoseconds.
#[test]
#[cfg(not(tarpaulin))]
fn bench_parsing() {
    use std::time::Instant;

    const ITERS: u128 = 500;
    const LIMIT: u128 = 80_000;

    let input = include_str!("../tests/samples/sample.gmi");

    let mut total_nanos: u128 = 0;
    for _ in 0..ITERS {
        let start = Instant::now();
        // Bound to a name so the result is dropped only after the clock is read.
        let _parsed = parse_document(input);
        total_nanos += start.elapsed().as_nanos();
    }

    let average = total_nanos / ITERS;
    assert!(average < LIMIT, "{average} ns is too slow!");
}
}