use comrak::nodes::{AstNode, NodeValue};
use crate::model::SearchEntry;
/// Serializes the search index entries into a single JSON string.
///
/// The slice is handed to `serde_json::to_string` as-is, so the result is one
/// JSON array with one element per entry.
///
/// # Panics
///
/// Panics with the message `SearchEntry serializes` if `serde_json` reports
/// a serialization error.
pub fn index_json(entries: &[SearchEntry]) -> String {
    let serialized = serde_json::to_string(entries);
    serialized.expect("SearchEntry serializes")
}
/// Flattens a parsed Markdown tree into a single line of searchable text.
///
/// Only `Text` and inline `Code` nodes reachable from `root` contribute to the
/// output; every other node kind is skipped. Text nodes have their
/// `[[wikilink]]` syntax unwrapped on the way in. All runs of whitespace in
/// the result are collapsed to single spaces, with no leading or trailing
/// whitespace.
pub fn plaintext<'a>(root: &'a AstNode<'a>) -> String {
    let mut raw = String::new();
    for descendant in root.descendants() {
        let ast = descendant.data.borrow();
        if let NodeValue::Text(text) = &ast.value {
            push_unwrapping_wikilinks(&mut raw, text);
            // Separator so text from the next node is not fused onto this one.
            raw.push(' ');
        } else if let NodeValue::Code(code) = &ast.value {
            raw.push(' ');
            raw.push_str(&code.literal);
            raw.push(' ');
        }
    }

    // Collapse the padding added above (and any whitespace in the source
    // text) into single spaces between words.
    let mut collapsed = String::with_capacity(raw.len());
    for word in raw.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Appends `text` to `buf`, replacing each `[[...]]` wikilink with its display
/// text padded by a single space on both sides.
///
/// The display text is chosen as follows:
/// - `[[target|label]]` yields the trimmed `label` (everything after the
///   first `|`);
/// - `[[target]]` yields the trimmed `target`;
/// - `[[target|]]` (blank label) yields the trimmed `target`, so the stray
///   `|` separator never leaks into the output.
///
/// An opening `[[` that has no closing `]]` after it is not a wikilink: it and
/// everything following it are appended verbatim.
fn push_unwrapping_wikilinks(buf: &mut String, text: &str) {
    let mut rest = text;
    while let Some(open) = rest.find("[[") {
        let after_open = &rest[open + 2..];
        let close = match after_open.find("]]") {
            Some(close) => close,
            // Unterminated link: leave the remainder (including `[[`) literal.
            None => break,
        };

        // Text preceding the link is copied through unchanged.
        buf.push_str(&rest[..open]);

        let inner = &after_open[..close];
        let display = match inner.split_once('|') {
            Some((target, label)) => {
                let label = label.trim();
                if label.is_empty() {
                    // Blank label: fall back to the target alone, not to
                    // `inner`, which would still contain the `|`.
                    target.trim()
                } else {
                    label
                }
            }
            None => inner.trim(),
        };

        buf.push(' ');
        buf.push_str(display);
        buf.push(' ');

        rest = &after_open[close + 2..];
    }
    buf.push_str(rest);
}
// Unit tests for the plaintext extraction and JSON index serialization above.
#[cfg(test)]
mod tests {
use super::*;
use crate::markdown::comrak_options;
use comrak::{parse_document, Arena};
/// Test helper: parses `body` as Markdown using the project's
/// `comrak_options()` and returns the `plaintext` of the resulting tree.
fn plaintext_of(body: &str) -> String {
let arena = Arena::new();
let options = comrak_options();
let root = parse_document(&arena, body, &options);
plaintext(root)
}
/// Heading, emphasis, inline code and link markup are dropped while their
/// visible text is kept; the link destination does not appear in the output.
#[test]
fn strips_markup_to_plaintext() {
let text = plaintext_of("# Title\n\nSome **bold** and `code` and a [link](/x).\n");
assert!(text.contains("Title"));
assert!(text.contains("Some bold and code and a link"));
assert!(!text.contains('#'));
assert!(!text.contains('*'));
assert!(!text.contains("/x"));
}
/// A piped wikilink contributes searchable text and loses its brackets.
/// The check is deliberately loose: either the label or the target may
/// appear in the output.
#[test]
fn includes_wikilink_inner_text() {
let text = plaintext_of("see [[guide/intro|The Intro]] here\n");
assert!(text.contains("The Intro") || text.contains("guide/intro"));
assert!(!text.contains("[["));
}
/// A wikilink without a label is unwrapped to its target text.
#[test]
fn unwraps_broken_wikilink_target_into_index() {
let text = plaintext_of("see [[missing-page]] x\n");
assert!(text.contains("missing-page"));
assert!(!text.contains("[["));
}
/// An opening `[[` with no closing `]]` is kept literally in the output.
#[test]
fn unterminated_wikilink_bracket_is_left_literal() {
let text = plaintext_of("see [[half open\n");
assert!(text.contains("half open"));
assert!(text.contains("[["));
}
/// Paragraph breaks and trailing newlines collapse to single spaces with
/// no leading or trailing whitespace.
#[test]
fn collapses_whitespace() {
let text = plaintext_of("a\n\n\nb c\n");
assert_eq!(text, "a b c");
}
/// `index_json` emits a JSON array whose objects carry the `slug`, `title`
/// and `text` fields of each entry.
#[test]
fn serializes_index_to_json_array() {
use crate::model::SearchEntry;
let entries = vec![
SearchEntry {
slug: "a".into(),
title: "A".into(),
text: "alpha".into(),
},
SearchEntry {
slug: "b".into(),
title: "B".into(),
text: "beta".into(),
},
];
let json = index_json(&entries);
assert!(json.starts_with('['));
// Only the first entry's fields are checked; the compact `"key":"value"`
// form matched here is what `index_json` is expected to produce.
assert!(json.contains(r#""slug":"a""#));
assert!(json.contains(r#""title":"A""#));
assert!(json.contains(r#""text":"alpha""#));
}
}