1use std::collections::BTreeMap;
16
17use carta_ast::{Block, Document, Inline, MetaValue, QuoteType, Target};
18use carta_core::{Reader, ReaderOptions, Result, presets};
19
20use crate::commonmark::CommonmarkReader;
21use crate::html::parse_inline_fragment;
22
/// Reader that converts an OPML outline document into a [`Document`].
///
/// The type is a stateless unit struct; all behaviour lives in its
/// [`Reader`] implementation.
#[derive(Debug, Default, Clone, Copy)]
pub struct OpmlReader;
26
27impl Reader for OpmlReader {
28 fn read(&self, input: &str, _options: &ReaderOptions) -> Result<Document> {
29 let nodes = parse_nodes(input);
30 let mut blocks = Vec::new();
31 let head = find_child(&nodes, "head");
32 let body = find_child(&nodes, "body");
33 for node in body.map(element_children).unwrap_or_default() {
34 emit_outline(node, 1, &mut blocks)?;
35 }
36 Ok(Document {
37 api_version: carta_ast::ApiVersion::default(),
38 meta: build_meta(head)
39 .into_iter()
40 .map(|(k, v)| (k.into(), v))
41 .collect(),
42 blocks,
43 })
44 }
45}
46
/// One element of the leniently parsed markup tree.
#[derive(Debug)]
struct Element {
    /// Tag name exactly as written in the input.
    name: String,
    /// Attribute values keyed by attribute name. The parser also stores the
    /// element's decoded text content here under the key `__text`.
    attributes: BTreeMap<String, String>,
    /// Child elements in document order.
    children: Vec<Element>,
}
55
56fn element_children(element: &Element) -> Vec<&Element> {
57 element.children.iter().collect()
58}
59
60fn find_child<'a>(nodes: &'a [Element], name: &str) -> Option<&'a Element> {
63 for node in nodes {
64 if node.name == name {
65 return Some(node);
66 }
67 if let Some(found) = node.children.iter().find(|child| child.name == name) {
68 return Some(found);
69 }
70 }
71 None
72}
73
74fn emit_outline(outline: &Element, level: i32, blocks: &mut Vec<Block>) -> Result<()> {
75 if outline.name != "outline" {
76 return Ok(());
77 }
78 let heading = outline
79 .attributes
80 .get("text")
81 .map(|text| smart_inlines(parse_inline_fragment(text)))
82 .unwrap_or_default();
83 let heading = if is_link_outline(outline) {
84 let url = outline.attributes.get("url").cloned().unwrap_or_default();
85 vec![Inline::Link(
86 Box::default(),
87 heading,
88 Box::new(Target {
89 url: url.into(),
90 title: carta_ast::Text::default(),
91 }),
92 )]
93 } else {
94 heading
95 };
96 blocks.push(Block::Header(level, Box::default(), heading));
97 if let Some(note) = outline.attributes.get("_note") {
98 let parsed = CommonmarkReader.read(note, ¬e_options())?;
99 blocks.extend(parsed.blocks);
100 }
101 for child in &outline.children {
102 emit_outline(child, level + 1, blocks)?;
103 }
104 Ok(())
105}
106
107fn is_link_outline(outline: &Element) -> bool {
110 outline
111 .attributes
112 .get("type")
113 .is_some_and(|kind| kind.eq_ignore_ascii_case("link"))
114}
115
/// Builds the reader options used for the body of a `_note` attribute.
///
/// Starts from the defaults, switches the extension set to the Markdown
/// preset and turns on `greedy_paragraphs`.
fn note_options() -> ReaderOptions {
    // NOTE(review): fields are assigned after `default()` rather than via a
    // struct literal, presumably because `ReaderOptions` cannot be built
    // literally from this crate; confirm before restructuring.
    let mut options = ReaderOptions::default();
    options.extensions = presets::MARKDOWN;
    options.greedy_paragraphs = true;
    options
}
125
126fn build_meta(head: Option<&Element>) -> BTreeMap<String, MetaValue> {
127 let mut meta = BTreeMap::new();
128 let value = |name: &str| -> Option<&str> {
132 head.and_then(|head| head.children.iter().find(|child| child.name == name))
133 .map(|element| {
134 element
135 .attributes
136 .get("__text")
137 .map(String::as_str)
138 .unwrap_or_default()
139 })
140 };
141 let title = tokenize_meta(value("title").unwrap_or_default());
142 let date = tokenize_meta(value("dateModified").unwrap_or_default());
143 let author = match value("ownerName") {
144 Some(owner) => vec![MetaValue::MetaInlines(tokenize_meta(owner))],
145 None => Vec::new(),
146 };
147 meta.insert("title".to_owned(), MetaValue::MetaInlines(title));
148 meta.insert("author".to_owned(), MetaValue::MetaList(author));
149 meta.insert("date".to_owned(), MetaValue::MetaInlines(date));
150 meta
151}
152
153fn tokenize_meta(text: &str) -> Vec<Inline> {
159 let mut out = Vec::new();
160 let mut chars = text.chars().peekable();
161 let mut word = String::new();
162 while let Some(ch) = chars.next() {
163 if ch.is_whitespace() {
164 if !word.is_empty() {
165 out.push(Inline::Str(std::mem::take(&mut word).into()));
166 }
167 let mut has_newline = ch == '\n' || ch == '\r';
168 while let Some(&next) = chars.peek() {
169 if !next.is_whitespace() {
170 break;
171 }
172 has_newline |= next == '\n' || next == '\r';
173 chars.next();
174 }
175 out.push(if has_newline {
176 Inline::SoftBreak
177 } else {
178 Inline::Space
179 });
180 } else {
181 word.push(ch);
182 }
183 }
184 if !word.is_empty() {
185 out.push(Inline::Str(word.into()));
186 }
187 out
188}
189
190fn parse_nodes(input: &str) -> Vec<Element> {
193 let chars: Vec<char> = input.chars().collect();
194 let mut pos = 0;
195 let mut nodes = Vec::new();
196 while let Some(element) = next_element(&chars, &mut pos) {
197 nodes.push(element);
198 }
199 nodes
200}
201
202fn next_element(chars: &[char], pos: &mut usize) -> Option<Element> {
206 loop {
207 skip_to_tag(chars, pos);
208 if *pos >= chars.len() {
209 return None;
210 }
211 if skip_non_element(chars, pos) {
212 continue;
213 }
214 return parse_element(chars, pos);
215 }
216}
217
/// Advances `*pos` to the next `<` at or after it, or to the end of `chars`
/// when there is none. A position already past the end is left unchanged.
fn skip_to_tag(chars: &[char], pos: &mut usize) {
    let remaining = chars.get(*pos..).unwrap_or_default();
    let offset = remaining
        .iter()
        .position(|&ch| ch == '<')
        .unwrap_or(remaining.len());
    *pos += offset;
}
227
228fn skip_non_element(chars: &[char], pos: &mut usize) -> bool {
231 if starts_with(chars, *pos, "<!--") {
232 skip_until(chars, pos, "-->");
233 return true;
234 }
235 if starts_with(chars, *pos, "<?") {
236 skip_until(chars, pos, "?>");
237 return true;
238 }
239 if starts_with(chars, *pos, "<!") {
240 skip_until(chars, pos, ">");
241 return true;
242 }
243 false
244}
245
246fn parse_element(chars: &[char], pos: &mut usize) -> Option<Element> {
248 if chars.get(*pos) != Some(&'<') {
249 return None;
250 }
251 *pos += 1;
252 let name = read_name(chars, pos);
253 if name.is_empty() {
254 skip_until(chars, pos, ">");
255 return None;
256 }
257 let mut attributes = BTreeMap::new();
258 loop {
259 skip_whitespace(chars, pos);
260 match chars.get(*pos) {
261 None => {
262 return Some(Element {
263 name,
264 attributes,
265 children: Vec::new(),
266 });
267 }
268 Some('/') => {
269 *pos += 1;
270 skip_until(chars, pos, ">");
271 return Some(Element {
272 name,
273 attributes,
274 children: Vec::new(),
275 });
276 }
277 Some('>') => {
278 *pos += 1;
279 break;
280 }
281 Some(_) => {
282 if let Some((key, value)) = read_attribute(chars, pos) {
283 attributes.insert(key, value);
284 } else {
285 *pos += 1;
286 }
287 }
288 }
289 }
290 let (children, text) = parse_children(chars, pos);
291 if !text.is_empty() {
292 attributes.insert("__text".to_owned(), text);
293 }
294 Some(Element {
295 name,
296 attributes,
297 children,
298 })
299}
300
/// Parses an element's content, starting just after its opening tag.
///
/// Returns the child elements in order together with the concatenation of
/// all entity-decoded text found between them. Parsing stops at end of input
/// or at the first closing tag; the closing tag's name is read but not
/// compared with the opening tag.
fn parse_children(chars: &[char], pos: &mut usize) -> (Vec<Element>, String) {
    let mut children = Vec::new();
    let mut text = String::new();
    loop {
        // Gather the raw text up to the next tag and decode it as one run.
        let mut run = String::new();
        while let Some(&ch) = chars.get(*pos) {
            if ch == '<' {
                break;
            }
            run.push(ch);
            *pos += 1;
        }
        text.push_str(&decode_entities(&run));
        if *pos >= chars.len() {
            break;
        }
        if starts_with(chars, *pos, "</") {
            // Any closing tag ends this element; its name is discarded.
            *pos += 2;
            let _ = read_name(chars, pos);
            skip_until(chars, pos, ">");
            break;
        }
        if skip_non_element(chars, pos) {
            continue;
        }
        if let Some(child) = parse_element(chars, pos) {
            children.push(child);
        } else {
            // Unparseable tag: move to the next `<` and step one character
            // past it before resuming.
            skip_to_tag(chars, pos);
            *pos = (*pos).saturating_add(1);
        }
    }
    (children, text)
}
339
/// Reads a tag name starting at `*pos`, stopping before whitespace, `>` or
/// `/`, and advances `*pos` past the characters consumed.
fn read_name(chars: &[char], pos: &mut usize) -> String {
    let name: String = chars
        .get(*pos..)
        .unwrap_or_default()
        .iter()
        .copied()
        .take_while(|&ch| !(ch.is_whitespace() || ch == '>' || ch == '/'))
        .collect();
    *pos += name.chars().count();
    name
}
351
352fn read_attribute(chars: &[char], pos: &mut usize) -> Option<(String, String)> {
355 let key = read_attr_name(chars, pos);
356 if key.is_empty() {
357 return None;
358 }
359 skip_whitespace(chars, pos);
360 if chars.get(*pos) != Some(&'=') {
361 return Some((key, String::new()));
362 }
363 *pos += 1;
364 skip_whitespace(chars, pos);
365 let Some("e @ ('"' | '\'')) = chars.get(*pos) else {
366 return Some((key, String::new()));
367 };
368 *pos += 1;
369 let mut raw = String::new();
370 while let Some(&ch) = chars.get(*pos) {
371 if ch == quote {
372 *pos += 1;
373 break;
374 }
375 raw.push(ch);
376 *pos += 1;
377 }
378 Some((key, decode_entities(&raw)))
379}
380
/// Reads an attribute name starting at `*pos`, stopping before whitespace,
/// `=`, `>` or `/`, and advances `*pos` past the characters consumed.
fn read_attr_name(chars: &[char], pos: &mut usize) -> String {
    let start = *pos;
    while chars
        .get(*pos)
        .is_some_and(|&ch| !(ch.is_whitespace() || matches!(ch, '=' | '>' | '/')))
    {
        *pos += 1;
    }
    chars.get(start..*pos).unwrap_or_default().iter().collect()
}
392
/// Advances `*pos` past any whitespace characters at the current position.
fn skip_whitespace(chars: &[char], pos: &mut usize) {
    while chars.get(*pos).is_some_and(|ch| ch.is_whitespace()) {
        *pos += 1;
    }
}
401
/// Reports whether the characters of `chars` from index `pos` onward begin
/// with `prefix`. An empty prefix always matches; running off the end of
/// `chars` is a mismatch.
fn starts_with(chars: &[char], pos: usize, prefix: &str) -> bool {
    let mut window = chars.iter().skip(pos);
    prefix
        .chars()
        .all(|expected| window.next() == Some(&expected))
}
408
409fn skip_until(chars: &[char], pos: &mut usize, marker: &str) {
412 let marker_len = marker.chars().count();
413 while *pos < chars.len() {
414 if starts_with(chars, *pos, marker) {
415 *pos += marker_len;
416 return;
417 }
418 *pos += 1;
419 }
420}
421
422fn decode_entities(text: &str) -> String {
425 let mut out = String::with_capacity(text.len());
426 let chars: Vec<char> = text.chars().collect();
427 let mut pos = 0;
428 while let Some(&ch) = chars.get(pos) {
429 if ch != '&' {
430 out.push(ch);
431 pos += 1;
432 continue;
433 }
434 let Some(end) = (pos + 1..chars.len()).find(|&index| chars.get(index) == Some(&';')) else {
435 out.push('&');
436 pos += 1;
437 continue;
438 };
439 let body: String = chars.get(pos + 1..end).unwrap_or_default().iter().collect();
440 if let Some(decoded) = decode_reference(&body) {
441 out.push_str(&decoded);
442 pos = end + 1;
443 } else {
444 out.push('&');
445 pos += 1;
446 }
447 }
448 out
449}
450
/// Decodes the body of a character reference (the part between `&` and `;`).
///
/// Handles the five predefined names `amp`, `lt`, `gt`, `quot` and `apos`,
/// decimal references (`#65`) and hexadecimal references (`#x41` / `#X41`).
/// Returns `None` for any other body, for unparseable digits, and for numbers
/// that are not valid `char` values.
fn decode_reference(body: &str) -> Option<String> {
    let named = match body {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        _ => None,
    };
    if let Some(ch) = named {
        return Some(ch.to_string());
    }

    let digits = body.strip_prefix('#')?;
    let code = match digits
        .strip_prefix('x')
        .or_else(|| digits.strip_prefix('X'))
    {
        Some(hex) => u32::from_str_radix(hex, 16).ok()?,
        None => digits.parse::<u32>().ok()?,
    };
    char::from_u32(code).map(String::from)
}
471
472fn smart_inlines(inlines: Vec<Inline>) -> Vec<Inline> {
479 let folded = inlines.into_iter().map(fold_inline).collect();
480 pair_quotes(folded)
481}
482
/// Applies the typographic pass to a single inline.
///
/// Plain strings get their dash and dot runs folded. Code spans go through
/// [`smart_code`]. Inlines that wrap a list of child inlines have the full
/// [`smart_inlines`] pass applied to those children. Every other inline is
/// returned unchanged.
fn fold_inline(inline: Inline) -> Inline {
    match inline {
        Inline::Str(text) => Inline::Str(fold_text(&text).into()),
        Inline::Code(attr, text) => Inline::Code(attr, smart_code(&text).into()),
        Inline::Emph(children) => Inline::Emph(smart_inlines(children)),
        Inline::Underline(children) => Inline::Underline(smart_inlines(children)),
        Inline::Strong(children) => Inline::Strong(smart_inlines(children)),
        Inline::Strikeout(children) => Inline::Strikeout(smart_inlines(children)),
        Inline::Superscript(children) => Inline::Superscript(smart_inlines(children)),
        Inline::Subscript(children) => Inline::Subscript(smart_inlines(children)),
        Inline::SmallCaps(children) => Inline::SmallCaps(smart_inlines(children)),
        Inline::Quoted(kind, children) => Inline::Quoted(kind, smart_inlines(children)),
        Inline::Span(attr, children) => Inline::Span(attr, smart_inlines(children)),
        Inline::Link(attr, children, target) => Inline::Link(attr, smart_inlines(children), target),
        Inline::Image(attr, children, target) => {
            Inline::Image(attr, smart_inlines(children), target)
        }
        // Remaining variants pass through untouched.
        other => other,
    }
}
506
507fn fold_text(text: &str) -> String {
511 let mut out = String::with_capacity(text.len());
512 let mut chars = text.chars().peekable();
513 while let Some(ch) = chars.next() {
514 match ch {
515 '-' => {
516 let mut len = 1;
517 while chars.peek() == Some(&'-') {
518 chars.next();
519 len += 1;
520 }
521 out.push_str(&fold_dash_run(len));
522 }
523 '.' => {
524 let mut len = 1;
525 while chars.peek() == Some(&'.') {
526 chars.next();
527 len += 1;
528 }
529 out.push_str(&fold_ellipsis_run(len));
530 }
531 other => out.push(other),
532 }
533 }
534 out
535}
536
537fn smart_code(text: &str) -> String {
543 let folded = fold_text(text);
544 let mut run: Vec<RunTok> = Vec::new();
545 for ch in folded.chars() {
546 if ch == '\'' || ch == '"' {
547 run.push(RunTok::Quote(ch));
548 } else {
549 run.push(RunTok::Char(ch));
550 }
551 }
552 let mut items = classify_run(&run);
553 match_quotes(&mut items);
554 let mut out = String::with_capacity(folded.len());
555 for (index, item) in items.iter().enumerate() {
556 match item {
557 Item::Text(text) => out.push_str(text),
558 Item::Break(_) => {}
559 Item::Quote(quote) => out.push(match quote.partner {
560 Some(partner) if partner > index => paired_code_glyph(quote.ch, true),
563 Some(_) => paired_code_glyph(quote.ch, false),
564 None => quote.glyph,
565 }),
566 }
567 }
568 out
569}
570
/// Folds a run of `len` hyphens: one em dash (U+2014) per full group of
/// three, followed by an en dash (U+2013) for a remainder of two or a plain
/// hyphen for a remainder of one.
fn fold_dash_run(len: usize) -> String {
    let tail = match len % 3 {
        1 => "-",
        2 => "\u{2013}",
        _ => "",
    };
    let mut out = "\u{2014}".repeat(len / 3);
    out.push_str(tail);
    out
}
588
/// Folds a run of `len` dots: one ellipsis (U+2026) per full group of three,
/// followed by the leftover dots unchanged.
fn fold_ellipsis_run(len: usize) -> String {
    let mut out = "\u{2026}".repeat(len / 3);
    out.push_str(&".".repeat(len % 3));
    out
}
597
/// One token of a run of text that is scanned for quotes.
enum RunTok {
    /// An ordinary character.
    Char(char),
    /// A straight single or double quote character.
    Quote(char),
    /// A space, soft break or line break inline, kept so it can be re-emitted.
    Break(Inline),
}
605
606fn pair_quotes(inlines: Vec<Inline>) -> Vec<Inline> {
611 let mut out = Vec::new();
612 let mut run: Vec<RunTok> = Vec::new();
613 for inline in inlines {
614 match inline {
615 Inline::Str(text) => {
616 for ch in text.chars() {
617 if ch == '\'' || ch == '"' {
618 run.push(RunTok::Quote(ch));
619 } else {
620 run.push(RunTok::Char(ch));
621 }
622 }
623 }
624 brk @ (Inline::Space | Inline::SoftBreak | Inline::LineBreak) => {
625 run.push(RunTok::Break(brk));
626 }
627 barrier => {
628 out.extend(resolve_run(&std::mem::take(&mut run)));
629 out.push(barrier);
630 }
631 }
632 }
633 out.extend(resolve_run(&run));
634 out
635}
636
/// Reports whether the character before a quote allows the quote to open.
///
/// This holds at the start of a run (`None`), after whitespace, and after a
/// fixed set of characters: straight and curly quotes, `$`, `-`, `.`, `\`,
/// en and em dashes, and the ellipsis.
fn open_context(before: Option<char>) -> bool {
    const OPENING_NEIGHBOURS: [char; 13] = [
        '"', '\'', '$', '-', '.', '\\', '\u{2013}', '\u{2014}', '\u{2018}', '\u{2019}',
        '\u{201c}', '\u{201d}', '\u{2026}',
    ];
    let Some(ch) = before else {
        return true;
    };
    ch.is_whitespace() || OPENING_NEIGHBOURS.contains(&ch)
}
664
665fn opens_quote(before: Option<char>, after: Option<char>) -> bool {
669 open_context(before) && after.is_some_and(|next| !next.is_whitespace())
670}
671
/// Reports whether the quote `ch` may close a quotation. A double quote
/// always may; any other quote may unless it is directly followed by an
/// alphanumeric character.
fn can_close_quote(ch: char, after: Option<char>) -> bool {
    match after {
        Some(next) if ch != '"' => !next.is_alphanumeric(),
        _ => true,
    }
}
681
682fn directional_quote(ch: char, before: Option<char>, after: Option<char>) -> char {
688 if ch == '\'' {
689 return '\u{2019}';
690 }
691 if opens_quote(before, after) {
692 '\u{201c}'
693 } else {
694 '\u{201d}'
695 }
696}
697
/// Returns the curly glyph for one half of a matched quote pair: the single
/// marks for `'`, the double marks for anything else, with `open` selecting
/// the left or right form.
fn paired_code_glyph(ch: char, open: bool) -> char {
    let (opening, closing) = if ch == '\'' {
        ('\u{2018}', '\u{2019}')
    } else {
        ('\u{201c}', '\u{201d}')
    };
    if open { opening } else { closing }
}
709
/// One classified piece of a run, produced by `classify_run`.
enum Item {
    /// Consecutive ordinary characters merged into one string.
    Text(String),
    /// A space or break inline carried through unchanged.
    Break(Inline),
    /// A quote character together with its pairing state.
    Quote(QuoteItem),
}
717
/// Pairing state of a single straight quote within a run.
struct QuoteItem {
    /// The straight quote character as it appeared in the input.
    ch: char,
    /// Whether its surroundings allow it to open a quotation.
    can_open: bool,
    /// Whether its surroundings allow it to close a quotation.
    can_close: bool,
    /// Glyph to emit when the quote stays unpaired.
    glyph: char,
    /// Index (into the item list) of the matching quote, once paired.
    partner: Option<usize>,
}
728
729fn resolve_run(run: &[RunTok]) -> Vec<Inline> {
733 let mut items = classify_run(run);
734 match_quotes(&mut items);
735 render_items(&items, &mut 0)
736}
737
738fn classify_run(run: &[RunTok]) -> Vec<Item> {
742 let context = run_context(run);
743 let mut items = Vec::new();
744 for (index, tok) in run.iter().enumerate() {
745 match tok {
746 RunTok::Char(ch) => match items.last_mut() {
747 Some(Item::Text(text)) => text.push(*ch),
748 _ => items.push(Item::Text(ch.to_string())),
749 },
750 RunTok::Break(brk) => items.push(Item::Break(brk.clone())),
751 RunTok::Quote(ch) => {
752 let (before, after) = context.get(index).copied().unwrap_or((None, None));
753 items.push(Item::Quote(QuoteItem {
754 ch: *ch,
755 can_open: opens_quote(before, after),
756 can_close: can_close_quote(*ch, after),
757 glyph: directional_quote(*ch, before, after),
758 partner: None,
759 }));
760 }
761 }
762 }
763 items
764}
765
/// Pairs opening and closing quotes in `items`, recording each half's
/// partner index.
///
/// A stack of unmatched openers is kept. A quote that can close is paired
/// with the most recent open quote of the same character, and that opener
/// plus everything opened after it is dropped from the stack. A quote opens
/// only when no quote of the same character is currently open, so a kind
/// never nests inside itself.
fn match_quotes(items: &mut [Item]) {
    let mut open: Vec<usize> = Vec::new();
    for index in 0..items.len() {
        let Some(Item::Quote(quote)) = items.get(index) else {
            continue;
        };
        let (ch, can_open, can_close) = (quote.ch, quote.can_open, quote.can_close);
        // Stack position of the innermost open quote of this same character.
        let open_same = open.iter().rposition(|&i| quote_at(items, i) == ch);
        if can_close
            && let Some(stack_pos) = open_same
            && let Some(&opener) = open.get(stack_pos)
            // A single quote never closes when its opener is the item directly before it.
            && !(ch == '\'' && opener + 1 == index)
        {
            // Openers above the matched one are discarded and stay unpaired.
            open.truncate(stack_pos);
            set_partner(items, opener, index);
            set_partner(items, index, opener);
        } else if open_same.is_none() && can_open {
            open.push(index);
        }
    }
}
793
794fn quote_at(items: &[Item], index: usize) -> char {
796 match items.get(index) {
797 Some(Item::Quote(quote)) => quote.ch,
798 _ => '\0',
799 }
800}
801
802fn set_partner(items: &mut [Item], index: usize, partner: usize) {
803 if let Some(Item::Quote(quote)) = items.get_mut(index) {
804 quote.partner = Some(partner);
805 }
806}
807
/// Renders items into inlines, starting at `*cursor` and advancing it.
///
/// Text and unpaired quote glyphs accumulate into a pending string that is
/// flushed as one `Inline::Str` before a break, before a quoted span and at
/// the end. An opening quote renders everything up to its partner through a
/// recursive call and wraps the result in `Inline::Quoted`. Rendering stops,
/// without consuming it, at a quote whose partner does not lie after the
/// cursor; the caller steps over that closing quote.
fn render_items(items: &[Item], cursor: &mut usize) -> Vec<Inline> {
    let mut out: Vec<Inline> = Vec::new();
    let mut pending = String::new();
    // Emits the accumulated text, if any, as a single string inline.
    let flush = |pending: &mut String, out: &mut Vec<Inline>| {
        if !pending.is_empty() {
            out.push(Inline::Str(std::mem::take(pending).into()));
        }
    };
    while let Some(item) = items.get(*cursor) {
        match item {
            Item::Text(text) => {
                pending.push_str(text);
                *cursor += 1;
            }
            Item::Break(brk) => {
                flush(&mut pending, &mut out);
                out.push(brk.clone());
                *cursor += 1;
            }
            Item::Quote(quote) => match quote.partner {
                // Opening half: the partner lies further along.
                Some(partner) if partner > *cursor => {
                    flush(&mut pending, &mut out);
                    let ch = quote.ch;
                    // Step past the opener, render the contents, then step
                    // past the closer the recursive call stopped on.
                    *cursor += 1;
                    let inner = render_items(items, cursor);
                    *cursor += 1;
                    out.push(Inline::Quoted(quote_kind(ch), inner));
                }
                // Closing half: hand control back to the call that opened it.
                Some(_) => {
                    break;
                }
                None => {
                    pending.push(quote.glyph);
                    *cursor += 1;
                }
            },
        }
    }
    flush(&mut pending, &mut out);
    out
}
855
856fn run_context(run: &[RunTok]) -> Vec<(Option<char>, Option<char>)> {
859 let plain: Vec<Option<char>> = run
860 .iter()
861 .map(|tok| match tok {
862 RunTok::Char(ch) | RunTok::Quote(ch) => Some(*ch),
863 RunTok::Break(_) => Some(' '),
864 })
865 .collect();
866 (0..run.len())
867 .map(|i| {
868 let before = i
869 .checked_sub(1)
870 .and_then(|j| plain.get(j))
871 .copied()
872 .flatten();
873 let after = plain.get(i + 1).copied().flatten();
874 (before, after)
875 })
876 .collect()
877}
878
879fn quote_kind(ch: char) -> QuoteType {
880 if ch == '\'' {
881 QuoteType::SingleQuote
882 } else {
883 QuoteType::DoubleQuote
884 }
885}
886
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the reader with default options, panicking if it reports an error.
    fn read(input: &str) -> Document {
        OpmlReader
            .read(input, &ReaderOptions::default())
            .expect("outline input parses")
    }

    /// Lists every header of the document as `(level, flattened text)`.
    fn headers(document: &Document) -> Vec<(i32, String)> {
        document
            .blocks
            .iter()
            .filter_map(|block| match block {
                Block::Header(level, _, inlines) => Some((*level, inline_text(inlines))),
                _ => None,
            })
            .collect()
    }

    /// Flattens strings and spaces to text; every other inline contributes nothing.
    fn inline_text(inlines: &[Inline]) -> String {
        inlines
            .iter()
            .map(|inline| match inline {
                Inline::Str(text) => text.as_str(),
                Inline::Space => " ",
                _ => "",
            })
            .collect()
    }

    #[test]
    fn nesting_assigns_header_levels() {
        let document = read(
            "<opml><body>\
             <outline text=\"A\">\
             <outline text=\"B\"><outline text=\"C\"/></outline>\
             </outline>\
             </body></opml>",
        );
        assert_eq!(
            headers(&document),
            [
                (1, "A".to_owned()),
                (2, "B".to_owned()),
                (3, "C".to_owned()),
            ]
        );
    }

    #[test]
    fn sibling_outlines_share_a_level() {
        let document = read("<opml><body><outline text=\"A\"/><outline text=\"B\"/></body></opml>");
        assert_eq!(
            headers(&document),
            [(1, "A".to_owned()), (1, "B".to_owned())]
        );
    }

    #[test]
    fn note_attribute_parses_as_markdown() {
        let document = read("<opml><body><outline text=\"H\" _note=\"**b**\"/></body></opml>");
        assert!(matches!(
            document.blocks.first(),
            Some(Block::Header(1, _, _))
        ));
        let Some(Block::Para(inlines)) = document.blocks.get(1) else {
            panic!("expected the note to parse into a paragraph");
        };
        assert!(matches!(inlines.first(), Some(Inline::Strong(_))));
    }

    #[test]
    fn text_attribute_tokenizes_on_whitespace() {
        let document = read("<opml><body><outline text=\"Hello World\"/></body></opml>");
        let Some(Block::Header(_, _, inlines)) = document.blocks.first() else {
            panic!("expected a header");
        };
        assert!(matches!(
            inlines.as_slice(),
            [Inline::Str(first), Inline::Space, Inline::Str(second)]
                if first == "Hello" && second == "World"
        ));
    }

    /// Reads `input` and returns the inlines of its first block, which must be a header.
    fn first_header_inlines(input: &str) -> Vec<Inline> {
        let document = read(input);
        match document.blocks.into_iter().next() {
            Some(Block::Header(_, _, inlines)) => inlines,
            _ => panic!("expected a header"),
        }
    }

    /// Wraps `text` as the `text` attribute of a single outline. The argument
    /// is pasted into a double-quoted attribute, so markup and double quotes
    /// inside it must be written as XML escapes.
    fn outline(text: &str) -> String {
        format!("<opml><body><outline text=\"{text}\"/></body></opml>")
    }

    #[test]
    fn text_attribute_parses_inline_html_markup() {
        let inlines = first_header_inlines(&outline(
            "&lt;strong&gt;Bold&lt;/strong&gt; and &lt;em&gt;it&lt;/em&gt;",
        ));
        assert_eq!(
            inlines,
            vec![
                Inline::Strong(vec![Inline::Str("Bold".to_owned().into())]),
                Inline::Space,
                Inline::Str("and".to_owned().into()),
                Inline::Space,
                Inline::Emph(vec![Inline::Str("it".to_owned().into())]),
            ]
        );
    }

    #[test]
    fn text_attribute_decodes_entities_twice_then_parses_code() {
        // `&amp;amp;` decodes to `&amp;` when the attribute is read and to `&`
        // when the resulting fragment is parsed.
        let inlines = first_header_inlines(&outline(
            "a &lt;code&gt;c&lt;/code&gt; b &amp;amp; z",
        ));
        assert_eq!(
            inlines,
            vec![
                Inline::Str("a".to_owned().into()),
                Inline::Space,
                Inline::Code(Box::default(), "c".to_owned().into()),
                Inline::Space,
                Inline::Str("b".to_owned().into()),
                Inline::Space,
                Inline::Str("&".to_owned().into()),
                Inline::Space,
                Inline::Str("z".to_owned().into()),
            ]
        );
    }

    #[test]
    fn text_attribute_parses_nested_markup() {
        let inlines = first_header_inlines(&outline(
            "&lt;strong&gt;&lt;em&gt;both&lt;/em&gt;&lt;/strong&gt;",
        ));
        assert_eq!(
            inlines,
            vec![Inline::Strong(vec![Inline::Emph(vec![Inline::Str(
                "both".to_owned().into()
            )])])]
        );
    }

    #[test]
    fn text_attribute_parses_superscript_and_subscript() {
        let inlines = first_header_inlines(&outline(
            "x&lt;sup&gt;2&lt;/sup&gt;&lt;sub&gt;n&lt;/sub&gt;",
        ));
        assert_eq!(
            inlines,
            vec![
                Inline::Str("x".to_owned().into()),
                Inline::Superscript(vec![Inline::Str("2".to_owned().into())]),
                Inline::Subscript(vec![Inline::Str("n".to_owned().into())]),
            ]
        );
    }

    #[test]
    fn text_attribute_parses_an_anchor_into_a_link() {
        let inlines = first_header_inlines(&outline(
            "&lt;a href=&quot;http://e.com&quot;&gt;l&lt;/a&gt;",
        ));
        let Some(Inline::Link(_, label, target)) = inlines.first() else {
            panic!("expected a link");
        };
        assert_eq!(label, &vec![Inline::Str("l".to_owned().into())]);
        assert_eq!(target.url, "http://e.com");
    }

    #[test]
    fn named_character_reference_in_text_decodes_once_decoded() {
        // The attribute decodes to `c &copy; r`; the fragment parser then
        // resolves the named reference.
        let inlines = first_header_inlines(&outline("c &amp;copy; r"));
        assert_eq!(
            inlines,
            vec![
                Inline::Str("c".to_owned().into()),
                Inline::Space,
                Inline::Str("\u{a9}".to_owned().into()),
                Inline::Space,
                Inline::Str("r".to_owned().into()),
            ]
        );
    }

    #[test]
    fn link_outline_wraps_heading_in_a_link_to_its_url() {
        let document = read(
            "<opml><body><outline type=\"link\" text=\"Site\" url=\"http://e.com/p\"/></body></opml>",
        );
        let Some(Block::Header(1, _, inlines)) = document.blocks.first() else {
            panic!("expected a header");
        };
        let Some(Inline::Link(_, label, target)) = inlines.first() else {
            panic!("expected a link heading");
        };
        assert_eq!(label, &vec![Inline::Str("Site".to_owned().into())]);
        assert_eq!(target.url, "http://e.com/p");
        assert_eq!(target.title, "");
    }

    #[test]
    fn link_outline_without_url_links_to_an_empty_target() {
        let document = read("<opml><body><outline type=\"LINK\" text=\"Site\"/></body></opml>");
        let Some(Block::Header(_, _, inlines)) = document.blocks.into_iter().next() else {
            panic!("expected a header");
        };
        let Some(Inline::Link(_, _, target)) = inlines.first() else {
            panic!("expected a link heading");
        };
        assert_eq!(target.url, "");
    }

    #[test]
    fn non_link_outline_with_a_url_keeps_a_plain_heading() {
        let document =
            read("<opml><body><outline text=\"Site\" url=\"http://e.com/p\"/></body></opml>");
        let Some(Block::Header(_, _, inlines)) = document.blocks.first() else {
            panic!("expected a header");
        };
        assert_eq!(inlines.as_slice(), [Inline::Str("Site".to_owned().into())]);
    }

    #[test]
    fn missing_text_attribute_yields_an_empty_heading() {
        let document = read("<opml><body><outline/></body></opml>");
        assert_eq!(headers(&document), [(1, String::new())]);
    }

    #[test]
    fn single_quoted_attributes_are_read() {
        let document = read("<opml><body><outline text='quoted'/></body></opml>");
        assert_eq!(headers(&document), [(1, "quoted".to_owned())]);
    }

    #[test]
    fn comments_instructions_and_doctype_are_skipped() {
        let document = read(
            "<?xml version=\"1.0\"?><!DOCTYPE opml><opml><!-- c -->\
             <body><outline text=\"A\"/></body></opml>",
        );
        assert_eq!(headers(&document), [(1, "A".to_owned())]);
    }

    #[test]
    fn metadata_is_drawn_from_the_head() {
        let document = read(
            "<opml><head><title>T</title><ownerName>Me</ownerName>\
             <dateModified>2020</dateModified></head><body></body></opml>",
        );
        assert!(matches!(
            document.meta.get("title"),
            Some(MetaValue::MetaInlines(inlines)) if inline_text(inlines) == "T"
        ));
        assert!(matches!(
            document.meta.get("date"),
            Some(MetaValue::MetaInlines(inlines)) if inline_text(inlines) == "2020"
        ));
        let Some(MetaValue::MetaList(authors)) = document.meta.get("author") else {
            panic!("expected an author list");
        };
        assert!(matches!(
            authors.first(),
            Some(MetaValue::MetaInlines(inlines)) if inline_text(inlines) == "Me"
        ));
    }

    #[test]
    fn absent_owner_yields_an_empty_author_list() {
        let document = read("<opml><head><title>T</title></head><body></body></opml>");
        assert!(matches!(
            document.meta.get("author"),
            Some(MetaValue::MetaList(authors)) if authors.is_empty()
        ));
    }

    #[test]
    fn named_entities_decode() {
        assert_eq!(
            decode_entities("a &amp; b &lt;c&gt; &quot;d&quot; &apos;e&apos;"),
            "a & b <c> \"d\" 'e'"
        );
    }

    #[test]
    fn numeric_entities_decode_in_decimal_and_hex() {
        assert_eq!(decode_entities("&#65;&#x42;&#X43;"), "ABC");
    }

    #[test]
    fn malformed_or_unknown_references_are_left_verbatim() {
        assert_eq!(decode_entities("&"), "&");
        assert_eq!(decode_entities("&nosuch;"), "&nosuch;");
        assert_eq!(decode_entities("&#zz;"), "&#zz;");
        assert_eq!(decode_entities("bare & text"), "bare & text");
    }

    #[test]
    fn malformed_markup_does_not_panic() {
        let _ = read("<opml><body><outline text=\"x\"><outline text=\"y\"></body>");
        let _ = read("<<<>>><opml attr");
        let _ = read("");
    }

    /// Returns the inlines stored under the `title` metadata key.
    fn title_inlines(document: &Document) -> Vec<Inline> {
        match document.meta.get("title") {
            Some(MetaValue::MetaInlines(inlines)) => inlines.clone(),
            _ => panic!("expected title inlines"),
        }
    }

    #[test]
    fn text_attribute_pairs_double_quotes_into_a_quoted_span() {
        let inlines = first_header_inlines(&outline("&quot;hi&quot;"));
        assert_eq!(
            inlines,
            vec![Inline::Quoted(
                QuoteType::DoubleQuote,
                vec![Inline::Str("hi".to_owned().into())]
            )]
        );
    }

    #[test]
    fn text_attribute_pairs_single_quotes_into_a_quoted_span() {
        let inlines = first_header_inlines(&outline("'hi'"));
        assert_eq!(
            inlines,
            vec![Inline::Quoted(
                QuoteType::SingleQuote,
                vec![Inline::Str("hi".to_owned().into())]
            )]
        );
    }

    #[test]
    fn text_attribute_curls_an_apostrophe() {
        let inlines = first_header_inlines(&outline("it's"));
        assert_eq!(inlines, vec![Inline::Str("it\u{2019}s".to_owned().into())]);
    }

    #[test]
    fn text_attribute_folds_dashes_and_ellipsis() {
        let inlines = first_header_inlines(&outline("a---b--c...d"));
        assert_eq!(
            inlines,
            vec![Inline::Str(
                "a\u{2014}b\u{2013}c\u{2026}d".to_owned().into()
            )]
        );
    }

    #[test]
    fn dash_runs_fold_greedily_to_em_dashes() {
        assert_eq!(fold_dash_run(1), "-");
        assert_eq!(fold_dash_run(2), "\u{2013}");
        assert_eq!(fold_dash_run(3), "\u{2014}");
        assert_eq!(fold_dash_run(4), "\u{2014}-");
        assert_eq!(fold_dash_run(5), "\u{2014}\u{2013}");
        assert_eq!(fold_dash_run(6), "\u{2014}\u{2014}");
        assert_eq!(fold_dash_run(7), "\u{2014}\u{2014}-");
    }

    #[test]
    fn ellipsis_runs_fold_per_group_of_three() {
        assert_eq!(fold_ellipsis_run(1), ".");
        assert_eq!(fold_ellipsis_run(2), "..");
        assert_eq!(fold_ellipsis_run(3), "\u{2026}");
        assert_eq!(fold_ellipsis_run(4), "\u{2026}.");
        assert_eq!(fold_ellipsis_run(6), "\u{2026}\u{2026}");
    }

    #[test]
    fn text_attribute_resolves_an_unpaired_double_quote_directionally() {
        // A lone quote before a word curls left; a lone quote after one curls right.
        let opener = first_header_inlines(&outline("&quot;open only"));
        assert_eq!(
            opener.first(),
            Some(&Inline::Str("\u{201c}open".to_owned().into()))
        );
        let closer = first_header_inlines(&outline("close only&quot;"));
        assert_eq!(
            closer.last(),
            Some(&Inline::Str("only\u{201d}".to_owned().into()))
        );
    }

    #[test]
    fn double_quotes_do_not_nest_within_their_own_kind() {
        // The second quote closes the first; the remaining two stay unpaired.
        let inlines = first_header_inlines(&outline("&quot;a &quot;b&quot; c&quot;"));
        assert_eq!(
            inlines,
            vec![
                Inline::Quoted(
                    QuoteType::DoubleQuote,
                    vec![Inline::Str("a".to_owned().into()), Inline::Space]
                ),
                Inline::Str("b\u{201d}".to_owned().into()),
                Inline::Space,
                Inline::Str("c\u{201d}".to_owned().into()),
            ]
        );
    }

    #[test]
    fn a_different_quote_kind_nests() {
        let inlines = first_header_inlines(&outline("&quot;a 'b' c&quot;"));
        assert_eq!(
            inlines,
            vec![Inline::Quoted(
                QuoteType::DoubleQuote,
                vec![
                    Inline::Str("a".to_owned().into()),
                    Inline::Space,
                    Inline::Quoted(
                        QuoteType::SingleQuote,
                        vec![Inline::Str("b".to_owned().into())]
                    ),
                    Inline::Space,
                    Inline::Str("c".to_owned().into()),
                ]
            )]
        );
    }

    #[test]
    fn two_straight_single_quotes_stay_apostrophes() {
        let inlines = first_header_inlines(&outline("''"));
        assert_eq!(
            inlines,
            vec![Inline::Str("\u{2019}\u{2019}".to_owned().into())]
        );
    }

    #[test]
    fn code_span_curls_quotes_into_glyph_pairs() {
        let inlines = first_header_inlines(&outline("&lt;code&gt;'q'&lt;/code&gt;"));
        assert_eq!(
            inlines,
            vec![Inline::Code(
                Box::default(),
                "\u{2018}q\u{2019}".to_owned().into()
            )]
        );
    }

    #[test]
    fn code_span_curls_an_apostrophe_and_folds_dashes() {
        let inlines = first_header_inlines(&outline("&lt;code&gt;it's --- x&lt;/code&gt;"));
        assert_eq!(
            inlines,
            vec![Inline::Code(
                Box::default(),
                "it\u{2019}s \u{2014} x".to_owned().into()
            )]
        );
    }

    #[test]
    fn smart_typography_recurses_into_inline_markup() {
        let inlines = first_header_inlines(&outline("&lt;em&gt;&quot;hi&quot;&lt;/em&gt;"));
        assert_eq!(
            inlines,
            vec![Inline::Emph(vec![Inline::Quoted(
                QuoteType::DoubleQuote,
                vec![Inline::Str("hi".to_owned().into())]
            )])]
        );
    }

    #[test]
    fn note_body_uses_the_markdown_preset() {
        // `&#10;` puts a line break into the note, so the term and its
        // definition sit on separate lines.
        let document = read(
            "<opml><body><outline text=\"H\" _note=\"Term&#10;: Definition\"/></body></opml>",
        );
        assert!(
            document
                .blocks
                .iter()
                .any(|block| matches!(block, Block::DefinitionList(_))),
            "expected the note to parse a definition list"
        );
    }

    #[test]
    fn note_body_applies_smart_typography() {
        let document = read("<opml><body><outline text=\"H\" _note=\"it's\"/></body></opml>");
        let Some(Block::Para(inlines)) = document.blocks.get(1) else {
            panic!("expected a note paragraph");
        };
        assert_eq!(inlines, &vec![Inline::Str("it\u{2019}s".to_owned().into())]);
    }

    #[test]
    fn metadata_keeps_straight_quotes_dashes_and_dots() {
        // Metadata is only tokenized on whitespace; no typographic folding applies.
        let document = read(
            "<opml><head><title>&quot;a&quot; --- it's ...</title></head><body></body></opml>",
        );
        assert_eq!(
            title_inlines(&document),
            vec![
                Inline::Str("\"a\"".to_owned().into()),
                Inline::Space,
                Inline::Str("---".to_owned().into()),
                Inline::Space,
                Inline::Str("it's".to_owned().into()),
                Inline::Space,
                Inline::Str("...".to_owned().into()),
            ]
        );
    }

    #[test]
    fn metadata_preserves_boundary_whitespace_as_space() {
        let document = read("<opml><head><title> a b </title></head><body></body></opml>");
        assert_eq!(
            title_inlines(&document),
            vec![
                Inline::Space,
                Inline::Str("a".to_owned().into()),
                Inline::Space,
                Inline::Str("b".to_owned().into()),
                Inline::Space,
            ]
        );
    }

    #[test]
    fn metadata_turns_an_internal_newline_into_a_soft_break() {
        let document =
            read("<opml><head><title>line one\nline two</title></head><body></body></opml>");
        assert_eq!(
            title_inlines(&document),
            vec![
                Inline::Str("line".to_owned().into()),
                Inline::Space,
                Inline::Str("one".to_owned().into()),
                Inline::SoftBreak,
                Inline::Str("line".to_owned().into()),
                Inline::Space,
                Inline::Str("two".to_owned().into()),
            ]
        );
    }

    #[test]
    fn present_but_empty_owner_contributes_an_empty_author() {
        // An `ownerName` element that exists but is empty still yields one
        // author entry with no inlines.
        let document = read("<opml><head><ownerName></ownerName></head><body></body></opml>");
        let Some(MetaValue::MetaList(authors)) = document.meta.get("author") else {
            panic!("expected an author list");
        };
        assert_eq!(authors, &vec![MetaValue::MetaInlines(Vec::new())]);
    }
}