Skip to main content

carta_readers/
opml.rs

1//! Outline reader: parses a nested outline of `<outline>` elements into the document model.
2//!
3//! Each outline becomes a header whose level is its nesting depth (a top-level outline is level 1,
4//! its child level 2, and so on). The header inlines come from the outline's `text` attribute,
5//! parsed as a fragment of HTML inline markup (so `<strong>`, `<em>`, `<code>`, links, and the like
6//! become their inline constructs); the outline's `_note` attribute is parsed as markdown blocks. An
7//! outline of `type="link"` wraps its heading content in a link to its `url`. The document metadata
8//! is drawn from the document head: `title`, `ownerName` (as the author list), and `dateModified`
9//! (as the date), each taken as plain text.
10//!
11//! XML is parsed by a small hand-written scanner over the subset the format uses — elements,
12//! attributes with entity decoding, self-closing tags, and nesting. The scanner is panic-free on
13//! malformed input: unrecognized or unbalanced markup is skipped rather than rejected.
14
15use std::collections::BTreeMap;
16
17use carta_ast::{Block, Document, Inline, MetaValue, QuoteType, Target};
18use carta_core::{Reader, ReaderOptions, Result, presets};
19
20use crate::commonmark::CommonmarkReader;
21use crate::html::parse_inline_fragment;
22
/// Parses an outline document into the document model.
///
/// The reader is a stateless unit type: all work happens in its [`Reader::read`] implementation,
/// which scans the input, turns every `outline` element under `body` into a header (plus any
/// note blocks), and fills the document metadata from `head`.
#[derive(Debug, Default, Clone, Copy)]
pub struct OpmlReader;
26
27impl Reader for OpmlReader {
28    fn read(&self, input: &str, _options: &ReaderOptions) -> Result<Document> {
29        let nodes = parse_nodes(input);
30        let mut blocks = Vec::new();
31        let head = find_child(&nodes, "head");
32        let body = find_child(&nodes, "body");
33        for node in body.map(element_children).unwrap_or_default() {
34            emit_outline(node, 1, &mut blocks)?;
35        }
36        Ok(Document {
37            api_version: carta_ast::ApiVersion::default(),
38            meta: build_meta(head)
39                .into_iter()
40                .map(|(k, v)| (k.into(), v))
41                .collect(),
42            blocks,
43        })
44    }
45}
46
/// A parsed XML element with its decoded attributes and its element children. Text nodes are not
/// retained as nodes: the format carries its content in attributes. The concatenated text content
/// of an element, when non-empty, is stored by the parser under the synthetic `__text` attribute.
#[derive(Debug)]
struct Element {
    /// The tag name exactly as written in the source.
    name: String,
    /// Attribute values keyed by attribute name, entity-decoded; may also hold `__text`.
    attributes: BTreeMap<String, String>,
    /// Nested elements in document order.
    children: Vec<Element>,
}
55
56fn element_children(element: &Element) -> Vec<&Element> {
57    element.children.iter().collect()
58}
59
60/// The first descendant search is shallow by design: `head` and `body` are direct children of the
61/// document root, found among the top-level parse and the root `opml` element's children.
62fn find_child<'a>(nodes: &'a [Element], name: &str) -> Option<&'a Element> {
63    for node in nodes {
64        if node.name == name {
65            return Some(node);
66        }
67        if let Some(found) = node.children.iter().find(|child| child.name == name) {
68            return Some(found);
69        }
70    }
71    None
72}
73
74fn emit_outline(outline: &Element, level: i32, blocks: &mut Vec<Block>) -> Result<()> {
75    if outline.name != "outline" {
76        return Ok(());
77    }
78    let heading = outline
79        .attributes
80        .get("text")
81        .map(|text| smart_inlines(parse_inline_fragment(text)))
82        .unwrap_or_default();
83    let heading = if is_link_outline(outline) {
84        let url = outline.attributes.get("url").cloned().unwrap_or_default();
85        vec![Inline::Link(
86            Box::default(),
87            heading,
88            Box::new(Target {
89                url: url.into(),
90                title: carta_ast::Text::default(),
91            }),
92        )]
93    } else {
94        heading
95    };
96    blocks.push(Block::Header(level, Box::default(), heading));
97    if let Some(note) = outline.attributes.get("_note") {
98        let parsed = CommonmarkReader.read(note, &note_options())?;
99        blocks.extend(parsed.blocks);
100    }
101    for child in &outline.children {
102        emit_outline(child, level + 1, blocks)?;
103    }
104    Ok(())
105}
106
107/// An outline of `type="link"` (case-insensitive) names a hyperlink: its heading content is wrapped
108/// in a link to the outline's `url`, which may be absent (an empty target).
109fn is_link_outline(outline: &Element) -> bool {
110    outline
111        .attributes
112        .get("type")
113        .is_some_and(|kind| kind.eq_ignore_ascii_case("link"))
114}
115
/// Reader options for a `_note` body: the extended Markdown dialect's full extension set (so smart
/// typography, definition lists, and the other Markdown-flavored constructs are on) with greedy
/// paragraphs, so a bare following line continues the paragraph rather than opening a new block.
///
/// Every option other than the two assigned below keeps its `ReaderOptions::default()` value.
fn note_options() -> ReaderOptions {
    // Start from the defaults and override fields one by one.
    // NOTE(review): presumably `ReaderOptions` cannot be built with a struct literal from this
    // crate, which would explain assignment over struct-update syntax — confirm.
    let mut options = ReaderOptions::default();
    options.extensions = presets::MARKDOWN;
    options.greedy_paragraphs = true;
    options
}
125
126fn build_meta(head: Option<&Element>) -> BTreeMap<String, MetaValue> {
127    let mut meta = BTreeMap::new();
128    // The element's text content, or `None` when the element is absent. A present element with empty
129    // or whitespace-only content is distinguished from an absent one, which matters for the author
130    // list: a present `ownerName` always contributes an entry, even an empty one.
131    let value = |name: &str| -> Option<&str> {
132        head.and_then(|head| head.children.iter().find(|child| child.name == name))
133            .map(|element| {
134                element
135                    .attributes
136                    .get("__text")
137                    .map(String::as_str)
138                    .unwrap_or_default()
139            })
140    };
141    let title = tokenize_meta(value("title").unwrap_or_default());
142    let date = tokenize_meta(value("dateModified").unwrap_or_default());
143    let author = match value("ownerName") {
144        Some(owner) => vec![MetaValue::MetaInlines(tokenize_meta(owner))],
145        None => Vec::new(),
146    };
147    meta.insert("title".to_owned(), MetaValue::MetaInlines(title));
148    meta.insert("author".to_owned(), MetaValue::MetaList(author));
149    meta.insert("date".to_owned(), MetaValue::MetaInlines(date));
150    meta
151}
152
153/// Tokenize a metadata value into inlines, preserving boundary whitespace. Each maximal
154/// non-whitespace run becomes one `Str`; each maximal whitespace run becomes a single break — a
155/// `SoftBreak` when the run spans a line ending, otherwise a `Space`. Leading and trailing
156/// whitespace is kept, unlike inline body text where it is trimmed. Smart typography is not applied:
157/// metadata values keep their straight quotes, hyphens, and dots verbatim.
158fn tokenize_meta(text: &str) -> Vec<Inline> {
159    let mut out = Vec::new();
160    let mut chars = text.chars().peekable();
161    let mut word = String::new();
162    while let Some(ch) = chars.next() {
163        if ch.is_whitespace() {
164            if !word.is_empty() {
165                out.push(Inline::Str(std::mem::take(&mut word).into()));
166            }
167            let mut has_newline = ch == '\n' || ch == '\r';
168            while let Some(&next) = chars.peek() {
169                if !next.is_whitespace() {
170                    break;
171                }
172                has_newline |= next == '\n' || next == '\r';
173                chars.next();
174            }
175            out.push(if has_newline {
176                Inline::SoftBreak
177            } else {
178                Inline::Space
179            });
180        } else {
181            word.push(ch);
182        }
183    }
184    if !word.is_empty() {
185        out.push(Inline::Str(word.into()));
186    }
187    out
188}
189
190/// Parse the top-level elements of a document. Anything outside an element (prolog, stray text) is
191/// skipped.
192fn parse_nodes(input: &str) -> Vec<Element> {
193    let chars: Vec<char> = input.chars().collect();
194    let mut pos = 0;
195    let mut nodes = Vec::new();
196    while let Some(element) = next_element(&chars, &mut pos) {
197        nodes.push(element);
198    }
199    nodes
200}
201
202/// Scan the next element starting at or after `pos`. Returns `None` at end of input. Comments,
203/// processing instructions, declarations, and DOCTYPE are skipped; text between elements is
204/// captured into the parent via [`parse_children`].
205fn next_element(chars: &[char], pos: &mut usize) -> Option<Element> {
206    loop {
207        skip_to_tag(chars, pos);
208        if *pos >= chars.len() {
209            return None;
210        }
211        if skip_non_element(chars, pos) {
212            continue;
213        }
214        return parse_element(chars, pos);
215    }
216}
217
/// Move `pos` forward to the next `<`, or to the end of input when none remains. A cursor that is
/// already on a `<`, or already past the end, is left where it is.
fn skip_to_tag(chars: &[char], pos: &mut usize) {
    let remaining = chars.get(*pos..).unwrap_or_default();
    let distance = remaining
        .iter()
        .position(|&ch| ch == '<')
        .unwrap_or(remaining.len());
    *pos += distance;
}
227
228/// If the tag at `pos` is a comment, processing instruction, declaration, or DOCTYPE, skip it and
229/// return `true`. A closing tag is also consumed here so a caller scanning siblings stops.
230fn skip_non_element(chars: &[char], pos: &mut usize) -> bool {
231    if starts_with(chars, *pos, "<!--") {
232        skip_until(chars, pos, "-->");
233        return true;
234    }
235    if starts_with(chars, *pos, "<?") {
236        skip_until(chars, pos, "?>");
237        return true;
238    }
239    if starts_with(chars, *pos, "<!") {
240        skip_until(chars, pos, ">");
241        return true;
242    }
243    false
244}
245
246/// Parse one element whose `<` is at `pos`, including its children up to the matching close tag.
247fn parse_element(chars: &[char], pos: &mut usize) -> Option<Element> {
248    if chars.get(*pos) != Some(&'<') {
249        return None;
250    }
251    *pos += 1;
252    let name = read_name(chars, pos);
253    if name.is_empty() {
254        skip_until(chars, pos, ">");
255        return None;
256    }
257    let mut attributes = BTreeMap::new();
258    loop {
259        skip_whitespace(chars, pos);
260        match chars.get(*pos) {
261            None => {
262                return Some(Element {
263                    name,
264                    attributes,
265                    children: Vec::new(),
266                });
267            }
268            Some('/') => {
269                *pos += 1;
270                skip_until(chars, pos, ">");
271                return Some(Element {
272                    name,
273                    attributes,
274                    children: Vec::new(),
275                });
276            }
277            Some('>') => {
278                *pos += 1;
279                break;
280            }
281            Some(_) => {
282                if let Some((key, value)) = read_attribute(chars, pos) {
283                    attributes.insert(key, value);
284                } else {
285                    *pos += 1;
286                }
287            }
288        }
289    }
290    let (children, text) = parse_children(chars, pos);
291    if !text.is_empty() {
292        attributes.insert("__text".to_owned(), text);
293    }
294    Some(Element {
295        name,
296        attributes,
297        children,
298    })
299}
300
301/// Parse the content of an open element up to its matching `</name>`: nested elements become
302/// children, and the concatenated raw text (entity-decoded) is returned for leaf elements.
303fn parse_children(chars: &[char], pos: &mut usize) -> (Vec<Element>, String) {
304    let mut children = Vec::new();
305    let mut text = String::new();
306    loop {
307        let mut run = String::new();
308        while let Some(&ch) = chars.get(*pos) {
309            if ch == '<' {
310                break;
311            }
312            run.push(ch);
313            *pos += 1;
314        }
315        text.push_str(&decode_entities(&run));
316        if *pos >= chars.len() {
317            break;
318        }
319        if starts_with(chars, *pos, "</") {
320            *pos += 2;
321            let _ = read_name(chars, pos);
322            skip_until(chars, pos, ">");
323            break;
324        }
325        if skip_non_element(chars, pos) {
326            continue;
327        }
328        if let Some(child) = parse_element(chars, pos) {
329            children.push(child);
330        } else {
331            skip_to_tag(chars, pos);
332            *pos = (*pos).saturating_add(1);
333        }
334    }
335    // The raw text is returned untrimmed: a metadata value's boundary whitespace is significant and
336    // is turned into boundary `Space`/`SoftBreak` inlines by [`tokenize_meta`].
337    (children, text)
338}
339
/// Read a tag name at `pos`: every character up to, but not including, the first whitespace, `>`,
/// or `/`. The cursor ends just after the name; an empty name leaves it unmoved.
fn read_name(chars: &[char], pos: &mut usize) -> String {
    let name: String = chars
        .get(*pos..)
        .unwrap_or_default()
        .iter()
        .copied()
        .take_while(|&ch| !(ch.is_whitespace() || ch == '>' || ch == '/'))
        .collect();
    *pos += name.chars().count();
    name
}
351
352/// Read one `key="value"` (or single-quoted) attribute. Returns `None` when the cursor is not at a
353/// name character.
354fn read_attribute(chars: &[char], pos: &mut usize) -> Option<(String, String)> {
355    let key = read_attr_name(chars, pos);
356    if key.is_empty() {
357        return None;
358    }
359    skip_whitespace(chars, pos);
360    if chars.get(*pos) != Some(&'=') {
361        return Some((key, String::new()));
362    }
363    *pos += 1;
364    skip_whitespace(chars, pos);
365    let Some(&quote @ ('"' | '\'')) = chars.get(*pos) else {
366        return Some((key, String::new()));
367    };
368    *pos += 1;
369    let mut raw = String::new();
370    while let Some(&ch) = chars.get(*pos) {
371        if ch == quote {
372            *pos += 1;
373            break;
374        }
375        raw.push(ch);
376        *pos += 1;
377    }
378    Some((key, decode_entities(&raw)))
379}
380
/// Read an attribute name at `pos`: every character up to, but not including, the first
/// whitespace, `=`, `>`, or `/`. The cursor ends just after the name.
fn read_attr_name(chars: &[char], pos: &mut usize) -> String {
    let rest = chars.get(*pos..).unwrap_or_default();
    let len = rest
        .iter()
        .position(|&ch| ch.is_whitespace() || matches!(ch, '=' | '>' | '/'))
        .unwrap_or(rest.len());
    *pos += len;
    rest.iter().take(len).collect()
}
392
/// Advance `pos` over any whitespace characters it currently sits on.
fn skip_whitespace(chars: &[char], pos: &mut usize) {
    let blanks = chars
        .get(*pos..)
        .unwrap_or_default()
        .iter()
        .take_while(|ch| ch.is_whitespace())
        .count();
    *pos += blanks;
}
401
/// Whether the characters of `prefix` appear in `chars` starting exactly at `pos`. An empty
/// prefix matches at any position; a prefix that would run past the end of input does not match.
fn starts_with(chars: &[char], pos: usize, prefix: &str) -> bool {
    for (offset, expected) in prefix.chars().enumerate() {
        if chars.get(pos + offset) != Some(&expected) {
            return false;
        }
    }
    true
}
408
409/// Advance the cursor past the next occurrence of `marker`, consuming the marker. If the marker is
410/// absent the cursor moves to the end of input.
411fn skip_until(chars: &[char], pos: &mut usize, marker: &str) {
412    let marker_len = marker.chars().count();
413    while *pos < chars.len() {
414        if starts_with(chars, *pos, marker) {
415            *pos += marker_len;
416            return;
417        }
418        *pos += 1;
419    }
420}
421
422/// Decode the XML entity references the format uses: the five named entities and numeric character
423/// references in decimal and hexadecimal. An unrecognized or malformed reference is left verbatim.
424fn decode_entities(text: &str) -> String {
425    let mut out = String::with_capacity(text.len());
426    let chars: Vec<char> = text.chars().collect();
427    let mut pos = 0;
428    while let Some(&ch) = chars.get(pos) {
429        if ch != '&' {
430            out.push(ch);
431            pos += 1;
432            continue;
433        }
434        let Some(end) = (pos + 1..chars.len()).find(|&index| chars.get(index) == Some(&';')) else {
435            out.push('&');
436            pos += 1;
437            continue;
438        };
439        let body: String = chars.get(pos + 1..end).unwrap_or_default().iter().collect();
440        if let Some(decoded) = decode_reference(&body) {
441            out.push_str(&decoded);
442            pos = end + 1;
443        } else {
444            out.push('&');
445            pos += 1;
446        }
447    }
448    out
449}
450
/// Decode the body of one entity reference (the text between `&` and `;`).
///
/// Recognizes the five named XML entities and numeric character references: `#` followed by
/// decimal digits, or `#x`/`#X` followed by hexadecimal digits. Returns `None` for anything else,
/// including an empty digit string, a value that overflows or is not a Unicode scalar value, and
/// digits carrying a sign — the integer parsers would accept a leading `+`, which is not valid in
/// a character reference.
fn decode_reference(body: &str) -> Option<String> {
    let named = match body {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        _ => None,
    };
    if let Some(ch) = named {
        return Some(ch.to_string());
    }

    let number = body.strip_prefix('#')?;
    let (digits, radix) = match number.strip_prefix(['x', 'X']) {
        Some(hex) => (hex, 16),
        None => (number, 10),
    };
    // Only bare digits of the chosen radix are allowed: this rejects signs before parsing.
    if digits.is_empty() || !digits.chars().all(|ch| ch.is_digit(radix)) {
        return None;
    }
    let code = u32::from_str_radix(digits, radix).ok()?;
    char::from_u32(code).map(|ch| ch.to_string())
}
471
472/// Apply smart typography to an inline tree: straight double and single quotes become curly quotes
473/// (paired into `Quoted` spans where they enclose a run, otherwise directional glyphs); runs of
474/// hyphens fold to en/em dashes; runs of three dots fold to an ellipsis. Container inlines are
475/// transformed recursively; the content of a code span is transformed textually (its quotes become
476/// directional glyphs rather than `Quoted` spans). Quote pairing does not cross a non-text inline:
477/// such an inline is a hard boundary for the pairing search.
478fn smart_inlines(inlines: Vec<Inline>) -> Vec<Inline> {
479    let folded = inlines.into_iter().map(fold_inline).collect();
480    pair_quotes(folded)
481}
482
483/// Recurse into one inline applying the textual smart transforms (dashes, dots, and — in code and
484/// string contexts — directional quote glyphs). Quote *pairing* into `Quoted` spans is left to
485/// [`pair_quotes`], which sees the whole run.
486fn fold_inline(inline: Inline) -> Inline {
487    match inline {
488        Inline::Str(text) => Inline::Str(fold_text(&text).into()),
489        Inline::Code(attr, text) => Inline::Code(attr, smart_code(&text).into()),
490        Inline::Emph(children) => Inline::Emph(smart_inlines(children)),
491        Inline::Underline(children) => Inline::Underline(smart_inlines(children)),
492        Inline::Strong(children) => Inline::Strong(smart_inlines(children)),
493        Inline::Strikeout(children) => Inline::Strikeout(smart_inlines(children)),
494        Inline::Superscript(children) => Inline::Superscript(smart_inlines(children)),
495        Inline::Subscript(children) => Inline::Subscript(smart_inlines(children)),
496        Inline::SmallCaps(children) => Inline::SmallCaps(smart_inlines(children)),
497        Inline::Quoted(kind, children) => Inline::Quoted(kind, smart_inlines(children)),
498        Inline::Span(attr, children) => Inline::Span(attr, smart_inlines(children)),
499        Inline::Link(attr, children, target) => Inline::Link(attr, smart_inlines(children), target),
500        Inline::Image(attr, children, target) => {
501            Inline::Image(attr, smart_inlines(children), target)
502        }
503        other => other,
504    }
505}
506
507/// Fold the dash and dot runs of a plain text string: `---` and longer fold to em/en dashes, `...`
508/// folds to an ellipsis. Straight quotes are left untouched here — they are resolved by
509/// [`pair_quotes`], which can see their surrounding context across the whole run.
510fn fold_text(text: &str) -> String {
511    let mut out = String::with_capacity(text.len());
512    let mut chars = text.chars().peekable();
513    while let Some(ch) = chars.next() {
514        match ch {
515            '-' => {
516                let mut len = 1;
517                while chars.peek() == Some(&'-') {
518                    chars.next();
519                    len += 1;
520                }
521                out.push_str(&fold_dash_run(len));
522            }
523            '.' => {
524                let mut len = 1;
525                while chars.peek() == Some(&'.') {
526                    chars.next();
527                    len += 1;
528                }
529                out.push_str(&fold_ellipsis_run(len));
530            }
531            other => out.push(other),
532        }
533    }
534    out
535}
536
537/// Smart-transform the verbatim content of a code span: fold dash and dot runs, and curl its
538/// quotes. A code span holds only a string, so a matched quote pair renders as its two directional
539/// glyphs (left then right) rather than a `Quoted` node; an unmatched quote becomes a directional
540/// glyph. The same opener/closer pairing drives both, so `'q'` curls to `‘q’` and a lone leading
541/// `'open` to `’open`.
542fn smart_code(text: &str) -> String {
543    let folded = fold_text(text);
544    let mut run: Vec<RunTok> = Vec::new();
545    for ch in folded.chars() {
546        if ch == '\'' || ch == '"' {
547            run.push(RunTok::Quote(ch));
548        } else {
549            run.push(RunTok::Char(ch));
550        }
551    }
552    let mut items = classify_run(&run);
553    match_quotes(&mut items);
554    let mut out = String::with_capacity(folded.len());
555    for (index, item) in items.iter().enumerate() {
556        match item {
557            Item::Text(text) => out.push_str(text),
558            Item::Break(_) => {}
559            Item::Quote(quote) => out.push(match quote.partner {
560                // The opener of a matched pair turns to the left glyph, its closer to the right
561                // glyph; an unmatched quote keeps its directional fallback.
562                Some(partner) if partner > index => paired_code_glyph(quote.ch, true),
563                Some(_) => paired_code_glyph(quote.ch, false),
564                None => quote.glyph,
565            }),
566        }
567    }
568    out
569}
570
/// Fold a run of `len` hyphens into dashes, em dashes first.
///
/// The run becomes `len / 3` em dashes (`—`) followed by what the remainder calls for: nothing
/// for a remainder of zero, a literal hyphen for one, an en dash (`–`) for two. Hence `--` is an
/// en dash, `---` an em dash, `----` an em dash plus a hyphen, `-----` an em dash plus an en dash.
fn fold_dash_run(len: usize) -> String {
    let tail = match len % 3 {
        1 => "-",
        2 => "\u{2013}",
        _ => "",
    };
    let mut folded = "\u{2014}".repeat(len / 3);
    folded.push_str(tail);
    folded
}
588
/// Fold a run of `len` dots: each full group of three becomes one ellipsis (`…`), and the one or
/// two dots left over, if any, stay literal after the ellipses.
fn fold_ellipsis_run(len: usize) -> String {
    let mut folded = "\u{2026}".repeat(len / 3);
    for _ in 0..len % 3 {
        folded.push('.');
    }
    folded
}
597
/// One position in a flattened text run: an ordinary character, a quote delimiter, or a break
/// (a space or a soft/hard line break, which the inline tree carries as its own node).
enum RunTok {
    /// Any character other than a straight quote.
    Char(char),
    /// A straight quote delimiter: `'` or `"`.
    Quote(char),
    /// A `Space`, `SoftBreak`, or `LineBreak` inline, kept so it can be re-emitted unchanged.
    Break(Inline),
}
605
606/// Resolve straight quotes across the inline sequence. Within each maximal run of text inlines
607/// (`Str` plus break nodes), pair a quote opener with a later closer of the same kind into a
608/// `Quoted` span; any quote that does not pair becomes a directional glyph. A non-text inline ends
609/// the current run and is itself a word-like boundary for the flanking of adjacent quotes.
610fn pair_quotes(inlines: Vec<Inline>) -> Vec<Inline> {
611    let mut out = Vec::new();
612    let mut run: Vec<RunTok> = Vec::new();
613    for inline in inlines {
614        match inline {
615            Inline::Str(text) => {
616                for ch in text.chars() {
617                    if ch == '\'' || ch == '"' {
618                        run.push(RunTok::Quote(ch));
619                    } else {
620                        run.push(RunTok::Char(ch));
621                    }
622                }
623            }
624            brk @ (Inline::Space | Inline::SoftBreak | Inline::LineBreak) => {
625                run.push(RunTok::Break(brk));
626            }
627            barrier => {
628                out.extend(resolve_run(&std::mem::take(&mut run)));
629                out.push(barrier);
630            }
631        }
632    }
633    out.extend(resolve_run(&run));
634    out
635}
636
/// Whether the character before a quote lets that quote open a span.
///
/// Opening is permitted at the start of the run (`None`), after whitespace, and after one of a
/// fixed set of leading characters: a straight or curled quote, `$`, `-`, `.`, a backslash, an en
/// or em dash, or an ellipsis. After anything else — a letter, a digit, an opening bracket — the
/// quote does not open.
fn open_context(before: Option<char>) -> bool {
    const LEADERS: [char; 13] = [
        '"', '\'', '$', '-', '.', '\\', '\u{2013}', '\u{2014}', '\u{2018}', '\u{2019}', '\u{201c}',
        '\u{201d}', '\u{2026}',
    ];
    let Some(ch) = before else {
        return true;
    };
    ch.is_whitespace() || LEADERS.contains(&ch)
}
664
665/// Whether a quote opens a span here: its preceding character permits opening and a non-whitespace
666/// character follows it. A quote followed by whitespace (or the run's end) cannot open — it reads as
667/// a closing glyph or apostrophe.
668fn opens_quote(before: Option<char>, after: Option<char>) -> bool {
669    open_context(before) && after.is_some_and(|next| !next.is_whitespace())
670}
671
/// Whether a quote at this position may end a quoted span. A double quote always may. A single
/// quote may unless an alphanumeric character follows it directly — there it is an apostrophe
/// inside a word (`it's`), not a closing quote.
fn can_close_quote(ch: char, after: Option<char>) -> bool {
    match (ch, after) {
        ('"', _) => true,
        (_, Some(next)) => !next.is_alphanumeric(),
        (_, None) => true,
    }
}
681
682/// The directional glyph an unpaired straight quote becomes. A single quote always becomes the right
683/// single glyph (`’`), which doubles as the apostrophe. A double quote becomes the left glyph (`“`)
684/// only where it reads as an opener — its preceding character permits opening (start of run,
685/// whitespace, a dash, or one of the other leading characters) and a non-space character follows it;
686/// otherwise it becomes the right glyph (`”`).
687fn directional_quote(ch: char, before: Option<char>, after: Option<char>) -> char {
688    if ch == '\'' {
689        return '\u{2019}';
690    }
691    if opens_quote(before, after) {
692        '\u{201c}'
693    } else {
694        '\u{201d}'
695    }
696}
697
/// The glyph one half of a matched quote pair contributes inside a code span, where a pair is
/// written as two glyphs instead of a `Quoted` node. `open` selects the left glyph (`‘` or `“`),
/// otherwise the right glyph (`’` or `”`); a `'` selects the single glyphs, any other character
/// the double ones.
fn paired_code_glyph(ch: char, open: bool) -> char {
    if ch == '\'' {
        if open { '\u{2018}' } else { '\u{2019}' }
    } else if open {
        '\u{201c}'
    } else {
        '\u{201d}'
    }
}
709
/// One position in the run after quote classification: settled text, a break node, or a quote with
/// the context flags that decide whether it may open or close a span and the glyph it falls back to.
enum Item {
    /// A stretch of consecutive non-quote characters, coalesced into one string.
    Text(String),
    /// A break inline carried over from the run unchanged.
    Break(Inline),
    /// A quote delimiter together with its classification.
    Quote(QuoteItem),
}
717
/// A classified quote delimiter: its kind, whether its surrounding characters let it open or close a
/// span, the glyph it becomes when it stays unmatched, and (once matching runs) the index of the
/// partner it pairs with.
struct QuoteItem {
    /// The straight quote character itself, `'` or `"`.
    ch: char,
    /// Whether the surrounding characters allow this quote to open a span.
    can_open: bool,
    /// Whether the following character allows this quote to close a span.
    can_close: bool,
    /// The directional glyph used when the quote ends up unmatched.
    glyph: char,
    /// Index, within the item list, of the quote this one is paired with; `None` until matched.
    partner: Option<usize>,
}
728
729/// Resolve a single flattened text run into inlines by pairing its quotes. A first pass classifies
730/// each quote by its context; a second matches openers to closers; the matched pairs become
731/// `Quoted` spans and every unmatched quote becomes its directional glyph.
732fn resolve_run(run: &[RunTok]) -> Vec<Inline> {
733    let mut items = classify_run(run);
734    match_quotes(&mut items);
735    render_items(&items, &mut 0)
736}
737
738/// Classify the run into [`Item`]s: consecutive characters coalesce into one text item, breaks pass
739/// through, and each quote records whether its context lets it open or close and the glyph it falls
740/// back to.
741fn classify_run(run: &[RunTok]) -> Vec<Item> {
742    let context = run_context(run);
743    let mut items = Vec::new();
744    for (index, tok) in run.iter().enumerate() {
745        match tok {
746            RunTok::Char(ch) => match items.last_mut() {
747                Some(Item::Text(text)) => text.push(*ch),
748                _ => items.push(Item::Text(ch.to_string())),
749            },
750            RunTok::Break(brk) => items.push(Item::Break(brk.clone())),
751            RunTok::Quote(ch) => {
752                let (before, after) = context.get(index).copied().unwrap_or((None, None));
753                items.push(Item::Quote(QuoteItem {
754                    ch: *ch,
755                    can_open: opens_quote(before, after),
756                    can_close: can_close_quote(*ch, after),
757                    glyph: directional_quote(*ch, before, after),
758                    partner: None,
759                }));
760            }
761        }
762    }
763    items
764}
765
/// Match quote openers to closers across the classified run with a stack of still-open quotes.
/// Scanning left to right, a quote of a kind already open closes that span (recorded as a mutual
/// partner link), abandoning any inner openers of the other kind that never closed — so a span does
/// not straddle a closed inner span. A quote with no open partner of its kind opens a new span where
/// its context permits; quotes of one kind do not nest within their own kind. A single quote never
/// forms an empty pair, so `''` stays two apostrophes.
///
/// Only the `partner` field of quote items is modified; text and break items are skipped.
fn match_quotes(items: &mut [Item]) {
    // Indices (into `items`) of openers that have not been closed yet, outermost first.
    let mut open: Vec<usize> = Vec::new();
    for index in 0..items.len() {
        let Some(Item::Quote(quote)) = items.get(index) else {
            continue;
        };
        // Copy the flags out so `items` can be mutated below.
        let (ch, can_open, can_close) = (quote.ch, quote.can_open, quote.can_close);
        // Stack position of the innermost open quote of the same kind, if any.
        let open_same = open.iter().rposition(|&i| quote_at(items, i) == ch);
        if can_close
            && let Some(stack_pos) = open_same
            && let Some(&opener) = open.get(stack_pos)
            // A single quote directly after its opener would enclose nothing: do not pair.
            && !(ch == '\'' && opener + 1 == index)
        {
            // Dropping the opener also drops every opener pushed after it.
            open.truncate(stack_pos);
            set_partner(items, opener, index);
            set_partner(items, index, opener);
        } else if open_same.is_none() && can_open {
            // No span of this kind is open, so this quote may start one. When one is open but
            // this quote could not close it, the quote is left unmatched.
            open.push(index);
        }
    }
}
793
794/// The kind of the quote item at `index`, or a placeholder that matches nothing.
795fn quote_at(items: &[Item], index: usize) -> char {
796    match items.get(index) {
797        Some(Item::Quote(quote)) => quote.ch,
798        _ => '\0',
799    }
800}
801
802fn set_partner(items: &mut [Item], index: usize, partner: usize) {
803    if let Some(Item::Quote(quote)) = items.get_mut(index) {
804        quote.partner = Some(partner);
805    }
806}
807
808/// Render the classified, matched items into inlines starting at `*cursor`, consuming items until
809/// the run ends or a closing quote whose opener precedes `*cursor` is reached. A matched opening
810/// quote recurses to gather its span's content into a `Quoted`; an unmatched quote contributes its
811/// directional glyph; text and breaks pass through.
812fn render_items(items: &[Item], cursor: &mut usize) -> Vec<Inline> {
813    let mut out: Vec<Inline> = Vec::new();
814    let mut pending = String::new();
815    let flush = |pending: &mut String, out: &mut Vec<Inline>| {
816        if !pending.is_empty() {
817            out.push(Inline::Str(std::mem::take(pending).into()));
818        }
819    };
820    while let Some(item) = items.get(*cursor) {
821        match item {
822            Item::Text(text) => {
823                pending.push_str(text);
824                *cursor += 1;
825            }
826            Item::Break(brk) => {
827                flush(&mut pending, &mut out);
828                out.push(brk.clone());
829                *cursor += 1;
830            }
831            Item::Quote(quote) => match quote.partner {
832                Some(partner) if partner > *cursor => {
833                    flush(&mut pending, &mut out);
834                    let ch = quote.ch;
835                    *cursor += 1;
836                    let inner = render_items(items, cursor);
837                    // Step past the closing partner that ended the recursion.
838                    *cursor += 1;
839                    out.push(Inline::Quoted(quote_kind(ch), inner));
840                }
841                Some(_) => {
842                    // The closing partner of an open span: stop so the opener's frame collects it.
843                    break;
844                }
845                None => {
846                    pending.push(quote.glyph);
847                    *cursor += 1;
848                }
849            },
850        }
851    }
852    flush(&mut pending, &mut out);
853    out
854}
855
856/// For each token in the run, the character immediately before and after it (skipping nothing —
857/// breaks count as spaces, run edges as `None`). Used to decide quote flanking with full context.
858fn run_context(run: &[RunTok]) -> Vec<(Option<char>, Option<char>)> {
859    let plain: Vec<Option<char>> = run
860        .iter()
861        .map(|tok| match tok {
862            RunTok::Char(ch) | RunTok::Quote(ch) => Some(*ch),
863            RunTok::Break(_) => Some(' '),
864        })
865        .collect();
866    (0..run.len())
867        .map(|i| {
868            let before = i
869                .checked_sub(1)
870                .and_then(|j| plain.get(j))
871                .copied()
872                .flatten();
873            let after = plain.get(i + 1).copied().flatten();
874            (before, after)
875        })
876        .collect()
877}
878
879fn quote_kind(ch: char) -> QuoteType {
880    if ch == '\'' {
881        QuoteType::SingleQuote
882    } else {
883        QuoteType::DoubleQuote
884    }
885}
886
// Unit tests for the outline reader: header nesting, `text` and `_note` attribute parsing, link
// outlines, head metadata, entity decoding, and smart typography (quotes, dashes, ellipses).
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` with default reader options, panicking if the reader returns an error.
    fn read(input: &str) -> Document {
        OpmlReader
            .read(input, &ReaderOptions::default())
            .expect("outline input parses")
    }

    /// Collect `(level, flattened text)` for every header block of the document, in order.
    fn headers(document: &Document) -> Vec<(i32, String)> {
        document
            .blocks
            .iter()
            .filter_map(|block| match block {
                Block::Header(level, _, inlines) => Some((*level, inline_text(inlines))),
                _ => None,
            })
            .collect()
    }

    /// Flatten inlines to plain text: a `Str` contributes its text, a `Space` one space, and every
    /// other inline contributes nothing.
    fn inline_text(inlines: &[Inline]) -> String {
        inlines
            .iter()
            .map(|inline| match inline {
                Inline::Str(text) => text.as_str(),
                Inline::Space => " ",
                _ => "",
            })
            .collect()
    }

    #[test]
    fn nesting_assigns_header_levels() {
        let document = read(
            "<opml><body>\
             <outline text=\"A\">\
             <outline text=\"B\"><outline text=\"C\"/></outline>\
             </outline>\
             </body></opml>",
        );
        assert_eq!(
            headers(&document),
            [
                (1, "A".to_owned()),
                (2, "B".to_owned()),
                (3, "C".to_owned()),
            ]
        );
    }

    #[test]
    fn sibling_outlines_share_a_level() {
        let document = read("<opml><body><outline text=\"A\"/><outline text=\"B\"/></body></opml>");
        assert_eq!(
            headers(&document),
            [(1, "A".to_owned()), (1, "B".to_owned())]
        );
    }

    #[test]
    fn note_attribute_parses_as_markdown() {
        let document = read("<opml><body><outline text=\"H\" _note=\"**b**\"/></body></opml>");
        // The header comes first; the blocks parsed from the note follow it.
        assert!(matches!(
            document.blocks.first(),
            Some(Block::Header(1, _, _))
        ));
        let Some(Block::Para(inlines)) = document.blocks.get(1) else {
            panic!("expected the note to parse into a paragraph");
        };
        assert!(matches!(inlines.first(), Some(Inline::Strong(_))));
    }

    #[test]
    fn text_attribute_tokenizes_on_whitespace() {
        // The three spaces between the words are expected to yield a single `Space` inline.
        let document = read("<opml><body><outline text=\"Hello   World\"/></body></opml>");
        let Some(Block::Header(_, _, inlines)) = document.blocks.first() else {
            panic!("expected a header");
        };
        assert!(matches!(
            inlines.as_slice(),
            [Inline::Str(first), Inline::Space, Inline::Str(second)]
                if first == "Hello" && second == "World"
        ));
    }

    /// Parse `input` and return the inlines of its first block, which must be a header; panics
    /// otherwise.
    fn first_header_inlines(input: &str) -> Vec<Inline> {
        let document = read(input);
        match document.blocks.into_iter().next() {
            Some(Block::Header(_, _, inlines)) => inlines,
            _ => panic!("expected a header"),
        }
    }

    /// Build a minimal document with one top-level outline whose `text` attribute is `text`. The
    /// value is interpolated verbatim into a double-quoted attribute, so callers pass it already
    /// XML-escaped.
    fn outline(text: &str) -> String {
        format!("<opml><body><outline text=\"{text}\"/></body></opml>")
    }

    #[test]
    fn text_attribute_parses_inline_html_markup() {
        let inlines = first_header_inlines(&outline(
            "&lt;strong&gt;Bold&lt;/strong&gt; and &lt;em&gt;it&lt;/em&gt;",
        ));
        assert_eq!(
            inlines,
            vec![
                Inline::Strong(vec![Inline::Str("Bold".to_owned().into())]),
                Inline::Space,
                Inline::Str("and".to_owned().into()),
                Inline::Space,
                Inline::Emph(vec![Inline::Str("it".to_owned().into())]),
            ]
        );
    }

    #[test]
    fn text_attribute_decodes_entities_twice_then_parses_code() {
        // The XML layer decodes the attribute once (`&amp;amp;` becomes `&amp;`); the inline parse
        // decodes again (`&amp;` becomes `&`) and reads the `<code>` element.
        let inlines = first_header_inlines(&outline("a &lt;code&gt;c&lt;/code&gt; b &amp;amp; z"));
        assert_eq!(
            inlines,
            vec![
                Inline::Str("a".to_owned().into()),
                Inline::Space,
                Inline::Code(Box::default(), "c".to_owned().into()),
                Inline::Space,
                Inline::Str("b".to_owned().into()),
                Inline::Space,
                Inline::Str("&".to_owned().into()),
                Inline::Space,
                Inline::Str("z".to_owned().into()),
            ]
        );
    }

    #[test]
    fn text_attribute_parses_nested_markup() {
        let inlines = first_header_inlines(&outline(
            "&lt;strong&gt;&lt;em&gt;both&lt;/em&gt;&lt;/strong&gt;",
        ));
        assert_eq!(
            inlines,
            vec![Inline::Strong(vec![Inline::Emph(vec![Inline::Str(
                "both".to_owned().into()
            )])])]
        );
    }

    #[test]
    fn text_attribute_parses_superscript_and_subscript() {
        let inlines = first_header_inlines(&outline(
            "x&lt;sup&gt;2&lt;/sup&gt;&lt;sub&gt;n&lt;/sub&gt;",
        ));
        assert_eq!(
            inlines,
            vec![
                Inline::Str("x".to_owned().into()),
                Inline::Superscript(vec![Inline::Str("2".to_owned().into())]),
                Inline::Subscript(vec![Inline::Str("n".to_owned().into())]),
            ]
        );
    }

    #[test]
    fn text_attribute_parses_an_anchor_into_a_link() {
        let inlines = first_header_inlines(&outline(
            "&lt;a href=&quot;http://e.com&quot;&gt;l&lt;/a&gt;",
        ));
        let Some(Inline::Link(_, label, target)) = inlines.first() else {
            panic!("expected a link");
        };
        assert_eq!(label, &vec![Inline::Str("l".to_owned().into())]);
        assert_eq!(target.url, "http://e.com");
    }

    #[test]
    fn named_character_reference_in_text_decodes_once_decoded() {
        // `&amp;copy;` survives the XML decode as `&copy;`, which the inline parse turns into ©.
        let inlines = first_header_inlines(&outline("c &amp;copy; r"));
        assert_eq!(
            inlines,
            vec![
                Inline::Str("c".to_owned().into()),
                Inline::Space,
                Inline::Str("\u{a9}".to_owned().into()),
                Inline::Space,
                Inline::Str("r".to_owned().into()),
            ]
        );
    }

    #[test]
    fn link_outline_wraps_heading_in_a_link_to_its_url() {
        let document = read(
            "<opml><body><outline type=\"link\" text=\"Site\" url=\"http://e.com/p\"/></body></opml>",
        );
        let Some(Block::Header(1, _, inlines)) = document.blocks.first() else {
            panic!("expected a header");
        };
        let Some(Inline::Link(_, label, target)) = inlines.first() else {
            panic!("expected a link heading");
        };
        assert_eq!(label, &vec![Inline::Str("Site".to_owned().into())]);
        assert_eq!(target.url, "http://e.com/p");
        assert_eq!(target.title, "");
    }

    #[test]
    fn link_outline_without_url_links_to_an_empty_target() {
        // The type is spelled `LINK` here, so this also expects the link type to match regardless
        // of letter case.
        let document = read("<opml><body><outline type=\"LINK\" text=\"Site\"/></body></opml>");
        let Some(Block::Header(_, _, inlines)) = document.blocks.into_iter().next() else {
            panic!("expected a header");
        };
        let Some(Inline::Link(_, _, target)) = inlines.first() else {
            panic!("expected a link heading");
        };
        assert_eq!(target.url, "");
    }

    #[test]
    fn non_link_outline_with_a_url_keeps_a_plain_heading() {
        let document =
            read("<opml><body><outline text=\"Site\" url=\"http://e.com/p\"/></body></opml>");
        let Some(Block::Header(_, _, inlines)) = document.blocks.first() else {
            panic!("expected a header");
        };
        assert_eq!(inlines.as_slice(), [Inline::Str("Site".to_owned().into())]);
    }

    #[test]
    fn missing_text_attribute_yields_an_empty_heading() {
        let document = read("<opml><body><outline/></body></opml>");
        assert_eq!(headers(&document), [(1, String::new())]);
    }

    #[test]
    fn single_quoted_attributes_are_read() {
        let document = read("<opml><body><outline text='quoted'/></body></opml>");
        assert_eq!(headers(&document), [(1, "quoted".to_owned())]);
    }

    #[test]
    fn comments_instructions_and_doctype_are_skipped() {
        let document = read(
            "<?xml version=\"1.0\"?><!DOCTYPE opml><opml><!-- c -->\
             <body><outline text=\"A\"/></body></opml>",
        );
        assert_eq!(headers(&document), [(1, "A".to_owned())]);
    }

    #[test]
    fn metadata_is_drawn_from_the_head() {
        let document = read(
            "<opml><head><title>T</title><ownerName>Me</ownerName>\
             <dateModified>2020</dateModified></head><body></body></opml>",
        );
        assert!(matches!(
            document.meta.get("title"),
            Some(MetaValue::MetaInlines(inlines)) if inline_text(inlines) == "T"
        ));
        assert!(matches!(
            document.meta.get("date"),
            Some(MetaValue::MetaInlines(inlines)) if inline_text(inlines) == "2020"
        ));
        let Some(MetaValue::MetaList(authors)) = document.meta.get("author") else {
            panic!("expected an author list");
        };
        assert!(matches!(
            authors.first(),
            Some(MetaValue::MetaInlines(inlines)) if inline_text(inlines) == "Me"
        ));
    }

    #[test]
    fn absent_owner_yields_an_empty_author_list() {
        // The `author` key is still expected to be present; its list is simply empty.
        let document = read("<opml><head><title>T</title></head><body></body></opml>");
        assert!(matches!(
            document.meta.get("author"),
            Some(MetaValue::MetaList(authors)) if authors.is_empty()
        ));
    }

    #[test]
    fn named_entities_decode() {
        assert_eq!(
            decode_entities("a &amp; b &lt;c&gt; &quot;d&quot; &apos;e&apos;"),
            "a & b <c> \"d\" 'e'"
        );
    }

    #[test]
    fn numeric_entities_decode_in_decimal_and_hex() {
        assert_eq!(decode_entities("&#65;&#x42;&#X43;"), "ABC");
    }

    #[test]
    fn malformed_or_unknown_references_are_left_verbatim() {
        assert_eq!(decode_entities("&amp"), "&amp");
        assert_eq!(decode_entities("&nosuch;"), "&nosuch;");
        assert_eq!(decode_entities("&#zz;"), "&#zz;");
        assert_eq!(decode_entities("bare & text"), "bare & text");
    }

    #[test]
    fn malformed_markup_does_not_panic() {
        // The documents are discarded: the test only requires that each read succeeds without
        // panicking (the `read` helper itself panics on an error result).
        let _ = read("<opml><body><outline text=\"x\"><outline text=\"y\"></body>");
        let _ = read("<<<>>><opml attr");
        let _ = read("");
    }

    /// Return a copy of the `title` metadata inlines, panicking if the key is absent or is not a
    /// `MetaInlines` value.
    fn title_inlines(document: &Document) -> Vec<Inline> {
        match document.meta.get("title") {
            Some(MetaValue::MetaInlines(inlines)) => inlines.clone(),
            _ => panic!("expected title inlines"),
        }
    }

    #[test]
    fn text_attribute_pairs_double_quotes_into_a_quoted_span() {
        let inlines = first_header_inlines(&outline("&quot;hi&quot;"));
        assert_eq!(
            inlines,
            vec![Inline::Quoted(
                QuoteType::DoubleQuote,
                vec![Inline::Str("hi".to_owned().into())]
            )]
        );
    }

    #[test]
    fn text_attribute_pairs_single_quotes_into_a_quoted_span() {
        let inlines = first_header_inlines(&outline("&apos;hi&apos;"));
        assert_eq!(
            inlines,
            vec![Inline::Quoted(
                QuoteType::SingleQuote,
                vec![Inline::Str("hi".to_owned().into())]
            )]
        );
    }

    #[test]
    fn text_attribute_curls_an_apostrophe() {
        let inlines = first_header_inlines(&outline("it&apos;s"));
        assert_eq!(inlines, vec![Inline::Str("it\u{2019}s".to_owned().into())]);
    }

    #[test]
    fn text_attribute_folds_dashes_and_ellipsis() {
        let inlines = first_header_inlines(&outline("a---b--c...d"));
        // Three hyphens fold to an em dash, two to an en dash, three dots to an ellipsis.
        assert_eq!(
            inlines,
            vec![Inline::Str(
                "a\u{2014}b\u{2013}c\u{2026}d".to_owned().into()
            )]
        );
    }

    #[test]
    fn dash_runs_fold_greedily_to_em_dashes() {
        assert_eq!(fold_dash_run(1), "-");
        assert_eq!(fold_dash_run(2), "\u{2013}");
        assert_eq!(fold_dash_run(3), "\u{2014}");
        assert_eq!(fold_dash_run(4), "\u{2014}-");
        assert_eq!(fold_dash_run(5), "\u{2014}\u{2013}");
        assert_eq!(fold_dash_run(6), "\u{2014}\u{2014}");
        assert_eq!(fold_dash_run(7), "\u{2014}\u{2014}-");
    }

    #[test]
    fn ellipsis_runs_fold_per_group_of_three() {
        assert_eq!(fold_ellipsis_run(1), ".");
        assert_eq!(fold_ellipsis_run(2), "..");
        assert_eq!(fold_ellipsis_run(3), "\u{2026}");
        assert_eq!(fold_ellipsis_run(4), "\u{2026}.");
        assert_eq!(fold_ellipsis_run(6), "\u{2026}\u{2026}");
    }

    #[test]
    fn text_attribute_resolves_an_unpaired_double_quote_directionally() {
        // An opener-context quote followed by a word becomes the left glyph; one with no following
        // word becomes the right glyph.
        let opener = first_header_inlines(&outline("&quot;open only"));
        assert_eq!(
            opener.first(),
            Some(&Inline::Str("\u{201c}open".to_owned().into()))
        );
        let closer = first_header_inlines(&outline("close only&quot;"));
        assert_eq!(
            closer.last(),
            Some(&Inline::Str("only\u{201d}".to_owned().into()))
        );
    }

    #[test]
    fn double_quotes_do_not_nest_within_their_own_kind() {
        // The inner double quote closes the outer span rather than nesting; the rest stay glyphs.
        let inlines = first_header_inlines(&outline("&quot;a &quot;b&quot; c&quot;"));
        assert_eq!(
            inlines,
            vec![
                Inline::Quoted(
                    QuoteType::DoubleQuote,
                    vec![Inline::Str("a".to_owned().into()), Inline::Space]
                ),
                Inline::Str("b\u{201d}".to_owned().into()),
                Inline::Space,
                Inline::Str("c\u{201d}".to_owned().into()),
            ]
        );
    }

    #[test]
    fn a_different_quote_kind_nests() {
        let inlines = first_header_inlines(&outline("&quot;a &apos;b&apos; c&quot;"));
        assert_eq!(
            inlines,
            vec![Inline::Quoted(
                QuoteType::DoubleQuote,
                vec![
                    Inline::Str("a".to_owned().into()),
                    Inline::Space,
                    Inline::Quoted(
                        QuoteType::SingleQuote,
                        vec![Inline::Str("b".to_owned().into())]
                    ),
                    Inline::Space,
                    Inline::Str("c".to_owned().into()),
                ]
            )]
        );
    }

    #[test]
    fn two_straight_single_quotes_stay_apostrophes() {
        // Adjacent single quotes must not form an empty quoted span; both render as glyphs.
        let inlines = first_header_inlines(&outline("&apos;&apos;"));
        assert_eq!(
            inlines,
            vec![Inline::Str("\u{2019}\u{2019}".to_owned().into())]
        );
    }

    #[test]
    fn code_span_curls_quotes_into_glyph_pairs() {
        let inlines = first_header_inlines(&outline("&lt;code&gt;&apos;q&apos;&lt;/code&gt;"));
        // A matched pair inside a code span renders as its left and right glyphs, not a Quoted node.
        assert_eq!(
            inlines,
            vec![Inline::Code(
                Box::default(),
                "\u{2018}q\u{2019}".to_owned().into()
            )]
        );
    }

    #[test]
    fn code_span_curls_an_apostrophe_and_folds_dashes() {
        let inlines = first_header_inlines(&outline("&lt;code&gt;it&apos;s --- x&lt;/code&gt;"));
        assert_eq!(
            inlines,
            vec![Inline::Code(
                Box::default(),
                "it\u{2019}s \u{2014} x".to_owned().into()
            )]
        );
    }

    #[test]
    fn smart_typography_recurses_into_inline_markup() {
        let inlines = first_header_inlines(&outline("&lt;em&gt;&quot;hi&quot;&lt;/em&gt;"));
        assert_eq!(
            inlines,
            vec![Inline::Emph(vec![Inline::Quoted(
                QuoteType::DoubleQuote,
                vec![Inline::Str("hi".to_owned().into())]
            )])]
        );
    }

    #[test]
    fn note_body_uses_the_markdown_preset() {
        // A definition list is a Markdown-dialect construct absent from bare CommonMark; its presence
        // confirms the note body is parsed with the extended Markdown extension set.
        let document = read(
            "<opml><body><outline text=\"H\" _note=\"Term&#10;:   Definition\"/></body></opml>",
        );
        assert!(
            document
                .blocks
                .iter()
                .any(|block| matches!(block, Block::DefinitionList(_))),
            "expected the note to parse a definition list"
        );
    }

    #[test]
    fn note_body_applies_smart_typography() {
        let document = read("<opml><body><outline text=\"H\" _note=\"it&apos;s\"/></body></opml>");
        let Some(Block::Para(inlines)) = document.blocks.get(1) else {
            panic!("expected a note paragraph");
        };
        assert_eq!(inlines, &vec![Inline::Str("it\u{2019}s".to_owned().into())]);
    }

    #[test]
    fn metadata_keeps_straight_quotes_dashes_and_dots() {
        // Document metadata is not smart-transformed: its punctuation stays verbatim.
        let document = read(
            "<opml><head><title>&quot;a&quot; --- it&apos;s ...</title></head><body></body></opml>",
        );
        assert_eq!(
            title_inlines(&document),
            vec![
                Inline::Str("\"a\"".to_owned().into()),
                Inline::Space,
                Inline::Str("---".to_owned().into()),
                Inline::Space,
                Inline::Str("it's".to_owned().into()),
                Inline::Space,
                Inline::Str("...".to_owned().into()),
            ]
        );
    }

    #[test]
    fn metadata_preserves_boundary_whitespace_as_space() {
        // Leading and trailing whitespace runs are each expected to survive as one `Space`.
        let document = read("<opml><head><title>  a b  </title></head><body></body></opml>");
        assert_eq!(
            title_inlines(&document),
            vec![
                Inline::Space,
                Inline::Str("a".to_owned().into()),
                Inline::Space,
                Inline::Str("b".to_owned().into()),
                Inline::Space,
            ]
        );
    }

    #[test]
    fn metadata_turns_an_internal_newline_into_a_soft_break() {
        let document =
            read("<opml><head><title>line one\nline two</title></head><body></body></opml>");
        assert_eq!(
            title_inlines(&document),
            vec![
                Inline::Str("line".to_owned().into()),
                Inline::Space,
                Inline::Str("one".to_owned().into()),
                Inline::SoftBreak,
                Inline::Str("line".to_owned().into()),
                Inline::Space,
                Inline::Str("two".to_owned().into()),
            ]
        );
    }

    #[test]
    fn present_but_empty_owner_contributes_an_empty_author() {
        // A present `ownerName`, even with empty content, yields one author entry — distinct from an
        // absent element, which yields none.
        let document = read("<opml><head><ownerName></ownerName></head><body></body></opml>");
        let Some(MetaValue::MetaList(authors)) = document.meta.get("author") else {
            panic!("expected an author list");
        };
        assert_eq!(authors, &vec![MetaValue::MetaInlines(Vec::new())]);
    }
}