Skip to main content

dmc_parser/
refs.rs

1//! Link- and footnote-reference tables. CM 4.7 + 6.3 + GFM footnotes require
2//! a two-pass parse: harvest all definitions first, then resolve `[label]` /
3//! `[text][label]` / `[label][]` references during the main parse.
4
5use std::collections::HashMap;
6
/// A resolved link target: `(destination URL, optional title)`.
pub type LinkRef = (String, Option<String>);

/// Table of link reference definitions, keyed by normalized label.
///
/// The map is private; entries are added and looked up through the
/// methods on this type so that every key goes through label normalization.
#[derive(Clone, Debug, Default)]
pub struct RefMap {
  /// Normalized label -> link target.
  links: HashMap<String, LinkRef>,
}
14
15impl RefMap {
16  pub fn new() -> Self {
17    Self::default()
18  }
19
20  /// First definition wins (CM 4.7).
21  pub fn insert(&mut self, label: &str, url: String, title: Option<String>) {
22    let key = normalize_label(label);
23    if !key.is_empty() {
24      self.links.entry(key).or_insert((url, title));
25    }
26  }
27
28  pub fn get(&self, label: &str) -> Option<&LinkRef> {
29    self.links.get(&normalize_label(label))
30  }
31
32  pub fn is_empty(&self) -> bool {
33    self.links.is_empty()
34  }
35}
36
37/// CM 4.7: case-fold + whitespace-collapse, leading/trailing trimmed.
38/// Backslash escapes are NOT unescaped here, so `[foo\!]` and `[foo!]`
39/// match different labels.
40pub fn normalize_label(s: &str) -> String {
41  let mut out = String::with_capacity(s.len());
42  let mut prev_ws = true;
43  for c in s.chars() {
44    if c.is_whitespace() {
45      if !prev_ws {
46        out.push(' ');
47        prev_ws = true;
48      }
49    } else {
50      push_case_folded(&mut out, c);
51      prev_ws = false;
52    }
53  }
54  if out.ends_with(' ') {
55    out.pop();
56  }
57  out
58}
59
/// Append the case-folded form of `c` to `out`.
///
/// CM 4.7/6.3 match labels using Unicode case folding. Lowercasing agrees
/// with that for most code points, but `ß` (U+00DF) folds to `ss`, and
/// capital `ẞ` (U+1E9E) lowercases to `ß`; without the explicit
/// `ß` -> `ss` step, `[ẞ]` would not match `[SS]:`. Full Unicode case
/// folding would need a dedicated table; only this CM-critical case is
/// patched here.
fn push_case_folded(out: &mut String, c: char) {
  c.to_lowercase().for_each(|lowered| match lowered {
    '\u{00DF}' => out.push_str("ss"),
    other => out.push(other),
  });
}
74
/// Parse a `LinkRefDef` lexeme into `(label, url, title)`.
///
/// Expected shape: `[label]: destination` optionally followed by
/// whitespace and a title. The lexer already validated gross structure,
/// so `None` here normally means a missing `]` / `:`, an empty or
/// unterminated destination, or trailing text that is not a well-formed
/// title.
///
/// * `label` is returned raw (not normalized); backslash-escaped bytes
///   inside it, including `\]`, do not terminate it.
/// * The destination is either `<...>` (spaces allowed; no line endings,
///   no unescaped `<`; `\>` does not close it) or a bare run up to the
///   next space, tab, or line ending. It is returned without the angle
///   brackets and without unescaping.
/// * The title is `"..."`, `'...'` or `(...)`. It must be separated from
///   the destination by whitespace, its first unescaped closing delimiter
///   must be the last non-whitespace byte of the lexeme, and it is
///   returned without its delimiters and without unescaping.
///
/// `\r` is treated as a line ending alongside `\n`, so CRLF input does not
/// leak a carriage return into the URL.
pub fn parse_link_ref_def(raw: &str) -> Option<(String, String, Option<String>)> {
  let bytes = raw.as_bytes();
  if bytes.first() != Some(&b'[') {
    return None;
  }

  // Label: scan to the first unescaped `]`. Every stop position is an ASCII
  // byte, so the slices below always fall on char boundaries.
  let mut i = 1usize;
  while i < bytes.len() && bytes[i] != b']' {
    i += if bytes[i] == b'\\' && i + 1 < bytes.len() { 2 } else { 1 };
  }
  if i >= bytes.len() {
    return None;
  }
  let label = raw[1..i].to_string();
  if bytes.get(i + 1) != Some(&b':') {
    return None;
  }

  let dest_start = skip_ref_def_space(bytes, i + 2);
  let (url, dest_end) = if bytes.get(dest_start) == Some(&b'<') {
    let start = dest_start + 1;
    let mut p = start;
    loop {
      // Running off the end means the closing `>` is missing.
      match *bytes.get(p)? {
        b'>' => break,
        b'<' | b'\n' | b'\r' => return None,
        // An escaped byte can never close the destination. A backslash
        // right before a line ending is left for the arm above to reject.
        b'\\' if matches!(bytes.get(p + 1), Some(&c) if c != b'\n' && c != b'\r') => p += 2,
        _ => p += 1,
      }
    }
    (raw[start..p].to_string(), p + 1)
  } else {
    let len = bytes[dest_start..]
      .iter()
      .take_while(|&&b| !is_ref_def_space(b))
      .count();
    if len == 0 {
      return None;
    }
    let end = dest_start + len;
    (raw[dest_start..end].to_string(), end)
  };

  let title_start = skip_ref_def_space(bytes, dest_end);
  let rest = raw[title_start..].trim_end();
  if rest.is_empty() {
    return Some((label, url, None));
  }
  // Something follows the destination: it must be a title, and a title has
  // to be separated from the destination by at least one whitespace byte
  // (this rejects `<bar>(baz)`).
  if title_start == dest_end {
    return None;
  }
  let title = parse_ref_def_title(rest)?;
  Some((label, url, Some(title)))
}

/// Space, tab, or line-ending byte (`\n` / `\r`).
fn is_ref_def_space(b: u8) -> bool {
  matches!(b, b' ' | b'\t' | b'\n' | b'\r')
}

/// Index of the first byte at or after `from` that is not whitespace.
fn skip_ref_def_space(bytes: &[u8], from: usize) -> usize {
  let run = bytes
    .get(from..)
    .map_or(0, |tail| tail.iter().take_while(|&&b| is_ref_def_space(b)).count());
  from + run
}

/// Parse `rest` (already right-trimmed) as a complete delimited title and
/// return its contents without the delimiters.
fn parse_ref_def_title(rest: &str) -> Option<String> {
  let bytes = rest.as_bytes();
  let close = match *bytes.first()? {
    b'"' => b'"',
    b'\'' => b'\'',
    b'(' => b')',
    _ => return None,
  };
  // Find the first unescaped closing delimiter.
  let mut p = 1usize;
  while p < bytes.len() && bytes[p] != close {
    p += if bytes[p] == b'\\' && p + 1 < bytes.len() { 2 } else { 1 };
  }
  // It must exist and be the very last byte: no trailing junk, and the
  // final delimiter must not be backslash-escaped.
  if p + 1 != bytes.len() {
    return None;
  }
  Some(rest[1..p].to_string())
}