use std::ops::Range;
use std::sync::OnceLock;
use pulldown_cmark::{Event, LinkType, Tag};
use regex::Regex;
use rustc_hash::FxHashMap;
use crate::util::regex::compile_static;
/// Largest number of `char`s a raw label may contain before
/// [`NormalisedLabel::from_raw`] rejects it.
// NOTE(review): presumably mirrors CommonMark's 999-character label limit — confirm.
const MAX_LABEL_CHARS: usize = 999;

/// A link label reduced to the canonical form used as a lookup key.
///
/// The wrapped string has no leading or trailing whitespace, every internal
/// run of whitespace is a single `' '`, and every character has been
/// lowercased individually.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) struct NormalisedLabel(String);

impl NormalisedLabel {
    /// Normalises `raw` into a lookup key.
    ///
    /// Returns `None` when `raw` holds more than [`MAX_LABEL_CHARS`] characters
    /// (counted before any trimming) or when it contains nothing but
    /// whitespace.
    pub(crate) fn from_raw(raw: &str) -> Option<Self> {
        // The limit is measured in `char`s, not bytes, and includes any
        // surrounding whitespace.
        if raw.chars().count() > MAX_LABEL_CHARS {
            return None;
        }

        let mut folded = String::with_capacity(raw.len());
        for word in raw.split_whitespace() {
            if !folded.is_empty() {
                folded.push(' ');
            }
            // Lowercase one `char` at a time rather than via `str::to_lowercase`,
            // so the mapping never depends on neighbouring characters.
            folded.extend(word.chars().flat_map(char::to_lowercase));
        }

        if folded.is_empty() {
            None
        } else {
            Some(Self(folded))
        }
    }

    /// Borrows the normalised text (test builds only).
    #[cfg(test)]
    pub(crate) fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
/// Small copyable identifier for one link target.
///
/// Wraps a `u32`; the field is private to this module.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub(crate) struct ReferenceHandle(u32);
/// One link reference target: its label spelling, destination and optional title.
#[derive(Debug, Clone)]
pub(crate) struct LinkTarget {
    /// Label text in its un-normalised spelling.
    pub(crate) label_raw: String,
    /// Link destination.
    pub(crate) dest: String,
    /// Link title, if there is one.
    pub(crate) title: Option<String>,
    /// Range associated with this target.
    // NOTE(review): presumably a byte range into the Markdown source — confirm
    // against whatever code populates and reads it.
    pub(crate) raw_range: Range<usize>,
}
/// Table of link targets addressable by normalised label.
#[derive(Debug, Default)]
pub(crate) struct ReferenceTable {
    /// Maps each normalised label to the handle of its target.
    by_label: FxHashMap<NormalisedLabel, ReferenceHandle>,
    /// Targets in insertion order; a handle's inner value is an index into this vector.
    targets: Vec<LinkTarget>,
    /// How many insertions were dropped because their label was already present.
    duplicate_count: u32,
}

impl ReferenceTable {
    /// Creates a table with no targets.
    pub(crate) fn empty() -> Self {
        Self::default()
    }

    /// Looks up the handle for `raw` after normalising it.
    ///
    /// Returns `None` when `raw` does not normalise to a valid label or when
    /// no target is registered under the normalised form.
    pub(crate) fn resolve(&self, raw: &str) -> Option<ReferenceHandle> {
        NormalisedLabel::from_raw(raw).and_then(|key| self.by_label.get(&key).copied())
    }

    /// Returns the target behind `h`, or `None` if the index is out of range
    /// (test builds only).
    #[cfg(test)]
    pub(crate) fn target(&self, h: ReferenceHandle) -> Option<&LinkTarget> {
        let ReferenceHandle(index) = h;
        self.targets.get(index as usize)
    }

    /// Iterates over all targets in insertion order.
    pub(crate) fn iter(&self) -> impl Iterator<Item = &LinkTarget> {
        self.targets.iter()
    }

    /// Reports whether the table holds no targets (test builds only).
    #[cfg(test)]
    pub(crate) fn is_empty(&self) -> bool {
        self.targets.is_empty()
    }

    /// Registers a target under the normalised form of `label_raw`.
    ///
    /// Labels that fail normalisation are ignored. The first insertion for a
    /// given normalised label wins; later ones only bump `duplicate_count`
    /// (saturating).
    fn insert(&mut self, label_raw: String, dest: String, title: Option<String>) {
        let Some(key) = NormalisedLabel::from_raw(&label_raw) else {
            return;
        };
        match self.by_label.entry(key) {
            std::collections::hash_map::Entry::Occupied(_) => {
                self.duplicate_count = self.duplicate_count.saturating_add(1);
            }
            std::collections::hash_map::Entry::Vacant(slot) => {
                // The new target's position becomes its handle; a length that
                // does not fit in `u32` saturates to `u32::MAX`.
                let index = u32::try_from(self.targets.len()).unwrap_or(u32::MAX);
                self.targets.push(LinkTarget {
                    label_raw,
                    dest,
                    title,
                    // No range is known at this point; store a placeholder.
                    raw_range: usize::MAX..usize::MAX,
                });
                slot.insert(ReferenceHandle(index));
            }
        }
    }
}
/// Builds a [`ReferenceTable`] from the reference-style links and images in `events`.
///
/// Only `Start` events for links or images whose link type is `Reference`,
/// `Collapsed` or `Shortcut`, and whose `id` is non-empty and normalisable,
/// contribute an entry. The stored label spelling comes from the first
/// definition-like line in `source` carrying the same normalised label, and
/// falls back to the event's own `id` when no such line exists.
#[tracing::instrument(level = "debug", skip(events, source), fields(events = events.len()))]
pub(crate) fn build_reference_table(events: &[Event<'_>], source: &str) -> ReferenceTable {
    let source_casings = scan_def_label_casings(source);
    let mut refs = ReferenceTable::empty();

    for event in events {
        let tag = match event {
            Event::Start(tag) => tag,
            _ => continue,
        };

        // Every non-link variant is listed explicitly so that a new `Tag`
        // variant forces this match to be revisited.
        let (kind, dest_url, title, id) = match tag {
            Tag::Link {
                link_type,
                dest_url,
                title,
                id,
            }
            | Tag::Image {
                link_type,
                dest_url,
                title,
                id,
            } => (*link_type, dest_url, title, id),
            Tag::Paragraph
            | Tag::Heading { .. }
            | Tag::BlockQuote(_)
            | Tag::CodeBlock(_)
            | Tag::HtmlBlock
            | Tag::List(_)
            | Tag::Item
            | Tag::FootnoteDefinition(_)
            | Tag::DefinitionList
            | Tag::DefinitionListTitle
            | Tag::DefinitionListDefinition
            | Tag::Table(_)
            | Tag::TableHead
            | Tag::TableRow
            | Tag::TableCell
            | Tag::Emphasis
            | Tag::Strong
            | Tag::Strikethrough
            | Tag::Superscript
            | Tag::Subscript
            | Tag::MetadataBlock(_) => continue,
        };

        let is_resolved_reference = matches!(
            kind,
            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
        );
        if !is_resolved_reference || id.is_empty() {
            continue;
        }

        let Some(key) = NormalisedLabel::from_raw(id) else {
            continue;
        };

        // Prefer the spelling found in the source text over the event's id.
        let label_raw = match source_casings.get(&key) {
            Some(spelling) => spelling.clone(),
            None => id.to_string(),
        };
        let title = if title.is_empty() {
            None
        } else {
            Some(title.to_string())
        };

        refs.insert(label_raw, dest_url.to_string(), title);
    }

    tracing::debug!(defs = refs.targets.len(), dupes = refs.duplicate_count, "refs built");
    refs
}
/// Collects the original spelling of every definition-like label in `source`.
///
/// Each line is matched against the label-prefix pattern; the captured label
/// is normalised and, if valid, recorded under its normalised form. When
/// several lines share a normalised label, the spelling from the earliest
/// line is kept.
///
/// The scan is purely line-based: it does not consult the Markdown parser.
fn scan_def_label_casings(source: &str) -> FxHashMap<NormalisedLabel, String> {
    let pattern = label_prefix_regex();
    let mut casings: FxHashMap<NormalisedLabel, String> = FxHashMap::default();

    let labels = source
        .lines()
        .filter_map(|line| pattern.captures(line))
        .filter_map(|caps| caps.get(1))
        .map(|found| found.as_str());

    for label in labels {
        if let Some(key) = NormalisedLabel::from_raw(label) {
            // First spelling wins; later duplicates are ignored.
            casings.entry(key).or_insert_with(|| label.to_string());
        }
    }

    casings
}
/// Returns the lazily compiled pattern for the start of a definition-like line.
///
/// The pattern accepts up to three leading spaces, then `[`, a label made of
/// one or more units — each either a character other than `]`, `\` or newline,
/// or a backslash followed by any character — and finally `]:`. Capture
/// group 1 is the label text between the brackets.
fn label_prefix_regex() -> &'static Regex {
    const LABEL_PREFIX: &str = r"^ {0,3}\[((?:[^\]\\\n]|\\.)+)\]:";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    // Compiled on first use and shared for the rest of the process.
    COMPILED.get_or_init(|| compile_static(LABEL_PREFIX))
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::parse;
    use crate::source::{CanonicalSource, Source};

    /// Parses `src` with default options and builds its reference table.
    fn build(src: &str) -> ReferenceTable {
        let source = Source::new(src);
        let events: Vec<Event<'_>> = parse::collect_events(
            CanonicalSource::from_source(&source),
            parse::options(crate::ParseOptions::default()),
        )
        .expect("test Markdown parses");
        build_reference_table(&events, src)
    }

    /// Resolves `label` in `table` and returns its target, panicking if either step fails.
    fn target_of<'t>(table: &'t ReferenceTable, label: &str) -> &'t LinkTarget {
        let handle = table.resolve(label).expect("resolves");
        table.target(handle).expect("target")
    }

    #[test]
    fn normalisation_trims_and_collapses() {
        let label = NormalisedLabel::from_raw(" Foo Bar ").expect("valid");
        assert_eq!(label.as_str(), "foo bar");
    }

    #[test]
    fn normalisation_case_folds() {
        let upper = NormalisedLabel::from_raw("HELLO").expect("a");
        let lower = NormalisedLabel::from_raw("hello").expect("b");
        assert_eq!(upper, lower);
    }

    #[test]
    fn normalisation_rejects_empty() {
        for blank in ["", " "] {
            assert!(NormalisedLabel::from_raw(blank).is_none());
        }
    }

    #[test]
    fn normalisation_rejects_overlong() {
        let too_long = "a".repeat(1000);
        let at_limit = "a".repeat(999);
        assert!(NormalisedLabel::from_raw(&too_long).is_none());
        assert!(NormalisedLabel::from_raw(&at_limit).is_some());
    }

    #[test]
    fn table_resolves_case_insensitively() {
        let table = build("[Foo]: https://example.com\n\n[Foo]\n");
        let found = target_of(&table, "FOO");
        assert_eq!(found.dest, "https://example.com");
        assert_eq!(found.label_raw, "Foo");
    }

    #[test]
    fn table_unresolved_label_returns_none() {
        let table = build("");
        assert!(table.resolve("missing").is_none());
    }

    #[test]
    fn table_skips_unknown_reference_links() {
        let table = build("[missing]\n");
        assert!(table.is_empty());
    }

    #[test]
    fn table_handles_multiline_definition() {
        let table = build("[foo]:\n/url\n\n[foo]\n");
        assert_eq!(target_of(&table, "foo").dest, "/url");
    }

    #[test]
    fn label_casing_preserved_from_def_not_link() {
        let table = build("[Foo]: https://example.com\n\n[FOO]\n");
        assert_eq!(target_of(&table, "foo").label_raw, "Foo");
    }
}