use crate::options::Dialect;
use std::collections::HashSet;
use std::sync::Arc;
use crate::parser::blocks::reference_links::try_parse_reference_definition;
/// Shared, reference-counted set of reference-definition labels.
///
/// Despite the `Map` suffix this is a set of label strings:
/// [`collect_refdef_labels`] fills it with labels that have been passed
/// through [`normalize_label`].
pub type RefdefMap = Arc<HashSet<String>>;
/// Normalizes a reference label so that equivalent spellings compare equal.
///
/// The label is transformed as follows:
/// * leading and trailing whitespace is removed;
/// * every internal run of whitespace becomes a single ASCII space;
/// * each character is replaced by its lowercase mapping;
/// * `ß` (including the one produced by lowercasing `ẞ`) is expanded to `ss`.
///
/// Returns the normalized label as a newly allocated `String`; a label that
/// is empty or whitespace-only yields an empty string.
pub fn normalize_label(label: &str) -> String {
    // Capacity is only a hint: some lowercase mappings are longer than the
    // character they replace.
    let mut normalized = String::with_capacity(label.len());
    // `split_whitespace` both trims the ends and treats each whitespace run
    // as one separator, so words only need re-joining with a single space.
    for (index, word) in label.split_whitespace().enumerate() {
        if index > 0 {
            normalized.push(' ');
        }
        for lowered in word.chars().flat_map(char::to_lowercase) {
            // Expand sharp s while building the string instead of running a
            // second `replace` pass (and allocation) over the finished result.
            // NOTE(review): presumably this approximates Unicode case folding
            // for `ß` — confirm against the label-matching rules in use.
            if lowered == 'ß' {
                normalized.push_str("ss");
            } else {
                normalized.push(lowered);
            }
        }
    }
    normalized
}
/// Scans `input` and gathers the normalized labels of every reference
/// definition it can find.
///
/// The scan proceeds from line start to line start. At each position up to
/// three leading spaces are skipped and the next byte decides what happens:
///
/// * `[` — `try_parse_reference_definition` is run on the remaining input
///   (starting at the line start, indentation included). On success the label
///   is recorded and the scan resumes right after the consumed bytes.
/// * `>` — the blockquote markers of the quoted lines starting here are
///   stripped via `strip_blockquote_line` and the parser is run on the
///   stripped text. On success the label is recorded; the scan then moves on
///   to the next line as usual.
///
/// In every other case (including failed parses) the scan jumps to the byte
/// after the next `\n`, or stops when there is none.
///
/// `dialect` is forwarded unchanged to the parser. The returned set holds
/// labels already passed through `normalize_label`.
pub fn collect_refdef_labels(input: &str, dialect: Dialect) -> RefdefMap {
    let raw = input.as_bytes();
    let mut labels: HashSet<String> = HashSet::new();
    let mut cursor = 0;

    while cursor < raw.len() {
        // Count at most three leading spaces to find the byte that decides
        // how this line is treated.
        let indent = raw[cursor..]
            .iter()
            .take(3)
            .take_while(|&&byte| byte == b' ')
            .count();

        // `input` is sliced only inside the arms below, never up front.
        match raw.get(cursor + indent).copied() {
            Some(b'[') => {
                if let Some((consumed, label, _url, _title)) =
                    try_parse_reference_definition(&input[cursor..], dialect)
                {
                    labels.insert(normalize_label(&label));
                    // Advance by at least one byte so the loop always makes progress.
                    cursor += consumed.max(1);
                    continue;
                }
            }
            Some(b'>') => {
                if let Some(unquoted) = strip_blockquote_line(&input[cursor..])
                    && let Some((_, label, _, _)) =
                        try_parse_reference_definition(&unquoted, dialect)
                {
                    labels.insert(normalize_label(&label));
                }
            }
            _ => {}
        }

        // Move to the start of the next line; no newline left means we are done.
        let Some(newline_offset) = memchr_newline(&raw[cursor..]) else {
            break;
        };
        cursor += newline_offset + 1;
    }

    Arc::new(labels)
}
/// Returns the index of the first `\n` byte in `bytes`, or `None` when the
/// slice contains no newline.
fn memchr_newline(bytes: &[u8]) -> Option<usize> {
    bytes
        .iter()
        .enumerate()
        .find_map(|(offset, &byte)| (byte == b'\n').then_some(offset))
}
/// Reports whether `text` begins with a blockquote marker: at most three
/// leading spaces followed immediately by `>`.
fn line_starts_with_blockquote(text: &str) -> bool {
    let indent = text
        .bytes()
        .take(3)
        .take_while(|&byte| byte == b' ')
        .count();
    matches!(text.as_bytes().get(indent), Some(b'>'))
}
/// Removes the blockquote prefix from the leading run of quoted lines in
/// `text`.
///
/// Returns `None` when `text` does not start with a blockquote marker (see
/// `line_starts_with_blockquote`). Otherwise each line, taken with its
/// trailing `\n`, is processed in order: up to three leading spaces, the `>`
/// and one optional following space are dropped and the rest of the line is
/// appended to the result. Processing stops at the first line that lacks the
/// marker; that line and everything after it are not included.
fn strip_blockquote_line(text: &str) -> Option<String> {
    if !line_starts_with_blockquote(text) {
        return None;
    }

    let mut stripped = String::with_capacity(text.len());
    for line in text.split_inclusive('\n') {
        // Only ASCII spaces are counted, so `indent` is always a char boundary.
        let indent = line
            .bytes()
            .take(3)
            .take_while(|&byte| byte == b' ')
            .count();
        let Some(after_marker) = line[indent..].strip_prefix('>') else {
            break;
        };
        // A single space after the marker belongs to the prefix.
        stripped.push_str(after_marker.strip_prefix(' ').unwrap_or(after_marker));
    }
    Some(stripped)
}
/// Unit tests for label collection, label normalization and the private
/// line-scanning helpers of this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn collects_simple_refdef() {
        let map = collect_refdef_labels("[foo]: /url\n", Dialect::CommonMark);
        assert!(map.contains("foo"));
    }

    #[test]
    fn collects_multiple_refdefs() {
        let input = "[foo]: /a\n[bar]: /b\n[baz]: /c\n";
        let map = collect_refdef_labels(input, Dialect::CommonMark);
        assert!(map.contains("foo"));
        assert!(map.contains("bar"));
        assert!(map.contains("baz"));
    }

    #[test]
    fn does_not_collect_non_refdef_lines() {
        let input = "Just a paragraph.\n\nAnother one.\n";
        let map = collect_refdef_labels(input, Dialect::CommonMark);
        assert!(map.is_empty());
    }

    #[test]
    fn collects_after_paragraph() {
        let input = "Some paragraph.\n\n[foo]: /url\n";
        let map = collect_refdef_labels(input, Dialect::CommonMark);
        assert!(map.contains("foo"));
    }

    #[test]
    fn case_folded_label() {
        let map = collect_refdef_labels("[FOO Bar]: /url\n", Dialect::CommonMark);
        assert!(map.contains("foo bar"));
    }

    #[test]
    fn collapses_internal_whitespace() {
        assert_eq!(normalize_label(" foo bar\tbaz "), "foo bar baz");
    }

    // NOTE(review): the "523" in the name presumably refers to a numbered
    // spec example — confirm and cite it here.
    #[test]
    fn label_523_is_not_collected() {
        let map = collect_refdef_labels("*foo [bar* baz]\n", Dialect::CommonMark);
        assert!(map.is_empty());
    }

    #[test]
    fn folds_sharp_s_to_ss() {
        assert_eq!(normalize_label("Straße"), "strasse");
        // Capital sharp s lowercases to `ß` first and is then expanded.
        assert_eq!(normalize_label("STRAẞE"), "strasse");
    }

    #[test]
    fn blank_label_normalizes_to_empty() {
        assert_eq!(normalize_label(""), "");
        assert_eq!(normalize_label(" \t\n "), "");
    }

    #[test]
    fn finds_first_newline_offset() {
        assert_eq!(memchr_newline(b"ab\ncd\n"), Some(2));
        assert_eq!(memchr_newline(b"no newline"), None);
        assert_eq!(memchr_newline(b""), None);
    }

    #[test]
    fn blockquote_marker_allows_at_most_three_spaces() {
        assert!(line_starts_with_blockquote("> quote"));
        assert!(line_starts_with_blockquote("   > quote"));
        assert!(!line_starts_with_blockquote("    > quote"));
        assert!(!line_starts_with_blockquote("plain"));
        assert!(!line_starts_with_blockquote(""));
    }

    #[test]
    fn strips_markers_from_leading_quoted_lines_only() {
        let input = "> [foo]: /url\n  >second\nplain\n> later\n";
        assert_eq!(
            strip_blockquote_line(input).as_deref(),
            Some("[foo]: /url\nsecond\n")
        );
    }

    #[test]
    fn strip_returns_none_without_marker() {
        assert_eq!(strip_blockquote_line("plain\n> quote\n"), None);
        assert_eq!(strip_blockquote_line(""), None);
    }
}