Skip to main content

dmc_parser/
slugger.rs

1//! GitHub-style heading-anchor slug generator.
2//!
3//! Mirrors `github-slugger` (the npm package used by `rehype-slug`):
4//! lowercase, drop ASCII punctuation/symbols, drop control chars, replace
5//! whitespace runs with `-`, collapse repeated `-`, trim. Punctuation is
6//! stripped (NOT replaced), so `0.4.3` -> `043`, `It's` -> `its` -
7//! matching velite output.
8//!
9//! For dedupe (`#patch-changes`, `#patch-changes-1`, `#patch-changes-2`)
10//! use [`Slugger`], which threads a count map across one document's
11//! headings.
12
13use std::collections::HashMap;
14
/// Compute the GitHub-style slug for a single heading, ignoring dedupe.
/// For document-wide dedupe, use [`Slugger::slug`].
///
/// The input is trimmed and lowercased, then processed char by char:
///
/// - control characters are dropped (this check runs first, so tabs and
///   newlines inside the heading are removed rather than turned into `-`);
/// - whitespace and literal `-` are both separators: a run of them becomes
///   a single `-`, and no `-` is ever emitted at the start of the slug;
/// - Unicode letters, digits and `_` are kept as-is;
/// - everything else (`.`, `'`, `:`, ...) is stripped, not replaced.
///
/// A trailing `-` is removed, so the result never starts or ends with `-`.
/// Input with nothing worth keeping yields an empty string.
pub fn github_slugify(input: &str) -> String {
  let lower = input.trim().to_lowercase();
  // Characters are only ever dropped or mapped 1:1, so the slug cannot
  // outgrow the lowercased input.
  let mut out = String::with_capacity(lower.len());
  for ch in lower.chars() {
    if ch.is_control() {
      continue;
    }
    if ch.is_whitespace() || ch == '-' {
      // One rule for both separator kinds: never lead with '-', and never
      // emit two in a row. Stripped punctuation does not touch `out`, so
      // `a . b` still collapses to `a-b`.
      if !out.is_empty() && !out.ends_with('-') {
        out.push('-');
      }
    } else if ch.is_alphanumeric() || ch == '_' {
      out.push(ch);
    }
  }
  // Runs are collapsed above, so at most one trailing '-' can remain.
  if out.ends_with('-') {
    out.pop();
  }
  out
}
54
55/// Document-scoped slugger. Tracks how many times each base slug has
56/// already been emitted; collisions get a `-1`, `-2`, ... suffix.
57#[derive(Debug, Default)]
58pub struct Slugger {
59  seen: HashMap<String, u32>,
60}
61
62impl Slugger {
63  pub fn new() -> Self {
64    Self { seen: HashMap::new() }
65  }
66
67  /// Compute the slug for `text`, suffixing with `-N` when the base slug
68  /// has been emitted `N` times before. Empty input -> empty string;
69  /// dedupe still applies, so two empty headings get `""` and `"-1"`.
70  pub fn slug(&mut self, text: &str) -> String {
71    let base = github_slugify(text);
72    let count = self.seen.entry(base.clone()).or_insert(0);
73    let out = if *count == 0 { base.clone() } else { format!("{}-{}", base, *count) };
74    *count += 1;
75    out
76  }
77}
78
#[cfg(test)]
mod tests {
  use super::*;

  // Dots are stripped outright, not replaced with a separator.
  #[test]
  fn strips_dots() {
    let slug = github_slugify("0.4.3");
    assert_eq!(slug, "043");
  }

  // Apostrophes vanish without splitting the word.
  #[test]
  fn strips_apostrophes() {
    let slug = github_slugify("How It's Built");
    assert_eq!(slug, "how-its-built");
  }

  // A single space between words becomes a single dash.
  #[test]
  fn replaces_spaces_with_dash() {
    let slug = github_slugify("Patch Changes");
    assert_eq!(slug, "patch-changes");
  }

  // Mixed space/dash runs and plain space runs both collapse to one dash.
  #[test]
  fn collapses_runs() {
    let cases = [("Hello -- World", "hello-world"), ("foo   bar", "foo-bar")];
    for (input, expected) in cases.iter() {
      assert_eq!(github_slugify(input), *expected);
    }
  }

  // Repeating the same heading yields the bare slug, then `-1`, then `-2`.
  #[test]
  fn dedupes() {
    let mut slugger = Slugger::new();
    let emitted: Vec<String> = (0..3).map(|_| slugger.slug("Patch Changes")).collect();
    assert_eq!(emitted, ["patch-changes", "patch-changes-1", "patch-changes-2"]);
  }

  // Underscores and single interior dashes pass through untouched.
  #[test]
  fn keeps_underscores_and_existing_dashes() {
    let slug = github_slugify("foo_bar-baz");
    assert_eq!(slug, "foo_bar-baz");
  }
}