1use crate::category::merge_multi_categories;
15use crate::underscore::replace_underscores;
16use crate::unicode::{casefold, normalize_nfkc};
17use once_cell::sync::Lazy;
18use regex::Regex;
19use trim_in_place::TrimInPlace;
20
/// Declares a module-level static named `$ident` holding a lazily built [`Regex`].
///
/// The pattern expression `$pattern` is handed to `Regex::new` inside a `Lazy`
/// initializer, and the result is unwrapped, so an invalid pattern panics when
/// the static is first dereferenced.
macro_rules! regex {
    ($ident:tt, $pattern:expr) => {
        static $ident: Lazy<Regex> = Lazy::new(|| Regex::new($pattern).unwrap());
    };
}
26
27regex!(NON_NORMAL, r"[^\p{L}\p{N}\-:_]");
28regex!(LEADING_OR_TRAILING_DASHES, r"(^-+)|(-+$)");
29regex!(MULTIPLE_DASHES, r"-{2,}");
30regex!(MULTIPLE_COLONS, r":{2,}");
31regex!(COLON_DASH, r"(:-)|(-:)");
32regex!(UNDERSCORE_DASH, r"(_-)|(-_)");
33regex!(LEADING_OR_TRAILING_COLON, r"(^:)|(:$)");
34
35pub fn normalize(text: &mut String) {
46 text.trim_in_place();
51
52 if text.starts_with('/') {
54 text.replace_range(..1, "");
55 }
56
57 normalize_nfkc(text);
59
60 casefold(text);
64
65 replace_in_place(text, &NON_NORMAL, "-");
67
68 merge_multi_categories(text);
71
72 replace_underscores(text);
76
77 replace_in_place(text, &LEADING_OR_TRAILING_DASHES, "");
79
80 replace_in_place(text, &MULTIPLE_DASHES, "-");
82 replace_in_place(text, &MULTIPLE_COLONS, ":");
83
84 replace_in_place(text, &COLON_DASH, ":");
86 replace_in_place(text, &UNDERSCORE_DASH, "_");
87
88 replace_in_place(text, &LEADING_OR_TRAILING_COLON, "");
90
91 if text.starts_with("_default:") {
93 text.replace_range(..9, "");
94 }
95}
96
97fn replace_in_place(text: &mut String, regex: &Regex, replace_with: &str) {
98 use regex::Captures;
99 use std::ops::Range;
100
101 fn get_range(captures: Captures) -> Range<usize> {
102 let mtch = captures.get(0).unwrap();
103 let start = mtch.start();
104 let end = mtch.end();
105
106 start..end
107 }
108
109 while let Some(captures) = regex.captures(text) {
110 let range = get_range(captures);
111 text.replace_range(range, replace_with);
112 }
113}
114
/// Runs `normalize` over a fixed list of inputs and compares each result with
/// its expected normalized form.
#[test]
fn test_normalize() {
    // Normalizes a copy of the input and asserts it equals the expected value.
    macro_rules! case {
        ($input:expr, $expected:expr $(,)?) => {{
            let mut actual = str!($input);
            normalize(&mut actual);
            assert_eq!(actual, $expected, "Normalized text doesn't match expected");
        }};
    }

    // Basic casing, whitespace, and separator trimming
    case!("", "");
    case!("Big Cheese Horace", "big-cheese-horace");
    case!("bottom--Text", "bottom-text");
    case!("Tufto's Proposal", "tufto-s-proposal");
    case!(" - Test - ", "test");
    case!("--TEST--", "test");
    case!("-test-", "test");
    case!(":test", "test");
    case!("test:", "test");
    case!(":test:", "test");
    case!("/Some Page", "some-page");
    case!("some/Page", "some-page");
    case!("some,Page", "some-page");
    case!("End of Death Hub", "end-of-death-hub");
    case!("$100 is a lot of money", "100-is-a-lot-of-money");
    case!("$100 is a lot of money!", "100-is-a-lot-of-money");

    // Underscores
    case!("snake_case", "snake-case");
    case!("long__snake__case", "long-snake-case");
    case!("snake-_dash", "snake-dash");
    case!("snake_-dash", "snake-dash");
    case!("snake_-_dash", "snake-dash");
    case!("_template", "_template");
    case!("_template_", "_template");
    case!("__template", "_template");
    case!("__template_", "_template");
    case!("template_", "template");
    case!("template__", "template");
    case!("_Template", "_template");
    case!("_Template_", "_template");
    case!("__Template", "_template");
    case!("__Template_", "_template");
    case!("Template_", "template");
    case!("Template__", "template");

    // Symbols and non-ASCII text
    case!(" <[ TEST ]> ", "test");
    case!("ÄÀ-áö ðñæ_þß*řƒŦ", "äà-áö-ðñæ-þß-řƒŧ");
    case!("Site-五", "site-五");
    case!("ᒥᐢᑕᓇᐢᑯᐍᐤ--1", "ᒥᐢᑕᓇᐢᑯᐍᐤ-1");
    case!("ᒥᐢᑕᓇᐢᑯᐍᐤ:_template", "ᒥᐢᑕᓇᐢᑯᐍᐤ:_template");
    case!("🚗A‱B⁜C", "a-b-c");
    case!("Ⰰ_á_X", "ⰰ-á-x");
    case!("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", "");

    // Single category
    case!("Component:image block", "component:image-block");
    case!("fragment:scp-4447-2", "fragment:scp-4447-2");
    case!("fragment::scp-4447-2", "fragment:scp-4447-2");
    case!("FRAGMENT:SCP-4447 (2)", "fragment:scp-4447-2");
    case!("protected_:fragment_:page", "protected-fragment:page");
    case!("protected:_fragment_:page", "protected-fragment:page");
    case!("fragment:_template", "fragment:_template");
    case!("fragment:__template", "fragment:_template");
    case!("fragment:_template_", "fragment:_template");
    case!("fragment::_template", "fragment:_template");

    // The `_default` category
    case!("_default:_template", "_template");
    case!("_default:__template", "_template");
    case!("_default:_template_", "_template");
    case!("_default::_template", "_template");

    // Leading slash combined with a category
    case!("/fragment:_template", "fragment:_template");
    case!("/fragment:__template", "fragment:_template");
    case!("/fragment:_template_", "fragment:_template");
    case!("/fragment::_template", "fragment:_template");
    case!("/_default:_template", "_template");
    case!("/_default:__template", "_template");
    case!("/_default:_template_", "_template");
    case!("/_default::_template", "_template");

    // Multiple categories
    case!("protected:fragment:_template", "protected-fragment:_template");
    case!("protected:fragment:__template", "protected-fragment:_template");
    case!("protected:fragment:_template_", "protected-fragment:_template");
    case!("protected:fragment::_template", "protected-fragment:_template");
    case!("protected::fragment:_template", "protected-fragment:_template");
    case!("protected::fragment::_template", "protected-fragment:_template");
    case!("protected:archived:fragment:page", "protected-archived-fragment:page");
}