1use pulldown_cmark::{Event, Options, Parser, Tag};
8use regex::Regex;
9use serde::Serialize;
10use std::sync::LazyLock;
11
/// Matches `[[...]]` wikilinks. The body character class `[^\[\]]+` forbids
/// square brackets inside the link, so `[[a|b]]` captures `a|b` in group 1,
/// and for nested input like `[[outer [[inner]] end]]` only the innermost
/// bracket pair can match. Compiled once on first use via `LazyLock`.
static WIKILINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[\[([^\[\]]+)\]\]").expect("Invalid wikilink regex"));
14
/// A single `[[target]]` or `[[target|alias]]` wikilink found in a document.
#[derive(Debug, Serialize, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// The raw link target exactly as written, e.g. `Page One` or `folder/Page.md`.
    pub target: String,
    /// Display text after a `|` separator, if the link had one.
    pub alias: Option<String>,
    /// Normalized slug derived from `target` via `slugify`; a trailing `.md`
    /// on the target is stripped before slugification (see `extract_links`).
    pub slug: String,
    /// 1-based line number where the link's opening `[[` appears.
    pub line_number: usize,
}
27
28pub fn slugify(name: &str) -> String {
40 let trimmed = name.trim().to_lowercase();
41
42 let segments: Vec<String> = trimmed
43 .split('/')
44 .filter_map(|segment| {
45 let normalized = slugify_segment(segment);
46 if normalized.is_empty() { None } else { Some(normalized) }
47 })
48 .collect();
49
50 segments.join("/")
51}
52
/// Normalizes one path segment: keeps alphanumerics, turns runs of separator
/// characters (space, `_`, `-`) into a single interior hyphen, and silently
/// drops every other character. Leading/trailing hyphens never appear because
/// a pending separator is only flushed when followed by more content.
fn slugify_segment(segment: &str) -> String {
    let mut out = String::with_capacity(segment.len());
    let mut pending_hyphen = false;

    for ch in segment.chars() {
        if ch.is_alphanumeric() {
            // Flush at most one hyphen, and only between two content runs.
            if pending_hyphen && !out.is_empty() {
                out.push('-');
            }
            pending_hyphen = false;
            out.push(ch);
        } else if matches!(ch, ' ' | '_' | '-') {
            pending_hyphen = true;
        }
        // Punctuation and other symbols are dropped without affecting the
        // pending separator, matching the original drop-then-collapse pass.
    }

    out
}
83
/// Produces a human-readable title from a slug.
///
/// Only the final path segment is used (`"notes/my-page"` -> `"My Page"`):
/// hyphens become spaces and each word's first character is uppercased.
pub fn title_from_slug(slug: &str) -> String {
    let leaf = slug.rsplit('/').next().unwrap_or(slug);

    let mut title = String::with_capacity(leaf.len());
    for word in leaf.split('-').filter(|w| !w.is_empty()) {
        if !title.is_empty() {
            title.push(' ');
        }
        let mut chars = word.chars();
        if let Some(first) = chars.next() {
            // to_uppercase may yield multiple chars (e.g. 'ß' -> "SS").
            title.extend(first.to_uppercase());
            title.push_str(chars.as_str());
        }
    }

    title
}
105
/// Returns the document's H1 title, but only if it is the very first
/// non-blank line of `content` (formatted as `# Title`). An H1 appearing
/// after any other content is ignored.
pub fn extract_h1(content: &str) -> Option<String> {
    let first = content.lines().map(str::trim).find(|line| !line.is_empty())?;
    let title = first.strip_prefix("# ")?.trim();
    if title.is_empty() {
        None
    } else {
        Some(title.to_string())
    }
}
128
129fn code_byte_ranges(content: &str) -> Vec<(usize, usize)> {
134 let parser = Parser::new_ext(content, Options::all()).into_offset_iter();
135 let mut ranges = Vec::new();
136
137 for (event, range) in parser {
138 match event {
139 Event::Start(Tag::CodeBlock(_)) | Event::Code(_) => {
140 ranges.push((range.start, range.end));
141 }
142 _ => {}
143 }
144 }
145
146 ranges
147}
148
/// True if `offset` falls inside any half-open `[start, end)` code range.
fn is_in_code(offset: usize, code_ranges: &[(usize, usize)]) -> bool {
    code_ranges
        .iter()
        .any(|&(start, end)| (start..end).contains(&offset))
}
153
/// 1-based line number of the byte at `byte_offset`, computed by counting
/// newlines in the preceding text. Panics if `byte_offset` is not a char
/// boundary within `content` (callers pass regex match starts, which are).
fn line_number_at(content: &str, byte_offset: usize) -> usize {
    let preceding = &content[..byte_offset];
    1 + preceding.bytes().filter(|&b| b == b'\n').count()
}
158
159pub fn extract_links(content: &str) -> Vec<WikiLink> {
165 let code_ranges = code_byte_ranges(content);
166 let mut links = Vec::new();
167
168 for cap in WIKILINK_RE.captures_iter(content) {
169 let m = cap.get(0).expect("Regex match should exist");
170
171 if is_in_code(m.start(), &code_ranges) {
172 continue;
173 }
174
175 let body = cap.get(1).expect("Capture group 1 should exist").as_str();
176 let (target, alias) = parse_link_body(body);
177
178 let target_for_slug = if target.to_lowercase().ends_with(".md") {
180 &target[..target.len() - 3]
181 } else {
182 &target
183 };
184 let slug = slugify(target_for_slug);
185
186 if !slug.is_empty() {
187 links.push(WikiLink {
188 target,
189 alias,
190 slug,
191 line_number: line_number_at(content, m.start()),
192 });
193 }
194 }
195
196 links
197}
198
/// Splits a wikilink body on the first `|` into `(target, alias)`,
/// trimming whitespace from both parts. Without a pipe, the whole body
/// (trimmed) is the target and the alias is `None`.
fn parse_link_body(body: &str) -> (String, Option<String>) {
    match body.split_once('|') {
        Some((target, alias)) => (target.trim().to_string(), Some(alias.trim().to_string())),
        None => (body.trim().to_string(), None),
    }
}
208
209pub fn replace_wikilinks(content: &str, old_slug: &str, new_title: &str) -> String {
213 let code_ranges = code_byte_ranges(content);
214 let mut result = String::with_capacity(content.len());
215 let mut last_end = 0;
216
217 for cap in WIKILINK_RE.captures_iter(content) {
218 let m = cap.get(0).expect("Regex match should exist");
219 let start = m.start();
220 let end = m.end();
221
222 if is_in_code(start, &code_ranges) {
223 continue;
224 }
225
226 let body = cap.get(1).expect("Capture group 1 should exist").as_str();
227 let (target, alias) = parse_link_body(body);
228 let target_slug = slugify(&target);
229
230 if target_slug == old_slug {
231 result.push_str(&content[last_end..start]);
232
233 match alias {
234 Some(a) => result.push_str(&format!("[[{new_title}|{a}]]")),
235 None => result.push_str(&format!("[[{new_title}]]")),
236 }
237
238 last_end = end;
239 }
240 }
241
242 result.push_str(&content[last_end..]);
243 result
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    // --- slugify: basic normalization ---

    #[test]
    fn test_slugify_basic() {
        assert_eq!(slugify("My Page Name"), "my-page-name");
    }

    #[test]
    fn test_slugify_folder() {
        assert_eq!(slugify("folder/My Page"), "folder/my-page");
    }

    #[test]
    fn test_slugify_special_chars() {
        // Punctuation is dropped; runs of separators collapse to one hyphen.
        assert_eq!(slugify("Hello, World! (2024)"), "hello-world-2024");
    }

    #[test]
    fn test_slugify_underscores() {
        assert_eq!(slugify("my_page_name"), "my-page-name");
    }

    #[test]
    fn test_slugify_unicode() {
        // is_alphanumeric covers non-ASCII letters, so CJK survives intact.
        assert_eq!(slugify("Tagesnotiz"), "tagesnotiz");
        assert_eq!(slugify("日本語ページ"), "日本語ページ");
    }

    #[test]
    fn test_slugify_whitespace() {
        assert_eq!(slugify("  spaces  everywhere  "), "spaces-everywhere");
    }

    // --- extract_links: detection, aliases, code skipping ---

    #[test]
    fn test_extract_simple() {
        let links = extract_links("see [[Page One]] here");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target, "Page One");
        assert_eq!(links[0].slug, "page-one");
        assert_eq!(links[0].alias, None);
        assert_eq!(links[0].line_number, 1);
    }

    #[test]
    fn test_extract_alias() {
        let links = extract_links("see [[Page One|click here]]");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target, "Page One");
        assert_eq!(links[0].alias, Some("click here".to_string()));
        assert_eq!(links[0].slug, "page-one");
    }

    #[test]
    fn test_extract_multiple() {
        let links = extract_links("[[A]] and [[B|bee]] plus [[folder/C]]");
        assert_eq!(links.len(), 3);
        assert_eq!(links[0].slug, "a");
        assert_eq!(links[1].slug, "b");
        assert_eq!(links[1].alias, Some("bee".to_string()));
        assert_eq!(links[2].slug, "folder/c");
    }

    #[test]
    fn test_extract_in_fenced_code_block() {
        let content = "before\n```\n[[not a link]]\n```\nafter [[real link]]";
        let links = extract_links(content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].slug, "real-link");
    }

    #[test]
    fn test_extract_in_inline_code() {
        let links = extract_links("see `[[not a link]]` here");
        assert_eq!(links.len(), 0);
    }

    #[test]
    fn test_extract_line_numbers() {
        let content = "line one\n[[Link A]]\nline three\nline four\n[[Link B]]";
        let links = extract_links(content);
        assert_eq!(links.len(), 2);
        assert_eq!(links[0].line_number, 2);
        assert_eq!(links[1].line_number, 5);
    }

    #[test]
    fn test_nested_brackets_no_panic() {
        // The regex forbids brackets inside a link body, so only the inner
        // pair can match; this just guards against panics/overlap bugs.
        let links = extract_links("[[outer [[inner]] end]]");
        assert!(links.len() <= 2);
    }

    #[test]
    fn test_empty_link_ignored() {
        let links = extract_links("[[]]");
        assert_eq!(links.len(), 0);
    }

    // --- replace_wikilinks ---

    #[test]
    fn test_replace_simple() {
        let result = replace_wikilinks("see [[Rust]] here", "rust", "Rust Language");
        assert_eq!(result, "see [[Rust Language]] here");
    }

    #[test]
    fn test_replace_preserves_alias() {
        let result = replace_wikilinks("see [[Rust|my fav]] here", "rust", "Rust Language");
        assert_eq!(result, "see [[Rust Language|my fav]] here");
    }

    #[test]
    fn test_replace_skips_code_block() {
        let content = "before [[Rust]]\n```\n[[Rust]]\n```\nafter [[Rust]]";
        let result = replace_wikilinks(content, "rust", "Rust Language");
        assert_eq!(
            result,
            "before [[Rust Language]]\n```\n[[Rust]]\n```\nafter [[Rust Language]]"
        );
    }

    #[test]
    fn test_replace_skips_inline_code() {
        let result = replace_wikilinks("see `[[Rust]]` and [[Rust]]", "rust", "Rust Language");
        assert_eq!(result, "see `[[Rust]]` and [[Rust Language]]");
    }

    // --- slugify: degenerate inputs ---

    #[test]
    fn test_slugify_spaces_only() {
        assert_eq!(slugify("   "), "");
    }

    #[test]
    fn test_slugify_punctuation_only() {
        assert_eq!(slugify("!@#$%"), "");
    }

    // --- title_from_slug ---

    #[test]
    fn test_title_from_slug_basic() {
        assert_eq!(title_from_slug("my-page"), "My Page");
    }

    #[test]
    fn test_title_from_slug_with_folder() {
        // Only the leaf segment contributes to the title.
        assert_eq!(title_from_slug("notes/deep/my-page"), "My Page");
    }

    #[test]
    fn test_title_from_slug_single_word() {
        assert_eq!(title_from_slug("cael"), "Cael");
    }

    // --- extract_h1 ---

    #[test]
    fn test_extract_h1_basic() {
        assert_eq!(extract_h1("# My Title\n\nContent here"), Some("My Title".to_string()));
    }

    #[test]
    fn test_extract_h1_with_leading_blanks() {
        assert_eq!(extract_h1("\n\n# Title\nContent"), Some("Title".to_string()));
    }

    #[test]
    fn test_extract_h1_none_without_h1() {
        assert_eq!(extract_h1("No heading here\nJust text"), None);
    }

    #[test]
    fn test_extract_h1_ignores_h2() {
        assert_eq!(extract_h1("## Not an H1"), None);
    }

    #[test]
    fn test_extract_h1_ignores_deep_h1() {
        // Only an H1 on the first non-blank line counts as the title.
        assert_eq!(extract_h1("Some text\n# Not the title"), None);
    }

    #[test]
    fn test_extract_h1_empty_content() {
        assert_eq!(extract_h1(""), None);
        assert_eq!(extract_h1("\n\n\n"), None);
    }

    // --- slugify: invariants and path edge cases ---

    #[test]
    fn test_slugify_idempotent() {
        let cases = ["My Page", "folder/My Page", "a b c", "日本語ページ", "C++ Guide"];
        for input in &cases {
            let once = slugify(input);
            let twice = slugify(&once);
            assert_eq!(once, twice, "slugify not idempotent for: {input}");
        }
    }

    #[test]
    fn test_slugify_double_slash() {
        assert_eq!(slugify("folder//page"), "folder/page");
    }

    #[test]
    fn test_slugify_leading_trailing_slash() {
        assert_eq!(slugify("/page/"), "page");
        assert_eq!(slugify("/folder/page/"), "folder/page");
    }

    #[test]
    fn test_slugify_slash_only() {
        assert_eq!(slugify("/"), "");
        assert_eq!(slugify("///"), "");
    }

    // --- code-region skipping: other Markdown code syntaxes ---

    #[test]
    fn test_extract_skips_indented_code_block() {
        let content = "normal text\n\n    [[not a link]]\n\nreal [[link]]";
        let links = extract_links(content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].slug, "link");
    }

    #[test]
    fn test_extract_skips_tilde_fence() {
        let content = "before\n~~~\n[[not a link]]\n~~~\nafter [[real]]";
        let links = extract_links(content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].slug, "real");
    }

    #[test]
    fn test_extract_skips_multi_backtick_inline() {
        let content = "see ``[[not a link]]`` and [[real]]";
        let links = extract_links(content);
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].slug, "real");
    }

    #[test]
    fn test_replace_skips_indented_code_block() {
        let content = "[[Rust]]\n\n    [[Rust]]\n\n[[Rust]]";
        let result = replace_wikilinks(content, "rust", "Rust Language");
        assert!(result.contains("[[Rust Language]]"));
        assert!(result.contains("    [[Rust]]"));
    }
}