1#![allow(missing_docs)] pub mod fs;
4mod string;
5pub(crate) mod toml_ext;
6use crate::errors::Error;
7use log::error;
8use once_cell::sync::Lazy;
9use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
10use regex::Regex;
11
12use std::borrow::Cow;
13use std::collections::HashMap;
14use std::fmt::Write;
15use std::path::Path;
16
17pub use self::string::{
18 take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
19 take_rustdoc_include_lines,
20};
21
22pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
24 static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s\s+").unwrap());
25 RE.replace_all(text, " ")
26}
27
/// Converts `content` into a string usable as an HTML id.
///
/// Character by character:
/// * alphanumeric characters, `_` and `-` are kept, with ASCII letters
///   lowercased (non-ASCII letters keep their case);
/// * whitespace characters each become a single `-`;
/// * everything else is dropped.
pub fn normalize_id(content: &str) -> String {
    let mut id = String::new();
    for ch in content.chars() {
        match ch {
            // Lowercasing these two is a no-op, so push them directly.
            '_' | '-' => id.push(ch),
            c if c.is_alphanumeric() => id.push(c.to_ascii_lowercase()),
            c if c.is_whitespace() => id.push('-'),
            _ => {}
        }
    }
    id
}
44
45#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
49pub fn id_from_content(content: &str) -> String {
50 let mut content = content.to_string();
51
52 static HTML: Lazy<Regex> = Lazy::new(|| Regex::new(r"(<.*?>)").unwrap());
54 content = HTML.replace_all(&content, "").into();
55 const REPL_SUB: &[&str] = &["<", ">", "&", "'", """];
56 for sub in REPL_SUB {
57 content = content.replace(sub, "");
58 }
59
60 let trimmed = content.trim().trim_start_matches('#').trim();
62 normalize_id(trimmed)
63}
64
65pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
71 let id = {
72 #[allow(deprecated)]
73 id_from_content(content)
74 };
75
76 let id_count = id_counter.entry(id.clone()).or_insert(0);
78 let unique_id = match *id_count {
79 0 => id,
80 id_count => format!("{}-{}", id, id_count),
81 };
82 *id_count += 1;
83 unique_id
84}
85
86fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
96 static SCHEME_LINK: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
97 static MD_LINK: Lazy<Regex> =
98 Lazy::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
99
100 fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
101 if dest.starts_with('#') {
102 if let Some(path) = path {
104 let mut base = path.display().to_string();
105 if base.ends_with(".md") {
106 base.replace_range(base.len() - 3.., ".html");
107 }
108 return format!("{}{}", base, dest).into();
109 } else {
110 return dest;
111 }
112 }
113 if !SCHEME_LINK.is_match(&dest) {
115 let mut fixed_link = String::new();
117 if let Some(path) = path {
118 let base = path
119 .parent()
120 .expect("path can't be empty")
121 .to_str()
122 .expect("utf-8 paths only");
123 if !base.is_empty() {
124 write!(fixed_link, "{}/", base).unwrap();
125 }
126 }
127
128 if let Some(caps) = MD_LINK.captures(&dest) {
129 fixed_link.push_str(&caps["link"]);
130 fixed_link.push_str(".html");
131 if let Some(anchor) = caps.name("anchor") {
132 fixed_link.push_str(anchor.as_str());
133 }
134 } else {
135 fixed_link.push_str(&dest);
136 };
137 return CowStr::from(fixed_link);
138 }
139 dest
140 }
141
142 fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
143 static HTML_LINK: Lazy<Regex> =
152 Lazy::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());
153
154 HTML_LINK
155 .replace_all(&html, |caps: ®ex::Captures<'_>| {
156 let fixed = fix(caps[2].into(), path);
157 format!("{}{}\"", &caps[1], fixed)
158 })
159 .into_owned()
160 .into()
161 }
162
163 match event {
164 Event::Start(Tag::Link(link_type, dest, title)) => {
165 Event::Start(Tag::Link(link_type, fix(dest, path), title))
166 }
167 Event::Start(Tag::Image(link_type, dest, title)) => {
168 Event::Start(Tag::Image(link_type, fix(dest, path), title))
169 }
170 Event::Html(html) => Event::Html(fix_html(html, path)),
171 _ => event,
172 }
173}
174
175pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
177 render_markdown_with_path(text, curly_quotes, None)
178}
179
180pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
181 let mut opts = Options::empty();
182 opts.insert(Options::ENABLE_TABLES);
183 opts.insert(Options::ENABLE_FOOTNOTES);
184 opts.insert(Options::ENABLE_STRIKETHROUGH);
185 opts.insert(Options::ENABLE_TASKLISTS);
186 if curly_quotes {
187 opts.insert(Options::ENABLE_SMART_PUNCTUATION);
188 }
189 Parser::new_ext(text, opts)
190}
191
192pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
193 let mut s = String::with_capacity(text.len() * 3 / 2);
194 let p = new_cmark_parser(text, curly_quotes);
195 let events = p
196 .map(clean_codeblock_headers)
197 .map(|event| adjust_links(event, path))
198 .flat_map(|event| {
199 let (a, b) = wrap_tables(event);
200 a.into_iter().chain(b)
201 });
202
203 html::push_html(&mut s, events);
204 s
205}
206
207fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
209 match event {
210 Event::Start(Tag::Table(_)) => (
211 Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
212 Some(event),
213 ),
214 Event::End(Tag::Table(_)) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
215 _ => (Some(event), None),
216 }
217}
218
219fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
220 match event {
221 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
222 let info: String = info
223 .chars()
224 .map(|x| match x {
225 ' ' | '\t' => ',',
226 _ => x,
227 })
228 .filter(|ch| !ch.is_whitespace())
229 .collect();
230
231 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
232 }
233 _ => event,
234 }
235}
236
237pub fn log_backtrace(e: &Error) {
239 error!("Error: {}", e);
240
241 for cause in e.chain().skip(1) {
242 error!("\tCaused By: {}", cause);
243 }
244}
245
/// Escapes angle brackets for inclusion in HTML.
///
/// Every `<` becomes `&lt;` and every `>` becomes `&gt;`; all other
/// characters are copied through unchanged. An empty input yields an empty
/// string.
pub(crate) fn bracket_escape(s: &str) -> String {
    // The output is at least as long as the input; it grows only when
    // something is actually escaped.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
261
262#[cfg(test)]
263mod tests {
264 use super::bracket_escape;
265
266 mod render_markdown {
267 use super::super::render_markdown;
268
269 #[test]
270 fn preserves_external_links() {
271 assert_eq!(
272 render_markdown("[example](https://www.rust-lang.org/)", false),
273 "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
274 );
275 }
276
277 #[test]
278 fn it_can_adjust_markdown_links() {
279 assert_eq!(
280 render_markdown("[example](example.md)", false),
281 "<p><a href=\"example.html\">example</a></p>\n"
282 );
283 assert_eq!(
284 render_markdown("[example_anchor](example.md#anchor)", false),
285 "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
286 );
287
288 assert_eq!(
290 render_markdown("[phantom data](foo.html#phantomdata)", false),
291 "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
292 );
293 }
294
295 #[test]
296 fn it_can_wrap_tables() {
297 let src = r#"
298| Original | Punycode | Punycode + Encoding |
299|-----------------|-----------------|---------------------|
300| føø | f-5gaa | f_5gaa |
301"#;
302 let out = r#"
303<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
304<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
305</tbody></table>
306</div>
307"#.trim();
308 assert_eq!(render_markdown(src, false), out);
309 }
310
311 #[test]
312 fn it_can_keep_quotes_straight() {
313 assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
314 }
315
316 #[test]
317 fn it_can_make_quotes_curly_except_when_they_are_in_code() {
318 let input = r#"
319'one'
320```
321'two'
322```
323`'three'` 'four'"#;
324 let expected = r#"<p>‘one’</p>
325<pre><code>'two'
326</code></pre>
327<p><code>'three'</code> ‘four’</p>
328"#;
329 assert_eq!(render_markdown(input, true), expected);
330 }
331
332 #[test]
333 fn whitespace_outside_of_codeblock_header_is_preserved() {
334 let input = r#"
335some text with spaces
336```rust
337fn main() {
338// code inside is unchanged
339}
340```
341more text with spaces
342"#;
343
344 let expected = r#"<p>some text with spaces</p>
345<pre><code class="language-rust">fn main() {
346// code inside is unchanged
347}
348</code></pre>
349<p>more text with spaces</p>
350"#;
351 assert_eq!(render_markdown(input, false), expected);
352 assert_eq!(render_markdown(input, true), expected);
353 }
354
355 #[test]
356 fn rust_code_block_properties_are_passed_as_space_delimited_class() {
357 let input = r#"
358```rust,no_run,should_panic,property_3
359```
360"#;
361
362 let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
363"#;
364 assert_eq!(render_markdown(input, false), expected);
365 assert_eq!(render_markdown(input, true), expected);
366 }
367
368 #[test]
369 fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
370 let input = r#"
371```rust, no_run,,,should_panic , ,property_3
372```
373"#;
374
375 let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
376"#;
377 assert_eq!(render_markdown(input, false), expected);
378 assert_eq!(render_markdown(input, true), expected);
379 }
380
381 #[test]
382 fn rust_code_block_without_properties_has_proper_html_class() {
383 let input = r#"
384```rust
385```
386"#;
387
388 let expected = r#"<pre><code class="language-rust"></code></pre>
389"#;
390 assert_eq!(render_markdown(input, false), expected);
391 assert_eq!(render_markdown(input, true), expected);
392
393 let input = r#"
394```rust
395```
396"#;
397 assert_eq!(render_markdown(input, false), expected);
398 assert_eq!(render_markdown(input, true), expected);
399 }
400 }
401
402 #[allow(deprecated)]
403 mod id_from_content {
404 use super::super::id_from_content;
405
406 #[test]
407 fn it_generates_anchors() {
408 assert_eq!(
409 id_from_content("## Method-call expressions"),
410 "method-call-expressions"
411 );
412 assert_eq!(id_from_content("## **Bold** title"), "bold-title");
413 assert_eq!(id_from_content("## `Code` title"), "code-title");
414 assert_eq!(
415 id_from_content("## title <span dir=rtl>foo</span>"),
416 "title-foo"
417 );
418 }
419
420 #[test]
421 fn it_generates_anchors_from_non_ascii_initial() {
422 assert_eq!(
423 id_from_content("## `--passes`: add more rustdoc passes"),
424 "--passes-add-more-rustdoc-passes"
425 );
426 assert_eq!(
427 id_from_content("## 中文標題 CJK title"),
428 "中文標題-cjk-title"
429 );
430 assert_eq!(id_from_content("## Über"), "Über");
431 }
432 }
433
434 mod html_munging {
435 use super::super::{normalize_id, unique_id_from_content};
436
437 #[test]
438 fn it_normalizes_ids() {
439 assert_eq!(
440 normalize_id("`--passes`: add more rustdoc passes"),
441 "--passes-add-more-rustdoc-passes"
442 );
443 assert_eq!(
444 normalize_id("Method-call 🐙 expressions \u{1f47c}"),
445 "method-call--expressions-"
446 );
447 assert_eq!(normalize_id("_-_12345"), "_-_12345");
448 assert_eq!(normalize_id("12345"), "12345");
449 assert_eq!(normalize_id("中文"), "中文");
450 assert_eq!(normalize_id("にほんご"), "にほんご");
451 assert_eq!(normalize_id("한국어"), "한국어");
452 assert_eq!(normalize_id(""), "");
453 }
454
455 #[test]
456 fn it_generates_unique_ids_from_content() {
457 assert_eq!(
459 unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
460 "中文標題-cjk-title"
461 );
462 assert_eq!(
463 unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
464 "中文標題-cjk-title"
465 );
466
467 let mut id_counter = Default::default();
469 assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
470 assert_eq!(
471 unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
472 "中文標題-cjk-title"
473 );
474 assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
475 assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
476 }
477 }
478
479 #[test]
480 fn escaped_brackets() {
481 assert_eq!(bracket_escape(""), "");
482 assert_eq!(bracket_escape("<"), "<");
483 assert_eq!(bracket_escape(">"), ">");
484 assert_eq!(bracket_escape("<>"), "<>");
485 assert_eq!(bracket_escape("<test>"), "<test>");
486 assert_eq!(bracket_escape("a<test>b"), "a<test>b");
487 }
488}