1use core::slice::Iter;
2use std::borrow::Cow;
3use std::string::String;
4use std::sync::Arc;
5use std::vec::Vec;
6
7use pulldown_cmark::Event;
8use thiserror::Error;
9
10use crate::{CMarkItem, File, FileDocs, TextSource};
11
/// An ordered sequence of shared CommonMark items.
///
/// This is a thin newtype around `Vec<Arc<CMarkItem>>`; the transformation
/// methods consume the value and return a new `CMarkData`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CMarkData(Vec<Arc<CMarkItem>>);
15
/// Borrowing iterator over the items stored in a `CMarkData`
/// (a plain slice iterator over `Arc<CMarkItem>`).
pub type CMarkDataIter<'a> = Iter<'a, Arc<CMarkItem>>;
18
19impl CMarkData {
20 pub fn from_items(items: Vec<Arc<CMarkItem>>) -> Self {
22 Self(items)
23 }
24
25 pub fn from_file(file: Arc<File>) -> Self {
27 Self::from_text_source(TextSource::File(file))
28 }
29
30 pub fn from_file_docs(file_docs: Arc<FileDocs>) -> Self {
32 Self::from_text_source(TextSource::FileDocs(file_docs))
33 }
34
35 pub fn from_text_source(text_source: TextSource) -> Self {
37 use crate::IntoStatic;
38 use pulldown_cmark::Parser;
39
40 let text = match &text_source {
41 TextSource::File(file) => file.text(),
42 TextSource::FileDocs(file_docs) => file_docs.docs(),
43 };
44
45 Self(
46 Parser::new(text)
47 .into_offset_iter()
48 .map(|(event, range)| {
49 CMarkItem::from(event.into_static(), range, text_source.clone())
50 })
51 .collect(),
52 )
53 .concat_texts()
54 }
55
56 pub fn into_items(self) -> Vec<Arc<CMarkItem>> {
58 self.0
59 }
60
61 pub fn iter(&self) -> CMarkDataIter<'_> {
63 self.0.iter()
64 }
65
66 pub fn iter_events(&self) -> impl Iterator<Item = &Event<'_>> {
68 self.0.iter().filter_map(|item| item.event())
69 }
70
71 fn map<F>(self, func: F) -> Self
72 where
73 F: FnMut(Arc<CMarkItem>) -> Arc<CMarkItem>,
74 {
75 Self(self.0.into_iter().map(func).collect())
76 }
77
78 pub fn concat_texts(self) -> Self {
89 use core::mem::take;
90
91 let mut result = Vec::new();
92 let mut text_nodes = Vec::new();
93 let mut text_value = String::new();
94
95 for node in self.0.into_iter() {
96 match node.event() {
97 None => {
98 result.push(node);
99 }
100 Some(Event::Text(event_text)) => {
101 text_value += event_text;
102 text_nodes.push(node);
103 }
104 Some(_) => {
105 if let Some(text_node) =
106 merge_text_nodes(take(&mut text_nodes), take(&mut text_value))
107 {
108 result.push(text_node);
109 }
110 result.push(node);
111 }
112 }
113 }
114
115 Self(result)
116 }
117}
118
119fn merge_text_nodes(nodes: Vec<Arc<CMarkItem>>, text: String) -> Option<Arc<CMarkItem>> {
120 use crate::CMarkItemAsModified;
121 use pulldown_cmark::CowStr;
122
123 match nodes.len() {
124 0 => None,
125 1 => Some(nodes.into_iter().next().unwrap()),
126 _ => Some(nodes.into_modified(
127 Event::Text(CowStr::Boxed(text.into_boxed_str())),
128 Cow::from("concat_texts()"),
129 )),
130 }
131}
132
133impl CMarkData {
134 pub fn increment_heading_levels(self) -> Self {
142 use crate::CMarkItemAsModified;
143 use pulldown_cmark::{Tag, TagEnd};
144
145 self.map(|node| {
146 let event = match node.event() {
147 Some(Event::Start(Tag::Heading {
148 level,
149 id,
150 classes,
151 attrs,
152 })) => Some(Event::Start(Tag::Heading {
153 level: increase_heading_level(*level),
154 id: id.clone(),
155 classes: classes.clone(),
156 attrs: attrs.clone(),
157 })),
158 Some(Event::End(TagEnd::Heading(level))) => {
159 Some(Event::End(TagEnd::Heading(increase_heading_level(*level))))
160 }
161 _ => None,
162 };
163 if let Some(event) = event {
164 node.into_modified(event, Cow::from("increment_heading_levels()"))
165 } else {
166 node
167 }
168 })
169 }
170
171 pub fn add_title(self, text: &str) -> Self {
175 use pulldown_cmark::{CowStr, HeadingLevel, Tag, TagEnd};
176 use std::string::ToString;
177
178 let heading = std::vec![
179 CMarkItem::new(
180 Event::Start(Tag::Heading {
181 level: HeadingLevel::H1,
182 id: None,
183 classes: std::vec![],
184 attrs: std::vec![]
185 }),
186 Cow::from("add_title()")
187 ),
188 CMarkItem::new(
189 Event::Text(CowStr::Boxed(text.to_string().into_boxed_str())),
190 Cow::from("add_title()")
191 ),
192 CMarkItem::new(
193 Event::End(TagEnd::Heading(HeadingLevel::H1)),
194 Cow::from("add_title()")
195 ),
196 ];
197
198 Self(heading.into_iter().chain(self.0).collect())
199 }
200
201 #[allow(clippy::match_like_matches_macro)] pub fn remove_images_only_paragraph<P>(self, mut predicate: P) -> Self
205 where
206 P: FnMut(&[&str]) -> bool,
207 {
208 use crate::CMarkItemAsRemoved;
209 use core::mem::take;
210 use pulldown_cmark::{Tag, TagEnd};
211 use std::string::ToString;
212
213 let mut result = Vec::new();
214 let mut paragraph = Vec::new();
215 let mut image_urls = Vec::new();
216 let mut is_image = false;
217 let mut is_already_removed = false;
218
219 for node in self.0.into_iter() {
220 if is_already_removed {
221 result.push(node);
222 continue;
223 }
224
225 if !paragraph.is_empty() {
226 if is_image {
227 let event = node.event();
228 is_image = if let Some(Event::End(TagEnd::Image { .. })) = event {
229 false
230 } else {
231 true
232 };
233 paragraph.push(node);
234 } else {
235 paragraph.push(node);
236 let node = paragraph.last().unwrap();
237 let event = node.event();
238 match event {
239 Some(Event::End(TagEnd::Paragraph)) => {
240 let urls: Vec<String> = take(&mut image_urls);
241 let urls: Vec<&str> = urls.iter().map(|url| url.as_str()).collect();
242 if !urls.is_empty() && predicate(&urls) {
243 result
244 .push(take(&mut paragraph).into_removed(Cow::from(
245 "remove_images_only_paragraphs()",
246 )));
247 is_already_removed = true;
248 } else {
249 result.append(&mut paragraph);
250 }
251 }
252 Some(Event::Start(Tag::Image { dest_url, .. })) => {
253 image_urls.push(dest_url.as_ref().to_string());
254 is_image = true;
255 }
256 Some(Event::Start(Tag::Link { .. }))
257 | Some(Event::End(TagEnd::Link { .. }))
258 | Some(Event::SoftBreak)
259 | None => {}
260 Some(_) => {
261 result.append(&mut paragraph);
262 }
263 }
264 }
265 } else {
266 let event = node.event();
267 match event {
268 Some(Event::Start(Tag::Paragraph)) => paragraph.push(node),
269 _ => result.push(node),
270 }
271 }
272 }
273
274 result.append(&mut paragraph);
275
276 Self(result)
277 }
278
279 pub fn remove_badges_paragraph(self) -> Self {
281 let patterns = crate::badge_url_patterns();
282 self.remove_images_only_paragraph(|image_urls| {
283 image_urls
284 .iter()
285 .any(|url| patterns.iter().any(|pattern| pattern.matches(url)))
286 })
287 }
288
289 pub fn remove_section(self, heading: &str, level: u32) -> Self {
291 use core::mem::take;
292 use pulldown_cmark::Tag;
293
294 let mut section = Vec::new();
295 let mut result = Vec::new();
296 let mut is_already_removed = false;
297
298 for node in self.0.into_iter() {
299 if !is_already_removed {
300 let event = node.event();
301 if let Some(Event::Start(Tag::Heading {
302 level: node_level, ..
303 })) = event
304 {
305 if heading_level(*node_level) <= level {
306 let (mut section, is_removed) =
307 into_removed_section_if_matched(take(&mut section), heading, level);
308 result.append(&mut section);
309 is_already_removed = is_removed;
310 }
311 }
312 }
313 if is_already_removed {
314 result.push(node);
315 } else {
316 section.push(node);
317 }
318 }
319
320 result.append(&mut into_removed_section_if_matched(take(&mut section), heading, level).0);
321
322 Self(result)
323 }
324
325 pub fn remove_documentation_section(self) -> Self {
327 self.remove_section("Documentation", 2)
328 }
329}
330
331fn into_removed_section_if_matched(
332 section: Vec<Arc<CMarkItem>>,
333 heading: &str,
334 level: u32,
335) -> (Vec<Arc<CMarkItem>>, bool) {
336 use crate::CMarkItemAsRemoved;
337 use std::vec;
338
339 if is_matched_section(§ion, heading, level) {
340 (
341 vec![section.into_removed(Cow::from(std::format!(
342 "remove_section(name = \"{}\", level = {})",
343 heading,
344 level
345 )))],
346 true,
347 )
348 } else {
349 (section, false)
350 }
351}
352
353fn is_matched_section(section: &[Arc<CMarkItem>], heading: &str, level: u32) -> bool {
354 use pulldown_cmark::Tag;
355
356 let first_event = section.first().and_then(|node| node.event());
357 let second_event = section.get(1).and_then(|node| node.event());
358 if let (
359 Some(Event::Start(Tag::Heading {
360 level: node_level, ..
361 })),
362 Some(Event::Text(node_text)),
363 ) = (first_event, second_event)
364 {
365 heading_level(*node_level) == level && node_text.as_ref() == heading
366 } else {
367 false
368 }
369}
370
371impl CMarkData {
372 pub fn disallow_absolute_blob_links(
375 self,
376 repository_url: &str,
377 ) -> Result<Self, DisallowUrlsWithPrefixError> {
378 self.disallow_urls_with_prefix(&blob_path_prefix(repository_url))
379 }
380
381 pub fn disallow_absolute_docs_links(
384 self,
385 package_name: &str,
386 documentation_url: &str,
387 ) -> Result<Self, DisallowUrlsWithPrefixError> {
388 self.disallow_urls_with_prefix(&docs_path_prefix(package_name, documentation_url))
389 }
390
391 pub fn disallow_urls_with_prefix(
393 self,
394 prefix: &str,
395 ) -> Result<Self, DisallowUrlsWithPrefixError> {
396 use pulldown_cmark::Tag;
397 use std::string::ToString;
398
399 for node in &self.0 {
400 if let Some(Event::Start(Tag::Link { dest_url, .. })) = node.event() {
401 if dest_url.starts_with(prefix) {
402 return Err(DisallowUrlsWithPrefixError::PrefixFound {
403 url: dest_url.as_ref().to_string(),
404 prefix: prefix.to_string(),
405 });
406 }
407 }
408 }
409
410 Ok(self)
411 }
412
413 pub fn use_absolute_blob_urls(self, repository_url: &str) -> Self {
416 self.with_absolute_urls(&blob_path_prefix(repository_url))
417 }
418
419 pub fn use_absolute_docs_urls(self, package_name: &str, documentation_url: &str) -> Self {
422 self.with_absolute_urls(&docs_path_prefix(package_name, documentation_url))
423 }
424
425 pub fn with_absolute_urls(self, prefix: &str) -> Self {
427 use std::format;
428
429 self.map_links(
430 |url| {
431 if !is_absolute_url(url) && !is_fragment(url) {
432 Cow::from([prefix, url].concat())
433 } else {
434 Cow::from(url)
435 }
436 },
437 Cow::from(format!("with_absolute_urls(prefix = \"{}\")", prefix)),
438 )
439 }
440
441 pub fn map_links<F>(self, mut func: F, note: impl Into<Cow<'static, str>>) -> Self
443 where
444 for<'b> F: FnMut(&'b str) -> Cow<'b, str>,
445 {
446 use crate::CMarkItemAsModified;
447 use pulldown_cmark::{CowStr, Tag};
448
449 fn map_link<'a, F>(tag: &Tag<'a>, mut func: F) -> Option<Tag<'a>>
450 where
451 for<'b> F: FnMut(&'b str) -> Cow<'b, str>,
452 {
453 if let Tag::Link {
454 link_type,
455 dest_url,
456 title,
457 id,
458 } = tag
459 {
460 let new_url = func(dest_url.as_ref());
461 if dest_url.as_ref() != new_url.as_ref() {
462 let title = title.clone();
463 return Some(Tag::Link {
464 link_type: *link_type,
465 dest_url: CowStr::from(new_url.into_owned()),
466 title: title.clone(),
467 id: id.clone(),
468 });
469 }
470 }
471 None
472 }
473
474 let note = note.into();
475 self.map(|node| {
476 let event = match node.event() {
477 Some(Event::Start(tag)) => map_link(tag, &mut func).map(Event::Start),
478 _ => None,
479 };
480 match event {
481 Some(event) => node.into_modified(event, note.clone()),
482 None => node,
483 }
484 })
485 }
486}
487
/// Returns `true` if `url` is considered absolute.
///
/// This delegates to `is_url_with_scheme`.
fn is_absolute_url(url: &str) -> bool {
    is_url_with_scheme(url)
}
491
/// Returns `true` if `url` is a fragment reference, i.e. begins with `#`.
fn is_fragment(url: &str) -> bool {
    // `#` is a single ASCII byte, so checking the first byte is sufficient.
    url.as_bytes().first() == Some(&b'#')
}
495
/// Returns `true` if `url` starts with `//` or with `<scheme>://`.
///
/// A scheme must start with an ASCII letter and may continue with ASCII
/// letters, digits, `+`, `.` and `-`. The `//` separator is mandatory, so
/// strings such as `crate::Foo`, `http:` or the empty string are not
/// considered URLs with a scheme.
fn is_url_with_scheme(url: &str) -> bool {
    // Everything in front of the first `//`; without `//` there is no scheme.
    let scheme = match url.split_once("//") {
        Some((scheme, _rest)) => scheme,
        None => return false,
    };

    if scheme.is_empty() {
        // Scheme-relative URL such as `//example.com/path`.
        return true;
    }

    match scheme.strip_suffix(':').map(str::as_bytes) {
        Some([first, rest @ ..]) => {
            first.is_ascii_alphabetic()
                && rest
                    .iter()
                    .all(|ch| ch.is_ascii_alphanumeric() || matches!(*ch, b'+' | b'.' | b'-'))
        }
        // Either the `:` is missing or nothing precedes it.
        _ => false,
    }
}
516
/// Returns `value` without its final `/`, if it has one.
///
/// At most one slash is removed.
fn without_trailing_slash(value: &str) -> &str {
    value.strip_suffix('/').unwrap_or(value)
}
523
524fn blob_path_prefix(repository_url: &str) -> String {
525 use std::string::ToString;
526 without_trailing_slash(repository_url).to_string() + "/blob/master/"
527}
528
529fn docs_path_prefix(package_name: &str, documentation_url: &str) -> String {
530 use std::string::ToString;
531
532 let url = without_trailing_slash(documentation_url);
533 let name = package_name.to_string().replace('-', "_");
534 [url, "/*/", &name, "/"].concat()
535}
536
537impl CMarkData {
538 pub fn remove_codeblock_tag(self, tag: &str) -> Self {
540 self.remove_codeblock_tags(&[tag])
541 }
542
543 pub fn remove_codeblock_tags(self, tags: &[&str]) -> Self {
545 use crate::CMarkItemAsModified;
546
547 self.map(|node| {
548 let event = match node.event() {
549 Some(Event::Start(tag)) => remove_codeblock_tag_tags(tag, tags).map(Event::Start),
550 _ => None,
551 };
552 match event {
553 Some(event) => node.into_modified(
554 event,
555 Cow::from(std::format!("remove_codeblock_tags(tags = {:?})", tags)),
556 ),
557 None => node,
558 }
559 })
560 }
561}
562
563fn remove_codeblock_tag_tags<'a>(
564 event_tag: &pulldown_cmark::Tag<'a>,
565 tags: &[&str],
566) -> Option<pulldown_cmark::Tag<'a>> {
567 use pulldown_cmark::{CodeBlockKind, CowStr, Tag};
568
569 if let Tag::CodeBlock(CodeBlockKind::Fenced(ref node_tags)) = event_tag {
570 let has_tags = node_tags
571 .split(',')
572 .any(|node_tag| tags.iter().any(|tag| &node_tag == tag));
573 if has_tags {
574 let node_tags: Vec<_> = node_tags
575 .split(',')
576 .filter(|node_tag| !tags.iter().any(|tag| node_tag == tag))
577 .collect();
578 let node_tags = CowStr::Boxed(node_tags.join(",").into_boxed_str());
579 return Some(Tag::CodeBlock(CodeBlockKind::Fenced(node_tags)));
580 }
581 }
582 None
583}
584
585impl CMarkData {
586 pub fn remove_codeblock_rust_test_tags(self) -> Self {
590 use crate::codeblock_rust_test_tags;
591
592 self.remove_codeblock_tags(codeblock_rust_test_tags())
593 }
594
595 pub fn use_default_codeblock_tag(self, tag: &str) -> Self {
597 use crate::CMarkItemAsModified;
598
599 self.map(|node| {
600 let event = match node.event() {
601 Some(Event::Start(node_tag)) => {
602 map_default_codeblock_tag(node_tag, tag).map(Event::Start)
603 }
604 _ => None,
605 };
606 match event {
607 Some(event) => node.into_modified(
608 event,
609 Cow::from(std::format!("use_default_codeblock_tag(tag = \"{}\")", tag)),
610 ),
611 None => node,
612 }
613 })
614 }
615}
616
617fn map_default_codeblock_tag<'a>(
618 event_tag: &pulldown_cmark::Tag<'a>,
619 tag: &str,
620) -> Option<pulldown_cmark::Tag<'a>> {
621 use pulldown_cmark::{CodeBlockKind, CowStr, Tag};
622 use std::string::ToString;
623
624 if let Tag::CodeBlock(CodeBlockKind::Fenced(ref node_tag)) = event_tag {
625 if node_tag.as_ref() == "" {
626 return Some(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Boxed(
627 tag.to_string().into_boxed_str(),
628 ))));
629 }
630 }
631 None
632}
633
634impl CMarkData {
635 pub fn use_default_codeblock_rust_tag(self) -> Self {
637 self.use_default_codeblock_tag("rust")
638 }
639
640 pub fn remove_hidden_rust_code(self) -> Self {
644 use crate::CMarkItemAsModified;
645 use pulldown_cmark::{CodeBlockKind, CowStr, Tag};
646
647 let mut is_rust_codeblock = false;
648
649 self.map(|node| {
650 match node.event() {
651 Some(Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tags)))) => {
652 is_rust_codeblock |= tags.split(',').any(|tag| tag == "rust")
653 }
654 Some(Event::Text(text)) => {
655 if is_rust_codeblock {
656 let text: Vec<_> = text
657 .split('\n')
658 .filter(|line| *line != "#" && !line.starts_with("# "))
659 .collect();
660 let text = text.join("\n");
661 let event = Event::Text(CowStr::Boxed(text.into_boxed_str()));
662 return node.into_modified(event, Cow::from("remove_hidden_rust_code()"));
663 }
664 }
665 _ => {}
666 };
667 node
668 })
669 }
670}
671
/// Error returned by `CMarkData::disallow_urls_with_prefix` and the methods
/// built on top of it.
#[derive(Clone, Debug, Error)]
pub enum DisallowUrlsWithPrefixError {
    /// A link whose destination URL starts with the prohibited prefix was found.
    #[error("The url `{url}` use a prohibited prefix `{prefix}`.")]
    PrefixFound {
        /// Destination URL of the offending link.
        url: String,
        /// The prohibited prefix the URL starts with.
        prefix: String,
    },
}
684
685fn increase_heading_level(level: pulldown_cmark::HeadingLevel) -> pulldown_cmark::HeadingLevel {
686 use pulldown_cmark::HeadingLevel;
687
688 match level {
689 HeadingLevel::H1 => HeadingLevel::H2,
690 HeadingLevel::H2 => HeadingLevel::H3,
691 HeadingLevel::H3 => HeadingLevel::H4,
692 HeadingLevel::H4 => HeadingLevel::H5,
693 HeadingLevel::H5 | HeadingLevel::H6 => HeadingLevel::H6,
694 }
695}
696
697fn heading_level(level: pulldown_cmark::HeadingLevel) -> u32 {
698 use pulldown_cmark::HeadingLevel;
699
700 match level {
701 HeadingLevel::H1 => 1,
702 HeadingLevel::H2 => 2,
703 HeadingLevel::H3 => 3,
704 HeadingLevel::H4 => 4,
705 HeadingLevel::H5 => 5,
706 HeadingLevel::H6 => 6,
707 }
708}
709
/// Checks `is_url_with_scheme` against URLs with and without a valid scheme.
#[test]
fn test_is_url_with_scheme() {
    let with_scheme = [
        "//Foo",
        "a://Foo",
        "Z://Foo",
        "aa://Foo",
        "a0://Foo",
        "a+://Foo",
        "a.://Foo",
        "a-://Foo",
        "http://Foo",
        "https://Foo",
    ];
    let without_scheme = ["Foo", "crate::Foo", "://Foo", "0://Foo", "a?://Foo"];

    for url in with_scheme.iter() {
        assert!(is_url_with_scheme(url), "expected a scheme in {:?}", url);
    }
    for url in without_scheme.iter() {
        assert!(!is_url_with_scheme(url), "expected no scheme in {:?}", url);
    }
}