#![cfg(feature = "pulldown-cmark")]
use core::slice::Iter;
use std::borrow::Cow;
use std::string::String;
use std::sync::Arc;
use std::vec::Vec;
use pulldown_cmark::Event;
#[cfg(feature = "thiserror")]
use thiserror::Error;
use crate::{CMarkItem, File, FileDocs, TextSource};
/// An ordered list of shared Markdown items (`CMarkItem`) on which the
/// transformation methods of this module operate.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CMarkData(Vec<Arc<CMarkItem>>);
/// Borrowing slice iterator over the items of a `CMarkData`, as returned by `CMarkData::iter`.
pub type CMarkDataIter<'a> = Iter<'a, Arc<CMarkItem>>;
impl CMarkData {
pub fn from_items(items: Vec<Arc<CMarkItem>>) -> Self {
Self(items)
}
/// Parses the text of `file` as Markdown; shorthand for `from_text_source`
/// with a `TextSource::File` source.
pub fn from_file(file: Arc<File>) -> Self {
    let source = TextSource::File(file);
    Self::from_text_source(source)
}
/// Parses the docs of `file_docs` as Markdown; shorthand for `from_text_source`
/// with a `TextSource::FileDocs` source.
pub fn from_file_docs(file_docs: Arc<FileDocs>) -> Self {
    let source = TextSource::FileDocs(file_docs);
    Self::from_text_source(source)
}
pub fn from_text_source(text_source: TextSource) -> Self {
use crate::IntoStatic;
use pulldown_cmark::Parser;
let text = match &text_source {
TextSource::File(file) => file.text(),
TextSource::FileDocs(file_docs) => file_docs.docs(),
};
Self(
Parser::new(text)
.into_offset_iter()
.map(|(event, range)| {
CMarkItem::from(event.into_static(), range, text_source.clone())
})
.collect(),
)
.concat_texts()
}
pub fn into_items(self) -> Vec<Arc<CMarkItem>> {
self.0
}
pub fn iter(&self) -> CMarkDataIter<'_> {
self.0.iter()
}
/// Returns an iterator over the events of all items, skipping items whose
/// `event()` is `None`.
pub fn iter_events(&self) -> impl Iterator<Item = &Event<'_>> {
    self.iter().filter_map(|item| item.event())
}
fn map<F>(self, func: F) -> Self
where
F: FnMut(Arc<CMarkItem>) -> Arc<CMarkItem>,
{
Self(self.0.into_iter().map(func).collect())
}
/// Merges every run of consecutive text events into a single text item.
///
/// A run ends at the next item that carries a non-text event, or at the end
/// of the data. A run of one item is kept as is; a longer run is replaced by
/// one modified item holding the concatenated text (see `merge_text_nodes`).
///
/// Items without an event are forwarded to the output as soon as they are
/// seen and do not end the current run, so they end up before the merged text
/// of a run they were interleaved with.
pub fn concat_texts(self) -> Self {
    use core::mem::take;

    let mut result = Vec::new();
    let mut text_nodes = Vec::new();
    let mut text_value = String::new();

    for node in self.0 {
        match node.event() {
            None => {
                result.push(node);
            }
            Some(Event::Text(event_text)) => {
                text_value.push_str(event_text);
                text_nodes.push(node);
            }
            Some(_) => {
                if let Some(text_node) =
                    merge_text_nodes(take(&mut text_nodes), take(&mut text_value))
                {
                    result.push(text_node);
                }
                result.push(node);
            }
        }
    }

    // Flush a run of text that reaches the end of the data; without this the
    // trailing text items would be silently dropped.
    if let Some(text_node) = merge_text_nodes(text_nodes, text_value) {
        result.push(text_node);
    }

    Self(result)
}
}
/// Collapses a run of text items into at most one item.
///
/// Returns `None` for an empty run and the item itself for a run of one
/// (`text` is ignored in both cases). For longer runs the items are replaced
/// by a single modified text item containing `text`.
fn merge_text_nodes(mut nodes: Vec<Arc<CMarkItem>>, text: String) -> Option<Arc<CMarkItem>> {
    use crate::CMarkItemAsModified;
    use pulldown_cmark::CowStr;

    if nodes.len() > 1 {
        let merged = Event::Text(CowStr::Boxed(text.into_boxed_str()));
        return Some(nodes.into_modified(merged, Cow::from("concat_texts()")));
    }
    // Zero or one item left: hand back the sole item, if any, unchanged.
    nodes.pop()
}
impl CMarkData {
/// Increases the level of every heading start and end event by one.
///
/// Items that are not heading events are passed through untouched.
pub fn increment_heading_levels(self) -> Self {
    use crate::CMarkItemAsModified;
    use pulldown_cmark::Tag;

    self.map(|node| {
        let bumped = match node.event() {
            Some(Event::Start(Tag::Heading(level))) => Event::Start(Tag::Heading(level + 1)),
            Some(Event::End(Tag::Heading(level))) => Event::End(Tag::Heading(level + 1)),
            _ => return node,
        };
        node.into_modified(bumped, Cow::from("increment_heading_levels()"))
    })
}
pub fn add_title(self, text: &str) -> Self {
use pulldown_cmark::{CowStr, Tag};
use std::string::ToString;
let heading = std::vec![
CMarkItem::new(Event::Start(Tag::Heading(1)), Cow::from("add_title()")),
CMarkItem::new(
Event::Text(CowStr::Boxed(text.to_string().into_boxed_str())),
Cow::from("add_title()")
),
CMarkItem::new(Event::End(Tag::Heading(1)), Cow::from("add_title()")),
];
Self(heading.into_iter().chain(self.0.into_iter()).collect())
}
/// Removes the first paragraph that contains only images and is accepted by `predicate`.
///
/// A paragraph qualifies when, outside of its images, it contains nothing but
/// link start/end events, soft breaks and items without an event. When such a
/// paragraph ends, `predicate` is called with the URLs of its images (in
/// order); if it has at least one image and `predicate` returns `true`, the
/// whole paragraph is replaced by a single removed item. After the first
/// removal every remaining item is copied through unchanged.
///
/// Paragraphs that contain any other event are emitted unchanged, and the
/// image URLs collected from them are discarded so that they cannot leak into
/// the predicate call of a later paragraph.
#[allow(clippy::match_like_matches_macro)]
pub fn remove_images_only_paragraph<P>(self, mut predicate: P) -> Self
where
    P: FnMut(&[&str]) -> bool,
{
    use crate::CMarkItemAsRemoved;
    use core::mem::take;
    use pulldown_cmark::Tag;
    use std::string::ToString;

    let mut result = Vec::new();
    // Items of the paragraph currently being examined; empty when outside a
    // candidate paragraph.
    let mut paragraph = Vec::new();
    let mut image_urls = Vec::new();
    // True while between an image start event and its matching end event.
    let mut is_image = false;
    let mut is_already_removed = false;

    for node in self.0 {
        if is_already_removed {
            result.push(node);
            continue;
        }

        if !paragraph.is_empty() {
            if is_image {
                // Everything inside an image belongs to the image; only watch for its end.
                let event = node.event();
                is_image = if let Some(Event::End(Tag::Image(..))) = event {
                    false
                } else {
                    true
                };
                paragraph.push(node);
            } else {
                paragraph.push(node);
                let node = paragraph.last().unwrap();
                let event = node.event();
                match event {
                    Some(Event::End(Tag::Paragraph)) => {
                        let urls: Vec<String> = take(&mut image_urls);
                        let urls: Vec<&str> = urls.iter().map(|url| url.as_str()).collect();
                        if !urls.is_empty() && predicate(&urls) {
                            result.push(
                                take(&mut paragraph)
                                    .into_removed(Cow::from("remove_images_only_paragraphs()")),
                            );
                            is_already_removed = true;
                        } else {
                            result.append(&mut paragraph);
                        }
                    }
                    Some(Event::Start(Tag::Image(_, url, _))) => {
                        image_urls.push(url.as_ref().to_string());
                        is_image = true;
                    }
                    Some(Event::Start(Tag::Link(..)))
                    | Some(Event::End(Tag::Link(..)))
                    | Some(Event::SoftBreak)
                    | None => {}
                    Some(_) => {
                        // Not an images-only paragraph: emit what was buffered and
                        // forget the URLs gathered so far, otherwise they would be
                        // passed to the predicate together with the next paragraph's.
                        result.append(&mut paragraph);
                        image_urls.clear();
                    }
                }
            }
        } else {
            let event = node.event();
            match event {
                Some(Event::Start(Tag::Paragraph)) => paragraph.push(node),
                _ => result.push(node),
            }
        }
    }

    // An unterminated paragraph at the end of the data is emitted unchanged.
    result.append(&mut paragraph);
    Self(result)
}
/// Removes the first images-only paragraph in which at least one image URL
/// matches one of the patterns from `crate::badge_url_patterns()`.
#[cfg(feature = "glob")]
pub fn remove_badges_paragraph(self) -> Self {
    let patterns = crate::badge_url_patterns();
    self.remove_images_only_paragraph(|image_urls| {
        for url in image_urls {
            for pattern in patterns.iter() {
                if pattern.matches(url) {
                    return true;
                }
            }
        }
        false
    })
}
/// Removes the first section whose heading has the given `level` and text `heading`.
///
/// The data is cut into chunks at every heading start whose level is less
/// than or equal to `level`. A chunk matches when its first item is a heading
/// start of exactly `level` and its second item is a text event equal to
/// `heading`; the first matching chunk is replaced by a single removed item
/// and everything after it is copied through unchanged.
pub fn remove_section(self, heading: &str, level: u32) -> Self {
    use core::mem::take;
    use pulldown_cmark::Tag;

    let mut output = Vec::new();
    let mut pending = Vec::new();
    let mut removed = false;

    for node in self.0 {
        if removed {
            output.push(node);
            continue;
        }

        let starts_new_chunk = match node.event() {
            Some(Event::Start(Tag::Heading(node_level))) => *node_level <= level,
            _ => false,
        };
        if starts_new_chunk {
            // The previous chunk is complete: emit it, removed if it matches.
            let (flushed, is_removed) =
                into_removed_section_if_matched(take(&mut pending), heading, level);
            output.extend(flushed);
            removed = is_removed;
        }

        if removed {
            output.push(node);
        } else {
            pending.push(node);
        }
    }

    // The last chunk has no following heading to trigger its flush.
    let (tail, _) = into_removed_section_if_matched(pending, heading, level);
    output.extend(tail);
    Self(output)
}
/// Removes the first level-2 section titled `Documentation`.
pub fn remove_documentation_section(self) -> Self {
    let (heading, level) = ("Documentation", 2);
    self.remove_section(heading, level)
}
}
/// Replaces `section` by a single removed item if it matches `heading` and `level`.
///
/// Returns the resulting items together with a flag telling whether the
/// section was removed. A section that does not match (see
/// `is_matched_section`) is returned unchanged with `false`.
fn into_removed_section_if_matched(
    section: Vec<Arc<CMarkItem>>,
    heading: &str,
    level: u32,
) -> (Vec<Arc<CMarkItem>>, bool) {
    use crate::CMarkItemAsRemoved;
    use std::vec;

    if !is_matched_section(&section, heading, level) {
        return (section, false);
    }

    let note = std::format!(
        "remove_section(name = \"{}\", level = {})",
        heading,
        level
    );
    (vec![section.into_removed(Cow::from(note))], true)
}
/// Tells whether `section` begins with a heading of exactly `level` whose
/// first inner event is a text event equal to `heading`.
///
/// Only the first two items are inspected; items without an event never match.
fn is_matched_section(section: &[Arc<CMarkItem>], heading: &str, level: u32) -> bool {
    use pulldown_cmark::Tag;

    let heading_level = match section.first().and_then(|node| node.event()) {
        Some(Event::Start(Tag::Heading(node_level))) => *node_level,
        _ => return false,
    };
    let title: &str = match section.get(1).and_then(|node| node.event()) {
        Some(Event::Text(node_text)) => node_text.as_ref(),
        _ => return false,
    };

    heading_level == level && title == heading
}
impl CMarkData {
/// Fails if any link URL starts with the blob prefix of `repository_url`
/// (see `blob_path_prefix`); otherwise returns the data unchanged.
#[cfg(feature = "thiserror")]
pub fn disallow_absolute_blob_links(
    self,
    repository_url: &str,
) -> Result<Self, DisallowUrlsWithPrefixError> {
    let prefix = blob_path_prefix(repository_url);
    self.disallow_urls_with_prefix(&prefix)
}
/// Fails if any link URL starts with the docs prefix built from `package_name`
/// and `documentation_url` (see `docs_path_prefix`); otherwise returns the
/// data unchanged.
#[cfg(feature = "thiserror")]
pub fn disallow_absolute_docs_links(
    self,
    package_name: &str,
    documentation_url: &str,
) -> Result<Self, DisallowUrlsWithPrefixError> {
    let prefix = docs_path_prefix(package_name, documentation_url);
    self.disallow_urls_with_prefix(&prefix)
}
/// Fails if any link start event has a URL beginning with `prefix`.
///
/// # Errors
///
/// Returns `DisallowUrlsWithPrefixError::PrefixFound` for the first such
/// link, carrying the offending URL and the prefix. Only link events are
/// checked.
#[cfg(feature = "thiserror")]
pub fn disallow_urls_with_prefix(
    self,
    prefix: &str,
) -> Result<Self, DisallowUrlsWithPrefixError> {
    use pulldown_cmark::Tag;
    use std::string::ToString;

    let offending = self
        .0
        .iter()
        .filter_map(|node| match node.event() {
            Some(Event::Start(Tag::Link(_, url, _))) if url.starts_with(prefix) => {
                Some(url.as_ref().to_string())
            }
            _ => None,
        })
        .next();

    match offending {
        Some(url) => Err(DisallowUrlsWithPrefixError::PrefixFound {
            url,
            prefix: prefix.to_string(),
        }),
        None => Ok(self),
    }
}
/// Prefixes relative link URLs with the blob prefix of `repository_url`
/// (see `blob_path_prefix` and `with_absolute_urls`).
pub fn use_absolute_blob_urls(self, repository_url: &str) -> Self {
    let prefix = blob_path_prefix(repository_url);
    self.with_absolute_urls(&prefix)
}
/// Prefixes relative link URLs with the docs prefix built from `package_name`
/// and `documentation_url` (see `docs_path_prefix` and `with_absolute_urls`).
pub fn use_absolute_docs_urls(self, package_name: &str, documentation_url: &str) -> Self {
    let prefix = docs_path_prefix(package_name, documentation_url);
    self.with_absolute_urls(&prefix)
}
/// Prepends `prefix` to every link URL that is neither absolute
/// (`is_absolute_url`) nor a fragment (`is_fragment`).
///
/// All other link URLs are left as they are.
pub fn with_absolute_urls(self, prefix: &str) -> Self {
    use std::format;

    let note = format!("with_absolute_urls(prefix = \"{}\")", prefix);
    self.map_links(
        |url| {
            if is_absolute_url(url) || is_fragment(url) {
                Cow::from(url)
            } else {
                Cow::from([prefix, url].concat())
            }
        },
        Cow::from(note),
    )
}
/// Rewrites the URL of every link start and end event with `func`.
///
/// `func` receives the current URL and returns the new one. Items whose URL
/// is unchanged are kept as they are; changed ones are replaced by modified
/// items annotated with `note`. Only link tags are mapped; other tags are
/// left untouched.
pub fn map_links<F>(self, mut func: F, note: impl Into<Cow<'static, str>>) -> Self
where
    for<'b> F: FnMut(&'b str) -> Cow<'b, str>,
{
    use crate::CMarkItemAsModified;
    use pulldown_cmark::{CowStr, Tag};

    /// Returns the link tag with its URL replaced, or `None` when `tag` is
    /// not a link or `func` leaves the URL unchanged.
    fn map_link<'a, F>(tag: &Tag<'a>, mut func: F) -> Option<Tag<'a>>
    where
        for<'b> F: FnMut(&'b str) -> Cow<'b, str>,
    {
        if let Tag::Link(ty, url, title) = tag {
            let new_url = func(url.as_ref());
            if url.as_ref() != new_url.as_ref() {
                return Some(Tag::Link(
                    *ty,
                    CowStr::from(new_url.into_owned()),
                    title.clone(),
                ));
            }
        }
        None
    }

    let note = note.into();
    self.map(|node| {
        let event = match node.event() {
            Some(Event::Start(tag)) => map_link(tag, &mut func).map(Event::Start),
            Some(Event::End(tag)) => map_link(tag, &mut func).map(Event::End),
            _ => None,
        };
        match event {
            Some(event) => node.into_modified(event, note.clone()),
            None => node,
        }
    })
}
}
/// Tells whether `url` is treated as absolute, which is exactly the result of
/// `is_url_with_scheme(url)`.
fn is_absolute_url(url: &str) -> bool {
is_url_with_scheme(url)
}
/// Tells whether `url` is a fragment reference, i.e. starts with `#`.
fn is_fragment(url: &str) -> bool {
    url.as_bytes().first() == Some(&b'#')
}
/// Tells whether the part of `url` before its first `//` is empty or a scheme
/// followed by `:`.
///
/// A scheme is an ASCII letter followed by any number of ASCII letters,
/// digits, `+`, `.` or `-`. If `url` contains no `//`, the whole string is
/// examined, so an empty string yields `true`.
#[allow(clippy::match_like_matches_macro)]
fn is_url_with_scheme(url: &str) -> bool {
    let head = match url.find("//") {
        Some(index) => &url[..index],
        None => url,
    };
    if head.is_empty() {
        return true;
    }

    let (scheme, last) = match head.as_bytes().split_last() {
        Some((last, scheme)) => (scheme, *last),
        None => return false,
    };
    if last != b':' {
        return false;
    }

    match scheme.split_first() {
        Some((first, rest)) => {
            first.is_ascii_alphabetic()
                && rest.iter().all(|&byte| {
                    byte.is_ascii_alphanumeric() || byte == b'+' || byte == b'.' || byte == b'-'
                })
        }
        // Nothing in front of the `:`.
        None => false,
    }
}
/// Returns `value` without its final `/`, if it has one.
///
/// At most one slash is stripped.
fn without_trailing_slash(value: &str) -> &str {
    if value.ends_with('/') {
        &value[..value.len() - 1]
    } else {
        value
    }
}
/// Builds the prefix `<repository_url>/blob/master/`, ignoring one trailing
/// slash of `repository_url`.
fn blob_path_prefix(repository_url: &str) -> String {
    [without_trailing_slash(repository_url), "/blob/master/"].concat()
}
/// Builds the prefix `<documentation_url>/*/<name>/`, where `<name>` is
/// `package_name` with every `-` replaced by `_` and one trailing slash of
/// `documentation_url` is ignored.
fn docs_path_prefix(package_name: &str, documentation_url: &str) -> String {
    let mut prefix = String::from(without_trailing_slash(documentation_url));
    prefix.push_str("/*/");
    prefix.push_str(&package_name.replace('-', "_"));
    prefix.push('/');
    prefix
}
impl CMarkData {
/// Removes a single tag from the tag lists of fenced code blocks; see
/// `remove_codeblock_tags`.
pub fn remove_codeblock_tag(self, tag: &str) -> Self {
    let tags = [tag];
    self.remove_codeblock_tags(&tags)
}
/// Removes every tag listed in `tags` from the comma-separated tag lists of
/// fenced code block start and end events.
///
/// Events that contain none of the tags are left untouched.
pub fn remove_codeblock_tags(self, tags: &[&str]) -> Self {
    use crate::CMarkItemAsModified;

    self.map(|node| {
        let rewritten = match node.event() {
            Some(Event::Start(tag)) => remove_codeblock_tag_tags(tag, tags).map(Event::Start),
            Some(Event::End(tag)) => remove_codeblock_tag_tags(tag, tags).map(Event::End),
            _ => None,
        };
        if let Some(event) = rewritten {
            let note = std::format!("remove_codeblock_tags(tags = {:?})", tags);
            node.into_modified(event, Cow::from(note))
        } else {
            node
        }
    })
}
}
/// Returns a copy of a fenced code block tag with all entries listed in
/// `tags` dropped from its comma-separated tag list.
///
/// Returns `None` when `event_tag` is not a fenced code block or contains
/// none of `tags`.
fn remove_codeblock_tag_tags<'a>(
    event_tag: &pulldown_cmark::Tag<'a>,
    tags: &[&str],
) -> Option<pulldown_cmark::Tag<'a>> {
    use pulldown_cmark::{CodeBlockKind, CowStr, Tag};

    let node_tags = match event_tag {
        Tag::CodeBlock(CodeBlockKind::Fenced(node_tags)) => node_tags,
        _ => return None,
    };

    // Single pass: collect the survivors and remember whether anything was dropped.
    let mut kept = Vec::new();
    let mut dropped_any = false;
    for node_tag in node_tags.split(',') {
        if tags.iter().any(|tag| *tag == node_tag) {
            dropped_any = true;
        } else {
            kept.push(node_tag);
        }
    }
    if !dropped_any {
        return None;
    }

    let remaining = CowStr::Boxed(kept.join(",").into_boxed_str());
    Some(Tag::CodeBlock(CodeBlockKind::Fenced(remaining)))
}
impl CMarkData {
pub fn remove_codeblock_rust_test_tags(self) -> Self {
use crate::codeblock_rust_test_tags;
self.remove_codeblock_tags(codeblock_rust_test_tags())
}
/// Gives every fenced code block that has an empty tag list the tag `tag`.
///
/// Both the start and the end event of such a block are rewritten; all other
/// items are left untouched.
pub fn use_default_codeblock_tag(self, tag: &str) -> Self {
    use crate::CMarkItemAsModified;

    self.map(|node| {
        let replacement = match node.event() {
            Some(Event::Start(node_tag)) => {
                map_default_codeblock_tag(node_tag, tag).map(Event::Start)
            }
            Some(Event::End(node_tag)) => map_default_codeblock_tag(node_tag, tag).map(Event::End),
            _ => None,
        };
        if let Some(event) = replacement {
            let note = std::format!("use_default_codeblock_tag(tag = \"{}\")", tag);
            node.into_modified(event, Cow::from(note))
        } else {
            node
        }
    })
}
}
fn map_default_codeblock_tag<'a>(
event_tag: &pulldown_cmark::Tag<'a>,
tag: &str,
) -> Option<pulldown_cmark::Tag<'a>> {
use pulldown_cmark::{CodeBlockKind, CowStr, Tag};
use std::string::ToString;
if let Tag::CodeBlock(CodeBlockKind::Fenced(ref node_tag)) = event_tag {
if node_tag.as_ref() == "" {
return Some(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Boxed(
tag.to_string().into_boxed_str(),
))));
}
}
None
}
impl CMarkData {
/// Gives every fenced code block that has an empty tag list the tag `rust`.
pub fn use_default_codeblock_rust_tag(self) -> Self {
    let default_tag = "rust";
    self.use_default_codeblock_tag(default_tag)
}
/// Drops hidden lines from the text of fenced code blocks tagged `rust`.
///
/// A line is dropped when it is exactly `#` or starts with `# `. Every text
/// item inside such a block is replaced by a modified item, whether or not
/// any line was dropped. A block counts as Rust when `rust` is one of the
/// comma-separated entries of its tag list.
pub fn remove_hidden_rust_code(self) -> Self {
    use crate::CMarkItemAsModified;
    use pulldown_cmark::{CodeBlockKind, CowStr, Tag};

    // Tracks whether the items currently visited lie inside a Rust code block.
    let mut inside_rust_block = false;

    self.map(|node| {
        let visible_text = match node.event() {
            Some(Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tags)))) => {
                if tags.split(',').any(|tag| tag == "rust") {
                    inside_rust_block = true;
                }
                None
            }
            Some(Event::End(Tag::CodeBlock(..))) => {
                inside_rust_block = false;
                None
            }
            Some(Event::Text(text)) if inside_rust_block => {
                let lines: Vec<&str> = text
                    .split('\n')
                    .filter(|line| !(*line == "#" || line.starts_with("# ")))
                    .collect();
                Some(lines.join("\n"))
            }
            _ => None,
        };

        match visible_text {
            Some(text) => node.into_modified(
                Event::Text(CowStr::Boxed(text.into_boxed_str())),
                Cow::from("remove_hidden_rust_code()"),
            ),
            None => node,
        }
    })
}
}
/// Error returned by `CMarkData::disallow_urls_with_prefix` and the methods
/// built on top of it.
#[cfg(feature = "thiserror")]
#[derive(Clone, Debug, Error)]
pub enum DisallowUrlsWithPrefixError {
/// A link URL starts with the disallowed prefix.
#[error("The url `{url}` use a prohibited prefix `{prefix}`.")]
PrefixFound {
/// The offending link URL.
url: String,
/// The prefix that was checked for.
prefix: String,
},
}
/// Checks `is_url_with_scheme` against URLs it must accept and URLs it must reject.
#[test]
fn test_is_url_with_scheme() {
    let accepted = [
        "//Foo",
        "a://Foo",
        "Z://Foo",
        "aa://Foo",
        "a0://Foo",
        "a+://Foo",
        "a.://Foo",
        "a-://Foo",
        "http://Foo",
        "https://Foo",
    ];
    let rejected = ["Foo", "crate::Foo", "://Foo", "0://Foo", "a?://Foo"];

    for url in accepted.iter() {
        assert!(is_url_with_scheme(url), "`{}` should be accepted", url);
    }
    for url in rejected.iter() {
        assert!(!is_url_with_scheme(url), "`{}` should be rejected", url);
    }
}