use std::borrow::Cow;
use std::ptr;
use std::str;
use crate::ctype::{ispunct, isspace, isspace_char};
use crate::entity;
use crate::parser::AutolinkType;
/// Selects how [`normalize_label`] treats letter case in its output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Case {
    /// Leave the casing of the label as written.
    Preserve,
    /// Run the label through `caseless::default_case_fold_str`.
    Fold,
}
/// Removes backslash escapes from `s` in place.
///
/// Every backslash that is immediately followed by a byte accepted by
/// `ispunct` is dropped and the escaped byte is kept. An escaped backslash
/// (`\\`) therefore collapses to a single `\`, and the kept backslash is not
/// considered as the start of another escape. All other bytes are untouched.
pub fn unescape(s: &mut String) {
    // Compact on the detached byte buffer rather than through
    // `str::as_bytes_mut`: until the final truncate the tail of the buffer
    // holds stale bytes that may split a multi-byte character, and a `String`
    // must never be used while it contains invalid UTF-8.
    let mut b = std::mem::take(s).into_bytes();
    let mut r = 0;
    // Index of the most recent escape (the byte that is kept for it).
    let mut prev = None;
    // Number of backslashes dropped so far, i.e. how far pending data must
    // move to the left.
    let mut found = 0;

    while r < b.len() {
        if b[r] == b'\\' && r + 1 < b.len() && ispunct(b[r + 1]) {
            if b[r + 1] == b'\\' {
                // Step onto the escaped backslash so it is not re-examined.
                r += 1;
            }

            if let Some(prev) = prev {
                // Move the run that followed the previous escape into place.
                let window = &mut b[(prev + 1 - found)..r];
                shift_buf_left(window, found);
            }
            prev = Some(r);
            found += 1;
        }
        r += 1;
    }

    if let Some(prev) = prev {
        // Move the run that follows the last escape.
        let window = &mut b[(prev + 1 - found)..r];
        shift_buf_left(window, found);
    }

    let new_size = b.len() - found;
    b.truncate(new_size);

    // SAFETY: `b` started out as the bytes of a valid `String` and the only
    // bytes removed from it are ASCII backslashes. Deleting ASCII bytes from
    // valid UTF-8 cannot break any multi-byte sequence, so the remaining
    // bytes are still valid UTF-8.
    *s = unsafe { String::from_utf8_unchecked(b) };
}
/// Prepares the target of an autolink.
///
/// The input is trimmed with [`trim_slice`]; an empty result yields an empty
/// string. Otherwise the text is passed through `entity::unescape_html`, and
/// for [`AutolinkType::Email`] the result is additionally prefixed with
/// `mailto:`.
pub fn clean_autolink(url: &str, kind: AutolinkType) -> Cow<'_, str> {
    let trimmed = trim_slice(url);
    if trimmed.is_empty() {
        return Cow::Borrowed("");
    }

    let unescaped = entity::unescape_html(trimmed);
    if kind != AutolinkType::Email {
        return unescaped;
    }

    let mut mailto = String::with_capacity(trimmed.len() + "mailto:".len());
    mailto.push_str("mailto:");
    mailto.push_str(&unescaped);
    Cow::Owned(mailto)
}
/// Normalizes the contents of a code span.
///
/// Each line ending (`\n`, a lone `\r`, or a `\r\n` pair) is replaced by a
/// single space. Afterwards, if the text both starts and ends with a space
/// and contains at least one byte other than a space or line ending, one
/// space is removed from each end.
///
/// Returns a borrowed slice of `v` whenever no line ending had to be
/// replaced, and an owned string otherwise.
pub fn normalize_code(v: &str) -> Cow<'_, str> {
    let b = v.as_bytes();
    let mut r = String::new();
    // Start of the part of `v` that has not been copied into `r` yet.
    let mut offset = 0;
    let mut replaced_line_ending = false;
    let mut contains_nonspace = false;

    for (i, &c) in b.iter().enumerate() {
        match c {
            b'\r' | b'\n' => {
                r.push_str(&v[offset..i]);
                // A '\r' directly before '\n' belongs to the same line
                // ending: drop it here and let the '\n' emit the one space.
                if c == b'\n' || b.get(i + 1) != Some(&b'\n') {
                    r.push(' ');
                }
                offset = i + 1;
                replaced_line_ending = true;
            }
            b' ' => {}
            _ => contains_nonspace = true,
        }
    }

    if !replaced_line_ending {
        // Nothing was rewritten, so the answer is a sub-slice of the input.
        // A non-space byte between two spaces guarantees a length of at
        // least three, which keeps the slice bounds valid.
        return if contains_nonspace && v.starts_with(' ') && v.ends_with(' ') {
            Cow::Borrowed(&v[1..v.len() - 1])
        } else {
            Cow::Borrowed(v)
        };
    }

    r.push_str(&v[offset..]);
    if contains_nonspace && r.starts_with(' ') && r.ends_with(' ') {
        r.pop();
        r.remove(0);
    }
    Cow::Owned(r)
}
/// Truncates `line` in place at the offset computed by
/// `remove_trailing_blank_lines_ix`, discarding trailing blank lines.
pub fn remove_trailing_blank_lines(line: &mut String) {
    let keep = remove_trailing_blank_lines_ix(line.as_str());
    line.truncate(keep);
}
/// Borrowing counterpart of [`remove_trailing_blank_lines`]: returns the
/// prefix of `line` that ends at the offset computed by
/// `remove_trailing_blank_lines_ix`.
pub fn remove_trailing_blank_lines_slice(line: &str) -> &str {
    let keep = remove_trailing_blank_lines_ix(line);
    let (kept, _blank_tail) = line.split_at(keep);
    kept
}
/// Returns the byte length `line` should be cut to in order to drop its
/// trailing blank lines.
///
/// The result is `0` when `line` is empty or consists only of spaces, tabs
/// and line-ending bytes. Otherwise it is the index of the first line-ending
/// byte that follows the last non-blank byte (so trailing spaces on that last
/// line are kept), or `line.len()` if no line ending follows it.
fn remove_trailing_blank_lines_ix(line: &str) -> usize {
    let bytes = line.as_bytes();
    let is_blank_byte = |c: u8| c == b' ' || c == b'\t' || is_line_end_char(c);

    // `rposition` yields `None` for an empty slice as well, so empty input no
    // longer underflows on `len() - 1`.
    match bytes.iter().rposition(|&c| !is_blank_byte(c)) {
        None => 0,
        Some(last) => bytes[last..]
            .iter()
            .position(|&c| is_line_end_char(c))
            .map_or(bytes.len(), |off| last + off),
    }
}
/// Returns `true` if `ch` is a line feed (`\n`) or a carriage return (`\r`).
pub fn is_line_end_char(ch: u8) -> bool {
    ch == b'\n' || ch == b'\r'
}
/// Returns `true` if `ch` is a horizontal tab (`\t`) or a space.
pub fn is_space_or_tab(ch: u8) -> bool {
    ch == b'\t' || ch == b' '
}
/// Strips a closing run of `#` characters from a heading line.
///
/// The line is first right-trimmed with [`rtrim_slice`]. If what remains ends
/// in one or more `#` and that run is preceded by a space or tab, the run and
/// the whitespace before it are removed. In every other case (no trailing
/// `#`, a run glued to the preceding text, a line made only of `#`, or an
/// empty line) the right-trimmed line is returned as is.
pub fn chop_trailing_hashes(line: &str) -> &str {
    let line = rtrim_slice(line);
    let body = line.trim_end_matches('#');

    // No trailing '#' at all. This also covers an empty line, which used to
    // underflow on `len() - 1`.
    if body.len() == line.len() {
        return line;
    }

    match body.as_bytes().last() {
        // Drop the separating byte itself, then any whitespace before it.
        Some(&sep) if is_space_or_tab(sep) => rtrim_slice(&line[..body.len() - 1]),
        // Either the whole line is '#' or the run is attached to text.
        _ => line,
    }
}
/// Removes trailing bytes accepted by `isspace` from `line` in place and
/// returns how many bytes were removed.
pub fn rtrim(line: &mut String) -> usize {
    // One past the last byte that is not whitespace; zero if there is none.
    let kept = line
        .as_bytes()
        .iter()
        .rposition(|&byte| !isspace(byte))
        .map_or(0, |last| last + 1);
    let removed = line.len() - kept;
    line.truncate(kept);
    removed
}
pub fn ltrim(line: &mut String) -> usize {
let bytes = unsafe { line.as_bytes_mut() };
let spaces = bytes.iter().take_while(|&&b| isspace(b)).count();
if spaces > 0 {
shift_buf_left(bytes, spaces);
let new_len = bytes.len() - spaces;
bytes[new_len] = b'\0';
line.truncate(new_len);
}
spaces
}
/// Trims `line` in place on both ends by applying [`ltrim`] and then
/// [`rtrim`]; the removed-byte counts are discarded.
pub fn trim(line: &mut String) {
    let _leading = ltrim(line);
    let _trailing = rtrim(line);
}
pub fn ltrim_slice(i: &str) -> &str {
i.trim_start_matches(isspace_char)
}
pub fn rtrim_slice(i: &str) -> &str {
i.trim_end_matches(isspace_char)
}
/// Returns `i` trimmed on both ends: [`ltrim_slice`] is applied first and
/// [`rtrim_slice`] is applied to its result.
pub fn trim_slice(i: &str) -> &str {
    let without_leading = ltrim_slice(i);
    rtrim_slice(without_leading)
}
/// Trims the text held by `s` on both ends without changing which `Cow`
/// variant it is: an owned string is trimmed in place with [`trim`], a
/// borrowed one is replaced by the sub-slice returned from [`trim_slice`].
pub fn trim_cow(s: &mut Cow<str>) {
    match s {
        Cow::Owned(owned) => trim(owned),
        Cow::Borrowed(borrowed) => {
            // Copy the inner reference out so the result keeps the original
            // lifetime instead of borrowing from `s`.
            let trimmed = trim_slice(*borrowed);
            *s = Cow::Borrowed(trimmed);
        }
    }
}
/// Moves the contents of `buf` left by `n` bytes: afterwards
/// `buf[..buf.len() - n]` holds what was in `buf[n..]`. The last `n` bytes
/// keep their previous values. Does nothing for `n == 0`.
///
/// # Panics
///
/// Panics if `n` is greater than `buf.len()`.
fn shift_buf_left(buf: &mut [u8], n: usize) {
    if n != 0 {
        assert!(n <= buf.len());
        let remaining = buf.len() - n;
        let base = buf.as_mut_ptr();
        // SAFETY: the assertion guarantees `n <= buf.len()`, so both
        // `base + n .. base + n + remaining` and `base .. base + remaining`
        // lie inside `buf`. `ptr::copy` permits the two ranges to overlap.
        unsafe { ptr::copy(base.add(n), base, remaining) };
    }
}
/// Prepares a link destination.
///
/// The input is trimmed with [`trim_slice`]; an empty result yields an empty
/// string. Otherwise the text is passed through `entity::unescape_html` and
/// then through [`unescape`], and returned as an owned string.
pub fn clean_url(url: &str) -> Cow<'static, str> {
    match trim_slice(url) {
        "" => Cow::Borrowed(""),
        trimmed => {
            let mut cleaned = entity::unescape_html(trimmed).into_owned();
            unescape(&mut cleaned);
            Cow::Owned(cleaned)
        }
    }
}
/// Prepares a link title.
///
/// If `title` is wrapped in a matching pair of delimiters (`'…'`, `"…"` or
/// `(…)`) the delimiters are removed. The remaining text is passed through
/// `entity::unescape_html` and then through [`unescape`]. An empty `title`
/// yields an empty string.
pub fn clean_title(title: &str) -> Cow<'static, str> {
    if title.is_empty() {
        return Cow::Borrowed("");
    }

    let bytes = title.as_bytes();
    let last_ix = bytes.len() - 1;
    let inner = match (bytes[0], bytes[last_ix]) {
        // A lone `'` or `"` is both the first and the last byte; require two
        // bytes so that it is not treated as an empty delimited title (which
        // would slice `1..0` and panic).
        (b'\'', b'\'') | (b'(', b')') | (b'"', b'"') if bytes.len() >= 2 => &title[1..last_ix],
        _ => title,
    };

    let mut b = entity::unescape_html(inner).into_owned();
    unescape(&mut b);
    Cow::Owned(b)
}
/// Returns `true` if `s` contains nothing but spaces and tabs up to its first
/// line-ending byte (`\n` or `\r`) or, when there is none, up to its end.
/// Anything after the first line ending is not inspected.
pub fn is_blank(s: &str) -> bool {
    s.bytes()
        // The first byte that is neither a space nor a tab decides.
        .find(|&byte| byte != b' ' && byte != b'\t')
        .map_or(true, |byte| byte == b'\n' || byte == b'\r')
}
/// Normalizes a reference label.
///
/// The input is trimmed with [`trim_slice`], every run of characters for
/// which `char::is_whitespace` holds is collapsed into a single space, and,
/// when `casing` is [`Case::Fold`], the result is passed through
/// `caseless::default_case_fold_str`.
pub fn normalize_label(i: &str, casing: Case) -> String {
    let trimmed = trim_slice(i);
    let mut collapsed = String::with_capacity(trimmed.len());
    let mut in_whitespace_run = false;

    for ch in trimmed.chars() {
        let is_ws = ch.is_whitespace();
        if !is_ws {
            collapsed.push(ch);
        } else if !in_whitespace_run {
            // Only the first character of a whitespace run emits a space.
            collapsed.push(' ');
        }
        in_whitespace_run = is_ws;
    }

    match casing {
        Case::Fold => caseless::default_case_fold_str(&collapsed),
        Case::Preserve => collapsed,
    }
}
// Exercises `normalize_label` in both casing modes.
#[test]
fn normalize_label_fold_test() {
// A run of space, tab and newline collapses into one space; casing is kept
// under `Case::Preserve` and lowered under `Case::Fold`.
assert_eq!(normalize_label("Abc \t\ndef", Case::Preserve), "Abc def");
assert_eq!(normalize_label("Abc \t\ndef", Case::Fold), "abc def");
// The capital sharp s is left alone when preserving case and is expected to
// fold to "ss".
assert_eq!(normalize_label("Straẞe", Case::Preserve), "Straẞe");
assert_eq!(normalize_label("Straẞe", Case::Fold), "strasse");
}
/// Length in bytes of the line ending (`\n` or `\r\n`) that `rest` starts
/// with, or `0` if it does not start with one.
fn leading_line_ending_len(rest: &str) -> usize {
    if rest.starts_with('\n') {
        1
    } else if rest.starts_with("\r\n") {
        2
    } else {
        0
    }
}

/// Splits a leading front-matter block off a document.
///
/// After an optional U+FEFF byte-order mark is skipped, `s` must start with a
/// line consisting of exactly `delimiter`. The block ends at a later line that
/// starts with `delimiter` and is immediately followed by `\r\n`, by `\n`, or
/// by the end of the input. One blank line directly after the closing
/// delimiter line, if present, is counted as part of the front matter.
///
/// Returns `Some((front_matter, rest))`, both borrowed from `s` (without the
/// byte-order mark), or `None` when no well-formed front-matter block is
/// found.
pub fn split_off_front_matter<'s>(mut s: &'s str, delimiter: &str) -> Option<(&'s str, &'s str)> {
    s = trim_start_match(s, "\u{feff}");

    if !s.starts_with(delimiter) {
        return None;
    }
    let mut start = delimiter.len();

    // The opening delimiter must be alone on its line.
    match leading_line_ending_len(&s[start..]) {
        0 => return None,
        n => start += n,
    }

    // Locate the closing delimiter at the start of a later line.
    let closer = format!("\n{}", delimiter);
    let rest = &s[start..];
    let closer_at = rest
        .find(&format!("{}\r\n", closer))
        .or_else(|| rest.find(&format!("{}\n", closer)))
        // Closing delimiter directly followed by end of input. This must be
        // anchored at the end: a plain `find` would stop at the first line
        // that merely begins with the delimiter.
        .or_else(|| {
            if rest.ends_with(closer.as_str()) {
                Some(rest.len() - closer.len())
            } else {
                None
            }
        })?;
    start += closer_at + closer.len();

    if start == s.len() {
        return Some((s, ""));
    }

    // Line ending that terminates the closing delimiter line.
    match leading_line_ending_len(&s[start..]) {
        0 => return None,
        n => start += n,
    }

    // Optionally swallow one blank line after the front matter.
    start += leading_line_ending_len(&s[start..]);

    Some(s.split_at(start))
}
/// Returns `s` with one leading occurrence of `pat` removed, or `s` unchanged
/// if it does not start with `pat`.
pub fn trim_start_match<'s>(s: &'s str, pat: &str) -> &'s str {
    match s.strip_prefix(pat) {
        Some(rest) => rest,
        None => s,
    }
}
// Unit tests for the code-span, front-matter and label helpers of this file.
#[cfg(test)]
pub mod tests {
use super::{normalize_code, normalize_label, split_off_front_matter};
use crate::strings::Case;
// A lone line ending becomes a single space and is not stripped afterwards.
#[test]
fn normalize_code_handles_lone_newline() {
assert_eq!(normalize_code("\n"), " ");
}
// Input consisting of a single space is returned unchanged.
#[test]
fn normalize_code_handles_lone_space() {
assert_eq!(normalize_code(" "), " ");
}
#[test]
fn front_matter() {
// Plain `\n` line endings and no blank line after the closing delimiter.
assert_eq!(
split_off_front_matter("---\nfoo: bar\n---\nHiiii", "---"),
Some(("---\nfoo: bar\n---\n", "Hiiii"))
);
// Leading byte-order mark, `\r\n` line endings, a custom delimiter that
// also appears in the middle of a line, and one blank line after the block
// that is counted as part of the front matter.
assert_eq!(
split_off_front_matter(
"\u{feff}!@#\r\n\r\nfoo: !@# \r\nquux\n!@#\r\n\n\nYes!\n",
"!@#"
),
Some(("!@#\r\n\r\nfoo: !@# \r\nquux\n!@#\r\n\n", "\nYes!\n"))
);
// The delimiter at the start of a line but followed by a space does not
// close the block.
assert_eq!(
split_off_front_matter(
"\u{feff}!@#\r\n\r\nfoo: \n!@# \r\nquux\n!@#\r\n\n\nYes!\n",
"!@#"
),
Some(("!@#\r\n\r\nfoo: \n!@# \r\nquux\n!@#\r\n\n", "\nYes!\n"))
);
}
#[test]
fn normalize_label_lowercase() {
// Outer spaces are trimmed and the no-break space inside the label is
// turned into a plain space before folding.
assert_eq!(normalize_label(" Foo\u{A0}BAR ", Case::Fold), "foo bar");
// The dotted capital I is expected to fold to `i` followed by U+0307.
assert_eq!(normalize_label(" FooİBAR ", Case::Fold), "fooi\u{307}bar");
}
#[test]
fn normalize_label_preserve() {
// Same whitespace handling as above, but the casing is left untouched.
assert_eq!(
normalize_label(" Foo\u{A0}BAR ", Case::Preserve),
"Foo BAR"
);
assert_eq!(normalize_label(" FooİBAR ", Case::Preserve), "FooİBAR");
}
}