use std::borrow::Cow;
use regex::Regex;
use regex::Captures;
use crate::common::{NB_CHAR, NB_CHAR_NARROW, NB_CHAR_EM};
/// Wrap text that contains narrow non-breaking spaces in an HTML `<span>`.
///
/// Every match of `REGEX` (a run of characters with no ordinary whitespace
/// that contains at least one U+202F) is replaced by
/// `<span class = "nnbsp">…</span>`. Inside that span each U+202F is swapped
/// for the replacement literal passed to `REGEX_LOCAL.replace_all`.
///
/// When the input contains no U+202F the `Cow` obtained from `input.into()`
/// is returned untouched; otherwise a freshly allocated `String` is returned.
pub fn nb_spaces_html<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
let input = input.into();
lazy_static! {
// U+202F is listed explicitly next to `\S` in the character class,
// presumably because `\S` alone treats it as whitespace and would stop there.
static ref REGEX: Regex = Regex::new(r"\S*\x{202F}[\S\x{202F}]*").unwrap();
// Matches a single U+202F inside an already captured run.
static ref REGEX_LOCAL: Regex = Regex::new(r"\x{202F}").unwrap();
}
// Cheap pre-check so the unchanged input can be returned without rebuilding it.
if REGEX.is_match(&input) {
let res = REGEX.replace_all(&input, |caps: &Captures| {
// NOTE(review): the replacement literal below looks like a thin space;
// confirm the exact code point before editing this line.
format!("<span class = \"nnbsp\">{}</span>",
REGEX_LOCAL.replace_all(&caps[0], " "))
});
// `res` borrows from the local `input`, so it has to be turned into an
// owned string before it can be returned with lifetime `'a`.
Cow::Owned(res.into_owned())
} else {
input
}
}
/// Deprecated alias of `nb_spaces_html`, kept so existing callers keep compiling.
///
/// The argument is converted to a `Cow` and forwarded unchanged; the result of
/// `nb_spaces_html` is returned as-is.
#[deprecated(
    since="1.0.0",
    note="Renamed nb_spaces_html"
)]
pub fn nnbsp<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
    let text: Cow<'a, str> = input.into();
    nb_spaces_html(text)
}
/// Replace non-breaking space characters with their LaTeX spellings.
///
/// The three constants imported from `crate::common` are rewritten as follows:
///
/// * `NB_CHAR` becomes `~`
/// * `NB_CHAR_NARROW` becomes `\,`
/// * `NB_CHAR_EM` becomes `\enspace ` (with a trailing space)
///
/// Every other character is copied verbatim. If none of the three constants
/// occurs in the input, the `Cow` is returned unchanged and nothing is
/// allocated.
pub fn nb_spaces_tex<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
    let input = input.into();
    // Byte offset of the first character that needs rewriting. `str::find`
    // always reports a char boundary, so slicing at `first` is safe.
    let first = input.find(|c: char| match c {
        NB_CHAR | NB_CHAR_NARROW | NB_CHAR_EM => true,
        _ => false,
    });
    if let Some(first) = first {
        let mut output = String::with_capacity(input.len());
        // Everything before the first hit is known to be clean: copy it in bulk
        // instead of going through an intermediate `Vec<char>`.
        output.push_str(&input[..first]);
        for c in input[first..].chars() {
            match c {
                NB_CHAR_NARROW => output.push_str("\\,"),
                NB_CHAR_EM => output.push_str("\\enspace "),
                NB_CHAR => output.push('~'),
                _ => output.push(c),
            }
        }
        Cow::Owned(output)
    } else {
        input
    }
}
/// Escape the characters that are significant in HTML text content.
///
/// * `<` becomes `&lt;`
/// * `>` becomes `&gt;`
/// * `&` becomes `&amp;`
///
/// Every other character is copied verbatim. Quotes are *not* escaped, so the
/// result is meant for element content rather than attribute values.
///
/// If the input contains none of the three characters, the `Cow` is returned
/// unchanged and nothing is allocated.
pub fn html<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
    let input = input.into();
    // Byte offset of the first character that needs escaping (always a char
    // boundary, so the slices below cannot panic).
    let first = input.find(|c: char| c == '<' || c == '>' || c == '&');
    if let Some(first) = first {
        let len = input.len();
        // Entities are longer than the characters they replace; over-reserve a
        // little to limit reallocations.
        let mut output = String::with_capacity(len + len / 2);
        output.push_str(&input[..first]);
        for c in input[first..].chars() {
            match c {
                '<' => output.push_str("&lt;"),
                '>' => output.push_str("&gt;"),
                '&' => output.push_str("&amp;"),
                _ => output.push(c),
            }
        }
        Cow::Owned(output)
    } else {
        input
    }
}
/// Turn every double quote (`"`) into a single quote (`'`).
///
/// Text without any double quote is returned as the very `Cow` that was
/// passed in; otherwise a new `String` with the substitutions is returned.
pub fn quotes<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
    let text = input.into();
    // Nothing to substitute: hand the original back without allocating.
    if !text.contains('"') {
        return text;
    }
    let swapped: String = text
        .chars()
        .map(|ch| if ch == '"' { '\'' } else { ch })
        .collect();
    Cow::Owned(swapped)
}
/// Escape the characters that have a special meaning in LaTeX.
///
/// * `&`, `%`, `$`, `#`, `_`, `{`, `}` are prefixed with a backslash.
/// * `[` and `]` are wrapped in braces (`{[}`, `{]}`).
/// * `~`, `^`, `<`, `>`, `\` become `\textasciitilde{}`, `\textasciicircum{}`,
///   `\textless{}`, `\textgreater{}` and `\textbackslash{}`.
/// * `!` becomes `!{}`.
/// * A `-` that is immediately followed by another `-` becomes `-{}`, so that
///   runs of dashes are emitted as separate tokens; a lone or final `-` is
///   kept as is.
///
/// Every other character is copied verbatim. If the input contains none of the
/// characters above, the `Cow` is returned unchanged and nothing is allocated.
pub fn tex<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
    // Every character the loop below treats specially.
    const SPECIAL: &'static [char] = &[
        '!', '<', '>', '&', '%', '$', '#', '_', '~', '-', '{', '}', '[', ']', '^', '\\',
    ];
    let input = input.into();
    // Byte offset of the first special character (always a char boundary).
    let first = input.find(SPECIAL);
    if let Some(first) = first {
        let len = input.len();
        let mut output = String::with_capacity(len + len / 2);
        // The prefix contains nothing to escape: copy it in bulk.
        output.push_str(&input[..first]);
        // `peekable` provides the one-character lookahead needed for dashes
        // without copying the tail into a scratch buffer.
        let mut chars = input[first..].chars().peekable();
        while let Some(c) = chars.next() {
            match c {
                '-' => {
                    if chars.peek() == Some(&'-') {
                        output.push_str(r"-{}");
                    } else {
                        output.push(c);
                    }
                }
                '&' => output.push_str(r"\&"),
                '%' => output.push_str(r"\%"),
                '$' => output.push_str(r"\$"),
                '#' => output.push_str(r"\#"),
                '_' => output.push_str(r"\_"),
                '{' => output.push_str(r"\{"),
                '}' => output.push_str(r"\}"),
                '[' => output.push_str(r"{[}"),
                ']' => output.push_str(r"{]}"),
                '~' => output.push_str(r"\textasciitilde{}"),
                '^' => output.push_str(r"\textasciicircum{}"),
                '<' => output.push_str(r"\textless{}"),
                '>' => output.push_str(r"\textgreater{}"),
                '!' => output.push_str(r"!{}"),
                '\\' => output.push_str(r"\textbackslash{}"),
                _ => output.push(c),
            }
        }
        Cow::Owned(output)
    } else {
        input
    }
}
/// `html` must return text with nothing to escape unchanged.
#[test]
fn html_0() {
    let plain = "Some string without any character to escape";
    assert_eq!(plain, &html(plain));
}
/// `tex` must return text with nothing to escape unchanged.
#[test]
fn tex_0() {
    let plain = "Some string without any character to escape";
    assert_eq!(plain, &tex(plain));
}
/// `nb_spaces_html` must return text without U+202F unchanged.
#[test]
fn nb_spaces_0() {
    let plain = "Some string without any character to escape";
    assert_eq!(plain, &nb_spaces_html(plain));
}
/// `nb_spaces_tex` must return text without non-breaking spaces unchanged.
#[test]
fn tex_nb_spaces_0() {
    let plain = "Some string without any character to escape";
    assert_eq!(plain, &nb_spaces_tex(plain));
}
/// `quotes` must return text without double quotes unchanged.
#[test]
fn quotes_0() {
    let plain = "Some string without any character to escape";
    assert_eq!(plain, &quotes(plain));
}
/// Markup-significant characters inside running text must come back as
/// entities: `<` as `&lt;`, `>` as `&gt;` and `&` as `&amp;`.
#[test]
fn html_1() {
    let s = "<p>Some characters need escaping & something</p>";
    // The expected value must differ from the input, otherwise the test
    // cannot detect a missing escape.
    let expected = "&lt;p&gt;Some characters need escaping &amp; something&lt;/p&gt;";
    let actual = html(s);
    assert_eq!(expected, &actual);
}
/// Several special characters in one string are each escaped, and the text
/// between them is preserved.
#[test]
fn html_2() {
    let actual = html("<foo> & <bar>");
    // The expected value is the entity-escaped form, not the raw input.
    let expected = "&lt;foo&gt; &amp; &lt;bar&gt;";
    assert_eq!(&actual, expected);
}
/// Backslashes and curly braces are escaped by `tex`.
#[test]
fn tex_braces() {
    let escaped = tex(r"\foo{bar}");
    assert_eq!(&escaped, r"\textbackslash{}foo\{bar\}");
}
/// Square brackets are wrapped in curly braces by `tex`.
#[test]
fn tex_square_braces() {
    let escaped = tex(r"foo[bar]");
    assert_eq!(&escaped, r"foo{[}bar{]}");
}
/// In a run of dashes, every dash except the last is followed by `{}`.
#[test]
fn tex_dashes() {
    let escaped = tex("--foo, ---bar");
    assert_eq!(&escaped, r"-{}-foo, -{}-{}-bar");
}
/// `$`, `%` and `#` are backslash-escaped by `tex`.
#[test]
fn tex_numbers() {
    let escaped = tex(r"30000$ is 10% of number #1 income");
    assert_eq!(&escaped, r"30000\$ is 10\% of number \#1 income");
}
/// Double quotes are turned into single quotes by `quotes`.
#[test]
fn quotes_escape() {
    let converted = quotes(r#"Some text with "quotes""#);
    assert_eq!(&converted, r#"Some text with 'quotes'"#);
}
// A single word followed by a narrow non-breaking space and punctuation is
// wrapped as a whole in the `nnbsp` span.
// NOTE(review): the whitespace inside both literals is significant (the input
// is expected to hold U+202F) — do not retype it as a plain space.
#[test]
fn nnbsp_1() {
let actual = nb_spaces_html("Test ?"); let expected = "<span class = \"nnbsp\">Test ?</span>";
assert_eq!(&actual, expected);
}
// Only the part of the sentence that contains narrow non-breaking spaces is
// wrapped in the span; the leading words separated by ordinary spaces stay
// outside it.
// NOTE(review): the whitespace around the guillemets and before `!` is
// significant (expected to be U+202F in the input) — do not retype it.
#[test]
fn nnbsp_2() {
let actual = nb_spaces_html("Ceci est un « Test » !"); let expected = "Ceci est un <span class = \"nnbsp\">« Test » !</span>";
assert_eq!(&actual, expected);
}