use combine::char::*;
use combine::combinator::*;
use combine::primitives::{ParseError, ParseResult, Parser, Stream};
use std::fmt::Display;
use std::str::FromStr;
/// Concrete parser type returned by `ws`: a `satisfy` parser driven by a plain
/// `fn(char) -> bool` predicate, wrapped by `expected` to carry a custom
/// "expected ..." message.
type CustomCharParser<I> = Expected<Satisfy<I, fn(char) -> bool>>;
/// Splits a leading byte order mark off `s`.
///
/// Returns `(bom, rest)`. `bom` is the three-byte sequence `EF BB BF` or the
/// two-byte sequence `FE FF` if `s` starts with one of them, otherwise it is
/// the empty string and `rest` is `s` unchanged.
///
/// NOTE(review): the bytes `FE FF` never occur in well-formed UTF-8, so the
/// second case can only trigger for a `&str` that was built without validation.
pub fn split_bom(s: &str) -> (&str, &str) {
    let bytes = s.as_bytes();
    let bom_len = if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
        3
    } else if bytes.starts_with(&[0xFE, 0xFF]) {
        2
    } else {
        return ("", s);
    };
    s.split_at(bom_len)
}
/// Checks that `split_bom` separates both recognised byte order marks and
/// leaves BOM-less (and empty) input untouched.
#[test]
#[allow(unsafe_code)]
fn test_split_bom() {
    // Builds a `&str` from raw bytes without UTF-8 validation.
    // NOTE(review): the `FE FF` inputs are not valid UTF-8, so this helper
    // deliberately sidesteps the usual `str` guarantee; the test relies on
    // `split_bom` only inspecting the raw leading bytes.
    fn unchecked(bytes: &[u8]) -> &str {
        unsafe { ::std::str::from_utf8_unchecked(bytes) }
    }

    let three_byte_bom: [u8; 3] = [0xEF, 0xBB, 0xBF];
    let two_byte_bom: [u8; 2] = [0xFE, 0xFF];
    let with_three_byte_bom: [u8; 6] = [0xEF, 0xBB, 0xBF, b'a', b'b', b'c'];
    let with_two_byte_bom: [u8; 5] = [0xFE, 0xFF, b'd', b'e', b'g'];

    assert_eq!(
        split_bom(unchecked(&with_three_byte_bom)),
        (unchecked(&three_byte_bom), "abc")
    );
    assert_eq!(
        split_bom(unchecked(&with_two_byte_bom)),
        (unchecked(&two_byte_bom), "deg")
    );
    assert_eq!(split_bom("bla"), ("", "bla"));
    assert_eq!(split_bom(""), ("", ""));
}
/// Creates a parser that accepts exactly one blank character: a space or a tab.
///
/// On mismatch the parser reports "tab or space" as what it expected.
#[inline]
#[allow(trivial_casts)]
pub fn ws<I>() -> CustomCharParser<I>
where
    I: Stream<Item = char>,
{
    fn is_tab_or_space(candidate: char) -> bool {
        [' ', '\t'].contains(&candidate)
    }

    // The cast pins the predicate to the `fn(char) -> bool` pointer type that
    // `CustomCharParser` names.
    let predicate = is_tab_or_space as fn(char) -> bool;
    satisfy(predicate).expected("tab or space")
}
/// Parses an optionally negative decimal integer from `input` into an `i64`.
///
/// The accepted syntax is an optional leading `-` followed by one or more
/// digits. A digit sequence that does not fit into an `i64` is reported as a
/// parse error instead of panicking, and the most negative value
/// (`-9223372036854775808`) is accepted.
pub fn number_i64<I>(input: I) -> ParseResult<i64, I>
where
    I: Stream<Item = char>,
{
    (optional(char('-')), many1(digit()))
        .and_then(|(sign, digits): (Option<_>, String)| {
            // Convert sign and digits in one step: negating after the
            // conversion would reject `i64::MIN`, whose magnitude does not fit
            // into a positive `i64`.
            let literal = match sign {
                Some(_) => format!("-{}", digits),
                None => digits,
            };
            // An out-of-range literal yields `Err`, which `and_then` turns
            // into a parse error for the caller.
            i64::from_str(&literal)
        })
        .expected("positive or negative number")
        .parse_stream(input)
}
/// Renders a parse error as a single line.
///
/// The error's `Display` output is trimmed and its lines are concatenated with
/// `"; "` as separator.
pub fn parse_error_to_string<I, R, P>(p: ParseError<I>) -> String
where
    I: Stream<Item = char, Range = R, Position = P>,
    R: PartialEq + Clone + Display,
    P: Ord + Display,
{
    let rendered = p.to_string();
    let mut single_line = String::new();
    for line in rendered.trim().lines() {
        // The separator is only inserted once something has been written.
        if !single_line.is_empty() {
            single_line.push_str("; ");
        }
        single_line.push_str(line);
    }
    single_line
}
/// Merges runs of adjacent text-carrying parts into a single part.
///
/// `extract_fn` returns the mutable text of a part, or `None` if the part
/// carries no mergeable text. Whenever both the most recently kept part and
/// the next part yield text, the next part's text is appended to the kept
/// part and the next part itself is dropped; otherwise the next part is kept
/// as a new element. The relative order of the kept parts is unchanged.
pub fn dedup_string_parts<T, F>(v: Vec<T>, mut extract_fn: F) -> Vec<T>
where
    F: FnMut(&mut T) -> Option<&mut String>,
{
    let mut merged: Vec<T> = Vec::new();
    for mut current in v {
        // `extract_fn` is asked about the previous part first and about the
        // current part only if the previous one yielded text.
        let absorbed = match merged.last_mut() {
            None => false,
            Some(previous) => match extract_fn(previous) {
                None => false,
                Some(target) => match extract_fn(&mut current) {
                    None => false,
                    Some(addition) => {
                        target.push_str(addition);
                        true
                    }
                },
            },
        };
        if !absorbed {
            merged.push(current);
        }
    }
    merged
}
/// A line's content paired with the exact terminator that followed it
/// (`"\r\n"`, `"\n"`, `"\r"`, or `""` for an unterminated final line).
type SplittedLine = (String, String);

/// Splits `s` into lines while keeping every line terminator.
///
/// Each element is `(content, terminator)`. Concatenating all contents and
/// terminators in order reproduces `s` exactly. An empty input yields an empty
/// vector; trailing text without a terminator yields a final element with an
/// empty terminator.
pub fn get_lines_non_destructive(s: &str) -> Vec<SplittedLine> {
    let mut lines = Vec::new();
    let mut remaining = s;

    while !remaining.is_empty() {
        let break_at = match remaining.find(|c: char| c == '\r' || c == '\n') {
            Some(position) => position,
            None => {
                // No terminator left: the rest is the last, unterminated line.
                lines.push((remaining.to_string(), "".to_string()));
                break;
            }
        };

        let (content, tail) = remaining.split_at(break_at);
        // `tail` starts with '\r' or '\n'; "\r\n" counts as one terminator.
        let terminator = if tail.starts_with("\r\n") {
            &tail[..2]
        } else {
            &tail[..1]
        };
        lines.push((content.to_string(), terminator.to_string()));
        remaining = &tail[terminator.len()..];
    }

    lines
}
/// Round-trip check: re-joining the pieces returned by
/// `get_lines_non_destructive` must reproduce the input exactly.
#[test]
fn get_lines_non_destructive_test0() {
    let inputs = ["", "aaabb", "aaabb\r\nbcccc\n\r\n ", "aaabb\r\nbcccc"];
    for &input in inputs.iter() {
        let mut rebuilt = String::new();
        for (content, terminator) in get_lines_non_destructive(input) {
            rebuilt.push_str(&content);
            rebuilt.push_str(&terminator);
        }
        assert_eq!(input, rebuilt);
    }
}
/// Splits `s` into leading blanks, content, and trailing blanks.
///
/// Only spaces and tabs count as blanks. The three returned strings,
/// concatenated in order, reproduce `s` exactly. If `s` consists solely of
/// blanks, all of it is returned as the leading part.
pub fn trim_non_destructive(s: &str) -> (String, String, String) {
    let (begin, rest) = trim_left(s);
    // `rest` is empty or starts with a non-blank character, so stripping
    // blanks from its end leaves exactly the content.
    let content_len = rest
        .trim_end_matches(|c: char| c == ' ' || c == '\t')
        .len();
    let (content, end) = rest.split_at(content_len);
    (begin, content.to_string(), end.to_string())
}

/// Splits `s` into its leading run of spaces/tabs and everything after it.
fn trim_left(s: &str) -> (String, String) {
    // Byte offset of the first non-blank character, or the whole length if
    // there is none.
    let content_start = s
        .find(|c: char| c != ' ' && c != '\t')
        .unwrap_or_else(|| s.len());
    let (blanks, rest) = s.split_at(content_start);
    (blanks.to_string(), rest.to_string())
}