use crate::errors::*;
use crate::{LineColumn, Range, Span};
use core::ops::{Bound, RangeBounds};
use fs_err as fs;
use std::io::Read;
use std::path::Path;
#[derive(Debug, PartialEq, Eq)]
struct LineSepStat {
first_appearance: usize,
count: usize,
newline: &'static str,
}
#[inline(always)]
fn extract_delimiter_inner<'a>(
mut iter: impl Iterator<Item = usize>,
newline: &'static str,
) -> Option<LineSepStat> {
if let Some(first) = iter.next() {
let n = iter.count() + 1;
Some(LineSepStat {
first_appearance: first,
count: n,
newline,
})
} else {
None
}
}
pub fn extract_delimiter<'s>(s: &'s str) -> Option<&'static str> {
let lf = memchr::memchr_iter(b'\n', s.as_bytes());
let cr = memchr::memchr_iter(b'\r', s.as_bytes());
let crlf = memchr::memmem::find_iter(s.as_bytes(), "\r\n");
let lfcr = memchr::memmem::find_iter(s.as_bytes(), "\n\r");
let lfcr = extract_delimiter_inner(lfcr, "\n\r");
let crlf = extract_delimiter_inner(crlf, "\r\n");
let lf = extract_delimiter_inner(lf, "\n").map(|mut stat| {
stat.count = stat.count.saturating_sub(std::cmp::max(
crlf.as_ref().map(|stat| stat.count).unwrap_or_default(),
lfcr.as_ref().map(|stat| stat.count).unwrap_or_default(),
));
stat
});
let cr = extract_delimiter_inner(cr, "\r").map(|mut stat| {
stat.count = stat.count.saturating_sub(std::cmp::max(
crlf.as_ref().map(|stat| stat.count).unwrap_or_default(),
lfcr.as_ref().map(|stat| stat.count).unwrap_or_default(),
));
stat
});
vec![cr, lf, crlf, lfcr]
.into_iter()
.filter_map(|x| x)
.max_by(|b, a| {
if a.count == b.count {
a.first_appearance.cmp(&b.first_appearance)
} else {
b.count.cmp(&a.count)
}
})
.map(|x| x.newline)
}
pub fn iter_with_line_column_from<'a>(
s: &'a str,
start_point: LineColumn,
) -> impl Iterator<Item = (char, usize, usize, LineColumn)> + 'a {
#[derive(Clone)]
struct State {
cursor: LineColumn,
previous_char_was_newline: bool,
}
let initial = State {
cursor: start_point,
previous_char_was_newline: false,
};
s.char_indices()
.enumerate()
.map(|(idx, (byte_offset, c))| (idx, byte_offset, c))
.scan(initial, |state, (idx, byte_offset, c)| -> Option<_> {
let cursor = state.cursor;
state.previous_char_was_newline = c == '\n';
if state.previous_char_was_newline {
state.cursor.line += 1;
state.cursor.column = 0;
} else {
state.cursor.column += 1;
}
Some((c, byte_offset, idx, cursor))
})
}
pub fn iter_with_line_column<'a>(
s: &'a str,
) -> impl Iterator<Item = (char, usize, usize, LineColumn)> + 'a {
iter_with_line_column_from(s, LineColumn { line: 1, column: 0 })
}
pub fn load_span_from<R>(mut source: R, span: Span) -> Result<String>
where
R: Read,
{
log::trace!("Loading {span:?} from source");
if span.start.line < 1 {
return Err(Error::Span(
"Lines are 1-indexed, can't be less than 1".to_string(),
));
}
if span.end.line < span.start.line {
return Err(Error::Span(
"Line range would be negative, bail".to_string(),
));
}
if span.end.line == span.start.line && span.end.column < span.start.column {
return Err(Error::Span(
"Column range would be negative, bail".to_string(),
));
}
let mut s = String::with_capacity(256);
source
.read_to_string(&mut s)
.expect("Must read successfully");
let extraction = iter_with_line_column(s.as_str())
.skip_while(|(_c, _byte_offset, _idx, cursor)| {
cursor.line < span.start.line
|| (cursor.line == span.start.line && cursor.column < span.start.column)
})
.take_while(|(_c, _byte_offset, _idx, cursor)| {
cursor.line < span.end.line
|| (cursor.line == span.end.line && cursor.column <= span.end.column)
})
.fuse()
.map(|(c, _byte_offset, _idx, _cursor)| c)
.collect::<String>();
Ok(extraction)
}
#[allow(unused)]
pub(crate) fn load_span_from_file(path: impl AsRef<Path>, span: Span) -> Result<String> {
let path = path.as_ref();
let path = fs::canonicalize(&path)?;
let ro = fs::OpenOptions::new().read(true).open(&path)?;
let mut reader = std::io::BufReader::new(ro);
load_span_from(reader, span)
}
pub fn sub_chars(s: &str, range: Range) -> String {
s.chars()
.skip(range.start)
.take(range.len())
.collect::<String>()
}
pub fn byte_range_to_char_range<R>(s: &str, byte_range: R) -> Option<Range>
where
R: RangeBounds<usize>,
{
let mut peekable = s.char_indices().enumerate().peekable();
let mut range = Range { start: 0, end: 0 };
let mut started = false;
while let Some((idx, (byte_offset, _c))) = peekable.next() {
match byte_range.start_bound() {
Bound::Included(&start) if byte_offset == start => {
started = true;
range.start = idx;
}
Bound::Included(&start) if byte_offset > start && !started => {
started = true;
range.start = idx.saturating_sub(1);
}
Bound::Excluded(_start) => {
unreachable!("Exclusive start bounds do not exist. qed");
}
_ => {}
}
match byte_range.end_bound() {
Bound::Included(&end) if byte_offset > end => {
range.end = idx;
return Some(range);
}
Bound::Excluded(&end) if byte_offset >= end => {
range.end = idx;
return Some(range);
}
_ => {}
}
if peekable.peek().is_none() {
if started {
range.end = idx + 1;
return Some(range);
}
}
}
None
}
pub fn byte_range_to_char_range_many<R>(s: &str, byte_ranges: &[R]) -> Vec<Range>
where
R: std::ops::RangeBounds<usize> + std::fmt::Debug,
{
let mut peekable = s.char_indices().enumerate().peekable();
let mut cursor = 0usize;
let mut acc = Vec::with_capacity(byte_ranges.len());
for byte_range in byte_ranges {
let mut range = Range { start: 0, end: 0 };
let mut started = false;
'inner: while let Some((idx, (byte_offset, _c))) = peekable.peek() {
cursor = *idx;
let byte_offset = *byte_offset;
match byte_range.start_bound() {
Bound::Included(&start) if byte_offset == start => {
started = true;
range.start = cursor;
}
Bound::Included(&start) if byte_offset > start && !started => {
started = true;
range.start = cursor.saturating_sub(1);
}
Bound::Excluded(_start) => {
unreachable!("Exclusive start bounds do not exist. qed");
}
_ => {}
}
match byte_range.end_bound() {
Bound::Included(&end) if byte_offset > end => {
range.end = cursor;
acc.push(range.clone());
started = false;
break 'inner;
}
Bound::Excluded(&end) if byte_offset >= end => {
range.end = cursor;
acc.push(range.clone());
started = false;
break 'inner;
}
_ => {}
}
let _ = peekable.next();
}
if started {
range.end = cursor + 1;
acc.push(range);
}
}
acc
}
pub fn sub_char_range<'s, R>(s: &'s str, range: R) -> &'s str
where
R: RangeBounds<usize>,
{
let mut peekable = s.char_indices().enumerate().peekable();
let mut byte_range = Range { start: 0, end: 0 };
let mut started = false;
'loopy: while let Some((idx, (byte_offset_start, _c))) = peekable.next() {
match range.start_bound() {
Bound::Included(&start) if idx == start => {
started = true;
byte_range.start = byte_offset_start;
}
Bound::Excluded(_start) => {
unreachable!("Exclusive start bounds do not exist. qed");
}
_ => {}
}
match range.end_bound() {
Bound::Included(&end) if idx > end => {
byte_range.end = byte_offset_start;
break 'loopy;
}
Bound::Excluded(&end) if idx >= end => {
byte_range.end = byte_offset_start;
break 'loopy;
}
_ => {}
}
if peekable.peek().is_none() {
if started {
byte_range.end = s.len();
}
}
}
&s[byte_range]
}
#[cfg(test)]
mod tests {
use super::*;
macro_rules! lcc {
($line:literal, $column:literal, $c:literal) => {
(
LineColumn {
line: $line,
column: $column,
},
$c,
)
};
}
#[test]
fn iter_chars() {
const S: &str = r#"
abc
d
"#;
const S2: &str = r#"c
d"#;
const EXPECT: &[(LineColumn, char)] = &[
lcc!(1, 0, '\n'),
lcc!(2, 0, 'a'),
lcc!(2, 1, 'b'),
lcc!(2, 2, 'c'),
lcc!(2, 3, '\n'),
lcc!(3, 0, 'd'),
lcc!(3, 1, '\n'),
];
iter_with_line_column(S).zip(EXPECT.iter()).for_each(
|((c, _byte_offset, _idx, lc), (expected_lc, expected_c))| {
assert_eq!(lc, expected_lc.clone());
assert_eq!(c, expected_c.clone());
},
);
const SPAN: Span = Span {
start: LineColumn { line: 2, column: 2 },
end: LineColumn { line: 3, column: 0 },
};
assert_eq!(
load_span_from(&mut S.as_bytes(), SPAN).expect("Must succeed"),
S2.to_owned()
);
}
#[test]
fn iter_span_doc_0_trivial() {
const SOURCE: &str = r##"#[doc=r#"Zebra
Schlupfwespe,
Grünfink"#]"##;
const S2: &str = r#"Zebra
Schlupfwespe,
Grünfink"#;
const SPAN: Span = Span {
start: LineColumn {
line: 1,
column: 0 + 9,
}, end: LineColumn { line: 3, column: 7 }, };
assert_eq!(
load_span_from(&mut SOURCE.as_bytes(), SPAN).expect("Must succeed"),
S2.to_owned()
);
}
#[test]
fn iter_span_doc_1_trailing_newline() {
const SOURCE: &str = r##"#[doc=r#"Zebra
Schlupfwespe,
"#]"##;
const S2: &str = r#"Zebra
Schlupfwespe,
"#;
const SPAN: Span = Span {
start: LineColumn {
line: 1,
column: 0 + 9,
}, end: LineColumn {
line: 2,
column: 13,
}, };
assert_eq!(
load_span_from(&mut SOURCE.as_bytes(), SPAN).expect("Must succeed"),
S2.to_owned()
);
}
#[test]
fn sub_a() {
const A: &str = "a🐲o🌡i🡴f🕧aodnferntkng";
const A_EXPECTED: &str = "a🐲o";
assert_eq!(sub_char_range(A, 0..3), A_EXPECTED);
assert_eq!(sub_char_range(A, ..3), A_EXPECTED);
assert_eq!(sub_chars(A, 0..3), A_EXPECTED.to_owned());
}
#[test]
fn sub_b() {
const B: &str = "fff🦦🡴🕧";
const B_EXPECTED: &str = "🦦🡴🕧";
assert_eq!(sub_char_range(B, 3..=5), B_EXPECTED);
assert_eq!(sub_char_range(B, 3..), B_EXPECTED);
}
#[test]
fn sub_c() {
const B: &str = "fff🦦🡴🕧";
const B_EXPECTED: &str = "";
assert_eq!(sub_char_range(B, 10..), B_EXPECTED);
assert_eq!(sub_char_range(B, 15..16), B_EXPECTED);
}
#[test]
fn range_bytes_to_chars() {
assert_eq!(byte_range_to_char_range("🕱™🐡", 4..7), Some(1..2));
assert_eq!(byte_range_to_char_range("🕱12™🐡", 6..13), Some(3..5));
assert_eq!(byte_range_to_char_range("🕱12™🐡", 0..0), Some(0..0));
assert_eq!(byte_range_to_char_range("🕱12™🐡", 25..26), None);
}
#[test]
fn range_bytes_to_chars_many() {
assert_eq!(
byte_range_to_char_range_many("🕱™🐡", &[4..7, 7..11]),
vec![1..2, 2..3]
);
assert_eq!(
byte_range_to_char_range_many("🕱™🐡", &[0..0, 4..11]),
vec![0..0, 1..3]
);
}
}