litcheck_core/text/
mod.rs1mod display;
2pub use self::display::DisplayCommaSeparated;
3
4use std::borrow::Cow;
5
6use crate::range::Range;
7
/// The kind of line terminator found while scanning a byte buffer.
#[derive(Clone, Copy, Debug)]
pub enum LineEnding {
    /// The line is terminated by a lone line feed (`\n`).
    Lf,
    /// The line is terminated by a carriage return followed by a line feed (`\r\n`).
    Crlf,
    /// No line feed was found in the searched region.
    None,
}
18
19#[derive(Debug, Copy, Clone)]
21pub struct Newline {
22 pub offset: usize,
23 pub ty: LineEnding,
24}
25impl Newline {
26 #[inline(always)]
27 pub fn offset(&self) -> usize {
28 self.offset
29 }
30
31 #[inline]
32 pub fn next_line_start(&self) -> usize {
33 match self.ty {
34 LineEnding::Lf => self.offset + 1,
35 LineEnding::Crlf => self.offset + 2,
36 LineEnding::None => self.offset,
37 }
38 }
39
40 #[inline]
41 pub fn is_crlf(&self) -> bool {
42 matches!(self.ty, LineEnding::Crlf)
43 }
44
45 pub fn next(buffer: &[u8]) -> Self {
47 Self::next_from(buffer, 0)
48 }
49
50 pub fn next_from(buffer: &[u8], offset: usize) -> Self {
55 match memchr::memchr(b'\n', &buffer[offset..]) {
56 Some(index) => {
57 let index = offset + index;
58 if index > 0 {
59 let line_end = index - 1;
60 match unsafe { *buffer.get_unchecked(line_end) } {
61 b'\r' => Self {
62 ty: LineEnding::Crlf,
63 offset: line_end,
64 },
65 _ => Self {
66 ty: LineEnding::Lf,
67 offset: index,
68 },
69 }
70 } else {
71 Self {
72 ty: LineEnding::Lf,
73 offset: index,
74 }
75 }
76 }
77 None => Self {
78 ty: LineEnding::None,
79 offset: buffer.len(),
80 },
81 }
82 }
83
84 pub fn prev(buffer: &[u8]) -> Self {
86 Self::prev_from(buffer, 0)
87 }
88
89 pub fn prev_from(buffer: &[u8], offset: usize) -> Self {
94 match memchr::memrchr(b'\n', &buffer[..offset]) {
95 Some(index) => {
96 if index > 0 {
97 let prev_line_end = index - 1;
98 match unsafe { *buffer.get_unchecked(prev_line_end) } {
99 b'\r' => Self {
100 ty: LineEnding::Crlf,
101 offset: prev_line_end,
102 },
103 _ => Self {
104 ty: LineEnding::Crlf,
105 offset: index,
106 },
107 }
108 } else {
109 Self {
110 ty: LineEnding::Lf,
111 offset: index,
112 }
113 }
114 }
115 None => Self {
116 ty: LineEnding::None,
117 offset: 0,
118 },
119 }
120 }
121}
122
123pub fn find_next_lf_or_eof(buffer: &[u8], range: Range<usize>) -> Option<usize> {
124 memchr::memchr(b'\n', &buffer[range]).map(|idx| range.start + idx)
125}
126
127pub fn find_next_crlf_or_eof(buffer: &[u8], range: Range<usize>) -> Option<usize> {
128 match memchr::memchr(b'\n', &buffer[range]) {
129 Some(0) => None,
130 Some(index) => {
131 let line_end = index - 1;
132 match unsafe { *buffer.get_unchecked(line_end) } {
133 b'\r' => Some(line_end),
134 _ => None,
135 }
136 }
137 None => None,
138 }
139}
140
141pub fn find_prev_lf_or_eof(buffer: &[u8], range: Range<usize>) -> Option<usize> {
142 memchr::memrchr(b'\n', &buffer[range]).map(|idx| range.start + idx)
143}
144
145pub fn find_prev_crlf_or_eof(buffer: &[u8], range: Range<usize>) -> Option<usize> {
146 match memchr::memrchr(b'\n', &buffer[range]) {
147 Some(0) => None,
148 Some(index) => {
149 let line_end = index - 1;
150 match unsafe { *buffer.get_unchecked(line_end) } {
151 b'\r' => Some(line_end),
152 _ => None,
153 }
154 }
155 None => None,
156 }
157}
158
/// Returns `true` if `offset` falls on a UTF-8 character boundary in `buffer`.
///
/// An offset equal to `buffer.len()` counts as a boundary; any offset past the
/// end does not. Within the buffer, every byte except a UTF-8 continuation
/// byte (bit pattern `10xxxxxx`) starts a boundary.
pub fn is_char_boundary(buffer: &[u8], offset: usize) -> bool {
    match buffer.get(offset) {
        // Continuation bytes sit in the middle of a multi-byte sequence.
        Some(0b1000_0000..=0b1011_1111) => false,
        // ASCII bytes and multi-byte lead bytes begin a character.
        Some(_) => true,
        // Out of bounds: only the end-of-buffer position is a boundary.
        None => offset == buffer.len(),
    }
}
177
178pub fn canonicalize_horizontal_whitespace(
179 s: Cow<'_, str>,
180 strict_whitespace: bool,
181) -> Cow<'_, str> {
182 if strict_whitespace {
183 return s;
184 }
185
186 if s.contains(is_non_canonical_horizontal_whitespace) {
187 Cow::Owned(s.replace(is_non_canonical_horizontal_whitespace, " "))
188 } else {
189 s
190 }
191}
192
/// Returns `true` if `c` is a horizontal whitespace character other than the
/// ASCII space: a tab, or one of the listed Unicode space characters.
#[inline]
fn is_non_canonical_horizontal_whitespace(c: char) -> bool {
    matches!(
        c,
        '\t' | '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}