minify_html_onepass/proc/
mod.rs1use crate::err::debug_repr;
2use crate::err::Error;
3use crate::err::ErrorType;
4use crate::err::ProcessingResult;
5use crate::proc::range::ProcessorRange;
6use crate::proc::MatchAction::*;
7use crate::proc::MatchMode::*;
8use aho_corasick::AhoCorasick;
9use core::fmt;
10use memchr::memchr;
11use minify_html_common::gen::codepoints::Lookup;
12use minify_html_common::spec::tag::EMPTY_SLICE;
13use std::fmt::Debug;
14use std::fmt::Formatter;
15use std::ops::Index;
16use std::ops::IndexMut;
17
18pub mod checkpoint;
19pub mod entity;
20pub mod range;
21
22#[allow(dead_code)]
23pub enum MatchMode {
24 IsChar(u8),
25 IsNotChar(u8),
26 WhileChar(u8),
27 WhileNotChar(u8),
28 ThroughChar(u8),
30
31 IsPred(fn(u8) -> bool),
32 IsNotPred(fn(u8) -> bool),
33 WhilePred(fn(u8) -> bool),
34 WhileNotPred(fn(u8) -> bool),
35
36 IsInLookup(&'static Lookup),
37 WhileInLookup(&'static Lookup),
38 WhileNotInLookup(&'static Lookup),
39
40 IsSeq(&'static [u8]),
41 WhileNotSeq(&'static AhoCorasick),
42 ThroughSeq(&'static AhoCorasick),
43}
44
/// Describes what `Processor::m` should do with the bytes it matched.
///
/// The enum is a plain fieldless tag, so it derives the usual value-type traits
/// to allow copying, comparing and debug-printing it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchAction {
    /// Move the matched bytes to the output and advance past them in the input.
    Keep,
    /// Advance past the matched bytes in the input without writing them to the output.
    Discard,
    /// Only report the matched range; neither the read nor the write position changes.
    MatchOnly,
}
50
/// In-place processor over a mutable byte buffer.
///
/// The same buffer serves as both input and output: bytes are consumed from
/// `read_next` onwards and results are stored from `write_next` onwards.
pub struct Processor<'d> {
    /// The buffer that is read from and written to.
    code: &'d mut [u8],
    /// Index of the next byte in `code` to be read.
    read_next: usize,
    /// Index in `code` where the next output byte will be stored.
    write_next: usize,
}
59
60impl<'d> Index<ProcessorRange> for Processor<'d> {
61 type Output = [u8];
62
63 #[inline(always)]
64 fn index(&self, index: ProcessorRange) -> &Self::Output {
65 &self.code[index.start..index.end]
66 }
67}
68
69impl<'d> IndexMut<ProcessorRange> for Processor<'d> {
70 #[inline(always)]
71 fn index_mut(&mut self, index: ProcessorRange) -> &mut Self::Output {
72 debug_assert!(index.end <= self.write_next);
73 &mut self.code[index.start..index.end]
74 }
75}
76
77#[allow(dead_code)]
78impl<'d> Processor<'d> {
79 #[inline(always)]
81 pub fn new(code: &mut [u8]) -> Processor {
82 Processor {
83 write_next: 0,
84 read_next: 0,
85 code,
86 }
87 }
88
89 #[inline(always)]
92 fn _in_bounds(&self, offset: usize) -> bool {
93 self.read_next + offset < self.code.len()
94 }
95
96 #[inline(always)]
101 fn _read_offset(&self, offset: usize) -> u8 {
102 self.code[self.read_next + offset]
103 }
104
105 #[inline(always)]
106 fn _maybe_read_offset(&self, offset: usize) -> Option<u8> {
107 self.code.get(self.read_next + offset).copied()
108 }
109
110 #[inline(always)]
111 fn _maybe_read_slice_offset(&self, offset: usize, count: usize) -> Option<&[u8]> {
112 self
113 .code
114 .get(self.read_next + offset..self.read_next + offset + count)
115 }
116
117 #[inline(always)]
120 fn _shift(&mut self, amount: usize) {
121 if self.read_next != self.write_next {
123 self
124 .code
125 .copy_within(self.read_next..self.read_next + amount, self.write_next);
126 };
127 self.read_next += amount;
128 self.write_next += amount;
129 }
130
131 #[inline(always)]
132 fn _replace(&mut self, start: usize, end: usize, data: &[u8]) -> usize {
133 debug_assert!(start <= end);
134 let added = data.len() - (end - start);
135 debug_assert!(self.write_next + added <= self.read_next);
137 self.code.copy_within(end..self.write_next, end + added);
138 self.code[start..start + data.len()].copy_from_slice(data);
139 self.write_next += added;
141 added
142 }
143
144 #[inline(always)]
145 fn _insert(&mut self, at: usize, data: &[u8]) -> usize {
146 self._replace(at, at, data)
147 }
148
149 #[inline(always)]
151 fn _one<C: FnOnce(u8) -> bool>(&mut self, cond: C) -> usize {
152 self._maybe_read_offset(0).filter(|n| cond(*n)).is_some() as usize
153 }
154
155 #[inline(always)]
156 fn _many<C: Fn(u8) -> bool>(&mut self, cond: C) -> usize {
157 let mut count = 0usize;
158 while self
159 ._maybe_read_offset(count)
160 .filter(|c| cond(*c))
161 .is_some()
162 {
163 count += 1;
164 }
165 count
166 }
167
168 #[inline(always)]
169 fn _remaining(&self) -> usize {
170 self.code.len() - self.read_next
171 }
172
173 #[inline(always)]
174 pub fn m(&mut self, mode: MatchMode, action: MatchAction) -> ProcessorRange {
175 let count = match mode {
176 IsChar(c) => self._one(|n| n == c),
177 IsNotChar(c) => self._one(|n| n != c),
178 WhileChar(c) => self._many(|n| n == c),
179 WhileNotChar(c) => memchr(c, &self.code[self.read_next..]).unwrap_or(self._remaining()),
180 ThroughChar(c) => memchr(c, &self.code[self.read_next..]).map_or(0, |p| p + 1),
181
182 IsInLookup(lookup) => self._one(|n| lookup[n]),
183 WhileInLookup(lookup) => self._many(|n| lookup[n]),
184 WhileNotInLookup(lookup) => self._many(|n| !lookup[n]),
185
186 IsPred(p) => self._one(p),
187 IsNotPred(p) => self._one(|n| !p(n)),
188 WhilePred(p) => self._many(p),
189 WhileNotPred(p) => self._many(|n| !p(n)),
190
191 IsSeq(seq) => self
192 ._maybe_read_slice_offset(0, seq.len())
193 .filter(|src| *src == seq)
194 .map_or(0, |_| seq.len()),
195 WhileNotSeq(seq) => seq
196 .find(&self.code[self.read_next..])
197 .map_or(self._remaining(), |m| m.start()),
198 ThroughSeq(seq) => seq
200 .find(&self.code[self.read_next..])
201 .map_or(0, |m| m.end()),
202 };
203 let start = match action {
206 Discard | MatchOnly => self.read_next,
207 Keep => self.write_next,
208 };
209 match action {
210 Discard => self.read_next += count,
211 Keep => self._shift(count),
212 MatchOnly => {}
213 };
214
215 ProcessorRange {
216 start,
217 end: start + count,
218 }
219 }
220
221 #[inline(always)]
224 pub fn at_end(&self) -> bool {
225 !self._in_bounds(0)
226 }
227
228 #[inline(always)]
229 pub fn get_or_empty(&self, r: Option<ProcessorRange>) -> &[u8] {
230 r.and_then(|r| self.code.get(r.start..r.end))
231 .unwrap_or(EMPTY_SLICE)
232 }
233
234 #[inline(always)]
235 pub fn require_not_at_end(&self) -> ProcessingResult<()> {
236 if self.at_end() {
237 Err(ErrorType::UnexpectedEnd)
238 } else {
239 Ok(())
240 }
241 }
242
243 #[inline(always)]
245 pub fn read_len(&self) -> usize {
246 self.read_next
247 }
248
249 #[inline(always)]
250 pub fn reserve_output(&mut self, amount: usize) {
251 self.write_next += amount;
252 }
253
254 #[inline(always)]
258 pub fn peek(&self, offset: usize) -> Option<u8> {
259 self._maybe_read_offset(offset)
260 }
261
262 #[inline(always)]
263 pub fn peek_many(&self, offset: usize, count: usize) -> Option<&[u8]> {
264 self._maybe_read_slice_offset(offset, count)
265 }
266
267 pub fn last_is(&self, c: u8) -> bool {
269 self.write_next > 0 && self.code[self.write_next - 1] == c
270 }
271
272 #[inline(always)]
276 pub fn skip(&mut self) -> ProcessingResult<u8> {
277 self
278 ._maybe_read_offset(0)
279 .map(|c| {
280 self.read_next += 1;
281 c
282 })
283 .ok_or(ErrorType::UnexpectedEnd)
284 }
285
286 #[inline(always)]
287 pub fn skip_amount_expect(&mut self, amount: usize) {
288 debug_assert!(!self.at_end(), "skip known characters");
289 self.read_next += amount;
290 }
291
292 #[inline(always)]
293 pub fn skip_expect(&mut self) {
294 debug_assert!(!self.at_end(), "skip known character");
295 self.read_next += 1;
296 }
297
298 #[inline(always)]
301 pub fn write(&mut self, c: u8) {
302 self.code[self.write_next] = c;
303 self.write_next += 1;
304 }
305
306 #[inline(always)]
307 pub fn make_lowercase(&mut self, range: ProcessorRange) {
308 self.code[range.start..range.end].make_ascii_lowercase();
309 }
310
311 pub fn undo_write(&mut self, len: usize) {
312 self.write_next -= len;
313 }
314
315 #[inline(always)]
316 pub fn write_range(&mut self, s: ProcessorRange) -> ProcessorRange {
317 let dest_start = self.write_next;
318 let dest_end = dest_start + s.len();
319 self.code.copy_within(s.start..s.end, dest_start);
320 self.write_next = dest_end;
321 ProcessorRange {
322 start: dest_start,
323 end: dest_end,
324 }
325 }
326
327 #[inline(always)]
329 pub fn write_slice(&mut self, s: &[u8]) {
330 self.code[self.write_next..self.write_next + s.len()].copy_from_slice(s);
331 self.write_next += s.len();
332 }
333
334 #[inline(always)]
335 pub fn write_utf8(&mut self, c: char) {
336 let mut encoded = [0u8; 4];
337 self.write_slice(c.encode_utf8(&mut encoded).as_bytes());
338 }
339
340 #[inline(always)]
342 pub fn accept(&mut self) -> ProcessingResult<u8> {
343 self
344 ._maybe_read_offset(0)
345 .map(|c| {
346 self.code[self.write_next] = c;
347 self.read_next += 1;
348 self.write_next += 1;
349 c
350 })
351 .ok_or(ErrorType::UnexpectedEnd)
352 }
353
354 #[inline(always)]
355 pub fn accept_expect(&mut self) -> u8 {
356 debug_assert!(!self.at_end());
357 let c = self._read_offset(0);
358 self.code[self.write_next] = c;
359 self.read_next += 1;
360 self.write_next += 1;
361 c
362 }
363
364 #[inline(always)]
365 pub fn accept_amount_expect(&mut self, count: usize) {
366 debug_assert!(self._in_bounds(count - 1));
367 self._shift(count);
368 }
369
370 #[inline(always)]
372 pub fn finish(self) -> Result<usize, Error> {
373 debug_assert!(self.at_end());
374 Ok(self.write_next)
375 }
376}
377
378impl Debug for Processor<'_> {
379 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
380 f.write_str(&debug_repr(
381 self.code,
382 self.read_next as isize,
383 self.write_next as isize,
384 ))?;
385 Ok(())
386 }
387}