minify_html_common/
pattern.rs

1use aho_corasick::AhoCorasick;
2
3// Can't use pub const fn constructor due to Copy trait, so allow directly creating struct publicly for now.
4pub struct TrieNode<V: 'static + Copy> {
5  // Using a children array of size 256 would probably be fastest, but waste too much memory and cause slow compiles
6  // and large binaries. Instead, we only store the children between the first and last defined (see `gen/trie.ts`).
7  // When getting a child, use `index - offset`.
8  pub offset: usize,
9  pub value: Option<V>,
10  pub children: &'static [Option<&'static TrieNode<V>>],
11}
12
13pub enum TrieNodeMatch<V: 'static + Copy> {
14  Found { len: usize, value: V },
15  NotFound { reached: usize },
16}
17
18#[allow(dead_code)]
19impl<V: 'static + Copy> TrieNode<V> {
20  // Find the node that matches the shortest prefix of {@param text} that:
21  // - has a value (except the start node if it has a value);
22  // - fails to match any further characters (the node itself matches); or,
23  // - the entire text (essentially same as previous point).
24  //
25  // For example, given a trie with only two paths "&amp" and "&amp;":
26  // - "&amp" will return node `p`.
27  // - "&ampere" will return node `p`.
28  // - "&amp;" will return node `p`.
29  // - "&amp;ere" will return node `p`.
30  // - "&am" will return node `m`.
31  //   - Further matching "p;" will return node `p`.
32  //   - Further matching "xyz" will return node `m` (itself).
33  // - "&amx" will return node `m`.
34  // - "&ax" will return node `a`.
35  // - "+ax" will return itself.
36  // - "" will return itself.
37  pub fn shortest_matching_prefix(&self, text: &[u8], from: usize) -> (&TrieNode<V>, usize) {
38    let mut node: &TrieNode<V> = self;
39    let mut pos = from;
40    while let Some(&c) = text.get(pos) {
41      match node.children.get((c as usize).wrapping_sub(node.offset)) {
42        Some(Some(child)) => node = child,
43        None | Some(None) => break,
44      };
45      pos += 1;
46      if node.value.is_some() {
47        break;
48      };
49    }
50    (node, pos)
51  }
52
53  pub fn longest_matching_prefix(&self, text: &[u8]) -> TrieNodeMatch<V> {
54    let mut node: &TrieNode<V> = self;
55    let mut value: Option<TrieNodeMatch<V>> = None;
56    let mut pos = 0;
57    while let Some(&c) = text.get(pos) {
58      match node.children.get((c as usize).wrapping_sub(node.offset)) {
59        Some(Some(child)) => node = child,
60        None | Some(None) => break,
61      };
62      pos += 1;
63      if let Some(v) = node.value {
64        value = Some(TrieNodeMatch::Found { len: pos, value: v });
65      }
66    }
67    value.unwrap_or(TrieNodeMatch::NotFound { reached: pos })
68  }
69}
70
71pub struct Replacer {
72  searcher: AhoCorasick,
73  replacements: Vec<Vec<u8>>,
74}
75
76impl Replacer {
77  pub fn new(searcher: AhoCorasick, replacements: Vec<Vec<u8>>) -> Replacer {
78    Replacer {
79      searcher,
80      replacements,
81    }
82  }
83
84  pub fn replace_all(&self, src: &[u8]) -> Vec<u8> {
85    self.searcher.replace_all_bytes(src, &self.replacements)
86  }
87}