1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3#![cfg_attr(
18 feature = "streams",
19 doc = r##"
20 - `"streams"` - Mask [`Stream`] implementations by wrapping them.
21"##
22)]
23#![cfg_attr(
24 not(feature = "streams"),
25 doc = r##"
26 - `"streams"` - Support for masking streams.
27"##
28)]
29#![warn(missing_docs)]
30use core::fmt::{Debug, Error, Formatter};
31use std::collections::BTreeMap;
32
33#[cfg(feature = "streams")]
34use bytes::Bytes;
35#[cfg(feature = "streams")]
36use futures::stream::Stream;
37
/// Description of one pattern to mask: a literal prefix followed by a run of
/// bytes taken from a set.
#[derive(Clone, PartialEq, Eq)]
pub struct MatchData<'a> {
    /// Literal bytes that must appear, in order, for the pattern to start.
    pub prefix: &'a [u8],
    /// Set of bytes accepted after the prefix; each following byte only has to
    /// be a member of this set. An empty set means nothing follows the prefix.
    pub suffix: &'a [u8],
    /// When `true` the prefix bytes are part of the masked span; when `false`
    /// only the bytes matched after the prefix are.
    pub mask_prefix: bool,
}

impl<'a> Debug for MatchData<'a> {
    /// Renders the pattern as `[PDATA "<prefix>" "<suffix>"]`, appending ` MP`
    /// when the prefix itself is masked. Non-UTF-8 bytes are shown lossily.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
        let prefix = String::from_utf8_lossy(self.prefix);
        let suffix = String::from_utf8_lossy(self.suffix);
        let flag = if self.mask_prefix { " MP" } else { "" };
        write!(f, "[PDATA {:?} {:?}{}]", prefix, suffix, flag)
    }
}
72
73#[derive(Clone, Eq, PartialEq)]
74struct Match<'a, 'b>
75where
76 'b: 'a,
77{
78 data: &'a MatchData<'b>,
79 match_idx: usize,
80 offset: usize,
81}
82
83impl<'a, 'b> Match<'a, 'b> {
84 pub fn new(data: &'a MatchData<'b>, match_idx: usize, offset: usize) -> Self {
85 Self {
86 data,
87 match_idx,
88 offset,
89 }
90 }
91
92 pub fn index(&self) -> usize {
93 self.match_idx
94 }
95
96 pub fn past_offset(&self, offset: &usize) -> bool {
97 self.offset >= *offset
98 }
99
100 pub fn allowed_next(&self) -> &'_ [u8] {
101 if self.offset < self.data.prefix.len() {
102 &self.data.prefix[self.offset..self.offset + 1]
103 } else {
104 self.data.suffix
105 }
106 }
107
108 pub fn try_next(&self, action: u8) -> (Option<Self>, Option<(usize, usize)>) {
109 if self.offset < self.data.prefix.len() {
110 if action == self.data.prefix[self.offset] {
111 let offset = self.offset + 1;
112 let span = (self.data.mask_prefix && offset == self.data.prefix.len())
113 .then_some((self.data.prefix.len(), 0));
114 (Some(Match::new(self.data, self.match_idx, offset)), span)
115 } else {
116 (None, None)
117 }
118 } else if self.data.suffix.contains(&action) {
119 if !self.data.prefix.is_empty() {
120 let offset = std::cmp::min(self.offset + 1, self.data.prefix.len() + 2);
123 let span = if self.data.mask_prefix && !self.data.prefix.is_empty() {
124 Some((2, 0))
125 } else {
126 Some((offset - self.data.prefix.len(), 0))
127 };
128 (Some(Match::new(self.data, self.match_idx, offset)), span)
129 } else {
130 (None, Some((1, 0)))
133 }
134 } else {
135 (None, None)
136 }
137 }
138
139 pub fn prefix_length(&self) -> usize {
141 let pfx = if self.data.mask_prefix {
142 self.offset
143 } else {
144 0
145 };
146
147 if self.offset < self.data.prefix.len() + 1 {
148 pfx
149 } else {
150 pfx + (self.offset - self.data.prefix.len())
151 }
152 }
153}
154
155impl<'a, 'b> Debug for Match<'a, 'b> {
156 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
157 write!(
158 f,
159 "[PREFIX '{}' '{}' {}]",
160 String::from_utf8_lossy(self.data.prefix),
161 String::from_utf8_lossy(self.data.suffix),
162 self.offset
163 )
164 }
165}
166
167#[derive(Clone, Default, PartialEq, Eq)]
168struct State<'a, 'b> {
169 matches: Vec<Match<'a, 'b>>,
170 text: Vec<u8>,
171 spans: Vec<(usize, usize)>,
172 text_offset: usize,
173}
174
175impl<'a, 'b> State<'a, 'b> {
176 fn new(
177 matches: Vec<Match<'a, 'b>>,
178 text: Vec<u8>,
179 spans: Vec<(usize, usize)>,
180 text_offset: usize,
181 ) -> Self {
182 Self {
183 matches,
184 text,
185 spans,
186 text_offset,
187 }
188 }
189
190 fn generate_actions(&self, datas: &[MatchData]) -> Vec<Option<u8>> {
191 let mut res = Vec::new();
192 for pfx in self.matches.iter() {
193 for ch in pfx.allowed_next() {
194 if !res.contains(&Some(*ch)) {
195 res.push(Some(*ch));
196 }
197 }
198 }
199
200 for data in datas.iter() {
201 if !data.prefix.is_empty() {
202 let ch = data.prefix[0];
203 if !res.contains(&Some(ch)) {
204 res.push(Some(ch))
205 }
206 } else {
207 for ch in data.suffix.as_ref().iter() {
208 if !res.contains(&Some(*ch)) {
209 res.push(Some(*ch))
210 }
211 }
212 }
213 }
214
215 res.push(None);
216 res
217 }
218}
219
220impl<'a, 'b> std::fmt::Debug for State<'a, 'b> {
221 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
222 write!(
223 f,
224 "[STATE '{}' {:?}",
225 String::from_utf8_lossy(&self.text),
226 self.spans
227 )?;
228 for s in self.matches.iter() {
229 write!(f, " {:?}", s)?;
230 }
231 write!(f, " TxtOff: {}", self.text_offset)?;
232 write!(f, "]")
233 }
234}
235
/// A single transition of the automaton, as collected during construction.
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
struct Link {
    /// Index of the state the transition leaves.
    source: usize,
    /// Index of the state the transition enters.
    target: usize,
    /// Input byte that triggers the transition.
    action: u8,
    /// Bytes written to the output when the transition is taken, if any.
    emitted: Option<Vec<u8>>,
}

impl Link {
    /// Creates a transition from `source` to `target` on byte `action`.
    pub fn new(source: usize, target: usize, action: u8, emitted: Option<Vec<u8>>) -> Self {
        Link {
            source,
            target,
            action,
            emitted,
        }
    }
}

impl Debug for Link {
    /// Renders the link as `[LINK <src> -> <dst> '<char>' (<byte>) "<emitted>"]`;
    /// the quoted part is omitted when nothing is emitted.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
        write!(
            f,
            "[LINK {} -> {} '{}' ({})",
            self.source,
            self.target,
            // Every u8 is a valid scalar value, so this conversion cannot fail.
            char::from(self.action),
            self.action,
        )?;
        if let Some(emitted) = &self.emitted {
            write!(f, " \"{}\"", String::from_utf8_lossy(emitted))?;
        }
        write!(f, "]")
    }
}
272
273#[derive(Debug, Clone, Copy)]
274struct LinkKey(usize);
275
276#[derive(Clone, Debug)]
277struct Links {
278 source_offset: Vec<usize>,
279 target: Vec<usize>,
280 actions: Vec<u8>,
281 emitted: Vec<Option<Vec<u8>>>,
282}
283
284impl Links {
285 pub fn new(mut links: Vec<Link>) -> Self {
286 links.sort_by(|a, b| a.source.cmp(&b.source));
287 let mut source_offset = Vec::new();
288 let mut target = Vec::new();
289 let mut actions = Vec::new();
290 let mut emitted = Vec::new();
291 let mut prev_state = 0;
292 source_offset.push(0);
293 for link in links {
294 while prev_state < link.source {
295 source_offset.push(target.len());
296 prev_state += 1;
297 }
298 target.push(link.target);
299 actions.push(link.action);
300 emitted.push(link.emitted);
301 }
302 source_offset.push(target.len());
303 Self {
304 source_offset,
305 target,
306 actions,
307 emitted,
308 }
309 }
310
311 pub fn get(&self, state: usize, action: u8) -> Option<LinkKey> {
312 let start = self.source_offset[state];
313 let end = self.source_offset[state + 1];
314 for i in start..end {
315 if self.actions[i] == action {
316 return Some(LinkKey(i));
317 }
318 }
319 None
320 }
321
322 pub fn target(&self, key: LinkKey) -> usize {
323 self.target[key.0]
324 }
325
326 pub fn emitted(&self, key: LinkKey) -> Option<&Vec<u8>> {
327 self.emitted[key.0].as_ref()
328 }
329}
330
/// Per-state output for the fallback transition, which is taken when no
/// byte-labelled transition applies or when the input ends.
#[derive(Clone, Debug)]
struct DefaultLinks {
    /// `emitted[state]` holds the bytes to flush when leaving `state` through
    /// the fallback transition.
    emitted: Vec<Option<Vec<u8>>>,
}

impl DefaultLinks {
    /// Turns the sparse `state -> output` map into a dense table. States
    /// missing from the map get `None`.
    pub fn new(default_links: BTreeMap<usize, Option<Vec<u8>>>) -> Self {
        // The largest key decides the table size (keys iterate in ascending order).
        let size = default_links
            .keys()
            .next_back()
            .map_or(0, |largest| largest + 1);
        let mut emitted = vec![None; size];
        for (state, output) in default_links {
            emitted[state] = output;
        }
        DefaultLinks { emitted }
    }

    /// Bytes to flush when `state` is left through the fallback transition.
    ///
    /// Panics if `state` is beyond the table.
    pub fn get(&self, state: usize) -> Option<&Vec<u8>> {
        self.emitted[state].as_ref()
    }
}
352
/// Merges overlapping spans into a sorted list of disjoint spans.
///
/// Spans are half-open `(start, end)` pairs. Two spans are merged only when
/// they strictly overlap; spans that merely touch (`a.end == b.start`) stay
/// separate. The input does not have to be sorted.
fn unify_spans(spans: &[(usize, usize)]) -> Vec<(usize, usize)> {
    let mut sorted = spans.to_vec();
    sorted.sort_unstable();

    let mut remaining = sorted.into_iter();
    let mut current = match remaining.next() {
        Some(first) => first,
        None => return Vec::new(),
    };

    let mut merged = Vec::new();
    // Start from the second span: comparing the first span with itself would
    // emit a zero-length first span twice.
    for span in remaining {
        if span.0 < current.1 {
            current.1 = std::cmp::max(current.1, span.1);
        } else {
            merged.push(current);
            current = span;
        }
    }
    merged.push(current);

    merged
}
376
/// Copies `input` to a new buffer, replacing every span with one copy of `mask`.
///
/// `spans` are sorted, disjoint `(start, end)` pairs in absolute coordinates;
/// `input[0]` sits at absolute position `offset`. A span that begins before
/// `offset` still contributes its mask, written ahead of any input bytes.
fn mask_spans<'a>(
    spans: &[(usize, usize)],
    input: &'a [u8],
    mask: &[u8],
    offset: usize,
) -> Vec<u8> {
    let mut out = Vec::new();
    let mut current = 0;

    // Spans that start before the first input byte: write their mask now and
    // stop at the first one that reaches into the input.
    while let Some(&(start, end)) = spans.get(current) {
        if start >= offset {
            break;
        }
        out.extend_from_slice(mask);
        if end > offset {
            break;
        }
        current += 1;
    }

    for (rel, &byte) in input.iter().enumerate() {
        let pos = rel + offset;
        match spans.get(current) {
            Some(&(start, end)) if pos >= start => {
                // Inside the current span: the mask is written once, at its start.
                if pos == start {
                    out.extend_from_slice(mask);
                }
                if pos + 1 == end {
                    current += 1;
                }
            }
            // No span left, or the next one has not started yet.
            _ => out.push(byte),
        }
    }
    out
}
411
412#[derive(Clone, Debug)]
433pub struct Masker {
434 links: Links,
435 default_links: DefaultLinks,
436}
437
438impl Masker {
439 pub fn new<S, T>(input_data: &[S], mask: T) -> Masker
461 where
462 S: AsRef<[u8]>,
463 T: AsRef<[u8]>,
464 {
465 Self::new_with_match_data(input_data, &[], mask)
466 }
467
468 pub fn new_with_match_data<S, T>(input_data: &[S], match_data: &[MatchData], mask: T) -> Masker
500 where
501 S: AsRef<[u8]>,
502 T: AsRef<[u8]>,
503 {
504 let prefix_data = input_data
505 .iter()
506 .map(|s| MatchData {
507 prefix: s.as_ref(),
508 suffix: &[],
509 mask_prefix: true,
510 })
511 .chain(match_data.iter().cloned())
512 .collect::<Vec<_>>();
513
514 let mut states: Vec<State<'_, '_>> = vec![Default::default()];
515 let mut links = Vec::new();
516 let mut default_links = BTreeMap::new();
517 let mut work = vec![0usize];
518
519 let mut coverage = BTreeMap::new();
520 for d1 in 0..prefix_data.len() {
521 for d2 in 0..prefix_data.len() {
522 let mut failed = false;
523 for j in 0..prefix_data[d2].prefix.len() {
525 if !prefix_data[d1].suffix.contains(&prefix_data[d2].prefix[j]) {
526 failed = true;
527 break;
528 }
529 }
530 if failed {
531 continue;
532 }
533 for ch in prefix_data[d2].suffix {
534 if !prefix_data[d1].suffix.contains(ch) {
535 failed = true;
536 break;
537 }
538 }
539 if failed {
540 continue;
541 }
542 coverage.insert((d1, d2), prefix_data[d1].prefix.len() + 1);
544 }
545 }
546
547 while let Some(index) = work.pop() {
548 let actions = states[index].generate_actions(&prefix_data);
549
550 for action in actions {
551 let mut new_matches = Vec::new();
553 let mut new_spans = states[index].spans.to_vec();
554 let new_text = {
555 let mut t = states[index].text.clone();
556 if let Some(action) = action {
557 t.push(action);
558 }
559 t
560 };
561 let text_offset = states[index].text_offset;
562 let full_text_len = new_text.len() + text_offset;
563
564 if let Some(action) = action {
565 for pfx in states[index].matches.iter() {
566 let (pfx, span) = pfx.try_next(action);
567 if let Some(new_pfx) = pfx {
568 if !new_matches.contains(&new_pfx) {
569 new_matches.push(new_pfx);
570 }
571 }
572 if let Some((s1, s2)) = span {
573 new_spans.push((
574 full_text_len - std::cmp::min(full_text_len, s1),
575 full_text_len - std::cmp::min(full_text_len, s2),
576 ));
577 }
578 }
579
580 for (ix, data) in prefix_data.iter().enumerate() {
581 let mut covered = false;
582 for pfx in states[index].matches.iter() {
583 if let Some(start) = coverage.get(&(pfx.index(), ix)) {
584 if pfx.past_offset(start) {
585 covered = true;
586 break;
587 }
588 }
589 }
590 if covered {
591 continue;
592 }
593
594 let pfx = Match::new(data, ix, 0);
595 let (pfx, span) = pfx.try_next(action);
596 if let Some(new_pfx) = pfx {
597 if !new_matches.contains(&new_pfx) {
598 new_matches.push(new_pfx);
599 }
600 }
601 if let Some((s1, s2)) = span {
602 new_spans.push((
603 full_text_len - std::cmp::min(full_text_len, s1),
604 full_text_len - std::cmp::min(full_text_len, s2),
605 ));
606 }
607 }
608 }
609
610 let unified_spans = unify_spans(&new_spans);
612 let mut emitted_spans = Vec::new();
613 let mut kept_spans = Vec::new();
614 let new_extent = new_matches
616 .iter()
617 .map(|m| m.prefix_length())
618 .max()
619 .unwrap_or(0usize);
620 let mut first_kept_char = full_text_len - std::cmp::min(full_text_len, new_extent);
621
622 for (x1, x2) in unified_spans {
623 if x2 + new_extent <= full_text_len {
626 emitted_spans.push((x1, x2));
627 } else {
628 kept_spans.push((x1, x2));
629 first_kept_char = std::cmp::min(first_kept_char, x1);
632 }
633 }
634
635 let emitted_text = if first_kept_char > 0 {
636 let s = mask_spans(
637 &emitted_spans,
638 &new_text[0..(first_kept_char - text_offset)],
639 mask.as_ref(),
640 text_offset,
641 );
642 if !s.is_empty() {
643 Some(s)
644 } else {
645 None
646 }
647 } else {
648 None
649 };
650
651 let (new_text, new_text_offset) = if first_kept_char > text_offset {
653 (&new_text[(first_kept_char - text_offset)..], 0)
654 } else {
655 (new_text.as_slice(), text_offset - first_kept_char)
656 };
657
658 let mut kept_spans = kept_spans
660 .into_iter()
661 .map(|(a, b)| (a - first_kept_char, b - first_kept_char))
662 .collect::<Vec<_>>();
663 kept_spans.sort_by(|a: &(usize, usize), b: &(usize, usize)| a.0.cmp(&b.0));
664
665 let cleared = if let Some(first_span) = kept_spans.first().copied() {
668 if first_span.0 == 0 && first_span.1 > 0 {
669 first_span.1
670 } else {
671 0
672 }
673 } else {
674 0
675 };
676
677 let (new_text, new_text_offset) = if cleared > 0 {
678 if cleared > new_text_offset {
679 (&new_text[(cleared - new_text_offset)..], 1)
680 } else {
681 (new_text, new_text_offset - cleared + 1)
682 }
683 } else {
684 (new_text, 0)
685 };
686 let kept_spans = if cleared > 0 {
687 kept_spans
688 .into_iter()
689 .map(|(a, b)| {
690 (
691 a - std::cmp::min(a, cleared - 1),
692 b - std::cmp::min(b, cleared - 1),
693 )
694 })
695 .collect::<Vec<_>>()
696 } else {
697 kept_spans
698 };
699
700 let new_state =
701 State::new(new_matches, new_text.to_vec(), kept_spans, new_text_offset);
702
703 let new_index = if let Some(new_index) = states.iter().position(|x| x == &new_state)
704 {
705 new_index
706 } else {
707 let new_index = states.len();
708 states.push(new_state);
709 work.push(new_index);
710 new_index
711 };
712
713 if let Some(action) = action {
714 let lnk = Link::new(index, new_index, action, emitted_text);
715 links.push(lnk);
716 } else {
717 default_links.insert(index, emitted_text);
718 }
719 }
720 }
721
722 Self {
723 links: Links::new(links),
724 default_links: DefaultLinks::new(default_links),
725 }
726 }
727
728 pub fn mask_slice<S>(&self, input: S) -> Vec<u8>
742 where
743 S: AsRef<[u8]>,
744 {
745 let mut state = 0usize;
746 let mut res = Vec::new();
747 res.reserve(input.as_ref().len());
748 for ch in input.as_ref().iter() {
749 if let Some(link) = self.links.get(state, *ch) {
750 if let Some(emitted) = self.links.emitted(link) {
751 res.extend(emitted);
752 }
753 state = self.links.target(link);
754 } else {
755 if let Some(emitted) = self.default_links.get(state) {
756 res.extend(emitted);
757 }
758 res.push(*ch);
759 state = 0;
760 }
761 }
762 if let Some(emitted) = self.default_links.get(state) {
763 res.extend(emitted);
764 }
765 res
766 }
767
768 pub fn mask_str<S>(&self, input: S) -> String
776 where
777 S: AsRef<str>,
778 {
779 String::from_utf8(self.mask_slice(input.as_ref())).unwrap()
780 }
781
782 pub fn mask_chunks(&self) -> ChunkMasker<'_> {
805 ChunkMasker::new(self)
806 }
807
808 #[cfg(feature = "streams")]
814 pub fn mask_stream<S, E>(&self, stream: S) -> streams::MaskedStream<'_, S, E>
815 where
816 S: Stream<Item = Result<Bytes, E>> + Unpin,
817 {
818 streams::MaskedStream::new(stream, self)
819 }
820}
821
822pub struct ChunkMasker<'a> {
829 owner: &'a Masker,
830 state: usize,
831}
832
833impl<'a> ChunkMasker<'a> {
834 fn new(owner: &'a Masker) -> Self {
835 Self { owner, state: 0 }
836 }
837
838 pub fn mask_chunk<C>(&mut self, chunk: C) -> Vec<u8>
846 where
847 C: AsRef<[u8]>,
848 {
849 let mut res = Vec::new();
850 res.reserve(chunk.as_ref().len());
851 for ch in chunk.as_ref().iter() {
852 if let Some(link) = self.owner.links.get(self.state, *ch) {
853 if let Some(emitted) = self.owner.links.emitted(link) {
854 res.extend(emitted);
855 }
856 self.state = self.owner.links.target(link);
857 } else {
858 if let Some(emitted) = self.owner.default_links.get(self.state) {
859 res.extend(emitted);
860 }
861 res.push(*ch);
862 self.state = 0;
863 }
864 }
865 res
866 }
867
868 pub fn finish(self) -> Vec<u8> {
874 let mut res = Vec::new();
875 if let Some(emitted) = self.owner.default_links.get(self.state) {
876 res.extend(emitted);
877 }
878 res
879 }
880}
881
#[cfg(feature = "streams")]
mod streams {
    use super::{ChunkMasker, Masker};

    use bytes::Bytes;
    use core::pin::Pin;
    use core::task::{Context, Poll};
    use futures::Stream;

    /// Stream adapter that masks the `Bytes` chunks of an inner stream.
    pub struct MaskedStream<'a, S, E>
    where
        S: Stream<Item = Result<Bytes, E>> + Unpin,
    {
        /// The wrapped stream.
        base: S,
        /// Chunk masker; taken out once the inner stream has ended.
        mask: Option<ChunkMasker<'a>>,
        /// Set after the inner stream returned `None`.
        completed: bool,
    }

    impl<'a, S, E> MaskedStream<'a, S, E>
    where
        S: Stream<Item = Result<Bytes, E>> + Unpin,
    {
        /// Wraps `base` so that its chunks are masked with `masker`.
        pub fn new(base: S, masker: &'a Masker) -> Self {
            MaskedStream {
                base,
                mask: Some(masker.mask_chunks()),
                completed: false,
            }
        }
    }

    impl<'a, S, E> Stream for MaskedStream<'a, S, E>
    where
        S: Stream<Item = Result<Bytes, E>> + Unpin,
    {
        type Item = Result<Bytes, E>;

        /// Polls the inner stream until there is masked output to hand out.
        ///
        /// Chunks whose masked form is empty are skipped. Errors are passed
        /// through unchanged. When the inner stream ends, the held-back output
        /// is yielded (if any) and the stream then reports `None`.
        fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
            let this = self.get_mut();
            while !this.completed {
                let masked = match Pin::new(&mut this.base).poll_next(cx) {
                    Poll::Pending => return Poll::Pending,
                    Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
                    Poll::Ready(Some(Ok(bytes))) => {
                        this.mask.as_mut().unwrap().mask_chunk(bytes)
                    }
                    Poll::Ready(None) => {
                        this.completed = true;
                        this.mask.take().unwrap().finish()
                    }
                };
                if !masked.is_empty() {
                    return Poll::Ready(Some(Ok(masked.into())));
                }
            }
            Poll::Ready(None)
        }
    }
}
954
955#[cfg(test)]
956mod test {
957 use super::{Masker, MatchData};
958 use rand::rngs::StdRng;
959 use rand::{Rng, SeedableRng};
960
961 fn slow_union(input: &[(usize, usize)]) -> Vec<(usize, usize)> {
962 let mut buf1 = Vec::from(input);
963 let mut buf2 = Vec::new();
964 let mut changes = true;
965 while changes {
966 changes = false;
967 for i in 0..buf1.len() {
968 for j in (i + 1)..buf1.len() {
969 let x1 = std::cmp::max(buf1[i].0, buf1[j].0);
970 let x2 = std::cmp::min(buf1[i].1, buf1[j].1);
971 if x1 < x2 {
972 for b in buf1.iter().take(i) {
974 buf2.push(*b);
975 }
976 buf2.push((
977 std::cmp::min(buf1[i].0, buf1[j].0),
978 std::cmp::max(buf1[i].1, buf1[j].1),
979 ));
980 for b in buf1.iter().take(j).skip(i + 1) {
981 buf2.push(*b);
982 }
983 for b in buf1.iter().skip(j + 1) {
984 buf2.push(*b);
985 }
986 std::mem::swap(&mut buf1, &mut buf2);
987 buf2.clear();
988 changes = true;
989 break;
990 }
991 }
992 if changes {
993 break;
994 }
995 }
996 }
997 buf1.sort_by(|a, b| a.0.cmp(&b.0));
998 buf1
999 }
1000
1001 #[test]
1002 fn test_union() {
1003 let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
1004 for _ in 0..2000000 {
1005 let mut spans = Vec::new();
1006 let count = rng.gen_range(1..20);
1007 for _ in 0..count {
1008 let x1 = rng.gen_range(0..50);
1009 let x2 = x1 + rng.gen_range(1..20);
1010 spans.push((x1, x2));
1011 }
1012 let mut value = super::unify_spans(&spans);
1013 let mut check = slow_union(&spans);
1014 value.sort();
1015 check.sort();
1016 assert_eq!(value, check);
1017 }
1018 }
1019
1020 fn mask_string_check<S: AsRef<str>>(string: &str, mask: &str, keys: &[S]) -> String {
1021 let spans = {
1022 let mut spans = Vec::new();
1023 for key in keys.iter() {
1024 let mut offset = 0usize;
1025 while let Some(ix) = string[offset..].find(key.as_ref()) {
1026 let len = key.as_ref().as_bytes().len();
1027 spans.push((offset + ix, offset + ix + len));
1028 offset += ix + 1;
1029 }
1030 }
1031 spans
1032 };
1033
1034 let mut unioned_spans = super::unify_spans(&spans);
1035 unioned_spans.sort();
1036
1037 let mut offset = 0usize;
1038 let mut res = Vec::new();
1039 for span in unioned_spans {
1040 if offset < span.0 {
1041 res.extend_from_slice(&string.as_bytes()[offset..span.0]);
1042 }
1043 res.extend_from_slice(mask.as_bytes());
1044 offset = span.1;
1045 }
1046 if offset < string.as_bytes().len() {
1047 res.extend_from_slice(&string.as_bytes()[offset..]);
1048 }
1049 String::from_utf8_lossy(&res).into()
1050 }
1051
1052 fn random_string<R: Rng>(mut rng: R, len: usize) -> String {
1053 let mut res = String::new();
1054 for _ in 0..len {
1055 let ch = rng.gen_range('a'..'e');
1056 res.push(ch);
1057 }
1058 res
1059 }
1060
1061 fn random_buffer<R: Rng>(mut rng: R, len: usize) -> Vec<u8> {
1062 let mut res = Vec::new();
1063 res.resize(len, 0);
1064 for ch in res.iter_mut() {
1065 *ch = rng.gen_range(0x61..0x7a);
1066 }
1067 rng.fill_bytes(res.as_mut());
1068 res
1069 }
1070
1071 fn random_input<R: Rng>(mut rng: R, keys: &Vec<String>, len: usize) -> String {
1072 let mut res = String::new();
1073 let max_chunk = std::cmp::min(5, (len / 4) + 1);
1078 let mut stage = 0;
1079 assert!(max_chunk > 0);
1080 while res.len() < len {
1081 if stage == 0 {
1082 let len = rng.gen_range(1..(max_chunk + 1));
1083 if len > 0 {
1084 let mut remaining = 1000;
1085 let chunk = loop {
1086 let chunk = random_string(&mut rng, len);
1087 if !keys.iter().any(|k| chunk.contains(k)) {
1088 break chunk;
1089 }
1090 remaining -= 1;
1091 if remaining == 0 {
1092 break String::new();
1093 }
1094 };
1095 res.push_str(&chunk);
1096 }
1097 } else if !keys.is_empty() {
1098 let key = rng.gen_range(0..keys.len());
1099 res.push_str(&keys[key]);
1100 }
1101 stage = 1 - stage;
1102 }
1103 res
1104 }
1105
1106 #[test]
1107 fn test_masker() {
1108 let m = Masker::new(&["abcd", "1ab", "cde", "bce", "aa"], "-MASKED-");
1109 assert_eq!(m.mask_str("1abcdef"), "-MASKED-f".to_string());
1110 assert_eq!(m.mask_str("1a"), "1a".to_string());
1111 assert_eq!(m.mask_str("qqcdeblah"), "qq-MASKED-blah");
1112 }
1113
1114 #[test]
1115 fn test_masker_random() {
1116 let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
1117 for _ in 0..2000 {
1118 let num_keys = rng.gen_range(0..5);
1119 let mut keys = Vec::new();
1120 for _ in 0..num_keys {
1121 let len = rng.gen_range(1..6);
1122 keys.push(random_string(&mut rng, len));
1123 }
1124
1125 let m = Masker::new(&keys, "X");
1126
1127 for _ in 0..1000 {
1128 let len = rng.gen_range(0..100);
1129 let input = random_input(&mut rng, &keys, len);
1130 let output_as_string = m.mask_str(&input);
1131 let check = mask_string_check(&input, "X", &keys);
1132 for key in keys.iter() {
1133 assert!(
1134 !output_as_string.contains(key),
1135 "Key {} is contained in output {}",
1136 key,
1137 output_as_string
1138 );
1139 }
1140 assert_eq!(output_as_string, check);
1141 }
1142 }
1143 }
1144
1145 fn slice_contains_slice(haystack: &[u8], needle: &[u8]) -> bool {
1146 haystack
1147 .windows(needle.len())
1148 .any(|window| window == needle)
1149 }
1150
1151 fn add_separate_keys<R: Rng, S: AsRef<[u8]>>(
1152 mut rng: R,
1153 keys: &[S],
1154 buf: &mut Vec<u8>,
1155 gap: usize,
1156 ) -> usize {
1157 let mut offset = 0;
1158 let mut keys_added = 0;
1159 loop {
1160 let step = rng.gen_range((gap / 2)..gap);
1161 let key = &keys[rng.gen_range(0..keys.len())];
1162 offset += step;
1163 if offset >= buf.len() {
1164 break;
1165 }
1166 let end = std::cmp::min(buf.len(), offset + key.as_ref().len());
1167 let len = end - offset;
1168 buf[offset..(len + offset)].copy_from_slice(&key.as_ref()[..len]);
1169 keys_added += 1;
1170 offset += len;
1171 }
1172 keys_added
1173 }
1174
1175 fn add_random_keys<R: Rng, S: AsRef<[u8]>>(
1176 mut rng: R,
1177 keys: &[S],
1178 buf: &mut Vec<u8>,
1179 count: usize,
1180 ) -> usize {
1181 for _ in 0..count {
1182 let key = &keys[rng.gen_range(0..keys.len())];
1183 let offset = rng.gen_range(0..buf.len());
1184 let end = std::cmp::min(buf.len(), offset + key.as_ref().len());
1185 let len = end - offset;
1186 buf[offset..(len + offset)].copy_from_slice(&key.as_ref()[..len]);
1187 }
1188 count
1189 }
1190
1191 #[allow(dead_code)]
1192 fn diff_buffers<A: AsRef<[u8]>, B: AsRef<[u8]>>(a: A, b: B) -> bool {
1193 let len = std::cmp::min(a.as_ref().len(), b.as_ref().len());
1194 let mut offset = None;
1195 for i in 0..len {
1196 if a.as_ref()[i] != b.as_ref()[i] {
1197 offset = Some(i);
1198 break;
1199 }
1200 }
1201 if let Some(offset) = offset {
1202 println!("A B {}", offset);
1203 let start = if offset < 100 { 0 } else { offset - 100 };
1204 let end = if offset + 100 > len {
1205 len
1206 } else {
1207 offset + 100
1208 };
1209 for i in start..end {
1210 println!(
1211 "{:03} {:03}{}",
1212 a.as_ref()[i],
1213 b.as_ref()[i],
1214 if i == offset { " *" } else { "" }
1215 );
1216 }
1217 return false;
1218 } else if a.as_ref().len() > b.as_ref().len() {
1219 println!("A B {}", b.as_ref().len());
1220 for i in b.as_ref().len()..a.as_ref().len() {
1221 println!("{:03} ---", a.as_ref()[i]);
1222 }
1223 return false;
1224 } else if a.as_ref().len() < b.as_ref().len() {
1225 println!("A B {}", a.as_ref().len());
1226 for i in a.as_ref().len()..b.as_ref().len() {
1227 println!("--- {:03}", b.as_ref()[i]);
1228 }
1229 return false;
1230 }
1231 true
1232 }
1233
1234 fn mask_slice_check<S, T, U>(input: S, mask: T, keys: &[U]) -> Vec<u8>
1235 where
1236 S: AsRef<[u8]>,
1237 T: AsRef<[u8]>,
1238 U: AsRef<[u8]>,
1239 {
1240 let spans = {
1241 let mut spans = Vec::new();
1242 for key in keys.iter() {
1243 for ix in input
1244 .as_ref()
1245 .windows(key.as_ref().len())
1246 .enumerate()
1247 .filter(|(_, window)| window == &key.as_ref())
1248 .map(|(index, _)| index)
1249 {
1250 let len = key.as_ref().len();
1251 spans.push((ix, ix + len));
1252 }
1253 }
1254 spans
1255 };
1256
1257 let mut unioned_spans = super::unify_spans(&spans);
1258 unioned_spans.sort();
1259
1260 let mut offset = 0usize;
1261 let mut res = Vec::new();
1262 for span in unioned_spans {
1263 if offset < span.0 {
1264 res.extend_from_slice(&input.as_ref()[offset..span.0]);
1265 }
1266 res.extend_from_slice(mask.as_ref());
1267 offset = span.1;
1268 }
1269 if offset < input.as_ref().len() {
1270 res.extend_from_slice(&input.as_ref()[offset..]);
1271 }
1272 res
1273 }
1274
1275 #[test]
1276 fn test_masker_slabs() {
1277 let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
1278 for input_type in 0..4 {
1279 for _ in 0..2 {
1280 let num_keys = rng.gen_range(1..15);
1281 let mut keys = Vec::new();
1282 for _ in 0..num_keys {
1283 let len = rng.gen_range(10..50);
1284 keys.push(random_buffer(&mut rng, len));
1285 }
1286
1287 let m = Masker::new(&keys, "XXXX-XXXX-XXXX-XXXX");
1288
1289 for _ in 0..3 {
1290 let len = rng.gen_range(5_000_000..100_000_000);
1291 let mut input = random_buffer(&mut rng, len);
1292 match input_type {
1293 0 => 0,
1294 1 => add_random_keys(&mut rng, &keys, &mut input, 5),
1295 2 => add_random_keys(&mut rng, &keys, &mut input, 20),
1296 3 => add_separate_keys(&mut rng, &keys, &mut input, 20000),
1297 _ => unreachable!(),
1298 };
1299 let output = m.mask_slice(&input);
1300 let check = mask_slice_check(&input, "XXXX-XXXX-XXXX-XXXX", &keys);
1301 for key in keys.iter() {
1302 assert!(
1303 !slice_contains_slice(&output, key),
1304 "Key {:?} is contained in output",
1305 key
1306 );
1307 }
1308 for key in keys.iter() {
1309 assert!(
1310 !slice_contains_slice(&check, key),
1311 "Key {:?} is contained in check",
1312 key
1313 );
1314 }
1315 diff_buffers(&output, &check);
1316 assert_eq!(output, check);
1317 }
1318 }
1319 }
1320 }
1321
1322 #[test]
1323 fn test_chunk_masker_sanity() {
1324 let m = Masker::new(&["abcd", "1ab", "cde", "bce", "aa"], "-MASK-");
1325 let mut cm = m.mask_chunks();
1326 assert_eq!(cm.mask_chunk("ab"), Vec::new());
1327 assert_eq!(cm.mask_chunk("c"), Vec::new());
1328 assert_eq!(cm.mask_chunk("d"), Vec::new());
1329 assert_eq!(cm.mask_chunk("g"), Vec::from("-MASK-g".as_bytes()));
1330 assert_eq!(cm.finish().as_slice(), "".as_bytes())
1331 }
1332
1333 #[test]
1334 fn test_chunk_masker_random_no_prefixes() {
1335 let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
1336 for _ in 0..2000 {
1337 let num_keys = rng.gen_range(1..=5);
1338 let mut keys = Vec::new();
1339 for _ in 0..num_keys {
1340 let len = rng.gen_range(1..6);
1341 keys.push(random_string(&mut rng, len));
1342 }
1343
1344 let m = Masker::new(&keys, "X");
1345
1346 for _ in 0..1000 {
1347 let len = rng.gen_range(0..100);
1348 let input = random_input(&mut rng, &keys, len);
1349 let mut cm = m.mask_chunks();
1350 let mut output = Vec::new();
1351 let mut offset = 0;
1352 while offset < input.len() {
1353 let chunk_len = rng.gen_range(0..(std::cmp::min(10, input.len() - offset + 1)));
1354 let mut chunk = Vec::new();
1355 for _ in 0..chunk_len {
1356 chunk.push(input.as_bytes()[offset]);
1357 offset += 1;
1358 }
1359 output.extend_from_slice(cm.mask_chunk(chunk).as_ref());
1360 }
1361 output.extend(cm.finish().as_slice());
1362 let output_as_string = String::from_utf8_lossy(&output);
1363 let check = mask_string_check(&input, "X", &keys);
1364 for key in keys.iter() {
1365 assert!(
1366 !output_as_string.contains(key),
1367 "Key {} is contained in output {}",
1368 key,
1369 output_as_string
1370 );
1371 }
1372 assert_eq!(output_as_string, check);
1373 }
1374 }
1375 }
1376
1377 #[test]
1378 fn test_chunk_masker_slabs() {
1379 let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
1380 for input_type in 0..4 {
1381 for _ in 0..2 {
1382 let num_keys = rng.gen_range(1..15);
1383 let mut keys = Vec::new();
1384 for _ in 0..num_keys {
1385 let len = rng.gen_range(10..50);
1386 keys.push(random_buffer(&mut rng, len));
1387 }
1388
1389 let m = Masker::new(&keys, "XXXX-XXXX-XXXX-XXXX");
1390
1391 for _ in 0..3 {
1392 let len = rng.gen_range(5_000_000..100_000_000);
1393 let mut input = random_buffer(&mut rng, len);
1394 match input_type {
1395 0 => 0,
1396 1 => add_random_keys(&mut rng, &keys, &mut input, 5),
1397 2 => add_random_keys(&mut rng, &keys, &mut input, 20),
1398 3 => add_separate_keys(&mut rng, &keys, &mut input, 20000),
1399 _ => unreachable!(),
1400 };
1401 let mut cm = m.mask_chunks();
1402 let mut output = Vec::new();
1403 let mut offset = 0;
1404 while offset < input.len() {
1405 let chunk_len =
1406 rng.gen_range(0..(std::cmp::min(10, input.len() - offset + 1)));
1407 let mut chunk = Vec::new();
1408 for _ in 0..chunk_len {
1409 chunk.push(input[offset]);
1410 offset += 1;
1411 }
1412 output.extend_from_slice(cm.mask_chunk(chunk).as_ref());
1413 }
1414 output.extend(cm.finish().as_slice());
1415
1416 let check = mask_slice_check(&input, "XXXX-XXXX-XXXX-XXXX", &keys);
1417 for key in keys.iter() {
1418 assert!(
1419 !slice_contains_slice(&output, key),
1420 "Key {:?} is contained in output {:?}",
1421 key,
1422 output
1423 );
1424 }
1425 diff_buffers(&output, &check);
1426 assert_eq!(output, check);
1427 }
1428 }
1429 }
1430 }
1431
1432 fn mask_string_check_with_prefixes<S: AsRef<str>>(
1433 string: &str,
1434 mask: &str,
1435 keys: &[S],
1436 pfxes: &[MatchData],
1437 ) -> String {
1438 let key_spans = {
1439 let mut spans = Vec::new();
1440 for key in keys.iter() {
1441 let mut offset = 0usize;
1442 while let Some(ix) = string[offset..].find(key.as_ref()) {
1443 let len = key.as_ref().as_bytes().len();
1444 spans.push((offset + ix, offset + ix + len));
1445 offset += ix + 1;
1446 }
1447 }
1448 spans
1449 };
1450
1451 let pfx_spans = {
1452 let mut spans = Vec::new();
1453 for pfx in pfxes.iter() {
1454 if !pfx.prefix.is_empty() {
1455 let mut offset = 0usize;
1456 while let Some(ix) =
1457 string[offset..].find(String::from_utf8_lossy(pfx.prefix).as_ref())
1458 {
1459 let start_ix = if pfx.mask_prefix {
1460 offset + ix
1461 } else {
1462 offset + ix + pfx.prefix.len()
1463 };
1464 let mut end_ix = offset + ix + pfx.prefix.len();
1465 while end_ix < string.len()
1466 && pfx.suffix.contains(&string.as_bytes()[end_ix])
1467 {
1468 end_ix += 1;
1469 }
1470 if end_ix > start_ix {
1471 spans.push((start_ix, end_ix));
1472 }
1473 offset += ix + 1;
1474 if offset >= string.as_bytes().len() {
1475 break;
1476 }
1477 }
1478 }
1479 }
1480 spans
1481 };
1482
1483 let mut spans = key_spans;
1484 spans.extend(pfx_spans);
1485
1486 let mut unioned_spans = super::unify_spans(&spans);
1487 unioned_spans.sort();
1488
1489 let mut offset = 0usize;
1490 let mut res = Vec::new();
1491 for span in unioned_spans {
1492 if offset < span.0 {
1493 res.extend_from_slice(&string.as_bytes()[offset..span.0]);
1494 }
1495 res.extend_from_slice(mask.as_bytes());
1496 offset = span.1;
1497 }
1498 if offset < string.as_bytes().len() {
1499 res.extend_from_slice(&string.as_bytes()[offset..]);
1500 }
1501
1502 for pfx in pfxes.iter() {
1503 if pfx.prefix.is_empty() {
1504 let mut buf = Vec::new();
1505 for b in res.iter() {
1506 if pfx.suffix.contains(b) {
1507 buf.extend_from_slice(mask.as_bytes());
1508 } else {
1509 buf.push(*b);
1510 }
1511 }
1512 std::mem::swap(&mut res, &mut buf);
1513 }
1514 }
1515
1516 String::from_utf8_lossy(&res).into()
1517 }
1518
1519 fn add_separate_keys_with_prefixes<R: Rng, S: AsRef<[u8]>>(
1520 mut rng: R,
1521 keys: &[S],
1522 pfxes: &[MatchData],
1523 buf: &mut Vec<u8>,
1524 gap: usize,
1525 ) -> usize {
1526 let mut offset = 0;
1527 let mut keys_added = 0;
1528 loop {
1529 let step = rng.gen_range((gap / 2)..gap);
1530 let key_ix = rng.gen_range(0..keys.len() + pfxes.len());
1531 offset += step;
1532 if offset >= buf.len() {
1533 break;
1534 }
1535 if key_ix < keys.len() {
1536 let key = &keys[key_ix];
1537 let end = std::cmp::min(buf.len(), offset + key.as_ref().len());
1538 let len = end - offset;
1539 buf[offset..(len + offset)].copy_from_slice(&key.as_ref()[..len]);
1540 offset += len;
1541 } else {
1542 let pfx = &pfxes[key_ix - keys.len()];
1543 let pfx_end = std::cmp::min(buf.len(), offset + pfx.prefix.len());
1544 let pfx_len = pfx_end - offset;
1545 buf[offset..(pfx_len + offset)].copy_from_slice(&pfx.prefix[..pfx_len]);
1546 offset += pfx_len;
1547 let suffix_len = if !pfx.suffix.is_empty() && offset < buf.len() {
1548 rng.gen_range(0..std::cmp::min(64, buf.len() - offset))
1549 } else {
1550 0
1551 };
1552 for _ in 0..suffix_len {
1553 buf[offset] = pfx.suffix[rng.gen_range(0..pfx.suffix.len())];
1554 offset += 1;
1555 }
1556 }
1557 keys_added += 1;
1558 }
1559 keys_added
1560 }
1561
1562 fn add_random_keys_with_prefixes<R: Rng, S: AsRef<[u8]>>(
1563 mut rng: R,
1564 keys: &[S],
1565 pfxes: &[MatchData],
1566 buf: &mut Vec<u8>,
1567 count: usize,
1568 ) -> usize {
1569 for _ in 0..count {
1570 let ix = rng.gen_range(0..(keys.len() + pfxes.len()));
1571 let offset = rng.gen_range(0..buf.len());
1572 if ix < keys.len() {
1573 let key = &keys[ix];
1574 let end = std::cmp::min(buf.len(), offset + key.as_ref().len());
1575 let len = end - offset;
1576 buf[offset..(len + offset)].copy_from_slice(&key.as_ref()[..len]);
1577 } else {
1578 let pfx = &pfxes[ix - keys.len()];
1579 let pfx_end = std::cmp::min(buf.len(), offset + pfx.prefix.len());
1580 let pfx_len = pfx_end - offset;
1581 buf[offset..(pfx_len + offset)].copy_from_slice(&pfx.prefix[..pfx_len]);
1582 let suffix_len = if !pfx.suffix.is_empty() && offset < buf.len() {
1583 rng.gen_range(0..std::cmp::min(64, buf.len() - offset))
1584 } else {
1585 0
1586 };
1587 for i in 0..suffix_len {
1588 buf[offset + pfx_len + i] = pfx.suffix[rng.gen_range(0..pfx.suffix.len())];
1589 }
1590 }
1591 }
1592 count
1593 }
1594
/// Checks `Masker::mask_str` with a single prefix rule and no plain keys,
/// once leaving the prefix in place and once masking it as well.
#[test]
fn test_masker_with_prefixes() {
    let no_keys: &[&str] = &[];
    // (mask_prefix, expected output for the input "pfx-aeebfcsfasgs")
    let cases = [
        (false, "pfx--MASKED-fcsfasgs"),
        (true, "-MASKED-fcsfasgs"),
    ];
    for (mask_prefix, expected) in cases {
        let rule = MatchData {
            prefix: "pfx-".as_ref(),
            suffix: "abcde".as_ref(),
            mask_prefix,
        };
        let masker = Masker::new_with_match_data(no_keys, &[rule], "-MASKED-");
        assert_eq!(masker.mask_str("pfx-aeebfcsfasgs"), expected.to_string());
    }
}
1621
/// Randomized comparison of `Masker::mask_str` against the reference
/// implementation `mask_string_check_with_prefixes`.
///
/// Each of the 2000 rounds builds a random set of keys and prefix rules and
/// masks 1000 random inputs with it. Besides the exact comparison, the output
/// is checked to contain no key, no masked prefix, no suffix character
/// directly after an unmasked prefix, and no suffix character of a
/// prefix-less rule. The RNG uses a fixed seed.
#[test]
fn test_masker_with_prefixes_random() {
    let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
    for _ in 0..2000 {
        let num_keys = rng.gen_range(0..5);
        let keys: Vec<_> = (0..num_keys)
            .map(|_| {
                let len = rng.gen_range(1..6);
                random_string(&mut rng, len)
            })
            .collect();

        // Owned (prefix, suffix set) pairs; the rules below borrow from them.
        let num_prefixes: usize = rng.gen_range(0..3);
        let mut affixes = Vec::with_capacity(num_prefixes);
        for _ in 0..num_prefixes {
            let prefix_len = rng.gen_range(0..6);
            let prefix = random_string(&mut rng, prefix_len);
            let suffix_set: usize = rng.gen_range(0..4);
            let mut suffix = String::new();
            // Draw distinct characters until the set has the wanted size.
            while suffix.len() < suffix_set {
                let ch = rng.gen_range('a'..'e');
                if !suffix.contains(ch) {
                    suffix.push(ch);
                }
            }
            affixes.push((prefix, suffix));
        }
        let mask_prefix = rng.gen_bool(0.5);

        let pfxes: Vec<MatchData> = affixes
            .iter()
            .map(|(prefix, suffix)| MatchData {
                prefix: prefix.as_ref(),
                suffix: suffix.as_ref(),
                mask_prefix,
            })
            .collect();

        let m = Masker::new_with_match_data(&keys, &pfxes, "X");

        for _ in 0..1000 {
            let len = rng.gen_range(0..100);
            let input = random_input(&mut rng, &keys, len);
            let output_as_string = m.mask_str(&input);
            let check = mask_string_check_with_prefixes(&input, "X", &keys, &pfxes);
            let out_bytes = output_as_string.as_bytes();

            for key in keys.iter() {
                assert!(
                    !output_as_string.contains(key),
                    "Key {} is contained in output {}",
                    key,
                    output_as_string
                );
            }

            for pfx in pfxes.iter() {
                if pfx.prefix.is_empty() {
                    // A prefix-less rule masks its suffix characters everywhere.
                    for ch in pfx.suffix.iter() {
                        assert!(
                            !out_bytes.contains(ch),
                            "Suffix char {} of suffix {} is contained in output",
                            ch,
                            String::from_utf8_lossy(pfx.suffix)
                        );
                    }
                    continue;
                }

                let prefix_text = String::from_utf8_lossy(pfx.prefix);
                if pfx.mask_prefix {
                    assert!(
                        !output_as_string.contains(&*prefix_text),
                        "Prefix {} is contained in output {}",
                        prefix_text,
                        output_as_string
                    );
                    continue;
                }

                // The prefix may remain, but no suffix character may follow it.
                let prefix_len = pfx.prefix.len();
                for (ix, window) in out_bytes.windows(prefix_len).enumerate() {
                    if window != pfx.prefix {
                        continue;
                    }
                    let next = match out_bytes.get(ix + prefix_len) {
                        Some(&b) => b,
                        // The prefix ends the output: nothing follows it.
                        None => break,
                    };
                    assert!(
                        !pfx.suffix.contains(&next),
                        "Suffix char {} is present after prefix {} at offset {} in {}",
                        char::from(next),
                        prefix_text,
                        ix,
                        output_as_string
                    );
                }
            }

            assert_eq!(output_as_string, check);
        }
    }
}
1729
/// Checks the chunked masker with a single prefix rule whose match is split
/// across several chunks, once leaving the prefix in place and once masking
/// it as well.
#[test]
fn test_chunk_masker_with_prefixes() {
    let no_keys: &[&str] = &[];
    // (mask_prefix, [(chunk fed to the masker, bytes expected back)])
    let scenarios = [
        (
            false,
            [
                ("pf", "pf"),
                ("x-a", "x-"),
                ("eebfcs", "-MASKED-fcs"),
                ("fasgs", "fasgs"),
            ],
        ),
        (
            true,
            [
                ("pf", ""),
                ("x-a", ""),
                ("eebfcs", "-MASKED-fcs"),
                ("fasgs", "fasgs"),
            ],
        ),
    ];

    for (mask_prefix, steps) in scenarios {
        let rule = MatchData {
            prefix: "pfx-".as_ref(),
            suffix: "abcde".as_ref(),
            mask_prefix,
        };
        let m = Masker::new_with_match_data(no_keys, &[rule], "-MASKED-");
        let mut cm = m.mask_chunks();
        for (chunk, expected) in steps {
            assert_eq!(cm.mask_chunk(chunk), Vec::from(expected.as_bytes()));
        }
        // Nothing may be left buffered once all chunks were processed.
        assert_eq!(cm.finish().as_slice(), "".as_bytes());
    }
}
1760
/// Randomized comparison of the chunked masker against the reference
/// implementation `mask_string_check_with_prefixes`.
///
/// Each input is fed to `mask_chunks` in random pieces of 0 to 9 bytes; the
/// concatenated output (including what `finish` returns) must equal the
/// reference result and must pass the same containment checks as the
/// non-chunked random test. The RNG uses a fixed seed.
#[test]
fn test_chunk_masker_random_with_prefixes() {
    let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
    for _ in 0..2000 {
        let num_keys = rng.gen_range(1..=5);
        let keys: Vec<_> = (0..num_keys)
            .map(|_| {
                let len = rng.gen_range(1..6);
                random_string(&mut rng, len)
            })
            .collect();

        // Owned (prefix, suffix set) pairs; the rules below borrow from them.
        let num_prefixes: usize = rng.gen_range(0..3);
        let mut affixes = Vec::with_capacity(num_prefixes);
        for _ in 0..num_prefixes {
            let prefix_len = rng.gen_range(0..6);
            let prefix = random_string(&mut rng, prefix_len);
            let suffix_set: usize = rng.gen_range(0..4);
            let mut suffix = String::new();
            // Draw distinct characters until the set has the wanted size.
            while suffix.len() < suffix_set {
                let ch = rng.gen_range('a'..'e');
                if !suffix.contains(ch) {
                    suffix.push(ch);
                }
            }
            affixes.push((prefix, suffix));
        }
        let mask_prefix = rng.gen_bool(0.5);

        let pfxes: Vec<MatchData> = affixes
            .iter()
            .map(|(prefix, suffix)| MatchData {
                prefix: prefix.as_ref(),
                suffix: suffix.as_ref(),
                mask_prefix,
            })
            .collect();

        let m = Masker::new_with_match_data(&keys, &pfxes, "X");

        for _ in 0..1000 {
            let len = rng.gen_range(0..100);
            let input = random_input(&mut rng, &keys, len);
            let input_bytes = input.as_bytes();

            // Push the input through the chunk masker in random-sized pieces.
            let mut cm = m.mask_chunks();
            let mut output = Vec::new();
            let mut offset = 0;
            while offset < input_bytes.len() {
                let chunk_len =
                    rng.gen_range(0..(std::cmp::min(10, input_bytes.len() - offset + 1)));
                let chunk = input_bytes[offset..offset + chunk_len].to_vec();
                offset += chunk_len;
                output.extend_from_slice(cm.mask_chunk(chunk).as_ref());
            }
            output.extend(cm.finish().as_slice());

            let output_as_string = String::from_utf8_lossy(&output);
            let check = mask_string_check_with_prefixes(&input, "X", &keys, &pfxes);
            let out_bytes = output_as_string.as_bytes();

            for key in keys.iter() {
                assert!(
                    !output_as_string.contains(key),
                    "Key {} is contained in output {}",
                    key,
                    output_as_string
                );
            }

            for pfx in pfxes.iter() {
                if pfx.prefix.is_empty() {
                    // A prefix-less rule masks its suffix characters everywhere.
                    for ch in pfx.suffix.iter() {
                        assert!(
                            !out_bytes.contains(ch),
                            "Suffix char {} of suffix {} is contained in output",
                            ch,
                            String::from_utf8_lossy(pfx.suffix)
                        );
                    }
                    continue;
                }

                let prefix_text = String::from_utf8_lossy(pfx.prefix);
                if pfx.mask_prefix {
                    assert!(
                        !output_as_string.contains(&*prefix_text),
                        "Prefix {} is contained in output {}",
                        prefix_text,
                        output_as_string
                    );
                    continue;
                }

                // The prefix may remain, but no suffix character may follow it.
                let prefix_len = pfx.prefix.len();
                for (ix, window) in out_bytes.windows(prefix_len).enumerate() {
                    if window != pfx.prefix {
                        continue;
                    }
                    let next = match out_bytes.get(ix + prefix_len) {
                        Some(&b) => b,
                        // The prefix ends the output: nothing follows it.
                        None => break,
                    };
                    assert!(
                        !pfx.suffix.contains(&next),
                        "Suffix char {} is present after prefix {} at offset {} in {}",
                        char::from(next),
                        prefix_text,
                        ix,
                        output_as_string
                    );
                }
            }

            assert_eq!(output_as_string, check);
        }
    }
}
1881
1882 fn mask_slice_check_with_prefixes<S, T, U>(
1883 input: S,
1884 mask: T,
1885 keys: &[U],
1886 pfxes: &[MatchData],
1887 ) -> Vec<u8>
1888 where
1889 S: AsRef<[u8]>,
1890 T: AsRef<[u8]>,
1891 U: AsRef<[u8]>,
1892 {
1893 let key_spans = {
1894 let mut spans = Vec::new();
1895 for key in keys.iter() {
1896 for ix in input
1897 .as_ref()
1898 .windows(key.as_ref().len())
1899 .enumerate()
1900 .filter(|(_, window)| window == &key.as_ref())
1901 .map(|(index, _)| index)
1902 {
1903 let len = key.as_ref().len();
1904 spans.push((ix, ix + len));
1905 }
1906 }
1907 spans
1908 };
1909
1910 let pfx_spans = {
1911 let mut spans = Vec::new();
1912 for pfx in pfxes.iter() {
1913 if !pfx.prefix.is_empty() {
1914 for ix in input
1915 .as_ref()
1916 .windows(pfx.prefix.len())
1917 .enumerate()
1918 .filter(|(_, window)| window == &pfx.prefix)
1919 .map(|(index, _)| index)
1920 {
1921 let len = pfx.prefix.len();
1922 let end_ix = ix
1923 + input.as_ref()[ix + len..]
1924 .iter()
1925 .position(|ch| !pfx.suffix.contains(ch))
1926 .map(|i| len + i)
1927 .unwrap_or_else(|| input.as_ref().len() - ix);
1928 if pfx.mask_prefix {
1929 spans.push((ix, end_ix));
1930 } else if end_ix > ix + len {
1931 spans.push((ix + len, end_ix));
1932 }
1933 }
1934 }
1935 }
1936 spans
1937 };
1938
1939 let mut spans = key_spans;
1940 spans.extend(pfx_spans);
1941
1942 let mut unioned_spans = super::unify_spans(&spans);
1943 unioned_spans.sort();
1944
1945 let mut offset = 0usize;
1946 let mut res = Vec::new();
1947 for span in unioned_spans {
1948 if offset < span.0 {
1949 res.extend_from_slice(&input.as_ref()[offset..span.0]);
1950 }
1951 res.extend_from_slice(mask.as_ref());
1952 offset = span.1;
1953 }
1954 if offset < input.as_ref().len() {
1955 res.extend_from_slice(&input.as_ref()[offset..]);
1956 }
1957
1958 for pfx in pfxes.iter() {
1959 if pfx.prefix.is_empty() {
1960 let mut buf = Vec::new();
1961 for b in res.iter() {
1962 if pfx.suffix.contains(b) {
1963 buf.extend_from_slice(mask.as_ref());
1964 } else {
1965 buf.push(*b);
1966 }
1967 }
1968 std::mem::swap(&mut res, &mut buf);
1969 }
1970 }
1971
1972 res
1973 }
1974
/// Stress test of the chunked masker on large random byte buffers
/// (5,000,000 up to 100,000,000 bytes), compared against
/// `mask_slice_check_with_prefixes`.
///
/// `input_type` selects how keys and rules are planted in the buffer:
/// `0` plants nothing, `1` and `2` overwrite 5 respectively 20 random
/// positions, and `3` plants them at separate positions using a gap of 20000.
/// Each buffer is fed to the masker in random pieces of 0 to 9 bytes.
/// The RNG uses a fixed seed.
#[test]
fn test_chunk_masker_slabs_with_prefixes() {
    const MASK: &str = "ABCD=EFGH=IJKL=MNOP";

    let mut rng = StdRng::seed_from_u64(0xdeadbeefabadcafe);
    for input_type in 0..4 {
        for _ in 0..2 {
            let num_keys = rng.gen_range(1..15);
            let keys: Vec<_> = (0..num_keys)
                .map(|_| {
                    let len = rng.gen_range(10..50);
                    random_buffer(&mut rng, len)
                })
                .collect();

            // Owned (prefix, suffix set) pairs; the rules below borrow from them.
            let num_prefixes: usize = rng.gen_range(0..10);
            let mut affixes = Vec::with_capacity(num_prefixes);
            for _ in 0..num_prefixes {
                let prefix_len = rng.gen_range(0..15);
                let prefix = random_buffer(&mut rng, prefix_len);
                let suffix_set: usize = rng.gen_range(0..50);
                let mut suffix: Vec<u8> = Vec::new();
                // Draw distinct byte values until the set has the wanted size.
                while suffix.len() < suffix_set {
                    let ch = rng.gen_range(0..50);
                    if !suffix.contains(&ch) {
                        suffix.push(ch);
                    }
                }
                suffix.sort();
                affixes.push((prefix, suffix));
            }
            let mask_prefix = rng.gen_bool(0.5);

            let pfxes: Vec<MatchData> = affixes
                .iter()
                .map(|(prefix, suffix)| MatchData {
                    prefix: prefix.as_ref(),
                    suffix: suffix.as_ref(),
                    mask_prefix,
                })
                .collect();

            let m = Masker::new_with_match_data(&keys, &pfxes, MASK);

            for _ in 0..3 {
                let len = rng.gen_range(5_000_000..100_000_000);
                let mut input = random_buffer(&mut rng, len);
                // The number of planted items is not needed by the test.
                let _planted = match input_type {
                    0 => 0,
                    1 => add_random_keys_with_prefixes(&mut rng, &keys, &pfxes, &mut input, 5),
                    2 => add_random_keys_with_prefixes(&mut rng, &keys, &pfxes, &mut input, 20),
                    3 => add_separate_keys_with_prefixes(
                        &mut rng, &keys, &pfxes, &mut input, 20000,
                    ),
                    _ => unreachable!(),
                };

                // Push the buffer through the chunk masker in random-sized pieces.
                let mut cm = m.mask_chunks();
                let mut output = Vec::new();
                let mut offset = 0;
                while offset < input.len() {
                    let chunk_len =
                        rng.gen_range(0..(std::cmp::min(10, input.len() - offset + 1)));
                    let chunk = input[offset..offset + chunk_len].to_vec();
                    offset += chunk_len;
                    output.extend_from_slice(cm.mask_chunk(chunk).as_ref());
                }
                output.extend(cm.finish().as_slice());

                let check = mask_slice_check_with_prefixes(&input, MASK, &keys, &pfxes);
                for key in keys.iter() {
                    assert!(
                        !slice_contains_slice(&output, key),
                        "Key {:?} is contained in output {:?}",
                        key,
                        output
                    );
                }
                diff_buffers(&output, &check);
                assert_eq!(output, check);
            }
        }
    }
}
2068
/// Tests for the stream-masking API; only built with the `streams` feature.
#[cfg(feature = "streams")]
mod streams {
    use bytes::Bytes;
    use core::convert::Infallible;
    use core::pin::Pin;
    use core::task::{Context, Poll};
    use futures::{Stream, StreamExt};

    use crate::Masker;

    /// Test stream that yields the bytes of a string one at a time, so every
    /// match necessarily spans several stream items.
    struct StringStream<'a> {
        data: &'a str,
        offset: usize,
    }

    impl<'a> StringStream<'a> {
        /// Creates a stream positioned at the first byte of `data`.
        pub fn new(data: &'a str) -> Self {
            StringStream { data, offset: 0 }
        }
    }

    impl<'a> Stream for StringStream<'a> {
        type Item = Result<Bytes, Infallible>;

        /// Always ready: returns the next single byte, or `None` once the
        /// string is exhausted.
        fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
            let this = self.get_mut();
            match this.data.as_bytes().get(this.offset).copied() {
                Some(byte) => {
                    this.offset += 1;
                    Poll::Ready(Some(Ok(Bytes::copy_from_slice(&[byte]))))
                }
                None => Poll::Ready(None),
            }
        }
    }

    /// Drains `s` and concatenates all of its chunks into one `Bytes` value,
    /// returning the first error the stream yields, if any.
    async fn aggregate<T, E>(mut s: T) -> Result<Bytes, E>
    where
        T: Stream<Item = Result<Bytes, E>> + Unpin,
        E: core::fmt::Debug,
    {
        let mut collected = Vec::new();
        while let Some(item) = s.next().await {
            collected.extend_from_slice(&item?);
        }
        Ok(collected.into())
    }

    /// Masks a few short strings through `mask_stream` and checks the
    /// aggregated output.
    #[tokio::test]
    async fn test_stream_sanity() {
        let m = Masker::new(&["abcd", "1ab", "cde", "bce", "aa"], "-MASKED-");

        // (input, expected masked output)
        let cases = [
            ("1abcdef", "-MASKED-f"),
            ("1a", "1a"),
            ("qqcdeblah", "qq-MASKED-blah"),
        ];
        for (input, expected) in cases {
            let masked = aggregate(m.mask_stream(StringStream::new(input)))
                .await
                .unwrap();
            assert_eq!(masked, expected);
        }
    }
}
2146}