1use regex_lite::Regex;
2
3use crate::sanitize::sanitize_with;
4use crate::types::{
5 RedirectMatchPart, RedirectPrependScheme, RedirectTargetEncoding, RuleKind, RuleSource,
6 SanitizeResult, SanitizerOptions, SanitizerRule,
7};
8
/// Error type for rule compilation, carrying a human-readable message.
///
/// The wrapped `String` is the entire payload: `Display` prints it verbatim.
#[derive(Debug)]
pub struct CompileError(pub String);

impl std::fmt::Display for CompileError {
    /// Writes the wrapped message as-is; width/fill flags on the formatter
    /// are not applied because the text is emitted through `write_str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let CompileError(message) = self;
        f.write_str(message)
    }
}

// No underlying cause is tracked, so the default `source()` (None) is kept.
impl std::error::Error for CompileError {}
19
20pub(crate) struct CompiledRule {
21 pub source: RuleSource,
22 pub provider: String,
23 pub kind: RuleKind,
24 pub url_pattern: Option<Regex>,
25 pub exceptions: Vec<Regex>,
26 pub body: CompiledBody,
27}
28
29pub(crate) enum CompiledBody {
30 StripParam {
31 param_pattern: Regex,
32 value_pattern: Option<Regex>,
33 is_referral_marketing: bool,
34 },
35 RawReplace {
36 pattern: Regex,
37 replacement: String,
38 },
39 UnwrapRedirect {
40 pattern: Regex,
41 capture_group: usize,
42 match_part: RedirectMatchPart,
43 target_encoding: RedirectTargetEncoding,
44 prepend_scheme: Option<RedirectPrependScheme>,
45 target_template: Option<String>,
46 },
47 BlockDomain,
48}
49
50pub struct Sanitizer {
51 pub(crate) rules: Vec<CompiledRule>,
52 pub(crate) options: SanitizerOptions,
53}
54
55fn compile_one(r: &SanitizerRule) -> Option<CompiledRule> {
56 fn try_re(pat: &str) -> Option<Regex> {
57 Regex::new(pat).ok()
58 }
59 fn try_re_ci(pat: &str) -> Option<Regex> {
60 Regex::new(&format!("(?i){}", pat)).ok()
61 }
62 fn compile_exceptions(list: &[String]) -> Vec<Regex> {
63 list.iter().filter_map(|p| try_re(p)).collect()
64 }
65
66 match r {
67 SanitizerRule::StripParam {
68 source,
69 provider,
70 url_pattern,
71 param_pattern,
72 value_pattern,
73 exceptions,
74 is_referral_marketing,
75 } => {
76 let pp = try_re_ci(&format!("^(?:{})$", param_pattern))?;
77 let vp = match value_pattern.as_deref() {
78 Some(p) => Some(try_re_ci(&format!("^(?:{})$", p))?),
79 None => None,
80 };
81 let urlp = match url_pattern.as_deref() {
82 Some(p) => Some(try_re_ci(p)?),
83 None => None,
84 };
85 Some(CompiledRule {
86 source: *source,
87 provider: provider.clone(),
88 kind: RuleKind::StripParam,
89 url_pattern: urlp,
90 exceptions: compile_exceptions(exceptions),
91 body: CompiledBody::StripParam {
92 param_pattern: pp,
93 value_pattern: vp,
94 is_referral_marketing: *is_referral_marketing,
95 },
96 })
97 }
98 SanitizerRule::RawReplace {
99 source,
100 provider,
101 url_pattern,
102 pattern,
103 replacement,
104 exceptions,
105 } => {
106 let p = try_re_ci(pattern)?;
107 let urlp = match url_pattern.as_deref() {
108 Some(p) => Some(try_re_ci(p)?),
109 None => None,
110 };
111 Some(CompiledRule {
112 source: *source,
113 provider: provider.clone(),
114 kind: RuleKind::RawReplace,
115 url_pattern: urlp,
116 exceptions: compile_exceptions(exceptions),
117 body: CompiledBody::RawReplace {
118 pattern: p,
119 replacement: replacement.clone(),
120 },
121 })
122 }
123 SanitizerRule::UnwrapRedirect {
124 source,
125 provider,
126 url_pattern,
127 pattern,
128 capture_group,
129 match_part,
130 target_encoding,
131 prepend_scheme,
132 target_template,
133 exceptions,
134 } => {
135 let p = try_re_ci(pattern)?;
136 let urlp = match url_pattern.as_deref() {
137 Some(p) => Some(try_re_ci(p)?),
138 None => None,
139 };
140 Some(CompiledRule {
141 source: *source,
142 provider: provider.clone(),
143 kind: RuleKind::UnwrapRedirect,
144 url_pattern: urlp,
145 exceptions: compile_exceptions(exceptions),
146 body: CompiledBody::UnwrapRedirect {
147 pattern: p,
148 capture_group: *capture_group as usize,
149 match_part: *match_part,
150 target_encoding: *target_encoding,
151 prepend_scheme: *prepend_scheme,
152 target_template: target_template.clone(),
153 },
154 })
155 }
156 SanitizerRule::BlockDomain {
157 source,
158 provider,
159 url_pattern,
160 exceptions,
161 } => {
162 let urlp = try_re_ci(url_pattern)?;
163 Some(CompiledRule {
164 source: *source,
165 provider: provider.clone(),
166 kind: RuleKind::BlockDomain,
167 url_pattern: Some(urlp),
168 exceptions: compile_exceptions(exceptions),
169 body: CompiledBody::BlockDomain,
170 })
171 }
172 }
173}
174
175impl Sanitizer {
176 pub fn compile(rules: &[SanitizerRule], options: SanitizerOptions) -> Sanitizer {
177 let compiled: Vec<CompiledRule> = rules.iter().filter_map(compile_one).collect();
178 Sanitizer {
179 rules: compiled,
180 options,
181 }
182 }
183
184 pub fn sanitize(&self, input: &str) -> SanitizeResult {
185 sanitize_with(&self.rules, &self.options, input)
186 }
187}