Skip to main content

url_sanitize_core/
compile.rs

1use regex_lite::Regex;
2
3use crate::sanitize::sanitize_with;
4use crate::types::{
5    RedirectMatchPart, RedirectPrependScheme, RedirectTargetEncoding, RuleKind, RuleSource,
6    SanitizeResult, SanitizerOptions, SanitizerRule,
7};
8
/// Error type wrapping a plain, human-readable message.
///
/// The message is stored in the public tuple field and is rendered verbatim
/// by the [`std::fmt::Display`] implementation.
#[derive(Debug)]
pub struct CompileError(pub String);

impl std::fmt::Display for CompileError {
    /// Writes the wrapped message as-is; width, fill and alignment flags on
    /// the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}

/// Marker implementation: no underlying source error is exposed.
impl std::error::Error for CompileError {}
19
20pub(crate) struct CompiledRule {
21    pub source: RuleSource,
22    pub provider: String,
23    pub kind: RuleKind,
24    pub url_pattern: Option<Regex>,
25    pub exceptions: Vec<Regex>,
26    pub body: CompiledBody,
27}
28
29pub(crate) enum CompiledBody {
30    StripParam {
31        param_pattern: Regex,
32        value_pattern: Option<Regex>,
33        is_referral_marketing: bool,
34    },
35    RawReplace {
36        pattern: Regex,
37        replacement: String,
38    },
39    UnwrapRedirect {
40        pattern: Regex,
41        capture_group: usize,
42        match_part: RedirectMatchPart,
43        target_encoding: RedirectTargetEncoding,
44        prepend_scheme: Option<RedirectPrependScheme>,
45        target_template: Option<String>,
46    },
47    BlockDomain,
48}
49
50pub struct Sanitizer {
51    pub(crate) rules: Vec<CompiledRule>,
52    pub(crate) options: SanitizerOptions,
53}
54
55fn compile_one(r: &SanitizerRule) -> Option<CompiledRule> {
56    fn try_re(pat: &str) -> Option<Regex> {
57        Regex::new(pat).ok()
58    }
59    fn try_re_ci(pat: &str) -> Option<Regex> {
60        Regex::new(&format!("(?i){}", pat)).ok()
61    }
62    fn compile_exceptions(list: &[String]) -> Vec<Regex> {
63        list.iter().filter_map(|p| try_re(p)).collect()
64    }
65
66    match r {
67        SanitizerRule::StripParam {
68            source,
69            provider,
70            url_pattern,
71            param_pattern,
72            value_pattern,
73            exceptions,
74            is_referral_marketing,
75        } => {
76            let pp = try_re_ci(&format!("^(?:{})$", param_pattern))?;
77            let vp = match value_pattern.as_deref() {
78                Some(p) => Some(try_re_ci(&format!("^(?:{})$", p))?),
79                None => None,
80            };
81            let urlp = match url_pattern.as_deref() {
82                Some(p) => Some(try_re_ci(p)?),
83                None => None,
84            };
85            Some(CompiledRule {
86                source: *source,
87                provider: provider.clone(),
88                kind: RuleKind::StripParam,
89                url_pattern: urlp,
90                exceptions: compile_exceptions(exceptions),
91                body: CompiledBody::StripParam {
92                    param_pattern: pp,
93                    value_pattern: vp,
94                    is_referral_marketing: *is_referral_marketing,
95                },
96            })
97        }
98        SanitizerRule::RawReplace {
99            source,
100            provider,
101            url_pattern,
102            pattern,
103            replacement,
104            exceptions,
105        } => {
106            let p = try_re_ci(pattern)?;
107            let urlp = match url_pattern.as_deref() {
108                Some(p) => Some(try_re_ci(p)?),
109                None => None,
110            };
111            Some(CompiledRule {
112                source: *source,
113                provider: provider.clone(),
114                kind: RuleKind::RawReplace,
115                url_pattern: urlp,
116                exceptions: compile_exceptions(exceptions),
117                body: CompiledBody::RawReplace {
118                    pattern: p,
119                    replacement: replacement.clone(),
120                },
121            })
122        }
123        SanitizerRule::UnwrapRedirect {
124            source,
125            provider,
126            url_pattern,
127            pattern,
128            capture_group,
129            match_part,
130            target_encoding,
131            prepend_scheme,
132            target_template,
133            exceptions,
134        } => {
135            let p = try_re_ci(pattern)?;
136            let urlp = match url_pattern.as_deref() {
137                Some(p) => Some(try_re_ci(p)?),
138                None => None,
139            };
140            Some(CompiledRule {
141                source: *source,
142                provider: provider.clone(),
143                kind: RuleKind::UnwrapRedirect,
144                url_pattern: urlp,
145                exceptions: compile_exceptions(exceptions),
146                body: CompiledBody::UnwrapRedirect {
147                    pattern: p,
148                    capture_group: *capture_group as usize,
149                    match_part: *match_part,
150                    target_encoding: *target_encoding,
151                    prepend_scheme: *prepend_scheme,
152                    target_template: target_template.clone(),
153                },
154            })
155        }
156        SanitizerRule::BlockDomain {
157            source,
158            provider,
159            url_pattern,
160            exceptions,
161        } => {
162            let urlp = try_re_ci(url_pattern)?;
163            Some(CompiledRule {
164                source: *source,
165                provider: provider.clone(),
166                kind: RuleKind::BlockDomain,
167                url_pattern: Some(urlp),
168                exceptions: compile_exceptions(exceptions),
169                body: CompiledBody::BlockDomain,
170            })
171        }
172    }
173}
174
175impl Sanitizer {
176    pub fn compile(rules: &[SanitizerRule], options: SanitizerOptions) -> Sanitizer {
177        let compiled: Vec<CompiledRule> = rules.iter().filter_map(compile_one).collect();
178        Sanitizer {
179            rules: compiled,
180            options,
181        }
182    }
183
184    pub fn sanitize(&self, input: &str) -> SanitizeResult {
185        sanitize_with(&self.rules, &self.options, input)
186    }
187}