https_everywhere_lib_core/
rulesets.rs

1use std::rc::Rc;
2use std::collections::BTreeMap;
3
4#[cfg(feature="add_rulesets")]
5use serde_json::Value;
6#[cfg(feature="add_rulesets")]
7use crate::strings::ERROR_SERDE_PARSE;
8#[cfg(feature="add_rulesets")]
9use std::collections::HashMap;
10#[cfg(feature="rewriter")]
11use regex::Regex;
12
13
/// Value passed as the `enable_mixed_rulesets` argument by the tests in this file when loading
/// rulesets. It is also compiled in under the `updater` feature (presumably for the same
/// purpose — confirm against the updater code).
#[cfg(any(all(test,feature="add_rulesets"),feature="updater"))]
pub const ENABLE_MIXED_RULESETS: bool = true;

#[cfg(any(all(test,feature="add_rulesets"),feature="updater"))]
lazy_static!{
    // An empty ruleset-name -> active-flag map, i.e. no per-ruleset overrides. The tests in this
    // file pass it as the `ruleset_active_states` argument when loading rulesets.
    pub static ref RULE_ACTIVE_STATES: HashMap<String, bool> = HashMap::new();
}

#[cfg(feature="rewriter")]
lazy_static!{
    // Matches a leading `http:`; built once and reused by `RuleSet::apply` for `Rule::Trivial`
    // so the trivial rewrite does not need to compile a regex on every call.
    pub static ref TRIVIAL_REGEX: Regex = Regex::new(r"^http:").unwrap();
}
26
27
/// Names the string constants used while reading rulesets from JSON: the object keys that are
/// looked up, plus two marker values (`mixed_content`, `user_rule`) that field contents are
/// compared against.
#[cfg(feature="add_rulesets")]
struct StaticJsonStrings {
    default_off: &'static str,
    exclusion: &'static str,
    from: &'static str,
    host: &'static str,
    // Compared against the value of the `platform` field, not used as a key
    mixed_content: &'static str,
    name: &'static str,
    platform: &'static str,
    rule: &'static str,
    securecookie: &'static str,
    target: &'static str,
    to: &'static str,
    // Compared against the value of the `default_off` field, not used as a key
    user_rule: &'static str,
}
43
/// The concrete strings used when parsing ruleset JSON. Keeping them in one place means the
/// parsing code refers to `JSON_STRINGS.<field>` instead of repeating string literals.
#[cfg(feature="add_rulesets")]
const JSON_STRINGS: StaticJsonStrings = StaticJsonStrings {
    default_off: "default_off",
    exclusion: "exclusion",
    from: "from",
    host: "host",
    // Value of `platform` that marks a ruleset as triggering mixed content blocking
    mixed_content: "mixedcontent",
    name: "name",
    platform: "platform",
    rule: "rule",
    securecookie: "securecookie",
    target: "target",
    to: "to",
    // Value of `default_off` that does NOT turn the ruleset off by default
    user_rule: "user rule",
};
59
/// A Rule is used to rewrite URLs from some regular expression to some string
#[derive(Debug)]
pub enum Rule {
    /// The plain `^http:` -> `https:` scheme upgrade
    Trivial,
    /// Any other rewrite, stored as `(from_regex, to)`
    NonTrivial(String, String)
}

impl Rule {

    /// Builds a rule from a regex source and its replacement string.
    ///
    /// The exact pair `^http:` / `https:` is represented as [`Rule::Trivial`]; every other
    /// pair is kept verbatim in [`Rule::NonTrivial`].
    ///
    /// # Arguments
    ///
    /// * `from_regex` - A string that will be compiled to regex indicating the URL to replace
    /// * `to` - A string indicating the replacement value
    pub fn new(from_regex: String, to: String) -> Rule {
        let upgrades_scheme_only = from_regex == "^http:" && to == "https:";
        if upgrades_scheme_only {
            return Rule::Trivial;
        }

        Rule::NonTrivial(from_regex, to)
    }

}
84
85
/// A CookieRule is used to secure cookies which conform to some name and host constraints
#[derive(Debug)]
pub struct CookieRule {
    /// Regex source describing the host of the cookie
    pub host_regex: String, // RegExp
    /// Regex source describing the name of the cookie
    pub name_regex: String // RegExp
}

impl CookieRule {

    /// Returns a cookierule with the host and name regex specified
    ///
    /// Both strings are stored as given; nothing is compiled or validated here.
    ///
    /// # Arguments
    ///
    /// * `host_regex` - A string that will be compiled to regex indicating the host of the cookie
    /// * `name_regex` - A string that will be compiled to regex indicating the name of the cookie
    pub fn new(host_regex: String, name_regex: String) -> CookieRule {
        Self { host_regex, name_regex }
    }
}
108
109
110/// A RuleSet is a grouping of rules which act on some target
111#[derive(Debug)]
112pub struct RuleSet {
113    pub name: String,
114    pub rules: Vec<Rule>,
115    pub exclusions: Option<String>, // RegExp
116    pub cookierules: Option<Vec<CookieRule>>,
117    pub active: bool,
118    pub default_state: bool,
119    pub scope: Rc<Option<String>>, // RegExp
120    pub note: Option<String>
121}
122
123impl RuleSet {
124
125    /// Returns a ruleset with the name and scope specified
126    ///
127    /// # Arguments
128    ///
129    /// * `name` - A string that holds the name of the ruleset
130    /// * `scope` - An optional string slice specifying the scope of the ruleset
131    pub fn new(name: String, scope: Rc<Option<String>>) -> RuleSet {
132        RuleSet {
133            name,
134            rules: vec![],
135            exclusions: None,
136            cookierules: None,
137            active: true,
138            default_state: true,
139            scope,
140            note: None
141        }
142    }
143
144    #[cfg(feature="add_rulesets")]
145    pub(crate) fn add_rules(&mut self, rules: &Vec<Value>) {
146        for rule in rules {
147            if let Value::Object(rule) = rule {
148                let from = match rule.get(JSON_STRINGS.from) {
149                    Some(Value::String(from)) => from.to_string(),
150                    _ => String::new(),
151                };
152                let to = match rule.get(JSON_STRINGS.to) {
153                    Some(Value::String(to)) => to.to_string(),
154                    _ => String::new(),
155                };
156                self.rules.push(Rule::new(from, to));
157            }
158        }
159    }
160
161    #[cfg(feature="add_rulesets")]
162    pub(crate) fn add_exclusions(&mut self, exclusions: &Vec<Value>) {
163        let mut exclusions_vec = vec![];
164        for exclusion in exclusions {
165            if let Value::String(exclusion) = exclusion {
166                exclusions_vec.push(exclusion.to_string());
167            }
168        }
169
170        self.exclusions = Some(exclusions_vec.join("|"));
171    }
172
173    #[cfg(feature="add_rulesets")]
174    pub(crate) fn add_cookierules(&mut self, cookierules: &Vec<Value>) {
175        let mut cookierules_vec = vec![];
176
177        for cookierule in cookierules {
178            if let Value::Object(cookierule) = cookierule {
179                let host = match cookierule.get(JSON_STRINGS.host) {
180                    Some(Value::String(host)) => host.to_string(),
181                    _ => String::new(),
182                };
183                let name = match cookierule.get(JSON_STRINGS.name) {
184                    Some(Value::String(name)) => name.to_string(),
185                    _ => String::new(),
186                };
187
188                cookierules_vec.push(
189                    CookieRule::new(
190                        host,
191                        name));
192            }
193        }
194
195        self.cookierules = Some(cookierules_vec);
196    }
197
198    #[cfg(feature="rewriter")]
199    pub(crate) fn apply(&self, url: &str) -> Option<String> {
200        // If we're covered by an exclusion, return
201        if !self.exclusions.is_none() {
202            let exclusions_regex = Regex::new(&self.exclusions.clone().unwrap()).unwrap();
203            if exclusions_regex.is_match(&url) {
204               debug!("Excluded url: {}", url);
205               return None;
206            }
207        }
208
209        for rule in self.rules.iter() {
210            match rule {
211                Rule::Trivial => {
212                    return Some(TRIVIAL_REGEX.replace_all(url, "https:").to_string());
213                }
214                Rule::NonTrivial(from_regex, to) => {
215                    let from_regex = Regex::new(from_regex).unwrap();
216                    let returl = from_regex.replace_all(url, &to[..]).to_string();
217                    if returl != url {
218                        return Some(returl);
219                    }
220                }
221            }
222        }
223        None
224    }
225}
226
227
228/// RuleSets consists of a tuple btreemap of rulesets, keyed by some target FQDN
229#[derive(Debug)]
230pub struct RuleSets(pub BTreeMap<String, Vec<Rc<RuleSet>>>);
231
232impl RuleSets {
233
234    /// Returns a new rulesets struct
235    pub fn new() -> RuleSets {
236        RuleSets(BTreeMap::new())
237    }
238
239    /// Returns the number of targets in the current RuleSets struct as a `usize`
240    pub fn count_targets(&self) -> usize {
241        self.0.len()
242    }
243
244    /// Clears the ruleset btreemap of all values
245    pub fn clear(&mut self) {
246        self.0.clear();
247    }
248
249    /// Construct and add new rulesets given a json string of values
250    ///
251    /// # Arguments
252    ///
253    /// * `json_string` - A json string representing the rulesets to add
254    /// * `enable_mixed_rulesets` - A bool indicating whether rulesets which trigger mixed
255    /// content blocking should be enabled
256    /// * `rule_active_states` - A HashMap which lets us know whether rulesets have been disabled
257    /// or enabled
258    /// * `scope` - An optional string which indicates the scope of the current batch of rulesets
259    /// being added (see the [ruleset update channels](https://github.com/EFForg/https-everywhere/blob/master/docs/en_US/ruleset-update-channels.md) documentation)
260    #[cfg(feature="add_rulesets")]
261    pub fn add_all_from_json_string(&mut self, json_string: &String, enable_mixed_rulesets: &bool, ruleset_active_states: &HashMap<String, bool>, scope: &Option<String>) {
262        let rulesets: Value = serde_json::from_str(&json_string).expect(ERROR_SERDE_PARSE);
263        self.add_all_from_serde_value(rulesets, enable_mixed_rulesets, ruleset_active_states, scope);
264    }
265
266    #[cfg(feature="add_rulesets")]
267    pub fn add_all_from_serde_value(&mut self, rulesets: Value, enable_mixed_rulesets: &bool, ruleset_active_states: &HashMap<String, bool>, scope: &Option<String>) {
268        let scope: Rc<Option<String>> = Rc::new(scope.clone());
269
270        let mut add_one_from_json = |ruleset: Value| {
271            if let Value::Object(ruleset) = ruleset {
272                let ruleset_name: String;
273                let mut default_state = true;
274                let mut note = String::new();
275
276                if let Some(Value::String(default_off)) = ruleset.get(JSON_STRINGS.default_off) {
277                    if default_off != &JSON_STRINGS.user_rule {
278                        default_state = false;
279                    }
280                    note.push_str(default_off);
281                    note.push_str("\n");
282                }
283
284                if let Some(Value::String(platform)) = ruleset.get(JSON_STRINGS.platform) {
285                    if platform == &JSON_STRINGS.mixed_content {
286                        if !enable_mixed_rulesets {
287                            default_state = false;
288                        }
289                    } else {
290                        default_state = false;
291                    }
292
293                    note.push_str("Platform(s): ");
294                    note.push_str(platform);
295                    note.push_str("\n");
296                }
297
298                let mut active = default_state;
299                if let Some(Value::String(name)) = ruleset.get(JSON_STRINGS.name) {
300                    ruleset_name = name.to_string();
301
302                    match ruleset_active_states.get(&ruleset_name) {
303                        Some(false) => { active = false; }
304                        Some(true) => { active = true; }
305                        _ => {}
306                    }
307
308                    let mut rs = RuleSet::new(ruleset_name, Rc::clone(&scope));
309                    rs.default_state = default_state;
310                    rs.note = match note.len() {
311                        0 => None,
312                        _ => Some(note.trim().to_string())
313                    };
314
315                    rs.active = active;
316
317                    if let Some(Value::Array(rules)) = ruleset.get(JSON_STRINGS.rule) {
318                        rs.add_rules(rules);
319                    }
320
321                    if let Some(Value::Array(exclusions)) = ruleset.get(JSON_STRINGS.exclusion) {
322                        rs.add_exclusions(exclusions);
323                    }
324
325                    if let Some(Value::Array(securecookies)) = ruleset.get(JSON_STRINGS.securecookie) {
326                        rs.add_cookierules(securecookies);
327                    }
328
329                    let rs_rc = Rc::new(rs);
330                    if let Some(Value::Array(targets)) = ruleset.get(JSON_STRINGS.target) {
331                        for target in targets {
332                            if let Value::String(target) = target {
333                                match self.0.get_mut(target) {
334                                    Some(rs_vec) => {
335                                        rs_vec.push(Rc::clone(&rs_rc));
336                                    },
337                                    None => {
338                                        self.0.insert(target.to_string(), vec![Rc::clone(&rs_rc)]);
339                                    }
340                                }
341                            }
342                        }
343                    }
344                }
345            }
346        };
347
348        if let Value::Array(rulesets) = rulesets {
349            for ruleset in rulesets {
350                add_one_from_json(ruleset);
351            }
352        }
353    }
354
355    /// Return a vector of rulesets that apply to the given host
356    ///
357    /// # Arguments
358    ///
359    /// * `host` - A string which indicates the host to search for potentially applicable rulesets
360    #[cfg(feature="potentially_applicable")]
361    pub fn potentially_applicable(&self, host: &String) -> Vec<Rc<RuleSet>> {
362        let mut results = vec![];
363
364        self.try_add(&mut results, host);
365
366        // Ensure host is well-formed (RFC 1035)
367        if host.len() <= 0 || host.len() > 255 || host.find("..").is_some() {
368            return results;
369        }
370
371        // Replace www.example.com with www.example.*
372        // eat away from the right for once and only once
373        let mut segmented: Vec<&str> = host.split('.').collect();
374        let last_index = segmented.len() - 1;
375        let tld = segmented[last_index];
376
377        segmented[last_index] = "*";
378        let tmp_host = segmented.join(".");
379        self.try_add(&mut results, &tmp_host);
380        segmented[last_index] = tld;
381
382        // now eat away from the left, with *, so that for x.y.z.google.com we
383        // check *.y.z.google.com, *.z.google.com and *.google.com
384        for index in 0..(segmented.len() - 1) {
385            let mut segmented_tmp = segmented.clone();
386            segmented_tmp[index] = "*";
387            let tmp_host = segmented_tmp.join(".");
388            self.try_add(&mut results, &tmp_host);
389        }
390
391        results
392    }
393
394    #[cfg(feature="potentially_applicable")]
395    fn try_add(&self, results: &mut Vec<Rc<RuleSet>>, host: &String) {
396        if self.0.contains_key(host) {
397            if let Some(rulesets) = self.0.get(host) {
398                for ruleset in rulesets {
399                    results.push(Rc::clone(ruleset));
400                }
401            }
402        }
403    }
404}
405
#[cfg(all(test,feature="add_rulesets"))]
pub mod tests {
    use super::*;
    use std::fs;

    /// Reads the mock rulesets fixture from disk.
    fn mock_rulesets_json() -> String {
        fs::read_to_string("tests/mock_rulesets.json").unwrap()
    }

    /// Loads the mock rulesets into `rs`, using the default mixed-content setting, the empty
    /// active-state map and no scope.
    pub fn add_mock_rulesets(rs: &mut RuleSets) {
        let json = mock_rulesets_json();
        rs.add_all_from_json_string(&json, &ENABLE_MIXED_RULESETS, &RULE_ACTIVE_STATES, &None);
    }

    /// Returns a fresh `RuleSets` with the mock rulesets already loaded.
    fn mock_rulesets() -> RuleSets {
        let mut loaded = RuleSets::new();
        add_mock_rulesets(&mut loaded);
        loaded
    }

    /// Number of mock rulesets reported as potentially applicable to `host`.
    #[cfg(feature="potentially_applicable")]
    fn applicable_count(host: &str) -> usize {
        mock_rulesets().potentially_applicable(&String::from(host)).len()
    }

    #[test]
    fn adds_targets_correctly() {
        assert_eq!(mock_rulesets().count_targets(), 28);
    }

    #[test]
    fn rulesets_represented_correctly() {
        let loaded = mock_rulesets();

        let expected = fs::read_to_string("tests/rulesets_representation.txt").unwrap();
        assert_eq!(format!("{:?}", loaded), expected);
    }

    #[test]
    #[cfg(feature="potentially_applicable")]
    fn potentially_applicable() {
        assert_eq!(applicable_count("1fichier.com"), 1);
    }

    #[test]
    #[cfg(feature="potentially_applicable")]
    fn potentially_applicable_left_wildcard() {
        assert_eq!(applicable_count("foo.storage.googleapis.com"), 1);
    }

    #[test]
    #[cfg(feature="potentially_applicable")]
    fn potentially_applicable_no_matches() {
        assert_eq!(applicable_count("nonmatch.example.com"), 0);
    }
}