1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
//! In adblocking terms, [`Resource`]s are special placeholder scripts, images,
//! video files, etc. that can be returned as drop-in replacements for harmful
//! equivalents from remote servers. Resources also encompass scriptlets, which
//! can be injected into pages to inhibit malicious behavior.
//!
//! If the `resource-assembler` feature is enabled, the
#![cfg_attr(not(feature = "resource-assembler"), doc="`resource_assembler`")]
#![cfg_attr(feature = "resource-assembler", doc="[`resource_assembler`]")]
//! module will assist with the construction of [`Resource`]s directly from the uBlock Origin
//! project.

#[cfg(feature = "resource-assembler")]
pub mod resource_assembler;

mod resource_storage;
#[doc(inline)]
pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError};
pub(crate) use resource_storage::parse_scriptlet_args;

use memchr::memrchr as find_char_reverse;
use serde::{Deserialize, Serialize};

/// Specifies a set of permissions required to inject a scriptlet resource.
///
/// The mask wraps a single `u8`, so it can hold up to 8 independent permission bits. This module
/// does not assign a fixed meaning to any bit; callers define their own constants, as in the
/// example below. The [`Default`] value has no bits set. Because of `#[serde(transparent)]`, the
/// mask is (de)serialized as the bare integer rather than as a wrapper structure.
///
/// Permissions can be specified when parsing individual lists using [`crate::FilterSet`] in
/// order to propagate the permission level to all filters contained in the list.
///
/// In practice, permissions are used to limit the risk of third-party lists having access to
/// powerful scriptlets like uBlock Origin's `trusted-set-cookie`, which has the ability to set
/// arbitrary cookies to arbitrary values on visited sites.
///
/// ### Example
///
/// ```
/// # use adblock::Engine;
/// # use adblock::lists::ParseOptions;
/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceType};
/// # let mut filter_set = adblock::lists::FilterSet::default();
/// # let untrusted_filters = vec![""];
/// # let trusted_filters = vec![""];
/// const COOKIE_ACCESS: PermissionMask = PermissionMask::from_bits(0b00000001);
/// const LOCALSTORAGE_ACCESS: PermissionMask = PermissionMask::from_bits(0b00000010);
///
/// // `untrusted_filters` will not be able to use privileged scriptlet injections.
/// filter_set.add_filters(
///     untrusted_filters,
///     Default::default(),
/// );
/// // `trusted_filters` will be able to inject scriptlets requiring `COOKIE_ACCESS`
/// // permissions or `LOCALSTORAGE_ACCESS` permissions.
/// filter_set.add_filters(
///     trusted_filters,
///     ParseOptions {
///         permissions: COOKIE_ACCESS | LOCALSTORAGE_ACCESS,
///         ..Default::default()
///     },
/// );
///
/// let mut engine = Engine::from_filter_set(filter_set, true);
/// // The `trusted-set-cookie` scriptlet cannot be injected without `COOKIE_ACCESS`
/// // permission.
/// engine.add_resource(Resource {
///     name: "trusted-set-cookie.js".to_string(),
///     aliases: vec![],
///     kind: ResourceType::Mime(MimeType::ApplicationJavascript),
///     content: base64::encode("document.cookie = '...';"),
///     dependencies: vec![],
///     permission: COOKIE_ACCESS,
/// });
/// ```
#[derive(Serialize, Deserialize, Clone, Copy, Default)]
#[repr(transparent)]
#[serde(transparent)]
pub struct PermissionMask(u8);

impl std::fmt::Debug for PermissionMask {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "PermissionMask({:b})", self.0)
    }
}

impl core::ops::BitOr<PermissionMask> for PermissionMask {
    type Output = PermissionMask;

    fn bitor(self, rhs: PermissionMask) -> Self::Output {
        Self(self.0 | rhs.0)
    }
}

impl core::ops::BitOrAssign<PermissionMask> for PermissionMask {
    fn bitor_assign(&mut self, rhs: PermissionMask) {
        self.0 |= rhs.0;
    }
}

impl PermissionMask {
    /// Construct a new [`PermissionMask`] with the given bitmask. Use
    /// [`PermissionMask::default()`] instead if you don't want to restrict or grant any
    /// permissions.
    pub const fn from_bits(bits: u8) -> Self {
        Self(bits)
    }

    /// Can `filter_mask` authorize injecting a resource requiring `self` permissions?
    pub fn is_injectable_by(&self, filter_mask: PermissionMask) -> bool {
        // For any particular bit index, the scriptlet is injectable if:
        //  (there is a requirement, AND the filter meets it) OR (there's no requirement)
        // in other words:
        //  (self & filter_mask) | (!self) == 1
        //  (self | !self) & (filter_mask | !self) == 1
        //  filter_mask | !self == 1
        //  !(filter_mask | !self) == 0
        //  !filter_mask & self == 0
        // which we can compare across *all* bits using bitwise operations, hence:
        !filter_mask.0 & self.0 == 0
    }

    /// The default value for [`PermissionMask`] is one which provides no additional permissions.
    fn is_default(&self) -> bool {
        self.0 == 0
    }
}

/// Struct representing a resource that can be used by an adblocking engine.
///
/// Implements [`Debug`](std::fmt::Debug) so that resources can be inspected in logs, assertion
/// messages and `{:?}` output of containing types. Note that the debug output includes the full
/// base64 `content` string.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Resource {
    /// Represents the primary name of the resource, often a filename
    pub name: String,
    /// Represents secondary names that can be used to access the resource. Defaults to an empty
    /// list when absent from the serialized form.
    #[serde(default)]
    pub aliases: Vec<String>,
    /// How to interpret the resource data within `content`
    pub kind: ResourceType,
    /// The resource data, encoded using standard base64 configuration
    pub content: String,
    /// Optionally contains the name of any dependencies used by this resource. Currently, this
    /// only applies to `application/javascript` and `fn/javascript` MIME types.
    ///
    /// Aliases should never be added to this list. It should only contain primary/canonical
    /// resource names.
    ///
    /// Omitted from the serialized form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub dependencies: Vec<String>,
    /// Optionally defines permission levels required to use this resource for a scriptlet
    /// injection. See [`PermissionMask`] for more details.
    ///
    /// If there is any customized permission, this resource cannot be used for redirects.
    ///
    /// This field is similar to the `requiresTrust` field from uBlock Origin's scriptlet
    /// resources, except that it supports up to 8 different trust "domains".
    ///
    /// Omitted from the serialized form when it is the default (empty) mask.
    #[serde(default, skip_serializing_if = "PermissionMask::is_default")]
    pub permission: PermissionMask,
}

impl Resource {
    /// Test-only shorthand for building a [`Resource`] of the given MIME `kind` that has no
    /// aliases, no dependencies and default permissions. The plain-text `content` is
    /// base64-encoded here, so callers pass it unencoded.
    #[cfg(test)]
    pub fn simple(name: &str, kind: MimeType, content: &str) -> Self {
        let encoded_content = base64::encode(content);
        Resource {
            name: String::from(name),
            aliases: Vec::new(),
            kind: ResourceType::Mime(kind),
            content: encoded_content,
            dependencies: Vec::new(),
            permission: PermissionMask::default(),
        }
    }
}

/// Different ways that the data within the `content` field of a `Resource` can be interpreted.
///
/// Because of `#[serde(rename_all = "lowercase")]`, serde refers to the variants by the names
/// `mime` and `template` when (de)serializing this enum.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ResourceType {
    /// Interpret the data according to the MIME type carried by this variant (see [`MimeType`])
    Mime(MimeType),
    /// Interpret the data as a Javascript scriptlet template, with embedded template
    /// parameters in the form of `{{1}}`, `{{2}}`, etc. Note that `Mime(ApplicationJavascript)`
    /// can still be used as a templated resource, for compatibility purposes.
    Template,
}

impl ResourceType {
    /// Reports whether a resource of this type may be served as a network redirect. Templates
    /// and `fn/javascript` resources may not; every other MIME type may.
    pub fn supports_redirect(&self) -> bool {
        match self {
            ResourceType::Template => false,
            ResourceType::Mime(mime) => !matches!(mime, MimeType::FnJavascript),
        }
    }

    /// Reports whether a resource of this type may be injected as a scriptlet. Only templates
    /// and `application/javascript` resources may.
    pub fn supports_scriptlet_injection(&self) -> bool {
        match self {
            ResourceType::Template => true,
            ResourceType::Mime(mime) => matches!(mime, MimeType::ApplicationJavascript),
        }
    }
}

/// Acceptable MIME types for resources used by `$redirect` and `+js(...)` adblock rules.
///
/// Serde represents this enum by its MIME string rather than by variant name: serialization
/// converts through `From<MimeType> for &str`, and deserialization converts through
/// `From<Cow<'static, str>> for MimeType`. A string that is not recognized deserializes to
/// [`MimeType::Unknown`], which in turn serializes as `"application/octet-stream"`, so the
/// original string of an unrecognized type is not preserved across a round trip.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(into = "&str")]
#[serde(from = "std::borrow::Cow<'static, str>")]
pub enum MimeType {
    /// `"text/css"`
    TextCss,
    /// `"image/gif"`
    ImageGif,
    /// `"text/html"`
    TextHtml,
    /// `"application/javascript"`
    ApplicationJavascript,
    /// `"application/json"`
    ApplicationJson,
    /// `"audio/mp3"`
    AudioMp3,
    /// `"video/mp4"`
    VideoMp4,
    /// `"image/png"`
    ImagePng,
    /// `"text/plain"`
    TextPlain,
    /// `"text/xml"`
    TextXml,
    /// Custom MIME type invented for the uBlock Origin project. Represented by `"fn/javascript"`.
    /// Used to describe JavaScript functions that can be used as dependencies of other JavaScript
    /// resources.
    FnJavascript,
    /// Any other unhandled MIME type. Maps to `"application/octet-stream"` when re-serialized.
    Unknown,
}

impl MimeType {
    /// Guesses the MIME type of a resource from the text following the last `.` in its path.
    ///
    /// The comparison is exact (case-sensitive). A path without any `.`, or with an extension
    /// that is not listed below, yields [`MimeType::Unknown`].
    pub fn from_extension(resource_path: &str) -> Self {
        let dot_index = match find_char_reverse(b'.', resource_path.as_bytes()) {
            Some(index) => index,
            None => return MimeType::Unknown,
        };

        // `.` is a single ASCII byte, so the byte after it is always a valid `str` boundary.
        let extension = &resource_path[dot_index + 1..];
        match extension {
            "css" => MimeType::TextCss,
            "gif" => MimeType::ImageGif,
            "html" => MimeType::TextHtml,
            "js" => MimeType::ApplicationJavascript,
            "json" => MimeType::ApplicationJson,
            "mp3" => MimeType::AudioMp3,
            "mp4" => MimeType::VideoMp4,
            "png" => MimeType::ImagePng,
            "txt" => MimeType::TextPlain,
            "xml" => MimeType::TextXml,
            _ => {
                // Only reported in test builds, to surface extensions missing from this table.
                #[cfg(test)]
                eprintln!("Unrecognized file extension on: {:?}", resource_path);
                MimeType::Unknown
            }
        }
    }

    /// Reports whether content of this MIME type is expected to decode as valid UTF8.
    pub fn is_textual(&self) -> bool {
        match self {
            MimeType::TextCss
            | MimeType::TextHtml
            | MimeType::TextPlain
            | MimeType::TextXml
            | MimeType::ApplicationJavascript
            | MimeType::ApplicationJson
            | MimeType::FnJavascript => true,
            MimeType::ImageGif
            | MimeType::ImagePng
            | MimeType::AudioMp3
            | MimeType::VideoMp4
            | MimeType::Unknown => false,
        }
    }

    /// Reports whether resources of this MIME type may declare dependencies on other resources.
    /// Only the two JavaScript types may.
    pub fn supports_dependencies(&self) -> bool {
        matches!(
            self,
            MimeType::FnJavascript | MimeType::ApplicationJavascript
        )
    }
}

impl From<&str> for MimeType {
    fn from(v: &str) -> Self {
        match v {
            "text/css" => MimeType::TextCss,
            "image/gif" => MimeType::ImageGif,
            "text/html" => MimeType::TextHtml,
            "application/javascript" => MimeType::ApplicationJavascript,
            "application/json" => MimeType::ApplicationJson,
            "audio/mp3" => MimeType::AudioMp3,
            "video/mp4" => MimeType::VideoMp4,
            "image/png" => MimeType::ImagePng,
            "text/plain" => MimeType::TextPlain,
            "text/xml" => MimeType::TextXml,
            "fn/javascript" => MimeType::FnJavascript,
            _ => MimeType::Unknown,
        }
    }
}

impl From<&MimeType> for &str {
    /// Returns the canonical MIME string for `mime_type`. [`MimeType::Unknown`] is reported as
    /// `"application/octet-stream"`.
    fn from(mime_type: &MimeType) -> Self {
        match *mime_type {
            // Textual document types
            MimeType::TextCss => "text/css",
            MimeType::TextHtml => "text/html",
            MimeType::TextPlain => "text/plain",
            MimeType::TextXml => "text/xml",
            // Script and data types
            MimeType::ApplicationJavascript => "application/javascript",
            MimeType::ApplicationJson => "application/json",
            MimeType::FnJavascript => "fn/javascript",
            // Binary media types
            MimeType::ImageGif => "image/gif",
            MimeType::ImagePng => "image/png",
            MimeType::AudioMp3 => "audio/mp3",
            MimeType::VideoMp4 => "video/mp4",
            // Fallback for anything that was not recognized
            MimeType::Unknown => "application/octet-stream",
        }
    }
}

// Required for `#[serde(from = "std::borrow::Cow<'static, str>")]`
impl From<std::borrow::Cow<'static, str>> for MimeType {
    fn from(v: std::borrow::Cow<'static, str>) -> Self {
        v.as_ref().into()
    }
}

// Backs `#[serde(into = "&str")]` on `MimeType`, which needs a by-value conversion.
impl From<MimeType> for &str {
    /// Returns the MIME string of an owned [`MimeType`] by delegating to the by-reference
    /// conversion.
    fn from(v: MimeType) -> Self {
        <&str>::from(&v)
    }
}

impl std::fmt::Display for MimeType {
    /// Writes the canonical MIME string for this type (e.g. `text/css`).
    ///
    /// The string is emitted through [`std::fmt::Formatter::pad`], so width, fill, alignment and
    /// precision flags such as `{:>24}` are honored, exactly as they are for a plain `&str`.
    /// Plain `{}` formatting and `to_string()` output are unaffected.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s: &str = self.into();
        // `write!(f, "{}", s)` would format `s` with a fresh, default formatter and silently
        // drop any flags the caller supplied; `pad` applies them.
        f.pad(s)
    }
}

#[cfg(test)]
mod permission_tests {
    use super::*;

    /// Filter-side masks that every resource mask in the table is checked against, in order.
    const FILTER_MASKS: [u8; 6] = [
        0b00000000, 0b00000001, 0b00000010, 0b00000011, 0b10000000, 0b11111111,
    ];

    /// Checks `is_injectable_by` for a table of resource masks against each of `FILTER_MASKS`.
    #[test]
    fn test_permissions() {
        // Each row pairs a resource mask with the expected result for each entry of
        // `FILTER_MASKS`, in the same order.
        let cases: [(u8, [bool; 6]); 5] = [
            (0b00000000, [true, true, true, true, true, true]),
            (0b00000001, [false, true, false, true, false, true]),
            (0b00000010, [false, false, true, true, false, true]),
            (0b00000011, [false, false, false, true, false, true]),
            (0b10000011, [false, false, false, false, false, true]),
        ];

        for (resource_bits, expectations) in cases.iter() {
            let resource = PermissionMask(*resource_bits);
            for (filter_bits, expected) in FILTER_MASKS.iter().zip(expectations.iter()) {
                assert_eq!(
                    resource.is_injectable_by(PermissionMask(*filter_bits)),
                    *expected,
                    "resource {:#010b} vs filter {:#010b}",
                    resource_bits,
                    filter_bits,
                );
            }
        }
    }
}