chromiumoxide/handler/blockers/
scripts.rs

1use crate::handler::blockers::Trie;
2
3lazy_static::lazy_static! {
4    /// Ignore list of scripts.
5    pub (crate) static ref URL_IGNORE_TRIE: Trie = {
6        let mut trie = Trie::new();
7        let patterns = [
8            "https://www.googletagservices.com/tag/",
9            "https://js.hs-analytics.net/analytics/",
10            "https://www.googletagmanager.com/",
11            "https://googletagmanager.com/",
12            "https://cm.g.doubleclick.net/",
13            "https://www.googleoptimize.com/optimize.js?id=GTM",
14            "https://ads.pubmatic.com/AdServer/",
15            "https://js.hsadspixel.net",
16            "https://www.google.com/adsense/",
17            "https://www.googleadservices.com",
18            "https://static.cloudflareinsights.com/",
19            "https://adservice.google.com",
20            "https://www.gstatic.com/cv/js/sender/",
21            "https://googleads.g.doubleclick.net",
22            "https://www.google-analytics.com",
23            "https://www.googleanalytics.com",
24            "https://cdn-cookieyes.com/client_data/",
25            "https://iabusprivacy.pmc.com/geo-info.js",
26            "https://support.webtasy.com/scripts/track_visit.php",
27            "https://cookie-cdn.cookiepro.com/consent",
28            "https://a.omappapi.com/app/js/api.min.js",
29            "https://static.hotjar.com/",
30            "https://cdn.eu.amplitude.com/script/",
31            "https://js.hscta.net/cta/current.js",
32            "https://cdn.refersion.com/refersion.js",
33            "https://web.cmp.usercentrics.eu/ui/loader.js",
34            "https://events.framer.com/script?v=2",
35            "https://load.sumome.com/",
36            "https://websdk.appsflyer.com/",
37            "https://s.pinimg.com/ct/core.js",
38            "https://www.mongoosemetrics.com/",
39            "https://geolocation-recommendations.shopifyapps.com/",
40            "https://consent.cookiebot.com/uc.js",
41            "https://w.usabilla.com/",
42            "https://consentcdn.cookiebot.com/",
43            "https://plausible.io/api/event",
44            "https://sentry.io/api/",
45            "https://cdn.onesignal.com/",
46            "https://cdn.cookielaw.org/",
47            "https://static.doubleclick.net",
48            "https://tools.luckyorange.com/",
49            "https://cdn.piano.io",
50            "https://px.ads.linkedin.com",
51            "https://connect.facebook.net",
52            "https://static-tracking.klaviyo.com/onsite/js/",
53            "https://tags.tiqcdn.com",
54            "https://tr.snapchat.com",
55            "https://ads.twitter.com",
56            "https://cdn.segment.com",
57            "https://stats.wp.com",
58            "https://analytics.",
59            "http://analytics.",
60            "https://cdn.cxense.com",
61            "https://cdn.tinypass.com",
62            "https://cd.connatix.com",
63            "https://js.chargebee.com/v2/chargebee.js",
64            "https://consent.cookiebot.com/",
65            "https://platform-api.sharethis.com/js/sharethis.js",
66            "https://js.hsforms.net/forms/embed/v2.js",
67            "https://static.parastorage.com/services/wix-thunderbolt/dist/",
68            "https://static.parastorage.com/services/tag-manager-client/",
69            "https://cdn.consentmanager.net/",
70            "https://static.parastorage.com/services/form-app/",
71            "https://www.datadoghq-browser-agent.com/",
72            "https://b.delivery.consentmanager.net/delivery/",
73            "https://tvem.cdn.turner.com/v2/",
74            "https://image6.pubmatic.com/AdServer/",
75            "https://www.digistore24.com/track/AFFILIATE/",
76            "https://i.cdn.turner.com/ads/adfuel/",
77            "https://featureassets.org",
78            "https://cdn.rudderlabs.com",
79            "https://script.hotjar.com/",
80            "https://cdn.branch.io/branch-latest.min.js",
81            "https://cdn.insurads.com/",
82            "https://cdn.segment.com/",
83            "https://analytics.tiktok.com/",
84            "https://cdn-ukwest.onetrust.com",
85            "https://cdn.onetrust.com",
86            "https://services.insurads.com/",
87            "https://platform.iteratehq.com/loader.js",
88            "https://static.ads-twitter.com/uwt.js",
89            "https://js.hsadspixel.net/fb.js",
90            "https://js.hs-banner.com/v2/",
91            "https://cdn.callrail.com/companies/",
92            "https://js.zi-scripts.com/zi-tag.js",
93            "https://connect.facebook.net/en_US/fbevents.js",
94            "https://cdn.shopify.com/shopifycloud/boomerang/shopify-boomerang-1.0.0.min.js",
95            "https://tag.marinsm.com/serve/",
96            "https://acdn.adnxs.com/ast/ast.js",
97            "https://schibsted-cdn.relevant-digital.com/static/tags/",
98            "https://bat.bing.net",
99            "https://tpc.googlesyndication.com/",
100            "https://cdn.petametrics.com/",
101            "https://cdn.doubleverify.com/",
102            "https://www.facebook.com/v17.0/plugins/like.php?",
103            "https://gum.criteo.com",
104            "https://js-sec.indexww.com",
105            "https://eus.rubiconproject.com/",
106            "https://eb2.3lift.com/",
107            "https://acdn.adnxs.com/",
108            "https://ssc-cms.33across.com/",
109            "https://static.addtoany.com/menu/",
110            "https://www.gstatic.com/cast/sdk/libs/sender/1.0/cast_framework.js",
111            "https://www.gstatic.com/eureka/clank/131/cast_sender.js",
112            "https://static.adsafeprotected.com/",
113            "https://ssum-sec.casalemedia.com/usermatch",
114            "https://cdn.brandmetrics.com/scripts/",
115            "https://cdn.confiant-integrations.net/",
116            "https://www.b2i.us/b2i/",
117            "https://acsbapp.com/apps/app/dist/js/app.js",
118            "https://cdn.doofinder.com/livelayer/",
119            "https://load.sumo.com/",
120            "https://cdn11.bigcommerce.com/",
121            "https://na.shgcdn3.com/collector.js",
122            "https://microapps.bigcommerce.com/bodl-events/index.js",
123            "https://checkout-sdk.bigcommerce.com/v1/loader.js",
124            "https://cdn.callrail.com/companies/",
125            "https://www.webtraxs.com/trxscript.php",
126            "https://diffuser-cdn.app-us1.com/diffuser/diffuser.js",
127            "https://try.abtasty.com/",
128            "https://imasdk.googleapis.com/js/sdkloader/ima3.js",
129            "https://cdn.registerdisney.go.com/v4/responder.js",
130            "https://cdn.registerdisney.go.com/v4/OneID.js",
131            "https://js-agent.newrelic.com/",
132            "https://bat.bing.com/bat.js",
133            "https://s1.hdslb.com/bfs/cm/cm-sdk/static/js/track-collect.js",
134            "https://consent.trustarc.com/",
135            "https://cdn-sitegainer.com/",
136            "https://yob9p0yb4y.kameleoon.eu/",
137            "https://api.clerk.io/v2/log/",
138            "https://cdn.noibu.com/",
139            "https://static.cloudflareinsights.com/beacon.min.js/",
140            "https://hm.baidu.com/hm.js",
141            "https://unpkg.zhimg.com/@efe/zhad-tracker",
142            "https://tracking.g2crowd.com/attribution_tracking/",
143            "https://snap.licdn.com/",
144            "https://www.ist-track.com/",
145            "https://www.redditstatic.com/ads/",
146            "https://verifi.podscribe.com/",
147            "https://script.crazyegg.com/",
148            "https://cdn.iubenda.com/",
149            "https://d34r8q7sht0t9k.cloudfront.net/tag.js",
150            "https://pagead2.googlesyndication.com/",
151            "https://a.klaviyo.com/onsite/track-analytics",
152            "https://apps.bazaarvoice.com/analytics/bv-analytics.js",
153            "https://mab.chartbeat.com/mab_strategy/",
154            "https://c.amazon-adsystem.com/",
155            "https://rumcdn.geoedge.be/",
156            "https://assets.adobedtm.com/extensions/",
157            "https://macro.adnami.io/macro/spec/adsm.macro.",
158            "https://log.medietall.no/analytics.js",
159            "https://cdn.siftscience.com/s.js",
160            "https://lwadm.com/lw/pbjs?",
161            "https://cl.k5a.io/",
162            "https://cdn-cookieyes.com/",
163            "https://s.kk-resources.com/leadtag.js",
164            "https://nexus.ensighten.com/",
165            "https://c.oracleinfinity.io/acs/account/fp3kyrmvtg/js/prod/odc.js",
166            "https://static-tracking.klaviyo.com/",
167            "https://cdn-widgetsrepository.yotpo.com/",
168            "https://a.klaviyo.com/onsite/track-analytics?",
169            "https://klaviyo.com/onsite/track-analytics?",
170            "https://s2.go-mpulse.net/",
171            "https://pbs.yahoo.com/",
172            "https://img1.wsimg.com/",
173            "https://invitejs.trustpilot.com/tp.min.js",
174            "https://ads.pubmatic.com/AdServer/js/",
175            "https://widgets.outbrain.com/nanoWidget/externals/obPixelFrame/obPixelFrame.js",
176            "https://widgets.outbrain.com/external/externals/intentiq.js",
177            "https://applets.ebxcdn.com/ebx.js",
178            "https://cdn.fuseplatform.net/publift/tags/",
179            "https://tag.rmp.rakuten.com/",
180            "https://analytics-api.",
181            "https://cdn.corvidae.ai/pixel.min.js",
182            "https://app.popt.in/pixel.js",
183            "https://js-agent.newrelic.com",
184            "https://d7d3cf2e81d293050033-3dfc0615b0fd7b49143049256703bfce.ssl.cf1.rackcdn.com/stf.js",
185            "https://geo.privacymanager.io/",
186            "https://script.dotmetrics.net/",
187            "//d2wy8f7a9ursnm.cloudfront.net/v8/bugsnag.min.js",
188            ".siteintercept.qualtrics.com",
189            ".sharethis.com",
190            ".newrelic.com",
191            ".googlesyndication.com",
192            ".amazon-adsystem.com",
193            ".onetrust.com",
194            "sc.omtrdc.net",
195            "doubleclick.net",
196            "hotjar.com",
197            "datadome.com",
198            "datadog-logs-us.js",
199            "tinypass.min.js",
200            ".airship.com",
201            ".adlightning.com",
202            ".lab.amplitude.",
203            // explicit ignore tracking.js and ad files
204            "privacy-notice.js",
205            "tracking.js",
206            "privacy_cookie.js",
207            "plugins/cookie-law-info/legacy/",
208            "ads.js",
209            "insight.min.js",
210            "assets/TrackingPixel",
211            "https://ads.",
212            "http://ads.",
213            ".pubmatic.com/AdServer/",
214            "https://tracking.",
215            "http://tracking.",
216            "https://static-tracking.",
217            // exp testin
218            // used for possible location outside
219            // "https://www.recaptcha.net/recaptcha/",
220            // "https://www.google.com/recaptcha/",
221            // "https://www.gstatic.com/recaptcha/",
222        ];
223        for pattern in &patterns {
224            trie.insert(pattern);
225        }
226        trie
227    };
228
229    /// Ignore list of scripts embedded or font extra.
230    pub(crate) static ref URL_IGNORE_EMBEDED_TRIE: Trie = {
231        let mut trie = Trie::new();
232        let patterns = [
233            "https://www.youtube.com/embed/",      // YouTube video embeds
234            "https://www.google.com/maps/embed?",  // Google Maps embeds
235            "https://maps.google.com", // Google maps iframe.
236            "https://player.vimeo.com/video/",     // Vimeo video embeds
237            "https://player.vimeo.com/api/player.js", // Vimeo video embeds
238            "https://open.spotify.com/embed/",     // Spotify music embeds
239            "https://w.soundcloud.com/player/",    // SoundCloud embeds
240            "https://platform.twitter.com/embed/", // Twitter embedded tweets
241            "https://www.instagram.com/embed.js",  // Instagram embeds
242            "https://www.facebook.com/plugins/",   // Facebook embeds (like posts and videos)
243            "https://cdn.embedly.com/widgets/",    // Embedly embeds
244            "https://player.twitch.tv/",           // Twitch video player embeds
245            "https://maps.googleapis.com/maps/", // Google map embeds
246            "https://www.youtube.com/player_api", // Youtube player.
247            "https://consentcdn.cookiebot.com", // Cookie bot
248            "https://www.youtube.com/iframe_api", // Youtube iframes.
249            "https://f.vimeocdn.com", // Vimeo EMBEDDINGS
250            "https://i.vimeocdn.com/",
251            "https://image2.pubmatic.com/AdServer/",
252            "https://ads.pubmatic.com/AdServer/js/",
253            "https://cdn.taboola.com/libtrc/static/topics/",
254            "https://pm-widget.taboola.com/",
255            "https://gum.criteo.com/syncframe",
256            // "https://www.youtube.com/s/player/", // Youtube player not needed usually since iframe_api is used mainly
257            // vercel live
258            "https://vercel.live/api/",
259
260            // extra CDN scripts
261            "https://cdn.readme.io/public/",
262            // font awesome
263            "https://use.fontawesome.com/",
264            // insight tracker
265            "https://insight.adsrvr.org/track/",
266            "http://www.google-analytics.com/ga.js",
267            "cxense.com/",
268            // snapchat tracker
269            "https://tr.snapchat.com/",
270            "https://buy.tinypass.com",
271            "https://nimbleplot.com/",
272            "https://my.actiondata.co/js/tracker.php",
273            "https://ajax.googleapis.com/ajax/libs/webfont/",
274            "http://cdn2.editmysite.com/",
275            // ignore font extras
276            "https://kit.fontawesome.com/",
277            "https://use.typekit.net",
278            ".amplitude.com",
279            ".rudderstack.com",
280            // ignore tailwind cdn
281            "https://cdn.tailwindcss.com",
282            // ignore extra ads
283            ".sharethis.com",
284            "amazon-adsystem.com",
285            ".vimeocdn.com",
286            "g.doubleclick.net",
287            "https://securepubads.g.doubleclick.net",
288            "googlesyndication.com",
289            "adsafeprotected.com",
290            // more google tracking
291            ".googlesyndication.com/safeframe/",
292            // repeat consent js
293            "/ccpa/user-consent.min.js",
294            "consent-manager",
295            "/cookiebanner/js/",
296            "cookielaw.org",
297            "bugsnag.min.js",
298            // privacy
299            "otBannerSdk.js",
300            "privacy-notice.js",
301            ".ingest.sentry.io/api",
302            // ignore amazon scripts for media
303            ".ssl-images-amazon.com/images/"
304        ];
305        for pattern in &patterns {
306            trie.insert(pattern);
307        }
308        trie
309    };
310
311    /// Ignore list of path scripts to ignore for tracking and analytics.
312    pub(crate) static ref URL_IGNORE_SCRIPT_BASE_PATHS: Trie = {
313        let mut trie = Trie::new();
314        let patterns = [
315            "wp-content/plugins/cookie-law-info",
316            "wp-content/js/rlt-proxy.js",
317            "wp-admin/rest-proxy/",
318            "wp-content/mu-plugins/a8c-analytics/",
319            "analytics/",
320            "cookie-tracking",
321        ];
322        for pattern in &patterns {
323            trie.insert(pattern);
324        }
325        trie
326    };
327
328    /// Ignore list of path scripts to ignore for themes.
329    pub (crate) static ref URL_IGNORE_SCRIPT_STYLES_PATHS: Trie = {
330        let mut trie = Trie::new();
331        let patterns = [
332            "wp-content/themes/",
333            "wp-content/plugins/dizo-image-hover/",
334            "wp-content/plugins/supreme-modules-pro-for-divi/",
335            "wp-content/plugins/page-builder-pmc/",
336            "wp-content/plugins/contact-form-7/",
337            "wp-content/plugins/responsive-lightbox/",
338            "wp-content/cache/breeze-minification/",
339            "wp-includes/js/mediaelement",
340            "wp-content/plugins/gravityforms/",
341            "wp-content/plugins/wp-rocket/assets/js/lazyload/",
342            "wp-content/plugins/w3-total-cache/",
343            "wp-content/js/bilmur.min.js",
344            "npm/bootstrap@"
345        ];
346        for pattern in &patterns {
347            trie.insert(pattern);
348        }
349        trie
350    };
351
352    /// Ignore list of scripts paths.
353    pub (crate) static ref URL_IGNORE_TRIE_PATHS: Trie = {
354        let mut trie = Trie::new();
355        let patterns = [
356            // explicit ignore tracking.js and ad files
357            "privacy-notice.js",
358            "tracking.js",
359            "track.js",
360            "ads.js",
361            "analytics.js",
362            "otSDKStub.js",
363            "otBannerSdk.js",
364            "_vercel/insights/script.js",
365            "analytics.",
366        ];
367        for pattern in &patterns {
368            trie.insert(pattern);
369        }
370        trie
371    };
372
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared driver for the trie tests: asserts that `contains_prefix`
    /// returns `true` for each entry in `expected_hits`, then `false` for
    /// each entry in `expected_misses`.
    ///
    /// `#[track_caller]` makes a failing assertion report the calling test's
    /// location rather than this helper's.
    #[track_caller]
    fn assert_trie_cases(trie: &Trie, expected_hits: &[&str], expected_misses: &[&str]) {
        for &hit in expected_hits {
            assert!(trie.contains_prefix(hit), "Trie should contain: {}", hit);
        }

        for &miss in expected_misses {
            assert!(
                !trie.contains_prefix(miss),
                "Trie should not contain: {}",
                miss
            );
        }
    }

    #[test]
    fn test_url_ignore_trie_contains() {
        assert_trie_cases(
            &URL_IGNORE_TRIE,
            // Expected to be reported as contained.
            &[
                "https://www.googletagservices.com/tag/",
                "https://www.google-analytics.com",
                "https://www.googleanalytics.com",
                ".newrelic.com",
                "privacy-notice.js",
            ],
            // Expected to be reported as absent.
            &[
                "https://not-a-tracked-url.com/script.js",
                "https://google.com",
            ],
        );
    }

    #[test]
    fn test_url_ignore_embedded_trie_contains() {
        assert_trie_cases(
            &URL_IGNORE_EMBEDED_TRIE,
            // Expected to be reported as contained.
            &[
                "https://www.youtube.com/embed/",
                "https://www.google.com/maps/embed?",
                ".amplitude.com",
            ],
            // Expected to be reported as absent.
            &[
                "https://secure-site.com/resource.js",
                "https://example.com/embed.js",
            ],
        );
    }

    #[test]
    fn test_url_ignore_script_base_paths_contains() {
        assert_trie_cases(
            &URL_IGNORE_SCRIPT_BASE_PATHS,
            // Expected to be reported as contained.
            &["wp-content/plugins/cookie-law-info", "analytics/"],
            // Expected to be reported as absent.
            &[
                "wp-content/some-untracked-plugin/",
                "random/path/analytics.js",
            ],
        );
    }

    #[test]
    fn test_url_ignore_script_style_paths_contains() {
        assert_trie_cases(
            &URL_IGNORE_SCRIPT_STYLES_PATHS,
            // Expected to be reported as contained.
            &["wp-content/themes/", "npm/bootstrap@"],
            // Expected to be reported as absent.
            &[
                "wp-content/some-other-theme/",
                "wp-content/plugins/untracked-plugin/",
            ],
        );
    }

    #[test]
    fn test_url_ignore_trie_paths_contains() {
        assert_trie_cases(
            &URL_IGNORE_TRIE_PATHS,
            // Expected to be reported as contained.
            &["privacy-notice.js", "tracking.js"],
            // Expected to be reported as absent.
            &["non-ignored.js", "non-related/tracking.js"],
        );
    }
}