1use crate::handler::blockers::Trie;
2
3lazy_static::lazy_static! {
4 pub (crate) static ref URL_IGNORE_TRIE: Trie = {
6 let mut trie = Trie::new();
7 let patterns = [
8 "https://www.googletagservices.com/tag/",
9 "https://js.hs-analytics.net/analytics/",
10 "https://www.googletagmanager.com/gtag",
11 "https://www.googletagmanager.com/gtm.js",
12 "https://cm.g.doubleclick.net/",
13 "https://ads.pubmatic.com/AdServer/",
14 "https://js.hsadspixel.net",
15 "https://www.google.com/adsense/",
16 "https://www.googleadservices.com",
17 "https://static.cloudflareinsights.com/",
18 "https://adservice.google.com",
19 "https://www.gstatic.com/cv/js/sender/",
20 "https://googleads.g.doubleclick.net",
21 "https://www.google-analytics.com",
22 "https://www.googleanalytics.com",
23 "https://iabusprivacy.pmc.com/geo-info.js",
24 "https://cookie-cdn.cookiepro.com/consent",
25 "https://static.hotjar.com/",
26 "https://load.sumome.com/",
27 "https://www.mongoosemetrics.com/",
28 "https://geolocation-recommendations.shopifyapps.com/",
29 "https://w.usabilla.com/",
30 "https://consentcdn.cookiebot.com/",
31 "https://plausible.io/api/event",
32 "https://sentry.io/api/",
33 "https://cdn.onesignal.com",
34 "https://cdn.cookielaw.org/",
35 "https://static.doubleclick.net",
36 "https://tools.luckyorange.com/",
37 "https://cdn.piano.io",
38 "https://px.ads.linkedin.com",
39 "https://connect.facebook.net",
40 "https://tags.tiqcdn.com",
41 "https://tr.snapchat.com",
42 "https://ads.twitter.com",
43 "https://cdn.segment.com",
44 "https://stats.wp.com",
45 "https://analytics.",
46 "http://analytics.",
47 "https://cdn.cxense.com",
48 "https://cdn.tinypass.com",
49 "https://cd.connatix.com",
50 "https://js.chargebee.com/v2/chargebee.js",
51 "https://consent.cookiebot.com/",
52 "https://platform-api.sharethis.com/js/sharethis.js",
53 "https://js.hsforms.net/forms/embed/v2.js",
54 "https://static.parastorage.com/services/wix-thunderbolt/dist/",
55 "https://static.parastorage.com/services/tag-manager-client/",
56 "https://static.parastorage.com/services/form-app/",
57 "https://www.datadoghq-browser-agent.com/",
58 "https://tvem.cdn.turner.com/v2/",
59 "https://image6.pubmatic.com/AdServer/",
60 "https://i.cdn.turner.com/ads/adfuel/",
61 "https://featureassets.org",
62 "https://cdn.rudderlabs.com",
63 "https://script.hotjar.com/",
64 "https://cdn.branch.io/branch-latest.min.js",
65 "https://cdn.insurads.com/",
66 "https://cdn-ukwest.onetrust.com",
67 "https://cdn.onetrust.com",
68 "https://services.insurads.com/",
69 "https://platform.iteratehq.com/loader.js",
70 "https://connect.facebook.net/en_US/fbevents.js",
71 "https://acdn.adnxs.com/ast/ast.js",
72 "https://schibsted-cdn.relevant-digital.com/static/tags/",
73 "https://bat.bing.net",
74 "https://tpc.googlesyndication.com/",
75 "https://cdn.petametrics.com/",
76 "https://cdn.doubleverify.com/",
77 "https://www.facebook.com/v17.0/plugins/like.php?",
78 "https://gum.criteo.com",
79 "https://js-sec.indexww.com",
80 "https://eus.rubiconproject.com/",
81 "https://eb2.3lift.com/",
82 "https://acdn.adnxs.com/",
83 "https://ssc-cms.33across.com/",
84 "https://static.addtoany.com/menu/",
85 "https://www.gstatic.com/cast/sdk/libs/sender/1.0/cast_framework.js",
86 "https://www.gstatic.com/eureka/clank/131/cast_sender.js",
87 "https://static.adsafeprotected.com/",
88 "https://ssum-sec.casalemedia.com/usermatch",
89 "https://cdn.brandmetrics.com/scripts/",
90 "https://cdn.confiant-integrations.net/",
91 "https://www.b2i.us/b2i/",
92 "https://acsbapp.com/apps/app/dist/js/app.js",
93 "https://cdn.doofinder.com/livelayer/",
94 "https://load.sumo.com/",
95 "https://cdn11.bigcommerce.com/",
96 "https://na.shgcdn3.com/collector.js",
97 "https://microapps.bigcommerce.com/bodl-events/index.js",
98 "https://checkout-sdk.bigcommerce.com/v1/loader.js",
99 "https://cdn.callrail.com/companies/",
100 "https://www.webtraxs.com/trxscript.php",
101 "https://diffuser-cdn.app-us1.com/diffuser/diffuser.js",
102 "https://try.abtasty.com/",
103 "https://imasdk.googleapis.com/js/sdkloader/ima3.js",
104 "https://cdn.registerdisney.go.com/v4/responder.js",
105 "https://cdn.registerdisney.go.com/v4/OneID.js",
106 "https://js-agent.newrelic.com/",
107 "https://bat.bing.com/bat.js",
108 "https://s1.hdslb.com/bfs/cm/cm-sdk/static/js/track-collect.js",
109 "https://consent.trustarc.com/",
110 "https://cdn-sitegainer.com/",
111 "https://static.cloudflareinsights.com/beacon.min.js/",
112 "https://hm.baidu.com/hm.js",
113 "https://unpkg.zhimg.com/@efe/zhad-tracker",
114 "https://pagead2.googlesyndication.com/tag/js/gpt.js",
115 "https://mab.chartbeat.com/mab_strategy/",
116 "https://c.amazon-adsystem.com/",
117 "https://rumcdn.geoedge.be/",
118 "https://assets.adobedtm.com/extensions/",
119 "https://macro.adnami.io/macro/spec/adsm.macro.",
120 "https://log.medietall.no/analytics.js",
121 "https://cdn.siftscience.com/s.js",
122 "https://lwadm.com/lw/pbjs?",
123 "https://cl.k5a.io/",
124 "https://cdn-cookieyes.com/",
125 "https://pbs.yahoo.com/",
126 "https://ads.pubmatic.com/AdServer/js/",
127 "https://widgets.outbrain.com/nanoWidget/externals/obPixelFrame/obPixelFrame.js",
128 "https://widgets.outbrain.com/external/externals/intentiq.js",
129 "https://cdn.fuseplatform.net/publift/tags/",
130 "//d2wy8f7a9ursnm.cloudfront.net/v8/bugsnag.min.js",
131 ".sharethis.com",
132 ".newrelic.com",
133 ".googlesyndication.com",
134 ".amazon-adsystem.com",
135 ".onetrust.com",
136 "sc.omtrdc.net",
137 "doubleclick.net",
138 "hotjar.com",
139 "datadome.com",
140 "datadog-logs-us.js",
141 "tinypass.min.js",
142 ".airship.com",
143 ".adlightning.com",
144 ".lab.amplitude.",
145 "privacy-notice.js",
147 "tracking.js",
148 "plugins/cookie-law-info/legacy/",
149 "ads.js",
150 "insight.min.js",
151 "assets/TrackingPixel",
152 "https://ads.",
153 "http://ads.",
154 ".pubmatic.com/AdServer/",
155 "https://tracking.",
156 "http://tracking.",
157 "https://static-tracking.",
158 "https://geo.privacymanager.io/",
161 ];
165 for pattern in &patterns {
166 trie.insert(pattern);
167 }
168 trie
169 };
170
171 pub(crate) static ref URL_IGNORE_EMBEDED_TRIE: Trie = {
173 let mut trie = Trie::new();
174 let patterns = [
175 "https://www.youtube.com/embed/", "https://www.google.com/maps/embed?", "https://maps.google.com", "https://player.vimeo.com/video/", "https://player.vimeo.com/api/player.js", "https://open.spotify.com/embed/", "https://w.soundcloud.com/player/", "https://platform.twitter.com/embed/", "https://www.instagram.com/embed.js", "https://www.facebook.com/plugins/", "https://cdn.embedly.com/widgets/", "https://player.twitch.tv/", "https://maps.googleapis.com/maps/", "https://www.youtube.com/player_api", "https://www.googletagmanager.com/ns.html", "https://consentcdn.cookiebot.com", "https://www.youtube.com/iframe_api", "https://f.vimeocdn.com", "https://i.vimeocdn.com/",
194 "https://image2.pubmatic.com/AdServer/",
195 "https://ads.pubmatic.com/AdServer/js/",
196 "https://cdn.taboola.com/libtrc/static/topics/",
197 "https://pm-widget.taboola.com/",
198 "https://gum.criteo.com/syncframe",
199 "https://vercel.live/api/",
202
203 "https://cdn.readme.io/public/",
205 "https://use.fontawesome.com/",
207 "https://insight.adsrvr.org/track/",
209 "http://www.google-analytics.com/ga.js",
210 "cxense.com/",
211 "https://tr.snapchat.com/",
213 "https://buy.tinypass.com",
214 "https://nimbleplot.com/",
215 "https://my.actiondata.co/js/tracker.php",
216 "https://ajax.googleapis.com/ajax/libs/webfont/",
217 "http://cdn2.editmysite.com/",
218 "https://kit.fontawesome.com/",
220 "https://use.typekit.net",
221 ".amplitude.com",
222 ".rudderstack.com",
223 "https://cdn.tailwindcss.com",
225 ".sharethis.com",
227 "amazon-adsystem.com",
228 ".vimeocdn.com",
229 "g.doubleclick.net",
230 "https://securepubads.g.doubleclick.net",
231 "googlesyndication.com",
232 "adsafeprotected.com",
233 ".googlesyndication.com/safeframe/",
235 "/ccpa/user-consent.min.js",
237 "consent-manager",
238 "/cookiebanner/js/",
239 "cookielaw.org",
240 "bugsnag.min.js",
241 "otBannerSdk.js",
243 "privacy-notice.js",
244 ".ingest.sentry.io/api",
245 ".ssl-images-amazon.com/images/"
247 ];
248 for pattern in &patterns {
249 trie.insert(pattern);
250 }
251 trie
252 };
253
254 pub(crate) static ref URL_IGNORE_SCRIPT_BASE_PATHS: Trie = {
256 let mut trie = Trie::new();
257 let patterns = [
258 "wp-content/plugins/cookie-law-info",
259 "wp-content/js/rlt-proxy.js",
260 "wp-admin/rest-proxy/",
261 "wp-content/mu-plugins/a8c-analytics/",
262 "analytics/",
263 "cookie-tracking",
264 ];
265 for pattern in &patterns {
266 trie.insert(pattern);
267 }
268 trie
269 };
270
271 pub (crate) static ref URL_IGNORE_SCRIPT_STYLES_PATHS: Trie = {
273 let mut trie = Trie::new();
274 let patterns = [
275 "wp-content/themes/",
276 "wp-content/plugins/dizo-image-hover/",
277 "wp-content/plugins/supreme-modules-pro-for-divi/",
278 "wp-content/plugins/page-builder-pmc/",
279 "wp-content/plugins/contact-form-7/",
280 "wp-content/plugins/responsive-lightbox/",
281 "wp-content/cache/breeze-minification/",
282 "wp-includes/js/mediaelement",
283 "wp-content/plugins/gravityforms/",
284 "wp-content/plugins/wp-rocket/assets/js/lazyload/",
285 "wp-content/plugins/w3-total-cache/",
286 "wp-content/js/bilmur.min.js",
287 "npm/bootstrap@"
288 ];
289 for pattern in &patterns {
290 trie.insert(pattern);
291 }
292 trie
293 };
294
295 pub (crate) static ref URL_IGNORE_TRIE_PATHS: Trie = {
297 let mut trie = Trie::new();
298 let patterns = [
299 "privacy-notice.js",
301 "tracking.js",
302 "track.js",
303 "ads.js",
304 "analytics.js",
305 "otSDKStub.js",
306 "otBannerSdk.js",
307 "_vercel/insights/script.js",
308 "analytics.",
309 ];
310 for pattern in &patterns {
311 trie.insert(pattern);
312 }
313 trie
314 };
315
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Drives one trie check: `lookup` must return `true` for every entry of
    /// `expected_hits` and `false` for every entry of `expected_misses`.
    ///
    /// Hits are checked before misses, and the failure messages name the
    /// offending input.
    fn check_cases(
        lookup: impl Fn(&str) -> bool,
        expected_hits: &[&str],
        expected_misses: &[&str],
    ) {
        for &input in expected_hits {
            assert!(lookup(input), "Trie should contain: {}", input);
        }

        for &input in expected_misses {
            assert!(!lookup(input), "Trie should not contain: {}", input);
        }
    }

    #[test]
    fn test_url_ignore_trie_contains() {
        check_cases(
            |input| URL_IGNORE_TRIE.contains_prefix(input),
            &[
                "https://www.googletagservices.com/tag/",
                "https://www.google-analytics.com",
                "https://www.googleanalytics.com",
                ".newrelic.com",
                "privacy-notice.js",
            ],
            &[
                "https://not-a-tracked-url.com/script.js",
                "https://google.com",
            ],
        );
    }

    #[test]
    fn test_url_ignore_embedded_trie_contains() {
        check_cases(
            |input| URL_IGNORE_EMBEDED_TRIE.contains_prefix(input),
            &[
                "https://www.youtube.com/embed/",
                "https://www.google.com/maps/embed?",
                ".amplitude.com",
            ],
            &[
                "https://secure-site.com/resource.js",
                "https://example.com/embed.js",
            ],
        );
    }

    #[test]
    fn test_url_ignore_script_base_paths_contains() {
        check_cases(
            |input| URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(input),
            &["wp-content/plugins/cookie-law-info", "analytics/"],
            &[
                "wp-content/some-untracked-plugin/",
                "random/path/analytics.js",
            ],
        );
    }

    #[test]
    fn test_url_ignore_script_style_paths_contains() {
        check_cases(
            |input| URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(input),
            &["wp-content/themes/", "npm/bootstrap@"],
            &[
                "wp-content/some-other-theme/",
                "wp-content/plugins/untracked-plugin/",
            ],
        );
    }

    #[test]
    fn test_url_ignore_trie_paths_contains() {
        check_cases(
            |input| URL_IGNORE_TRIE_PATHS.contains_prefix(input),
            &["privacy-notice.js", "tracking.js"],
            &["non-ignored.js", "non-related/tracking.js"],
        );
    }
}