Skip to main content

servo_fetch/
visibility.rs

1//! Visibility-aware extraction.
2
3mod a11y;
4mod js;
5mod selectors;
6
7use bitflags::bitflags;
8
9pub(crate) use self::a11y::A11yIndex;
10pub(crate) use self::selectors::selectors_to_strip;
11
/// CSS text injected as a user stylesheet ahead of rendering.
///
/// Every rule resolves to `display: none !important`, so an element matched
/// by any selector below generates no box. The selectors fall into two
/// groups:
///
/// * semantic hiding: `[hidden]`, `aria-hidden="true"`, modal dialogs,
///   alert dialogs, and hidden tab panels (the tab-panel rule is already
///   implied by the plain `aria-hidden` rule and is kept for explicitness);
/// * overlay noise: cookie / consent banners matched by case-insensitive
///   attribute substrings or by the element IDs of consent widgets, plus
///   newsletter and subscribe pop-ups.
///
/// Written as a raw string so the CSS reads without escaped quotes; the
/// value ends with a trailing newline.
pub(crate) const USER_STYLESHEET: &str = r#"[hidden] { display: none !important; }
[aria-hidden="true"] { display: none !important; }
[role="dialog"][aria-modal="true"] { display: none !important; }
[role="alertdialog"] { display: none !important; }
[role="tabpanel"][aria-hidden="true"] { display: none !important; }
[aria-label*="cookie" i], [aria-label*="consent" i],
[class*="cookie-banner" i], [class*="cookie-consent" i],
[id*="cookie" i][class*="banner" i],
[class*="newsletter-popup" i], [class*="subscribe-modal" i],
#onetrust-banner-sdk, #onetrust-pc-sdk,
#CybotCookiebotDialog, #CybotCookiebotDialogBodyUnderlay,
#qc-cmp2-container, [id^="sp_message_container_"],
#didomi-host, #usercentrics-root,
#truste-consent-track { display: none !important; }
"#;
30
31bitflags! {
32    /// Reasons a DOM node may be considered hidden.
33    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
34    pub struct VisibilityFlags: u32 {
35        /// Box has zero or near-zero area (AccessKit bounds).
36        const ZERO_SIZE                 = 1 << 0;
37        /// Box positioned outside the viewport (AccessKit bounds).
38        const OFFSCREEN                 = 1 << 1;
39        /// Computed font-size below 1px (AccessKit).
40        const FONT_SIZE_ZERO            = 1 << 2;
41        /// Tab panel with `aria-selected="false"` (AccessKit).
42        const TAB_PANEL_INACTIVE        = 1 << 3;
43        /// Cumulative opacity below 0.01 (computed CSS via JS).
44        const OPACITY_ZERO              = 1 << 4;
45        /// `clip` or `clip-path` set to a fully-clipped value (computed CSS via JS).
46        const CLIPPED                   = 1 << 5;
47        /// `content-visibility: hidden` (computed CSS via JS).
48        const CONTENT_VISIBILITY_HIDDEN = 1 << 6;
49        /// `text-indent` below `-9999px` while box is otherwise visible.
50        const TEXT_INDENT_OFFSCREEN     = 1 << 7;
51        /// Likely a "screen reader only" pattern (1px clip absolute).
52        const SR_ONLY                   = 1 << 8;
53        /// `visibility: hidden` (computed CSS via JS).
54        const VISIBILITY_HIDDEN         = 1 << 9;
55    }
56}
57
58/// Controls which visibility violations result in nodes being stripped from
59/// the extraction input.
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61#[non_exhaustive]
62pub struct VisibilityPolicy {
63    pub(crate) strip_if_any: VisibilityFlags,
64}
65
66impl VisibilityPolicy {
67    /// Strip CSS-, ARIA-, and geometry-hidden content while preserving sr-only.
68    #[must_use]
69    pub fn moderate() -> Self {
70        Self {
71            strip_if_any: VisibilityFlags::ZERO_SIZE
72                | VisibilityFlags::OFFSCREEN
73                | VisibilityFlags::FONT_SIZE_ZERO
74                | VisibilityFlags::TAB_PANEL_INACTIVE
75                | VisibilityFlags::OPACITY_ZERO
76                | VisibilityFlags::CLIPPED
77                | VisibilityFlags::CONTENT_VISIBILITY_HIDDEN
78                | VisibilityFlags::TEXT_INDENT_OFFSCREEN
79                | VisibilityFlags::VISIBILITY_HIDDEN,
80        }
81    }
82
83    /// [`Self::moderate`] plus sr-only stripping.
84    #[must_use]
85    pub fn strict() -> Self {
86        let mut p = Self::moderate();
87        p.strip_if_any |= VisibilityFlags::SR_ONLY;
88        p
89    }
90
91    /// Disable visibility-flag-based stripping.
92    #[must_use]
93    pub fn off() -> Self {
94        Self {
95            strip_if_any: VisibilityFlags::empty(),
96        }
97    }
98}
99
100impl Default for VisibilityPolicy {
101    fn default() -> Self {
102        Self::moderate()
103    }
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// `moderate` strips representative geometry, CSS, and ARIA hides but
    /// leaves sr-only content alone.
    #[test]
    fn moderate_policy_strips_common_hides() {
        let p = VisibilityPolicy::moderate();
        assert!(p.strip_if_any.contains(VisibilityFlags::ZERO_SIZE));
        assert!(p.strip_if_any.contains(VisibilityFlags::OPACITY_ZERO));
        assert!(p.strip_if_any.contains(VisibilityFlags::TAB_PANEL_INACTIVE));
        assert!(!p.strip_if_any.contains(VisibilityFlags::SR_ONLY));
    }

    /// `strict` is exactly `moderate` plus `SR_ONLY`: nothing dropped,
    /// nothing else added.
    #[test]
    fn strict_policy_adds_sr_only() {
        let strict = VisibilityPolicy::strict().strip_if_any;
        let moderate = VisibilityPolicy::moderate().strip_if_any;

        assert!(strict.contains(VisibilityFlags::SR_ONLY));
        // Guard against a regression where `strict` loses moderate flags.
        assert!(strict.contains(moderate));
        // The only bit separating the two policies is SR_ONLY.
        assert_eq!(strict.difference(moderate), VisibilityFlags::SR_ONLY);
    }

    /// `off` carries an empty flag set.
    #[test]
    fn off_policy_strips_nothing_directly() {
        let p = VisibilityPolicy::off();
        assert!(p.strip_if_any.is_empty());
    }

    /// `Default` and `moderate` produce the same flag set.
    #[test]
    fn default_is_moderate() {
        assert_eq!(
            VisibilityPolicy::default().strip_if_any,
            VisibilityPolicy::moderate().strip_if_any,
        );
    }

    /// The stylesheet contains the semantic-hiding selectors.
    #[test]
    fn user_stylesheet_targets_aria_hidden_and_hidden_attr() {
        assert!(USER_STYLESHEET.contains("[hidden]"));
        assert!(USER_STYLESHEET.contains("[aria-hidden=\"true\"]"));
        assert!(USER_STYLESHEET.contains("[role=\"dialog\"][aria-modal=\"true\"]"));
    }

    /// The stylesheet contains the ID selectors of the major consent widgets.
    #[test]
    fn user_stylesheet_targets_major_cookie_consent_providers() {
        // Major GDPR / CCPA cookie consent platforms by ID.
        // Every rule in the stylesheet is `display: none !important`, so
        // these containers are hidden at render time.
        // NOTE(review): this comment used to say the containers surface via
        // the `OPACITY_ZERO` flag, which does not match a `display: none`
        // rule; confirm the actual stripping path against
        // `js/visibility.js` before relying on it.
        assert!(USER_STYLESHEET.contains("#onetrust-banner-sdk"));
        assert!(USER_STYLESHEET.contains("#CybotCookiebotDialog"));
        assert!(USER_STYLESHEET.contains("#qc-cmp2-container"));
        assert!(USER_STYLESHEET.contains("#didomi-host"));
        assert!(USER_STYLESHEET.contains("#usercentrics-root"));
        assert!(USER_STYLESHEET.contains("[id^=\"sp_message_container_\"]"));
        assert!(USER_STYLESHEET.contains("#truste-consent-track"));
    }

    /// Guards bit-value synchronisation with `js/visibility.js`.
    ///
    /// The literals below are the values the script is expected to use;
    /// renumbering a flag on the Rust side makes this test fail.
    #[test]
    fn js_flag_constants_match_rust() {
        assert_eq!(VisibilityFlags::OPACITY_ZERO.bits(), 1 << 4);
        assert_eq!(VisibilityFlags::CLIPPED.bits(), 1 << 5);
        assert_eq!(VisibilityFlags::CONTENT_VISIBILITY_HIDDEN.bits(), 1 << 6);
        assert_eq!(VisibilityFlags::TEXT_INDENT_OFFSCREEN.bits(), 1 << 7);
        assert_eq!(VisibilityFlags::SR_ONLY.bits(), 1 << 8);
        assert_eq!(VisibilityFlags::VISIBILITY_HIDDEN.bits(), 1 << 9);
    }
}