Skip to main content

servo_fetch/
layout.rs

//! CSS layout heuristics — detects page structure (navbar, sidebar, footer, main)
//! to improve content extraction accuracy.
3
4use serde::Deserialize;
5
/// Default viewport width used by Servo for rendering.
///
/// The layout heuristics in this module compare element widths against
/// fractions of this value.
pub const VIEWPORT_WIDTH: u32 = 1280;
/// Default viewport height used by Servo for rendering.
///
/// The layout heuristics in this module compare element heights against
/// fractions of this value.
pub const VIEWPORT_HEIGHT: u32 = 800;
10
11/// A page element with CSS layout data, deserialized from the injected JS.
12///
13/// The `tag` field contains the HTML tag name in uppercase, as returned by
14/// `Element.tagName` in JavaScript.
15#[derive(Deserialize)]
16pub struct LayoutElement {
17    tag: String,
18    role: Option<String>,
19    w: f64,
20    h: f64,
21    position: String,
22}
23
24impl LayoutElement {
25    fn is_navbar(&self) -> bool {
26        matches!(self.position.as_str(), "fixed" | "sticky") && self.h < f64::from(VIEWPORT_HEIGHT) * 0.2
27    }
28
29    fn is_sidebar(&self) -> bool {
30        let is_narrow = self.w < f64::from(VIEWPORT_WIDTH) * 0.3;
31        let is_side_tag = matches!(self.tag.as_str(), "ASIDE" | "NAV");
32        let is_side_role = matches!(self.role.as_deref(), Some("navigation" | "complementary"));
33        is_narrow && (is_side_tag || is_side_role)
34    }
35
36    fn is_footer(&self) -> bool {
37        let is_full_width = self.w >= f64::from(VIEWPORT_WIDTH) * 0.8;
38        (self.tag == "FOOTER" && is_full_width) || self.role.as_deref() == Some("contentinfo")
39    }
40
41    fn should_remove(&self) -> bool {
42        self.is_navbar() || self.is_sidebar() || self.is_footer()
43    }
44}
45
46/// CSS selectors for elements that should be stripped before passing to readability.
47///
48/// Each selector is as specific as possible to avoid removing unrelated elements
49/// that happen to share the same tag name.
50#[must_use]
51pub fn selectors_to_strip(elements: &[LayoutElement]) -> Vec<String> {
52    let mut sels: Vec<String> = elements
53        .iter()
54        .filter(|el| el.should_remove())
55        .map(|el| {
56            let tag = el.tag.to_lowercase();
57            match el.role.as_deref() {
58                Some(role) => format!("{tag}[role=\"{role}\"]"),
59                None => tag,
60            }
61        })
62        .collect();
63    sels.sort_unstable();
64    sels.dedup();
65    sels
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a [`LayoutElement`] from borrowed inputs to keep the test cases terse.
    fn el(tag: &str, w: f64, h: f64, position: &str, role: Option<&str>) -> LayoutElement {
        LayoutElement {
            tag: tag.to_string(),
            role: role.map(String::from),
            w,
            h,
            position: position.to_string(),
        }
    }

    #[test]
    fn returns_nothing_for_empty_input() {
        assert!(selectors_to_strip(&[]).is_empty());
    }

    #[test]
    fn detects_fixed_navbar() {
        let sels = selectors_to_strip(&[el("HEADER", 1280.0, 60.0, "fixed", None)]);
        assert_eq!(sels, vec!["header"]);
    }

    #[test]
    fn detects_sticky_navbar() {
        let sels = selectors_to_strip(&[el("NAV", 1280.0, 50.0, "sticky", None)]);
        assert_eq!(sels, vec!["nav"]);
    }

    #[test]
    fn ignores_tall_fixed_element() {
        // Taller than 20% of the viewport: more likely an overlay than a navbar.
        let sels = selectors_to_strip(&[el("DIV", 1280.0, 400.0, "fixed", None)]);
        assert!(sels.is_empty());
    }

    #[test]
    fn ignores_wide_static_nav() {
        // Neither pinned (navbar) nor narrow (sidebar), so it must be kept.
        let sels = selectors_to_strip(&[el("NAV", 1280.0, 50.0, "static", None)]);
        assert!(sels.is_empty());
    }

    #[test]
    fn detects_narrow_aside_as_sidebar() {
        let sels = selectors_to_strip(&[el("ASIDE", 300.0, 800.0, "static", None)]);
        assert_eq!(sels, vec!["aside"]);
    }

    #[test]
    fn detects_complementary_role_as_sidebar() {
        let sels = selectors_to_strip(&[el("DIV", 250.0, 800.0, "static", Some("complementary"))]);
        assert_eq!(sels, vec!["div[role=\"complementary\"]"]);
    }

    #[test]
    fn detects_footer() {
        let sels = selectors_to_strip(&[el("FOOTER", 1280.0, 100.0, "static", None)]);
        assert_eq!(sels, vec!["footer"]);
    }

    #[test]
    fn ignores_narrow_footer() {
        // A <footer> inside an <article> is typically narrow — should not be stripped.
        let sels = selectors_to_strip(&[el("FOOTER", 600.0, 50.0, "static", None)]);
        assert!(sels.is_empty());
    }

    #[test]
    fn detects_contentinfo_role_as_footer() {
        let sels = selectors_to_strip(&[el("DIV", 1280.0, 100.0, "static", Some("contentinfo"))]);
        assert_eq!(sels, vec!["div[role=\"contentinfo\"]"]);
    }

    #[test]
    fn deduplicates_selectors() {
        let elements = vec![
            el("NAV", 200.0, 50.0, "fixed", None),
            el("NAV", 250.0, 40.0, "sticky", None),
        ];
        let sels = selectors_to_strip(&elements);
        assert_eq!(sels, vec!["nav"]);
    }

    #[test]
    fn sorts_selectors() {
        let elements = vec![
            el("NAV", 1280.0, 50.0, "sticky", None),
            el("ASIDE", 300.0, 800.0, "static", None),
        ];
        let sels = selectors_to_strip(&elements);
        assert_eq!(sels, vec!["aside", "nav"]);
    }
}
140}