Skip to main content

hpx_browser/
iframe.rs

//! Iframe support for hpx-browser.
//!
//! Each iframe with `srcdoc` or `src` gets its own DOM tree, V8 runtime, and event loop.
//! Communication between parent and child is via serialized postMessage.
5
6use crate::dom::{Dom, NodeData, NodeId};
7#[cfg(feature = "v8")]
8use crate::net::RedirectPolicy;
9
/// Info about an iframe found in the DOM.
///
/// Produced by `find_iframes`, one entry per `<iframe>` element encountered.
pub struct IframeInfo {
    /// Id of the `<iframe>` element node in the DOM that was searched.
    pub node_id: NodeId,
    /// Value of the element's `srcdoc` attribute, or `None` when it is absent.
    pub srcdoc: Option<String>,
    /// Value of the element's `src` attribute exactly as written in the
    /// markup (not resolved against any base URL), or `None` when absent.
    pub src: Option<String>,
}
16
/// A child iframe with its own DOM.
///
/// Bundles the node id the child was created for with the event loop that
/// owns the child's JS runtime (itself built from the child's parsed DOM).
#[cfg(feature = "v8")]
pub struct ChildIframe {
    /// Node id handed in by the creator; expected to identify the `<iframe>`
    /// element in the parent DOM.
    pub node_id: NodeId,
    /// Event loop driving the child document's JS runtime.
    pub event_loop: crate::event_loop::BrowserEventLoop,
}

#[cfg(feature = "v8")]
impl ChildIframe {
    /// Upper bound on how long a `srcdoc` child is driven while waiting for idle.
    const SRCDOC_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5);

    /// Upper bound on how long a fetched (`src`) child is driven while waiting for idle.
    const URL_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

    /// Parses `html` and wraps the resulting DOM in a fresh JS runtime and event loop.
    fn boot_event_loop(html: &str) -> crate::event_loop::BrowserEventLoop {
        let dom = crate::html_parser::parse_html(html);
        let runtime = crate::js_runtime::BrowserJsRuntime::new(dom);
        crate::event_loop::BrowserEventLoop::with_runtime(runtime)
    }

    /// Escapes `raw` so it can be spliced between quotes in a JS string literal.
    ///
    /// Handles backslashes, both quote styles, and every character that would
    /// terminate or corrupt the literal (CR, LF, U+2028, U+2029). The result is
    /// valid inside single- and double-quoted literals alike.
    fn escape_js_string(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '\'' => escaped.push_str("\\'"),
                '"' => escaped.push_str("\\\""),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                '\u{2028}' => escaped.push_str("\\u2028"),
                '\u{2029}' => escaped.push_str("\\u2029"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    /// Create a child iframe from srcdoc HTML.
    ///
    /// The HTML is parsed into a new DOM, given its own JS runtime and event
    /// loop, and the loop is driven until idle or until 5 seconds have passed.
    ///
    /// `_profile` is accepted for interface compatibility but is currently unused.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the outcome of driving the
    /// event loop is deliberately discarded so the child is still returned.
    /// NOTE(review): confirm that a failed/timed-out run should stay silent.
    pub async fn from_srcdoc(
        node_id: NodeId,
        html: &str,
        _profile: &crate::stealth::StealthProfile,
    ) -> Result<Self, crate::event_loop::EventLoopError> {
        let mut event_loop = Self::boot_event_loop(html);

        // Best effort: whatever the run reports, hand back the child as-is.
        let _ = event_loop
            .run_until_idle(Self::SRCDOC_IDLE_TIMEOUT)
            .await;

        Ok(Self {
            node_id,
            event_loop,
        })
    }

    /// Create a child iframe by fetching src URL via HTTP client.
    ///
    /// Issues a `GET` for `url`, builds a child document from the response
    /// body, assigns `location.href` inside the child, and drives the child's
    /// event loop until idle or until 10 seconds have passed. A body that is
    /// empty after trimming falls back to [`ChildIframe::from_srcdoc`] with a
    /// blank document (in which case `location.href` is not assigned).
    ///
    /// NOTE(review): the request uses `RedirectPolicy::Manual`, so a redirect
    /// response presumably fails the `ok()` check and surfaces as an error
    /// instead of being followed — confirm this is intended.
    ///
    /// # Errors
    ///
    /// Returns `EventLoopError::Other` when the request fails or when the
    /// response is not `ok()`.
    pub async fn from_url(
        node_id: NodeId,
        url: &str,
        client: &crate::net::HttpClient,
        stealth_profile: Option<&crate::stealth::StealthProfile>,
    ) -> Result<Self, crate::event_loop::EventLoopError> {
        let resp = client
            .request("GET", url, None, &[], RedirectPolicy::Manual)
            .await
            .map_err(|e| {
                crate::event_loop::EventLoopError::Other(format!("iframe fetch error: {}", e))
            })?;

        if !resp.ok() {
            return Err(crate::event_loop::EventLoopError::Other(format!(
                "iframe fetch {} returned {}",
                url, resp.status
            )));
        }

        let html = resp.text();
        if html.trim().is_empty() {
            // Only build a default profile when the caller did not supply one.
            let fallback;
            let profile = match stealth_profile {
                Some(profile) => profile,
                None => {
                    fallback = crate::stealth::StealthProfile::default();
                    &fallback
                }
            };
            return Self::from_srcdoc(node_id, "<html><body></body></html>", profile).await;
        }

        let mut event_loop = Self::boot_event_loop(&html);

        // Set location. The URL originates from page markup, so it is fully
        // escaped before being embedded in the script source.
        let _ = event_loop.execute_script(&format!(
            "location.href = '{}';",
            Self::escape_js_string(url)
        ));

        // Best effort: whatever the run reports, hand back the child as-is.
        let _ = event_loop
            .run_until_idle(Self::URL_IDLE_TIMEOUT)
            .await;

        Ok(Self {
            node_id,
            event_loop,
        })
    }

    /// Evaluate JS in the child's V8 context.
    ///
    /// Thin wrapper that forwards `js` to the child's event loop and returns
    /// its result unchanged.
    pub fn evaluate(&mut self, js: &str) -> Result<String, crate::event_loop::EventLoopError> {
        self.event_loop.execute_script(js)
    }

    /// Query the child's DOM for text content of a selector match.
    ///
    /// Runs `document.querySelector(selector)` in the child and returns the
    /// match's `textContent`. Yields `None` when the script fails, when
    /// nothing matches, or when the matched text is empty.
    pub fn query_text(&mut self, selector: &str) -> Option<String> {
        // Escape the selector completely: CSS selectors may legitimately
        // contain backslashes, which must survive the JS string literal intact.
        let script = format!(
            r#"(() => {{ const el = document.querySelector("{}"); return el ? el.textContent : ""; }})()"#,
            Self::escape_js_string(selector)
        );
        self.evaluate(&script).ok().filter(|text| !text.is_empty())
    }
}
112
113/// Find all `<iframe>` elements in the DOM.
114pub fn find_iframes(dom: &Dom) -> Vec<IframeInfo> {
115    let mut iframes = Vec::new();
116    collect_iframes(dom, NodeId::DOCUMENT, &mut iframes);
117    iframes
118}
119
120fn collect_iframes(dom: &Dom, node_id: NodeId, iframes: &mut Vec<IframeInfo>) {
121    let children = dom.children(node_id);
122    for child_id in children {
123        if let Some(node) = dom.get(child_id) {
124            if let NodeData::Element(elem) = &node.data {
125                if elem.name.local.eq_ignore_ascii_case("iframe") {
126                    let srcdoc = elem
127                        .attrs
128                        .iter()
129                        .find(|a| a.name.local == "srcdoc")
130                        .map(|a| a.value.clone());
131                    let src = elem
132                        .attrs
133                        .iter()
134                        .find(|a| a.name.local == "src")
135                        .map(|a| a.value.clone());
136                    iframes.push(IframeInfo {
137                        node_id: child_id,
138                        srcdoc,
139                        src,
140                    });
141                }
142            }
143            collect_iframes(dom, child_id, iframes);
144        }
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `html` and returns every iframe discovered in the resulting DOM.
    fn iframes_in(html: &str) -> Vec<IframeInfo> {
        find_iframes(&crate::html_parser::parse_html(html))
    }

    /// A freshly created DOM contains no iframes.
    #[test]
    fn find_no_iframes_in_empty_doc() {
        assert!(find_iframes(&Dom::new()).is_empty());
    }

    /// The `srcdoc` attribute value is captured on the reported iframe.
    #[test]
    fn find_iframe_with_srcdoc() {
        let found = iframes_in(
            r#"<html><body><iframe srcdoc="<p>Hello</p>"></iframe></body></html>"#,
        );
        assert_eq!(found.len(), 1);
        assert!(matches!(
            found[0].srcdoc.as_deref(),
            Some(doc) if doc.contains("Hello")
        ));
    }

    /// The `src` attribute value is captured verbatim.
    #[test]
    fn find_iframe_with_src() {
        let found = iframes_in(
            r#"<html><body><iframe src="https://example.com"></iframe></body></html>"#,
        );
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].src.as_deref(), Some("https://example.com"));
    }

    /// Sibling iframes are each reported.
    #[test]
    fn find_multiple_iframes() {
        let found = iframes_in(
            r#"<html><body>
                <iframe src="first.html"></iframe>
                <iframe src="second.html"></iframe>
            </body></html>"#,
        );
        assert_eq!(found.len(), 2);
    }
}