Skip to main content

hpx_browser/
iframe.rs

//! Iframe support for hpx-browser.
//!
//! Each iframe loaded from `srcdoc` or `src` gets its own DOM tree, V8 runtime, and event loop.
//! Communication between parent and child is via serialized postMessage.
5
6use crate::dom::{Dom, NodeData, NodeId};
7
8/// Info about an iframe found in the DOM.
9pub struct IframeInfo {
10    pub node_id: NodeId,
11    pub srcdoc: Option<String>,
12    pub src: Option<String>,
13}
14
/// A loaded child iframe that owns a document separate from its parent's.
///
/// Only available when the crate is built with the `v8` feature.
#[cfg(feature = "v8")]
pub struct ChildIframe {
    /// Node id supplied by the caller when the iframe was created
    /// (presumably the `<iframe>` element in the parent DOM — confirm at call sites).
    pub node_id: NodeId,
    /// Event loop wrapping the JS runtime that was built from the child's HTML.
    pub event_loop: crate::event_loop::BrowserEventLoop,
}
21
#[cfg(feature = "v8")]
impl ChildIframe {
    /// How long a `srcdoc` document's event loop may run before we stop waiting for idle.
    const SRCDOC_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5);

    /// How long a fetched document's event loop may run before we stop waiting for idle.
    const URL_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

    /// Parse `html` and wrap the resulting DOM in a fresh JS runtime and event loop.
    fn event_loop_for(html: &str) -> crate::event_loop::BrowserEventLoop {
        let dom = crate::html_parser::parse_html(html);
        let runtime = crate::js_runtime::BrowserJsRuntime::new(dom);
        crate::event_loop::BrowserEventLoop::with_runtime(runtime)
    }

    /// Create a child iframe from srcdoc HTML.
    ///
    /// The HTML is parsed into its own DOM, a runtime and event loop are built
    /// around it, and the loop is run until idle (bounded by a 5 second budget).
    ///
    /// `_profile` is accepted for interface compatibility but is not used here.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`; the outcome of the
    /// idle run is intentionally discarded so a misbehaving child document
    /// does not prevent the iframe from being created.
    pub async fn from_srcdoc(
        node_id: NodeId,
        html: &str,
        _profile: &crate::stealth::StealthProfile,
    ) -> Result<Self, crate::event_loop::EventLoopError> {
        let mut event_loop = Self::event_loop_for(html);

        // Best effort: whatever the run reports, the iframe is still returned.
        let _ = event_loop
            .run_until_idle(Self::SRCDOC_IDLE_TIMEOUT)
            .await;

        Ok(Self {
            node_id,
            event_loop,
        })
    }

    /// Create a child iframe by fetching src URL via HTTP client.
    ///
    /// After a successful fetch the body is parsed into its own DOM,
    /// `location.href` is assigned the (escaped) URL inside the child's
    /// script context, and the event loop is run until idle (bounded by a
    /// 10 second budget). An empty or whitespace-only body falls back to
    /// [`ChildIframe::from_srcdoc`] with a blank document.
    ///
    /// # Errors
    ///
    /// Returns `EventLoopError::Other` when the request itself fails or when
    /// the response reports a non-OK status. Failures of the location
    /// assignment and of the idle run are deliberately ignored.
    pub async fn from_url(
        node_id: NodeId,
        url: &str,
        client: &crate::net::HttpClient,
        stealth_profile: Option<&crate::stealth::StealthProfile>,
    ) -> Result<Self, crate::event_loop::EventLoopError> {
        let resp = client.get(url).await.map_err(|e| {
            crate::event_loop::EventLoopError::Other(format!("iframe fetch error: {}", e))
        })?;

        if !resp.ok() {
            return Err(crate::event_loop::EventLoopError::Other(format!(
                "iframe fetch {} returned {}",
                url, resp.status
            )));
        }

        let html = resp.text();
        if html.trim().is_empty() {
            // NOTE(review): this fallback path never assigns `location.href`;
            // confirm whether that is intended for blank documents.
            return Self::from_srcdoc(
                node_id,
                "<html><body></body></html>",
                stealth_profile.unwrap_or(&crate::stealth::StealthProfile::default()),
            )
            .await;
        }

        let mut event_loop = Self::event_loop_for(&html);

        // Set location. The URL comes from page markup, so it is escaped for
        // the single-quoted literal; a raw newline or quote would otherwise
        // break the script and the assignment would silently be lost.
        let _ = event_loop.execute_script(&format!(
            "location.href = '{}';",
            escape_js_string(url)
        ));

        // Best effort: whatever the run reports, the iframe is still returned.
        let _ = event_loop
            .run_until_idle(Self::URL_IDLE_TIMEOUT)
            .await;

        Ok(Self {
            node_id,
            event_loop,
        })
    }

    /// Evaluate JS in the child's V8 context.
    ///
    /// Forwards `js` to the child's event loop and returns its result unchanged.
    pub fn evaluate(&mut self, js: &str) -> Result<String, crate::event_loop::EventLoopError> {
        self.event_loop.execute_script(js)
    }

    /// Query the child's DOM for text content of a selector match.
    ///
    /// Returns `None` when the script fails, when nothing matches, or when
    /// the matched element's text content is empty.
    ///
    /// The selector is escaped before being embedded in the generated script,
    /// so CSS escapes (`.a\:b`) and quoted attribute values (`[title="x"]`)
    /// reach `querySelector` exactly as written.
    pub fn query_text(&mut self, selector: &str) -> Option<String> {
        let script = format!(
            r#"(() => {{ const el = document.querySelector("{}"); return el ? el.textContent : ""; }})()"#,
            escape_js_string(selector)
        );
        self.evaluate(&script).ok().filter(|text| !text.is_empty())
    }
}

/// Escape `raw` so it can be placed between single or double quotes in a
/// JavaScript string literal and evaluate back to exactly `raw`.
///
/// Backslashes and both quote characters are backslash-escaped; line
/// terminators and other control characters are written as `\n`, `\r` or
/// `\uXXXX` so they cannot end the literal early.
// Only `ChildIframe` calls this, and that impl is compiled out without the
// `v8` feature; silence the unused warning in that configuration.
#[cfg_attr(not(feature = "v8"), allow(dead_code))]
fn escape_js_string(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            // U+2028 / U+2029 are line terminators for older JS parsers.
            c if c.is_control() || c == '\u{2028}' || c == '\u{2029}' => {
                escaped.push_str(&format!("\\u{:04x}", c as u32));
            }
            c => escaped.push(c),
        }
    }
    escaped
}
107
108/// Find all `<iframe>` elements in the DOM.
109pub fn find_iframes(dom: &Dom) -> Vec<IframeInfo> {
110    let mut iframes = Vec::new();
111    collect_iframes(dom, NodeId::DOCUMENT, &mut iframes);
112    iframes
113}
114
115fn collect_iframes(dom: &Dom, node_id: NodeId, iframes: &mut Vec<IframeInfo>) {
116    let children = dom.children(node_id);
117    for child_id in children {
118        if let Some(node) = dom.get(child_id) {
119            if let NodeData::Element(elem) = &node.data {
120                if elem.name.local.eq_ignore_ascii_case("iframe") {
121                    let srcdoc = elem
122                        .attrs
123                        .iter()
124                        .find(|a| a.name.local == "srcdoc")
125                        .map(|a| a.value.clone());
126                    let src = elem
127                        .attrs
128                        .iter()
129                        .find(|a| a.name.local == "src")
130                        .map(|a| a.value.clone());
131                    iframes.push(IframeInfo {
132                        node_id: child_id,
133                        srcdoc,
134                        src,
135                    });
136                }
137            }
138            collect_iframes(dom, child_id, iframes);
139        }
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `html` and return every iframe `find_iframes` reports for it.
    fn iframes_in(html: &str) -> Vec<IframeInfo> {
        let dom = crate::html_parser::parse_html(html);
        find_iframes(&dom)
    }

    /// A freshly constructed DOM contains no iframes.
    #[test]
    fn find_no_iframes_in_empty_doc() {
        let dom = Dom::new();
        assert!(find_iframes(&dom).is_empty());
    }

    /// An iframe's `srcdoc` attribute is captured with its markup.
    #[test]
    fn find_iframe_with_srcdoc() {
        let found =
            iframes_in(r#"<html><body><iframe srcdoc="<p>Hello</p>"></iframe></body></html>"#);
        assert_eq!(found.len(), 1);
        let srcdoc = found[0].srcdoc.as_deref();
        assert!(srcdoc.is_some());
        assert!(srcdoc.unwrap().contains("Hello"));
    }

    /// An iframe's `src` attribute is captured verbatim.
    #[test]
    fn find_iframe_with_src() {
        let found =
            iframes_in(r#"<html><body><iframe src="https://example.com"></iframe></body></html>"#);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].src.as_deref(), Some("https://example.com"));
    }

    /// Sibling iframes are all reported.
    #[test]
    fn find_multiple_iframes() {
        let found = iframes_in(
            r#"<html><body>
                <iframe src="first.html"></iframe>
                <iframe src="second.html"></iframe>
            </body></html>"#,
        );
        assert_eq!(found.len(), 2);
    }
}