mochi_rs/imports/
html.rs

1// Most, if not all, of this source code was copied/modified from Aidoku-RS
2// https://github.com/Aidoku/aidoku-rs/blob/main/crates/imports/src/html.rs
3
4extern crate alloc;
5
6use crate::imports::core::Kind;
7use core::fmt::Display;
8
9use alloc::string::String;
10
11use super::core::{PtrRef, ArrayRef, ptr_kind};
12
13use super::error::{Result, MochiError, NodeError};
14use super::core::{HostPtr, copy, destroy};
15
16#[link(wasm_import_module = "html")]
17// #[link(name = "swift-bindings", kind = "static")]
18extern "C" {
19    #[link_name = "parse"]
20    fn scraper_parse(string: *const u8, len: usize) -> HostPtr;
21    #[link_name = "parse_with_uri"]
22    fn scraper_parse_with_uri(
23        string: *const u8,
24        len: usize,
25        base_uri: *const u8,
26        base_uri_len: usize,
27    ) -> HostPtr;
28    #[link_name = "parse_fragment"]
29    fn scraper_parse_fragment(string: *const u8, len: usize) -> HostPtr;
30    #[link_name = "parse_fragment_with_uri"]
31    fn scraper_parse_fragment_with_uri(
32        string: *const u8,
33        len: usize,
34        base_uri: *const u8,
35        base_uri_len: usize,
36    ) -> HostPtr;
37
38    #[link_name = "select"]
39    fn scraper_select(ptr: HostPtr, selector: *const u8, selector_len: usize) -> i32;
40    #[link_name = "attr"]
41    fn scraper_attr(ptr: HostPtr, selector: *const u8, selector_len: usize) -> i32;
42
43    #[link_name = "set_text"]
44    fn scraper_set_text(ptr: HostPtr, text: *const u8, text_len: usize) -> i32;
45    #[link_name = "set_html"]
46    fn scraper_set_html(ptr: HostPtr, html: *const u8, html_len: usize) -> i32;
47    #[link_name = "prepend"]
48    fn scraper_prepend(ptr: HostPtr, html: *const u8, html_len: usize) -> i32;
49    #[link_name = "append"]
50    fn scraper_append(ptr: HostPtr, html: *const u8, html_len: usize) -> i32;
51
52    #[link_name = "first"]
53    fn scraper_first(ptr: HostPtr) -> HostPtr;
54    #[link_name = "last"]
55    fn scraper_last(ptr: HostPtr) -> HostPtr;
56    #[link_name = "next"]
57    fn scraper_next(ptr: HostPtr) -> HostPtr;
58    #[link_name = "previous"]
59    fn scraper_previous(ptr: HostPtr) -> HostPtr;
60
61    #[link_name = "base_uri"]
62    fn scraper_base_uri(ptr: HostPtr) -> HostPtr;
63    #[link_name = "body"]
64    fn scraper_body(ptr: HostPtr) -> HostPtr;
65    #[link_name = "text"]
66    fn scraper_text(ptr: HostPtr) -> HostPtr;
67    #[link_name = "untrimmed_text"]
68    fn scraper_untrimmed_text(ptr: HostPtr) -> HostPtr;
69    #[link_name = "own_text"]
70    fn scraper_own_text(ptr: HostPtr) -> HostPtr;
71    #[link_name = "data"]
72    fn scraper_data(ptr: HostPtr) -> HostPtr;
73    #[link_name = "array"]
74    fn scraper_array(ptr: HostPtr) -> HostPtr;
75    #[link_name = "html"]
76    fn scraper_html(ptr: HostPtr) -> HostPtr;
77    #[link_name = "outer_html"]
78    fn scraper_outer_html(ptr: HostPtr) -> HostPtr;
79
80    #[link_name = "escape"]
81    fn scraper_escape(ptr: HostPtr) -> HostPtr;
82    #[link_name = "unescape"]
83    fn scraper_unescape(ptr: HostPtr) -> HostPtr;
84
85    #[link_name = "id"]
86    fn scraper_id(ptr: HostPtr) -> i32;
87    #[link_name = "tag_name"]
88    fn scraper_tag_name(ptr: HostPtr) -> i32;
89    #[link_name = "class_name"]
90    fn scraper_class_name(ptr: HostPtr) -> i32;
91    #[link_name = "has_class"]
92    fn scraper_has_class(ptr: HostPtr, class_name: *const u8, class_length: usize) -> bool;
93    #[link_name = "has_attr"]
94    fn scraper_has_attr(ptr: HostPtr, attr_name: *const u8, attr_length: usize) -> bool;
95}
96
97/// HTML escape an input string.
98///
99/// # Examples
100/// ```ignore
101/// assert_eq!(escape_html_entities("<"), "&lt;");
102/// ```
103pub fn escape_html_entities<T: AsRef<str>>(text: T) -> String {
104    let ptr_ref = PtrRef::from(text.as_ref());
105    let host_id = unsafe { 
106        scraper_escape(ptr_ref.pointer()) 
107    };
108    PtrRef::new(host_id).as_string().unwrap_or_default()
109}
110
111/// Un-escape an HTML escaped string.
112///
113/// # Examples
114/// ```ignore
115/// assert_eq!(unescape_html_entities("&lt;"), "<");
116/// ```
117pub fn unescape_html_entities<T: AsRef<str>>(text: T) -> String {
118    let ptr_ref = PtrRef::from(text.as_ref());
119    let host_id = unsafe { 
120        scraper_unescape(ptr_ref.pointer()) 
121    };
122    PtrRef::new(host_id).as_string().unwrap_or_default()
123}
124
/// Type which represents a HTML node, which can be a group of elements,
/// an element, or the entire HTML document.
///
/// The single field is the host handle (`HostPtr`) that every `Node` method
/// passes to the imported `html` host functions.
#[derive(Debug)]
pub struct Node(pub(crate) HostPtr);
129
130impl Node {
131    /// Parse HTML into a Node. As there is no base URI specified, absolute URL
132    /// resolution requires the HTML to have a `<base href>` tag.
133    pub fn new<T: AsRef<[u8]>>(buf: T) -> Result<Self> {
134        let buf = buf.as_ref();
135        let host_id = unsafe { scraper_parse(buf.as_ptr(), buf.len()) };
136        if host_id >= 0 {
137            Ok(Self(host_id))
138        } else {
139            Err(MochiError::from(NodeError::ParserError))
140        }
141    }
142
143    /// Parse HTML into a Node. The given `base_uri` will be used for any URLs that
144    /// occurs before a `<base href>` tag is defined.
145    pub fn new_with_uri<A: AsRef<[u8]>, B: AsRef<str>>(buf: A, base_uri: B) -> Result<Self> {
146        let buf = buf.as_ref();
147        let base_uri = base_uri.as_ref();
148        let host_id = unsafe {
149            scraper_parse_with_uri(buf.as_ptr(), buf.len(), base_uri.as_ptr(), base_uri.len())
150        };
151        if host_id >= 0 {
152            Ok(Self(host_id))
153        } else {
154            Err(MochiError::from(NodeError::ParserError))
155        }
156    }
157
158    /// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
159    /// Similar to [Node::new](crate::html::Node::new), relative URLs will not
160    /// be resolved unless there is a `<base href>` tag.
161    pub fn new_fragment<T: AsRef<[u8]>>(buf: T) -> Result<Self> {
162        let buf = buf.as_ref();
163        let host_id = unsafe { scraper_parse_fragment(buf.as_ptr(), buf.len()) };
164        if host_id >= 0 {
165            Ok(Self(host_id))
166        } else {
167            Err(MochiError::from(NodeError::ParserError))
168        }
169    }
170
171    /// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
172    /// Similar to [Node::new_with_uri](crate::html::Node::new_with_uri), URL
173    /// resolution occurs for any that appears before a `<base href>` tag.
174    pub fn new_fragment_with_uri<A: AsRef<[u8]>, B: AsRef<str>>(
175        buf: A,
176        base_uri: B,
177    ) -> Result<Self> {
178        let buf = buf.as_ref();
179        let base_uri = base_uri.as_ref();
180        let host_id = unsafe {
181            scraper_parse_fragment_with_uri(
182                buf.as_ptr(),
183                buf.len(),
184                base_uri.as_ptr(),
185                base_uri.len(),
186            )
187        };
188        if host_id >= 0 {
189            Ok(Self(host_id))
190        } else {
191            Err(MochiError::from(NodeError::ParserError))
192        }
193    }
194
195    /// Get an instance from a [PtrRef](crate::PtrRef)
196    ///
197    /// # Safety
198    /// Ensure that this Ptr is of [Kind::Node](crate::Kind) before
199    /// converting.
200    #[inline]
201    pub unsafe fn from(ptr: HostPtr) -> Self {
202        Self(ptr)
203    }
204
205    #[inline]
206    pub fn close(self) {
207        drop(self)
208    }
209
210    /// Find elements that matches the given CSS (or JQuery) selector.
211    ///
212    /// <details>
213    ///     <summary>Supported selectors</summary>
214    ///
215    /// | Pattern                 | Matches                                                                                              | Example                                                           |
216    /// |-------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
217    /// | `*`                     | any element                                                                                          | `*`                                                               |
218    /// | `tag`                   | elements with the given tag name                                                                     | `div`                                                             |
219    /// | <code>*\|E</code>       | elements of type E in any namespace (including non-namespaced)                                       | <code>*\|name</code> finds `<fb:name>` and `<name>` elements      |
220    /// | <code>ns\|E</code>      | elements of type E in the namespace ns                                                               | <code>fb\|name</code> finds `<fb:name>` elements                  |
221    /// | `#id`                   | elements with attribute ID of "id"                                                                   | `div#wrap`, `#logo`                                               |
222    /// | `.class`                | elements with a class name of "class"                                                                | `div.left`, `.result`                                             |
223    /// | `[attr]`                | elements with an attribute named "attr" (with any value)                                             | `a[href]`, `[title]`                                              |
224    /// | `[^attrPrefix]`         | elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets | `[^data-]`, `div[^data-]`                                         |
225    /// | `[attr=val]`            | elements with an attribute named "attr", and value equal to "val"                                    | `img[width=500]`, `a[rel=nofollow]`                               |
226    /// | `[attr="val"]`          | elements with an attribute named "attr", and value equal to "val"                                    | `span[hello="Cleveland"][goodbye="Columbus"]`, `a[rel="nofollow"]`|
227    /// | `[attr^=valPrefix]`     | elements with an attribute named "attr", and value starting with "valPrefix"                         | `a[href^=http:]`                                                  |
228    /// | `[attr$=valSuffix]`     | elements with an attribute named "attr", and value ending with "valSuffix"                           | `img[src$=.png]`                                                  |
229    /// | `[attr*=valContaining]` | elements with an attribute named "attr", and value containing "valContaining"                        | `a[href*=/search/]`                                               |
230    /// | `[attr~=regex]`         | elements with an attribute named "attr", and value matching the regular expression                   | `img[src~=(?i)\\.(png\|jpe?g)]`                                   |
231    /// |                         | The above may be combined in any order                                                               | `div.header[title]`                                               |
232    ///
233    /// ## Combinators
234    /// | Pattern   | Matches                                         | Example                     |
235    /// |-----------|-------------------------------------------------|-----------------------------|
236    /// | `E F`     | an F element descended from an E element        | `div a`, `.logo h1`         |
237    /// | `E > F`   | an F direct child of E                          | `ol > li`                   |
238    /// | `E + F`   | an F element immediately preceded by sibling E  | `li + li`, `div.head + div` |
239    /// | `E ~ F`   | an F element preceded by sibling E              | `h1 ~ p`                    |
240    /// | `E, F, G` | all matching elements E, F, or G                | `a[href], div, h3`          |
241    ///
242    /// ## Pseudo selectors
243    /// | Pattern              | Matches                                                                                                                                                   | Example                                                                                                                                                      |
244    /// |----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
245    /// | `:lt(n)`             | elements whose sibling index is less than n                                                                                                               | `td:lt(3)` finds the first 3 cells of each row                                                                                                               |
246    /// | `:gt(n)`             | elements whose sibling index is greater than n                                                                                                            | `td:gt(1)` finds cells after skipping the first two                                                                                                          |
247    /// | `:eq(n)`             | elements whose sibling index is equal to n                                                                                                                | `td:eq(0)` finds the first cell of each row                                                                                                                  |
248    /// | `:has(selector)`     | elements that contains at least one element matching the selector                                                                                         | `div:has(p)` finds divs that contain p elements; `div:has(> a)` selects div elements that have at least one direct child a element.                          |
249    /// | `:not(selector)`     | elements that do not match the selector.                                                                                                                  | `div:not(.logo)` finds all divs that do not have the "logo" class; `div:not(:has(div))` finds divs that do not contain divs.                                 |
250    /// | `:contains(text)`    | elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants.           | `p:contains(SwiftSoup)` finds p elements containing the text "SwiftSoup"; `p:contains(hello \(there\))` finds p elements containing the text "Hello (There)" |
251    /// | `:matches(regex)`    | elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants.                        | `td:matches(\\d+)` finds table cells containing digits. div:matches((?i)login) finds divs containing the text, case insensitively.                           |
252    /// | `:containsOwn(text)` | elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants. | `p:containsOwn(SwiftSoup)` finds p elements with own text "SwiftSoup".                                                                                       |
253    /// | `:matchesOwn(regex)` | elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants.                  | `td:matchesOwn(\\d+)` finds table cells directly containing digits. div:matchesOwn((?i)login) finds divs containing the text, case insensitively.            |
254    ///
255    /// ## Structural pseudo-selectors
256    /// | Pattern                   | Matches                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | Example                                                |
257    /// |---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|
258    /// | `:root`                   | The element that is the root of the document. In HTML, this is the html element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |                                                        |                                                                                                                                                                                                 |
259    /// | `:nth-child(an+b)`        | elements that have an+b-1 siblings before it in the document tree, for any positive integer or zero value of n, and has a parent element. For values of a and b greater than zero, this effectively divides the element's children into groups of a elements (the last group taking the remainder), and selecting the bth element of each group. For example, this allows the selectors to address every other row in a table, and could be used to alternate the color of paragraph text in a cycle of four. The a and b values must be integers (positive, negative, or zero). The index of the first child of an element is 1. |                                                        |
260    /// | `:nth-last-child(an+b)`   | elements that have an+b-1 siblings after it in the document tree. Otherwise like `:nth-child()`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | `tr:nth-last-child(-n+2)` the last two rows of a table |
261    /// | `:nth-of-type(an+b)`      | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name before it in the document tree, for any zero or positive integer value of n, and has a parent element                                                                                                                                                                                                                                                                                                                                                                                                                    | `img:nth-of-type(2n+1)`                                |
262    /// | `:nth-last-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name after it in the document tree, for any zero or positive integer value of n, and has a parent element                                                                                                                                                                                                                                                                                                                                                                                                                     | `img:nth-last-of-type(2n+1)`                           |
263    /// | `:first-child`            | elements that are the first child of some other element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | `div > p:first-child`                                  |
264    /// | `:last-child`             | elements that are the last child of some other element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | `ol > li:last-child`                                   |
265    /// | `:first-of-type`          | elements that are the first sibling of its type in the list of children of its parent element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | `dl dt:first-of-type`                                  |
266    /// | `:last-of-type`           | elements that are the last sibling of its type in the list of children of its parent element                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | `tr > td:last-of-type`                                 |
267    /// | `:only-child`             | elements that have a parent element and whose parent element hasve no other element children                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |                                                        |
268    /// | `:only-of-type`           |  an element that has a parent element and whose parent element has no other element children with the same expanded element name                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |                                                        |
269    /// | `:empty`                  | elements that have no children at all                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |                                                        |
270    /// </details>
271    pub fn select<T: AsRef<str>>(&self, selector: T) -> Self {
272        let selector = selector.as_ref();
273        let host_id = unsafe { scraper_select(self.0, selector.as_ptr(), selector.len()) };
274        Self(host_id)
275    }
276
277    /// Get an attribute value by its key.
278    /// To get an absolute URL from an attribute that may be a relative URL,
279    /// prefix the key with `abs:`.
280    ///
281    /// # Example
282    /// ```ignore
283    /// // Assumes that `el` is a Node
284    /// let url = el.attr("abs:src");
285    /// ```
286    pub fn attr<T: AsRef<str>>(&self, attr: T) -> String {
287        let attr = attr.as_ref();
288        let host_id = unsafe { scraper_attr(self.0, attr.as_ptr(), attr.len()) };
289        PtrRef::new(host_id).as_string().unwrap_or_default()
290    }
291
292    /// Set the element's inner HTML, clearning the existing HTML.
293    ///
294    /// # Notice
295    /// Internally, this operates on SwiftSoup.Element, but
296    /// not on SwiftSoup.Elements, which is the type you usually get when using
297    /// methods like [Node::select](crate::html::Node::select). Either use
298    /// [Node::array](crate::html::Node::array) to iterate through each element,
299    /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
300    /// to select an element before calling this function.
301    pub fn set_html<T: AsRef<str>>(&mut self, html: T) -> Result<()> {
302        let html = html.as_ref();
303        match unsafe { scraper_set_html(self.0, html.as_ptr(), html.len()) } {
304            0 => Ok(()),
305            _ => Err(MochiError::from(NodeError::ModifyError)),
306        }
307    }
308
309    /// Set the element's text content, clearing any existing content.
310    ///
311    /// # Notice
312    /// Internally, this operates on SwiftSoup.Element, but
313    /// not on SwiftSoup.Elements, which is the type you usually get when using
314    /// methods like [Node::select](crate::html::Node::select). Either use
315    /// [Node::array](crate::html::Node::array) to iterate through each element,
316    /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
317    /// to select an element before calling this function.
318    pub fn set_text<T: AsRef<str>>(&mut self, text: T) -> Result<()> {
319        let text = text.as_ref();
320        match unsafe { scraper_set_text(self.0, text.as_ptr(), text.len()) } {
321            0 => Ok(()),
322            _ => Err(MochiError::from(NodeError::ModifyError)),
323        }
324    }
325
326    /// Add inner HTML into this element. The given HTML will be parsed, and
327    /// each node prepended to the start of the element's children.
328    ///
329    /// # Notice
330    /// Internally, this operates on SwiftSoup.Element, but
331    /// not on SwiftSoup.Elements, which is the type you usually get when using
332    /// methods like [Node::select](crate::html::Node::select). Either use
333    /// [Node::array](crate::html::Node::array) to iterate through each element,
334    /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
335    /// to select an element before calling this function.
336    pub fn prepend<T: AsRef<str>>(&mut self, html: T) -> Result<()> {
337        let html = html.as_ref();
338        match unsafe { scraper_prepend(self.0, html.as_ptr(), html.len()) } {
339            0 => Ok(()),
340            _ => Err(MochiError::from(NodeError::ModifyError)),
341        }
342    }
343
344    /// Add inner HTML into this element. The given HTML will be parsed, and
345    /// each node appended to the end of the element's children.
346    ///
347    /// # Notice
348    /// Internally, this operates on SwiftSoup.Element, but
349    /// not on SwiftSoup.Elements, which is the type you usually get when using
350    /// methods like [Node::select](crate::html::Node::select). Either use
351    /// [Node::array](crate::html::Node::array) to iterate through each element,
352    /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
353    /// to select an element before calling this function.
354    pub fn append<T: AsRef<str>>(&mut self, html: T) -> Result<()> {
355        let html = html.as_ref();
356        match unsafe { scraper_append(self.0, html.as_ptr(), html.len()) } {
357            0 => Ok(()),
358            _ => Err(MochiError::from(NodeError::ModifyError)),
359        }
360    }
361
362    /// Get the first sibling of this element, which can be this element
363    pub fn first(&self) -> Self {
364        let rid = unsafe { scraper_first(self.0) };
365        Self(rid)
366    }
367
368    /// Get the last sibling of this element, which can be this element
369    pub fn last(&self) -> Self {
370        let rid = unsafe { scraper_last(self.0) };
371        Self(rid)
372    }
373
374    /// Get the next sibling of the element, returning `None` if there isn't
375    /// one.
376    pub fn next(&self) -> Option<Node> {
377        let ptr = unsafe { scraper_next(self.0) };
378        match unsafe { ptr_kind(ptr) } {
379            Kind::Node => Some(Node(ptr)),
380            _ => None,
381        }
382    }
383
384    /// Get the previous sibling of the element, returning `None` if there isn't
385    /// one.
386    pub fn previous(&self) -> Option<Node> {
387        let ptr = unsafe { scraper_previous(self.0) };
388        match unsafe { ptr_kind(ptr) } {
389            Kind::Node => Some(Node(ptr)),
390            _ => None,
391        }
392    }
393
394    /// Get the base URI of this Node
395    pub fn base_uri(&self) -> String {
396        let ptr = unsafe { scraper_base_uri(self.0) };
397        PtrRef::new(ptr).as_string().unwrap_or_default()
398    }
399
400    /// Get the document's `body` element.
401    pub fn body(&self) -> String {
402        let ptr = unsafe { scraper_body(self.0) };
403        PtrRef::new(ptr).as_string().unwrap_or_default()
404    }
405
406    /// Get the **normalized, combined text** of this element and its children.
407    /// Whitespace is normalized and trimmed.
408    ///
409    /// For example, given HTML `<p>Hello <b>there</b> now! </p>`,
410    /// p.text() returns "Hello there now!"
411    ///
412    /// Note that this method returns text that would be presented to a reader.
413    /// The contents of data nodes (e.g. `<script>` tags) are not considered text.
414    /// Use [Node::html](crate::html::Node::html) or [Node::data](crate::html::Node::data)
415    /// to retrieve that content.
416    pub fn text(&self) -> String {
417        let ptr = unsafe { scraper_text(self.0) };
418        PtrRef::new(ptr).as_string().unwrap_or_default()
419    }
420
421    /// Get the text of this element and its children. Whitespace is **not** normalized
422    /// and trimmed.
423    ///
424    /// Notices from [Node::text](crate::html::Node::text) applies.
425    pub fn untrimmed_text(&self) -> String {
426        let ptr = unsafe { scraper_untrimmed_text(self.0) };
427        PtrRef::new(ptr).as_string().unwrap_or_default()
428    }
429
430    /// Gets the (normalized) text owned by this element only; does not get the
431    /// combined text of all children.
432    ///
433    /// Node::own_text only operates on a singular element, so calling it after
434    /// [Node::select](crate::html::Node::select) will not work. You need to get
435    /// a specific element first, through [Node::array](crate::html::Node::array)
436    /// and [ArrayRef::get](crate::std::ArrayRef::get), [Node::first](crate::html::Node::first),
437    /// or [Node::last](crate::html::Node::last).
438    pub fn own_text(&self) -> String {
439        let ptr = unsafe { scraper_own_text(self.0) };
440        PtrRef::new(ptr).as_string().unwrap_or_default()
441    }
442
443    /// Get the combined data of this element. Data is e.g. the inside of a `<script>` tag.
444    ///
445    /// Note that data is NOT the text of the element. Use [Node::text](crate::html::Node::text)
446    /// to get the text that would be visible to a user, and [Node::data](crate::html::Node::data)
447    /// for the contents of scripts, comments, CSS styles, etc.
448    pub fn data(&self) -> String {
449        let ptr = unsafe { scraper_data(self.0) };
450        PtrRef::new(ptr).as_string().unwrap_or_default()
451    }
452
453    /// Get an array of Node. This is most commonly used with
454    /// [Node::select](crate::html::Node::select) to iterate through elements
455    /// that match a selector.
456    pub fn array(&self) -> ArrayRef {
457        let rid = unsafe { scraper_array(self.0) };
458        ArrayRef::from(PtrRef::new(rid))
459    }
460
461    /// Get the node's inner HTML.
462    ///
463    /// For example, on `<div><p></p></div>`, `div.html()` would return `<p></p>`.
464    pub fn html(&self) -> String {
465        let ptr = unsafe { scraper_html(self.0) };
466        PtrRef::new(ptr).as_string().unwrap_or_default()
467    }
468
469    /// Get the node's outer HTML.
470    ///
471    /// For example, on `<div><p></p></div>`, `div.outer_html()` would return
472    /// `<div><p></p></div>`.
473    pub fn outer_html(&self) -> String {
474        let ptr = unsafe { scraper_outer_html(self.0) };
475        PtrRef::new(ptr).as_string().unwrap_or_default()
476    }
477
478    /// Get the node's text and escape any HTML-reserved characters to HTML entities.
479    ///
480    /// For example, for a node with text `Hello &<> Å å π 新 there ¾ © »`,
481    /// this would return `Hello &amp;&lt;&gt; Å å π 新 there ¾ © »`
482    pub fn escape(&self) -> String {
483        let ptr: i32 = unsafe { scraper_escape(self.0) };
484        PtrRef::new(ptr).as_string().unwrap_or_default()
485    }
486
487    /// Get the node's text and unescape any HTML entities to their original characters.
488    ///
489    /// For example, for a node with text `Hello &amp;&lt;&gt; Å å π 新 there ¾ © »`,
490    /// this would return `Hello &<> Å å π 新 there ¾ © »`.
491    pub fn unescape(&self) -> String {
492        let host_id: i32 = unsafe { scraper_unescape(self.0) };
493        PtrRef::new(host_id).as_string().unwrap_or_default()
494    }
495
496    /// Get the `id` attribute of this element.
497    pub fn id(&self) -> String {
498        let host_id = unsafe { scraper_id(self.0) };
499        PtrRef::new(host_id).as_string().unwrap_or_default()
500    }
501
502    /// Get the name of the tag for this element. This will always be the
503    /// lowercased version. For example, `<DIV>` and `<div>` would both return
504    /// `div`.
505    pub fn tag_name(&self) -> String {
506        let host_id = unsafe { scraper_tag_name(self.0) };
507        PtrRef::new(host_id).as_string().unwrap_or_default()
508    }
509
510    /// Get the literal value of this node's `class` attribute. For example,
511    /// on `<div class="header gray">` this would return `header gray`.
512    pub fn class_name(&self) -> String {
513        let host_id = unsafe { scraper_class_name(self.0) };
514        PtrRef::new(host_id).as_string().unwrap_or_default()
515    }
516
517    /// Test if this element has a class. Case insensitive.
518    pub fn has_class<T: AsRef<str>>(&self, class_name: T) -> bool {
519        let class_name = class_name.as_ref();
520        unsafe { scraper_has_class(self.0, class_name.as_ptr(), class_name.len()) }
521    }
522
523    /// Test if this element has an attribute. Case insensitive.
524    pub fn has_attr<T: AsRef<str>>(&self, attr_name: T) -> bool {
525        let attr_name = attr_name.as_ref();
526        unsafe { scraper_has_attr(self.0, attr_name.as_ptr(), attr_name.len()) }
527    }
528}
529
530impl Display for Node {
531    /// Returns the outer HTML of the node.
532    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
533        write!(f, "{}", self.outer_html())
534    }
535}
536
537impl Drop for Node {
538    fn drop(&mut self) {
539        unsafe { destroy(self.0) }
540    }
541}
542
543impl Clone for Node {
544    fn clone(&self) -> Self {
545        let ptr: HostPtr = unsafe { copy(self.0) };
546        Self(ptr)
547    }
548}
// mochi_rs/imports/html.rs