mochi_rs/imports/html.rs
1// Most, if not all, of this source code was copied/modified from Aidoku-RS
2// https://github.com/Aidoku/aidoku-rs/blob/main/crates/imports/src/html.rs
3
4extern crate alloc;
5
6use crate::imports::core::Kind;
7use core::fmt::Display;
8
9use alloc::string::String;
10
11use super::core::{PtrRef, ArrayRef, ptr_kind};
12
13use super::error::{Result, MochiError, NodeError};
14use super::core::{HostPtr, copy, destroy};
15
16#[link(wasm_import_module = "html")]
17// #[link(name = "swift-bindings", kind = "static")]
18extern "C" {
19 #[link_name = "parse"]
20 fn scraper_parse(string: *const u8, len: usize) -> HostPtr;
21 #[link_name = "parse_with_uri"]
22 fn scraper_parse_with_uri(
23 string: *const u8,
24 len: usize,
25 base_uri: *const u8,
26 base_uri_len: usize,
27 ) -> HostPtr;
28 #[link_name = "parse_fragment"]
29 fn scraper_parse_fragment(string: *const u8, len: usize) -> HostPtr;
30 #[link_name = "parse_fragment_with_uri"]
31 fn scraper_parse_fragment_with_uri(
32 string: *const u8,
33 len: usize,
34 base_uri: *const u8,
35 base_uri_len: usize,
36 ) -> HostPtr;
37
38 #[link_name = "select"]
39 fn scraper_select(ptr: HostPtr, selector: *const u8, selector_len: usize) -> i32;
40 #[link_name = "attr"]
41 fn scraper_attr(ptr: HostPtr, selector: *const u8, selector_len: usize) -> i32;
42
43 #[link_name = "set_text"]
44 fn scraper_set_text(ptr: HostPtr, text: *const u8, text_len: usize) -> i32;
45 #[link_name = "set_html"]
46 fn scraper_set_html(ptr: HostPtr, html: *const u8, html_len: usize) -> i32;
47 #[link_name = "prepend"]
48 fn scraper_prepend(ptr: HostPtr, html: *const u8, html_len: usize) -> i32;
49 #[link_name = "append"]
50 fn scraper_append(ptr: HostPtr, html: *const u8, html_len: usize) -> i32;
51
52 #[link_name = "first"]
53 fn scraper_first(ptr: HostPtr) -> HostPtr;
54 #[link_name = "last"]
55 fn scraper_last(ptr: HostPtr) -> HostPtr;
56 #[link_name = "next"]
57 fn scraper_next(ptr: HostPtr) -> HostPtr;
58 #[link_name = "previous"]
59 fn scraper_previous(ptr: HostPtr) -> HostPtr;
60
61 #[link_name = "base_uri"]
62 fn scraper_base_uri(ptr: HostPtr) -> HostPtr;
63 #[link_name = "body"]
64 fn scraper_body(ptr: HostPtr) -> HostPtr;
65 #[link_name = "text"]
66 fn scraper_text(ptr: HostPtr) -> HostPtr;
67 #[link_name = "untrimmed_text"]
68 fn scraper_untrimmed_text(ptr: HostPtr) -> HostPtr;
69 #[link_name = "own_text"]
70 fn scraper_own_text(ptr: HostPtr) -> HostPtr;
71 #[link_name = "data"]
72 fn scraper_data(ptr: HostPtr) -> HostPtr;
73 #[link_name = "array"]
74 fn scraper_array(ptr: HostPtr) -> HostPtr;
75 #[link_name = "html"]
76 fn scraper_html(ptr: HostPtr) -> HostPtr;
77 #[link_name = "outer_html"]
78 fn scraper_outer_html(ptr: HostPtr) -> HostPtr;
79
80 #[link_name = "escape"]
81 fn scraper_escape(ptr: HostPtr) -> HostPtr;
82 #[link_name = "unescape"]
83 fn scraper_unescape(ptr: HostPtr) -> HostPtr;
84
85 #[link_name = "id"]
86 fn scraper_id(ptr: HostPtr) -> i32;
87 #[link_name = "tag_name"]
88 fn scraper_tag_name(ptr: HostPtr) -> i32;
89 #[link_name = "class_name"]
90 fn scraper_class_name(ptr: HostPtr) -> i32;
91 #[link_name = "has_class"]
92 fn scraper_has_class(ptr: HostPtr, class_name: *const u8, class_length: usize) -> bool;
93 #[link_name = "has_attr"]
94 fn scraper_has_attr(ptr: HostPtr, attr_name: *const u8, attr_length: usize) -> bool;
95}
96
97/// HTML escape an input string.
98///
99/// # Examples
100/// ```ignore
101/// assert_eq!(escape_html_entities("<"), "<");
102/// ```
103pub fn escape_html_entities<T: AsRef<str>>(text: T) -> String {
104 let ptr_ref = PtrRef::from(text.as_ref());
105 let host_id = unsafe {
106 scraper_escape(ptr_ref.pointer())
107 };
108 PtrRef::new(host_id).as_string().unwrap_or_default()
109}
110
111/// Un-escape an HTML escaped string.
112///
113/// # Examples
114/// ```ignore
115/// assert_eq!(unescape_html_entities("<"), "<");
116/// ```
117pub fn unescape_html_entities<T: AsRef<str>>(text: T) -> String {
118 let ptr_ref = PtrRef::from(text.as_ref());
119 let host_id = unsafe {
120 scraper_unescape(ptr_ref.pointer())
121 };
122 PtrRef::new(host_id).as_string().unwrap_or_default()
123}
124
125/// Type which represents a HTML node, which can be a group of elements,
126/// an element, or the entire HTML document.
127#[derive(Debug)]
128pub struct Node(pub(crate) HostPtr);
129
130impl Node {
131 /// Parse HTML into a Node. As there is no base URI specified, absolute URL
132 /// resolution requires the HTML to have a `<base href>` tag.
133 pub fn new<T: AsRef<[u8]>>(buf: T) -> Result<Self> {
134 let buf = buf.as_ref();
135 let host_id = unsafe { scraper_parse(buf.as_ptr(), buf.len()) };
136 if host_id >= 0 {
137 Ok(Self(host_id))
138 } else {
139 Err(MochiError::from(NodeError::ParserError))
140 }
141 }
142
143 /// Parse HTML into a Node. The given `base_uri` will be used for any URLs that
144 /// occurs before a `<base href>` tag is defined.
145 pub fn new_with_uri<A: AsRef<[u8]>, B: AsRef<str>>(buf: A, base_uri: B) -> Result<Self> {
146 let buf = buf.as_ref();
147 let base_uri = base_uri.as_ref();
148 let host_id = unsafe {
149 scraper_parse_with_uri(buf.as_ptr(), buf.len(), base_uri.as_ptr(), base_uri.len())
150 };
151 if host_id >= 0 {
152 Ok(Self(host_id))
153 } else {
154 Err(MochiError::from(NodeError::ParserError))
155 }
156 }
157
158 /// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
159 /// Similar to [Node::new](crate::html::Node::new), relative URLs will not
160 /// be resolved unless there is a `<base href>` tag.
161 pub fn new_fragment<T: AsRef<[u8]>>(buf: T) -> Result<Self> {
162 let buf = buf.as_ref();
163 let host_id = unsafe { scraper_parse_fragment(buf.as_ptr(), buf.len()) };
164 if host_id >= 0 {
165 Ok(Self(host_id))
166 } else {
167 Err(MochiError::from(NodeError::ParserError))
168 }
169 }
170
171 /// Parse a HTML fragment, assuming that it forms the `body` of the HTML.
172 /// Similar to [Node::new_with_uri](crate::html::Node::new_with_uri), URL
173 /// resolution occurs for any that appears before a `<base href>` tag.
174 pub fn new_fragment_with_uri<A: AsRef<[u8]>, B: AsRef<str>>(
175 buf: A,
176 base_uri: B,
177 ) -> Result<Self> {
178 let buf = buf.as_ref();
179 let base_uri = base_uri.as_ref();
180 let host_id = unsafe {
181 scraper_parse_fragment_with_uri(
182 buf.as_ptr(),
183 buf.len(),
184 base_uri.as_ptr(),
185 base_uri.len(),
186 )
187 };
188 if host_id >= 0 {
189 Ok(Self(host_id))
190 } else {
191 Err(MochiError::from(NodeError::ParserError))
192 }
193 }
194
195 /// Get an instance from a [PtrRef](crate::PtrRef)
196 ///
197 /// # Safety
198 /// Ensure that this Ptr is of [Kind::Node](crate::Kind) before
199 /// converting.
200 #[inline]
201 pub unsafe fn from(ptr: HostPtr) -> Self {
202 Self(ptr)
203 }
204
205 #[inline]
206 pub fn close(self) {
207 drop(self)
208 }
209
210 /// Find elements that matches the given CSS (or JQuery) selector.
211 ///
212 /// <details>
213 /// <summary>Supported selectors</summary>
214 ///
215 /// | Pattern | Matches | Example |
216 /// |-------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
217 /// | `*` | any element | `*` |
218 /// | `tag` | elements with the given tag name | `div` |
219 /// | <code>*\|E</code> | elements of type E in any namespace (including non-namespaced) | <code>*\|name</code> finds `<fb:name>` and `<name>` elements |
220 /// | <code>ns\|E</code> | elements of type E in the namespace ns | <code>fb\|name</code> finds `<fb:name>` elements |
221 /// | `#id` | elements with attribute ID of "id" | `div#wrap`, `#logo` |
222 /// | `.class` | elements with a class name of "class" | `div.left`, `.result` |
223 /// | `[attr]` | elements with an attribute named "attr" (with any value) | `a[href]`, `[title]` |
224 /// | `[^attrPrefix]` | elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets | `[^data-]`, `div[^data-]` |
225 /// | `[attr=val]` | elements with an attribute named "attr", and value equal to "val" | `img[width=500]`, `a[rel=nofollow]` |
226 /// | `[attr="val"]` | elements with an attribute named "attr", and value equal to "val" | `span[hello="Cleveland"][goodbye="Columbus"]`, `a[rel="nofollow"]`|
227 /// | `[attr^=valPrefix]` | elements with an attribute named "attr", and value starting with "valPrefix" | `a[href^=http:]` |
228 /// | `[attr$=valSuffix]` | elements with an attribute named "attr", and value ending with "valSuffix" | `img[src$=.png]` |
229 /// | `[attr*=valContaining]` | elements with an attribute named "attr", and value containing "valContaining" | `a[href*=/search/]` |
230 /// | `[attr~=regex]` | elements with an attribute named "attr", and value matching the regular expression | `img[src~=(?i)\\.(png\|jpe?g)]` |
231 /// | | The above may be combined in any order | `div.header[title]` |
232 ///
233 /// ## Combinators
234 /// | Pattern | Matches | Example |
235 /// |-----------|-------------------------------------------------|-----------------------------|
236 /// | `E F` | an F element descended from an E element | `div a`, `.logo h1` |
237 /// | `E > F` | an F direct child of E | `ol > li` |
238 /// | `E + F` | an F element immediately preceded by sibling E | `li + li`, `div.head + div` |
239 /// | `E ~ F` | an F element preceded by sibling E | `h1 ~ p` |
240 /// | `E, F, G` | all matching elements E, F, or G | `a[href], div, h3` |
241 ///
242 /// ## Pseudo selectors
243 /// | Pattern | Matches | Example |
244 /// |----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
245 /// | `:lt(n)` | elements whose sibling index is less than n | `td:lt(3)` finds the first 3 cells of each row |
246 /// | `:gt(n)` | elements whose sibling index is greater than n | `td:gt(1)` finds cells after skipping the first two |
247 /// | `:eq(n)` | elements whose sibling index is equal to n | `td:eq(0)` finds the first cell of each row |
248 /// | `:has(selector)` | elements that contains at least one element matching the selector | `div:has(p)` finds divs that contain p elements; `div:has(> a)` selects div elements that have at least one direct child a element. |
249 /// | `:not(selector)` | elements that do not match the selector. | `div:not(.logo)` finds all divs that do not have the "logo" class; `div:not(:has(div))` finds divs that do not contain divs. |
250 /// | `:contains(text)` | elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants. | `p:contains(SwiftSoup)` finds p elements containing the text "SwiftSoup"; `p:contains(hello \(there\))` finds p elements containing the text "Hello (There)" |
251 /// | `:matches(regex)` | elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants. | `td:matches(\\d+)` finds table cells containing digits. div:matches((?i)login) finds divs containing the text, case insensitively. |
252 /// | `:containsOwn(text)` | elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants. | `p:containsOwn(SwiftSoup)` finds p elements with own text "SwiftSoup". |
253 /// | `:matchesOwn(regex)` | elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants. | `td:matchesOwn(\\d+)` finds table cells directly containing digits. div:matchesOwn((?i)login) finds divs containing the text, case insensitively. |
254 ///
255 /// ## Structural pseudo-selectors
256 /// | Pattern | Matches | Example |
257 /// |---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|
258 /// | `:root` | The element that is the root of the document. In HTML, this is the html element | | |
259 /// | `:nth-child(an+b)` | elements that have an+b-1 siblings before it in the document tree, for any positive integer or zero value of n, and has a parent element. For values of a and b greater than zero, this effectively divides the element's children into groups of a elements (the last group taking the remainder), and selecting the bth element of each group. For example, this allows the selectors to address every other row in a table, and could be used to alternate the color of paragraph text in a cycle of four. The a and b values must be integers (positive, negative, or zero). The index of the first child of an element is 1. | |
260 /// | `:nth-last-child(an+b)` | elements that have an+b-1 siblings after it in the document tree. Otherwise like `:nth-child()` | `tr:nth-last-child(-n+2)` the last two rows of a table |
261 /// | `:nth-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name before it in the document tree, for any zero or positive integer value of n, and has a parent element | `img:nth-of-type(2n+1)` |
262 /// | `:nth-last-of-type(an+b)` | pseudo-class notation represents an element that has an+b-1 siblings with the same expanded element name after it in the document tree, for any zero or positive integer value of n, and has a parent element | `img:nth-last-of-type(2n+1)` |
263 /// | `:first-child` | elements that are the first child of some other element. | `div > p:first-child` |
264 /// | `:last-child` | elements that are the last child of some other element. | `ol > li:last-child` |
265 /// | `:first-of-type` | elements that are the first sibling of its type in the list of children of its parent element | `dl dt:first-of-type` |
266 /// | `:last-of-type` | elements that are the last sibling of its type in the list of children of its parent element | `tr > td:last-of-type` |
267 /// | `:only-child` | elements that have a parent element and whose parent element hasve no other element children | |
268 /// | `:only-of-type` | an element that has a parent element and whose parent element has no other element children with the same expanded element name | |
269 /// | `:empty` | elements that have no children at all | |
270 /// </details>
271 pub fn select<T: AsRef<str>>(&self, selector: T) -> Self {
272 let selector = selector.as_ref();
273 let host_id = unsafe { scraper_select(self.0, selector.as_ptr(), selector.len()) };
274 Self(host_id)
275 }
276
277 /// Get an attribute value by its key.
278 /// To get an absolute URL from an attribute that may be a relative URL,
279 /// prefix the key with `abs:`.
280 ///
281 /// # Example
282 /// ```ignore
283 /// // Assumes that `el` is a Node
284 /// let url = el.attr("abs:src");
285 /// ```
286 pub fn attr<T: AsRef<str>>(&self, attr: T) -> String {
287 let attr = attr.as_ref();
288 let host_id = unsafe { scraper_attr(self.0, attr.as_ptr(), attr.len()) };
289 PtrRef::new(host_id).as_string().unwrap_or_default()
290 }
291
292 /// Set the element's inner HTML, clearning the existing HTML.
293 ///
294 /// # Notice
295 /// Internally, this operates on SwiftSoup.Element, but
296 /// not on SwiftSoup.Elements, which is the type you usually get when using
297 /// methods like [Node::select](crate::html::Node::select). Either use
298 /// [Node::array](crate::html::Node::array) to iterate through each element,
299 /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
300 /// to select an element before calling this function.
301 pub fn set_html<T: AsRef<str>>(&mut self, html: T) -> Result<()> {
302 let html = html.as_ref();
303 match unsafe { scraper_set_html(self.0, html.as_ptr(), html.len()) } {
304 0 => Ok(()),
305 _ => Err(MochiError::from(NodeError::ModifyError)),
306 }
307 }
308
309 /// Set the element's text content, clearing any existing content.
310 ///
311 /// # Notice
312 /// Internally, this operates on SwiftSoup.Element, but
313 /// not on SwiftSoup.Elements, which is the type you usually get when using
314 /// methods like [Node::select](crate::html::Node::select). Either use
315 /// [Node::array](crate::html::Node::array) to iterate through each element,
316 /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
317 /// to select an element before calling this function.
318 pub fn set_text<T: AsRef<str>>(&mut self, text: T) -> Result<()> {
319 let text = text.as_ref();
320 match unsafe { scraper_set_text(self.0, text.as_ptr(), text.len()) } {
321 0 => Ok(()),
322 _ => Err(MochiError::from(NodeError::ModifyError)),
323 }
324 }
325
326 /// Add inner HTML into this element. The given HTML will be parsed, and
327 /// each node prepended to the start of the element's children.
328 ///
329 /// # Notice
330 /// Internally, this operates on SwiftSoup.Element, but
331 /// not on SwiftSoup.Elements, which is the type you usually get when using
332 /// methods like [Node::select](crate::html::Node::select). Either use
333 /// [Node::array](crate::html::Node::array) to iterate through each element,
334 /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
335 /// to select an element before calling this function.
336 pub fn prepend<T: AsRef<str>>(&mut self, html: T) -> Result<()> {
337 let html = html.as_ref();
338 match unsafe { scraper_prepend(self.0, html.as_ptr(), html.len()) } {
339 0 => Ok(()),
340 _ => Err(MochiError::from(NodeError::ModifyError)),
341 }
342 }
343
344 /// Add inner HTML into this element. The given HTML will be parsed, and
345 /// each node appended to the end of the element's children.
346 ///
347 /// # Notice
348 /// Internally, this operates on SwiftSoup.Element, but
349 /// not on SwiftSoup.Elements, which is the type you usually get when using
350 /// methods like [Node::select](crate::html::Node::select). Either use
351 /// [Node::array](crate::html::Node::array) to iterate through each element,
352 /// or use [Node::first](crate::html::Node::first)/[Node::last](crate::html::Node::last)
353 /// to select an element before calling this function.
354 pub fn append<T: AsRef<str>>(&mut self, html: T) -> Result<()> {
355 let html = html.as_ref();
356 match unsafe { scraper_append(self.0, html.as_ptr(), html.len()) } {
357 0 => Ok(()),
358 _ => Err(MochiError::from(NodeError::ModifyError)),
359 }
360 }
361
362 /// Get the first sibling of this element, which can be this element
363 pub fn first(&self) -> Self {
364 let rid = unsafe { scraper_first(self.0) };
365 Self(rid)
366 }
367
368 /// Get the last sibling of this element, which can be this element
369 pub fn last(&self) -> Self {
370 let rid = unsafe { scraper_last(self.0) };
371 Self(rid)
372 }
373
374 /// Get the next sibling of the element, returning `None` if there isn't
375 /// one.
376 pub fn next(&self) -> Option<Node> {
377 let ptr = unsafe { scraper_next(self.0) };
378 match unsafe { ptr_kind(ptr) } {
379 Kind::Node => Some(Node(ptr)),
380 _ => None,
381 }
382 }
383
384 /// Get the previous sibling of the element, returning `None` if there isn't
385 /// one.
386 pub fn previous(&self) -> Option<Node> {
387 let ptr = unsafe { scraper_previous(self.0) };
388 match unsafe { ptr_kind(ptr) } {
389 Kind::Node => Some(Node(ptr)),
390 _ => None,
391 }
392 }
393
394 /// Get the base URI of this Node
395 pub fn base_uri(&self) -> String {
396 let ptr = unsafe { scraper_base_uri(self.0) };
397 PtrRef::new(ptr).as_string().unwrap_or_default()
398 }
399
400 /// Get the document's `body` element.
401 pub fn body(&self) -> String {
402 let ptr = unsafe { scraper_body(self.0) };
403 PtrRef::new(ptr).as_string().unwrap_or_default()
404 }
405
406 /// Get the **normalized, combined text** of this element and its children.
407 /// Whitespace is normalized and trimmed.
408 ///
409 /// For example, given HTML `<p>Hello <b>there</b> now! </p>`,
410 /// p.text() returns "Hello there now!"
411 ///
412 /// Note that this method returns text that would be presented to a reader.
413 /// The contents of data nodes (e.g. `<script>` tags) are not considered text.
414 /// Use [Node::html](crate::html::Node::html) or [Node::data](crate::html::Node::data)
415 /// to retrieve that content.
416 pub fn text(&self) -> String {
417 let ptr = unsafe { scraper_text(self.0) };
418 PtrRef::new(ptr).as_string().unwrap_or_default()
419 }
420
421 /// Get the text of this element and its children. Whitespace is **not** normalized
422 /// and trimmed.
423 ///
424 /// Notices from [Node::text](crate::html::Node::text) applies.
425 pub fn untrimmed_text(&self) -> String {
426 let ptr = unsafe { scraper_untrimmed_text(self.0) };
427 PtrRef::new(ptr).as_string().unwrap_or_default()
428 }
429
430 /// Gets the (normalized) text owned by this element only; does not get the
431 /// combined text of all children.
432 ///
433 /// Node::own_text only operates on a singular element, so calling it after
434 /// [Node::select](crate::html::Node::select) will not work. You need to get
435 /// a specific element first, through [Node::array](crate::html::Node::array)
436 /// and [ArrayRef::get](crate::std::ArrayRef::get), [Node::first](crate::html::Node::first),
437 /// or [Node::last](crate::html::Node::last).
438 pub fn own_text(&self) -> String {
439 let ptr = unsafe { scraper_own_text(self.0) };
440 PtrRef::new(ptr).as_string().unwrap_or_default()
441 }
442
443 /// Get the combined data of this element. Data is e.g. the inside of a `<script>` tag.
444 ///
445 /// Note that data is NOT the text of the element. Use [Node::text](crate::html::Node::text)
446 /// to get the text that would be visible to a user, and [Node::data](crate::html::Node::data)
447 /// for the contents of scripts, comments, CSS styles, etc.
448 pub fn data(&self) -> String {
449 let ptr = unsafe { scraper_data(self.0) };
450 PtrRef::new(ptr).as_string().unwrap_or_default()
451 }
452
453 /// Get an array of Node. This is most commonly used with
454 /// [Node::select](crate::html::Node::select) to iterate through elements
455 /// that match a selector.
456 pub fn array(&self) -> ArrayRef {
457 let rid = unsafe { scraper_array(self.0) };
458 ArrayRef::from(PtrRef::new(rid))
459 }
460
461 /// Get the node's inner HTML.
462 ///
463 /// For example, on `<div><p></p></div>`, `div.html()` would return `<p></p>`.
464 pub fn html(&self) -> String {
465 let ptr = unsafe { scraper_html(self.0) };
466 PtrRef::new(ptr).as_string().unwrap_or_default()
467 }
468
469 /// Get the node's outer HTML.
470 ///
471 /// For example, on `<div><p></p></div>`, `div.outer_html()` would return
472 /// `<div><p></p></div>`.
473 pub fn outer_html(&self) -> String {
474 let ptr = unsafe { scraper_outer_html(self.0) };
475 PtrRef::new(ptr).as_string().unwrap_or_default()
476 }
477
478 /// Get the node's text and escape any HTML-reserved characters to HTML entities.
479 ///
480 /// For example, for a node with text `Hello &<> Å å π 新 there ¾ © »`,
481 /// this would return `Hello &<> Å å π 新 there ¾ © »`
482 pub fn escape(&self) -> String {
483 let ptr: i32 = unsafe { scraper_escape(self.0) };
484 PtrRef::new(ptr).as_string().unwrap_or_default()
485 }
486
487 /// Get the node's text and unescape any HTML entities to their original characters.
488 ///
489 /// For example, for a node with text `Hello &<> Å å π 新 there ¾ © »`,
490 /// this would return `Hello &<> Å å π 新 there ¾ © »`.
491 pub fn unescape(&self) -> String {
492 let host_id: i32 = unsafe { scraper_unescape(self.0) };
493 PtrRef::new(host_id).as_string().unwrap_or_default()
494 }
495
496 /// Get the `id` attribute of this element.
497 pub fn id(&self) -> String {
498 let host_id = unsafe { scraper_id(self.0) };
499 PtrRef::new(host_id).as_string().unwrap_or_default()
500 }
501
502 /// Get the name of the tag for this element. This will always be the
503 /// lowercased version. For example, `<DIV>` and `<div>` would both return
504 /// `div`.
505 pub fn tag_name(&self) -> String {
506 let host_id = unsafe { scraper_tag_name(self.0) };
507 PtrRef::new(host_id).as_string().unwrap_or_default()
508 }
509
510 /// Get the literal value of this node's `class` attribute. For example,
511 /// on `<div class="header gray">` this would return `header gray`.
512 pub fn class_name(&self) -> String {
513 let host_id = unsafe { scraper_class_name(self.0) };
514 PtrRef::new(host_id).as_string().unwrap_or_default()
515 }
516
517 /// Test if this element has a class. Case insensitive.
518 pub fn has_class<T: AsRef<str>>(&self, class_name: T) -> bool {
519 let class_name = class_name.as_ref();
520 unsafe { scraper_has_class(self.0, class_name.as_ptr(), class_name.len()) }
521 }
522
523 /// Test if this element has an attribute. Case insensitive.
524 pub fn has_attr<T: AsRef<str>>(&self, attr_name: T) -> bool {
525 let attr_name = attr_name.as_ref();
526 unsafe { scraper_has_attr(self.0, attr_name.as_ptr(), attr_name.len()) }
527 }
528}
529
530impl Display for Node {
531 /// Returns the outer HTML of the node.
532 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
533 write!(f, "{}", self.outer_html())
534 }
535}
536
537impl Drop for Node {
538 fn drop(&mut self) {
539 unsafe { destroy(self.0) }
540 }
541}
542
543impl Clone for Node {
544 fn clone(&self) -> Self {
545 let ptr: HostPtr = unsafe { copy(self.0) };
546 Self(ptr)
547 }
548}