DOM_QUERY
A crate for HTML querying and manipulations with CSS selectors.
DOM_QUERY is based on the HTML parsing crate html5ever and the CSS selector crate selectors. You can use jQuery-like syntax to query and manipulate an HTML document quickly. With its help you can query the DOM and modify it.
It is a fork of nipper, with a lot of updates. This fork also supports the `:has`, `:has-text`, and `:contains` pseudo-classes, and some others.
Examples
use Document;
use StrTendril;
// Document may consume &str, String, StrTendril
let contents_str = r#"<!DOCTYPE html>
<html><head><title>Test Page</title></head><body></body></html>"#;
let doc = from;
let contents_string = contents_str.to_string;
let doc = from;
let contents_tendril = from;
let doc = from;
// The root element for the `Document` is a Document
assert!;
// if the source has DocType, then the Document will also have one
// as a first child.
assert!;
// neither of them is an element.
use Document;
use StrTendril;
// fragment can be created with Document::fragment(), which accepts &str, String, StrTendril
let contents_str = r#"<!DOCTYPE html>
<html><head><title>Test Page</title></head><body></body></html>"#;
let fragment = fragment;
let contents_string = contents_str.to_string;
let fragment = fragment;
let contents_tendril = from;
let fragment = fragment;
// The root element for the fragment is not a Document but a Fragment
assert!;
assert!;
// and when it parses a fragment, it drops Doctype
assert!;
use Document;
let html = r#"<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Test Page</title>
</head>
<body>
<h1>Test Page</h1>
<ul>
<li>One</li>
<li><a href="/2">Two</a></li>
<li><a href="/3">Three</a></li>
</ul>
</body>
</html>"#;
let document = from;
// select a single element
let a = document.select;
let text = a.text.to_string;
assert!;
// selecting multiple elements
document.select.iter.for_each
// there is also `try_select` which returns an Option
let no_sel = document.try_select;
assert!;
use Document;
let doc: Document = r#"<!DOCTYPE html>
<html lang="en">
<head></head>
<body>
<ul class="list">
<li>1</li><li>2</li><li>3</li>
</ul>
<ul class="list">
<li>4</li><li>5</li><li>6</li>
</ul>
</body>
</html>"#
.into;
// if you need to select only the first, single match, you can use following:
let single_selection = doc.select_single;
// access is only to the first match:
assert_eq!;
assert_eq!;
// simple selection contains all matches:
let selection = doc.select;
assert_eq!;
// but if you call inner_html() on it, you will get the inner_html of the first match:
assert_eq!;
// this approach takes the first node from the nodes vec, whereas `select_single` consumes only one iteration.
let first_selection = doc.select.first;
assert_eq!;
assert_eq!;
// this approach first collects all nodes into a vec, and then you can call `iter().next()` to get the first one.
let next_selection = doc.select.iter.next.unwrap;
assert_eq!;
assert_eq!;
// currently, to get data from all matches you need to iterate over them:
let all_matched: String = selection
.iter
.map
.collect;
assert_eq!;
use Document;
let html = r#"<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Test Page</title>
</head>
<body>
<h1>Test Page</h1>
<ul class="list-a">
<li>One</li>
<li><a href="/2">Two</a></li>
<li><a href="/3">Three</a></li>
</ul>
<ul class="list-b">
<li><a href="/4">Four</a></li>
</ul>
</body>
</html>"#;
let document = from;
// select a parent element
let ul = document.select;
// selecting multiple elements
ul.select.iter.for_each;
// a descendant selector may also be specified starting from the parent elements
let el = ul.select.first;
let text = el.text;
assert_eq!;
use ;
let html1 = r#"<!DOCTYPE html><html><head><title>Test Page 1</title></head><body></body></html>"#;
let html2 = r#"<!DOCTYPE html><html><head><title>Test Page 2</title></head><body></body></html>"#;
let doc1 = from;
let doc2 = from;
// create a matcher once, reuse on different documents
let title_matcher = new.unwrap;
let title_el1 = doc1.select_matcher;
assert_eq!;
let title_el2 = doc2.select_matcher;
assert_eq!;
// selecting a single match
let title_single = doc1.select_single_matcher;
assert_eq!;
use Document;
let html = r#"<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body><input hidden="" id="k" class="important" type="hidden" name="k" data-k="100"></body>
</html>"#;
let doc = from;
let mut input_selection = doc.select;
// get the value of attribute "data-k"
let val = input_selection.attr.unwrap;
assert_eq!;
// remove the attribute "data-k" from the element
input_selection.remove_attr;
// get the value of attribute "data-k", if missing, return default value
let val_or = input_selection.attr_or;
assert_eq!;
// remove a list of attributes from the element
input_selection.remove_attrs;
// set an attribute "data-k" with value "200"
input_selection.set_attr;
assert_eq!;
// check if attribute "hidden" exists on the element
let is_hidden = input_selection.has_attr;
assert!;
let has_title = input_selection.has_attr;
assert!;
// remove all attributes from the element
input_selection.remove_all_attrs;
assert_eq!;
use Document;
let html = r#"<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body><div class="content"><h1>Test Page</h1></div></body>
</html>"#;
let doc = from;
let heading_selector = doc.select;
// serializing including the outer html tag
let content = heading_selector.html;
assert_eq!;
// serializing without the outer html tag
let inner_content = heading_selector.inner_html;
assert_eq!;
// there is also `try_html()` method, which returns an `Option<StrTendril>`,
// and if there is no matching selection it returns None
let opt_no_content = doc.select.try_html;
assert_eq!;
//`html()` method will return an empty `StrTendril` if there is no matching selection
let no_content = doc.select.html;
assert_eq!;
// The same works for the `inner_html()` and `try_inner_html()` methods.
assert_eq!;
assert_eq!;
use Document;
let html = r#"<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body><div><h1>Test <span>Page</span></h1></div></body>
</html>"#;
let doc = from;
let body_selection = doc.select.first;
let text = body_selection.text;
assert_eq!;
use Document;
let html = include_str!;
let doc = from;
// searching list items inside a `tr` element which has an `a` element with title="Programming paradigm"
let paradigm_selection = doc.select;
println!;
for item in paradigm_selection.iter
println!;
//since `th` contains text "Influenced by" without sibling tags, we can use `:has-text` pseudo class
let influenced_by_selection = doc.select;
println!;
for item in influenced_by_selection.iter
println!;
// Extract all links from the block that contains certain text.
// Since `foreign function interface` is located in its own tag,
// we have to use `:contains` pseudo class
let links_selection = doc.select;
println!;
for item in links_selection.iter
println!;
use Document;
let html_contents = r#"<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<div class="content">
<p>9,8,7</p>
</div>
<div class="remove-it">
Remove me
</div>
<div class="replace-it">
<div>Replace me</div>
</div>
</body>
</html>"#;;
let doc = from;
let mut content_selection = doc.select;
// append a new html node to the selection
content_selection.append_html;
assert!;
// set a new content to the selection, replacing existing content
let mut set_selection = doc.select;
set_selection.set_html;
assert_eq!;
// remove the selection
doc.select.remove;
assert!;
// replace the selection with a new html, current selection will not change.
let mut replace_selection = doc.select;
replace_selection.replace_with_html;
assert_eq!;
//but the document will change
assert_eq!;
use Document;
let doc: Document = r#"<!DOCTYPE html>
<html lang="en">
<head></head>
<body>
<div id="main">
<p id="first">It's</p>
<div>
</body>
</html>"#.into;
// selecting a node we want to attach a new element
let main_sel = doc.select_single;
let main_node = main_sel.nodes.first.unwrap;
// if you need just to create an empty element, then you can use the following:
let el = doc.tree.new_element;
// you are still able to work with the element's attributes:
el.set_attr;
doc.tree.append_child_of;
assert!;
// because this method doesn't parse anything, it is much cheaper than the following approaches.
// if you need to add a more complex element, you can use `node.append_html`,
// which is much more convenient than the previous approach:
main_node.append_html;
assert_eq!;
assert!;
// if we need to replace existing element content with a new one, then use `node.set_html`:
main_node.set_html;
assert_eq!;
assert!;
use Document;
let doc: Document = r#"<!DOCTYPE>
<html>
<head><title>Test</title></head>
<body>
<div class="content">
<div>1</div>
<div>2</div>
<div>3</div>
<span>4</span>
</div>
<body>
</html>"#
.into;
let mut sel = doc.select;
// before renaming, there are 3 `div` and 1 `span`
assert_eq!;
sel.rename;
// after renaming, there are no `div` and `span` elements
assert_eq!;
// but there are three `p` elements
assert_eq!;
Related projects
Features
hashbrown — optional; standard hashmaps and hashsets will be replaced with hashbrown hashmaps and hashsets.
Changelog
License
Licensed under MIT (LICENSE or http://opensource.org/licenses/MIT)
Contribution
Any contribution intentionally submitted for inclusion in the work by you shall be licensed under the MIT license, without any additional terms or conditions.