use crate::selectors::Selector;
use rand::Rng;
use std::borrow::Cow;
use tl::VDom;
use tl::{HTMLTag, Node, NodeHandle, Parser};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Finds the parent of `handle` by scanning node ids backwards.
///
/// Starting at the id directly below `handle`'s own id, every node is
/// inspected in descending id order; the first one whose top-level children
/// contain `handle` is returned.
///
/// Returns `None` when `handle` has id 0 (nothing precedes it), when an id
/// cannot be resolved by `parser`, or when id 0 is reached without a match.
///
/// NOTE(review): this presumably relies on the parser assigning a parent a
/// smaller id than any of its children — confirm against the `tl` parser.
pub(crate) fn find_parent(handle: NodeHandle, parser: &Parser) -> Option<NodeHandle> {
    let inner = handle.get_inner();
    // `checked_sub` avoids the integer underflow that `inner - 1` hits for id 0.
    let mut next_id = inner.checked_sub(1)?;
    loop {
        let node = parser.resolve_node_id(next_id)?;
        let is_parent = node.children().map_or(false, |children| {
            children
                .top()
                .iter()
                .any(|child_handle| child_handle.get_inner() == inner)
        });
        if is_parent {
            return Some(NodeHandle::new(next_id));
        }
        next_id = next_id.checked_sub(1)?;
    }
}
/// Returns the first top-level node of `dom` that is an HTML tag.
///
/// Top-level nodes that are not tags are skipped. Yields `None` when the
/// document has no tag at the top level.
pub fn find_root<'a>(dom: &'a VDom<'a>) -> Option<&'a NodeHandle> {
    let parser = dom.parser();
    for handle in dom.children().iter() {
        if node_is_tag(handle, parser) {
            return Some(handle);
        }
    }
    None
}
/// Reports whether `node` resolves through `parser` to a `Node::Tag`.
///
/// A handle that cannot be resolved counts as "not a tag".
pub(crate) fn node_is_tag(node: &NodeHandle, parser: &Parser) -> bool {
    matches!(node.get(parser), Some(Node::Tag(..)))
}
/// Concatenates the text of the raw nodes that are immediate children of `tag`.
///
/// Only the top-level children are visited; children that cannot be resolved
/// or that are not raw nodes (e.g. nested tags) contribute nothing. The pieces
/// are joined in child order without any separator or trimming.
pub(crate) fn get_direct_inner_text(tag: &HTMLTag, parser: &Parser) -> String {
    let mut text = String::new();
    for child in tag.children().top().iter() {
        if let Some(raw) = child.get(parser).and_then(|node| node.as_raw()) {
            text.push_str(&raw.as_utf8_str());
        }
    }
    text
}
/// Returns the whitespace-trimmed value of attribute `attr` on `tag`.
///
/// Yields `None` when the attribute is absent, has no value, or its value is
/// empty after trimming.
pub(crate) fn get_trimmed_attr_value(tag: &HTMLTag, attr: &str) -> Option<String> {
    let raw = tag.attributes().get(attr).flatten()?;
    let text = raw.as_utf8_str();
    match text.trim() {
        "" => None,
        trimmed => Some(trimmed.to_string()),
    }
}
/// Returns the trimmed value of the first attribute whose name starts with
/// `attr_prefix`.
///
/// Only the first matching attribute (in the attribute iterator's order) is
/// considered: if it has no value, or its value is empty after trimming, the
/// result is `None` even when a later attribute would also match.
pub(crate) fn get_trimmed_attr_prefix_value(tag: &HTMLTag, attr_prefix: &str) -> Option<String> {
    let (_, value) = tag
        .attributes()
        .iter()
        .find(|(name, _)| name.starts_with(attr_prefix))?;
    let value = value?;
    match value.trim() {
        "" => None,
        trimmed => Some(trimmed.to_string()),
    }
}
/// Finds the first tag in `dom` whose inner text equals `text`.
///
/// Both sides of the comparison are trimmed of surrounding whitespace. Nodes
/// are visited in the order of `dom.nodes()`, and the index of the first match
/// becomes the returned handle's id. Non-tag nodes never match.
pub fn find_node_with_text(dom: &VDom, text: &str) -> Option<NodeHandle> {
    let wanted = text.trim();
    let parser = dom.parser();
    let index = dom.nodes().iter().position(|node| match node.as_tag() {
        Some(tag) => tag.inner_text(parser).trim() == wanted,
        None => false,
    })?;
    Some(NodeHandle::new(index as u32))
}
/// Returns the `id` attribute of the tag behind `handle`.
///
/// Yields `None` when the handle cannot be resolved, the node is not a tag,
/// or the tag has no `id` attribute.
pub(crate) fn get_id<'p>(handle: NodeHandle, parser: &'p Parser<'p>) -> Option<Cow<'p, str>> {
    let node = handle.get(parser)?;
    let tag = node.as_tag()?;
    let id = tag.attributes().id()?;
    Some(id.as_utf8_str())
}
/// Returns the raw `class` attribute of the tag behind `handle`.
///
/// The value is returned as a single string exactly as stored; it is not
/// split into individual class names. Yields `None` when the handle cannot be
/// resolved, the node is not a tag, or the tag has no `class` attribute.
// Nothing in this file calls this helper, hence the lint allowance.
#[allow(dead_code)]
pub(crate) fn get_classes<'p>(handle: NodeHandle, parser: &'p Parser<'p>) -> Option<Cow<'p, str>> {
    let node = handle.get(parser)?;
    let tag = node.as_tag()?;
    let classes = tag.attributes().class()?;
    Some(classes.as_utf8_str())
}
/// Highlights the element matched by `selector` with a red border.
///
/// The selector is evaluated from the document root (see `find_root`). If the
/// matched tag already has a `style` attribute with a value,
/// `; border: 1px solid red;` is appended to it; otherwise a `style`
/// attribute containing only the border rule is inserted.
///
/// Returns `true` when the style was applied. Returns `false` — instead of
/// panicking — when the document has no root tag, the selector matches
/// nothing, the matched node cannot be resolved as a tag, or the existing
/// style value cannot be replaced.
pub(crate) fn style_selected_element(selector: &Selector, dom: &mut VDom) -> bool {
    // Copy the handle out so the shared borrow of `dom` ends before mutation.
    let root = match find_root(dom) {
        Some(root) => *root,
        None => return false,
    };
    let selected = match selector.try_select(root, dom.parser()) {
        Some(handle) => handle,
        None => return false,
    };
    let attributes = match selected
        .get_mut(dom.parser_mut())
        .and_then(|node| node.as_tag_mut())
    {
        Some(tag) => tag.attributes_mut(),
        None => return false,
    };
    if let Some(Some(style)) = attributes.get_mut("style") {
        let new_style = format!("{}; border: 1px solid red;", style.as_utf8_str()).into_bytes();
        style.set(new_style).is_ok()
    } else {
        attributes.insert("style", Some("border: 1px solid red;"));
        true
    }
}
/// Picks an index into `weights` at random, proportionally to the weights.
///
/// A single `f32` is drawn from `rng` and compared against the running sum of
/// the weights; the first index whose cumulative weight reaches the drawn
/// value is returned.
///
/// NOTE(review): the comparison against an un-scaled random value suggests
/// the weights are expected to sum to roughly 1 — confirm against callers.
///
/// If the cumulative sum never reaches the drawn value (for example because
/// floating-point rounding leaves the total marginally below the sample), the
/// last index is returned rather than panicking.
///
/// # Panics
///
/// Panics when `weights` is empty, since no valid index exists.
pub(crate) fn random_index_weighted<R: Rng>(rng: &mut R, weights: &[f32]) -> usize {
    let random: f32 = rng.gen();
    let mut sum = 0f32;
    for (i, weight) in weights.iter().enumerate() {
        sum += weight;
        if sum >= random {
            return i;
        }
    }
    // Rounding in the f32 accumulation can leave `sum` just below `random`;
    // attribute the remaining probability mass to the final bucket.
    match weights.len().checked_sub(1) {
        Some(last) => last,
        None => panic!("this should not happen: {:?} {} {}", weights, random, sum),
    }
}
/// A single strategy for extracting text from an HTML tag, as interpreted by
/// `get_node_text`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TextRetrievalOption {
/// Use the trimmed text of the tag's direct raw children.
InnerText,
/// Use the trimmed value of the attribute with exactly this name.
Attribute(String),
/// Use the trimmed value of the first attribute whose name starts with this
/// prefix.
AttributeStartsWith(String),
}
/// Ordered list of retrieval strategies; `get_node_text` tries them in order
/// and returns the first one that yields non-empty text.
pub type TextRetrievalOptions = Vec<TextRetrievalOption>;
/// Extracts text from `node` using the first retrieval option that succeeds.
///
/// The options are tried in order. An option succeeds when it produces a
/// string that is non-empty after trimming; that trimmed string is returned
/// and the remaining options are not evaluated.
///
/// Returns `None` when `node` cannot be resolved, is not a tag, or no option
/// yields any text.
pub fn get_node_text(
    vdom: &VDom,
    node: NodeHandle,
    text_retrieval_options: &TextRetrievalOptions,
) -> Option<String> {
    let parser = vdom.parser();
    let tag = node.get(parser)?.as_tag()?;
    text_retrieval_options
        .iter()
        .find_map(|option| match option {
            TextRetrievalOption::InnerText => {
                let text = get_direct_inner_text(tag, parser);
                let text = text.trim();
                if text.is_empty() {
                    None
                } else {
                    Some(text.to_string())
                }
            }
            TextRetrievalOption::Attribute(name) => get_trimmed_attr_value(tag, name),
            TextRetrievalOption::AttributeStartsWith(prefix) => {
                get_trimmed_attr_prefix_value(tag, prefix)
            }
        })
}