1use crate::selectors::Selector;
2use rand::Rng;
3use std::borrow::Cow;
4use tl::VDom;
5use tl::{HTMLTag, Node, NodeHandle, Parser};
6
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10pub(crate) fn find_parent(handle: NodeHandle, parser: &Parser) -> Option<NodeHandle> {
12 let inner = handle.get_inner();
13 let mut next_id = inner - 1;
14 let mut optional_node = parser.resolve_node_id(next_id);
15 while let Some(node) = optional_node {
16 let children = node.children();
17 if children.is_some()
18 && children
19 .unwrap()
20 .top()
21 .iter()
22 .any(|child_handle| child_handle.get_inner() == inner)
23 {
24 return Some(NodeHandle::new(next_id));
25 }
26 next_id = next_id.checked_sub(1)?;
27 optional_node = parser.resolve_node_id(next_id);
28 }
29 None
30}
31
32pub fn find_root<'a>(dom: &'a VDom<'a>) -> Option<&'a NodeHandle> {
34 dom.children()
35 .iter()
36 .find(|node| node_is_tag(node, dom.parser()))
37}
38
39pub(crate) fn node_is_tag(node: &NodeHandle, parser: &Parser) -> bool {
41 node.get(parser)
42 .map(|node| matches!(node, Node::Tag(..)))
43 .unwrap_or(false)
44}
45
46pub(crate) fn get_direct_inner_text(tag: &HTMLTag, parser: &Parser) -> String {
48 tag.children()
49 .top()
50 .iter()
51 .filter_map(|child| {
52 child
53 .get(parser)
54 .and_then(|node| node.as_raw())
55 .map(|raw| raw.as_utf8_str())
56 })
57 .collect()
58}
59
60pub(crate) fn get_trimmed_attr_value(tag: &HTMLTag, attr: &str) -> Option<String> {
63 let attrv = tag.attributes().get(attr).flatten();
64 if let Some(attrv) = attrv {
65 let attrv = attrv.as_utf8_str();
66 let trimmed_attrv = attrv.trim();
67 if !trimmed_attrv.is_empty() {
68 return Some(trimmed_attrv.to_string());
69 }
70 }
71 None
72}
73
74pub(crate) fn get_trimmed_attr_prefix_value(tag: &HTMLTag, attr_prefix: &str) -> Option<String> {
76 let attrv = tag
77 .attributes()
78 .iter()
79 .find(|(attr, _)| attr.starts_with(attr_prefix))
80 .map(|(_, val)| val)
81 .flatten();
82 if let Some(attrv) = attrv {
83 let trimmed_attrv = attrv.trim();
84 if !trimmed_attrv.is_empty() {
85 return Some(trimmed_attrv.to_string());
86 }
87 }
88 None
89}
90
91pub fn find_node_with_text(dom: &VDom, text: &str) -> Option<NodeHandle> {
95 dom.nodes()
96 .iter()
97 .enumerate()
98 .find(|(_, node)| {
99 node.as_tag().is_some()
100 && node
101 .as_tag()
102 .unwrap()
103 .inner_text(dom.parser())
104 .as_ref()
105 .trim()
106 == text.trim()
107 })
108 .map(|(i, _)| NodeHandle::new(i as u32))
109}
110
111pub(crate) fn get_id<'p>(handle: NodeHandle, parser: &'p Parser<'p>) -> Option<Cow<'p, str>> {
113 Some(
114 handle
115 .get(parser)?
116 .as_tag()?
117 .attributes()
118 .id()?
119 .as_utf8_str(),
120 )
121}
122
123#[allow(dead_code)] pub(crate) fn get_classes<'p>(handle: NodeHandle, parser: &'p Parser<'p>) -> Option<Cow<'p, str>> {
126 Some(
127 handle
128 .get(parser)?
129 .as_tag()?
130 .attributes()
131 .class()?
132 .as_utf8_str(),
133 )
134}
135
136pub(crate) fn style_selected_element(selector: &Selector, dom: &mut VDom) -> bool {
139 if let Some(node) = selector.try_select(*find_root(dom).unwrap(), dom.parser()) {
140 let attributes = node
141 .get_mut(dom.parser_mut())
142 .unwrap()
143 .as_tag_mut()
144 .unwrap()
145 .attributes_mut();
146 if let Some(Some(style)) = attributes.get_mut("style") {
147 let new_style = format!("{}; border: 1px solid red;", style.as_utf8_str()).into_bytes();
149 style.set(new_style).is_ok()
150 } else {
151 attributes.insert("style", Some("border: 1px solid red;"));
152 true
153 }
154 } else {
155 false
156 }
157}
158
159pub(crate) fn random_index_weighted<R: Rng>(rng: &mut R, weights: &[f32]) -> usize {
160 let random: f32 = rng.gen();
161 let mut sum = 0f32;
162 for (i, weight) in weights.iter().enumerate() {
163 sum += weight;
164 if sum >= random {
165 return i;
166 }
167 }
168 panic!("this should not happen: {:?} {} {}", weights, random, sum);
169}
170
/// A strategy for extracting text from an HTML tag.
///
/// See `get_node_text`, which tries a list of these in order and returns the
/// first non-empty result.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TextRetrievalOption {
    /// Use the trimmed text of the tag's direct raw children.
    InnerText,
    /// Use the trimmed value of the attribute with exactly this name.
    Attribute(String),
    /// Use the trimmed value of the first attribute whose name starts with
    /// this prefix.
    AttributeStartsWith(String),
}

/// An ordered list of text retrieval strategies; earlier entries are tried
/// first.
pub type TextRetrievalOptions = Vec<TextRetrievalOption>;
188
189pub fn get_node_text(
191 vdom: &VDom,
192 node: NodeHandle,
193 text_retrieval_options: &TextRetrievalOptions,
194) -> Option<String> {
195 node.get(vdom.parser())
196 .and_then(|node| node.as_tag())
197 .and_then(|tag| {
198 for option in text_retrieval_options {
199 match option {
200 TextRetrievalOption::InnerText => {
201 let inner_text = get_direct_inner_text(tag, vdom.parser());
202 let trimmed_inner_text = inner_text.trim();
203 if !trimmed_inner_text.is_empty() {
204 return Some(trimmed_inner_text.to_string());
205 }
206 }
207 TextRetrievalOption::Attribute(name) => {
208 let value = get_trimmed_attr_value(tag, &name);
209 if value.is_some() {
210 return value;
211 }
212 }
213 TextRetrievalOption::AttributeStartsWith(prefix) => {
214 let value = get_trimmed_attr_prefix_value(tag, &prefix);
215 if value.is_some() {
216 return value;
217 }
218 }
219 }
220 }
221
222 None
223 })
224}