1use crate::{
4 XpathError,
5 fallback::{CssFallback, CustomFilters},
6};
7
/// Stateless entry point for evaluating XPath expressions against HTML text.
///
/// The type carries no data; its functionality is exposed through associated
/// functions such as `XpathEngine::eval_text`. The common traits are derived
/// so the type can be debug-printed, copied and default-constructed like any
/// other zero-sized handle.
#[derive(Debug, Clone, Copy, Default)]
pub struct XpathEngine;
10
11impl XpathEngine {
12 pub fn eval_text(html: &str, xpath: &str) -> Result<Vec<String>, XpathError> {
14 match Self::eval_text_sxd(html, xpath) {
16 Ok(results) => Ok(results),
17 Err(XpathError::HtmlParseError) => {
18 Self::eval_text_fallback(html, xpath)
20 }
21 Err(e) => Err(e),
22 }
23 }
24
25 fn eval_text_sxd(html: &str, xpath: &str) -> Result<Vec<String>, XpathError> {
27 let package = sxd_html::parse_html(html);
28 let document = package.as_document();
29
30 let factory = sxd_xpath::Factory::new();
31 let expression = factory
32 .build(xpath)
33 .map_err(|e| XpathError::CompileError(format!("{e:?}")))?
34 .ok_or_else(|| XpathError::CompileError("empty expression".into()))?;
35
36 let context = sxd_xpath::Context::new();
37 let value = expression
38 .evaluate(&context, document.root())
39 .map_err(|e| XpathError::EvalError(format!("{e:?}")))?;
40
41 match value {
42 sxd_xpath::Value::Nodeset(nodes) => Ok(nodes
43 .document_order()
44 .iter()
45 .map(|n| n.string_value())
46 .collect()),
47 sxd_xpath::Value::String(s) => Ok(vec![s]),
48 _ => Ok(vec![value.string()]),
49 }
50 }
51
52 fn eval_text_fallback(html: &str, xpath: &str) -> Result<Vec<String>, XpathError> {
54 if let Some(result) = CssFallback::eval_text(html, xpath) {
56 return result;
57 }
58
59 if xpath.contains("translate(") {
61 return Self::eval_text_with_custom_filters(html, xpath);
62 }
63
64 Err(XpathError::EvalError(
65 "XPath cannot be translated to CSS selector".into(),
66 ))
67 }
68
69 fn eval_text_with_custom_filters(html: &str, xpath: &str) -> Result<Vec<String>, XpathError> {
71 let base_selector = Self::extract_base_selector(xpath)?;
73
74 let html_doc = scraper::Html::parse_document(html);
75 let selector = scraper::Selector::parse(base_selector)
76 .map_err(|e| XpathError::CompileError(format!("CSS selector error: {e}")))?;
77
78 let elements: Vec<scraper::ElementRef> = html_doc.select(&selector).collect();
79 let filtered = CustomFilters::apply_filters(elements);
80
81 let results: Vec<String> = filtered.iter().map(|el| el.text().collect()).collect();
82
83 Ok(results)
84 }
85
86 fn extract_base_selector(xpath: &str) -> Result<&str, XpathError> {
88 if xpath.contains("self::article") {
90 return Ok("article");
91 }
92 if xpath.contains("self::div") {
93 return Ok("div");
94 }
95 if xpath.contains("self::section") {
96 return Ok("section");
97 }
98
99 Err(XpathError::CompileError(
100 "Cannot extract base selector from XPath".into(),
101 ))
102 }
103
104 pub fn has_match(html: &str, xpath: &str) -> bool {
106 Self::eval_text(html, xpath)
107 .map(|results| !results.is_empty())
108 .unwrap_or(false)
109 }
110}