// fhp_selector/xpath/ast.rs

//! XPath expression AST types.
//!
//! Represents a subset of XPath 1.0 sufficient for common web scraping tasks.

use fhp_core::tag::Tag;

7/// A parsed XPath expression.
8#[derive(Debug, Clone, PartialEq)]
9pub enum XPathExpr {
10    /// `//tag` — find descendants by tag name.
11    DescendantByTag(Tag),
12
13    /// `//tag[@attr='value']` — find descendants by tag with an attribute
14    /// predicate.
15    DescendantByAttr {
16        /// The tag to match.
17        tag: Tag,
18        /// Attribute name.
19        attr: String,
20        /// Attribute value.
21        value: String,
22    },
23
24    /// `//tag[@attr]` — descendants by tag with attribute existence predicate.
25    DescendantByAttrExists {
26        /// The tag to match.
27        tag: Tag,
28        /// Attribute name.
29        attr: String,
30    },
31
32    /// `/path/to/tag` — absolute path from root.
33    AbsolutePath(Vec<PathStep>),
34
35    /// `//tag[contains(@attr, 'substr')]` — find descendants by tag with a
36    /// contains predicate.
37    ContainsPredicate {
38        /// The tag to match.
39        tag: Tag,
40        /// Attribute name.
41        attr: String,
42        /// Substring to search for.
43        substr: String,
44    },
45
46    /// `//tag[position()=N]` — find descendants by tag at a specific position.
47    PositionPredicate {
48        /// The tag to match.
49        tag: Tag,
50        /// 1-based position.
51        pos: usize,
52    },
53
54    /// `//tag/text()` or expression/text() — extract text from matched nodes.
55    TextExtract(Box<XPathExpr>),
56
57    /// `*` in descendant context — `//\*`
58    DescendantWildcard,
59
60    /// `//\*[@attr='value']` — wildcard with attribute predicate.
61    DescendantWildcardByAttr {
62        /// Attribute name.
63        attr: String,
64        /// Attribute value.
65        value: String,
66    },
67
68    /// `//\*[@attr]` — wildcard with attribute existence predicate.
69    DescendantWildcardByAttrExists {
70        /// Attribute name.
71        attr: String,
72    },
73
74    /// `..` — parent axis (relative to a context node).
75    Parent,
76}
78/// A single step in an absolute path (`/step/step/...`).
79#[derive(Debug, Clone, PartialEq)]
80pub struct PathStep {
81    /// Tag name for this step.
82    pub tag: Tag,
83    /// Optional predicate for this step.
84    pub predicate: Option<Predicate>,
85}
/// A predicate inside square brackets `[...]`.
///
/// Each variant shows the bracketed XPath syntax it represents.
#[derive(Debug, Clone, PartialEq)]
pub enum Predicate {
    /// `[@attr='value']` — exact attribute match.
    AttrEquals {
        /// Attribute name.
        attr: String,
        /// Expected value.
        value: String,
    },

    /// `[contains(@attr, 'substr')]` — attribute substring match.
    Contains {
        /// Attribute name.
        attr: String,
        /// Substring to find.
        substr: String,
    },

    /// `[position()=N]` — 1-based position among siblings of same type.
    Position(usize),

    /// `[@attr]` — attribute existence check.
    AttrExists {
        /// Attribute name.
        attr: String,
    },
}
116/// Result of evaluating an XPath expression.
117#[derive(Debug, Clone, PartialEq)]
118pub enum XPathResult {
119    /// A list of matched node ids.
120    Nodes(Vec<fhp_tree::node::NodeId>),
121    /// A list of extracted strings (e.g. from `text()`).
122    Strings(Vec<String>),
123    /// A boolean result.
124    Boolean(bool),
125}
127impl XPathResult {
128    /// Returns `true` if the result contains no nodes or strings.
129    pub fn is_empty(&self) -> bool {
130        match self {
131            Self::Nodes(v) => v.is_empty(),
132            Self::Strings(v) => v.is_empty(),
133            Self::Boolean(_) => false,
134        }
135    }
136
137    /// Returns the number of items in the result.
138    pub fn len(&self) -> usize {
139        match self {
140            Self::Nodes(v) => v.len(),
141            Self::Strings(v) => v.len(),
142            Self::Boolean(_) => 1,
143        }
144    }
145}