fhp_selector/xpath/ast.rs
1//! XPath expression AST types.
2//!
3//! Represents a subset of XPath 1.0 sufficient for common web scraping tasks.
4
5use fhp_core::tag::Tag;
6
7/// A parsed XPath expression.
8#[derive(Debug, Clone, PartialEq)]
9pub enum XPathExpr {
10 /// `//tag` — find descendants by tag name.
11 DescendantByTag(Tag),
12
13 /// `//tag[@attr='value']` — find descendants by tag with an attribute
14 /// predicate.
15 DescendantByAttr {
16 /// The tag to match.
17 tag: Tag,
18 /// Attribute name.
19 attr: String,
20 /// Attribute value.
21 value: String,
22 },
23
24 /// `//tag[@attr]` — descendants by tag with attribute existence predicate.
25 DescendantByAttrExists {
26 /// The tag to match.
27 tag: Tag,
28 /// Attribute name.
29 attr: String,
30 },
31
32 /// `/path/to/tag` — absolute path from root.
33 AbsolutePath(Vec<PathStep>),
34
35 /// `//tag[contains(@attr, 'substr')]` — find descendants by tag with a
36 /// contains predicate.
37 ContainsPredicate {
38 /// The tag to match.
39 tag: Tag,
40 /// Attribute name.
41 attr: String,
42 /// Substring to search for.
43 substr: String,
44 },
45
46 /// `//tag[position()=N]` — find descendants by tag at a specific position.
47 PositionPredicate {
48 /// The tag to match.
49 tag: Tag,
50 /// 1-based position.
51 pos: usize,
52 },
53
54 /// `//tag/text()` or expression/text() — extract text from matched nodes.
55 TextExtract(Box<XPathExpr>),
56
57 /// `*` in descendant context — `//\*`
58 DescendantWildcard,
59
60 /// `//\*[@attr='value']` — wildcard with attribute predicate.
61 DescendantWildcardByAttr {
62 /// Attribute name.
63 attr: String,
64 /// Attribute value.
65 value: String,
66 },
67
68 /// `//\*[@attr]` — wildcard with attribute existence predicate.
69 DescendantWildcardByAttrExists {
70 /// Attribute name.
71 attr: String,
72 },
73
74 /// `..` — parent axis (relative to a context node).
75 Parent,
76}
77
78/// A single step in an absolute path (`/step/step/...`).
79#[derive(Debug, Clone, PartialEq)]
80pub struct PathStep {
81 /// Tag name for this step.
82 pub tag: Tag,
83 /// Optional predicate for this step.
84 pub predicate: Option<Predicate>,
85}
86
/// A predicate written inside square brackets, `[...]`.
#[derive(Clone, Debug, PartialEq)]
pub enum Predicate {
    /// Exact attribute match, written `[@attr='value']`.
    AttrEquals {
        /// Name of the attribute.
        attr: String,
        /// Value the attribute is expected to have.
        value: String,
    },

    /// Attribute substring match, written `[contains(@attr, 'substr')]`.
    Contains {
        /// Name of the attribute.
        attr: String,
        /// Substring to look for.
        substr: String,
    },

    /// Position among siblings of the same type, counted from 1 and written
    /// `[position()=N]`.
    Position(usize),

    /// Attribute existence check, written `[@attr]`.
    AttrExists {
        /// Name of the attribute.
        attr: String,
    },
}
115
116/// Result of evaluating an XPath expression.
117#[derive(Debug, Clone, PartialEq)]
118pub enum XPathResult {
119 /// A list of matched node ids.
120 Nodes(Vec<fhp_tree::node::NodeId>),
121 /// A list of extracted strings (e.g. from `text()`).
122 Strings(Vec<String>),
123 /// A boolean result.
124 Boolean(bool),
125}
126
127impl XPathResult {
128 /// Returns `true` if the result contains no nodes or strings.
129 pub fn is_empty(&self) -> bool {
130 match self {
131 Self::Nodes(v) => v.is_empty(),
132 Self::Strings(v) => v.is_empty(),
133 Self::Boolean(_) => false,
134 }
135 }
136
137 /// Returns the number of items in the result.
138 pub fn len(&self) -> usize {
139 match self {
140 Self::Nodes(v) => v.len(),
141 Self::Strings(v) => v.len(),
142 Self::Boolean(_) => 1,
143 }
144 }
145}