/// A parsed selector, tagged with the query language its expression is written in.
///
/// Both variants own the expression text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Query {
    /// Expression text to be treated as a CSS selector.
    Css(String),
    /// Expression text to be treated as an XPath expression.
    Xpath(String),
}

impl Query {
    /// Borrows the expression text, regardless of which variant holds it.
    pub fn as_str(&self) -> &str {
        // Both variants carry exactly one `String`, so this pattern is irrefutable.
        let (Self::Css(expr) | Self::Xpath(expr)) = self;
        expr.as_str()
    }

    /// Returns `true` only for the [`Query::Xpath`] variant.
    pub fn is_xpath(&self) -> bool {
        match self {
            Self::Xpath(_) => true,
            Self::Css(_) => false,
        }
    }
}
/// Renders `s` as a quoted XPath string literal.
///
/// The quoting style is chosen so the delimiter never occurs inside the quoted text:
///
/// * no `"` in `s`: wrapped in double quotes;
/// * `"` but no `'` in `s`: wrapped in single quotes;
/// * both present: a `concat(...)` call whose arguments are the double-quoted pieces of
///   `s` between its `"` characters, with a single-quoted `'"'` argument between
///   consecutive pieces.
pub fn xpath_literal(s: &str) -> String {
    let has_double = s.contains('"');
    let has_single = s.contains('\'');

    match (has_double, has_single) {
        (false, _) => format!("\"{s}\""),
        (true, false) => format!("'{s}'"),
        (true, true) => {
            // Splitting on `"` leaves pieces that are safe to wrap in double quotes.
            let pieces = s
                .split('"')
                .map(|chunk| format!("\"{chunk}\""))
                .collect::<Vec<_>>();
            format!("concat({})", pieces.join(", '\"', "))
        }
    }
}
/// A selector prefix paired with the constructor applied to the text that follows it.
type PrefixRule = (&'static str, fn(&str) -> Query);

/// Parses a shorthand selector string into a [`Query`].
///
/// The selector is trimmed, then the first matching rule wins, checked in this order:
///
/// 1. A keyword prefix, matched ASCII case-insensitively, with leading whitespace after
///    the prefix dropped: `xpath:` / `x:` give an XPath query, `css:` / `c:` and
///    `tag:` / `t:` give a CSS query, and `text:` gives a text-containment XPath.
/// 2. A leading `@`: the remainder is parsed as an attribute selector.
/// 3. A leading `#` or `.`: the whole selector is used as a CSS query.
/// 4. Anything else: the selector is used as the text of a text-containment XPath.
pub fn parse(selector: &str) -> Query {
    const PREFIX_RULES: &[PrefixRule] = &[
        ("xpath:", |expr| Query::Xpath(expr.to_owned())),
        ("x:", |expr| Query::Xpath(expr.to_owned())),
        ("css:", |expr| Query::Css(expr.to_owned())),
        ("c:", |expr| Query::Css(expr.to_owned())),
        ("tag:", |tag| Query::Css(tag.trim().to_owned())),
        ("t:", |tag| Query::Css(tag.trim().to_owned())),
        ("text:", text_contains_xpath),
    ];

    let sel = selector.trim();

    // Table order is significant: the first rule whose prefix matches decides the result.
    let by_prefix = PREFIX_RULES
        .iter()
        .find_map(|&(prefix, make)| strip_prefix_ci(sel, prefix).map(make));
    if let Some(query) = by_prefix {
        return query;
    }

    match sel.strip_prefix('@') {
        Some(attribute) => parse_attribute(attribute),
        None if sel.starts_with(['#', '.']) => Query::Css(sel.to_owned()),
        None => text_contains_xpath(sel),
    }
}
/// Turns the text after a leading `@` into an XPath [`Query`].
///
/// `rest` is split at its first `:` or `=` into a name (trimmed) and a value (kept
/// verbatim); with no separator the whole of `rest` is the name and the value is empty.
///
/// * A name of `text()` or `text` (ASCII case-insensitive) yields a text-containment
///   XPath for the value.
/// * An empty value yields `//*[@name]`.
/// * Otherwise the result is `//*[@name=<literal>]`, with the value quoted by
///   [`xpath_literal`].
fn parse_attribute(rest: &str) -> Query {
    // Both separators are single-byte, so this matches slicing around the found index.
    let (raw_name, value) = rest.split_once([':', '=']).unwrap_or((rest, ""));
    let name = raw_name.trim();

    let is_text_pseudo = ["text()", "text"]
        .iter()
        .any(|&keyword| name.eq_ignore_ascii_case(keyword));
    if is_text_pseudo {
        return text_contains_xpath(value);
    }

    let expr = if value.is_empty() {
        format!("//*[@{name}]")
    } else {
        format!("//*[@{}={}]", name, xpath_literal(value))
    };
    Query::Xpath(expr)
}
/// Builds the XPath query `//*[contains(normalize-space(.), <literal>)]`, where
/// `<literal>` is the trimmed `text` quoted by [`xpath_literal`].
fn text_contains_xpath(text: &str) -> Query {
    let needle = xpath_literal(text.trim());
    Query::Xpath(format!("//*[contains(normalize-space(.), {needle})]"))
}
/// A parsed selector kept in structured form rather than rendered to an XPath string.
///
/// Produced by [`parse_static`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StaticQuery {
    /// CSS selector text.
    Css(String),
    /// An attribute name together with the value given for it
    /// (from `@name:value` or `@name=value`).
    AttrEq {
        /// Attribute name, with surrounding whitespace removed.
        name: String,
        /// Attribute value, exactly as written after the separator.
        value: String,
    },
    /// An attribute name given without a value (from `@name`).
    AttrPresent(String),
    /// Text to look for (from `text:...`, `@text():...`, or a bare selector).
    TextContains(String),
    /// XPath expression text.
    Xpath(String),
}
pub fn parse_static(selector: &str) -> StaticQuery {
let sel = selector.trim();
type StaticPrefixRule = (&'static str, fn(&str) -> StaticQuery);
const PREFIX_RULES: &[StaticPrefixRule] = &[
("xpath:", |r| StaticQuery::Xpath(r.to_string())),
("x:", |r| StaticQuery::Xpath(r.to_string())),
("css:", |r| StaticQuery::Css(r.to_string())),
("c:", |r| StaticQuery::Css(r.to_string())),
("tag:", |r| StaticQuery::Css(r.trim().to_string())),
("t:", |r| StaticQuery::Css(r.trim().to_string())),
("text:", |r| StaticQuery::TextContains(r.trim().to_string())),
];
for &(prefix, make) in PREFIX_RULES {
if let Some(rest) = strip_prefix_ci(sel, prefix) {
return make(rest);
}
}
if let Some(rest) = sel.strip_prefix('@') {
return parse_attribute_static(rest);
}
if sel.starts_with('#') || sel.starts_with('.') {
return StaticQuery::Css(sel.to_string());
}
StaticQuery::TextContains(sel.to_string())
}
/// Turns the text after a leading `@` into a structured [`StaticQuery`].
///
/// `rest` is split at its first `:` or `=` into a name (trimmed) and a value (kept
/// verbatim); with no separator the whole of `rest` is the name and the value is empty.
///
/// * A name of `text()` or `text` (ASCII case-insensitive) yields
///   [`StaticQuery::TextContains`] holding the trimmed value.
/// * An empty value yields [`StaticQuery::AttrPresent`].
/// * Otherwise the result is [`StaticQuery::AttrEq`].
fn parse_attribute_static(rest: &str) -> StaticQuery {
    // Both separators are single-byte, so this matches slicing around the found index.
    let (raw_name, value) = rest.split_once([':', '=']).unwrap_or((rest, ""));
    let name = raw_name.trim();

    let is_text_pseudo = ["text()", "text"]
        .iter()
        .any(|&keyword| name.eq_ignore_ascii_case(keyword));
    if is_text_pseudo {
        return StaticQuery::TextContains(value.trim().to_owned());
    }

    let name = name.to_owned();
    match value {
        "" => StaticQuery::AttrPresent(name),
        _ => StaticQuery::AttrEq {
            name,
            value: value.to_owned(),
        },
    }
}
/// Strips `prefix` from the start of `s`, comparing ASCII case-insensitively.
///
/// Returns the remainder with leading whitespace removed, or `None` when `s` does not
/// start with `prefix`. That includes the cases where `s` is shorter than `prefix` or
/// where `prefix.len()` does not fall on a character boundary of `s`.
fn strip_prefix_ci<'a>(s: &'a str, prefix: &str) -> Option<&'a str> {
    let cut = prefix.len();
    // Checked slicing: `get` yields `None` instead of panicking on a short string or
    // on a cut point that lands inside a multi-byte character.
    let head = s.get(..cut)?;
    let tail = s.get(cut..)?;
    head.eq_ignore_ascii_case(prefix)
        .then(|| tail.trim_start())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected CSS query holding `expr`.
    fn css(expr: &str) -> Query {
        Query::Css(expr.to_string())
    }

    /// Expected XPath query holding `expr`.
    fn xpath(expr: &str) -> Query {
        Query::Xpath(expr.to_string())
    }

    /// Asserts that `parse` maps every selector in `cases` to its expected query.
    fn check_parse(cases: &[(&str, Query)]) {
        for (selector, expected) in cases {
            assert_eq!(&parse(selector), expected, "selector: {selector:?}");
        }
    }

    // `#id` and `.class` shorthands pass through unchanged as CSS.
    #[test]
    fn css_shorthands() {
        check_parse(&[("#kw", css("#kw")), (".title.foo", css(".title.foo"))]);
    }

    // Long and short keyword prefixes select the query language explicitly.
    #[test]
    fn explicit_css_and_xpath() {
        check_parse(&[
            ("css:div.box", css("div.box")),
            ("c:div.box", css("div.box")),
            ("xpath://div[@id='a']", xpath("//div[@id='a']")),
            ("x://a", xpath("//a")),
        ]);
    }

    // Tag prefixes produce a CSS query for the tag name.
    #[test]
    fn tag_prefix() {
        check_parse(&[("tag:li", css("li")), ("t:h3", css("h3"))]);
    }

    // Both `:` and `=` separate an attribute name from its value.
    #[test]
    fn attribute_colon_and_eq() {
        check_parse(&[
            ("@id:kw", xpath(r#"//*[@id="kw"]"#)),
            ("@id=kw", xpath(r#"//*[@id="kw"]"#)),
            (
                "@class=project list",
                xpath(r#"//*[@class="project list"]"#),
            ),
        ]);
    }

    // An attribute without a value only tests for presence.
    #[test]
    fn attribute_presence_only() {
        check_parse(&[("@disabled", xpath("//*[@disabled]"))]);
    }

    // `@text()` is routed to the text-containment XPath.
    #[test]
    fn attribute_text() {
        check_parse(&[(
            "@text():登录",
            xpath(r#"//*[contains(normalize-space(.), "登录")]"#),
        )]);
    }

    // The `text:` prefix and a bare selector both produce a text-containment XPath.
    #[test]
    fn text_prefix_and_default() {
        let expected = xpath(r#"//*[contains(normalize-space(.), "提交")]"#);
        check_parse(&[("text:提交", expected.clone()), ("提交", expected)]);
    }

    // Quoting switches style depending on which quote characters the input contains.
    #[test]
    fn xpath_literal_quotes() {
        let cases = [
            ("abc", r#""abc""#),
            (r#"say "hi""#, r#"'say "hi"'"#),
            ("a\"b'c", r#"concat("a", '"', "b'c")"#),
        ];
        for (input, expected) in cases {
            assert_eq!(xpath_literal(input), expected, "input: {input:?}");
        }
    }

    // `parse_static` maps each selector form to its structured variant.
    #[test]
    fn static_query_mapping() {
        let cases = [
            ("#kw", StaticQuery::Css("#kw".to_string())),
            (".a.b", StaticQuery::Css(".a.b".to_string())),
            ("css:div.box", StaticQuery::Css("div.box".to_string())),
            ("tag:li", StaticQuery::Css("li".to_string())),
            (
                "@id:kw",
                StaticQuery::AttrEq {
                    name: "id".to_string(),
                    value: "kw".to_string(),
                },
            ),
            ("@disabled", StaticQuery::AttrPresent("disabled".to_string())),
            ("text:登录", StaticQuery::TextContains("登录".to_string())),
            ("提交", StaticQuery::TextContains("提交".to_string())),
            ("@text():你好", StaticQuery::TextContains("你好".to_string())),
            ("xpath://a", StaticQuery::Xpath("//a".to_string())),
        ];
        for (selector, expected) in cases {
            assert_eq!(parse_static(selector), expected, "selector: {selector:?}");
        }
    }
}