use crate::error::{Result, SimdXmlError};
use crate::index::{TagType, XmlIndex};
use super::ast::*;
use super::parser::parse_xpath_predicate_expr;
/// Renders an `f64` the way this evaluator prints XPath numbers.
///
/// * `NaN`, `Infinity` and `-Infinity` are spelled out.
/// * Whole numbers with magnitude below `1e15` print without a fraction.
/// * Values whose decimal exponent lies in `-4..15` print in fixed notation
///   with trailing zeros removed.
/// * Everything else prints in scientific notation with an explicitly signed
///   exponent (for example `1e+20` or `1.5e-7`).
fn xpath_format_number(n: f64) -> String {
    if n.is_nan() {
        return String::from("NaN");
    }
    if n == f64::INFINITY {
        return String::from("Infinity");
    }
    if n == f64::NEG_INFINITY {
        return String::from("-Infinity");
    }
    if n == n.trunc() && n.abs() < 1e15 {
        return (n as i64).to_string();
    }

    let magnitude = n.abs();
    let exponent = if magnitude > 0.0 {
        magnitude.log10().floor() as i32
    } else {
        0
    };

    if (-4..15).contains(&exponent) {
        // Fixed notation: enough decimals for ~15 significant digits, then
        // strip the zero padding (and a dangling decimal point).
        let precision = (14 - exponent).max(0) as usize;
        let mut fixed = format!("{:.*}", precision, n);
        if fixed.contains('.') {
            let kept = fixed.trim_end_matches('0').trim_end_matches('.').len();
            fixed.truncate(kept);
        }
        return fixed;
    }

    // Scientific notation: trim the mantissa, force a sign on the exponent
    // and drop any leading zeros from the exponent digits.
    let sci = format!("{:.14e}", n);
    match sci.split_once('e') {
        Some((mantissa, exponent_text)) => {
            let mantissa = mantissa.trim_end_matches('0').trim_end_matches('.');
            let (sign, digits) = match exponent_text.strip_prefix('-') {
                Some(rest) => ("-", rest),
                None => (
                    "+",
                    exponent_text.strip_prefix('+').unwrap_or(exponent_text),
                ),
            };
            let digits = digits.trim_start_matches('0');
            let digits = if digits.is_empty() { "0" } else { digits };
            format!("{mantissa}e{sign}{digits}")
        }
        None => sci,
    }
}
/// A node handle produced by XPath evaluation. Handles are small `Copy`
/// values that refer back into an `XmlIndex` by position.
#[derive(Debug, Clone, Copy)]
pub enum XPathNode {
/// A tag-indexed node: the payload is a tag index into the `XmlIndex`.
/// Besides elements, this evaluator also uses it for comment and
/// processing-instruction tags, and for the document root via the
/// `DOC_ROOT` sentinel.
Element(usize),
/// A text node: the payload is an index into `XmlIndex::text_ranges`.
Text(usize),
/// An attribute: the owning tag index plus the `attr_name_hash` of the
/// attribute's name.
Attribute(usize, u64),
/// A namespace node: the owning tag index plus a 64-bit discriminator
/// (presumably a hash of the prefix, by analogy with `Attribute` — TODO confirm).
Namespace(usize, u64),
}
/// Hashes an attribute name with 64-bit FNV-1a.
///
/// The result is what `XPathNode::Attribute` stores to identify an attribute
/// without holding on to its name.
pub(crate) fn attr_name_hash(name: &str) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;
    name.bytes().fold(OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
/// Parses and evaluates `expr_str` as a scalar expression with the document
/// root as the context node.
///
/// # Errors
/// Propagates any error from `eval_expr_with_context`.
pub fn eval_expr_with_doc(index: &XmlIndex, expr_str: &str) -> Result<StandaloneResult> {
    let document_root = XPathNode::Element(DOC_ROOT);
    eval_expr_with_context(index, expr_str, document_root)
}
/// Parses `expr_str` and evaluates it to a scalar relative to `context`.
///
/// The expression is evaluated with context position 1 and context size 1.
///
/// # Errors
/// Returns the parser's error if `expr_str` does not parse, or the
/// evaluator's error if evaluation fails.
pub fn eval_expr_with_context(
    index: &XmlIndex,
    expr_str: &str,
    context: XPathNode,
) -> Result<StandaloneResult> {
    let ast = parse_xpath_predicate_expr(expr_str)?;
    let outcome = match eval_predicate_value(index, context, &ast, 1, 1)? {
        XPathValue::Boolean(flag) => StandaloneResult::Boolean(flag),
        XPathValue::Number(number) => StandaloneResult::Number(number),
        XPathValue::String(text) => StandaloneResult::String(text),
    };
    Ok(outcome)
}
/// Evaluates an expression that needs no real document (for example
/// arithmetic or string functions on literals).
///
/// A minimal `<r/>` document is indexed to provide a context; the expression
/// is evaluated at its document root with position 1 and size 1.
///
/// # Errors
/// Returns the parser's error for an invalid expression (checked first),
/// then any error from indexing the placeholder document or from evaluation.
pub fn eval_standalone_expr(expr_str: &str) -> Result<StandaloneResult> {
    let ast = parse_xpath_predicate_expr(expr_str)?;
    let placeholder_index = crate::index::structural::parse_scalar(b"<r/>")?;
    let root = XPathNode::Element(DOC_ROOT);
    eval_predicate_value(&placeholder_index, root, &ast, 1, 1).map(|value| match value {
        XPathValue::Boolean(flag) => StandaloneResult::Boolean(flag),
        XPathValue::Number(number) => StandaloneResult::Number(number),
        XPathValue::String(text) => StandaloneResult::String(text),
    })
}
/// Scalar outcome of evaluating an expression through the `eval_expr_*` /
/// `eval_standalone_expr` entry points. It has no node-set variant.
#[derive(Debug, Clone)]
pub enum StandaloneResult {
/// The expression evaluated to a number.
Number(f64),
/// The expression evaluated to a string.
String(String),
/// The expression evaluated to a boolean.
Boolean(bool),
}
/// Outcome of `eval_xpath`: either a node-set or one of the three scalar types.
#[derive(Debug, Clone)]
pub enum XPathResult {
/// The selected nodes.
NodeSet(Vec<XPathNode>),
/// A string value.
String(String),
/// A numeric value.
Number(f64),
/// A boolean value.
Boolean(bool),
}
impl XPathResult {
    /// Renders the result as display text.
    ///
    /// A node-set becomes the string-values of its nodes joined by newlines;
    /// numbers go through `xpath_format_number`; booleans print as `true` or
    /// `false`; strings are returned as-is.
    pub fn to_display_string(&self, index: &XmlIndex) -> String {
        match self {
            XPathResult::NodeSet(nodes) => nodes
                .iter()
                .map(|&node| node_string_value(index, node))
                .collect::<Vec<_>>()
                .join("\n"),
            XPathResult::String(text) => text.clone(),
            XPathResult::Number(number) => xpath_format_number(*number),
            XPathResult::Boolean(flag) => flag.to_string(),
        }
    }
}
/// Evaluates a parsed XPath expression against the whole document.
///
/// Location paths, unions, filter paths, global filters and `id(...)` calls
/// produce a node-set via `evaluate`. Every other expression is evaluated as
/// a scalar with the document root as context (position 1, size 1).
///
/// # Errors
/// Propagates errors from `evaluate` or from scalar evaluation.
pub fn eval_xpath(index: &XmlIndex, expr: &XPathExpr) -> Result<XPathResult> {
    let yields_node_set = match expr {
        XPathExpr::LocationPath(_)
        | XPathExpr::Union(_)
        | XPathExpr::FilterPath(_, _)
        | XPathExpr::GlobalFilter(_, _) => true,
        XPathExpr::FunctionCall(name, _) => name == "id",
        _ => false,
    };
    if yields_node_set {
        return evaluate(index, expr).map(XPathResult::NodeSet);
    }

    let root = XPathNode::Element(DOC_ROOT);
    let scalar = eval_predicate_value(index, root, expr, 1, 1)?;
    Ok(match scalar {
        XPathValue::Boolean(flag) => XPathResult::Boolean(flag),
        XPathValue::Number(number) => XPathResult::Number(number),
        XPathValue::String(text) => XPathResult::String(text),
    })
}
/// Evaluates a node-set-producing expression and returns the selected nodes.
///
/// Supported forms:
/// * location paths;
/// * unions, whose branches are merged, de-duplicated and sorted into
///   document order;
/// * `id(...)` calls;
/// * filter paths (an inner expression followed by further steps);
/// * global filters (an inner expression followed by predicates).
///
/// # Errors
/// Returns `SimdXmlError::XPathEvalError` for any other expression kind, and
/// propagates errors from the sub-evaluations.
pub fn evaluate<'a>(
    index: &'a XmlIndex<'a>,
    expr: &XPathExpr,
) -> Result<Vec<XPathNode>> {
    match expr {
        XPathExpr::LocationPath(path) => eval_location_path(index, path),
        XPathExpr::Union(branches) => {
            let mut merged = Vec::new();
            for branch in branches {
                let selected = evaluate(index, branch)?;
                merged.extend(selected);
            }
            dedup_nodes(&mut merged);
            sort_doc_order(index, &mut merged);
            Ok(merged)
        }
        XPathExpr::FunctionCall(name, args) if name == "id" => eval_id_function(index, args),
        XPathExpr::FilterPath(inner, steps) => {
            // Each step consumes the previous step's output as its context.
            let seed = evaluate(index, inner)?;
            steps
                .iter()
                .try_fold(seed, |context, step| eval_step(index, &context, step))
        }
        XPathExpr::GlobalFilter(inner, preds) => {
            // Predicates are applied in order to the inner node-set as a whole.
            let seed = evaluate(index, inner)?;
            preds
                .iter()
                .try_fold(seed, |nodes, pred| apply_predicate(index, &nodes, pred))
        }
        _ => Err(SimdXmlError::XPathEvalError(
            "Only location paths, unions, and id() are supported".into(),
        )),
    }
}
pub fn eval_text<'a>(
index: &'a XmlIndex<'a>,
expr: &XPathExpr,
) -> Result<Vec<&'a str>> {
let nodes = evaluate(index, expr)?;
let mut results = Vec::with_capacity(nodes.len());
for node in nodes {
match node {
XPathNode::Element(idx) => {
let text_slice = index.child_text_slice(idx);
if !text_slice.is_empty() {
for &ti in text_slice {
let text = index.text_content(&index.text_ranges[ti as usize]);
if !text.is_empty() {
results.push(text);
}
}
} else {
for range in &index.text_ranges {
if range.parent_tag == idx as u32 {
let text = index.text_content(range);
if !text.is_empty() {
results.push(text);
}
}
}
}
}
XPathNode::Text(idx) => {
results.push(index.text_content(&index.text_ranges[idx]));
}
_ => {}
}
}
Ok(results)
}
/// Collects text slices for already-evaluated nodes.
///
/// * Element: every non-empty direct text child. The per-element text list
///   from `child_text_slice` is used when it is non-empty; otherwise all text
///   ranges are scanned for ones whose `parent_tag` is this element.
/// * Text: the node's own content (pushed even when empty).
/// * Attribute / Namespace: ignored.
///
/// This function itself never produces an error; the `Result` wrapper is
/// part of its signature.
pub fn extract_text<'a>(index: &'a XmlIndex<'a>, nodes: &[XPathNode]) -> Result<Vec<&'a str>> {
    let mut texts = Vec::with_capacity(nodes.len());
    for node in nodes.iter().copied() {
        match node {
            XPathNode::Element(tag_idx) => {
                let direct = index.child_text_slice(tag_idx);
                if direct.is_empty() {
                    // No precomputed child list: fall back to a linear scan.
                    texts.extend(
                        index
                            .text_ranges
                            .iter()
                            .filter(|range| range.parent_tag == tag_idx as u32)
                            .map(|range| index.text_content(range))
                            .filter(|text| !text.is_empty()),
                    );
                } else {
                    texts.extend(
                        direct
                            .iter()
                            .map(|&ti| index.text_content(&index.text_ranges[ti as usize]))
                            .filter(|text| !text.is_empty()),
                    );
                }
            }
            XPathNode::Text(text_idx) => {
                texts.push(index.text_content(&index.text_ranges[text_idx]));
            }
            XPathNode::Attribute(..) | XPathNode::Namespace(..) => {}
        }
    }
    Ok(texts)
}
/// Sentinel tag index: `XPathNode::Element(DOC_ROOT)` stands for the document
/// root rather than a real tag. `usize::MAX` can never be a valid tag index,
/// so range checks against `tag_count()` naturally exclude it.
const DOC_ROOT: usize = usize::MAX;
/// Evaluates `id(arg)`.
///
/// Only a string-literal argument is supported: the first open or
/// self-closing tag (in tag-index order) whose `id` attribute equals the
/// literal is returned as a single-element node-set. Any other argument kind,
/// or no match, yields an empty node-set.
///
/// # Errors
/// Returns `SimdXmlError::XPathEvalError` unless exactly one argument is given.
fn eval_id_function(index: &XmlIndex, args: &[XPathExpr]) -> Result<Vec<XPathNode>> {
    let [arg] = args else {
        return Err(SimdXmlError::XPathEvalError(
            format!("id() requires exactly 1 argument, got {}", args.len()),
        ));
    };
    let XPathExpr::StringLiteral(wanted) = arg else {
        return Ok(vec![]);
    };

    let first_match = (0..index.tag_count()).find(|&tag| {
        matches!(index.tag_types[tag], TagType::Open | TagType::SelfClose)
            && index
                .get_attribute(tag, "id")
                .is_some_and(|val| val == *wanted)
    });
    Ok(first_match.map(XPathNode::Element).into_iter().collect())
}
/// Evaluates a location path starting from the document root.
///
/// Steps are applied left to right, each consuming the previous node-set as
/// its context. A predicate-free `descendant-or-self::node()` step followed
/// directly by a `child::` step (the expansion of `//`) is fused into one
/// scan via the `eval_fused_descendant_child*` helpers.
///
/// The initial context is the document root for both absolute and relative
/// paths: this entry point has no other context node to offer. The original
/// code spelled this as an `if path.absolute` with two identical branches.
///
/// # Errors
/// Propagates errors from step evaluation.
fn eval_location_path<'a>(
    index: &'a XmlIndex<'a>,
    path: &LocationPath,
) -> Result<Vec<XPathNode>> {
    let mut context: Vec<XPathNode> = vec![XPathNode::Element(DOC_ROOT)];
    let steps = &path.steps;
    let mut i = 0;
    while i < steps.len() {
        let fusable = i + 1 < steps.len()
            && steps[i].axis == Axis::DescendantOrSelf
            && steps[i].node_test == NodeTest::Node
            && steps[i].predicates.is_empty()
            && steps[i + 1].axis == Axis::Child;
        if fusable {
            let child_step = &steps[i + 1];
            context = if child_step.predicates.is_empty() {
                eval_fused_descendant_child(index, &context, child_step)?
            } else {
                eval_fused_descendant_child_with_preds(index, &context, child_step)?
            };
            // Both the `//` step and the child step have been consumed.
            i += 2;
        } else {
            context = eval_step(index, &context, &steps[i])?;
            i += 1;
        }
    }
    Ok(context)
}
/// Evaluates `descendant-or-self::node()/child::<test>` (the expansion of
/// `//<test>`) in one pass, for a child step that carries no predicates.
///
/// For every context node a half-open tag-index range `[scan_start, scan_end)`
/// is computed and candidates are collected from that range according to the
/// child step's node test. When there is more than one context node the
/// combined result is de-duplicated and sorted into document order.
fn eval_fused_descendant_child(
index: &XmlIndex,
context: &[XPathNode],
child_step: &Step,
) -> Result<Vec<XPathNode>> {
let mut result = Vec::new();
for &ctx_node in context {
// Document root scans every tag; an element scans the tags strictly after
// its own open tag up to its matching close (or to the end of the tag
// table when no close is recorded). Other context kinds are skipped.
let (scan_start, scan_end) = match ctx_node {
XPathNode::Element(DOC_ROOT) => (0, index.tag_count()),
XPathNode::Element(idx) if idx < index.tag_count() => {
let close = index.matching_close(idx).unwrap_or(index.tag_count());
(idx + 1, close)
}
_ => continue,
};
match &child_step.node_test {
NodeTest::Name(name) => {
// `tags_by_name` yields a posting list of tag indices for `name`; it is
// binary-searched below, so it is presumably sorted ascending — TODO confirm.
let posting = index.tags_by_name(name);
if !posting.is_empty() && scan_start == 0 && scan_end == index.tag_count() {
// Whole-document scan: the posting list is the answer.
result.extend(posting.iter().map(|&j| XPathNode::Element(j as usize)));
} else if !posting.is_empty() {
// Sub-range scan: slice the posting list to [scan_start, scan_end).
// NOTE(review): `posting[lo..hi]` would panic if `scan_end < scan_start`;
// this relies on `matching_close(idx)` never being <= idx — confirm.
let lo = posting.partition_point(|&j| (j as usize) < scan_start);
let hi = posting.partition_point(|&j| (j as usize) < scan_end);
result.extend(posting[lo..hi].iter().map(|&j| XPathNode::Element(j as usize)));
} else {
// Empty posting list: fall back to a linear scan comparing tag names.
for j in scan_start..scan_end {
let tt = index.tag_types[j];
if (tt == TagType::Open || tt == TagType::SelfClose)
&& index.tag_name_eq(j, name)
{
result.push(XPathNode::Element(j));
}
}
}
}
NodeTest::Text => {
// Text nodes are selected when their parent tag lies inside the scan range.
for (ti, range) in index.text_ranges.iter().enumerate() {
let p = range.parent_tag as usize;
if p >= scan_start && p < scan_end {
result.push(XPathNode::Text(ti));
}
}
}
NodeTest::Wildcard => {
// `*` selects every open or self-closing tag in range.
for j in scan_start..scan_end {
let tt = index.tag_types[j];
if tt == TagType::Open || tt == TagType::SelfClose {
result.push(XPathNode::Element(j));
}
}
}
NodeTest::Node => {
// `node()` selects every tag accepted by `is_node_tag`, plus text nodes
// whose parent lies in range. Tags are pushed before text nodes.
for j in scan_start..scan_end {
if is_node_tag(index.tag_types[j]) {
result.push(XPathNode::Element(j));
}
}
for (ti, range) in index.text_ranges.iter().enumerate() {
let p = range.parent_tag as usize;
if p >= scan_start && p < scan_end {
result.push(XPathNode::Text(ti));
}
}
}
NodeTest::Comment => {
// Comment tags are represented as `Element` handles.
for j in scan_start..scan_end {
if index.tag_types[j] == TagType::Comment {
result.push(XPathNode::Element(j));
}
}
}
NodeTest::NamespacedName(prefix, local) => {
let full_name = if local == "*" {
// `prefix:*` matches purely on the literal `prefix:` at the start of the
// tag name; no namespace URI resolution happens here.
let prefix_colon = format!("{}:", prefix);
for j in scan_start..scan_end {
let tt = index.tag_types[j];
if (tt == TagType::Open || tt == TagType::SelfClose)
&& index.tag_name(j).starts_with(&prefix_colon)
{
result.push(XPathNode::Element(j));
}
}
// This `continue` targets the outer `for &ctx_node` loop: the wildcard
// case is finished for this context node.
continue;
} else {
format!("{}:{}", prefix, local)
};
// Same three-way strategy as `NodeTest::Name`, keyed on `prefix:local`.
let posting = index.tags_by_name(&full_name);
if !posting.is_empty() && scan_start == 0 && scan_end == index.tag_count() {
result.extend(posting.iter().map(|&j| XPathNode::Element(j as usize)));
} else if !posting.is_empty() {
let lo = posting.partition_point(|&j| (j as usize) < scan_start);
let hi = posting.partition_point(|&j| (j as usize) < scan_end);
result.extend(posting[lo..hi].iter().map(|&j| XPathNode::Element(j as usize)));
} else {
for j in scan_start..scan_end {
let tt = index.tag_types[j];
if (tt == TagType::Open || tt == TagType::SelfClose)
&& index.tag_name_eq(j, &full_name)
{
result.push(XPathNode::Element(j));
}
}
}
}
_ => {
// Any other node test: unfused evaluation — expand descendant-or-self,
// then take the children of each and filter by the node test.
let desc = eval_descendant_axis(index, ctx_node, true);
for dn in desc {
let children = eval_child_axis(index, dn);
for c in children {
if matches_node_test(index, c, &child_step.node_test) {
result.push(c);
}
}
}
}
}
}
// Multiple context nodes may have overlapping ranges, so normalise.
if context.len() > 1 {
dedup_nodes(&mut result);
sort_doc_order(index, &mut result);
}
Ok(result)
}
/// Evaluates `descendant-or-self::node()/child::<test>[preds…]` (the
/// expansion of `//<test>[…]`) in one pass.
///
/// For every context node:
/// 1. candidates matching the node test are collected from the tag-index
///    range covering the context node's descendants;
/// 2. a simple attribute-equality predicate reported by
///    `extract_simple_attr_eq` is checked inline while collecting (only for
///    plain-name tests) and skipped later;
/// 3. candidates are grouped by parent so that the remaining predicates see
///    positions relative to siblings sharing a parent.
///
/// The combined result is de-duplicated and sorted into document order.
///
/// Fix relative to the previous version: the scan for an element context
/// started at `idx`, so the context element itself was tested as if it were
/// its own descendant (for example `/a//a[…]` could return the outer `a`).
/// The scan now starts at `idx + 1`, matching `eval_fused_descendant_child`.
///
/// # Errors
/// Propagates errors from `apply_predicate`.
fn eval_fused_descendant_child_with_preds(
    index: &XmlIndex,
    context: &[XPathNode],
    child_step: &Step,
) -> Result<Vec<XPathNode>> {
    let mut result = Vec::new();
    let inline_attr_pred = extract_simple_attr_eq(&child_step.predicates);
    for &ctx_node in context {
        let (scan_start, scan_end) = match ctx_node {
            XPathNode::Element(DOC_ROOT) => (0, index.tag_count()),
            XPathNode::Element(idx) if idx < index.tag_count() => {
                let close = index.matching_close(idx).unwrap_or(index.tag_count());
                // Descendants begin after the context element's own tag.
                (idx + 1, close)
            }
            _ => continue,
        };
        let mut all_matches: Vec<XPathNode> = Vec::new();
        match &child_step.node_test {
            NodeTest::Name(name) => {
                let posting = index.tags_by_name(name);
                let iter: Box<dyn Iterator<Item = usize>> = if !posting.is_empty()
                    && scan_start == 0
                    && scan_end == index.tag_count()
                {
                    Box::new(posting.iter().map(|&j| j as usize))
                } else if !posting.is_empty() {
                    let lo = posting.partition_point(|&j| (j as usize) < scan_start);
                    // `.max(lo)` keeps the slice valid even if the range is
                    // empty or inverted (scan_end <= scan_start).
                    let hi = posting
                        .partition_point(|&j| (j as usize) < scan_end)
                        .max(lo);
                    Box::new(posting[lo..hi].iter().map(|&j| j as usize))
                } else {
                    Box::new((scan_start..scan_end).filter(|&j| {
                        let tt = index.tag_types[j];
                        (tt == TagType::Open || tt == TagType::SelfClose)
                            && index.tag_name_eq(j, name)
                    }))
                };
                for j in iter {
                    if let Some((attr, val, _pred_idx)) = &inline_attr_pred {
                        if index.get_attribute(j, attr) != Some(val) {
                            continue;
                        }
                    }
                    all_matches.push(XPathNode::Element(j));
                }
            }
            NodeTest::NamespacedName(prefix, local) => {
                if local == "*" {
                    let prefix_colon = format!("{}:", prefix);
                    for j in scan_start..scan_end {
                        let tt = index.tag_types[j];
                        if (tt == TagType::Open || tt == TagType::SelfClose)
                            && index.tag_name(j).starts_with(&prefix_colon)
                        {
                            all_matches.push(XPathNode::Element(j));
                        }
                    }
                } else {
                    let full_name = format!("{}:{}", prefix, local);
                    for j in scan_start..scan_end {
                        let tt = index.tag_types[j];
                        if (tt == TagType::Open || tt == TagType::SelfClose)
                            && index.tag_name_eq(j, &full_name)
                        {
                            all_matches.push(XPathNode::Element(j));
                        }
                    }
                }
            }
            _ => {
                for j in scan_start..scan_end {
                    if matches_node_test(index, XPathNode::Element(j), &child_step.node_test) {
                        all_matches.push(XPathNode::Element(j));
                    }
                }
            }
        }
        // Positional predicates are relative to siblings, so group by parent.
        let mut parent_groups: std::collections::HashMap<u32, Vec<XPathNode>> =
            std::collections::HashMap::new();
        for &m in &all_matches {
            let parent = match m {
                XPathNode::Element(idx) if idx < index.tag_count() => index.parents[idx],
                _ => u32::MAX,
            };
            parent_groups.entry(parent).or_default().push(m);
        }
        // Map iteration order is arbitrary; the final sort restores document order.
        for (_parent, mut group) in parent_groups {
            for (pi, pred) in child_step.predicates.iter().enumerate() {
                // Skip the predicate index reported alongside the inline
                // attribute predicate.
                if inline_attr_pred.as_ref().is_some_and(|(_, _, idx)| *idx == pi) {
                    continue;
                }
                group = apply_predicate(index, &group, pred)?;
            }
            result.extend(group);
        }
    }
    dedup_nodes(&mut result);
    sort_doc_order(index, &mut result);
    Ok(result)
}
/// Applies one location step to every node in `context`.
///
/// For each context node the step's axis produces candidates. They are
/// filtered by the node test, except on the attribute and namespace axes,
/// whose candidates are passed through unfiltered. The step's predicates are
/// then applied in order. With more than one context node the combined
/// output is de-duplicated and sorted into document order.
///
/// # Errors
/// Propagates errors from `apply_predicate`.
fn eval_step<'a>(
    index: &'a XmlIndex<'a>,
    context: &[XPathNode],
    step: &Step,
) -> Result<Vec<XPathNode>> {
    let skip_node_test = matches!(step.axis, Axis::Attribute | Axis::Namespace);
    let mut collected = Vec::new();

    for ctx_node in context.iter().copied() {
        let mut survivors = match step.axis {
            Axis::Child => eval_child_axis(index, ctx_node),
            Axis::Descendant => eval_descendant_axis(index, ctx_node, false),
            Axis::DescendantOrSelf => eval_descendant_axis(index, ctx_node, true),
            Axis::Parent => eval_parent_axis(index, ctx_node),
            Axis::Ancestor => eval_ancestor_axis(index, ctx_node, false),
            Axis::AncestorOrSelf => eval_ancestor_axis(index, ctx_node, true),
            Axis::FollowingSibling => eval_following_sibling_axis(index, ctx_node),
            Axis::PrecedingSibling => eval_preceding_sibling_axis(index, ctx_node),
            Axis::Following => eval_following_axis(index, ctx_node),
            Axis::Preceding => eval_preceding_axis(index, ctx_node),
            Axis::SelfAxis => vec![ctx_node],
            Axis::Attribute => eval_attribute_axis(index, ctx_node, &step.node_test),
            Axis::Namespace => eval_namespace_axis(index, ctx_node),
        };
        if !skip_node_test {
            survivors.retain(|candidate| matches_node_test(index, *candidate, &step.node_test));
        }
        for predicate in &step.predicates {
            survivors = apply_predicate(index, &survivors, predicate)?;
        }
        collected.extend(survivors);
    }

    if context.len() > 1 {
        dedup_nodes(&mut collected);
        sort_doc_order(index, &mut collected);
    }
    Ok(collected)
}
/// Removes duplicate nodes in place, keeping the first occurrence of each and
/// preserving the relative order of the survivors.
///
/// Two nodes are duplicates only when they are the same kind and carry the
/// same payload. The previous key folded attribute/namespace payloads into
/// `idx ^ hash`, so distinct nodes such as `Attribute(1, 2)` and
/// `Attribute(2, 1)` collided (and the hash was truncated on 32-bit targets).
/// The key now keeps index and hash separate.
fn dedup_nodes(nodes: &mut Vec<XPathNode>) {
    let mut seen = std::collections::HashSet::with_capacity(nodes.len());
    nodes.retain(|node| {
        let key: (u8, usize, u64) = match *node {
            XPathNode::Element(idx) => (0, idx, 0),
            XPathNode::Text(idx) => (1, idx, 0),
            XPathNode::Attribute(idx, hash) => (2, idx, hash),
            XPathNode::Namespace(idx, hash) => (3, idx, hash),
        };
        // `insert` returns false when the key was already present.
        seen.insert(key)
    });
}
/// Returns the sort key used to order `node` in document order.
///
/// Text nodes use the start of their text range. Element, attribute and
/// namespace nodes all use the start of their owning tag. Any node whose
/// index is out of range (including the document-root sentinel) gets
/// `u64::MAX`.
fn node_doc_pos(index: &XmlIndex, node: &XPathNode) -> u64 {
    const OUT_OF_RANGE: u64 = u64::MAX;
    match *node {
        XPathNode::Text(text_idx) => {
            if text_idx < index.text_ranges.len() {
                index.text_ranges[text_idx].start
            } else {
                OUT_OF_RANGE
            }
        }
        XPathNode::Element(tag_idx)
        | XPathNode::Attribute(tag_idx, _)
        | XPathNode::Namespace(tag_idx, _) => {
            if tag_idx < index.tag_count() {
                index.tag_starts[tag_idx]
            } else {
                OUT_OF_RANGE
            }
        }
    }
}
/// Sorts `nodes` in place by `node_doc_pos`.
///
/// The sort is stable, so nodes with equal keys (for example several
/// attributes of one element) keep their relative order.
fn sort_doc_order(index: &XmlIndex, nodes: &mut Vec<XPathNode>) {
    nodes.sort_by(|left, right| {
        let left_pos = node_doc_pos(index, left);
        let right_pos = node_doc_pos(index, right);
        left_pos.cmp(&right_pos)
    });
}
/// Filters `nodes` by the predicate `pred` and returns the survivors in their
/// original order.
///
/// A node's context position is its 1-based index within `nodes`; the context
/// size is `nodes.len()`. A predicate that evaluates to a number keeps the
/// node whose position equals that number; otherwise truthiness decides.
///
/// # Errors
/// Propagates errors from evaluating the predicate expression.
fn apply_predicate<'a>(
index: &'a XmlIndex<'a>,
nodes: &[XPathNode],
pred: &XPathExpr,
) -> Result<Vec<XPathNode>> {
// Fast path: when the batch helper recognises the predicate it returns the
// filtered set directly.
if let Some(result) = try_batch_attr_predicate(index, nodes, pred) {
return Ok(result);
}
match pred {
// `[N]`: positional selection. NaN, infinite, fractional, < 1 or beyond the
// end selects nothing.
XPathExpr::NumberLiteral(n) => {
let n = *n;
if n.is_nan() || n.is_infinite() || n < 1.0 || n > nodes.len() as f64 || n != n.trunc() {
Ok(vec![])
} else {
let pos = n as usize;
if pos >= 1 && pos <= nodes.len() {
Ok(vec![nodes[pos - 1]])
} else {
Ok(vec![])
}
}
}
// `[-expr]`: the operand is evaluated once, against the first node with
// position 1, then negated and used as a position.
// NOTE(review): unlike the literal case above, a fractional value is rounded
// here rather than rejected — confirm this is intended.
XPathExpr::UnaryMinus(inner) => {
if nodes.is_empty() {
return Ok(vec![]);
}
let val = eval_predicate_value(index, nodes[0], inner, 1, nodes.len())?;
let n = -(val.as_number());
if n.is_nan() || n.is_infinite() || n < 1.0 || n > nodes.len() as f64 {
Ok(vec![])
} else {
let pos = n.round() as usize;
if pos >= 1 && pos <= nodes.len() {
Ok(vec![nodes[pos - 1]])
} else {
Ok(vec![])
}
}
}
XPathExpr::BinaryOp(left, op, right) => {
let is_arithmetic = matches!(op, BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div | BinaryOp::Mod);
if is_arithmetic {
// Arithmetic predicate (e.g. `[last() - 1]`): evaluate per node and keep
// the node whose position equals the numeric result.
let mut result = Vec::new();
for (i, &node) in nodes.iter().enumerate() {
let val = eval_predicate_value(index, node, pred, i + 1, nodes.len())?;
match val {
XPathValue::Number(n) => {
let pos = (i + 1) as f64;
if pos == n { result.push(node); }
}
_ => {
if val.is_truthy() { result.push(node); }
}
}
}
return Ok(result);
}
// Comparison / logical predicate: evaluate both operands per node and let
// `compare_values` decide.
let mut result = Vec::new();
for (i, &node) in nodes.iter().enumerate() {
// For `and` / `or`, a location-path operand is an existence test: true
// when the path selects at least one node from this context node.
let left_val = if matches!(op, BinaryOp::Or | BinaryOp::And) {
if let XPathExpr::LocationPath(_) = left.as_ref() {
let nodes = evaluate_in_context(index, node, left)?;
XPathValue::Boolean(!nodes.is_empty())
} else {
eval_predicate_value(index, node, left, i + 1, nodes.len())?
}
} else {
eval_predicate_value(index, node, left, i + 1, nodes.len())?
};
let right_val = if matches!(op, BinaryOp::Or | BinaryOp::And) {
if let XPathExpr::LocationPath(_) = right.as_ref() {
let nodes = evaluate_in_context(index, node, right)?;
XPathValue::Boolean(!nodes.is_empty())
} else {
eval_predicate_value(index, node, right, i + 1, nodes.len())?
}
} else {
eval_predicate_value(index, node, right, i + 1, nodes.len())?
};
if compare_values(&left_val, op, &right_val) {
result.push(node);
}
}
Ok(result)
}
XPathExpr::FunctionCall(name, args) => {
// Fast path for string predicates the batch helper recognises.
if let Some(result) = try_batch_string_predicate(index, nodes, name, args) {
return Ok(result);
}
let mut result = Vec::new();
for (i, &node) in nodes.iter().enumerate() {
let val = eval_function(index, node, name, args, i + 1, nodes.len())?;
// A numeric result (e.g. `[last()]`) is a position test; anything else is
// a truthiness test.
let keep = match &val {
XPathValue::Number(n) => {
let pos = (i + 1) as f64;
pos == *n
}
_ => val.is_truthy(),
};
if keep {
result.push(node);
}
}
Ok(result)
}
// `[path]`: keep nodes from which the path selects at least one node.
XPathExpr::LocationPath(_) => {
let mut result = Vec::new();
for &node in nodes {
let sub_nodes = evaluate_in_context(index, node, pred)?;
if !sub_nodes.is_empty() {
result.push(node);
}
}
Ok(result)
}
// Any other predicate kind performs no filtering: all nodes are kept.
_ => Ok(nodes.to_vec()),
}
}
/// A scalar value produced while evaluating an XPath expression.
#[derive(Debug, Clone)]
enum XPathValue {
    /// A string value.
    String(String),
    /// A numeric value (IEEE 754 double).
    Number(f64),
    /// A boolean value.
    Boolean(bool),
}

impl XPathValue {
    /// Converts to boolean: booleans are themselves, strings are true when
    /// non-empty, numbers are true when neither zero nor NaN.
    fn is_truthy(&self) -> bool {
        match self {
            XPathValue::Boolean(b) => *b,
            XPathValue::String(s) => !s.is_empty(),
            XPathValue::Number(n) => *n != 0.0 && !n.is_nan(),
        }
    }

    /// Converts to string: numbers go through `xpath_format_number`, booleans
    /// become `true` / `false`.
    fn as_string(&self) -> String {
        match self {
            XPathValue::String(s) => s.clone(),
            XPathValue::Number(n) => xpath_format_number(*n),
            XPathValue::Boolean(b) => b.to_string(),
        }
    }

    /// Converts to number: booleans become `1.0` / `0.0`; strings are parsed
    /// by the XPath 1.0 rules (see `parse_number`).
    fn as_number(&self) -> f64 {
        match self {
            XPathValue::Number(n) => *n,
            XPathValue::String(s) => Self::parse_number(s),
            XPathValue::Boolean(b) => {
                if *b {
                    1.0
                } else {
                    0.0
                }
            }
        }
    }

    /// Parses `text` as an XPath 1.0 number: optional XML whitespace, an
    /// optional `-`, then `Digits ('.' Digits?)? | '.' Digits`, then optional
    /// whitespace. Anything else yields NaN.
    ///
    /// Rust's `f64` parser is more permissive (it accepts exponents, a
    /// leading `+`, `inf`, `infinity`), so the shape is validated before
    /// delegating to it.
    fn parse_number(text: &str) -> f64 {
        let trimmed = text.trim_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));
        let unsigned = trimmed.strip_prefix('-').unwrap_or(trimmed);
        let (whole, fraction) = match unsigned.split_once('.') {
            Some((whole, fraction)) => (whole, fraction),
            None => (unsigned, ""),
        };
        let only_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());
        let has_digit = !whole.is_empty() || !fraction.is_empty();
        if has_digit && only_digits(whole) && only_digits(fraction) {
            trimmed.parse().unwrap_or(f64::NAN)
        } else {
            f64::NAN
        }
    }
}
/// Evaluates `expr` to a scalar `XPathValue` with `node` as the context node
/// and `position` / `size` as the context position and size.
///
/// Node-set-valued sub-expressions are reduced to the string-value of their
/// first node (or the empty string when nothing is selected), except inside
/// comparisons, which test each node of the set.
///
/// # Errors
/// Propagates errors from nested evaluation.
fn eval_predicate_value(
index: &XmlIndex,
node: XPathNode,
expr: &XPathExpr,
position: usize,
size: usize,
) -> Result<XPathValue> {
match expr {
XPathExpr::StringLiteral(s) => Ok(XPathValue::String(s.clone())),
XPathExpr::NumberLiteral(n) => Ok(XPathValue::Number(*n)),
XPathExpr::FunctionCall(name, args) => eval_function(index, node, name, args, position, size),
XPathExpr::LocationPath(path) => {
// Fast path for a bare single attribute step without predicates: read the
// attribute straight from the index. Anything that does not resolve to a
// value (non-element context, non-name test, `xmlns` / `xmlns:*`
// declarations, missing attribute) yields the empty string.
if path.steps.len() == 1
&& path.steps[0].axis == Axis::Attribute
&& path.steps[0].predicates.is_empty()
{
if let XPathNode::Element(idx) = node {
if let NodeTest::Name(attr_name) = &path.steps[0].node_test {
if attr_name != "xmlns" && !attr_name.starts_with("xmlns:") {
if let Some(val) = index.get_attribute(idx, attr_name) {
return Ok(XPathValue::String(val.to_string()));
}
}
}
}
return Ok(XPathValue::String(String::new()));
}
// General path: string-value of the first selected node.
let nodes = evaluate_in_context(index, node, expr)?;
if let Some(n) = nodes.first() {
Ok(XPathValue::String(node_string_value(index, *n)))
} else {
Ok(XPathValue::String(String::new()))
}
}
XPathExpr::UnaryMinus(inner) => {
let val = eval_predicate_value(index, node, inner, position, size)?;
Ok(XPathValue::Number(-val.as_number()))
}
XPathExpr::BinaryOp(left, op, right) => {
let is_comparison = matches!(op, BinaryOp::Eq | BinaryOp::Neq
| BinaryOp::Lt | BinaryOp::Gt | BinaryOp::Lte | BinaryOp::Gte);
let left_is_path = matches!(left.as_ref(), XPathExpr::LocationPath(_));
let right_is_path = matches!(right.as_ref(), XPathExpr::LocationPath(_));
// Comparisons with a location path on at least one side are existential:
// true when some node in the set satisfies the comparison.
if is_comparison && (left_is_path || right_is_path) {
let left_nodes = if left_is_path {
evaluate_in_context(index, node, left)?
} else {
vec![]
};
let right_nodes = if right_is_path {
evaluate_in_context(index, node, right)?
} else {
vec![]
};
let result = if left_is_path && right_is_path {
// Node-set vs node-set: any pair of string-values that compares true.
let left_vals: Vec<String> = left_nodes.iter().map(|n| node_string_value(index, *n)).collect();
let right_vals: Vec<String> = right_nodes.iter().map(|n| node_string_value(index, *n)).collect();
left_vals.iter().any(|lv| right_vals.iter().any(|rv| {
compare_values(&XPathValue::String(lv.clone()), op, &XPathValue::String(rv.clone()))
}))
} else if left_is_path {
let r = eval_predicate_value(index, node, right, position, size)?;
let is_eq = matches!(op, BinaryOp::Eq | BinaryOp::Neq);
// `=` / `!=` against a boolean compares the node-set's non-emptiness.
if is_eq && matches!(&r, XPathValue::Boolean(_)) {
let lv = XPathValue::Boolean(!left_nodes.is_empty());
compare_values(&lv, op, &r)
} else {
left_nodes.iter().any(|n| {
let lv = XPathValue::String(node_string_value(index, *n));
compare_values(&lv, op, &r)
})
}
} else {
// Mirror image of the branch above, with the path on the right.
let l = eval_predicate_value(index, node, left, position, size)?;
let is_eq = matches!(op, BinaryOp::Eq | BinaryOp::Neq);
if is_eq && matches!(&l, XPathValue::Boolean(_)) {
let rv = XPathValue::Boolean(!right_nodes.is_empty());
return Ok(XPathValue::Boolean(compare_values(&l, op, &rv)));
}
right_nodes.iter().any(|n| {
let rv = XPathValue::String(node_string_value(index, *n));
compare_values(&l, op, &rv)
})
};
return Ok(XPathValue::Boolean(result));
}
// Scalar operands: arithmetic operators work on the numeric conversions;
// every other operator is delegated to `compare_values`.
let l = eval_predicate_value(index, node, left, position, size)?;
let r = eval_predicate_value(index, node, right, position, size)?;
let ln = l.as_number();
let rn = r.as_number();
let result = match op {
BinaryOp::Add => ln + rn,
BinaryOp::Sub => ln - rn,
BinaryOp::Mul => ln * rn,
BinaryOp::Div => ln / rn,
BinaryOp::Mod => ln % rn,
_ => {
return Ok(XPathValue::Boolean(compare_values(&l, op, &r)));
}
};
Ok(XPathValue::Number(result))
}
// Other node-set expressions: string-value of the first selected node.
XPathExpr::Union(_) | XPathExpr::FilterPath(_, _) | XPathExpr::GlobalFilter(_, _) => {
let nodes = evaluate_in_context(index, node, expr)?;
if let Some(n) = nodes.first() {
Ok(XPathValue::String(node_string_value(index, *n)))
} else {
Ok(XPathValue::String(String::new()))
}
}
}
}
/// Evaluates the XPath core function `name` with `args`, using `node` as the
/// context node and `position` / `size` as the context position and size.
///
/// Missing arguments generally produce a neutral value (empty string,
/// `false`, or `0`) rather than an error, and an unrecognised function name
/// evaluates to the empty string.
///
/// Fix relative to the previous version: `string-length` now counts
/// characters instead of UTF-8 bytes, matching `substring`, which already
/// indexes by character.
///
/// # Errors
/// Propagates errors from evaluating argument expressions.
fn eval_function(
    index: &XmlIndex,
    node: XPathNode,
    name: &str,
    args: &[XPathExpr],
    position: usize,
    size: usize,
) -> Result<XPathValue> {
    match name {
        "position" => Ok(XPathValue::Number(position as f64)),
        "last" => Ok(XPathValue::Number(size as f64)),
        "count" => {
            if let Some(arg) = args.first() {
                let nodes = evaluate_in_context(index, node, arg)?;
                Ok(XPathValue::Number(nodes.len() as f64))
            } else {
                Ok(XPathValue::Number(0.0))
            }
        }
        "contains" => {
            if args.len() >= 2 {
                let haystack = eval_predicate_value(index, node, &args[0], position, size)?.as_string();
                let needle = eval_predicate_value(index, node, &args[1], position, size)?.as_string();
                Ok(XPathValue::Boolean(haystack.contains(&needle)))
            } else {
                Ok(XPathValue::Boolean(false))
            }
        }
        "starts-with" => {
            if args.len() >= 2 {
                let haystack = eval_predicate_value(index, node, &args[0], position, size)?.as_string();
                let prefix = eval_predicate_value(index, node, &args[1], position, size)?.as_string();
                Ok(XPathValue::Boolean(haystack.starts_with(&prefix)))
            } else {
                Ok(XPathValue::Boolean(false))
            }
        }
        "string-length" => {
            let s = if let Some(arg) = args.first() {
                eval_predicate_value(index, node, arg, position, size)?.as_string()
            } else {
                node_string_value(index, node)
            };
            // XPath counts characters, not bytes.
            Ok(XPathValue::Number(s.chars().count() as f64))
        }
        "normalize-space" => {
            let s = if let Some(arg) = args.first() {
                eval_predicate_value(index, node, arg, position, size)?.as_string()
            } else {
                node_string_value(index, node)
            };
            let normalized = s.split_whitespace().collect::<Vec<_>>().join(" ");
            Ok(XPathValue::String(normalized))
        }
        "not" => {
            if let Some(arg) = args.first() {
                // A location-path argument is an existence test.
                if let XPathExpr::LocationPath(_) = arg {
                    let nodes = evaluate_in_context(index, node, arg)?;
                    Ok(XPathValue::Boolean(nodes.is_empty()))
                } else {
                    let val = eval_predicate_value(index, node, arg, position, size)?;
                    Ok(XPathValue::Boolean(!val.is_truthy()))
                }
            } else {
                Ok(XPathValue::Boolean(true))
            }
        }
        "true" => Ok(XPathValue::Boolean(true)),
        "false" => Ok(XPathValue::Boolean(false)),
        "name" => {
            // With an argument, the target is the first selected node in
            // document order; without one, it is the context node.
            let target = if let Some(arg) = args.first() {
                let mut nodes = evaluate_in_context(index, node, arg)?;
                sort_doc_order(index, &mut nodes);
                nodes.first().copied()
            } else {
                Some(node)
            };
            match target {
                Some(XPathNode::Element(idx)) if idx != DOC_ROOT && idx < index.tag_count() => {
                    Ok(XPathValue::String(index.tag_name(idx).to_string()))
                }
                Some(XPathNode::Attribute(tag_idx, name_hash)) => {
                    // Recover the attribute's name by re-hashing the owning
                    // tag's attribute names.
                    let attr_name = index.get_all_attribute_names(tag_idx)
                        .into_iter()
                        .find(|n| attr_name_hash(n) == name_hash)
                        .unwrap_or("");
                    Ok(XPathValue::String(attr_name.to_string()))
                }
                // NOTE(review): this arm looks unreachable — any in-range
                // index is already taken by the first arm. Kept as-is.
                Some(XPathNode::Element(idx)) if idx < index.tag_count()
                    && index.tag_types[idx] == TagType::PI => {
                    Ok(XPathValue::String(index.tag_name(idx).to_string()))
                }
                _ => Ok(XPathValue::String(String::new())),
            }
        }
        "local-name" => {
            let target = if let Some(arg) = args.first() {
                let mut nodes = evaluate_in_context(index, node, arg)?;
                sort_doc_order(index, &mut nodes);
                nodes.first().copied()
            } else {
                Some(node)
            };
            match target {
                Some(XPathNode::Element(idx)) if idx != DOC_ROOT && idx < index.tag_count() => {
                    // The local part is whatever follows the last ':'.
                    let full_name = index.tag_name(idx);
                    let local = full_name.rsplit_once(':').map_or(full_name, |(_, l)| l);
                    Ok(XPathValue::String(local.to_string()))
                }
                Some(XPathNode::Attribute(tag_idx, name_hash)) => {
                    let attr_name = index.get_all_attribute_names(tag_idx)
                        .into_iter()
                        .find(|n| attr_name_hash(n) == name_hash)
                        .unwrap_or("");
                    let local = attr_name.rsplit_once(':').map_or(attr_name, |(_, l)| l);
                    Ok(XPathValue::String(local.to_string()))
                }
                // NOTE(review): looks unreachable for the same reason as in `name`.
                Some(XPathNode::Element(idx)) if idx < index.tag_count()
                    && index.tag_types[idx] == TagType::PI => {
                    Ok(XPathValue::String(index.tag_name(idx).to_string()))
                }
                _ => Ok(XPathValue::String(String::new())),
            }
        }
        "string" => {
            if let Some(arg) = args.first() {
                let val = eval_predicate_value(index, node, arg, position, size)?;
                Ok(XPathValue::String(val.as_string()))
            } else {
                Ok(XPathValue::String(node_string_value(index, node)))
            }
        }
        "concat" => {
            let mut result = String::new();
            for arg in args {
                result.push_str(&eval_predicate_value(index, node, arg, position, size)?.as_string());
            }
            Ok(XPathValue::String(result))
        }
        "substring" => {
            if args.len() >= 2 {
                let s = eval_predicate_value(index, node, &args[0], position, size)?.as_string();
                let start_raw = eval_predicate_value(index, node, &args[1], position, size)?.as_number();
                if start_raw.is_nan() {
                    return Ok(XPathValue::String(String::new()));
                }
                // Work in characters so multi-byte text is sliced safely.
                let chars: Vec<char> = s.chars().collect();
                if args.len() >= 3 {
                    let len_raw = eval_predicate_value(index, node, &args[2], position, size)?.as_number();
                    if len_raw.is_nan() || len_raw == f64::NEG_INFINITY {
                        return Ok(XPathValue::String(String::new()));
                    }
                    // Selected characters are those at 1-based positions in
                    // [round(start), round(start) + round(length)).
                    let p = start_raw.round();
                    let n = len_raw.round();
                    let end = p + n;
                    if end == f64::NEG_INFINITY || p == f64::INFINITY {
                        return Ok(XPathValue::String(String::new()));
                    }
                    let start_idx = (p.max(1.0) as i64 - 1).max(0) as usize;
                    let end_idx = if end.is_infinite() {
                        chars.len()
                    } else {
                        ((end as i64 - 1).max(0) as usize).min(chars.len())
                    };
                    if start_idx >= end_idx || start_idx >= chars.len() {
                        Ok(XPathValue::String(String::new()))
                    } else {
                        Ok(XPathValue::String(chars[start_idx..end_idx].iter().collect()))
                    }
                } else {
                    let start_idx = (start_raw.round().max(1.0) as i64 - 1).max(0) as usize;
                    if start_idx >= chars.len() {
                        Ok(XPathValue::String(String::new()))
                    } else {
                        Ok(XPathValue::String(chars[start_idx..].iter().collect()))
                    }
                }
            } else {
                Ok(XPathValue::String(String::new()))
            }
        }
        "floor" => {
            let n = if let Some(arg) = args.first() {
                eval_predicate_value(index, node, arg, position, size)?.as_number()
            } else { 0.0 };
            Ok(XPathValue::Number(n.floor()))
        }
        "ceiling" => {
            let n = if let Some(arg) = args.first() {
                eval_predicate_value(index, node, arg, position, size)?.as_number()
            } else { 0.0 };
            Ok(XPathValue::Number(n.ceil()))
        }
        "round" => {
            let n = if let Some(arg) = args.first() {
                eval_predicate_value(index, node, arg, position, size)?.as_number()
            } else { 0.0 };
            // `f64::round` rounds halves away from zero; a negative half is
            // sent towards positive infinity instead.
            Ok(XPathValue::Number(if n.fract() == -0.5 { n.ceil() } else { n.round() }))
        }
        "number" => {
            let val = if let Some(arg) = args.first() {
                eval_predicate_value(index, node, arg, position, size)?
            } else {
                XPathValue::String(node_string_value(index, node))
            };
            Ok(XPathValue::Number(val.as_number()))
        }
        "sum" => {
            if let Some(arg) = args.first() {
                let nodes = evaluate_in_context(index, node, arg)?;
                // NOTE(review): string-values are parsed without trimming, and
                // unparsable ones count as 0 rather than NaN. This differs from
                // `number()`; left unchanged pending confirmation.
                let total: f64 = nodes.iter()
                    .map(|n| node_string_value(index, *n).parse::<f64>().unwrap_or(0.0))
                    .sum();
                Ok(XPathValue::Number(total))
            } else {
                Ok(XPathValue::Number(0.0))
            }
        }
        "translate" => {
            if args.len() >= 3 {
                let s = eval_predicate_value(index, node, &args[0], position, size)?.as_string();
                let from = eval_predicate_value(index, node, &args[1], position, size)?.as_string();
                let to = eval_predicate_value(index, node, &args[2], position, size)?.as_string();
                let from_chars: Vec<char> = from.chars().collect();
                let to_chars: Vec<char> = to.chars().collect();
                let result: String = s.chars().filter_map(|c| {
                    if let Some(pos) = from_chars.iter().position(|&fc| fc == c) {
                        // Mapped character, or removal when `to` is shorter
                        // than `from` (`get` returns None).
                        to_chars.get(pos).copied()
                    } else {
                        Some(c)
                    }
                }).collect();
                Ok(XPathValue::String(result))
            } else {
                Ok(XPathValue::String(String::new()))
            }
        }
        "substring-before" => {
            if args.len() >= 2 {
                let s = eval_predicate_value(index, node, &args[0], position, size)?.as_string();
                let needle = eval_predicate_value(index, node, &args[1], position, size)?.as_string();
                if let Some(pos) = s.find(&needle) {
                    Ok(XPathValue::String(s[..pos].to_string()))
                } else {
                    Ok(XPathValue::String(String::new()))
                }
            } else {
                Ok(XPathValue::String(String::new()))
            }
        }
        "substring-after" => {
            if args.len() >= 2 {
                let s = eval_predicate_value(index, node, &args[0], position, size)?.as_string();
                let needle = eval_predicate_value(index, node, &args[1], position, size)?.as_string();
                if let Some(pos) = s.find(&needle) {
                    Ok(XPathValue::String(s[pos + needle.len()..].to_string()))
                } else {
                    Ok(XPathValue::String(String::new()))
                }
            } else {
                Ok(XPathValue::String(String::new()))
            }
        }
        "boolean" => {
            if let Some(arg) = args.first() {
                // A location-path argument is an existence test.
                if let XPathExpr::LocationPath(_) = arg {
                    let nodes = evaluate_in_context(index, node, arg)?;
                    Ok(XPathValue::Boolean(!nodes.is_empty()))
                } else {
                    let val = eval_predicate_value(index, node, arg, position, size)?;
                    Ok(XPathValue::Boolean(val.is_truthy()))
                }
            } else {
                Ok(XPathValue::Boolean(false))
            }
        }
        "lang" => {
            if let Some(arg) = args.first() {
                let target = eval_predicate_value(index, node, arg, position, size)?.as_string();
                if target.is_empty() {
                    return Ok(XPathValue::Boolean(false));
                }
                let target_lower = target.to_ascii_lowercase();
                let mut current = match node {
                    XPathNode::Element(idx) if idx != DOC_ROOT => Some(idx),
                    _ => None,
                };
                // Walk up the ancestors; the nearest `xml:lang` decides.
                while let Some(idx) = current {
                    if let Some(lang_val) = index.get_attribute(idx, "xml:lang") {
                        let lang_lower = lang_val.to_ascii_lowercase();
                        // Exact match, or the target followed by a '-' subtag
                        // separator (ASCII case-insensitive).
                        let matches = lang_lower == target_lower
                            || (lang_lower.starts_with(&target_lower)
                                && lang_lower.as_bytes().get(target_lower.len()) == Some(&b'-'));
                        return Ok(XPathValue::Boolean(matches));
                    }
                    let parent = index.parents[idx];
                    current = if parent != u32::MAX { Some(parent as usize) } else { None };
                }
                Ok(XPathValue::Boolean(false))
            } else {
                Ok(XPathValue::Boolean(false))
            }
        }
        "namespace-uri" => {
            let target = if let Some(arg) = args.first() {
                let mut nodes = evaluate_in_context(index, node, arg)?;
                sort_doc_order(index, &mut nodes);
                nodes.first().copied()
            } else {
                Some(node)
            };
            match target {
                Some(XPathNode::Element(idx)) if idx != DOC_ROOT && idx < index.tag_count() => {
                    // An unprefixed element is resolved with `None` as prefix.
                    let full_name = index.tag_name(idx);
                    let prefix = full_name.split_once(':').map(|(p, _)| p);
                    Ok(XPathValue::String(resolve_namespace_uri(index, idx, prefix).unwrap_or_default()))
                }
                Some(XPathNode::Attribute(tag_idx, name_hash)) => {
                    let attr_name = index.get_all_attribute_names(tag_idx)
                        .into_iter()
                        .find(|n| attr_name_hash(n) == name_hash)
                        .unwrap_or("");
                    // An unprefixed attribute yields the empty string; no
                    // namespace lookup is attempted for it.
                    if let Some((prefix, _)) = attr_name.split_once(':') {
                        Ok(XPathValue::String(resolve_namespace_uri(index, tag_idx, Some(prefix)).unwrap_or_default()))
                    } else {
                        Ok(XPathValue::String(String::new()))
                    }
                }
                _ => Ok(XPathValue::String(String::new())),
            }
        }
        // Unknown function names evaluate to the empty string.
        _ => Ok(XPathValue::String(String::new())),
    }
}
/// Resolves the namespace URI that `prefix` is bound to, as seen from the tag
/// at `start_idx`.
///
/// The lookup checks `start_idx` and then every ancestor reachable through
/// `index.parents` for an `xmlns:<prefix>` attribute (plain `xmlns` when
/// `prefix` is `None`) and returns the first value found, copied verbatim.
///
/// The `xml` prefix is bound by definition and documents are not required to
/// declare it, so when no explicit declaration is found for `xml` the
/// predefined namespace name is returned instead of `None`.
///
/// Returns `None` when no declaration is found for any other prefix.
fn resolve_namespace_uri(index: &XmlIndex, start_idx: usize, prefix: Option<&str>) -> Option<String> {
    // Namespace name the `xml` prefix is always bound to (Namespaces in XML 1.0).
    const XML_NAMESPACE_URI: &str = "http://www.w3.org/XML/1998/namespace";

    let decl_name = match prefix {
        Some(p) => format!("xmlns:{}", p),
        None => "xmlns".to_string(),
    };

    let mut idx = start_idx;
    loop {
        if let Some(uri) = index.get_attribute(idx, &decl_name) {
            return Some(uri.to_string());
        }
        // `u32::MAX` marks a tag without a parent: the walk ends here.
        let parent = index.parents[idx];
        if parent == u32::MAX {
            break;
        }
        idx = parent as usize;
    }

    // Explicit declarations win (handled above); the implicit binding is only
    // a fallback so existing results for declared prefixes are unchanged.
    if prefix == Some("xml") {
        Some(XML_NAMESPACE_URI.to_string())
    } else {
        None
    }
}
/// Applies a comparison or boolean operator to two already-evaluated values.
///
/// Equality follows a fixed precedence: if either operand is a boolean, both
/// are compared as booleans; otherwise, if either is a number, both are
/// compared as numbers; otherwise both are compared as strings. `!=` is the
/// negation of that equality. The relational operators always compare the
/// numeric conversions, and `or` / `and` combine the truthiness of both sides.
/// Any other operator yields `false`.
fn compare_values(left: &XPathValue, op: &BinaryOp, right: &XPathValue) -> bool {
    match op {
        BinaryOp::Eq => {
            if let XPathValue::Boolean(lhs) = left {
                return *lhs == right.is_truthy();
            }
            if let XPathValue::Boolean(rhs) = right {
                return left.is_truthy() == *rhs;
            }
            let numeric = matches!(left, XPathValue::Number(_))
                || matches!(right, XPathValue::Number(_));
            if numeric {
                // IEEE-754 `==` is already false whenever either side is NaN.
                left.as_number() == right.as_number()
            } else {
                left.as_string() == right.as_string()
            }
        }
        BinaryOp::Neq => !compare_values(left, &BinaryOp::Eq, right),
        BinaryOp::Lt => left.as_number() < right.as_number(),
        BinaryOp::Gt => left.as_number() > right.as_number(),
        BinaryOp::Lte => left.as_number() <= right.as_number(),
        BinaryOp::Gte => left.as_number() >= right.as_number(),
        BinaryOp::Or => left.is_truthy() || right.is_truthy(),
        BinaryOp::And => left.is_truthy() && right.is_truthy(),
        _ => false,
    }
}
/// Computes the string-value of `node`.
///
/// * Document root: every text range in the index, entity-decoded and
///   concatenated in index order.
/// * Processing instruction: the markup between `<?` and `?>` after the first
///   whitespace character, trimmed; empty when there is no whitespace.
/// * Comment: the markup between `<!--` and `-->`.
/// * Any other element: the entity-decoded result of `index.all_text`.
/// * Text node: the entity-decoded content of its text range.
/// * Attribute: the value of the attribute on the owning tag whose name hashes
///   to the stored hash; empty when none matches.
/// * Anything else (namespace nodes): the empty string.
///
/// The raw bytes of PI and comment tags are converted with
/// `String::from_utf8_lossy` rather than an unchecked conversion, so malformed
/// input bytes produce U+FFFD replacement characters instead of undefined
/// behaviour. Valid UTF-8 input yields exactly the same text as before.
fn node_string_value(index: &XmlIndex, node: XPathNode) -> String {
    /// Extracts the data part of a processing instruction from its markup.
    fn pi_data(markup: &str) -> String {
        let inner = markup.strip_prefix("<?").unwrap_or(markup);
        // The closing delimiter may be only partially inside the recorded span.
        let inner = inner
            .strip_suffix("?>")
            .or_else(|| inner.strip_suffix("?"))
            .or_else(|| inner.strip_suffix(">"))
            .unwrap_or(inner);
        // Everything up to the first whitespace is the PI target, not its data.
        inner
            .find(char::is_whitespace)
            .map(|at| inner[at..].trim().to_string())
            .unwrap_or_default()
    }

    /// Extracts the body of a comment from its markup.
    fn comment_body(markup: &str) -> String {
        let inner = markup.strip_prefix("<!--").unwrap_or(markup);
        inner
            .strip_suffix("-->")
            .or_else(|| inner.strip_suffix("--"))
            .or_else(|| inner.strip_suffix(">"))
            .unwrap_or(inner)
            .to_string()
    }

    match node {
        XPathNode::Element(idx) if idx == DOC_ROOT => {
            let mut joined = String::new();
            for range in &index.text_ranges {
                joined.push_str(&XmlIndex::decode_entities(index.text_content(range)));
            }
            joined
        }
        XPathNode::Element(idx)
            if idx < index.tag_count()
                && matches!(index.tag_types[idx], TagType::PI | TagType::Comment) =>
        {
            let start = index.tag_starts[idx] as usize;
            let end = index.tag_ends[idx] as usize;
            // Checked conversion: nothing in this function proves the input
            // bytes are valid UTF-8.
            let markup = String::from_utf8_lossy(&index.input[start..end]);
            if index.tag_types[idx] == TagType::PI {
                pi_data(&markup)
            } else {
                comment_body(&markup)
            }
        }
        XPathNode::Element(idx) if idx != DOC_ROOT => {
            XmlIndex::decode_entities(&index.all_text(idx)).into_owned()
        }
        XPathNode::Text(idx) => {
            XmlIndex::decode_entities(index.text_content(&index.text_ranges[idx])).into_owned()
        }
        XPathNode::Attribute(tag_idx, name_hash) => index
            .get_all_attribute_names(tag_idx)
            .into_iter()
            .filter(|name| attr_name_hash(name) == name_hash)
            .find_map(|name| index.get_attribute(tag_idx, name))
            .map(|value| value.to_string())
            .unwrap_or_default(),
        _ => String::new(),
    }
}
/// Evaluates `expr` with `context_node` as the starting point.
///
/// * Relative location paths are walked step by step from `context_node`.
///   A predicate-free `descendant-or-self::node()` step immediately followed
///   by a `child` step is handed to the fused descendant/child evaluators
///   (the predicate-aware variant when the child step carries predicates).
/// * Unions evaluate every branch from the same context, then de-duplicate
///   and sort the combined nodes into document order.
/// * Filters evaluate the inner expression and apply each predicate in turn.
/// * Filter paths evaluate the primary expression and then each trailing step.
/// * Everything else (absolute paths, function calls, ...) is delegated to
///   `evaluate`, which does not receive the context node.
///
/// # Errors
///
/// Propagates the first error returned by any step, predicate or delegated
/// evaluation.
pub fn evaluate_from_context(
    index: &XmlIndex,
    expr: &XPathExpr,
    context_node: XPathNode,
) -> Result<Vec<XPathNode>> {
    match expr {
        XPathExpr::LocationPath(path) if !path.absolute => {
            let mut current = vec![context_node];
            let mut pending = path.steps.iter().peekable();
            while let Some(step) = pending.next() {
                let opens_descendant_scan = step.axis == Axis::DescendantOrSelf
                    && step.node_test == NodeTest::Node
                    && step.predicates.is_empty();
                // Consume the following step as well when the pair can be fused.
                let fused_child = if opens_descendant_scan {
                    pending.next_if(|following| following.axis == Axis::Child)
                } else {
                    None
                };
                current = match fused_child {
                    Some(child) if child.predicates.is_empty() => {
                        eval_fused_descendant_child(index, &current, child)?
                    }
                    Some(child) => eval_fused_descendant_child_with_preds(index, &current, child)?,
                    None => eval_step(index, &current, step)?,
                };
            }
            Ok(current)
        }
        XPathExpr::Union(branches) => {
            let mut merged = Vec::new();
            for branch in branches {
                merged.extend(evaluate_from_context(index, branch, context_node)?);
            }
            dedup_nodes(&mut merged);
            sort_doc_order(index, &mut merged);
            Ok(merged)
        }
        XPathExpr::GlobalFilter(inner, preds) => preds.iter().try_fold(
            evaluate_from_context(index, inner, context_node)?,
            |nodes, pred| apply_predicate(index, &nodes, pred),
        ),
        XPathExpr::FilterPath(primary, steps) => steps.iter().try_fold(
            evaluate_from_context(index, primary, context_node)?,
            |nodes, step| eval_step(index, &nodes, step),
        ),
        // Absolute location paths, function calls and all remaining
        // expression kinds are evaluated without the context node.
        _ => evaluate(index, expr),
    }
}
/// Evaluates a node-set producing expression relative to `context_node`.
///
/// Relative location paths are walked one step at a time from the context
/// node (no step fusion). Absolute paths and calls to `id` are delegated to
/// `evaluate`. Unions, filter paths and filters recurse with the same context.
/// Any other expression kind yields an empty node list.
///
/// # Errors
///
/// Propagates the first error returned by a step, predicate or delegated
/// evaluation.
fn evaluate_in_context(
    index: &XmlIndex,
    context_node: XPathNode,
    expr: &XPathExpr,
) -> Result<Vec<XPathNode>> {
    match expr {
        XPathExpr::LocationPath(path) if !path.absolute => path
            .steps
            .iter()
            .try_fold(vec![context_node], |nodes, step| eval_step(index, &nodes, step)),
        XPathExpr::LocationPath(_) => evaluate(index, expr),
        XPathExpr::Union(branches) => {
            let mut merged = Vec::new();
            for branch in branches {
                merged.extend(evaluate_in_context(index, context_node, branch)?);
            }
            dedup_nodes(&mut merged);
            sort_doc_order(index, &mut merged);
            Ok(merged)
        }
        XPathExpr::FilterPath(primary, steps) => steps.iter().try_fold(
            evaluate_in_context(index, context_node, primary)?,
            |nodes, step| eval_step(index, &nodes, step),
        ),
        XPathExpr::GlobalFilter(inner, preds) => preds.iter().try_fold(
            evaluate_in_context(index, context_node, inner)?,
            |nodes, pred| apply_predicate(index, &nodes, pred),
        ),
        // `id(...)` is the only function call treated as a node-set source here.
        XPathExpr::FunctionCall(name, _) if name == "id" => evaluate(index, expr),
        _ => Ok(Vec::new()),
    }
}
#[inline]
fn matches_node_test(index: &XmlIndex, node: XPathNode, test: &NodeTest) -> bool {
match (node, test) {
(_, NodeTest::Node) => true,
(XPathNode::Element(idx), NodeTest::Wildcard) => {
idx != DOC_ROOT && idx < index.tag_count()
&& (index.tag_types[idx] == TagType::Open || index.tag_types[idx] == TagType::SelfClose)
}
(XPathNode::Namespace(_, _), NodeTest::Wildcard) => true,
(XPathNode::Text(_), NodeTest::Text) => true,
(XPathNode::Element(idx), NodeTest::Name(name)) if idx < index.tag_count() => {
(index.tag_types[idx] == TagType::Open || index.tag_types[idx] == TagType::SelfClose)
&& index.tag_name_eq(idx, name)
}
(XPathNode::Element(idx), NodeTest::Comment) if idx < index.tag_count() => {
index.tag_types[idx] == TagType::Comment
}
(XPathNode::Element(idx), NodeTest::PI) if idx < index.tag_count() => {
index.tag_types[idx] == TagType::PI
}
(XPathNode::Element(idx), NodeTest::PIName(target)) if idx < index.tag_count() => {
index.tag_types[idx] == TagType::PI && index.tag_name_eq(idx, target)
}
(XPathNode::Element(idx), NodeTest::NamespacedName(prefix, local)) if idx < index.tag_count() => {
let tt = index.tag_types[idx];
if tt != TagType::Open && tt != TagType::SelfClose { return false; }
let full_name = index.tag_name(idx);
if local == "*" {
full_name.starts_with(prefix.as_str()) && full_name.as_bytes().get(prefix.len()) == Some(&b':')
} else if let Some((p, l)) = full_name.split_once(':') {
p == prefix && l == local
} else {
false
}
}
(XPathNode::Attribute(_, _), NodeTest::Name(_)) => false,
(XPathNode::Namespace(_, hash), NodeTest::Name(name)) => hash == attr_name_hash(name),
_ => false,
}
}
/// Returns the children of `node`, ordered by their start offset.
///
/// Only element-like nodes have children; every other node kind yields an
/// empty list.
///
/// * Document root: all node tags recorded at depth 0 (text ranges are not
///   included on this path).
/// * No child indices available (`index.has_indices()` is false): a scan over
///   all tags and text ranges whose parent is this tag.
/// * Otherwise: the per-parent tag and text slices are merged by start offset.
fn eval_child_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    let parent_idx = match node {
        XPathNode::Element(idx) => idx,
        _ => return Vec::new(),
    };

    // Slow paths: collect (offset, node) pairs by scanning, then sort.
    if parent_idx == DOC_ROOT || !index.has_indices() {
        let mut located: Vec<(u64, XPathNode)> = Vec::new();
        if parent_idx == DOC_ROOT {
            located.extend(
                (0..index.tag_count())
                    .filter(|&i| {
                        index.depths[i] == 0
                            && is_node_tag(index.tag_types[i])
                            && index.tag_types[i] != TagType::Close
                    })
                    .map(|i| (index.tag_starts[i], XPathNode::Element(i))),
            );
        } else {
            located.extend(
                (0..index.tag_count())
                    .filter(|&i| {
                        index.parents[i] == parent_idx as u32 && is_node_tag(index.tag_types[i])
                    })
                    .map(|i| (index.tag_starts[i], XPathNode::Element(i))),
            );
            located.extend(
                index
                    .text_ranges
                    .iter()
                    .enumerate()
                    .filter(|(_, range)| range.parent_tag == parent_idx as u32)
                    .map(|(i, range)| (range.start, XPathNode::Text(i))),
            );
        }
        located.sort_by_key(|&(offset, _)| offset);
        return located.into_iter().map(|(_, child)| child).collect();
    }

    let tags = index.child_tag_slice(parent_idx);
    let texts = index.child_text_slice(parent_idx);
    if texts.is_empty() {
        return tags.iter().map(|&tag| XPathNode::Element(tag as usize)).collect();
    }

    // Two-way merge of the tag and text child lists by start offset; on a tie
    // the text node is emitted first.
    let mut merged = Vec::with_capacity(tags.len() + texts.len());
    let (mut next_tag, mut next_text) = (0, 0);
    loop {
        match (tags.get(next_tag), texts.get(next_text)) {
            (Some(&tag), Some(&text)) => {
                let tag_pos = index.tag_starts[tag as usize];
                let text_pos = index.text_ranges[text as usize].start;
                if tag_pos < text_pos {
                    merged.push(XPathNode::Element(tag as usize));
                    next_tag += 1;
                } else {
                    merged.push(XPathNode::Text(text as usize));
                    next_text += 1;
                }
            }
            (Some(&tag), None) => {
                merged.push(XPathNode::Element(tag as usize));
                next_tag += 1;
            }
            (None, Some(&text)) => {
                merged.push(XPathNode::Text(text as usize));
                next_text += 1;
            }
            (None, None) => break,
        }
    }
    merged
}
/// Returns `true` for tag types that stand for a node of their own: open
/// tags, self-closing tags, comments and processing instructions. Every other
/// tag type (such as a closing tag) returns `false`.
#[inline(always)]
fn is_node_tag(tt: TagType) -> bool {
    match tt {
        TagType::Open | TagType::SelfClose | TagType::Comment | TagType::PI => true,
        _ => false,
    }
}
/// Returns the descendants of `node` ordered by start offset, optionally
/// preceded by `node` itself.
///
/// Nodes that are not tag-backed have no descendants: the result is `[node]`
/// when `include_self` is set and empty otherwise.
///
/// For the document root, every node tag plus every text range that has a
/// parent tag is returned. For any other tag, the tags between it and its
/// matching close tag are returned together with the text ranges whose parent
/// tag index lies in that same span.
///
/// NOTE(review): when `matching_close` returns `None` the span extends to the
/// end of the tag list, whereas `eval_following_axis` falls back to the tag's
/// own index — confirm which fallback is intended for tags without a close.
fn eval_descendant_axis(index: &XmlIndex, node: XPathNode, include_self: bool) -> Vec<XPathNode> {
    let start_idx = match node {
        XPathNode::Element(idx) => idx,
        _ if include_self => return vec![node],
        _ => return Vec::new(),
    };

    let mut located: Vec<(u64, XPathNode)> = Vec::new();
    if start_idx == DOC_ROOT {
        if include_self {
            // Offset 0 plus the stable sort below keeps the root first.
            located.push((0, XPathNode::Element(DOC_ROOT)));
        }
        located.extend(
            (0..index.tag_count())
                .filter(|&i| is_node_tag(index.tag_types[i]))
                .map(|i| (index.tag_starts[i], XPathNode::Element(i))),
        );
        located.extend(
            index
                .text_ranges
                .iter()
                .enumerate()
                .filter(|(_, range)| range.parent_tag != u32::MAX)
                .map(|(i, range)| (range.start, XPathNode::Text(i))),
        );
    } else {
        if include_self {
            located.push((index.tag_starts[start_idx], node));
        }
        let close_idx = index.matching_close(start_idx).unwrap_or(index.tag_count());
        located.extend(
            ((start_idx + 1)..close_idx)
                .filter(|&i| is_node_tag(index.tag_types[i]))
                .map(|i| (index.tag_starts[i], XPathNode::Element(i))),
        );
        // A text range belongs to the subtree when its parent tag index falls
        // inside the [start, close) span.
        located.extend(
            index
                .text_ranges
                .iter()
                .enumerate()
                .filter(|(_, range)| (start_idx..close_idx).contains(&(range.parent_tag as usize)))
                .map(|(i, range)| (range.start, XPathNode::Text(i))),
        );
    }

    located.sort_by_key(|&(offset, _)| offset);
    located.into_iter().map(|(_, found)| found).collect()
}
/// Returns the parent of `node` as a list of zero or one nodes.
///
/// The document root has no parent. Tags and text ranges report the parent
/// recorded in the index, with the `u32::MAX` sentinel mapped to the document
/// root. Attribute and namespace nodes report the tag they are attached to.
fn eval_parent_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    // Translates a raw parent slot into a node, mapping the "no parent"
    // sentinel to the document root.
    let owner_of = |raw: u32| {
        if raw == u32::MAX {
            XPathNode::Element(DOC_ROOT)
        } else {
            XPathNode::Element(raw as usize)
        }
    };

    match node {
        XPathNode::Element(idx) if idx == DOC_ROOT => Vec::new(),
        XPathNode::Element(idx) => vec![owner_of(index.parents[idx])],
        XPathNode::Text(idx) => vec![owner_of(index.text_ranges[idx].parent_tag)],
        XPathNode::Attribute(owner, _) | XPathNode::Namespace(owner, _) => {
            vec![XPathNode::Element(owner)]
        }
    }
}
/// Returns the ancestors of `node`, nearest first, ending with the document
/// root; `node` itself leads the list when `include_self` is set.
///
/// The walk starts at the recorded parent of a tag or text range, or at the
/// owning tag of an attribute or namespace node, and follows `index.parents`
/// until it meets the `u32::MAX` sentinel or an index outside the tag list.
/// The document root is appended unless `node` is the document root itself.
fn eval_ancestor_axis(index: &XmlIndex, node: XPathNode, include_self: bool) -> Vec<XPathNode> {
    let mut chain: Vec<XPathNode> = if include_self { vec![node] } else { Vec::new() };

    let mut cursor = match node {
        XPathNode::Element(idx) if idx == DOC_ROOT => u32::MAX,
        XPathNode::Element(idx) if idx < index.tag_count() => index.parents[idx],
        XPathNode::Text(idx) => index.text_ranges[idx].parent_tag,
        XPathNode::Attribute(owner, _) | XPathNode::Namespace(owner, _)
            if owner < index.tag_count() =>
        {
            owner as u32
        }
        _ => u32::MAX,
    };

    loop {
        if cursor == u32::MAX || (cursor as usize) >= index.tag_count() {
            break;
        }
        chain.push(XPathNode::Element(cursor as usize));
        cursor = index.parents[cursor as usize];
    }

    match node {
        XPathNode::Element(idx) if idx == DOC_ROOT => {}
        _ => chain.push(XPathNode::Element(DOC_ROOT)),
    }
    chain
}
/// Returns the siblings that come after `node` under the same parent.
///
/// * Text node: sibling tags and text ranges (taken from the parent's child
///   slices) that start after this text range, in document order. A text
///   range without a parent tag has no siblings.
/// * Tag with child indices available and a parent tag: sibling tags with a
///   larger tag index plus sibling text ranges starting after this tag, in
///   document order.
/// * Tag otherwise: a forward scan for node tags with the same parent and
///   depth; text ranges are not considered on this path.
/// * Document root, out-of-range tags, attributes and namespaces: empty.
fn eval_following_sibling_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    match node {
        XPathNode::Text(text_idx) => {
            let parent = index.text_ranges[text_idx].parent_tag;
            let own_start = index.text_ranges[text_idx].start;
            if parent == u32::MAX {
                return Vec::new();
            }
            let parent_idx = parent as usize;
            let mut siblings: Vec<XPathNode> = index
                .child_tag_slice(parent_idx)
                .iter()
                .filter(|&&tag| index.tag_starts[tag as usize] > own_start)
                .map(|&tag| XPathNode::Element(tag as usize))
                .collect();
            siblings.extend(
                index
                    .child_text_slice(parent_idx)
                    .iter()
                    .filter(|&&text| index.text_ranges[text as usize].start > own_start)
                    .map(|&text| XPathNode::Text(text as usize)),
            );
            sort_doc_order(index, &mut siblings);
            siblings
        }
        XPathNode::Element(idx) if idx != DOC_ROOT && idx < index.tag_count() => {
            let parent_tag = index.parents[idx];
            if index.has_indices() && parent_tag != u32::MAX {
                let parent_idx = parent_tag as usize;
                let own_start = index.tag_starts[idx];
                let mut siblings: Vec<XPathNode> = index
                    .child_tag_slice(parent_idx)
                    .iter()
                    .filter(|&&tag| (tag as usize) > idx)
                    .map(|&tag| XPathNode::Element(tag as usize))
                    .collect();
                siblings.extend(
                    index
                        .child_text_slice(parent_idx)
                        .iter()
                        .filter(|&&text| index.text_ranges[text as usize].start > own_start)
                        .map(|&text| XPathNode::Text(text as usize)),
                );
                sort_doc_order(index, &mut siblings);
                siblings
            } else {
                // Fallback scan: later tags sharing both parent and depth.
                let depth = index.depths[idx];
                ((idx + 1)..index.tag_count())
                    .filter(|&i| {
                        index.parents[i] == parent_tag
                            && index.depths[i] == depth
                            && is_node_tag(index.tag_types[i])
                    })
                    .map(XPathNode::Element)
                    .collect()
            }
        }
        _ => Vec::new(),
    }
}
/// Returns the siblings that come before `node` under the same parent.
///
/// * Text node: sibling tags and text ranges (taken from the parent's child
///   slices) that start before this text range, in document order. A text
///   range without a parent tag has no siblings.
/// * Tag with child indices available and a parent tag: sibling tags with a
///   smaller tag index plus sibling text ranges starting before this tag, in
///   document order.
/// * Tag otherwise: a backward scan for node tags with the same parent and
///   depth, so results on this path are nearest-first; text ranges are not
///   considered.
/// * Document root, out-of-range tags, attributes and namespaces: empty.
fn eval_preceding_sibling_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    match node {
        XPathNode::Text(text_idx) => {
            let parent = index.text_ranges[text_idx].parent_tag;
            let own_start = index.text_ranges[text_idx].start;
            if parent == u32::MAX {
                return Vec::new();
            }
            let parent_idx = parent as usize;
            let mut siblings: Vec<XPathNode> = index
                .child_tag_slice(parent_idx)
                .iter()
                .filter(|&&tag| index.tag_starts[tag as usize] < own_start)
                .map(|&tag| XPathNode::Element(tag as usize))
                .collect();
            siblings.extend(
                index
                    .child_text_slice(parent_idx)
                    .iter()
                    .filter(|&&text| index.text_ranges[text as usize].start < own_start)
                    .map(|&text| XPathNode::Text(text as usize)),
            );
            sort_doc_order(index, &mut siblings);
            siblings
        }
        XPathNode::Element(idx) if idx != DOC_ROOT && idx < index.tag_count() => {
            let parent_tag = index.parents[idx];
            if index.has_indices() && parent_tag != u32::MAX {
                let parent_idx = parent_tag as usize;
                let own_start = index.tag_starts[idx];
                let mut siblings: Vec<XPathNode> = index
                    .child_tag_slice(parent_idx)
                    .iter()
                    .filter(|&&tag| (tag as usize) < idx)
                    .map(|&tag| XPathNode::Element(tag as usize))
                    .collect();
                siblings.extend(
                    index
                        .child_text_slice(parent_idx)
                        .iter()
                        .filter(|&&text| index.text_ranges[text as usize].start < own_start)
                        .map(|&text| XPathNode::Text(text as usize)),
                );
                sort_doc_order(index, &mut siblings);
                siblings
            } else {
                // Fallback scan walks backwards from the tag, so the closest
                // sibling is emitted first.
                let depth = index.depths[idx];
                (0..idx)
                    .rev()
                    .filter(|&i| {
                        index.parents[i] == parent_tag
                            && index.depths[i] == depth
                            && is_node_tag(index.tag_types[i])
                    })
                    .map(XPathNode::Element)
                    .collect()
            }
        }
        _ => Vec::new(),
    }
}
/// Returns the nodes on the `following` axis of `node`.
///
/// * Tag: every open or self-closing tag located after this tag's matching
///   close tag (or after the tag itself when no close is reported).
/// * Attribute / namespace node: every node tag after the owning tag plus
///   every text range that starts after the owning tag, in document order.
/// * Text nodes, the document root and out-of-range tags: empty.
///
/// NOTE(review): the tag branch returns elements only, while the attribute
/// branch also returns comments, processing instructions and text — confirm
/// whether that asymmetry is intended.
fn eval_following_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    let (idx, from_attr_like) = match node {
        XPathNode::Element(i) => (i, false),
        XPathNode::Attribute(i, _) | XPathNode::Namespace(i, _) => (i, true),
        _ => return Vec::new(),
    };
    if idx == DOC_ROOT || idx >= index.tag_count() {
        return Vec::new();
    }
    let close = index.matching_close(idx).unwrap_or(idx);

    if !from_attr_like {
        return (close.saturating_add(1)..index.tag_count())
            .filter(|&i| matches!(index.tag_types[i], TagType::Open | TagType::SelfClose))
            .map(XPathNode::Element)
            .collect();
    }

    // For attribute-like nodes the owning tag's own content counts as following.
    let own_start = index.tag_starts[idx];
    let mut following: Vec<XPathNode> = ((idx + 1)..index.tag_count())
        .filter(|&i| is_node_tag(index.tag_types[i]) && index.tag_types[i] != TagType::Close)
        .map(XPathNode::Element)
        .collect();
    following.extend(
        index
            .text_ranges
            .iter()
            .enumerate()
            .filter(|(_, range)| range.start > own_start)
            .map(|(text_idx, _)| XPathNode::Text(text_idx)),
    );
    sort_doc_order(index, &mut following);
    following
}
/// Returns the elements on the `preceding` axis of `node`: every open or
/// self-closing tag with a smaller tag index that is not an ancestor of the
/// reference tag, in ascending tag-index order.
///
/// The reference tag is the tag itself, or the owning tag for attribute and
/// namespace nodes. Text nodes, the document root and out-of-range tags yield
/// an empty list.
///
/// Ancestry is answered by `index.is_ancestor` when `index.post_order` is
/// populated; otherwise the ancestor set is built by walking `index.parents`.
fn eval_preceding_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    let idx = match node {
        XPathNode::Element(i) | XPathNode::Attribute(i, _) | XPathNode::Namespace(i, _) => i,
        _ => return Vec::new(),
    };
    if idx == DOC_ROOT || idx >= index.tag_count() {
        return Vec::new();
    }

    let is_element =
        |i: usize| matches!(index.tag_types[i], TagType::Open | TagType::SelfClose);

    if !index.post_order.is_empty() {
        return (0..idx)
            .filter(|&i| is_element(i) && !index.is_ancestor(i, idx))
            .map(XPathNode::Element)
            .collect();
    }

    // No post-order data: gather the ancestor chain once, then filter.
    let mut ancestors = std::collections::HashSet::new();
    let mut cursor = index.parents[idx];
    loop {
        if cursor == u32::MAX {
            break;
        }
        ancestors.insert(cursor);
        cursor = index.parents[cursor as usize];
    }
    (0..idx)
        .filter(|&i| is_element(i) && !ancestors.contains(&(i as u32)))
        .map(XPathNode::Element)
        .collect()
}
/// Returns the attribute nodes of `node` selected by `test`.
///
/// Only tag-backed nodes carry attributes. Namespace declarations (`xmlns`
/// and `xmlns:*`) are never reported as attributes.
///
/// * Name test: the attribute with exactly that name, if present.
/// * `prefix:*`: every attribute whose name starts with `prefix:`.
/// * `prefix:local`: the attribute named `prefix:local`, if present.
/// * `*` / `node()`: every attribute that is not a namespace declaration.
/// * Any other test: nothing.
///
/// Each result identifies the attribute by owning tag index and name hash.
fn eval_attribute_axis(
    index: &XmlIndex,
    node: XPathNode,
    test: &NodeTest,
) -> Vec<XPathNode> {
    /// True for attribute names that declare a namespace.
    fn is_namespace_decl(name: &str) -> bool {
        name == "xmlns" || name.starts_with("xmlns:")
    }

    let idx = match node {
        XPathNode::Element(idx) => idx,
        _ => return Vec::new(),
    };

    // Zero-or-one lookup of a fully spelled-out attribute name.
    let lookup = |qualified: &str| -> Vec<XPathNode> {
        match index.get_attribute(idx, qualified) {
            Some(_) => vec![XPathNode::Attribute(idx, attr_name_hash(qualified))],
            None => Vec::new(),
        }
    };

    match test {
        NodeTest::Name(name) if is_namespace_decl(name) => Vec::new(),
        NodeTest::Name(name) => lookup(name.as_str()),
        NodeTest::NamespacedName(prefix, _) if prefix == "xmlns" => Vec::new(),
        NodeTest::NamespacedName(prefix, local) if local == "*" => {
            let wanted = format!("{}:", prefix);
            index
                .get_all_attribute_names(idx)
                .iter()
                .filter(|name| name.starts_with(&wanted))
                .map(|name| XPathNode::Attribute(idx, attr_name_hash(name)))
                .collect()
        }
        NodeTest::NamespacedName(prefix, local) => {
            let qualified = format!("{}:{}", prefix, local);
            lookup(&qualified)
        }
        NodeTest::Wildcard | NodeTest::Node => index
            .get_all_attribute_names(idx)
            .into_iter()
            .filter(|name| !is_namespace_decl(name))
            .map(|name| XPathNode::Attribute(idx, attr_name_hash(name)))
            .collect(),
        _ => Vec::new(),
    }
}
/// Returns the namespace nodes in scope for the tag behind `node`.
///
/// Starting at the tag and following `index.parents` up to the top, the
/// namespace declarations of every open or self-closing tag are collected.
/// A prefix is reported once, at its nearest declaration. Each result is
/// attached to the starting tag and identified by the hash of its prefix.
///
/// Non-tag nodes, the document root and out-of-range tags yield an empty list.
fn eval_namespace_axis(index: &XmlIndex, node: XPathNode) -> Vec<XPathNode> {
    let idx = match node {
        XPathNode::Element(idx) => idx,
        _ => return Vec::new(),
    };
    if idx == DOC_ROOT || idx >= index.tag_count() {
        return Vec::new();
    }

    let mut seen = std::collections::HashSet::new();
    let mut prefix_hashes: Vec<u64> = Vec::new();
    let mut cursor = idx;
    loop {
        let declares_namespaces = cursor < index.tag_count()
            && matches!(index.tag_types[cursor], TagType::Open | TagType::SelfClose);
        if declares_namespaces {
            for (prefix, _) in index.get_namespace_decls(cursor) {
                // Inner declarations shadow outer ones with the same prefix.
                if seen.insert(prefix.to_string()) {
                    prefix_hashes.push(attr_name_hash(prefix));
                }
            }
        }
        let parent = index.parents[cursor];
        if parent == u32::MAX {
            break;
        }
        cursor = parent as usize;
    }

    prefix_hashes
        .into_iter()
        .map(|hash| XPathNode::Namespace(idx, hash))
        .collect()
}
/// Finds the first predicate of the form `@name = 'literal'` (or the mirrored
/// `'literal' = @name`).
///
/// The attribute side must be a relative location path made of exactly one
/// predicate-free step on the attribute axis with a plain name test; the
/// other side must be a string literal.
///
/// Returns the attribute name, the literal and the position of the predicate
/// within `predicates`, or `None` when no predicate has that shape.
fn extract_simple_attr_eq(predicates: &[XPathExpr]) -> Option<(String, String, usize)> {
    /// Returns `(attribute name, literal)` when `attr_side` is a bare `@name`
    /// path and `literal_side` is a string literal.
    fn attr_literal_pair(
        attr_side: &XPathExpr,
        literal_side: &XPathExpr,
    ) -> Option<(String, String)> {
        let XPathExpr::LocationPath(path) = attr_side else {
            return None;
        };
        let bare_attr_step = !path.absolute
            && path.steps.len() == 1
            && path.steps[0].axis == Axis::Attribute
            && path.steps[0].predicates.is_empty();
        if !bare_attr_step {
            return None;
        }
        match (&path.steps[0].node_test, literal_side) {
            (NodeTest::Name(attr_name), XPathExpr::StringLiteral(value)) => {
                Some((attr_name.clone(), value.clone()))
            }
            _ => None,
        }
    }

    predicates.iter().enumerate().find_map(|(position, pred)| {
        let XPathExpr::BinaryOp(lhs, BinaryOp::Eq, rhs) = pred else {
            return None;
        };
        // Try `@name = 'x'` first, then the mirrored `'x' = @name`.
        let (name, value) = attr_literal_pair(lhs.as_ref(), rhs.as_ref())
            .or_else(|| attr_literal_pair(rhs.as_ref(), lhs.as_ref()))?;
        Some((name, value, position))
    })
}
/// Placeholder for a batched `@name = 'literal'` predicate filter.
///
/// The predicate is checked for the shape `@name = 'literal'` (attribute path
/// on the left, string literal on the right), but no batched evaluation is
/// implemented: the function returns `None` for every input and neither
/// `_index` nor `_nodes` is consulted.
fn try_batch_attr_predicate(
    _index: &XmlIndex,
    _nodes: &[XPathNode],
    pred: &XPathExpr,
) -> Option<Vec<XPathNode>> {
    let XPathExpr::BinaryOp(lhs, BinaryOp::Eq, rhs) = pred else {
        return None;
    };
    let XPathExpr::LocationPath(path) = lhs.as_ref() else {
        return None;
    };
    let bare_attr_step = !path.absolute
        && path.steps.len() == 1
        && path.steps[0].axis == Axis::Attribute
        && path.steps[0].predicates.is_empty();
    if !bare_attr_step {
        return None;
    }
    let NodeTest::Name(attr_name) = &path.steps[0].node_test else {
        return None;
    };
    let XPathExpr::StringLiteral(value) = rhs.as_ref() else {
        return None;
    };
    // The recognised pieces are not used yet; there is no batch kernel to call.
    let _ = (attr_name.as_str(), value.as_str());
    None
}
fn try_batch_string_predicate(
index: &XmlIndex,
nodes: &[XPathNode],
func_name: &str,
args: &[XPathExpr],
) -> Option<Vec<XPathNode>> {
if args.len() != 2 {
return None;
}
let is_self_ref = match &args[0] {
XPathExpr::LocationPath(path) => {
!path.absolute && path.steps.len() == 1
&& path.steps[0].axis == super::ast::Axis::SelfAxis
&& path.steps[0].node_test == super::ast::NodeTest::Node
&& path.steps[0].predicates.is_empty()
}
_ => false,
};
if !is_self_ref {
return None;
}
let needle = match &args[1] {
XPathExpr::StringLiteral(s) => s.as_str(),
_ => return None,
};
let mask = match func_name {
"contains" => super::simd_pred::batch_contains(index, nodes, needle),
"starts-with" => super::simd_pred::batch_starts_with(index, nodes, needle),
_ => return None,
};
let filtered: Vec<XPathNode> = nodes.iter()
.zip(mask.iter())
.filter(|(_, &keep)| keep)
.map(|(&node, _)| node)
.collect();
Some(filtered)
}
// Unit tests for the XPath evaluator: each test parses a small XML document
// with `parse_scalar`, runs an expression (or calls an axis function
// directly) and checks the selected nodes.
#[cfg(test)]
mod tests {
use super::*;
use crate::index::structural::parse_scalar;
use crate::xpath::parser::parse_xpath;
// Parses `xml`, evaluates `xpath`, and returns the text of the selected
// nodes: the direct text of each element result and the content of each
// text-node result. Attribute and namespace results are ignored.
// Panics if parsing or evaluation fails.
fn query_text<'a>(xml: &'a [u8], xpath: &str) -> Vec<String> {
let index = parse_scalar(xml).unwrap();
let expr = parse_xpath(xpath).unwrap();
let nodes = evaluate(&index, &expr).unwrap();
let mut results = Vec::new();
for node in nodes {
match node {
XPathNode::Element(idx) => {
for t in index.direct_text(idx) {
results.push(t.to_string());
}
}
XPathNode::Text(idx) => {
results.push(index.text_content(&index.text_ranges[idx]).to_string());
}
_ => {}
}
}
results
}
// Parses `xml`, evaluates `xpath`, and returns the tag names of the element
// results in result order; non-element results are dropped.
// Panics if parsing or evaluation fails.
fn query_names<'a>(xml: &'a [u8], xpath: &str) -> Vec<String> {
let index = parse_scalar(xml).unwrap();
let expr = parse_xpath(xpath).unwrap();
let nodes = evaluate(&index, &expr).unwrap();
nodes
.iter()
.filter_map(|n| match n {
XPathNode::Element(idx) => Some(index.tag_name(*idx).to_string()),
_ => None,
})
.collect()
}
// `*` under an absolute path selects every child element in document order.
#[test]
fn test_simple_child() {
let names = query_names(b"<root><a/><b/><c/></root>", "/root/*");
assert_eq!(names, vec!["a", "b", "c"]);
}
// A name test selects only the child with that name.
#[test]
fn test_specific_child() {
let names = query_names(b"<root><a/><b/><c/></root>", "/root/b");
assert_eq!(names, vec!["b"]);
}
// Direct text of each selected element is reported in document order.
#[test]
fn test_text_content() {
let texts = query_text(b"<root><item>hello</item><item>world</item></root>", "/root/item");
assert_eq!(texts, vec!["hello", "world"]);
}
// `//name` finds an element nested several levels deep.
#[test]
fn test_descendant() {
let names = query_names(
b"<root><a><b><c/></b></a></root>",
"//c",
);
assert_eq!(names, vec!["c"]);
}
// `text()` selects the text node itself rather than its parent element.
#[test]
fn test_text_node() {
let texts = query_text(
b"<root>hello</root>",
"/root/text()",
);
assert_eq!(texts, vec!["hello"]);
}
// Same wildcard selection as `test_simple_child`, with two children.
#[test]
fn test_wildcard() {
let names = query_names(
b"<root><a/><b/></root>",
"/root/*",
);
assert_eq!(names, vec!["a", "b"]);
}
// A three-step absolute path reaches the innermost element.
#[test]
fn test_deep_path() {
let texts = query_text(
b"<patent><claims><claim>Claim 1 text</claim></claims></patent>",
"/patent/claims/claim",
);
assert_eq!(texts, vec!["Claim 1 text"]);
}
// `//d` matches elements of that name under different parents.
#[test]
fn test_descendant_deep() {
let names = query_names(
b"<a><b><c><d/></c></b><e><d/></e></a>",
"//d",
);
assert_eq!(names.len(), 2);
}
// Calls the axis function directly. Tag index 1 is presumably `<a/>`
// (index 0 being `<root>`) — TODO confirm against parse_scalar's numbering.
// The two expected siblings would then be `<b/>` and `<c/>`.
#[test]
fn test_following_sibling() {
let xml = b"<root><a/><b/><c/></root>";
let index = parse_scalar(xml).unwrap();
let a_idx = 1; let siblings = eval_following_sibling_axis(&index, XPathNode::Element(a_idx));
assert_eq!(siblings.len(), 2); }
// Mirror of the test above. Tag index 3 is presumably `<c/>` — TODO confirm;
// the two expected siblings would then be `<a/>` and `<b/>`.
#[test]
fn test_preceding_sibling() {
let xml = b"<root><a/><b/><c/></root>";
let index = parse_scalar(xml).unwrap();
let c_idx = 3; let siblings = eval_preceding_sibling_axis(&index, XPathNode::Element(c_idx));
assert_eq!(siblings.len(), 2); }
// The parent of the tag at index 1 (presumably `<child/>`) must be `<root>`.
#[test]
fn test_parent() {
let xml = b"<root><child/></root>";
let index = parse_scalar(xml).unwrap();
let parents = eval_parent_axis(&index, XPathNode::Element(1)); assert_eq!(parents.len(), 1);
match parents[0] {
XPathNode::Element(idx) => assert_eq!(index.tag_name(idx), "root"),
_ => panic!("Expected element"),
}
}
// Tag index 2 is presumably `<c/>`. `eval_ancestor_axis` appends the document
// root after the tag ancestors, so the expected three entries would be
// `<b>`, `<a>` and the document root.
#[test]
fn test_ancestor() {
let xml = b"<a><b><c/></b></a>";
let index = parse_scalar(xml).unwrap();
let ancestors = eval_ancestor_axis(&index, XPathNode::Element(2), false); assert_eq!(ancestors.len(), 3); }
// A numeric predicate selects by 1-based position.
#[test]
fn test_position_predicate() {
let names = query_names(
b"<root><a/><b/><c/></root>",
"/root/*[1]",
);
assert_eq!(names, vec!["a"]);
}
// Position 3 of three children is the last child.
#[test]
fn test_position_predicate_last() {
let names = query_names(
b"<root><a/><b/><c/></root>",
"/root/*[3]",
);
assert_eq!(names, vec!["c"]);
}
// `position()` compared against a number behaves like the numeric shorthand.
#[test]
fn test_position_function_predicate() {
let names = query_names(
b"<root><a/><b/><c/></root>",
"/root/*[position()=2]",
);
assert_eq!(names, vec!["b"]);
}
// `position()=last()` selects the final child.
#[test]
fn test_last_function_predicate() {
let names = query_names(
b"<root><a/><b/><c/></root>",
"/root/*[position()=last()]",
);
assert_eq!(names, vec!["c"]);
}
// Attribute equality against a string literal filters elements.
#[test]
fn test_attribute_value_predicate() {
let texts = query_text(
b"<root><item type='a'>first</item><item type='b'>second</item></root>",
"/root/item[@type='b']",
);
assert_eq!(texts, vec!["second"]);
}
// `contains(., 'x')` keeps elements whose string value contains the needle.
#[test]
fn test_contains_predicate() {
let texts = query_text(
b"<root><p>hello world</p><p>goodbye</p></root>",
"/root/p[contains(., 'world')]",
);
assert_eq!(texts, vec!["hello world"]);
}
// `starts-with(., 'x')` keeps elements whose string value starts with the needle.
#[test]
fn test_starts_with_predicate() {
let texts = query_text(
b"<root><p>hello world</p><p>goodbye</p></root>",
"/root/p[starts-with(., 'hello')]",
);
assert_eq!(texts, vec!["hello world"]);
}
// Predicates chain left to right: `[1]` applies to the already filtered list.
#[test]
fn test_multiple_predicates() {
let xml = b"<root><item type='a'>first</item><item type='b'>second</item><item type='a'>third</item></root>";
let texts = query_text(xml, "/root/item[@type='a'][1]");
assert_eq!(texts, vec!["first"]);
}
// The remaining tests run against the `testdata/small.xml` fixture, which is
// embedded at compile time; the expected counts depend on that file.
#[test]
fn test_patent_claims() {
let xml = include_bytes!("../../../../testdata/small.xml");
let texts = query_text(xml, "//claim");
assert_eq!(texts.len(), 3);
assert!(texts[0].contains("prosthetic arm device"));
}
// Exactly one claim in the fixture is expected to carry type='independent'.
#[test]
fn test_patent_independent_claims() {
let xml = include_bytes!("../../../../testdata/small.xml");
let texts = query_text(xml, "//claim[@type='independent']");
assert_eq!(texts.len(), 1);
assert!(texts[0].contains("prosthetic arm device"));
}
// Two claims in the fixture are expected to carry type='dependent'.
#[test]
fn test_patent_dependent_claims() {
let xml = include_bytes!("../../../../testdata/small.xml");
let texts = query_text(xml, "//claim[@type='dependent']");
assert_eq!(texts.len(), 2);
}
// The fixture's title element is selected by an absolute path.
#[test]
fn test_patent_title() {
let xml = include_bytes!("../../../../testdata/small.xml");
let texts = query_text(xml, "/patent/title");
assert_eq!(texts, vec!["Prosthetic Arm Device"]);
}
// Two description paragraphs are expected in the fixture.
#[test]
fn test_patent_description_paragraphs() {
let xml = include_bytes!("../../../../testdata/small.xml");
let texts = query_text(xml, "/patent/description/p");
assert_eq!(texts.len(), 2);
}
// `//claim[1]` is expected to return a single text entry for this fixture.
#[test]
fn test_patent_first_claim() {
let xml = include_bytes!("../../../../testdata/small.xml");
let texts = query_text(xml, "//claim[1]");
assert_eq!(texts.len(), 1);
}
}