use crate::index::XmlIndex;
use super::eval::XPathNode;
/// Minimum number of candidate nodes at which `batch_contains` stops testing
/// each node on its own and instead scans one concatenated buffer.
const BATCH_THRESHOLD: usize = 8;
/// Reports, for each candidate node, whether its text content contains `needle`.
///
/// The returned vector has one entry per element of `candidates`, in the same
/// order.
///
/// With fewer than [`BATCH_THRESHOLD`] candidates, an empty needle, or a needle
/// containing a NUL byte, every node is tested on its own with `str::contains`.
/// Otherwise the node texts are joined into a single NUL-separated buffer that
/// is scanned with one `memmem` finder, and every hit is mapped back to the
/// node whose text it falls in.
pub fn batch_contains(
    index: &XmlIndex,
    candidates: &[XPathNode],
    needle: &str,
) -> Vec<bool> {
    // The batched scan separates node texts with '\0'. A needle that itself
    // contains '\0' could match on or across a separator and flag a node whose
    // own text does not contain it, so such needles take the per-node path.
    let check_per_node = candidates.len() < BATCH_THRESHOLD
        || needle.is_empty()
        || needle.contains('\0');
    if check_per_node {
        return candidates
            .iter()
            .map(|&node| node_text_content(index, node).contains(needle))
            .collect();
    }

    // `offsets[i]` is the byte position where candidate `i`'s text starts in
    // `buffer`; the final entry marks the end of the buffer.
    let mut buffer = String::new();
    let mut offsets: Vec<usize> = Vec::with_capacity(candidates.len() + 1);
    for &node in candidates {
        offsets.push(buffer.len());
        buffer.push_str(&node_text_content(index, node));
        buffer.push('\0');
    }
    offsets.push(buffer.len());

    let finder = memchr::memmem::Finder::new(needle.as_bytes());
    let mut results = vec![false; candidates.len()];
    for pos in finder.find_iter(buffer.as_bytes()) {
        // An exact hit on a start offset belongs to that candidate; otherwise
        // the match lies inside the candidate that starts just before `pos`.
        let doc_idx = match offsets.binary_search(&pos) {
            Ok(i) => i,
            Err(i) => i.saturating_sub(1),
        };
        if let Some(found) = results.get_mut(doc_idx) {
            *found = true;
        }
    }
    results
}
/// Reports, for each candidate node, whether its text content begins with
/// `prefix`.
///
/// The returned vector has one entry per element of `candidates`, in the same
/// order. An empty prefix matches every node.
pub fn batch_starts_with(
    index: &XmlIndex,
    candidates: &[XPathNode],
    prefix: &str,
) -> Vec<bool> {
    // A prefix test only looks at the start of each node's own text, so there
    // is nothing to gain from a batched scan: small and large candidate sets
    // share this single per-node path.
    candidates
        .iter()
        .map(|&node| node_text_content(index, node).starts_with(prefix))
        .collect()
}
/// Produces the owned text that string predicates compare against for `node`.
///
/// * `Element` — whatever `XmlIndex::all_text` returns for the element.
/// * `Text` — the content of the indexed text range, or an empty string when
///   the index is past the end of `text_ranges`.
/// * `Attribute` — the value of the first attribute on the tag whose name
///   hashes to the stored hash; empty when no name matches or the lookup
///   yields no value.
/// * Any other node kind — an empty string.
fn node_text_content(index: &XmlIndex, node: XPathNode) -> String {
    match node {
        XPathNode::Element(element_idx) => index.all_text(element_idx),
        XPathNode::Text(text_idx) => index
            .text_ranges
            .get(text_idx)
            .map_or_else(String::new, |range| index.text_content(range).to_string()),
        XPathNode::Attribute(tag_idx, hash) => {
            for name in index.get_all_attribute_names(tag_idx) {
                // Attribute nodes carry only a name hash, so names are hashed
                // one by one until the matching attribute is found.
                if super::eval::attr_name_hash(name) != hash {
                    continue;
                }
                return index.get_attribute(tag_idx, name).unwrap_or("").to_string();
            }
            String::new()
        }
        _ => String::new(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Substring matching over a handful of paragraphs.
    #[test]
    fn batch_contains_basic() {
        let doc = b"<r><p>hello world</p><p>goodbye</p><p>hello again</p></r>";
        let index = crate::parse(doc).unwrap();
        let paragraphs = index.xpath("//p").unwrap();
        assert_eq!(paragraphs.len(), 3);

        let hits = batch_contains(&index, &paragraphs, "hello");
        assert_eq!(hits, [true, false, true]);
    }

    /// An empty needle is contained in every text.
    #[test]
    fn batch_contains_empty_needle() {
        let doc = b"<r><p>text</p></r>";
        let index = crate::parse(doc).unwrap();
        let paragraphs = index.xpath("//p").unwrap();

        let hits = batch_contains(&index, &paragraphs, "");
        assert_eq!(hits, [true]);
    }

    /// Prefix matching over a handful of paragraphs.
    #[test]
    fn batch_starts_with_basic() {
        let doc = b"<r><p>alpha one</p><p>beta two</p><p>alpha three</p></r>";
        let index = crate::parse(doc).unwrap();
        let paragraphs = index.xpath("//p").unwrap();

        let hits = batch_starts_with(&index, &paragraphs, "alpha");
        assert_eq!(hits, [true, false, true]);
    }

    /// Ten paragraphs reach `BATCH_THRESHOLD`; the result must agree with a
    /// plain per-node `contains` check.
    #[test]
    fn batch_vs_individual_contains() {
        let doc = br#"<corpus>
<p>The quick brown fox</p>
<p>jumps over the lazy dog</p>
<p>pack my box with five dozen liquor jugs</p>
<p>the five boxing wizards jump quickly</p>
<p>how vexingly quick daft zebras jump</p>
<p>sphinx of black quartz judge my vow</p>
<p>two driven jocks help fax my big quiz</p>
<p>the jay pig fox zebra and my wolves quack</p>
<p>sympathizing would fix quaker objectives</p>
<p>a wizard quick job vexes much of Gryphon</p>
</corpus>"#;
        let index = crate::parse(doc).unwrap();
        let paragraphs = index.xpath("//p").unwrap();
        assert_eq!(paragraphs.len(), 10);

        let needle = "quick";
        let batched = batch_contains(&index, &paragraphs, needle);

        let mut expected: Vec<bool> = Vec::with_capacity(paragraphs.len());
        for &node in paragraphs.iter() {
            expected.push(node_text_content(&index, node).contains(needle));
        }
        assert_eq!(batched, expected);
    }

    /// A needle that occurs nowhere yields `false` for every node.
    #[test]
    fn batch_contains_no_matches() {
        let doc = b"<r><p>aaa</p><p>bbb</p><p>ccc</p></r>";
        let index = crate::parse(doc).unwrap();
        let paragraphs = index.xpath("//p").unwrap();

        let hits = batch_contains(&index, &paragraphs, "zzz");
        assert_eq!(hits, [false, false, false]);
    }
}