use crate::Attribute;
use crate::QuerySection;
use std::ops::Range;
mod text_content;
pub(crate) use text_content::TextContent;
mod arena;
mod attributes;
mod element;
mod query_node;
pub(crate) use arena::id::Nullable;
use arena::span::Span;
pub use arena::{
Arena,
id::{AttributeId, ElementId, QueryId},
};
pub use element::Element;
pub use query_node::QueryNode;
/// Arena-backed storage for the elements captured while matching queries.
///
/// Elements, attributes and query nodes live in separate arenas and refer to
/// each other through typed ids (`ElementId`, `AttributeId`, `QueryId`).
#[derive(Debug, PartialEq)]
pub struct Store<'html, 'query> {
/// Every element recorded via `push`, addressed by `ElementId`.
pub elements: Arena<Element<'html>, ElementId>,
/// Attribute storage addressed by `AttributeId`; `push` converts an
/// element's attribute slice into a range through this arena.
pub attributes: Arena<Attribute<'html>, AttributeId>,
/// One node per distinct query source seen by `push`, addressed by `QueryId`.
pub queries: Arena<QueryNode<'query>, QueryId>,
/// Text storage. NOTE(review): elements keep a `Range<usize>` for their text
/// (see `set_content`), presumably indexing into this buffer — confirm.
pub text_content: TextContent,
/// Trace of store events; only compiled into debug and test builds.
#[cfg(any(debug_assertions, test))]
pub trace: crate::debug::TraceStore<'html, 'query>,
}
impl<'html, 'query: 'html> Default for Store<'html, 'query> {
    /// Builds an empty store: every arena and the text storage start out
    /// empty, and (debug/test builds only) so does the trace store.
    fn default() -> Self {
        // Initialisers are listed in the same order as the struct declaration.
        Self {
            elements: Arena::new(),
            attributes: Arena::new(),
            queries: Arena::new(),
            text_content: TextContent::new(),
            #[cfg(any(debug_assertions, test))]
            trace: crate::debug::TraceStore::new(),
        }
    }
}
impl<'html, 'query: 'html> Store<'html, 'query> {
    /// Creates a store whose element, attribute and text storage are each
    /// pre-sized to one third of `capacity`.
    ///
    /// The query arena starts empty. In debug/test builds the trace store is
    /// pre-sized to `capacity`, capped at 4096.
    pub fn with_capacity(capacity: usize) -> Self {
        let share = capacity / 3;
        Self {
            elements: Arena::with_capacity(share),
            queries: Arena::new(),
            text_content: TextContent::with_capacity(share),
            attributes: Arena::with_capacity(share),
            #[cfg(any(debug_assertions, test))]
            trace: crate::debug::TraceStore::with_capacity(capacity.min(4096)),
        }
    }

    /// Appends `event` to the trace store.
    ///
    /// The `event` parameter and the body only exist in debug/test builds; in
    /// other builds this is an empty function taking no event.
    #[inline(always)]
    #[cfg_attr(not(any(debug_assertions, test)), allow(dead_code))]
    pub(crate) fn trace_event(
        &mut self,
        #[cfg(any(debug_assertions, test))] event: crate::debug::TraceEvent<'html, 'query>,
    ) {
        #[cfg(any(debug_assertions, test))]
        {
            self.trace.push(event);
        }
    }

    /// Looks up the query whose source text equals `query` in the chain that
    /// starts at `QueryId(0)` and returns an iterator beginning at the first
    /// element recorded for it.
    ///
    /// Returns `None` when no query has been stored yet or when no query in
    /// that chain matches.
    pub fn get(&'html self, query: &str) -> Option<impl Iterator<Item = &'html Element<'html>>> {
        // `QueryId(0)` is only a valid starting point once a query exists.
        if self.queries.is_empty() {
            return None;
        }
        let node = self
            .queries
            .iter_from(QueryId(0))
            .find(|q| q.query == query)?;
        Some(self.elements.iter_from(node.elements.start()))
    }

    /// Appends `query` to the end of the sibling chain that starts at `root`.
    ///
    /// Does nothing when `query` is already reachable from `root`.
    fn link_query_to_query(&mut self, query: QueryId, root: QueryId) {
        let mut current = root;
        while current != query {
            match self.queries[current].next_sibling {
                Some(next) => current = next,
                None => {
                    // Reached the tail without meeting `query`: attach it here.
                    self.queries[current].next_sibling = Some(query);
                    return;
                }
            }
        }
    }

    /// Registers `query` as a child query of `element`: it becomes the first
    /// child query when the element has none, otherwise it is appended to the
    /// element's existing child-query chain.
    fn link_query_to_element(&mut self, query: QueryId, element: ElementId) {
        let head = self.elements[element].first_child_query;
        if let Some(head) = head {
            self.link_query_to_query(query, head);
        } else {
            self.elements[element].first_child_query = Some(query);
        }
    }

    /// Appends `element` to the element span of `query`.
    ///
    /// Does nothing when `element` is already the span's end.
    ///
    /// # Panics
    ///
    /// Panics if the current end of the span already has a `next_sibling`.
    fn link_element_to_query(&mut self, query: QueryId, element: ElementId) {
        let tail = self.queries[query].elements.end();
        if tail != element {
            assert!(self.elements[tail].next_sibling.is_none());
            self.elements[tail].next_sibling = Some(element);
            self.queries[query].elements.set_end(element);
        }
    }

    /// Records `element` as a match of `selection` and returns its new id.
    ///
    /// `from` is the element the match was found under; a null id means the
    /// match belongs to the top-level query chain starting at `QueryId(0)`.
    /// If a query node with the same source already exists in the relevant
    /// chain it is reused, otherwise a new node is created and linked in.
    ///
    /// # Panics
    ///
    /// Panics if `from` is non-null and does not refer to a stored element.
    pub fn push(
        &mut self,
        from: ElementId,
        selection: &QuerySection<'query>,
        element: crate::XHtmlElement<'html>,
    ) -> ElementId {
        let new_element = Element {
            name: element.name,
            class: element.class,
            id: element.id,
            attributes: self.attributes.attribute_slice_to_range(element.attributes),
            ..Default::default()
        };
        assert!(from.is_null() || from.0 < self.elements.len());

        // Head of the sibling chain in which a node for this selection may
        // already exist: the parent's child queries, or the top-level chain.
        let chain_head = if !from.is_null() {
            self.elements[from].first_child_query
        } else if self.queries.is_empty() {
            None
        } else {
            Some(QueryId(0))
        };
        let existing_id = chain_head.and_then(|head| {
            self.queries
                .iter_from(head)
                .find(|q| q.query == selection.source)
                // SAFETY: `q` was just yielded by iterating `self.queries`, so
                // it refers to a node stored in that arena. This is presumed to
                // be what `index_of` requires — TODO confirm against its
                // documented contract.
                .map(|q| unsafe { self.queries.index_of(q) })
        });

        let index = ElementId(self.elements.len());
        self.elements.push(new_element);

        let query_id = match existing_id {
            Some(found) => found,
            None => {
                // The node about to be pushed lands at the current length.
                let fresh = QueryId(self.queries.len());
                self.queries.push(QueryNode {
                    query: selection.source,
                    elements: Span::new(index),
                    next_sibling: None,
                });
                fresh
            }
        };
        assert!(!self.queries.is_empty());
        assert!(query_id.index() < self.queries.len());

        if from.is_null() {
            self.link_query_to_query(query_id, QueryId(0));
        } else {
            self.link_query_to_element(query_id, from);
        }
        self.link_element_to_query(query_id, index);
        index
    }

    /// Stores the final inner HTML and text-content range of `element_id`,
    /// then emits a `ContentFinalized` trace event through `scah_trace!`.
    ///
    /// # Panics
    ///
    /// Panics if the store has no elements or `element_id` is out of range.
    pub fn set_content(
        &mut self,
        element_id: ElementId,
        inner_html: Option<&'html str>,
        text_content: Option<Range<usize>>,
    ) {
        assert!(!self.elements.is_empty());
        assert!(element_id.index() < self.elements.len());

        // Values used only by the trace event; captured before `text_content`
        // is moved into the element below.
        #[cfg(any(debug_assertions, test))]
        let (tag, has_inner_html, has_text_content) = (
            self.elements[element_id].name,
            inner_html.is_some(),
            text_content.is_some(),
        );

        let target = &mut self.elements[element_id];
        target.inner_html = inner_html;
        target.text_content = text_content;

        crate::scah_trace!(
            self,
            crate::debug::TraceEvent::ContentFinalized {
                element_id,
                tag,
                has_inner_html,
                has_text_content,
            }
        );
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `Store`: query lookup along sibling chains, linking of
    //! queries and elements, and the bookkeeping performed by `push`.
    use crate::{Query, Save};

    use super::*;

    /// Looking up a query by source along the chain from `QueryId(0)` returns
    /// the first node with that source, or `None` when nothing matches.
    #[test]
    fn test_find_next_query() {
        let mut store = Store::default();
        store.queries.inner = vec![
            QueryNode {
                query: "1",
                next_sibling: Some(QueryId(1)),
                ..Default::default()
            },
            QueryNode {
                query: "2",
                next_sibling: Some(QueryId(2)),
                ..Default::default()
            },
            QueryNode {
                query: "3",
                next_sibling: Some(QueryId(3)),
                ..Default::default()
            },
            QueryNode {
                query: "3",
                next_sibling: None,
                ..Default::default()
            },
        ];

        assert_eq!(
            store
                .queries
                .iter_from(QueryId(0))
                .find(|q| q.query == "1")
                .map(|q| unsafe { store.queries.index_of(q) }),
            Some(QueryId(0))
        );
        assert_eq!(
            store
                .queries
                .iter_from(QueryId(0))
                .find(|q| q.query == "2")
                .map(|q| unsafe { store.queries.index_of(q) }),
            Some(QueryId(1))
        );
        // Two nodes share the source "3"; the first one in the chain wins.
        assert_eq!(
            store
                .queries
                .iter_from(QueryId(0))
                .find(|q| q.query == "3")
                .map(|q| unsafe { store.queries.index_of(q) }),
            Some(QueryId(2))
        );
        assert_eq!(
            store
                .queries
                .iter_from(QueryId(0))
                .find(|q| q.query == "not in list")
                .map(|q| unsafe { store.queries.index_of(q) }),
            None
        );
    }

    /// Linking a query to an element without child queries sets the element's
    /// `first_child_query` and leaves the query sibling chain untouched.
    #[test]
    fn test_link_query_to_element() {
        let mut store = Store::default();
        store.elements.inner = vec![
            Element {
                first_child_query: Some(QueryId(0)),
                ..Default::default()
            },
            Element {
                first_child_query: None,
                ..Default::default()
            },
        ];
        store.queries.inner = vec![
            QueryNode {
                next_sibling: Some(QueryId(1)),
                ..Default::default()
            },
            QueryNode {
                next_sibling: None,
                ..Default::default()
            },
        ];

        store.link_query_to_element(QueryId(0), ElementId(1));

        assert_eq!(
            store.queries.inner,
            vec![
                QueryNode {
                    next_sibling: Some(QueryId(1)),
                    ..Default::default()
                },
                QueryNode {
                    next_sibling: None,
                    ..Default::default()
                }
            ]
        );
        // The target element now points at the query; the other is unchanged.
        assert_eq!(store.elements.inner[1].first_child_query, Some(QueryId(0)));
        assert_eq!(store.elements.inner[0].first_child_query, Some(QueryId(0)));
    }

    /// Two different child queries pushed under the same parent element end up
    /// chained as siblings below that element's `first_child_query`.
    #[test]
    fn test_branching_next_query() {
        let mut store = Store::default();
        let q = Query::all("1", Save::all())
            .unwrap()
            .then(|ctx| Ok([ctx.all("2", Save::all())?, ctx.all("3", Save::all())?]))
            .unwrap();

        store.push(
            ElementId::default(),
            &q.selection[0],
            crate::XHtmlElement::default(),
        );
        assert_eq!(
            store.queries.inner,
            vec![QueryNode {
                query: "1",
                next_sibling: None,
                elements: Span::new(ElementId(0))
            }]
        );
        assert_eq!(store.elements.inner, vec![Element::default(),]);

        store.push(
            ElementId(0),
            &q.selection[1],
            crate::XHtmlElement::default(),
        );
        assert_eq!(
            store.queries.inner,
            vec![
                QueryNode {
                    query: "1",
                    next_sibling: None,
                    elements: Span::new(ElementId(0))
                },
                QueryNode {
                    query: "2",
                    next_sibling: None,
                    elements: Span::new(ElementId(1))
                }
            ]
        );
        assert_eq!(
            store.elements.inner,
            vec![
                Element {
                    first_child_query: Some(QueryId(1)),
                    ..Default::default()
                },
                Element {
                    ..Default::default()
                },
            ]
        );

        store.push(
            ElementId(0),
            &q.selection[2],
            crate::XHtmlElement::default(),
        );
        assert_eq!(
            store.queries.inner,
            vec![
                QueryNode {
                    query: "1",
                    next_sibling: None,
                    elements: Span::new(ElementId(0))
                },
                QueryNode {
                    query: "2",
                    next_sibling: Some(QueryId(2)),
                    elements: Span::new(ElementId(1))
                },
                QueryNode {
                    query: "3",
                    next_sibling: None,
                    elements: Span::new(ElementId(2))
                }
            ]
        );
        assert_eq!(
            store.elements.inner,
            vec![
                Element {
                    first_child_query: Some(QueryId(1)),
                    ..Default::default()
                },
                Element {
                    ..Default::default()
                },
                Element {
                    ..Default::default()
                },
            ]
        );
    }

    /// Pushing the same top-level query twice reuses its node and extends its
    /// element span; a child query pushed under an element gets its own node.
    #[test]
    fn test_push_multi_section() {
        let query = Query::all("main > section", Save::all())
            .unwrap()
            .then(|section| {
                Ok([
                    section.all("> a[href]", Save::all())?,
                    section.all("div a", Save::all())?,
                ])
            })
            .unwrap()
            .build();
        let mut store = Store::default();

        store.push(
            ElementId::default(),
            &query.queries[0],
            crate::XHtmlElement {
                name: "section",
                ..Default::default()
            },
        );
        assert_eq!(
            store
                .queries
                .iter_from(QueryId(0))
                .find(|q| q.query == query.queries[0].source)
                .map(|q| unsafe { store.queries.index_of(q) }),
            Some(QueryId(0))
        );

        store.push(
            ElementId::default(),
            &query.queries[0],
            crate::XHtmlElement {
                name: "section",
                ..Default::default()
            },
        );
        assert_eq!(
            store.elements.inner,
            vec![
                Element {
                    name: "section",
                    next_sibling: Some(ElementId(1)),
                    ..Default::default()
                },
                Element {
                    name: "section",
                    ..Default::default()
                },
            ]
        );
        assert_eq!(
            store.queries.inner,
            vec![QueryNode {
                query: "main > section",
                next_sibling: None,
                elements: Span::from(ElementId(0), ElementId(1))
            },]
        );

        store.push(
            ElementId(1),
            &query.queries[1],
            crate::XHtmlElement {
                name: "a",
                ..Default::default()
            },
        );
        assert_eq!(
            store.queries.inner,
            vec![
                QueryNode {
                    query: "main > section",
                    next_sibling: None,
                    elements: Span::from(ElementId(0), ElementId(1))
                },
                QueryNode {
                    query: "> a[href]",
                    next_sibling: None,
                    elements: Span::new(ElementId(2))
                }
            ]
        );
        assert_eq!(
            store.elements.inner,
            vec![
                Element {
                    name: "section",
                    next_sibling: Some(ElementId(1)),
                    ..Default::default()
                },
                Element {
                    name: "section",
                    first_child_query: Some(QueryId(1)),
                    ..Default::default()
                },
                Element {
                    name: "a",
                    ..Default::default()
                },
            ]
        );
    }

    /// Two independent top-level queries can both be looked up with `get`,
    /// and each lookup starts at an element captured for that query.
    #[test]
    fn test_multi_root_queries() {
        let queries = &[
            Query::all("span", Save::all()).unwrap().build(),
            Query::all("a", Save::all()).unwrap().build(),
        ];
        let mut store = Store::default();

        store.push(
            ElementId::default(),
            &queries[0].queries[0],
            crate::XHtmlElement {
                name: "span",
                ..Default::default()
            },
        );
        store.push(
            ElementId::default(),
            &queries[1].queries[0],
            crate::XHtmlElement {
                name: "a",
                ..Default::default()
            },
        );

        // Inspect the yielded elements themselves: calling `.iter()` on the
        // `Option` returned by `get` would only count the `Option` (always 1
        // when it is `Some`) and say nothing about the matches.
        assert!(store.get("span").is_some());
        assert_eq!(
            store
                .get("span")
                .and_then(|mut matches| matches.next())
                .map(|element| element.name),
            Some("span")
        );
        assert!(store.get("a").is_some());
        assert_eq!(
            store
                .get("a")
                .and_then(|mut matches| matches.next())
                .map(|element| element.name),
            Some("a")
        );
        // A source that was never pushed is not found.
        assert!(store.get("missing").is_none());
    }
}