// The `tests` module is compiled only for test builds or when the
// `compliance` feature is enabled.
#[cfg(any(test, feature = "compliance"))]
// NOTE(review): presumably the items in `tests` panic through assertions and
// are exempted from documenting that — confirm this is the reason for the allow.
#[allow(clippy::missing_panics_doc)]
pub mod tests;
use std::collections::HashMap;
use std::pin::pin;
use async_trait::async_trait;
use clayers_xml::ContentHash;
use futures_core::Stream;
use crate::error::{Error, Result};
use crate::object::Object;
use crate::refs;
use crate::store::{ObjectStore, RefStore};
/// Selects which form of result an XPath query should produce.
///
/// Each variant has a same-named counterpart in [`QueryResult`] and in
/// `clayers_xml::query::QueryMode`.
#[derive(Debug, Clone, Copy)]
pub enum QueryMode {
/// Request a [`QueryResult::Count`] (presumably the number of matches).
Count,
/// Request a [`QueryResult::Text`] (presumably the text of each match).
Text,
/// Request a [`QueryResult::Xml`] (presumably each match serialized as XML).
Xml,
}
/// Outcome of an XPath query; the variant mirrors the requested [`QueryMode`].
#[derive(Debug)]
pub enum QueryResult {
/// A numeric result (presumably the number of matches — confirm against the evaluator).
Count(usize),
/// A list of strings produced in text mode.
Text(Vec<String>),
/// A list of strings produced in XML mode (presumably serialized fragments).
Xml(Vec<String>),
}
/// Ordered list of string pairs handed to the XPath evaluator as namespace
/// bindings (presumably `(prefix, namespace URI)` — confirm against callers).
pub type NamespaceMap = Vec<(String, String)>;
/// Query result for a single document of a tree.
#[derive(Debug)]
pub struct DocumentQueryResult {
/// Path of the tree entry whose document was queried.
pub path: String,
/// Result of evaluating the query against that document.
pub result: QueryResult,
}
/// A backend able to evaluate an XPath expression against one stored document.
#[async_trait]
pub trait QueryStore: Send + Sync {
/// Evaluates `xpath` against the document identified by `doc_hash`.
///
/// `mode` selects the form of the returned [`QueryResult`] and `namespaces`
/// supplies the namespace bindings available to the expression.
///
/// # Errors
///
/// Implementation-defined; callers in this module treat an `Error::Xml`
/// whose message contains "compile error" as "skip this document".
async fn query_document(
&self,
doc_hash: ContentHash,
xpath: &str,
mode: QueryMode,
namespaces: &NamespaceMap,
) -> Result<QueryResult>;
}
/// Drains a fallible stream of `(hash, object)` pairs into a map.
///
/// The stream is polled to completion and every successful item is inserted
/// under its hash; a later item with the same hash replaces the earlier one.
///
/// # Errors
///
/// Returns the first `Err` item yielded by the stream. Anything collected up
/// to that point is dropped.
async fn try_collect_stream<S>(stream: S) -> Result<HashMap<ContentHash, Object>>
where
    S: Stream<Item = Result<(ContentHash, Object)>>,
{
    // Pin on the stack so the stream can be polled without an `Unpin` bound.
    let mut pinned = pin!(stream);
    let mut collected = HashMap::new();
    loop {
        let next = std::future::poll_fn(|cx| pinned.as_mut().poll_next(cx)).await;
        match next {
            Some(Ok((hash, object))) => {
                collected.insert(hash, object);
            }
            Some(Err(err)) => return Err(err),
            None => return Ok(collected),
        }
    }
}
pub async fn default_query_document(
store: &dyn ObjectStore,
doc_hash: ContentHash,
xpath: &str,
mode: QueryMode,
namespaces: &NamespaceMap,
) -> Result<QueryResult> {
let objects = try_collect_stream(store.subtree(&doc_hash)).await?;
let root_hash = match objects.get(&doc_hash) {
Some(Object::Document(doc)) => doc.root,
Some(_) => return Err(Error::InvalidObject("expected Document object".into())),
None => return Err(Error::NotFound(doc_hash)),
};
let xml_string = crate::export::build_xml_from_objects(&objects, root_hash)?;
let ns_refs: Vec<(&str, &str)> = namespaces
.iter()
.map(|(p, u)| (p.as_str(), u.as_str()))
.collect();
let xml_mode = match mode {
QueryMode::Count => clayers_xml::query::QueryMode::Count,
QueryMode::Text => clayers_xml::query::QueryMode::Text,
QueryMode::Xml => clayers_xml::query::QueryMode::Xml,
};
let result = clayers_xml::query::evaluate_xpath(&xml_string, xpath, xml_mode, &ns_refs)?;
Ok(match result {
clayers_xml::query::QueryResult::Count(n) => QueryResult::Count(n),
clayers_xml::query::QueryResult::Text(t) => QueryResult::Text(t),
clayers_xml::query::QueryResult::Xml(x) => QueryResult::Xml(x),
})
}
/// Resolves `revspec` and follows the resulting object down to a document hash.
///
/// The revspec is first turned into a hash by [`resolve_revspec`]; that hash
/// is then followed through tags, commits and trees until a document is
/// reached (a tree yields its first entry's document).
///
/// # Errors
///
/// Propagates any error from resolving the revspec or from following the
/// object chain.
pub async fn resolve_to_document(
    store: &dyn ObjectStore,
    ref_store: &dyn RefStore,
    revspec: &str,
) -> Result<ContentHash> {
    let start = resolve_revspec(ref_store, revspec).await?;
    let document = follow_to_document(store, start).await?;
    Ok(document)
}
pub async fn resolve_to_tree(
store: &dyn ObjectStore,
ref_store: &dyn RefStore,
revspec: &str,
) -> Result<(ContentHash, crate::object::TreeObject)> {
let hash = resolve_revspec(ref_store, revspec).await?;
let tree_hash = follow_to_tree(store, hash).await?;
let obj = store.get(&tree_hash).await?.ok_or(Error::NotFound(tree_hash))?;
let Object::Tree(t) = obj else {
return Err(Error::InvalidObject("expected Tree object".into()));
};
Ok((tree_hash, t))
}
pub async fn resolve_revspec(
ref_store: &dyn RefStore,
revspec: &str,
) -> Result<ContentHash> {
if let Ok(h) = try_parse_hash(revspec) {
Ok(h)
} else if revspec == "HEAD" {
refs::resolve_head(ref_store)
.await?
.ok_or_else(|| Error::Ref("HEAD not set".into()))
} else if revspec.starts_with("refs/") {
ref_store
.get_ref(revspec)
.await?
.ok_or_else(|| Error::Ref(format!("ref not found: {revspec}")))
} else if let Some(h) = ref_store.get_ref(&refs::branch_ref(revspec)).await? {
Ok(h)
} else if let Some(h) = ref_store.get_ref(&refs::tag_ref(revspec)).await? {
Ok(h)
} else {
Err(Error::Ref(format!("cannot resolve revspec: {revspec}")))
}
}
fn try_parse_hash(s: &str) -> Result<ContentHash> {
if s.len() != 64 {
return Err(Error::Ref("not a valid hash".into()));
}
let bytes: Vec<u8> = (0..64)
.step_by(2)
.map(|i| u8::from_str_radix(&s[i..i + 2], 16))
.collect::<std::result::Result<Vec<u8>, _>>()
.map_err(|_| Error::Ref("not a valid hex hash".into()))?;
let arr: [u8; 32] = bytes
.try_into()
.map_err(|_| Error::Ref("not 32 bytes".into()))?;
Ok(ContentHash(arr))
}
async fn follow_to_document(
store: &dyn ObjectStore,
hash: ContentHash,
) -> Result<ContentHash> {
let obj = store.get(&hash).await?.ok_or(Error::NotFound(hash))?;
match obj {
Object::Document(_) => Ok(hash),
Object::Tree(t) => {
t.entries.first()
.map(|e| e.document)
.ok_or_else(|| Error::InvalidObject("empty tree has no documents".into()))
}
Object::Commit(c) => Box::pin(follow_to_document(store, c.tree)).await,
Object::Tag(t) => Box::pin(follow_to_document(store, t.target)).await,
_ => Err(Error::InvalidObject(
"revspec resolved to a non-versioning object".into(),
)),
}
}
pub async fn follow_to_tree(
store: &dyn ObjectStore,
hash: ContentHash,
) -> Result<ContentHash> {
let obj = store.get(&hash).await?.ok_or(Error::NotFound(hash))?;
match obj {
Object::Tree(_) => Ok(hash),
Object::Commit(c) => Ok(c.tree),
Object::Tag(t) => Box::pin(follow_to_tree(store, t.target)).await,
_ => Err(Error::InvalidObject(
"revspec resolved to a non-versioning object".into(),
)),
}
}
/// Aggregated query result for one ref, as produced by `query_refs`.
#[derive(Debug)]
pub struct RefQueryResult {
/// Name of the ref as returned by the ref store.
pub ref_name: String,
/// Hash the ref points at (named for the common case; `query_refs` does not
/// check that it is actually a commit).
pub commit_hash: ContentHash,
/// Document hash of the tree's first entry, or the tree hash itself when
/// the tree has no entries.
pub doc_hash: ContentHash,
/// Results of all documents in the ref's tree, combined into one value.
pub result: QueryResult,
}
pub async fn query_refs(
store: &(dyn ObjectStore + Sync),
query_store: &dyn QueryStore,
ref_store: &dyn RefStore,
prefix: &str,
xpath: &str,
mode: QueryMode,
namespaces: &NamespaceMap,
) -> Result<Vec<RefQueryResult>> {
let all_refs = ref_store.list_refs(prefix).await?;
let mut results = Vec::new();
let mut seen_docs = std::collections::HashSet::new();
for (ref_name, commit_hash) in all_refs {
let tree_hash = follow_to_tree(store, commit_hash).await?;
if !seen_docs.insert(tree_hash) {
continue; }
let tree_obj = store.get(&tree_hash).await?.ok_or(Error::NotFound(tree_hash))?;
let Object::Tree(tree) = tree_obj else {
return Err(Error::InvalidObject("expected Tree object".into()));
};
let mut doc_results = Vec::new();
for entry in &tree.entries {
match query_store
.query_document(entry.document, xpath, mode, namespaces)
.await
{
Ok(result) => {
doc_results.push(DocumentQueryResult {
path: entry.path.clone(),
result,
});
}
Err(Error::Xml(ref e)) if e.to_string().contains("compile error") => {}
Err(e) => return Err(e),
}
}
let combined = aggregate_results(mode, doc_results);
let doc_hash = tree.entries.first()
.map_or(tree_hash, |e| e.document);
results.push(RefQueryResult {
ref_name,
commit_hash,
doc_hash,
result: combined,
});
}
Ok(results)
}
#[allow(clippy::too_many_arguments)]
pub async fn query_by_document(
store: &(dyn ObjectStore + Sync),
query_store: &dyn QueryStore,
ref_store: &dyn RefStore,
revspec: &str,
xpath: &str,
mode: QueryMode,
namespaces: &NamespaceMap,
files: &[String],
) -> Result<Vec<DocumentQueryResult>> {
let hash = resolve_revspec(ref_store, revspec).await?;
let tree_hash = follow_to_tree(store, hash).await?;
let tree_obj = store.get(&tree_hash).await?.ok_or(Error::NotFound(tree_hash))?;
let Object::Tree(tree) = tree_obj else {
return Err(Error::InvalidObject("expected Tree object".into()));
};
let mut results = Vec::new();
for entry in &tree.entries {
if !files.is_empty() && !files.iter().any(|f| entry.path.contains(f.as_str())) {
continue;
}
match query_store
.query_document(entry.document, xpath, mode, namespaces)
.await
{
Ok(result) => {
let has_matches = match &result {
QueryResult::Count(0) => false,
QueryResult::Count(_) => true,
QueryResult::Text(t) => !t.is_empty(),
QueryResult::Xml(x) => !x.is_empty(),
};
if has_matches {
results.push(DocumentQueryResult {
path: entry.path.clone(),
result,
});
}
}
Err(Error::Xml(ref e)) if e.to_string().contains("compile error") => {
}
Err(e) => return Err(e),
}
}
Ok(results)
}
/// Runs an XPath query against every document of the tree named by `revspec`
/// and combines the per-document results into a single [`QueryResult`].
///
/// # Errors
///
/// Propagates any error from [`query_by_document`].
pub async fn query(
    store: &(dyn ObjectStore + Sync),
    query_store: &dyn QueryStore,
    ref_store: &dyn RefStore,
    revspec: &str,
    xpath: &str,
    mode: QueryMode,
    namespaces: &NamespaceMap,
) -> Result<QueryResult> {
    // An empty filter means "query every document in the tree".
    let no_filter: &[String] = &[];
    query_by_document(
        store, query_store, ref_store, revspec, xpath, mode, namespaces, no_filter,
    )
    .await
    .map(|per_document| aggregate_results(mode, per_document))
}
/// Combines per-document results into a single result of the kind selected by `mode`.
///
/// Counts are summed; text and XML lists are concatenated in document order.
/// Results whose variant does not correspond to `mode` are ignored, so an
/// empty input yields `Count(0)` or an empty list.
fn aggregate_results(mode: QueryMode, docs: Vec<DocumentQueryResult>) -> QueryResult {
    let results = docs.into_iter().map(|doc| doc.result);
    match mode {
        QueryMode::Count => QueryResult::Count(
            results
                .filter_map(|result| match result {
                    QueryResult::Count(n) => Some(n),
                    QueryResult::Text(_) | QueryResult::Xml(_) => None,
                })
                .sum(),
        ),
        QueryMode::Text => QueryResult::Text(
            results
                .filter_map(|result| match result {
                    QueryResult::Text(items) => Some(items),
                    QueryResult::Count(_) | QueryResult::Xml(_) => None,
                })
                .flatten()
                .collect(),
        ),
        QueryMode::Xml => QueryResult::Xml(
            results
                .filter_map(|result| match result {
                    QueryResult::Xml(items) => Some(items),
                    QueryResult::Count(_) | QueryResult::Text(_) => None,
                })
                .flatten()
                .collect(),
        ),
    }
}