1#[cfg(any(test, feature = "compliance"))]
7#[allow(clippy::missing_panics_doc)]
8pub mod tests;
9
10use std::collections::HashMap;
11use std::pin::pin;
12
13use async_trait::async_trait;
14use clayers_xml::ContentHash;
15use futures_core::Stream;
16use crate::error::{Error, Result};
17use crate::object::Object;
18use crate::refs;
19use crate::store::{ObjectStore, RefStore};
20
/// Selects which shape of result a query should produce.
///
/// Each variant has a same-named counterpart in [`QueryResult`].
#[derive(Clone, Copy, Debug)]
pub enum QueryMode {
    /// Ask for a [`QueryResult::Count`].
    Count,
    /// Ask for a [`QueryResult::Text`].
    Text,
    /// Ask for a [`QueryResult::Xml`].
    Xml,
}
35
/// Outcome of a query; the variant mirrors the [`QueryMode`] that was requested.
#[derive(Debug)]
pub enum QueryResult {
    /// A number of matches.
    Count(usize),
    /// One string per match (textual form).
    Text(Vec<String>),
    /// One string per match (XML form).
    Xml(Vec<String>),
}
46
/// Ordered list of string pairs handed to the XPath evaluator as namespace
/// bindings.
///
/// NOTE(review): each pair is presumably `(prefix, namespace URI)` — confirm
/// against `clayers_xml::query::evaluate_xpath`.
pub type NamespaceMap = Vec<(String, String)>;
49
50#[derive(Debug)]
52pub struct DocumentQueryResult {
53 pub path: String,
55 pub result: QueryResult,
57}
58
59#[async_trait]
69pub trait QueryStore: Send + Sync {
70 async fn query_document(
72 &self,
73 doc_hash: ContentHash,
74 xpath: &str,
75 mode: QueryMode,
76 namespaces: &NamespaceMap,
77 ) -> Result<QueryResult>;
78}
79
80async fn try_collect_stream<S>(stream: S) -> Result<HashMap<ContentHash, Object>>
86where
87 S: Stream<Item = Result<(ContentHash, Object)>>,
88{
89 let mut stream = pin!(stream);
90 let mut map = HashMap::new();
91 while let Some(item) = std::future::poll_fn(|cx| stream.as_mut().poll_next(cx)).await {
92 let (hash, obj) = item?;
93 map.insert(hash, obj);
94 }
95 Ok(map)
96}
97
98pub async fn default_query_document(
105 store: &dyn ObjectStore,
106 doc_hash: ContentHash,
107 xpath: &str,
108 mode: QueryMode,
109 namespaces: &NamespaceMap,
110) -> Result<QueryResult> {
111 let objects = try_collect_stream(store.subtree(&doc_hash)).await?;
112
113 let root_hash = match objects.get(&doc_hash) {
115 Some(Object::Document(doc)) => doc.root,
116 Some(_) => return Err(Error::InvalidObject("expected Document object".into())),
117 None => return Err(Error::NotFound(doc_hash)),
118 };
119
120 let xml_string = crate::export::build_xml_from_objects(&objects, root_hash)?;
122
123 let ns_refs: Vec<(&str, &str)> = namespaces
125 .iter()
126 .map(|(p, u)| (p.as_str(), u.as_str()))
127 .collect();
128 let xml_mode = match mode {
129 QueryMode::Count => clayers_xml::query::QueryMode::Count,
130 QueryMode::Text => clayers_xml::query::QueryMode::Text,
131 QueryMode::Xml => clayers_xml::query::QueryMode::Xml,
132 };
133 let result = clayers_xml::query::evaluate_xpath(&xml_string, xpath, xml_mode, &ns_refs)?;
134 Ok(match result {
135 clayers_xml::query::QueryResult::Count(n) => QueryResult::Count(n),
136 clayers_xml::query::QueryResult::Text(t) => QueryResult::Text(t),
137 clayers_xml::query::QueryResult::Xml(x) => QueryResult::Xml(x),
138 })
139}
140
141pub async fn resolve_to_document(
155 store: &dyn ObjectStore,
156 ref_store: &dyn RefStore,
157 revspec: &str,
158) -> Result<ContentHash> {
159 let hash = resolve_revspec(ref_store, revspec).await?;
160 follow_to_document(store, hash).await
162}
163
164pub async fn resolve_to_tree(
170 store: &dyn ObjectStore,
171 ref_store: &dyn RefStore,
172 revspec: &str,
173) -> Result<(ContentHash, crate::object::TreeObject)> {
174 let hash = resolve_revspec(ref_store, revspec).await?;
175 let tree_hash = follow_to_tree(store, hash).await?;
176 let obj = store.get(&tree_hash).await?.ok_or(Error::NotFound(tree_hash))?;
177 let Object::Tree(t) = obj else {
178 return Err(Error::InvalidObject("expected Tree object".into()));
179 };
180 Ok((tree_hash, t))
181}
182
183pub async fn resolve_revspec(
189 ref_store: &dyn RefStore,
190 revspec: &str,
191) -> Result<ContentHash> {
192 if let Ok(h) = try_parse_hash(revspec) {
193 Ok(h)
194 } else if revspec == "HEAD" {
195 refs::resolve_head(ref_store)
196 .await?
197 .ok_or_else(|| Error::Ref("HEAD not set".into()))
198 } else if revspec.starts_with("refs/") {
199 ref_store
200 .get_ref(revspec)
201 .await?
202 .ok_or_else(|| Error::Ref(format!("ref not found: {revspec}")))
203 } else if let Some(h) = ref_store.get_ref(&refs::branch_ref(revspec)).await? {
204 Ok(h)
205 } else if let Some(h) = ref_store.get_ref(&refs::tag_ref(revspec)).await? {
206 Ok(h)
207 } else {
208 Err(Error::Ref(format!("cannot resolve revspec: {revspec}")))
209 }
210}
211
212fn try_parse_hash(s: &str) -> Result<ContentHash> {
214 if s.len() != 64 {
215 return Err(Error::Ref("not a valid hash".into()));
216 }
217 let bytes: Vec<u8> = (0..64)
218 .step_by(2)
219 .map(|i| u8::from_str_radix(&s[i..i + 2], 16))
220 .collect::<std::result::Result<Vec<u8>, _>>()
221 .map_err(|_| Error::Ref("not a valid hex hash".into()))?;
222 let arr: [u8; 32] = bytes
223 .try_into()
224 .map_err(|_| Error::Ref("not 32 bytes".into()))?;
225 Ok(ContentHash(arr))
226}
227
228async fn follow_to_document(
231 store: &dyn ObjectStore,
232 hash: ContentHash,
233) -> Result<ContentHash> {
234 let obj = store.get(&hash).await?.ok_or(Error::NotFound(hash))?;
235 match obj {
236 Object::Document(_) => Ok(hash),
237 Object::Tree(t) => {
238 t.entries.first()
240 .map(|e| e.document)
241 .ok_or_else(|| Error::InvalidObject("empty tree has no documents".into()))
242 }
243 Object::Commit(c) => Box::pin(follow_to_document(store, c.tree)).await,
244 Object::Tag(t) => Box::pin(follow_to_document(store, t.target)).await,
245 _ => Err(Error::InvalidObject(
246 "revspec resolved to a non-versioning object".into(),
247 )),
248 }
249}
250
251pub async fn follow_to_tree(
257 store: &dyn ObjectStore,
258 hash: ContentHash,
259) -> Result<ContentHash> {
260 let obj = store.get(&hash).await?.ok_or(Error::NotFound(hash))?;
261 match obj {
262 Object::Tree(_) => Ok(hash),
263 Object::Commit(c) => Ok(c.tree),
264 Object::Tag(t) => Box::pin(follow_to_tree(store, t.target)).await,
265 _ => Err(Error::InvalidObject(
266 "revspec resolved to a non-versioning object".into(),
267 )),
268 }
269}
270
271#[derive(Debug)]
277pub struct RefQueryResult {
278 pub ref_name: String,
280 pub commit_hash: ContentHash,
282 pub doc_hash: ContentHash,
284 pub result: QueryResult,
286}
287
288pub async fn query_refs(
294 store: &(dyn ObjectStore + Sync),
295 query_store: &dyn QueryStore,
296 ref_store: &dyn RefStore,
297 prefix: &str,
298 xpath: &str,
299 mode: QueryMode,
300 namespaces: &NamespaceMap,
301) -> Result<Vec<RefQueryResult>> {
302 let all_refs = ref_store.list_refs(prefix).await?;
303 let mut results = Vec::new();
304 let mut seen_docs = std::collections::HashSet::new();
305
306 for (ref_name, commit_hash) in all_refs {
307 let tree_hash = follow_to_tree(store, commit_hash).await?;
308 if !seen_docs.insert(tree_hash) {
309 continue; }
311 let tree_obj = store.get(&tree_hash).await?.ok_or(Error::NotFound(tree_hash))?;
312 let Object::Tree(tree) = tree_obj else {
313 return Err(Error::InvalidObject("expected Tree object".into()));
314 };
315 let mut doc_results = Vec::new();
318 for entry in &tree.entries {
319 match query_store
320 .query_document(entry.document, xpath, mode, namespaces)
321 .await
322 {
323 Ok(result) => {
324 doc_results.push(DocumentQueryResult {
325 path: entry.path.clone(),
326 result,
327 });
328 }
329 Err(Error::Xml(ref e)) if e.to_string().contains("compile error") => {}
330 Err(e) => return Err(e),
331 }
332 }
333 let combined = aggregate_results(mode, doc_results);
334 let doc_hash = tree.entries.first()
335 .map_or(tree_hash, |e| e.document);
336 results.push(RefQueryResult {
337 ref_name,
338 commit_hash,
339 doc_hash,
340 result: combined,
341 });
342 }
343
344 Ok(results)
345}
346
347#[allow(clippy::too_many_arguments)]
357pub async fn query_by_document(
358 store: &(dyn ObjectStore + Sync),
359 query_store: &dyn QueryStore,
360 ref_store: &dyn RefStore,
361 revspec: &str,
362 xpath: &str,
363 mode: QueryMode,
364 namespaces: &NamespaceMap,
365 files: &[String],
366) -> Result<Vec<DocumentQueryResult>> {
367 let hash = resolve_revspec(ref_store, revspec).await?;
368 let tree_hash = follow_to_tree(store, hash).await?;
369 let tree_obj = store.get(&tree_hash).await?.ok_or(Error::NotFound(tree_hash))?;
370 let Object::Tree(tree) = tree_obj else {
371 return Err(Error::InvalidObject("expected Tree object".into()));
372 };
373
374 let mut results = Vec::new();
375 for entry in &tree.entries {
376 if !files.is_empty() && !files.iter().any(|f| entry.path.contains(f.as_str())) {
378 continue;
379 }
380
381 match query_store
382 .query_document(entry.document, xpath, mode, namespaces)
383 .await
384 {
385 Ok(result) => {
386 let has_matches = match &result {
388 QueryResult::Count(0) => false,
389 QueryResult::Count(_) => true,
390 QueryResult::Text(t) => !t.is_empty(),
391 QueryResult::Xml(x) => !x.is_empty(),
392 };
393 if has_matches {
394 results.push(DocumentQueryResult {
395 path: entry.path.clone(),
396 result,
397 });
398 }
399 }
400 Err(Error::Xml(ref e)) if e.to_string().contains("compile error") => {
401 }
403 Err(e) => return Err(e),
404 }
405 }
406 Ok(results)
407}
408
409pub async fn query(
415 store: &(dyn ObjectStore + Sync),
416 query_store: &dyn QueryStore,
417 ref_store: &dyn RefStore,
418 revspec: &str,
419 xpath: &str,
420 mode: QueryMode,
421 namespaces: &NamespaceMap,
422) -> Result<QueryResult> {
423 let docs = query_by_document(
424 store, query_store, ref_store, revspec, xpath, mode, namespaces, &[],
425 )
426 .await?;
427 Ok(aggregate_results(mode, docs))
428}
429
430fn aggregate_results(mode: QueryMode, docs: Vec<DocumentQueryResult>) -> QueryResult {
432 let mut combined_count = 0usize;
433 let mut combined_texts = Vec::new();
434 let mut combined_xmls = Vec::new();
435 for doc in docs {
436 match doc.result {
437 QueryResult::Count(n) => combined_count += n,
438 QueryResult::Text(ts) => combined_texts.extend(ts),
439 QueryResult::Xml(xs) => combined_xmls.extend(xs),
440 }
441 }
442 match mode {
443 QueryMode::Count => QueryResult::Count(combined_count),
444 QueryMode::Text => QueryResult::Text(combined_texts),
445 QueryMode::Xml => QueryResult::Xml(combined_xmls),
446 }
447}
448