xee_interpreter/xml/
document.rs

1use std::sync::atomic;
2
3use ahash::{HashMap, HashMapExt};
4use iri_string::types::{IriStr, IriString};
5use xot::Xot;
6
7use super::document_order::DocumentOrderAnnotations;
8use super::DocumentOrderAccess;
9
/// Process-wide counter from which every [`Documents`] collection draws its id.
static DOCUMENTS_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::new(0);

/// Return the current counter value and advance the counter by one.
///
/// Successive calls therefore yield 0, 1, 2, ... so each caller receives a
/// value no earlier caller has seen (until `usize` wraps around).
fn get_documents_id() -> usize {
    use atomic::Ordering::Relaxed;

    // Only the uniqueness of the returned value matters here; the counter
    // does not guard any other memory, hence the relaxed ordering.
    DOCUMENTS_COUNTER.fetch_add(1, Relaxed)
}
15
/// Something went wrong loading [`Documents`]
#[derive(Debug)]
pub enum DocumentsError {
    /// An attempt was made to add a document with a URI that was already known.
    ///
    /// The payload is the offending URI as a string.
    DuplicateUri(String),
    /// An error occurred loading the document XML (using the [`xot`] crate).
    Parse(xot::ParseError),
}
24
// No methods are overridden: `source()` keeps its default (`None`), and the
// wrapped parse error is surfaced through the `Display` output instead.
impl std::error::Error for DocumentsError {}
26
27impl std::fmt::Display for DocumentsError {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        match self {
30            DocumentsError::DuplicateUri(uri) => write!(f, "Duplicate URI: {}", uri),
31            DocumentsError::Parse(e) => write!(f, "Parse error: {}", e),
32        }
33    }
34}
35
36impl From<xot::ParseError> for DocumentsError {
37    fn from(e: xot::ParseError) -> Self {
38        DocumentsError::Parse(e)
39    }
40}
41
/// A single XML document: its root node plus the URI it is known under, if any.
#[derive(Debug, Clone)]
pub struct Document {
    /// The URI designating this document; `None` for documents without one.
    pub(crate) uri: Option<IriString>,
    /// The document root node.
    root: xot::Node,
}
47
48impl Document {
49    /// The document root node
50    pub fn root(&self) -> xot::Node {
51        self.root
52    }
53
54    pub(crate) fn cleanup(&self, xot: &mut Xot) {
55        xot.remove(self.root).unwrap();
56    }
57}
58
/// A collection of XML documents as can be used by XPath and XSLT.
///
/// This collection can be prepared before any XPath or XSLT processing begins.
///
/// Alternatively this collection can be added to incrementally during
/// processing using the `fn:doc` function for instance. Once a document under
/// a URI is present, it cannot be changed anymore.
///
/// The `fn:parse-xml` and `fn:parse-xml-fragment` functions can be used to
/// create new documents from strings without URIs.
#[derive(Debug, Clone)]
pub struct Documents {
    /// Identifier of this collection, copied into every handle it issues so
    /// that handles from other collections can be rejected.
    id: usize,
    /// Document-order annotations, exposed through `annotations()` and
    /// `document_order_access()`.
    annotations: DocumentOrderAnnotations,
    /// All documents, in insertion order; a handle's `id` indexes this vector.
    documents: Vec<Document>,
    /// Handle lookup for documents that were added with a URI.
    by_uri: HashMap<IriString, DocumentHandle>,
    /// Reverse lookup from a root node to its URI, for documents that were
    /// added with a URI.
    uri_by_document_node: HashMap<xot::Node, IriString>,
}
77
/// A handle to a document.
///
/// This is an identifier into a [`Documents`] collection. You can
/// freely copy it.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct DocumentHandle {
    /// Id of the [`Documents`] collection that issued this handle.
    pub(crate) documents_id: usize,
    /// Position of the document within that collection's document list.
    pub(crate) id: usize,
}
87
88impl Documents {
89    /// Create a new empty collection of documents.
90    pub fn new() -> Self {
91        Self {
92            id: get_documents_id(),
93            annotations: DocumentOrderAnnotations::new(),
94            documents: Vec::new(),
95            by_uri: HashMap::new(),
96            uri_by_document_node: HashMap::new(),
97        }
98    }
99
100    /// Clean up all documents.
101    pub fn cleanup(&mut self, xot: &mut Xot) {
102        for document in &self.documents {
103            document.cleanup(xot);
104        }
105        self.documents.clear();
106        self.by_uri.clear();
107    }
108
109    /// Add a string as an XML document. It can be designated with a URI.
110    pub fn add_string(
111        &mut self,
112        xot: &mut Xot,
113        uri: Option<&IriStr>,
114        xml: &str,
115    ) -> Result<DocumentHandle, DocumentsError> {
116        let root = xot.parse(xml)?;
117        self.add_root(uri, root)
118    }
119
120    /// Add a string as an XML fragment.
121    pub fn add_fragment_string(
122        &mut self,
123        xot: &mut Xot,
124        xml: &str,
125    ) -> Result<DocumentHandle, DocumentsError> {
126        let root = xot.parse_fragment(xml)?;
127        self.add_root(None, root)
128    }
129
130    /// Add a root node of an XML document. Designate it with a URI.
131    pub fn add_root(
132        &mut self,
133        uri: Option<&IriStr>,
134        root: xot::Node,
135    ) -> Result<DocumentHandle, DocumentsError> {
136        if let Some(uri) = uri {
137            if self.by_uri.contains_key(uri) {
138                // duplicate URI is an error
139                return Err(DocumentsError::DuplicateUri(uri.as_str().to_string()));
140            }
141        }
142
143        let id = self.documents.len();
144        let handle = DocumentHandle {
145            documents_id: self.id,
146            id,
147        };
148        self.documents.push(Document {
149            uri: uri.map(|uri| uri.to_owned()),
150            root,
151        });
152        if let Some(uri) = uri {
153            self.by_uri.insert(uri.to_owned(), handle);
154            self.uri_by_document_node.insert(root, uri.to_owned());
155        }
156
157        Ok(handle)
158    }
159
160    /// Obtain a document by handle
161    pub fn get_by_handle(&self, handle: DocumentHandle) -> Option<&Document> {
162        // only works if the handle is from this collection
163        if handle.documents_id != self.id {
164            return None;
165        }
166        self.documents.get(handle.id)
167    }
168
169    /// Obtain document node by handle
170    pub fn get_node_by_handle(&self, handle: DocumentHandle) -> Option<xot::Node> {
171        Some(self.get_by_handle(handle)?.root)
172    }
173
174    /// Obtain a document by URI
175    ///
176    /// It's only possible to obtain a document by URI if it was added with a URI.
177    pub fn get_by_uri(&self, uri: &IriStr) -> Option<&Document> {
178        let handle = self.by_uri.get(uri)?;
179        self.get_by_handle(*handle)
180    }
181
182    /// Obtain document node by URI
183    pub fn get_node_by_uri(&self, uri: &IriStr) -> Option<xot::Node> {
184        Some(self.get_by_uri(uri)?.root)
185    }
186
187    /// Obtain document URI by document node.
188    ///
189    /// This only returns a URI if the document was added with a URI.
190    pub fn get_uri_by_document_node(&self, node: xot::Node) -> Option<IriString> {
191        self.uri_by_document_node.get(&node).cloned()
192    }
193
194    /// How many documents are stored.
195    pub fn len(&self) -> usize {
196        self.documents.len()
197    }
198
199    /// Is the collection empty?
200    pub fn is_empty(&self) -> bool {
201        self.documents.is_empty()
202    }
203
204    /// Get the annotations object
205    pub(crate) fn annotations(&self) -> &DocumentOrderAnnotations {
206        &self.annotations
207    }
208
209    pub(crate) fn document_order_access<'a>(&'a self, xot: &'a Xot) -> DocumentOrderAccess<'a> {
210        self.annotations.access(xot)
211    }
212}
213
214impl Default for Documents {
215    fn default() -> Self {
216        Self::new()
217    }
218}