xee_interpreter/sequence/
creation.rs

1use ahash::{HashSet, HashSetExt};
2use std::rc::Rc;
3use xot::Xot;
4
5use crate::{context, error, string::Collation, xml};
6
7use super::{
8    core::Sequence,
9    item::Item,
10    normalization::normalize,
11    serialization::{serialize_sequence, SerializationParameters},
12    traits::SequenceCore,
13    variant::{Empty, Range, RangeIterator},
14};
15
16impl Sequence {
17    pub(crate) fn new(items: Vec<Item>) -> Self {
18        match items.len() {
19            0 => Self::Empty(Empty {}),
20            1 => Self::One(items.into_iter().next().unwrap().into()),
21            _ => Self::Many(items.into()),
22        }
23    }
24
25    /// Concatenate two sequences producing a new sequence.
26    pub fn concat(self, other: Self) -> error::Result<Self> {
27        Ok(match (self, other) {
28            (Self::Empty(_), Self::Empty(_)) => Self::Empty(Empty {}),
29            (Self::Empty(_), Self::One(item)) => Self::One(item),
30            (Self::One(item), Self::Empty(_)) => Self::One(item),
31            (Self::Empty(_), Self::Many(items)) => Self::Many(items),
32            (Self::Many(items), Self::Empty(_)) => Self::Many(items),
33            (Self::One(item1), Self::One(item2)) => {
34                Self::Many((vec![item1.into_item(), item2.into_item()]).into())
35            }
36            (Self::One(item), Self::Many(items)) => {
37                let mut many = Vec::with_capacity(items.len() + 1);
38                many.push(item.into_item());
39                for item in items.iter() {
40                    many.push(item);
41                }
42                Self::Many(many.into())
43            }
44            (Self::Many(items), Self::One(item)) => {
45                let mut many = Vec::with_capacity(items.len() + 1);
46                for item in items.iter() {
47                    many.push(item);
48                }
49                many.push(item.into_item());
50                Self::Many(many.into())
51            }
52            (Self::Many(items1), Self::Many(items2)) => {
53                let mut many = Vec::with_capacity(items1.len() + items2.len());
54                for item in items1.iter() {
55                    many.push(item);
56                }
57                for item in items2.iter() {
58                    many.push(item);
59                }
60                Self::Many(many.into())
61            }
62            (Self::Range(a), Self::Range(b)) => {
63                // if the ranges are consecutive we can merge them
64                if a.end() == b.start() {
65                    Self::Range(Range::new(a.start().clone(), b.end().clone())?)
66                } else if b.end() == a.start() {
67                    Self::Range(Range::new(b.start().clone(), a.end().clone())?)
68                } else {
69                    // otherwise unfortunately we have to construct the sequence
70                    let mut v = Vec::with_capacity(a.len() + b.len());
71                    for i in RangeIterator::new(a.start().clone(), a.end().clone()) {
72                        v.push(i);
73                    }
74                    for i in RangeIterator::new(b.start().clone(), b.end().clone()) {
75                        v.push(i);
76                    }
77                    Self::new(v)
78                }
79            }
80            // handle other cases in less efficient way
81            (a, b) => {
82                let mut v = Vec::with_capacity(a.len() + b.len());
83                for item in a.iter() {
84                    v.push(item);
85                }
86                for item in b.iter() {
87                    v.push(item);
88                }
89                Self::new(v)
90            }
91        })
92    }
93
94    // https://www.w3.org/TR/xpath-31/#id-path-operator
95    pub(crate) fn deduplicate(self, annotations: xml::DocumentOrderAccess) -> error::Result<Self> {
96        let mut s = HashSet::new();
97        let mut non_node_seen = false;
98
99        for item in self.iter() {
100            match item {
101                Item::Node(n) => {
102                    if non_node_seen {
103                        return Err(error::Error::XPTY0004);
104                    }
105                    s.insert(n);
106                }
107                _ => {
108                    if !s.is_empty() {
109                        return Err(error::Error::XPTY0004);
110                    }
111                    non_node_seen = true;
112                }
113            }
114        }
115        if non_node_seen {
116            Ok(self)
117        } else {
118            Ok(Self::process_set_result(s, annotations))
119        }
120    }
121
122    pub(crate) fn process_set_result(
123        s: HashSet<xot::Node>,
124        annotations: xml::DocumentOrderAccess,
125    ) -> Self {
126        // sort nodes by document order
127        let mut nodes = s.into_iter().collect::<Vec<_>>();
128        nodes.sort_by_key(|n| annotations.get(*n));
129        nodes.into()
130    }
131
132    pub fn sorted(
133        &self,
134        context: &context::DynamicContext,
135        collation: Rc<Collation>,
136        xot: &Xot,
137    ) -> error::Result<Self> {
138        self.sorted_by_key(context, collation, |item| {
139            // the equivalent of fn:data()
140            let seq: Self = item.into();
141            seq.atomized(xot).collect::<error::Result<Sequence>>()
142        })
143    }
144
145    pub fn sorted_by_key<F>(
146        &self,
147        context: &context::DynamicContext,
148        collation: Rc<Collation>,
149        get: F,
150    ) -> error::Result<Self>
151    where
152        F: FnMut(Item) -> error::Result<Sequence>,
153    {
154        // see also sort_by_sequence in array.rs. The signatures are
155        // sufficiently different we don't want to try to unify them.
156
157        let items = self.iter().collect::<Vec<_>>();
158        let keys = self.iter().map(get).collect::<error::Result<Vec<_>>>()?;
159
160        let mut keys_and_items = keys.into_iter().zip(items).collect::<Vec<_>>();
161        // sort by key. unfortunately sort_by requires the compare function
162        // to be infallible. It's not in reality, so we make any failures
163        // sort less, so they appear early on in the sequence.
164        keys_and_items.sort_by(|(a_key, _), (b_key, _)| {
165            a_key.compare(b_key, &collation, context.implicit_timezone())
166        });
167        // a pass to detect any errors; if sorting between two items is
168        // impossible we want to raise a type error
169        for ((a_key, _), (b_key, _)) in keys_and_items.iter().zip(keys_and_items.iter().skip(1)) {
170            a_key.fallible_compare(b_key, &collation, context.implicit_timezone())?;
171        }
172        // now pick up items again
173        let result = keys_and_items
174            .into_iter()
175            .map(|(_, item)| item)
176            .collect::<Sequence>();
177        Ok(result)
178    }
179
180    /// Flatten all arrays in this sequence
181    pub fn flatten(&self) -> error::Result<Self> {
182        let mut result = vec![];
183        for item in self.iter() {
184            if let Ok(array) = item.to_array() {
185                for sequence in array.iter() {
186                    for item in sequence.flatten()?.iter() {
187                        result.push(item);
188                    }
189                }
190            } else {
191                result.push(item);
192            }
193        }
194        Ok(result.into())
195    }
196
197    pub(crate) fn union(
198        self,
199        other: Self,
200        annotations: xml::DocumentOrderAccess,
201    ) -> error::Result<Self> {
202        let mut s = HashSet::new();
203        for node in self.nodes() {
204            s.insert(node?);
205        }
206        for node in other.nodes() {
207            s.insert(node?);
208        }
209
210        Ok(Self::process_set_result(s, annotations))
211    }
212
213    pub(crate) fn intersect(
214        self,
215        other: Self,
216        annotations: xml::DocumentOrderAccess,
217    ) -> error::Result<Self> {
218        let mut s = HashSet::new();
219        let mut r = HashSet::new();
220        for node in self.nodes() {
221            s.insert(node?);
222        }
223        for node in other.nodes() {
224            let node = node?;
225            if s.contains(&node) {
226                r.insert(node);
227            }
228        }
229        Ok(Self::process_set_result(r, annotations))
230    }
231
232    pub(crate) fn except(
233        self,
234        other: Self,
235        annotations: xml::DocumentOrderAccess,
236    ) -> error::Result<Self> {
237        let mut s = HashSet::new();
238        for node in self.nodes() {
239            s.insert(node?);
240        }
241        for node in other.nodes() {
242            s.remove(&node?);
243        }
244        Ok(Self::process_set_result(s, annotations))
245    }
246
247    /// Normalize this sequence into a document node, according to
248    /// <https://www.w3.org/TR/xslt-xquery-serialization-31/#serdm>
249    pub fn normalize(&self, item_separator: &str, xot: &mut Xot) -> error::Result<xot::Node> {
250        normalize(self, item_separator, xot)
251    }
252
253    /// Serialize this sequence according to serialization parameters
254    pub fn serialize(
255        &self,
256        params: SerializationParameters,
257        xot: &mut Xot,
258    ) -> error::Result<String> {
259        serialize_sequence(self, params, xot)
260    }
261
262    /// Display representation of the sequence
263    pub fn display_representation(&self, xot: &Xot, context: &context::DynamicContext) -> String {
264        // TODO: various unwraps
265        match &self {
266            Sequence::Empty(_) => "()".to_string(),
267            Sequence::One(item) => item.item().display_representation(xot, context).unwrap(),
268            Sequence::Many(items) => {
269                let mut representations = Vec::with_capacity(self.len());
270                for item in items.iter() {
271                    representations.push(item.display_representation(xot, context).unwrap());
272                }
273                format!("(\n{}\n)", representations.join(",\n"))
274            }
275            Sequence::Range(range) => {
276                format!("{} to {}", range.start(), range.end())
277            }
278        }
279    }
280}