xee_interpreter/sequence/
creation.rs

1use ahash::{HashSet, HashSetExt};
2use std::rc::Rc;
3use xot::Xot;
4
5use crate::{context, error, string::Collation, xml};
6
7use super::{
8    core::Sequence,
9    item::Item,
10    normalization::normalize,
11    serialization::{serialize_sequence, SerializationParameters},
12    traits::SequenceCore,
13    variant::{Empty, Range, RangeIterator},
14};
15
16impl Sequence {
17    pub(crate) fn new(items: Vec<Item>) -> Self {
18        match items.len() {
19            0 => Self::Empty(Empty {}),
20            1 => Self::One(items.into_iter().next().unwrap().into()),
21            _ => Self::Many(items.into()),
22        }
23    }
24
25    /// Concatenate two sequences producing a new sequence.
26    pub fn concat(self, other: Self) -> error::Result<Self> {
27        Ok(match (self, other) {
28            (Self::Empty(_), Self::Empty(_)) => Self::Empty(Empty {}),
29            (Self::Empty(_), Self::One(item)) => Self::One(item),
30            (Self::One(item), Self::Empty(_)) => Self::One(item),
31            (Self::Empty(_), Self::Many(items)) => Self::Many(items),
32            (Self::Many(items), Self::Empty(_)) => Self::Many(items),
33            (Self::One(item1), Self::One(item2)) => {
34                Self::Many((vec![item1.into_item(), item2.into_item()]).into())
35            }
36            (Self::One(item), Self::Many(items)) => {
37                let mut many = Vec::with_capacity(items.len() + 1);
38                many.push(item.into_item());
39                for item in items.iter() {
40                    many.push(item);
41                }
42                Self::Many(many.into())
43            }
44            (Self::Many(items), Self::One(item)) => {
45                let mut many = Vec::with_capacity(items.len() + 1);
46                for item in items.iter() {
47                    many.push(item);
48                }
49                many.push(item.into_item());
50                Self::Many(many.into())
51            }
52            (Self::Many(items1), Self::Many(items2)) => {
53                let mut many = Vec::with_capacity(items1.len() + items2.len());
54                for item in items1.iter() {
55                    many.push(item);
56                }
57                for item in items2.iter() {
58                    many.push(item);
59                }
60                Self::Many(many.into())
61            }
62            (Self::Range(a), Self::Range(b)) => {
63                // if the ranges are consecutive we can merge them
64                if a.end() == b.start() {
65                    Self::Range(Range::new(a.start().clone(), b.end().clone())?)
66                } else if b.end() == a.start() {
67                    Self::Range(Range::new(b.start().clone(), a.end().clone())?)
68                } else {
69                    // otherwise unfortunately we have to construct the sequence
70                    let mut v = Vec::with_capacity(a.len() + b.len());
71                    for i in RangeIterator::new(a.start().clone(), a.end().clone()) {
72                        v.push(i);
73                    }
74                    for i in RangeIterator::new(b.start().clone(), b.end().clone()) {
75                        v.push(i);
76                    }
77                    Self::new(v)
78                }
79            }
80            // handle other cases in less efficient way
81            (a, b) => {
82                let mut v = Vec::with_capacity(a.len() + b.len());
83                for item in a.iter() {
84                    v.push(item);
85                }
86                for item in b.iter() {
87                    v.push(item);
88                }
89                Self::new(v)
90            }
91        })
92    }
93
94    // https://www.w3.org/TR/xpath-31/#id-path-operator
95    pub(crate) fn deduplicate(self, annotations: &xml::Annotations) -> error::Result<Self> {
96        let mut s = HashSet::new();
97        let mut non_node_seen = false;
98
99        for item in self.iter() {
100            match item {
101                Item::Node(n) => {
102                    if non_node_seen {
103                        return Err(error::Error::XPTY0004);
104                    }
105                    s.insert(n);
106                }
107                _ => {
108                    if !s.is_empty() {
109                        return Err(error::Error::XPTY0004);
110                    }
111                    non_node_seen = true;
112                }
113            }
114        }
115        if non_node_seen {
116            Ok(self)
117        } else {
118            Ok(Self::process_set_result(s, annotations))
119        }
120    }
121
122    pub(crate) fn process_set_result(
123        s: HashSet<xot::Node>,
124        annotations: &xml::Annotations,
125    ) -> Self {
126        // sort nodes by document order
127        let mut nodes = s.into_iter().collect::<Vec<_>>();
128        nodes.sort_by_key(|n| annotations.document_order(*n));
129        nodes.into()
130    }
131
132    pub fn sorted(
133        &self,
134        context: &context::DynamicContext,
135        collation: Rc<Collation>,
136        xot: &Xot,
137    ) -> error::Result<Self> {
138        self.sorted_by_key(context, collation, |item| {
139            // the equivalent of fn:data()
140            let seq: Self = item.into();
141            seq.atomized(xot).collect::<error::Result<Sequence>>()
142        })
143    }
144
145    pub fn sorted_by_key<F>(
146        &self,
147        context: &context::DynamicContext,
148        collation: Rc<Collation>,
149        get: F,
150    ) -> error::Result<Self>
151    where
152        F: FnMut(Item) -> error::Result<Sequence>,
153    {
154        // see also sort_by_sequence in array.rs. The signatures are
155        // sufficiently different we don't want to try to unify them.
156
157        let items = self.iter().collect::<Vec<_>>();
158        let keys = self.iter().map(get).collect::<error::Result<Vec<_>>>()?;
159
160        let mut keys_and_items = keys.into_iter().zip(items).collect::<Vec<_>>();
161        // sort by key. unfortunately sort_by requires the compare function
162        // to be infallible. It's not in reality, so we make any failures
163        // sort less, so they appear early on in the sequence.
164        keys_and_items.sort_by(|(a_key, _), (b_key, _)| {
165            a_key.compare(b_key, &collation, context.implicit_timezone())
166        });
167        // a pass to detect any errors; if sorting between two items is
168        // impossible we want to raise a type error
169        for ((a_key, _), (b_key, _)) in keys_and_items.iter().zip(keys_and_items.iter().skip(1)) {
170            a_key.fallible_compare(b_key, &collation, context.implicit_timezone())?;
171        }
172        // now pick up items again
173        let result = keys_and_items
174            .into_iter()
175            .map(|(_, item)| item)
176            .collect::<Sequence>();
177        Ok(result)
178    }
179
180    /// Flatten all arrays in this sequence
181    pub fn flatten(&self) -> error::Result<Self> {
182        let mut result = vec![];
183        for item in self.iter() {
184            if let Ok(array) = item.to_array() {
185                for sequence in array.iter() {
186                    for item in sequence.flatten()?.iter() {
187                        result.push(item);
188                    }
189                }
190            } else {
191                result.push(item);
192            }
193        }
194        Ok(result.into())
195    }
196
197    pub(crate) fn union(self, other: Self, annotations: &xml::Annotations) -> error::Result<Self> {
198        let mut s = HashSet::new();
199        for node in self.nodes() {
200            s.insert(node?);
201        }
202        for node in other.nodes() {
203            s.insert(node?);
204        }
205
206        Ok(Self::process_set_result(s, annotations))
207    }
208
209    pub(crate) fn intersect(
210        self,
211        other: Self,
212        annotations: &xml::Annotations,
213    ) -> error::Result<Self> {
214        let mut s = HashSet::new();
215        let mut r = HashSet::new();
216        for node in self.nodes() {
217            s.insert(node?);
218        }
219        for node in other.nodes() {
220            let node = node?;
221            if s.contains(&node) {
222                r.insert(node);
223            }
224        }
225        Ok(Self::process_set_result(r, annotations))
226    }
227
228    pub(crate) fn except(self, other: Self, annotations: &xml::Annotations) -> error::Result<Self> {
229        let mut s = HashSet::new();
230        for node in self.nodes() {
231            s.insert(node?);
232        }
233        for node in other.nodes() {
234            s.remove(&node?);
235        }
236        Ok(Self::process_set_result(s, annotations))
237    }
238
239    /// Normalize this sequence into a document node, according to
240    /// <https://www.w3.org/TR/xslt-xquery-serialization-31/#serdm>
241    pub fn normalize(&self, item_separator: &str, xot: &mut Xot) -> error::Result<xot::Node> {
242        normalize(self, item_separator, xot)
243    }
244
245    /// Serialize this sequence according to serialization parameters
246    pub(crate) fn serialize(
247        &self,
248        params: SerializationParameters,
249        xot: &mut Xot,
250    ) -> error::Result<String> {
251        serialize_sequence(self, params, xot)
252    }
253
254    /// Display representation of the sequence
255    pub fn display_representation(&self, xot: &Xot, context: &context::DynamicContext) -> String {
256        // TODO: various unwraps
257        match &self {
258            Sequence::Empty(_) => "()".to_string(),
259            Sequence::One(item) => item.item().display_representation(xot, context).unwrap(),
260            Sequence::Many(items) => {
261                let mut representations = Vec::with_capacity(self.len());
262                for item in items.iter() {
263                    representations.push(item.display_representation(xot, context).unwrap());
264                }
265                format!("(\n{}\n)", representations.join(",\n"))
266            }
267            Sequence::Range(range) => {
268                format!("{} to {}", range.start(), range.end())
269            }
270        }
271    }
272}