pattern_3/haystack.rs
1//! Haystacks.
2//!
3//! A *haystack* refers to any linear structure which can be split or sliced
4//! into smaller, non-overlapping parts. Examples are strings and vectors.
5//!
6//! ```rust
7//! let haystack: &str = "hello"; // a string slice (`&str`) is a haystack.
8//! let (a, b) = haystack.split_at(4); // it can be split into two strings.
9//! let c = &a[1..3]; // it can be sliced.
10//! ```
11//!
12//! The minimal haystack which cannot be further sliced is called a *codeword*.
13//! For instance, the codeword of a string would be a UTF-8 sequence. A haystack
14//! can therefore be viewed as a consecutive list of codewords.
15//!
16//! The boundary between codewords can be addressed using an *index*. The
17//! numbers 1, 3 and 4 in the snippet above are sample indices of a string. An
18//! index is usually a `usize`.
19//!
20//! An arbitrary number may point outside of a haystack, or in the interior of a
21//! codeword. These indices are invalid. A *valid index* of a certain haystack
22//! would only point to the boundaries.
23
24use std::ops::{Deref, Range};
25use std::fmt::Debug;
26use std::mem;
27
28/// Borrowed [`Haystack`].
29///
30/// Every `Haystack` type can be borrowed as references to `Hay` types. This
31/// allows multiple similar types to share the same implementation (e.g. the
32/// haystacks `&[T]`, `&mut [T]` and `Vec<T>` all have the same corresponding
33/// hay type `[T]`).
34///
35/// In the other words, a `Haystack` is a generalized reference to `Hay`.
36/// `Hay`s are typically implemented on unsized slice types like `str` and `[T]`.
37///
38/// # Safety
39///
40/// This trait is unsafe as there are some unchecked requirements which the
41/// implementor must uphold. Failing to meet these requirements would lead to
42/// out-of-bound access. The safety requirements are written in each member of
43/// this trait.
44pub unsafe trait Hay {
45 /// The index type of the haystack. Typically a `usize`.
46 ///
47 /// Splitting a hay must be sublinear using this index type. For instance,
48 /// if we implement `Hay` for a linked list, the index should not be an
49 /// integer offset (`usize`) as this would require O(n) time to chase the
50 /// pointer and find the split point. Instead, for a linked list we should
51 /// directly use the node pointer as the index.
52 ///
53 /// # Safety
54 ///
55 /// Valid indices of a single hay have a total order, even this type does
56 /// not require an `Ord` bound — for instance, to order two linked list
57 /// cursors, we need to chase the links and see if they meet; this is slow
58 /// and not suitable for implementing `Ord`, but conceptually an ordering
59 /// can be defined on linked list cursors.
60 type Index: Copy + Debug + Eq;
61
62 /// Creates an empty hay.
63 ///
64 /// # Safety
65 ///
66 /// An empty hay's start and end indices must be the same, e.g.
67 ///
68 /// ```rust
69 /// extern crate pattern_3;
70 /// use pattern_3::Hay;
71 ///
72 /// let empty = <str>::empty();
73 /// assert_eq!(empty.start_index(), empty.end_index());
74 /// ```
75 ///
76 /// This also suggests that there is exactly one valid index for an empty
77 /// hay.
78 ///
79 /// There is no guarantee that two separate calls to `.empty()` will produce
80 /// the same hay reference.
81 fn empty<'a>() -> &'a Self;
82
83 /// Obtains the index to the start of the hay.
84 ///
85 /// Usually this method returns `0`.
86 ///
87 /// # Safety
88 ///
89 /// Implementation must ensure that the start index of hay is the first
90 /// valid index, i.e. for all valid indices `i` of `self`, we have
91 /// `self.start_index() <= i`.
92 fn start_index(&self) -> Self::Index;
93
94 /// Obtains the index to the end of the hay.
95 ///
96 /// Usually this method returns the length of the hay.
97 ///
98 /// # Safety
99 ///
100 /// Implementation must ensure that the end index of hay is the last valid
101 /// index, i.e. for all valid indices `i` of `self`, we have
102 /// `i <= self.end_index()`.
103 fn end_index(&self) -> Self::Index;
104
105 /// Returns the next immediate index in this haystack.
106 ///
107 /// # Safety
108 ///
109 /// The `index` must be a valid index, and also must not equal to
110 /// `self.end_index()`.
111 ///
112 /// Implementation must ensure that if `j = self.next_index(i)`, then `j`
113 /// is also a valid index satisfying `j > i`.
114 ///
115 /// # Examples
116 ///
117 /// ```rust
118 /// use pattern_3::Hay;
119 ///
120 /// let sample = "A→😀";
121 /// unsafe {
122 /// assert_eq!(sample.next_index(0), 1);
123 /// assert_eq!(sample.next_index(1), 4);
124 /// assert_eq!(sample.next_index(4), 8);
125 /// }
126 /// ```
127 unsafe fn next_index(&self, index: Self::Index) -> Self::Index;
128
129 /// Returns the previous immediate index in this haystack.
130 ///
131 /// # Safety
132 ///
133 /// The `index` must be a valid index, and also must not equal to
134 /// `self.start_index()`.
135 ///
136 /// Implementation must ensure that if `j = self.prev_index(i)`, then `j`
137 /// is also a valid index satisfying `j < i`.
138 ///
139 /// # Examples
140 ///
141 /// ```rust
142 /// use pattern_3::Hay;
143 ///
144 /// let sample = "A→😀";
145 /// unsafe {
146 /// assert_eq!(sample.prev_index(8), 4);
147 /// assert_eq!(sample.prev_index(4), 1);
148 /// assert_eq!(sample.prev_index(1), 0);
149 /// }
150 /// ```
151 unsafe fn prev_index(&self, index: Self::Index) -> Self::Index;
152
153 /// Obtains a child hay by slicing `self`.
154 ///
155 /// # Safety
156 ///
157 /// The two ends of the range must be valid indices. The start of the range
158 /// must be before the end of the range (`range.start <= range.end`).
159 unsafe fn slice_unchecked(&self, range: Range<Self::Index>) -> &Self;
160}
161
162/// Linear splittable structure.
163///
164/// A `Haystack` is implemented for reference and collection types such as
165/// `&str`, `&mut [T]` and `Vec<T>`. Every haystack can be borrowed as an
166/// underlying representation called a [`Hay`]. Multiple haystacks may share the
167/// same hay type, and thus share the same implementation of string search
168/// algorithms.
169///
170/// In the other words, a `Haystack` is a generalized reference to `Hay`.
171///
172/// # Safety
173///
174/// This trait is unsafe as there are some unchecked requirements which the
175/// implementor must uphold. Failing to meet these requirements would lead to
176/// out-of-bound access. The safety requirements are written in each member of
177/// this trait.
178pub unsafe trait Haystack: Deref + Sized where Self::Target: Hay {
179 /// Creates an empty haystack.
180 fn empty() -> Self;
181
182 /// Splits the haystack into 3 slices around the given range.
183 ///
184 /// This method splits `self` into 3 non-overlapping parts:
185 ///
186 /// 1. Before the range (`self[..range.start]`),
187 /// 2. Inside the range (`self[range]`), and
188 /// 3. After the range (`self[range.end..]`)
189 ///
190 /// The returned array contains these 3 parts in order.
191 ///
192 /// # Safety
193 ///
194 /// Caller should ensure that the starts and end indices of `range` are
195 /// valid indices for the haystack `self` with `range.start <= range.end`.
196 ///
197 /// If the haystack is a mutable reference (`&mut A`), implementation must
198 /// ensure that the 3 returned haystack are truly non-overlapping in memory.
199 /// This is required to uphold the "Aliasing XOR Mutability" guarantee. If a
200 /// haystack cannot be physically split into non-overlapping parts (e.g. in
201 /// `OsStr`), then `&mut A` should not implement `Haystack` either.
202 ///
203 /// # Examples
204 ///
205 /// ```rust
206 /// use pattern_3::Haystack;
207 ///
208 /// let haystack = &mut [0, 1, 2, 3, 4, 5, 6];
209 /// let [left, middle, right] = unsafe { haystack.split_around(2..6) };
210 /// assert_eq!(left, &mut [0, 1]);
211 /// assert_eq!(middle, &mut [2, 3, 4, 5]);
212 /// assert_eq!(right, &mut [6]);
213 /// ```
214 unsafe fn split_around(self, range: Range<<Self::Target as Hay>::Index>) -> [Self; 3];
215
216 /// Subslices this haystack.
217 ///
218 /// # Safety
219 ///
220 /// The starts and end indices of `range` must be valid indices for the
221 /// haystack `self` with `range.start <= range.end`.
222 unsafe fn slice_unchecked(self, range: Range<<Self::Target as Hay>::Index>) -> Self {
223 let [_, middle, _] = self.split_around(range);
224 middle
225 }
226
227 /// Transforms the range from relative to self's parent to the original
228 /// haystack it was sliced from.
229 ///
230 /// Typically this method can be simply implemented as
231 ///
232 /// ```text
233 /// (original.start + parent.start)..(original.start + parent.end)
234 /// ```
235 ///
236 /// If this haystack is a [`SharedHaystack`], this method would never be
237 /// called.
238 ///
239 /// # Safety
240 ///
241 /// The `parent` range should be a valid range relative to a hay *a*, which
242 /// was used to slice out *self*: `self == &a[parent]`.
243 ///
244 /// Similarly, the `original` range should be a valid range relative to
245 /// another hay *b* used to slice out *a*: `a == &b[original]`.
246 ///
247 /// The distance of `parent` must be consistent with the length of `self`.
248 ///
249 /// This method should return a range which satisfies:
250 ///
251 /// ```text
252 /// self == &b[parent][original] == &b[range]
253 /// ```
254 ///
255 /// Slicing can be destructive and *invalidates* some indices, in particular
256 /// for owned type with a pointer-like index, e.g. linked list. In this
257 /// case, one should derive an entirely new index range from `self`, e.g.
258 /// returning `self.start_index()..self.end_index()`.
259 ///
260 /// # Examples
261 ///
262 /// ```rust
263 /// use pattern_3::Haystack;
264 ///
265 /// let hay = b"This is a sample haystack";
266 /// let this = hay[2..23][3..19].to_vec();
267 /// assert_eq!(&*this, &hay[this.restore_range(2..23, 3..19)]);
268 /// ```
269 fn restore_range(
270 &self,
271 original: Range<<Self::Target as Hay>::Index>,
272 parent: Range<<Self::Target as Hay>::Index>,
273 ) -> Range<<Self::Target as Hay>::Index>;
274}
275
276/// A [`Haystack`] which can be shared and cheaply cloned (e.g. `&H`, `Rc<H>`).
277///
278/// If a haystack implements this marker trait, during internal operations the
279/// original haystack will be retained in full and cloned, rather than being
280/// sliced and splitted. Being a shared haystack allows searcher to see the
281/// entire haystack, including the consumed portion.
282pub trait SharedHaystack: Haystack + Clone
283where Self::Target: Hay // FIXME: RFC 2089 or 2289
284{}
285
286/// The borrowing behavior differs between a (unique) haystack and shared
287/// haystack. We use *specialization* to distinguish between these behavior:
288///
289/// * When using `split_around()` and `slice_unchecked()` with a unique
290/// haystack, the original haystack will be splitted or sliced accordingly
291/// to maintain unique ownership.
292/// * When using these functions with a shared haystack, the original haystack
293/// will be cloned in full as that could provide more context into
294/// searchers.
295///
296/// This trait will never be public.
297trait SpanBehavior: Haystack
298where Self::Target: Hay // FIXME: RFC 2089 or 2289
299{
300 fn take(&mut self) -> Self;
301
302 fn from_span(span: Span<Self>) -> Self;
303
304 unsafe fn split_around_for_span(self, subrange: Range<<Self::Target as Hay>::Index>) -> [Self; 3];
305
306 unsafe fn slice_unchecked_for_span(self, subrange: Range<<Self::Target as Hay>::Index>) -> Self;
307
308 fn borrow_range(
309 &self,
310 range: Range<<Self::Target as Hay>::Index>,
311 ) -> Range<<Self::Target as Hay>::Index>;
312
313 fn do_restore_range(
314 &self,
315 range: Range<<Self::Target as Hay>::Index>,
316 subrange: Range<<Self::Target as Hay>::Index>,
317 ) -> Range<<Self::Target as Hay>::Index>;
318}
319
320impl<H: Haystack> SpanBehavior for H
321where H::Target: Hay // FIXME: RFC 2089 or 2289
322{
323 #[inline]
324 default fn take(&mut self) -> Self {
325 mem::replace(self, Self::empty())
326 }
327
328 #[inline]
329 default fn from_span(span: Span<Self>) -> Self {
330 span.haystack
331 }
332
333 #[inline]
334 default fn borrow_range(&self, _: Range<<Self::Target as Hay>::Index>) -> Range<<Self::Target as Hay>::Index> {
335 self.start_index()..self.end_index()
336 }
337
338 #[inline]
339 default fn do_restore_range(
340 &self,
341 range: Range<<Self::Target as Hay>::Index>,
342 subrange: Range<<Self::Target as Hay>::Index>,
343 ) -> Range<<Self::Target as Hay>::Index> {
344 self.restore_range(range, subrange)
345 }
346
347 #[inline]
348 default unsafe fn split_around_for_span(self, subrange: Range<<Self::Target as Hay>::Index>) -> [Self; 3] {
349 self.split_around(subrange)
350 }
351
352 #[inline]
353 default unsafe fn slice_unchecked_for_span(self, subrange: Range<<Self::Target as Hay>::Index>) -> Self {
354 self.slice_unchecked(subrange)
355 }
356}
357
358impl<H: SharedHaystack> SpanBehavior for H
359where H::Target: Hay // FIXME: RFC 2089 or 2289
360{
361 #[inline]
362 fn take(&mut self) -> Self {
363 self.clone()
364 }
365
366 #[inline]
367 fn from_span(span: Span<Self>) -> Self {
368 unsafe {
369 span.haystack.slice_unchecked(span.range)
370 }
371 }
372
373 #[inline]
374 fn borrow_range(&self, range: Range<<Self::Target as Hay>::Index>) -> Range<<Self::Target as Hay>::Index> {
375 range
376 }
377
378 #[inline]
379 fn do_restore_range(
380 &self,
381 _: Range<<Self::Target as Hay>::Index>,
382 subrange: Range<<Self::Target as Hay>::Index>,
383 ) -> Range<<Self::Target as Hay>::Index> {
384 subrange
385 }
386
387 #[inline]
388 unsafe fn split_around_for_span(self, _: Range<<Self::Target as Hay>::Index>) -> [Self; 3] {
389 [self.clone(), self.clone(), self]
390 }
391
392 #[inline]
393 unsafe fn slice_unchecked_for_span(self, _: Range<<Self::Target as Hay>::Index>) -> Self {
394 self
395 }
396}
397
398
399
400/// A span is a haystack coupled with the original range where the haystack is found.
401///
402/// It can be considered as a tuple `(H, Range<H::Target::Index>)`
403/// where the range is guaranteed to be valid for the haystack.
404///
405/// # Examples
406///
407/// ```
408/// use pattern_3::Span;
409///
410/// let orig_str = "Hello世界";
411/// let orig_span = Span::<&str>::from(orig_str);
412///
413/// // slice a span.
414/// let span = unsafe { orig_span.slice_unchecked(3..8) };
415///
416/// // further slicing (note the range is relative to the original span)
417/// let subspan = unsafe { span.slice_unchecked(4..8) };
418///
419/// // obtains the substring.
420/// let substring = subspan.into();
421/// assert_eq!(substring, "o世");
422/// ```
423///
424/// Visualizing the spans:
425///
426/// ```text
427///
428/// 0 1 2 3 4 5 6 7 8 9 10 11
429/// +---+---+---+---+---+---+---+---+---+---+---+
430/// | H | e | l | l | o | U+4E16 | U+754C | orig_str
431/// +---+---+---+---+---+---+---+---+---+---+---+
432///
433/// ^___________________________________________^ orig_span = (orig_str, 0..11)
434///
435/// ^___________________^ span = (orig_str, 3..8)
436///
437/// ^_______________^ subspan = (orig_str, 4..8)
438/// ```
439#[derive(Debug, Clone)]
440pub struct Span<H: Haystack>
441where H::Target: Hay // FIXME: RFC 2089 or 2289
442{
443 haystack: H,
444 range: Range<<<H as Deref>::Target as Hay>::Index>,
445 //^ The `<H as Trait>` is to trick `#[derive]` not to generate
446 // the where bound for `H::Hay`.
447}
448
449/// Creates a span which covers the entire haystack.
450impl<H: Haystack> From<H> for Span<H>
451where H::Target: Hay // FIXME: RFC 2089 or 2289
452{
453 #[inline]
454 fn from(haystack: H) -> Self {
455 let range = haystack.start_index()..haystack.end_index();
456 Self { haystack, range }
457 }
458}
459
460impl<H: SharedHaystack> Span<H>
461where H::Target: Hay // FIXME: RFC 2089 or 2289
462{
463 /// Decomposes this span into the original haystack, and the range it focuses on.
464 #[inline]
465 pub fn into_parts(self) -> (H, Range<<H::Target as Hay>::Index>) {
466 (self.haystack, self.range)
467 }
468
469 /// Creates a span from a haystack, and a range it should focus on.
470 ///
471 /// # Safety
472 ///
473 /// The `range` must be a valid range relative to `haystack`.
474 #[inline]
475 pub unsafe fn from_parts(haystack: H, range: Range<<H::Target as Hay>::Index>) -> Self {
476 Self { haystack, range }
477 }
478}
479
480impl<'h> Span<&'h str> {
481 /// Reinterprets the string span as a byte-array span.
482 #[inline]
483 pub fn as_bytes(self) -> Span<&'h [u8]> {
484 Span {
485 haystack: self.haystack.as_bytes(),
486 range: self.range,
487 }
488 }
489}
490
491impl<H: Haystack> Span<H>
492where H::Target: Hay // FIXME: RFC 2089 or 2289
493{
494 /// The range of the span, relative to the ultimate original haystack it was sliced from.
495 #[inline]
496 pub fn original_range(&self) -> Range<<H::Target as Hay>::Index> {
497 self.range.clone()
498 }
499
500 /// Borrows a shared span.
501 #[inline]
502 pub fn borrow(&self) -> Span<&H::Target> {
503 Span {
504 haystack: &*self.haystack,
505 range: self.haystack.borrow_range(self.range.clone()),
506 }
507 }
508
509 /// Checks whether this span is empty.
510 #[inline]
511 pub fn is_empty(&self) -> bool {
512 self.range.start == self.range.end
513 }
514
515 /// Returns this span by value, and replaces the original span by an empty
516 /// span.
517 #[inline]
518 pub fn take(&mut self) -> Self {
519 let haystack = self.haystack.take();
520 let range = self.range.clone();
521 self.range.end = self.range.start;
522 Span { haystack, range }
523 }
524
525 // FIXME: This should be changed to an `impl From<Span<H>> for H`.
526 /// Slices the original haystack to the focused range.
527 #[inline]
528 pub fn into(self) -> H {
529 H::from_span(self)
530 }
531
532 /// Splits this span into 3 spans around the given range.
533 ///
534 /// # Safety
535 ///
536 /// `subrange` must be a valid range relative to `self.borrow()`. A safe
537 /// usage is like:
538 ///
539 /// ```rust
540 /// # use pattern_3::{Span, Needle, Searcher};
541 /// # let span = Span::from("foo");
542 /// # let mut searcher = <&str as Needle<&str>>::into_searcher("o");
543 /// # (|| -> Option<()> {
544 /// let range = searcher.search(span.borrow())?;
545 /// let [left, middle, right] = unsafe { span.split_around(range) };
546 /// # Some(()) })();
547 /// ```
548 #[inline]
549 pub unsafe fn split_around(self, subrange: Range<<H::Target as Hay>::Index>) -> [Self; 3] {
550 let self_range = self.haystack.borrow_range(self.range.clone());
551 let [left, middle, right] = self.haystack.split_around_for_span(subrange.clone());
552
553 let left_range = left.do_restore_range(self.range.clone(), self_range.start..subrange.start);
554 let right_range = right.do_restore_range(self.range.clone(), subrange.end..self_range.end);
555 let middle_range = middle.do_restore_range(self.range, subrange);
556
557 [
558 Self { haystack: left, range: left_range },
559 Self { haystack: middle, range: middle_range },
560 Self { haystack: right, range: right_range },
561 ]
562 }
563
564 /// Slices this span to the given range.
565 ///
566 /// # Safety
567 ///
568 /// `subrange` must be a valid range relative to `self.borrow()`.
569 #[inline]
570 pub unsafe fn slice_unchecked(self, subrange: Range<<H::Target as Hay>::Index>) -> Self {
571 let haystack = self.haystack.slice_unchecked_for_span(subrange.clone());
572 let range = haystack.do_restore_range(self.range, subrange);
573 Self { haystack, range }
574 }
575}
576
577unsafe impl<'a, A: Hay + ?Sized + 'a> Haystack for &'a A {
578 #[inline]
579 fn empty() -> Self {
580 A::empty()
581 }
582
583 #[inline]
584 unsafe fn split_around(self, range: Range<A::Index>) -> [Self; 3] {
585 [
586 self.slice_unchecked(self.start_index()..range.start),
587 self.slice_unchecked(range.clone()),
588 self.slice_unchecked(range.end..self.end_index()),
589 ]
590 }
591
592 #[inline]
593 unsafe fn slice_unchecked(self, range: Range<A::Index>) -> Self {
594 A::slice_unchecked(self, range)
595 }
596
597 #[inline]
598 fn restore_range(&self, _: Range<A::Index>, _: Range<A::Index>) -> Range<A::Index> {
599 unreachable!()
600 }
601}
602
603impl<'a, A: Hay + ?Sized + 'a> SharedHaystack for &'a A {}