1use crate::pattern::Pattern;
2use crate::{Offsets, Result};
3use std::ops::{Bound, RangeBounds};
4use unicode_normalization_alignments::UnicodeNormalization;
5
6use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub enum OffsetReferential {
11 Original,
12 Normalized,
13}
14
15#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum Range<T: RangeBounds<usize> + Clone> {
19 Original(T),
20 Normalized(T),
21}
22
23#[allow(clippy::len_without_is_empty)]
24impl<T> Range<T>
25where
26 T: RangeBounds<usize> + Clone,
27{
28 pub fn unwrap(self) -> T {
30 match self {
31 Self::Original(r) => r,
32 Self::Normalized(r) => r,
33 }
34 }
35
36 pub fn len(&self) -> Option<usize> {
38 let range = self.clone().unwrap();
39
40 let end = match range.end_bound() {
41 Bound::Unbounded => None,
42 Bound::Included(i) => Some(*i + 1),
43 Bound::Excluded(i) => Some(*i),
44 }?;
45
46 match range.start_bound() {
47 Bound::Unbounded => Some(end),
48 Bound::Included(i) => Some(end - *i),
49 Bound::Excluded(i) => Some(end - (*i + 1)),
50 }
51 }
52
53 pub fn into_full_range(self, max_len: usize) -> std::ops::Range<usize> {
57 let range = self.unwrap();
58
59 let start = match range.start_bound() {
60 Bound::Unbounded => 0,
61 Bound::Included(i) => *i,
62 Bound::Excluded(i) => *i + 1,
63 };
64 let end = match range.end_bound() {
65 Bound::Unbounded => max_len,
66 Bound::Included(i) => *i + 1,
67 Bound::Excluded(i) => *i,
68 };
69
70 start..end
71 }
72}
73
74#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Eq)]
82pub enum SplitDelimiterBehavior {
83 Removed,
84 Isolated,
85 MergedWithPrevious,
86 MergedWithNext,
87 Contiguous,
88}
89
90#[derive(Default, Debug, Clone, PartialEq, Eq)]
99pub struct NormalizedString {
100 original: String,
102 normalized: String,
104 alignments: Vec<(usize, usize)>,
107 original_shift: usize,
111}
112
113impl NormalizedString {
114 #[cfg(test)]
115 pub(crate) fn new(
116 original: String,
117 normalized: String,
118 alignments: Vec<(usize, usize)>,
119 original_shift: usize,
120 ) -> Self {
121 Self {
122 original,
123 normalized,
124 alignments,
125 original_shift,
126 }
127 }
128 pub fn get(&self) -> &str {
130 &self.normalized
131 }
132
133 pub fn get_original(&self) -> &str {
135 &self.original
136 }
137
138 pub fn offsets_original(&self) -> Offsets {
140 (
141 self.original_shift,
142 self.original_shift + self.len_original(),
143 )
144 }
145
146 pub fn convert_offsets<T>(&self, range: Range<T>) -> Option<std::ops::Range<usize>>
151 where
152 T: RangeBounds<usize> + Clone,
153 {
154 let len_original = self.len_original();
155 let len_normalized = self.len();
156
157 let (target, original) = match range {
158 Range::Original(_) => (range.into_full_range(len_original), true),
159 Range::Normalized(_) => (range.into_full_range(len_normalized), false),
160 };
161
162 if target.start == target.end {
164 return Some(target);
165 }
166 if target.start > target.end {
168 return None;
169 }
170
171 if original && self.original.is_empty() && target == (0..0) {
173 return Some(0..len_normalized);
174 }
175 if !original && self.normalized.is_empty() && target == (0..0) {
176 return Some(0..len_original);
177 }
178
179 if original {
180 let (mut start, mut end) = (None, None);
181 self.alignments
182 .iter()
183 .enumerate()
184 .take_while(|(_, alignment)| target.end >= alignment.1)
185 .for_each(|(i, alignment)| {
186 if start.is_none() && target.start <= alignment.0 {
187 if alignment.0 != alignment.1 {
189 start = Some(i);
190 }
191 }
192 if target.end >= alignment.1 {
193 end = Some(i + 1);
194 }
195 });
196
197 match (start, end) {
198 (Some(s), None) => Some(s..s),
200 (None, Some(e)) => Some(e..e),
202 (Some(s), Some(e)) => Some(s..e),
204 _ => None,
205 }
206 } else {
207 self.alignments.get(target).and_then(expand_alignments)
208 }
209 }
210
211 pub fn get_range<T>(&self, range: Range<T>) -> Option<&str>
213 where
214 T: RangeBounds<usize> + Clone,
215 {
216 match range {
217 Range::Original(_) => self.normalized.get(self.convert_offsets(range)?),
218 Range::Normalized(_) => self.normalized.get(range.into_full_range(self.len())),
219 }
220 }
221
222 pub fn get_range_original<T>(&self, range: Range<T>) -> Option<&str>
224 where
225 T: RangeBounds<usize> + Clone,
226 {
227 match range {
228 Range::Original(_) => self
229 .original
230 .get(range.into_full_range(self.len_original())),
231 Range::Normalized(_) => self.original.get(self.convert_offsets(range)?),
232 }
233 }
234
235 fn validate_range<T: RangeBounds<usize> + Clone>(
237 &self,
238 range: Range<T>,
239 ) -> Option<Range<std::ops::Range<usize>>> {
240 match range {
241 Range::Original(_) => {
242 let r = range.into_full_range(self.original.len());
243 if !(self.original.is_char_boundary(r.start)
244 && self.original.is_char_boundary(r.end))
245 {
246 None
247 } else {
248 Some(Range::Original(r))
249 }
250 }
251 Range::Normalized(_) => {
252 let r = range.into_full_range(self.normalized.len());
253 if !(self.normalized.is_char_boundary(r.start)
254 && self.normalized.is_char_boundary(r.end))
255 {
256 None
257 } else {
258 Some(Range::Normalized(r))
259 }
260 }
261 }
262 }
263
264 pub fn slice<T>(&self, range: Range<T>) -> Option<NormalizedString>
267 where
268 T: RangeBounds<usize> + Clone,
269 {
270 let full_range = self.validate_range(range)?;
271 let (normalized_range, original_range) = match full_range {
272 Range::Original(_) => (
273 self.convert_offsets(full_range.clone())?,
274 full_range.clone().unwrap(),
275 ),
276 Range::Normalized(_) => (
277 full_range.clone().unwrap(),
278 self.convert_offsets(full_range.clone())?,
279 ),
280 };
281
282 let n_shift = original_range.start;
283
284 Some(Self {
285 original: self
286 .get_range_original(full_range.clone())
287 .unwrap_or_default()
288 .into(),
289 normalized: self.get_range(full_range).unwrap_or_default().into(),
290 alignments: self
291 .alignments
292 .get(normalized_range)?
293 .to_vec()
294 .iter()
295 .map(|(start, end)| (start - n_shift, end - n_shift))
296 .collect(),
297 original_shift: self.original_shift + original_range.start,
298 })
299 }
300
301 pub fn transform_range<T, I>(&mut self, range: Range<T>, dest: I, initial_offset: usize)
313 where
314 T: RangeBounds<usize> + Clone,
315 I: IntoIterator<Item = (char, isize)>,
316 {
317 let n_range = match range {
318 Range::Normalized(_) => range.into_full_range(self.len()),
319 Range::Original(_) => match self.convert_offsets(range) {
320 Some(range) => range,
321 None => return,
322 },
323 };
324 trace!(
325 "===== transform_range call with {:?} (initial_offset: {}) =====",
326 n_range,
327 initial_offset
328 );
329
330 let mut replaced_normalized = self.normalized[n_range.clone()]
333 .chars()
334 .collect::<Vec<_>>()
335 .into_iter();
336 let initial_removed: usize = (&mut replaced_normalized)
337 .take(initial_offset)
338 .map(|c| c.len_utf8())
339 .sum();
340
341 let mut offset = (initial_removed + n_range.start) as isize;
342 let mut alignments = Vec::with_capacity(n_range.len());
343 trace!("=> Applying transformations");
344 let normalized = dest
345 .into_iter()
346 .map(|(c, changes)| {
347 trace!(
348 "### {:?} with size {}: {} with offset {} ###",
349 c,
350 c.len_utf8(),
351 match changes {
352 0 => "Replacing".into(),
353 ch if ch > 0 => "Adding".into(),
354 ch if ch < 0 => format!("Replacing + removing {ch} following chars"),
355 _ => "Undefined".into(),
356 },
357 offset
358 );
359
360 let idx = offset as usize;
361 let align = if changes.is_positive() {
362 if idx < 1 {
363 (0, 0)
364 } else {
365 self.alignments[idx - 1]
368 }
369 } else {
370 self.alignments[idx]
371 };
372
373 let replaced_char = if !changes.is_positive() {
375 replaced_normalized.next()
376 } else {
377 None
378 };
379 let replaced_char_size = replaced_char.map_or(0, |c| c.len_utf8());
380 let replaced_char_size_change = c.len_utf8() as isize - replaced_char_size as isize;
381 if let Some(ref replaced_char) = replaced_char {
382 trace!(
383 "Replacing char {:?} - with a change in size: {}",
384 replaced_char,
385 replaced_char_size_change
386 );
387 }
388
389 let total_bytes_to_remove = if changes.is_negative() {
391 (&mut replaced_normalized)
392 .take(-changes as usize)
393 .map(|c| c.len_utf8())
394 .sum()
395 } else {
396 0
397 };
398 trace!("Total bytes to remove: {}", total_bytes_to_remove);
399
400 offset += replaced_char_size as isize;
402 offset += total_bytes_to_remove as isize;
403 trace!("New offset: {}", offset);
404
405 trace!("New normalized alignment: {}x {:?}", c.len_utf8(), align);
406 alignments.extend((0..c.len_utf8()).map(|_| align));
407
408 c
410 })
411 .collect::<String>();
412
413 self.alignments.splice(n_range.clone(), alignments);
414
415 assert!(self.normalized.get(n_range.clone()).is_some());
419 unsafe {
420 self.normalized
421 .as_mut_vec()
425 .splice(n_range, normalized.bytes());
426 }
427 }
428
429 pub fn transform<I>(&mut self, dest: I, initial_offset: usize)
441 where
442 I: IntoIterator<Item = (char, isize)>,
443 {
444 self.transform_range(Range::Original(..), dest, initial_offset)
445 }
446
447 pub fn nfd(&mut self) -> &mut Self {
449 self.transform(self.get().to_owned().nfd(), 0);
450 self
451 }
452
453 pub fn nfkd(&mut self) -> &mut Self {
455 self.transform(self.get().to_owned().nfkd(), 0);
456 self
457 }
458
459 pub fn nfc(&mut self) -> &mut Self {
461 self.transform(self.get().to_owned().nfc(), 0);
462 self
463 }
464
465 pub fn nfkc(&mut self) -> &mut Self {
467 self.transform(self.get().to_owned().nfkc(), 0);
468 self
469 }
470
471 pub fn filter<F: Fn(char) -> bool>(&mut self, keep: F) -> &mut Self {
473 let mut removed: isize = 0;
474 let mut removed_start: usize = 0;
475
476 let mut transforms = Vec::with_capacity(self.normalized.len());
477 let mut last_c = None;
478 for c in self.normalized.chars() {
479 if keep(c) {
480 match last_c {
481 Some(lc) => {
482 transforms.push((lc, -removed));
483 }
484 None => {
485 removed_start = removed as usize;
486 }
487 }
488 last_c = Some(c);
489 removed = 0;
490 } else {
491 removed += 1;
492 }
493 }
494 if let Some(lc) = last_c {
495 transforms.push((lc, -removed));
496 }
497 self.transform(transforms, removed_start);
498 self
499 }
500
501 pub fn prepend(&mut self, s: &str) -> &mut Self {
503 if let Some(next) = self.normalized.chars().next() {
504 let transformations = s
505 .chars()
506 .enumerate()
507 .map(|(i, c)| (c, isize::from(i != 0)))
508 .chain(std::iter::once((next, 1)));
509
510 self.transform_range(Range::Normalized(0..next.len_utf8()), transformations, 0);
511 }
512 self
513 }
514
515 pub fn append(&mut self, s: &str) -> &mut Self {
517 if let Some((b, prev)) = self.normalized.char_indices().last() {
518 let transformations = std::iter::once((prev, 0)).chain(s.chars().map(|c| (c, 1)));
519 self.transform_range(Range::Normalized(b..), transformations, 0);
520 }
521 self
522 }
523
524 pub fn map<F: Fn(char) -> char>(&mut self, map: F) -> &mut Self {
526 let transformations = self
527 .normalized
528 .chars()
529 .map(|c| (map(c), 0))
530 .collect::<Vec<_>>();
531 self.transform(transformations, 0);
532 self
533 }
534
535 pub fn for_each<F: FnMut(char)>(&self, foreach: F) -> &Self {
537 self.normalized.chars().for_each(foreach);
538 self
539 }
540
541 pub fn lowercase(&mut self) -> &mut Self {
543 let mut new_chars: Vec<(char, isize)> = vec![];
544 self.for_each(|c| {
545 c.to_lowercase().enumerate().for_each(|(index, c)| {
546 new_chars.push((c, isize::from(index > 0)));
547 })
548 });
549 self.transform(new_chars, 0);
550 self
551 }
552
553 pub fn uppercase(&mut self) -> &mut Self {
555 let mut new_chars: Vec<(char, isize)> = vec![];
556 self.for_each(|c| {
557 c.to_uppercase().enumerate().for_each(|(index, c)| {
558 new_chars.push((c, isize::from(index > 0)));
559 })
560 });
561 self.transform(new_chars, 0);
562 self
563 }
564
565 pub fn replace<P: Pattern>(&mut self, pattern: P, content: &str) -> Result<()> {
567 let mut new_normalized = String::with_capacity(self.normalized.len()); let mut new_alignments: Vec<(usize, usize)> = Vec::with_capacity(self.alignments.len());
569 let mut last_end = 0; pattern
572 .find_matches(&self.normalized)?
573 .into_iter()
574 .for_each(|((start, end), is_match)| {
575 if is_match {
576 let range = start..end;
577
578 let mut new_len = 0;
579 let removed_chars = self.normalized[range.clone()].chars().count();
580
581 new_normalized.push_str(&self.normalized[last_end..start]);
593 new_alignments.extend(self.alignments[last_end..start].iter().cloned());
594
595 let n_range = Range::Normalized(range).into_full_range(self.len());
596
597 let mut replaced_normalized = self.normalized[n_range.clone()]
600 .chars()
601 .collect::<Vec<_>>()
602 .into_iter();
603 let initial_removed: usize = (&mut replaced_normalized)
604 .take(removed_chars)
605 .map(|c| c.len_utf8())
606 .sum();
607
608 let dest = content.chars().map(|c| {
609 new_len += c.len_utf8();
610 (c, 1)
611 });
612 let mut offset = (initial_removed + n_range.start) as isize;
613 let normalized = dest
614 .into_iter()
615 .map(|(c, changes): (char, i32)| {
616 let idx = offset as usize;
617 let align = if changes.is_positive() {
618 if idx < 1 {
619 (0, 0)
620 } else {
621 self.alignments[idx - 1]
624 }
625 } else {
626 self.alignments[idx]
627 };
628
629 let replaced_char = if !changes.is_positive() {
631 replaced_normalized.next()
632 } else {
633 None
634 };
635 let replaced_char_size = replaced_char.map_or(0, |c| c.len_utf8());
636
637 let total_bytes_to_remove = if changes.is_negative() {
639 (&mut replaced_normalized)
640 .take(-changes as usize)
641 .map(|c| c.len_utf8())
642 .sum()
643 } else {
644 0
645 };
646
647 offset += replaced_char_size as isize;
649 offset += total_bytes_to_remove as isize;
650
651 new_alignments.extend((0..c.len_utf8()).map(|_| align));
652
653 c
655 })
656 .collect::<String>();
657
658 new_normalized.push_str(&normalized);
659 last_end = end;
660 }
661 });
662
663 new_normalized.push_str(&self.normalized[last_end..]);
665 new_alignments.extend(&self.alignments[last_end..]);
666
667 self.normalized = new_normalized;
668 self.alignments = new_alignments;
669 Ok(())
670 }
671
672 pub fn clear(&mut self) -> usize {
674 let len = self.len();
675 self.transform(std::iter::empty(), len);
676 len
677 }
678
679 pub fn split<P: Pattern>(
691 &self,
692 pattern: P,
693 behavior: SplitDelimiterBehavior,
694 ) -> Result<Vec<NormalizedString>> {
695 let matches = pattern.find_matches(&self.normalized)?;
696
697 use SplitDelimiterBehavior::*;
699 let splits = match behavior {
700 Isolated => matches
701 .into_iter()
702 .map(|(offsets, _)| (offsets, false))
703 .collect(),
704 Removed => matches,
705 Contiguous => {
706 let mut previous_match = false;
707 matches
708 .into_iter()
709 .fold(vec![], |mut acc, (offsets, is_match)| {
710 if is_match == previous_match {
711 if let Some(((_, end), _)) = acc.last_mut() {
712 *end = offsets.1;
713 } else {
714 acc.push((offsets, false));
715 }
716 } else {
717 acc.push((offsets, false));
718 }
719 previous_match = is_match;
720 acc
721 })
722 }
723 MergedWithPrevious => {
724 let mut previous_match = false;
725 matches
726 .into_iter()
727 .fold(vec![], |mut acc, (offsets, is_match)| {
728 if is_match && !previous_match {
729 if let Some(((_, end), _)) = acc.last_mut() {
730 *end = offsets.1;
731 } else {
732 acc.push((offsets, false));
733 }
734 } else {
735 acc.push((offsets, false));
736 }
737 previous_match = is_match;
738 acc
739 })
740 }
741 MergedWithNext => {
742 let mut previous_match = false;
743 let mut matches =
744 matches
745 .into_iter()
746 .rev()
747 .fold(vec![], |mut acc, (offsets, is_match)| {
748 if is_match && !previous_match {
749 if let Some(((start, _), _)) = acc.last_mut() {
750 *start = offsets.0;
751 } else {
752 acc.push((offsets, false));
753 }
754 } else {
755 acc.push((offsets, false));
756 }
757 previous_match = is_match;
758 acc
759 });
760 matches.reverse();
761 matches
762 }
763 };
764
765 Ok(splits
767 .into_iter()
768 .filter_map(|(offsets, remove)| {
769 if !remove {
770 Some(
771 self.slice(Range::Normalized(offsets.0..offsets.1))
772 .expect("NormalizedString bad split"),
773 )
774 } else {
775 None
776 }
777 })
778 .collect())
779 }
780
781 pub fn lstrip(&mut self) -> &mut Self {
783 self.lrstrip(true, false)
784 }
785
786 pub fn rstrip(&mut self) -> &mut Self {
788 self.lrstrip(false, true)
789 }
790
791 pub fn strip(&mut self) -> &mut Self {
793 self.lrstrip(true, true)
794 }
795
796 fn lrstrip(&mut self, left: bool, right: bool) -> &mut Self {
797 let leading_spaces = if left {
798 self.get().chars().take_while(|c| c.is_whitespace()).count()
799 } else {
800 0
801 };
802 let trailing_spaces = if right {
803 self.get()
804 .chars()
805 .rev()
806 .take_while(|c| c.is_whitespace())
807 .count()
808 } else {
809 0
810 };
811
812 if leading_spaces > 0 || trailing_spaces > 0 {
813 let count = self.get().chars().count();
814 let transformation = self
815 .normalized
816 .chars()
817 .enumerate()
818 .filter_map(|(i, c)| {
819 if i < leading_spaces || i >= count - trailing_spaces {
820 None
821 } else if i == self.len() - trailing_spaces - 1 {
822 Some((c, -(trailing_spaces as isize)))
823 } else {
824 Some((c, 0))
825 }
826 })
827 .collect::<Vec<_>>();
828 self.transform(transformation, leading_spaces);
829 }
830 self
831 }
832
833 pub fn len(&self) -> usize {
835 self.normalized.len()
836 }
837
838 pub fn len_original(&self) -> usize {
840 self.original.len()
841 }
842
843 pub fn is_empty(&self) -> bool {
845 self.normalized.is_empty()
846 }
847
848 #[allow(dead_code)]
850 pub(crate) fn alignments_original(&self) -> Vec<(usize, usize)> {
851 let mut alignments_original = Vec::with_capacity(self.original.len());
854
855 let start = self.alignments[0].0;
857 if start != 0 {
858 alignments_original.extend(vec![(0, 0); start]);
859 }
860
861 let mut last = (&self.alignments[0].0, &self.alignments[0].1);
862 let mut offset = 0;
863 let mut length = 0;
864 for (start, end) in &self.alignments {
865 if last == (start, end) {
866 length += 1;
868 } else {
869 if start < last.1 {
871 panic!("We can't have overlapping ranges.");
872 }
873
874 alignments_original.extend(vec![(offset, offset + length); last.1 - last.0]);
876 offset += length;
877 length = 1;
878
879 alignments_original.extend(vec![(offset, offset); start - last.1]);
881 }
882
883 last = (start, end);
884 }
885 alignments_original.extend(vec![(offset, offset + length); last.1 - last.0]);
887
888 offset += length;
890 alignments_original.extend(vec![
891 (offset, offset);
892 self.original.len() - alignments_original.len()
893 ]);
894
895 alignments_original
897 }
898}
899
900fn expand_alignments(alignments: &[(usize, usize)]) -> Option<std::ops::Range<usize>> {
902 if alignments.is_empty() {
903 None
904 } else {
905 let start = alignments[0].0;
906 let end = alignments[alignments.len() - 1].1;
907 Some(start..end)
908 }
909}
910
911pub fn get_range_of<T: RangeBounds<usize>>(s: &str, range: T) -> Option<&str> {
913 let len = s.chars().count();
914 let start = match range.start_bound() {
915 Bound::Unbounded => 0,
916 Bound::Included(i) => *i,
917 Bound::Excluded(i) => *i + 1,
918 };
919 let end = match range.end_bound() {
920 Bound::Unbounded => len,
921 Bound::Included(i) => *i + 1,
922 Bound::Excluded(i) => *i,
923 };
924
925 if start == 0 && end == 0 {
926 Some(&s[0..0])
927 } else if start >= len || end > len || start >= end {
928 None
929 } else {
930 let start_b = s.char_indices().map(|(i, _)| i).nth(start).unwrap_or(0);
931 let end_b = s.char_indices().map(|(i, _)| i).nth(end).unwrap_or(s.len());
932 Some(&s[start_b..end_b])
933 }
934}
935
936pub fn bytes_to_char(s: &str, range: std::ops::Range<usize>) -> Option<std::ops::Range<usize>> {
938 let (mut start, mut end) = if range == (0..0) {
939 (Some(0), Some(0))
940 } else {
941 (None, None)
942 };
943
944 s.char_indices()
945 .enumerate()
946 .take_while(|(_, (b, _))| *b <= range.end)
947 .filter(|(_, (b, _))| *b >= range.start)
948 .for_each(|(i, (b, c))| {
949 if b == range.start {
950 start = Some(i);
951 }
952 if b == range.end {
953 end = Some(i);
954 }
955 if b + c.len_utf8() == range.end {
956 end = Some(i + 1);
957 }
958 });
959
960 Some(start?..end?)
961}
962
963pub fn char_to_bytes(s: &str, range: std::ops::Range<usize>) -> Option<std::ops::Range<usize>> {
965 let (mut start, mut end) = if range == (0..0) {
966 (Some(0), Some(0))
967 } else {
968 (None, None)
969 };
970
971 if range.start == range.end {
972 s.char_indices()
973 .skip(range.start)
974 .take(1)
975 .for_each(|(b, _)| {
976 start = Some(b);
977 end = Some(b);
978 });
979 } else {
980 s.char_indices()
981 .skip(range.start)
982 .take(range.end - range.start)
983 .for_each(|(b, c)| {
984 if start.is_none() {
985 start = Some(b);
986 }
987 end = Some(b + c.len_utf8());
988 });
989 }
990
991 Some(start?..end?)
992}
993
994impl From<String> for NormalizedString {
995 fn from(s: String) -> Self {
996 let alignments = s
997 .char_indices()
998 .flat_map(|(b, c)| {
999 let len = c.len_utf8();
1000 (0..len).map(move |_| (b, b + len))
1001 })
1002 .collect::<Vec<_>>();
1003 Self {
1004 original: s.clone(),
1005 normalized: s,
1006 alignments,
1007 original_shift: 0,
1008 }
1009 }
1010}
1011
1012impl From<&str> for NormalizedString {
1013 fn from(s: &str) -> Self {
1014 Self::from(s.to_owned())
1015 }
1016}
1017
1018#[cfg(test)]
1019mod tests {
1020 use super::*;
1021 use regex::Regex;
1022 use unicode_categories::UnicodeCategories;
1023
1024 #[test]
1025 fn test_len_range_inclusive() {
1026 let range = Range::Original(3..=7);
1027 let len = range.len();
1028 assert_eq!(len, Some(5)); }
1030
1031 #[test]
1032 fn test_len_range_exclusive() {
1033 let range = Range::Original(3..7);
1034 let len = range.len();
1035 assert_eq!(len, Some(4)); }
1037
1038 #[test]
1039 fn nfd_adds_new_chars() {
1040 let mut n = NormalizedString::from("Γ©lΓ©gant");
1041 n.nfd();
1042 assert_eq!(
1043 &n.alignments,
1044 &[
1045 (0, 2),
1046 (0, 2),
1047 (0, 2),
1048 (2, 3),
1049 (3, 5),
1050 (3, 5),
1051 (3, 5),
1052 (5, 6),
1053 (6, 7),
1054 (7, 8),
1055 (8, 9)
1056 ]
1057 );
1058 assert_eq!(
1059 n.alignments_original(),
1060 vec![
1061 (0, 3),
1062 (0, 3),
1063 (3, 4),
1064 (4, 7),
1065 (4, 7),
1066 (7, 8),
1067 (8, 9),
1068 (9, 10),
1069 (10, 11)
1070 ]
1071 );
1072 }
1073
1074 #[test]
1075 fn remove_chars_added_by_nfd() {
1076 let mut n = NormalizedString::from("Γ©lΓ©gant");
1077 n.nfd().filter(|c| !c.is_mark_nonspacing());
1078
1079 assert_eq!(n.get(), "elegant");
1080
1081 assert_eq!(
1082 &n.alignments,
1083 &[(0, 2), (2, 3), (3, 5), (5, 6), (6, 7), (7, 8), (8, 9)]
1084 );
1085 assert_eq!(
1086 n.alignments_original(),
1087 vec![
1088 (0, 1),
1089 (0, 1),
1090 (1, 2),
1091 (2, 3),
1092 (2, 3),
1093 (3, 4),
1094 (4, 5),
1095 (5, 6),
1096 (6, 7)
1097 ]
1098 );
1099 }
1100
1101 #[test]
1102 fn remove_chars() {
1103 let mut n = NormalizedString::from("Γ©lΓ©gant");
1104 n.filter(|c| c != 'n');
1105 assert_eq!(n.get(), "Γ©lΓ©gat");
1106 assert_eq!(
1107 &n.alignments,
1108 &[
1109 (0, 2),
1110 (0, 2),
1111 (2, 3),
1112 (3, 5),
1113 (3, 5),
1114 (5, 6),
1115 (6, 7),
1116 (8, 9)
1118 ]
1119 );
1120 assert_eq!(
1121 n.alignments_original(),
1122 vec![
1123 (0, 2),
1124 (0, 2),
1125 (2, 3),
1126 (3, 5),
1127 (3, 5),
1128 (5, 6),
1129 (6, 7),
1130 (7, 7), (7, 8)
1132 ]
1133 );
1134 }
1135
1136 #[test]
1137 fn mixed_addition_and_removal() {
1138 let mut n = NormalizedString::from("Γ©lΓ©gant");
1139 n.nfd().filter(|c| !c.is_mark_nonspacing() && c != 'n');
1140 assert_eq!(n.get(), "elegat");
1141 assert_eq!(
1142 &n.alignments,
1143 &[(0, 2), (2, 3), (3, 5), (5, 6), (6, 7), (8, 9)]
1144 );
1145 assert_eq!(
1146 n.alignments_original(),
1147 vec![
1148 (0, 1),
1149 (0, 1),
1150 (1, 2),
1151 (2, 3),
1152 (2, 3),
1153 (3, 4), (4, 5), (5, 5), (5, 6)
1157 ]
1158 );
1159 }
1160
1161 #[test]
1162 fn range_conversion() {
1163 let mut n = NormalizedString::from(" __Hello__ ");
1164 n.filter(|c| !c.is_whitespace()).lowercase();
1165 let hello_n = n.convert_offsets(Range::Original(6..11));
1166 assert_eq!(hello_n, Some(2..7));
1167 assert_eq!(
1168 n.get_range(Range::Normalized(hello_n.clone().unwrap())),
1169 Some("hello")
1170 );
1171 assert_eq!(
1172 n.get_range_original(Range::Normalized(hello_n.unwrap())),
1173 Some("Hello")
1174 );
1175 assert_eq!(n.get_range(Range::Original(6..11)), Some("hello"));
1176 assert_eq!(n.get_range_original(Range::Original(6..11)), Some("Hello"));
1177
1178 assert_eq!(n.convert_offsets(Range::Original(0..0)), Some(0..0));
1180 assert_eq!(n.convert_offsets(Range::Original(3..3)), Some(3..3));
1181 assert_eq!(n.convert_offsets(Range::Original(15..)), Some(9..9));
1182 assert_eq!(n.convert_offsets(Range::Original(16..)), Some(16..16));
1183 assert_eq!(n.convert_offsets(Range::Original(17..)), None);
1184 assert_eq!(n.convert_offsets(Range::Normalized(0..0)), Some(0..0));
1185 assert_eq!(n.convert_offsets(Range::Normalized(3..3)), Some(3..3));
1186 assert_eq!(n.convert_offsets(Range::Normalized(9..)), Some(9..9));
1187 assert_eq!(n.convert_offsets(Range::Normalized(10..)), None);
1188 }
1189
1190 #[test]
1191 fn original_range() {
1192 let mut n = NormalizedString::from("Hello_______ World!");
1193 n.filter(|c| c != '_').lowercase();
1194 let world_n = n.get_range(Range::Normalized(6..11)).unwrap();
1195 let world_o = n.get_range_original(Range::Normalized(6..11)).unwrap();
1196 assert_eq!(world_n, "world");
1197 assert_eq!(world_o, "World");
1198 let original_range = Range::Original(n.convert_offsets(Range::Normalized(6..11)).unwrap());
1199 assert_eq!(n.get_range(original_range.clone()).unwrap(), "world");
1200 assert_eq!(
1201 n.get_range_original(original_range.clone()).unwrap(),
1202 "World"
1203 );
1204 assert_eq!(original_range.into_full_range(n.len_original()), 13..18);
1205 }
1206
1207 #[test]
1208 fn added_around_edges() {
1209 let mut n = NormalizedString::from("Hello");
1210 n.transform(
1211 vec![
1212 (' ', 1),
1213 ('H', 0),
1214 ('e', 0),
1215 ('l', 0),
1216 ('l', 0),
1217 ('o', 0),
1218 (' ', 1),
1219 ],
1220 0,
1221 );
1222
1223 assert_eq!(&n.normalized, " Hello ");
1224 assert_eq!(
1225 n.get_range_original(Range::Normalized(1..n.normalized.len() - 1)),
1226 Some("Hello")
1227 );
1228 }
1229
1230 #[test]
1231 fn added_characters_alignment() {
1232 let mut n = NormalizedString::from("ιε£ No");
1233 n.transform(
1234 n.get().to_owned().chars().flat_map(|c| {
1235 if (c as usize) > 0x4E00 {
1236 vec![(' ', 0), (c, 1), (' ', 1)]
1237 } else {
1238 vec![(c, 0)]
1239 }
1240 }),
1241 0,
1242 );
1243
1244 assert_eq!(
1245 n,
1246 NormalizedString {
1247 original: "ιε£ No".into(),
1248 normalized: " ι ε£ No".into(),
1249 alignments: vec![
1250 (0, 3),
1251 (0, 3),
1252 (0, 3),
1253 (0, 3),
1254 (0, 3),
1255 (3, 6),
1256 (3, 6),
1257 (3, 6),
1258 (3, 6),
1259 (3, 6),
1260 (6, 7),
1261 (7, 8),
1262 (8, 9)
1263 ],
1264 original_shift: 0
1265 }
1266 );
1267 assert_eq!(
1268 n.alignments_original(),
1269 vec![
1270 (0, 5),
1271 (0, 5),
1272 (0, 5),
1273 (5, 10),
1274 (5, 10),
1275 (5, 10),
1276 (10, 11),
1277 (11, 12),
1278 (12, 13)
1279 ]
1280 );
1281 }
1282
1283 #[test]
1284 fn remove_at_beginning() {
1285 let mut n = NormalizedString::from(" Hello");
1286 n.filter(|c| !c.is_whitespace());
1287 assert_eq!(
1288 n.get_range_original(Range::Normalized(1.."Hello".len())),
1289 Some("ello")
1290 );
1291 assert_eq!(
1292 n.get_range_original(Range::Normalized(0..n.normalized.len())),
1293 Some("Hello")
1294 );
1295 }
1296
1297 #[test]
1298 fn remove_at_end() {
1299 let mut n = NormalizedString::from("Hello ");
1300 n.filter(|c| !c.is_whitespace());
1301 assert_eq!(n.get_range_original(Range::Normalized(0..4)), Some("Hell"));
1302 assert_eq!(
1303 n.get_range_original(Range::Normalized(0..n.normalized.len())),
1304 Some("Hello")
1305 );
1306 }
1307
1308 #[test]
1309 fn removed_around_both_edges() {
1310 let mut n = NormalizedString::from(" Hello ");
1311 n.filter(|c| !c.is_whitespace());
1312 assert_eq!(&n.normalized, "Hello");
1313
1314 assert_eq!(
1315 n.get_range_original(Range::Normalized(0.."Hello".len())),
1316 Some("Hello")
1317 );
1318 assert_eq!(
1319 n.get_range_original(Range::Normalized(1.."Hell".len())),
1320 Some("ell")
1321 );
1322 }
1323
1324 #[test]
1325 fn lstrip() {
1326 let mut n = NormalizedString::from(" This is an example ");
1327 n.lstrip();
1328 assert_eq!(&n.normalized, "This is an example ");
1329 assert_eq!(
1330 n.get_range_original(Range::Normalized(0..n.normalized.len())),
1331 Some("This is an example ")
1332 );
1333 }
1334
1335 #[test]
1336 fn rstrip() {
1337 let mut n = NormalizedString::from(" This is an example ");
1338 n.rstrip();
1339 assert_eq!(&n.normalized, " This is an example");
1340 assert_eq!(
1341 n.get_range_original(Range::Normalized(0..n.normalized.len())),
1342 Some(" This is an example")
1343 );
1344 }
1345
1346 #[test]
1347 fn strip() {
1348 let mut n = NormalizedString::from(" This is an example ");
1349 n.strip();
1350 assert_eq!(&n.normalized, "This is an example");
1351 assert_eq!(
1352 n.get_range_original(Range::Normalized(0..n.normalized.len())),
1353 Some("This is an example")
1354 );
1355 }
1356
1357 #[test]
1358 fn strip_unicode() {
1359 let mut n = NormalizedString::from(" δ½ ε₯½asa \n");
1360 n.strip();
1361 assert_eq!(&n.normalized, "δ½ ε₯½asa");
1362 assert_eq!(
1363 n.get_range_original(Range::Normalized(0..n.normalized.len())),
1364 Some("δ½ ε₯½asa")
1365 );
1366 }
1367
1368 #[test]
1369 fn prepend() {
1370 let mut n = NormalizedString::from("there");
1371 n.prepend("Hey ");
1372 assert_eq!(&n.normalized, "Hey there");
1373 assert_eq!(
1374 n.alignments,
1375 vec![
1376 (0, 1),
1377 (0, 1),
1378 (0, 1),
1379 (0, 1),
1380 (0, 1),
1381 (1, 2),
1382 (2, 3),
1383 (3, 4),
1384 (4, 5)
1385 ]
1386 );
1387 assert_eq!(n.convert_offsets(Range::Normalized(0..4)), Some(0..1));
1388 }
1389
1390 #[test]
1391 fn append() {
1392 let mut n = NormalizedString::from("Hey");
1393 n.append(" there");
1394 assert_eq!(&n.normalized, "Hey there");
1395 assert_eq!(
1396 n.alignments,
1397 vec![
1398 (0, 1),
1399 (1, 2),
1400 (2, 3),
1401 (2, 3),
1402 (2, 3),
1403 (2, 3),
1404 (2, 3),
1405 (2, 3),
1406 (2, 3)
1407 ]
1408 );
1409 assert_eq!(
1410 n.convert_offsets(Range::Normalized(3.." there".len())),
1411 Some(2..3)
1412 );
1413 }
1414
1415 #[test]
1416 fn get_range() {
1417 let s = String::from("Hello my name is John π");
1418 assert_eq!(get_range_of(&s, ..), Some(&s[..]));
1419 assert_eq!(get_range_of(&s, 17..), Some("John π"));
1420 }
1421
1422 #[test]
1423 fn slice() {
1424 let mut s = NormalizedString::from("πΎπ π π ππ π£ππππ");
1425 s.nfkc();
1426
1427 let original_slice = s.slice(Range::Original(0..4)).unwrap();
1428 assert_eq!(original_slice.get(), "G");
1429 assert_eq!(original_slice.get_original(), "πΎ");
1430
1431 let normalized_slice = s.slice(Range::Normalized(0..4)).unwrap();
1432 assert_eq!(normalized_slice.get(), "Good");
1433 assert_eq!(normalized_slice.get_original(), "πΎπ π π");
1434
1435 let mut s = NormalizedString::from(" Good Morning! ");
1437 s.strip();
1438
1439 let slice = s.slice(Range::Original(..)).unwrap();
1441 assert_eq!(
1442 slice.get_range_original(Range::Normalized(0..4)),
1443 Some("Good")
1444 );
1445 let slice = s.slice(Range::Normalized(..)).unwrap();
1446 assert_eq!(
1447 slice.get_range_original(Range::Normalized(0..4)),
1448 Some("Good")
1449 );
1450
1451 let slice = s.slice(Range::Original(4..15)).unwrap();
1453 assert_eq!(
1454 slice.get_range_original(Range::Normalized(0..3)),
1455 Some("ood")
1456 );
1457
1458 let slice = s.slice(Range::Original(3..16)).unwrap();
1460 assert_eq!(
1461 slice.get_range_original(Range::Normalized(0..4)),
1462 Some("Good")
1463 );
1464 }
1465
1466 #[test]
1467 fn replace() {
1468 let mut s = NormalizedString::from(" Hello friend ");
1470 s.replace(' ', "_").unwrap();
1471 assert_eq!(s.get(), "_Hello___friend_");
1472 let mut s = NormalizedString::from("aaaab");
1473 s.replace('a', "b").unwrap();
1474 assert_eq!(s.get(), "bbbbb");
1475
1476 let mut s = NormalizedString::from("aaaab");
1478 s.replace("aaa", "b").unwrap();
1479 assert_eq!(s.get(), "bab");
1480
1481 let mut s = NormalizedString::from(" Hello friend ");
1483 let re = Regex::new(r"\s+").unwrap();
1484 s.replace(&re, "_").unwrap();
1485 assert_eq!(s.get(), "_Hello_friend_");
1486 }
1487
1488 #[test]
1489 fn split() {
1490 use SplitDelimiterBehavior::*;
1491 let s = NormalizedString::from("The-final--countdown");
1492
1493 let test = |behavior: SplitDelimiterBehavior, result: Vec<&str>| {
1494 let splits = s.split('-', behavior).unwrap();
1495 assert_eq!(splits.iter().map(|n| n.get()).collect::<Vec<_>>(), result);
1496 };
1497
1498 test(Removed, vec!["The", "final", "countdown"]);
1499 test(Isolated, vec!["The", "-", "final", "-", "-", "countdown"]);
1500 test(MergedWithPrevious, vec!["The-", "final-", "-", "countdown"]);
1501 test(MergedWithNext, vec!["The", "-final", "-", "-countdown"]);
1502 test(Contiguous, vec!["The", "-", "final", "--", "countdown"]);
1503 }
1504
1505 #[test]
1506 fn transform_range_single_bytes() {
1507 let s = NormalizedString::from("Hello friend");
1508
1509 let mut current = s.clone();
1511 current.transform_range(Range::Original(0..4), vec![('Y', 0)], 3);
1512 assert_eq!(
1513 current,
1514 NormalizedString {
1515 original: "Hello friend".into(),
1516 normalized: "Yo friend".into(),
1517 alignments: vec![
1518 (3, 4),
1519 (4, 5),
1520 (5, 6),
1521 (6, 7),
1522 (7, 8),
1523 (8, 9),
1524 (9, 10),
1525 (10, 11),
1526 (11, 12)
1527 ],
1528 original_shift: 0,
1529 }
1530 );
1531
1532 assert_eq!(
1533 current.alignments_original(),
1534 vec![
1535 (0, 0),
1536 (0, 0),
1537 (0, 0),
1538 (0, 1),
1539 (1, 2),
1540 (2, 3),
1541 (3, 4),
1542 (4, 5),
1543 (5, 6),
1544 (6, 7),
1545 (7, 8),
1546 (8, 9)
1547 ]
1548 );
1549
1550 let mut current = s.clone();
1552 current.transform_range(
1553 Range::Original(3..10),
1554 vec![('_', 0), ('F', 0), ('R', -2)],
1555 2,
1556 );
1557 assert_eq!(
1558 current,
1559 NormalizedString {
1560 original: "Hello friend".into(),
1561 normalized: "Hel_FRnd".into(),
1562 alignments: vec![
1563 (0, 1),
1564 (1, 2),
1565 (2, 3),
1566 (5, 6),
1567 (6, 7),
1568 (7, 8),
1569 (10, 11),
1570 (11, 12)
1571 ],
1572 original_shift: 0,
1573 }
1574 );
1575
1576 assert_eq!(
1577 current.alignments_original(),
1578 vec![
1579 (0, 1),
1580 (1, 2),
1581 (2, 3),
1582 (3, 3),
1583 (3, 3),
1584 (3, 4),
1585 (4, 5),
1586 (5, 6),
1587 (6, 6),
1588 (6, 6),
1589 (6, 7),
1590 (7, 8)
1591 ]
1592 );
1593
1594 let mut current = s.clone();
1596 current.transform_range(Range::Original(5..), vec![('_', 0), ('F', -5)], 0);
1597 assert_eq!(
1598 current,
1599 NormalizedString {
1600 original: "Hello friend".into(),
1601 normalized: "Hello_F".into(),
1602 alignments: vec![(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)],
1603 original_shift: 0,
1604 }
1605 );
1606 assert_eq!(
1607 current.alignments_original(),
1608 vec![
1609 (0, 1),
1610 (1, 2),
1611 (2, 3),
1612 (3, 4),
1613 (4, 5),
1614 (5, 6),
1615 (6, 7),
1616 (7, 7),
1617 (7, 7),
1618 (7, 7),
1619 (7, 7),
1620 (7, 7)
1621 ]
1622 );
1623
1624 let mut current = s.clone();
1626 current.transform_range(Range::Original(0..1), vec![('H', 1), ('H', 0)], 0);
1627 assert_eq!(
1628 current,
1629 NormalizedString {
1630 original: "Hello friend".into(),
1631 normalized: "HHello friend".into(),
1632 alignments: vec![
1633 (0, 0),
1634 (0, 1),
1635 (1, 2),
1636 (2, 3),
1637 (3, 4),
1638 (4, 5),
1639 (5, 6),
1640 (6, 7),
1641 (7, 8),
1642 (8, 9),
1643 (9, 10),
1644 (10, 11),
1645 (11, 12)
1646 ],
1647 original_shift: 0,
1648 }
1649 );
1650 assert_eq!(
1651 current.alignments_original(),
1652 vec![
1653 (1, 2),
1654 (2, 3),
1655 (3, 4),
1656 (4, 5),
1657 (5, 6),
1658 (6, 7),
1659 (7, 8),
1660 (8, 9),
1661 (9, 10),
1662 (10, 11),
1663 (11, 12),
1664 (12, 13)
1665 ]
1666 );
1667 let mut current = s.clone();
1669 current.transform_range(Range::Original(0..0), vec![('H', 1)], 0);
1670 assert_eq!(
1671 current,
1672 NormalizedString {
1673 original: "Hello friend".into(),
1674 normalized: "HHello friend".into(),
1675 alignments: vec![
1676 (0, 0),
1677 (0, 1),
1678 (1, 2),
1679 (2, 3),
1680 (3, 4),
1681 (4, 5),
1682 (5, 6),
1683 (6, 7),
1684 (7, 8),
1685 (8, 9),
1686 (9, 10),
1687 (10, 11),
1688 (11, 12)
1689 ],
1690 original_shift: 0,
1691 }
1692 );
1693 assert_eq!(
1694 current.alignments_original(),
1695 vec![
1696 (1, 2),
1697 (2, 3),
1698 (3, 4),
1699 (4, 5),
1700 (5, 6),
1701 (6, 7),
1702 (7, 8),
1703 (8, 9),
1704 (9, 10),
1705 (10, 11),
1706 (11, 12),
1707 (12, 13)
1708 ]
1709 );
1710 let mut current = s.clone();
1712 current.transform_range(Range::Original(0..1), vec![('H', 0), ('H', 1)], 0);
1713 assert_eq!(
1714 current,
1715 NormalizedString {
1716 original: "Hello friend".into(),
1717 normalized: "HHello friend".into(),
1718 alignments: vec![
1719 (0, 1),
1720 (0, 1),
1721 (1, 2),
1722 (2, 3),
1723 (3, 4),
1724 (4, 5),
1725 (5, 6),
1726 (6, 7),
1727 (7, 8),
1728 (8, 9),
1729 (9, 10),
1730 (10, 11),
1731 (11, 12)
1732 ],
1733 original_shift: 0,
1734 }
1735 );
1736
1737 assert_eq!(
1738 current.alignments_original(),
1739 vec![
1740 (0, 2),
1741 (2, 3),
1742 (3, 4),
1743 (4, 5),
1744 (5, 6),
1745 (6, 7),
1746 (7, 8),
1747 (8, 9),
1748 (9, 10),
1749 (10, 11),
1750 (11, 12),
1751 (12, 13)
1752 ]
1753 );
1754
1755 let mut current = s.clone();
1757 current.transform_range(
1758 Range::Original(5..6),
1759 vec![('_', 0), ('m', 1), ('y', 1), ('_', 1)],
1760 0,
1761 );
1762 assert_eq!(
1763 current,
1764 NormalizedString {
1765 original: "Hello friend".into(),
1766 normalized: "Hello_my_friend".into(),
1767 alignments: vec![
1768 (0, 1),
1769 (1, 2),
1770 (2, 3),
1771 (3, 4),
1772 (4, 5),
1773 (5, 6),
1774 (5, 6),
1775 (5, 6),
1776 (5, 6),
1777 (6, 7),
1778 (7, 8),
1779 (8, 9),
1780 (9, 10),
1781 (10, 11),
1782 (11, 12)
1783 ],
1784 original_shift: 0,
1785 }
1786 );
1787 assert_eq!(
1788 current.alignments_original(),
1789 vec![
1790 (0, 1),
1791 (1, 2),
1792 (2, 3),
1793 (3, 4),
1794 (4, 5),
1795 (5, 9),
1796 (9, 10),
1797 (10, 11),
1798 (11, 12),
1799 (12, 13),
1800 (13, 14),
1801 (14, 15)
1802 ]
1803 );
1804
1805 let mut current = s;
1807 current.transform_range(Range::Original(11..), vec![('d', 0), ('_', 1), ('!', 1)], 0);
1808 assert_eq!(
1809 current,
1810 NormalizedString {
1811 original: "Hello friend".into(),
1812 normalized: "Hello friend_!".into(),
1813 alignments: vec![
1814 (0, 1),
1815 (1, 2),
1816 (2, 3),
1817 (3, 4),
1818 (4, 5),
1819 (5, 6),
1820 (6, 7),
1821 (7, 8),
1822 (8, 9),
1823 (9, 10),
1824 (10, 11),
1825 (11, 12),
1826 (11, 12),
1827 (11, 12)
1828 ],
1829 original_shift: 0,
1830 }
1831 );
1832 assert_eq!(
1833 current.alignments_original(),
1834 vec![
1835 (0, 1),
1836 (1, 2),
1837 (2, 3),
1838 (3, 4),
1839 (4, 5),
1840 (5, 6),
1841 (6, 7),
1842 (7, 8),
1843 (8, 9),
1844 (9, 10),
1845 (10, 11),
1846 (11, 14)
1847 ]
1848 );
1849 }
1850
1851 #[test]
1852 fn transform_range_multiple_bytes() {
1853 let s = NormalizedString::from("πΎπ π π");
1854
1855 let mut current = s.clone();
1857 current.transform_range(Range::Original(0..8), vec![('G', -1)], 0);
1858 assert_eq!(
1859 current,
1860 NormalizedString {
1861 original: "πΎπ π π".into(),
1862 normalized: "Gπ π".into(),
1863 alignments: vec![
1864 (0, 4),
1865 (8, 12),
1866 (8, 12),
1867 (8, 12),
1868 (8, 12),
1869 (12, 16),
1870 (12, 16),
1871 (12, 16),
1872 (12, 16)
1873 ],
1874 original_shift: 0,
1875 }
1876 );
1877 assert_eq!(
1878 current.alignments_original(),
1879 vec![
1880 (0, 1),
1881 (0, 1),
1882 (0, 1),
1883 (0, 1),
1884 (1, 1),
1885 (1, 1),
1886 (1, 1),
1887 (1, 1),
1888 (1, 5),
1889 (1, 5),
1890 (1, 5),
1891 (1, 5),
1892 (5, 9),
1893 (5, 9),
1894 (5, 9),
1895 (5, 9)
1896 ]
1897 );
1898 assert_eq!(current.get_range(Range::Original(0..8)).unwrap(), "G");
1899 assert_eq!(current.get_range(Range::Original(0..4)).unwrap(), "G");
1900 assert_eq!(
1901 current.get_range_original(Range::Original(0..4)).unwrap(),
1902 "πΎ"
1903 );
1904 assert_eq!(
1905 current.get_range_original(Range::Original(0..8)).unwrap(),
1906 "πΎπ "
1907 );
1908
1909 let mut current = s.clone();
1911 current.transform_range(Range::Original(4..12), vec![('o', -1)], 0);
1912 assert_eq!(
1913 current,
1914 NormalizedString {
1915 original: "πΎπ π π".into(),
1916 normalized: "πΎoπ".into(),
1917 alignments: vec![
1918 (0, 4),
1919 (0, 4),
1920 (0, 4),
1921 (0, 4),
1922 (4, 8),
1923 (12, 16),
1924 (12, 16),
1925 (12, 16),
1926 (12, 16)
1927 ],
1928 original_shift: 0,
1929 }
1930 );
1931 assert_eq!(
1932 current.alignments_original(),
1933 vec![
1934 (0, 4),
1935 (0, 4),
1936 (0, 4),
1937 (0, 4),
1938 (4, 5),
1939 (4, 5),
1940 (4, 5),
1941 (4, 5),
1942 (5, 5),
1943 (5, 5),
1944 (5, 5),
1945 (5, 5),
1946 (5, 9),
1947 (5, 9),
1948 (5, 9),
1949 (5, 9)
1950 ]
1951 );
1952
1953 let mut current = s.clone();
1955 current.transform_range(Range::Original(12..), vec![('d', 0), ('!', 1)], 0);
1956 assert_eq!(
1957 current,
1958 NormalizedString {
1959 original: "πΎπ π π".into(),
1960 normalized: "πΎπ π d!".into(),
1961 alignments: vec![
1962 (0, 4),
1963 (0, 4),
1964 (0, 4),
1965 (0, 4),
1966 (4, 8),
1967 (4, 8),
1968 (4, 8),
1969 (4, 8),
1970 (8, 12),
1971 (8, 12),
1972 (8, 12),
1973 (8, 12),
1974 (12, 16),
1975 (12, 16)
1976 ],
1977 original_shift: 0,
1978 }
1979 );
1980
1981 let mut current = s.clone();
1983 current.transform_range(Range::Original(0..4), vec![('_', 1), ('πΎ', 0)], 0);
1984 assert_eq!(
1985 current,
1986 NormalizedString {
1987 original: "πΎπ π π".into(),
1988 normalized: "_πΎπ π π".into(),
1989 alignments: vec![
1990 (0, 0),
1991 (0, 4),
1992 (0, 4),
1993 (0, 4),
1994 (0, 4),
1995 (4, 8),
1996 (4, 8),
1997 (4, 8),
1998 (4, 8),
1999 (8, 12),
2000 (8, 12),
2001 (8, 12),
2002 (8, 12),
2003 (12, 16),
2004 (12, 16),
2005 (12, 16),
2006 (12, 16)
2007 ],
2008 original_shift: 0,
2009 }
2010 );
2011 assert_eq!(
2012 current.alignments_original(),
2013 vec![
2014 (1, 5),
2015 (1, 5),
2016 (1, 5),
2017 (1, 5),
2018 (5, 9),
2019 (5, 9),
2020 (5, 9),
2021 (5, 9),
2022 (9, 13),
2023 (9, 13),
2024 (9, 13),
2025 (9, 13),
2026 (13, 17),
2027 (13, 17),
2028 (13, 17),
2029 (13, 17)
2030 ]
2031 );
2032
2033 assert_eq!(current.get_range(Range::Original(0..8)).unwrap(), "πΎπ ");
2034 assert_eq!(current.get_range(Range::Original(0..4)).unwrap(), "πΎ");
2035 assert_eq!(
2036 current.get_range_original(Range::Original(0..4)).unwrap(),
2037 "πΎ"
2038 );
2039 assert_eq!(
2040 current.get_range_original(Range::Original(0..8)).unwrap(),
2041 "πΎπ "
2042 );
2043 let mut current = s.clone();
2045 current.transform_range(Range::Original(0..0), vec![('_', 1)], 0);
2046 assert_eq!(
2047 current,
2048 NormalizedString {
2049 original: "πΎπ π π".into(),
2050 normalized: "_πΎπ π π".into(),
2051 alignments: vec![
2052 (0, 0),
2053 (0, 4),
2054 (0, 4),
2055 (0, 4),
2056 (0, 4),
2057 (4, 8),
2058 (4, 8),
2059 (4, 8),
2060 (4, 8),
2061 (8, 12),
2062 (8, 12),
2063 (8, 12),
2064 (8, 12),
2065 (12, 16),
2066 (12, 16),
2067 (12, 16),
2068 (12, 16)
2069 ],
2070 original_shift: 0,
2071 }
2072 );
2073 assert_eq!(
2074 current.alignments_original(),
2075 vec![
2076 (1, 5),
2077 (1, 5),
2078 (1, 5),
2079 (1, 5),
2080 (5, 9),
2081 (5, 9),
2082 (5, 9),
2083 (5, 9),
2084 (9, 13),
2085 (9, 13),
2086 (9, 13),
2087 (9, 13),
2088 (13, 17),
2089 (13, 17),
2090 (13, 17),
2091 (13, 17)
2092 ]
2093 );
2094
2095 assert_eq!(current.get_range(Range::Original(0..8)).unwrap(), "πΎπ ");
2096 assert_eq!(current.get_range(Range::Original(0..4)).unwrap(), "πΎ");
2097 assert_eq!(
2098 current.get_range_original(Range::Original(0..4)).unwrap(),
2099 "πΎ"
2100 );
2101 assert_eq!(
2102 current.get_range_original(Range::Original(0..8)).unwrap(),
2103 "πΎπ "
2104 );
2105 let mut current = s.clone();
2107 current.transform_range(Range::Original(0..4), vec![('πΎ', 0), ('o', 1)], 0);
2108 assert_eq!(
2109 current,
2110 NormalizedString {
2111 original: "πΎπ π π".into(),
2112 normalized: "πΎoπ π π".into(),
2113 alignments: vec![
2114 (0, 4),
2115 (0, 4),
2116 (0, 4),
2117 (0, 4),
2118 (0, 4),
2119 (4, 8),
2120 (4, 8),
2121 (4, 8),
2122 (4, 8),
2123 (8, 12),
2124 (8, 12),
2125 (8, 12),
2126 (8, 12),
2127 (12, 16),
2128 (12, 16),
2129 (12, 16),
2130 (12, 16)
2131 ],
2132 original_shift: 0,
2133 }
2134 );
2135 assert_eq!(
2136 current.alignments_original(),
2137 vec![
2138 (0, 5),
2139 (0, 5),
2140 (0, 5),
2141 (0, 5),
2142 (5, 9),
2143 (5, 9),
2144 (5, 9),
2145 (5, 9),
2146 (9, 13),
2147 (9, 13),
2148 (9, 13),
2149 (9, 13),
2150 (13, 17),
2151 (13, 17),
2152 (13, 17),
2153 (13, 17)
2154 ]
2155 );
2156 assert_eq!(current.get_range(Range::Original(0..8)).unwrap(), "πΎoπ ");
2157 assert_eq!(current.get_range(Range::Original(0..4)).unwrap(), "πΎo");
2158 assert_eq!(
2159 current.get_range_original(Range::Original(0..4)).unwrap(),
2160 "πΎ"
2161 );
2162 assert_eq!(
2163 current.get_range_original(Range::Original(0..8)).unwrap(),
2164 "πΎπ "
2165 );
2166
2167 let mut current = s.clone();
2169 current.transform_range(
2170 Range::Original(4..8),
2171 vec![('π ', 0), ('o', 1), ('o', 1), ('o', 1)],
2172 0,
2173 );
2174 assert_eq!(
2175 current,
2176 NormalizedString {
2177 original: "πΎπ π π".into(),
2178 normalized: "πΎπ oooπ π".into(),
2179 alignments: vec![
2180 (0, 4),
2181 (0, 4),
2182 (0, 4),
2183 (0, 4),
2184 (4, 8),
2185 (4, 8),
2186 (4, 8),
2187 (4, 8),
2188 (4, 8),
2189 (4, 8),
2190 (4, 8),
2191 (8, 12),
2192 (8, 12),
2193 (8, 12),
2194 (8, 12),
2195 (12, 16),
2196 (12, 16),
2197 (12, 16),
2198 (12, 16)
2199 ],
2200 original_shift: 0,
2201 }
2202 );
2203 assert_eq!(
2204 current.alignments_original(),
2205 vec![
2206 (0, 4),
2207 (0, 4),
2208 (0, 4),
2209 (0, 4),
2210 (4, 11),
2211 (4, 11),
2212 (4, 11),
2213 (4, 11),
2214 (11, 15),
2215 (11, 15),
2216 (11, 15),
2217 (11, 15),
2218 (15, 19),
2219 (15, 19),
2220 (15, 19),
2221 (15, 19)
2222 ]
2223 );
2224
2225 let mut current = s;
2227 current.transform_range(Range::Original(16..), vec![('!', 1)], 0);
2228 assert_eq!(
2229 current,
2230 NormalizedString {
2231 original: "πΎπ π π".into(),
2232 normalized: "πΎπ π π!".into(),
2233 alignments: vec![
2234 (0, 4),
2235 (0, 4),
2236 (0, 4),
2237 (0, 4),
2238 (4, 8),
2239 (4, 8),
2240 (4, 8),
2241 (4, 8),
2242 (8, 12),
2243 (8, 12),
2244 (8, 12),
2245 (8, 12),
2246 (12, 16),
2247 (12, 16),
2248 (12, 16),
2249 (12, 16),
2250 (12, 16)
2251 ],
2252 original_shift: 0,
2253 }
2254 );
2255 assert_eq!(
2256 current.alignments_original(),
2257 vec![
2258 (0, 4),
2259 (0, 4),
2260 (0, 4),
2261 (0, 4),
2262 (4, 8),
2263 (4, 8),
2264 (4, 8),
2265 (4, 8),
2266 (8, 12),
2267 (8, 12),
2268 (8, 12),
2269 (8, 12),
2270 (12, 17),
2271 (12, 17),
2272 (12, 17),
2273 (12, 17)
2274 ]
2275 );
2276 }
2277
2278 #[test]
2279 fn transform_check() {
2280 let mut s = NormalizedString::from("abcβ¦");
2281 s.nfkd();
2282 let transforms = vec![('a', -2), ('.', 0), ('.', 0), ('.', 0)];
2283 s.transform(transforms, 0);
2284 s.lowercase();
2285 assert_eq!(s.get(), "a...");
2286 }
2287}