1extern crate unicode_segmentation as uniseg;
50
51#[doc(hidden)]
57macro_rules! debug_unreachable {
58 () => {
59 if cfg!(ndebug) {
60 ::util::unreachable()
61 } else {
62 panic!("entered unreachable code")
63 }
64 };
65}
66
67pub use grapheme::{Gc, GcBuf};
68
69pub mod grapheme;
70mod util;
71
72use uniseg::UnicodeSegmentation as UniSeg;
73
/// A position inside a borrowed string.
///
/// The cursor remembers the whole string it was created for together with a
/// raw pointer to its current byte, so it can hand out the text on either
/// side of itself.
pub struct StrCursor<'a> {
    /// The complete string this cursor walks over.
    s: &'a str,
    /// Pointer to the byte the cursor currently sits in front of; the
    /// constructors in this file derive it from `s`.
    at: *const u8,
}
97
98impl<'a> StrCursor<'a> {
99 #[inline]
103 pub fn new_at_start(s: &'a str) -> StrCursor<'a> {
104 StrCursor {
105 s: s,
106 at: s.as_ptr(),
107 }
108 }
109
110 #[inline]
114 pub fn new_at_end(s: &'a str) -> StrCursor<'a> {
115 StrCursor {
116 s: s,
117 at: byte_pos_to_ptr(s, s.len()),
118 }
119 }
120
121 #[inline]
125 pub fn new_at_left_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
126 let cur = StrCursor::new_at_cp_left_of_byte_pos(s, byte_pos);
128
129 let prev = cur.at_prev();
131
132 let prev = match prev {
133 None => return cur, Some(c) => c
135 };
136
137 if prev.byte_pos() + prev.after().unwrap().len() > byte_pos {
139 prev
140 } else {
141 cur
142 }
143 }
144
145 #[inline]
149 pub fn new_at_right_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
150 let cur = StrCursor::new_at_left_of_byte_pos(s, byte_pos);
152 if cur.byte_pos() == byte_pos {
153 return cur;
154 }
155
156 cur.at_next().unwrap()
158 }
159
160 #[inline]
168 pub fn new_at_cp_left_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
169 StrCursor {
170 s: s,
171 at: unsafe { seek_utf8_cp_start_left(s, byte_pos_to_ptr(s, byte_pos)) },
172 }
173 }
174
175 #[inline]
183 pub fn new_at_cp_right_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
184 StrCursor {
185 s: s,
186 at: unsafe { seek_utf8_cp_start_right(s, byte_pos_to_ptr(s, byte_pos)) },
187 }
188 }
189
190 #[inline]
194 pub fn at_prev(mut self) -> Option<StrCursor<'a>> {
195 match self.try_seek_left_gr() {
196 true => Some(self),
197 false => None
198 }
199 }
200
201 #[inline]
205 pub fn at_next(mut self) -> Option<StrCursor<'a>> {
206 match self.try_seek_right_gr() {
207 true => Some(self),
208 false => None
209 }
210 }
211
212 #[inline]
220 pub fn at_prev_cp(mut self) -> Option<StrCursor<'a>> {
221 match self.try_seek_left_cp() {
222 true => Some(self),
223 false => None
224 }
225 }
226
227 #[inline]
235 pub fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
236 match self.try_seek_right_cp() {
237 true => Some(self),
238 false => None
239 }
240 }
241
242 #[inline]
250 pub fn seek_prev(&mut self) {
251 if !self.try_seek_left_gr() {
252 panic!("cannot seek past the beginning of a string");
253 }
254 }
255
256 #[inline]
264 pub fn seek_next(&mut self) {
265 if !self.try_seek_right_gr() {
266 panic!("cannot seek past the end of a string");
267 }
268 }
269
270 #[inline]
282 pub fn seek_prev_cp(&mut self) {
283 if !self.try_seek_left_cp() {
284 panic!("cannot seek past the beginning of a string");
285 }
286 }
287
288 #[inline]
300 pub fn seek_next_cp(&mut self) {
301 if !self.try_seek_right_cp() {
302 panic!("cannot seek past the end of a string");
303 }
304 }
305
306 #[inline]
312 pub fn prev(mut self) -> Option<(&'a Gc, StrCursor<'a>)> {
313 unsafe {
314 let g = match self.before() {
315 Some(g) => g,
316 None => return None,
317 };
318 self.unsafe_set_at(g.as_str());
319 Some((g, self))
320 }
321 }
322
323 #[inline]
333 pub fn prev_cp(mut self) -> Option<(char, StrCursor<'a>)> {
334 unsafe {
335 let cp = match self.cp_before() {
336 Some(cp) => cp,
337 None => return None,
338 };
339 self.unsafe_seek_left(cp.len_utf8());
340 Some((cp, self))
341 }
342 }
343
344 #[inline]
350 pub fn next(mut self) -> Option<(&'a Gc, StrCursor<'a>)> {
351 unsafe {
352 let g = match self.after() {
353 Some(g) => g,
354 None => return None,
355 };
356 self.unsafe_seek_right(g.len());
357 Some((g, self))
358 }
359 }
360
361 #[inline]
371 pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
372 unsafe {
373 let cp = match self.cp_after() {
374 Some(cp) => cp,
375 None => return None,
376 };
377 self.unsafe_seek_right(cp.len_utf8());
378 Some((cp, self))
379 }
380 }
381
382 #[inline]
386 pub fn before(&self) -> Option<&'a Gc> {
387 self.at_prev().and_then(|cur| cur.after())
388 }
389
390 #[inline]
394 pub fn after(&self) -> Option<&'a Gc> {
395 Gc::split_from(self.slice_after()).map(|(gc, _)| gc)
396 }
397
398 #[inline]
402 pub fn slice_before(&self) -> &'a str {
403 unsafe {
404 self.s.slice_unchecked(0, self.byte_pos())
405 }
406 }
407
408 #[inline]
412 pub fn slice_after(&self) -> &'a str {
413 unsafe {
414 self.s.slice_unchecked(self.byte_pos(), self.s.len())
415 }
416 }
417
418 #[inline]
424 pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
425 if !str_eq_literal(self.s, until.s) {
426 None
427 } else {
428 use std::cmp::{max, min};
429 unsafe {
430 let beg = min(self.at, until.at);
431 let end = max(self.at, until.at);
432 let len = end as usize - beg as usize;
433 let bytes = ::std::slice::from_raw_parts(beg, len);
434 Some(::std::str::from_utf8_unchecked(bytes))
435 }
436 }
437 }
438
439 #[inline]
443 pub fn cp_before(&self) -> Option<char> {
444 self.at_prev_cp().and_then(|cur| cur.cp_after())
445 }
446
447 #[inline]
451 pub fn cp_after(&self) -> Option<char> {
452 self.slice_after().chars().next()
453 }
454
455 #[inline]
459 pub fn slice_all(&self) -> &'a str {
460 self.s
461 }
462
463 #[inline]
467 pub fn byte_pos(&self) -> usize {
468 self.at as usize - self.s.as_ptr() as usize
469 }
470
471 #[inline]
472 fn try_seek_left_cp(&mut self) -> bool {
473 unsafe {
474 if self.byte_pos() == 0 {
476 return false;
477 }
478 self.at = seek_utf8_cp_start_left(self.s, self.at.offset(-1));
479 true
480 }
481 }
482
483 #[inline]
484 fn try_seek_right_cp(&mut self) -> bool {
485 unsafe {
486 if self.byte_pos() == self.s.len() {
488 return false;
489 }
490 self.at = seek_utf8_cp_start_right(self.s, self.at.offset(1));
491 true
492 }
493 }
494
495 #[inline]
496 fn try_seek_left_gr(&mut self) -> bool {
497 let len = {
498 let gr = UniSeg::graphemes(self.slice_before(), true).next_back();
499 gr.map(|gr| gr.len())
500 };
501 match len {
502 Some(len) => {
503 unsafe {
504 self.at = self.at.offset(-(len as isize));
505 }
506 true
507 },
508 None => false
509 }
510 }
511
512 #[inline]
513 fn try_seek_right_gr(&mut self) -> bool {
514 let len = {
515 let gr = UniSeg::graphemes(self.slice_after(), true).next();
516 gr.map(|gr| gr.len())
517 };
518 match len {
519 Some(len) => {
520 unsafe {
521 self.at = self.at.offset(len as isize);
522 }
523 true
524 },
525 None => false
526 }
527 }
528
529 #[inline]
533 pub unsafe fn unsafe_seek_left(&mut self, bytes: usize) {
534 self.at = self.at.offset(-(bytes as isize));
535 }
536
537 #[inline]
541 pub unsafe fn unsafe_seek_right(&mut self, bytes: usize) {
542 self.at = self.at.offset(bytes as isize);
543 }
544
545 #[inline]
549 pub unsafe fn unsafe_set_at(&mut self, s: &'a str) {
550 self.at = s.as_bytes().as_ptr();
551 }
552}
553
554impl<'a> Copy for StrCursor<'a> {}
555
556impl<'a> Clone for StrCursor<'a> {
557 fn clone(&self) -> StrCursor<'a> {
558 *self
559 }
560}
561
562impl<'a> std::fmt::Debug for StrCursor<'a> {
563 fn fmt(&self, fmt: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
564 write!(fmt, "StrCursor({:?} | {:?})", self.slice_before(), self.slice_after())
565 }
566}
567
568impl<'a> Eq for StrCursor<'a> {}
569
570impl<'a> PartialEq for StrCursor<'a> {
571 fn eq(&self, other: &StrCursor<'a>) -> bool {
572 (self.at == other.at)
573 && (self.s.as_ptr() == other.s.as_ptr())
574 && (self.s.len() == other.s.len())
575 }
576
577 fn ne(&self, other: &StrCursor<'a>) -> bool {
578 (self.at != other.at)
579 || (self.s.as_ptr() != other.s.as_ptr())
580 || (self.s.len() != other.s.len())
581 }
582}
583
584impl<'a> PartialOrd for StrCursor<'a> {
585 fn partial_cmp(&self, other: &StrCursor<'a>) -> Option<std::cmp::Ordering> {
586 if (self.s.as_ptr() != other.s.as_ptr()) || (self.s.len() != other.s.len()) {
588 None
589 } else {
590 self.at.partial_cmp(&other.at)
591 }
592 }
593}
594
595impl<'a> std::hash::Hash for StrCursor<'a> {
596 fn hash<H>(&self, state: &mut H)
597 where H: std::hash::Hasher {
598 self.s.as_ptr().hash(state);
599 self.s.len().hash(state);
600 self.at.hash(state);
601 }
602}
603
#[cfg(test)]
#[test]
fn test_new_at_start() {
    // A cursor at the start has nothing before it and everything after it.
    let text = "abcdef";
    let cursor = StrCursor::new_at_start(text);
    let before = cursor.slice_before();
    assert_eq!(before, "");
    let after = cursor.slice_after();
    assert_eq!(after, "abcdef");
}
611
#[cfg(test)]
#[test]
fn test_new_at_end() {
    // A cursor at the end has everything before it and nothing after it.
    let text = "abcdef";
    let cursor = StrCursor::new_at_end(text);
    let before = cursor.slice_before();
    assert_eq!(before, "abcdef");
    let after = cursor.slice_after();
    assert_eq!(after, "");
}
619
#[cfg(test)]
#[test]
fn test_new_at_cp_left_of_byte_pos() {
    // Byte 11 lies inside the first CJK character, which starts at byte 10.
    let text = "This is a 本当 test.";
    let cursor = StrCursor::new_at_cp_left_of_byte_pos(text, 11);
    let before = cursor.slice_before();
    assert_eq!(before, "This is a ");
    let after = cursor.slice_after();
    assert_eq!(after, "本当 test.");
}
628
#[cfg(test)]
#[test]
fn test_new_at_cp_right_of_byte_pos() {
    // Byte 11 lies inside the first CJK character, which ends at byte 13.
    let text = "This is a 本当 test.";
    let cursor = StrCursor::new_at_cp_right_of_byte_pos(text, 11);
    let before = cursor.slice_before();
    assert_eq!(before, "This is a 本");
    let after = cursor.slice_after();
    assert_eq!(after, "当 test.");
}
637
#[cfg(test)]
#[test]
fn test_new_at_left_of_byte_pos() {
    // The first "ä" is the precomposed U+00E4 (2 bytes); the second is "a"
    // followed by the combining diaeresis U+0308 (1 + 2 bytes).
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";

    let mut seen = Vec::new();
    for pos in 0..text.len() + 1 {
        let cur = StrCursor::new_at_left_of_byte_pos(text, pos);
        seen.push((pos, cur.byte_pos(), cur.after().map(Gc::as_str)));
    }

    // (requested byte, resulting byte, cluster after the cursor)
    assert_eq!(seen, vec![
        (0, 0, Some("J")),
        (1, 1, Some("\u{e4}")), (2, 1, Some("\u{e4}")),
        (3, 3, Some("g")),
        (4, 4, Some("e")),
        (5, 5, Some("r")),
        (6, 6, Some(",")),
        (7, 7, Some("J")),
        (8, 8, Some("a\u{308}")), (9, 8, Some("a\u{308}")), (10, 8, Some("a\u{308}")),
        (11, 11, Some("g")),
        (12, 12, Some("e")),
        (13, 13, Some("r")),
        (14, 14, Some(",")),
        (15, 15, Some("大")), (16, 15, Some("大")), (17, 15, Some("大")),
        (18, 18, Some("嫌")), (19, 18, Some("嫌")), (20, 18, Some("嫌")),
        (21, 21, Some("い")), (22, 21, Some("い")), (23, 21, Some("い")),
        (24, 24, Some(",")),
        (25, 25, Some("💪")), (26, 25, Some("💪")), (27, 25, Some("💪")), (28, 25, Some("💪")),
        (29, 29, Some("❤")), (30, 29, Some("❤")), (31, 29, Some("❤")),
        (32, 32, Some("!")),
        (33, 33, None),
    ]);
}
682
#[cfg(test)]
#[test]
fn test_new_at_right_of_byte_pos() {
    // The first "ä" is the precomposed U+00E4 (2 bytes); the second is "a"
    // followed by the combining diaeresis U+0308 (1 + 2 bytes).
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";

    let mut seen = Vec::new();
    for pos in 0..text.len() + 1 {
        let cur = StrCursor::new_at_right_of_byte_pos(text, pos);
        seen.push((pos, cur.byte_pos(), cur.after().map(Gc::as_str)));
    }

    // (requested byte, resulting byte, cluster after the cursor)
    assert_eq!(seen, vec![
        (0, 0, Some("J")),
        (1, 1, Some("\u{e4}")),
        (2, 3, Some("g")), (3, 3, Some("g")),
        (4, 4, Some("e")),
        (5, 5, Some("r")),
        (6, 6, Some(",")),
        (7, 7, Some("J")),
        (8, 8, Some("a\u{308}")),
        (9, 11, Some("g")), (10, 11, Some("g")), (11, 11, Some("g")),
        (12, 12, Some("e")),
        (13, 13, Some("r")),
        (14, 14, Some(",")),
        (15, 15, Some("大")),
        (16, 18, Some("嫌")), (17, 18, Some("嫌")), (18, 18, Some("嫌")),
        (19, 21, Some("い")), (20, 21, Some("い")), (21, 21, Some("い")),
        (22, 24, Some(",")), (23, 24, Some(",")), (24, 24, Some(",")),
        (25, 25, Some("💪")),
        (26, 29, Some("❤")), (27, 29, Some("❤")), (28, 29, Some("❤")), (29, 29, Some("❤")),
        (30, 32, Some("!")), (31, 32, Some("!")), (32, 32, Some("!")),
        (33, 33, None),
    ]);
}
727
#[cfg(test)]
#[test]
fn test_at_prev_cp() {
    // Walk backwards one code point at a time and record where we land.
    let text = "大嫌い,💪❤";
    let end = StrCursor::new_at_end(text);
    let mut positions = Vec::new();
    for cur in test_util::finite_iterate(end, StrCursor::at_prev_cp) {
        positions.push(cur.byte_pos());
    }
    assert_eq!(positions, vec![14, 10, 9, 6, 3, 0]);
}
738
#[cfg(test)]
#[test]
fn test_at_next_cp() {
    // Walk forwards one code point at a time and record where we land.
    let text = "大嫌い,💪❤";
    let start = StrCursor::new_at_start(text);
    let mut positions = Vec::new();
    for cur in test_util::finite_iterate(start, StrCursor::at_next_cp) {
        positions.push(cur.byte_pos());
    }
    assert_eq!(positions, vec![3, 6, 9, 10, 14, 17]);
}
749
#[cfg(test)]
#[test]
fn test_at_prev_and_before() {
    // "e" + combining diaeresis must be stepped over as a single cluster.
    let text = "noe\u{0308}l";
    let end = StrCursor::new_at_end(text);
    let mut seen = Vec::new();
    for cur in test_util::finite_iterate_lead(end, StrCursor::at_prev) {
        seen.push((cur.byte_pos(), cur.after().map(Gc::as_str)));
    }
    assert_eq!(seen, vec![
        (6, None),
        (5, Some("l")),
        (2, Some("e\u{0308}")),
        (1, Some("o")),
        (0, Some("n")),
    ]);
}
766
#[cfg(test)]
#[test]
fn test_at_next_and_after() {
    // "e" + combining diaeresis must be stepped over as a single cluster.
    let text = "noe\u{0308}l";
    let start = StrCursor::new_at_start(text);
    let mut seen = Vec::new();
    for cur in test_util::finite_iterate_lead(start, StrCursor::at_next) {
        seen.push((cur.byte_pos(), cur.after().map(Gc::as_str)));
    }
    assert_eq!(seen, vec![
        (0, Some("n")),
        (1, Some("o")),
        (2, Some("e\u{0308}")),
        (5, Some("l")),
        (6, None),
    ]);
}
783
#[cfg(test)]
#[test]
fn test_prev() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let end = StrCursor::new_at_end(text);

    let mut steps = Vec::new();
    for cur in test_util::finite_iterate_lead(end, StrCursor::at_prev) {
        let step = cur.prev().map(|(cluster, moved)| (cluster.as_str(), moved.byte_pos()));
        steps.push(step);
    }

    assert_eq!(steps, vec![
        Some(("!", 32)),
        Some(("❤", 29)),
        Some(("💪", 25)),
        Some((",", 24)),
        Some(("い", 21)),
        Some(("嫌", 18)),
        Some(("大", 15)),
        Some((",", 14)),
        Some(("r", 13)),
        Some(("e", 12)),
        Some(("g", 11)),
        Some(("a\u{308}", 8)),
        Some(("J", 7)),
        Some((",", 6)),
        Some(("r", 5)),
        Some(("e", 4)),
        Some(("g", 3)),
        Some(("\u{e4}", 1)),
        Some(("J", 0)),
        None,
    ]);
}
815
#[cfg(test)]
#[test]
fn test_prev_cp() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308, which
    // shows up as two separate code points here.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let end = StrCursor::new_at_end(text);

    let mut steps = Vec::new();
    for cur in test_util::finite_iterate_lead(end, StrCursor::at_prev_cp) {
        let step = cur.prev_cp().map(|(ch, moved)| (ch, moved.byte_pos()));
        steps.push(step);
    }

    assert_eq!(steps, vec![
        Some(('!', 32)),
        Some(('❤', 29)),
        Some(('💪', 25)),
        Some((',', 24)),
        Some(('い', 21)),
        Some(('嫌', 18)),
        Some(('大', 15)),
        Some((',', 14)),
        Some(('r', 13)),
        Some(('e', 12)),
        Some(('g', 11)),
        Some(('\u{308}', 9)),
        Some(('a', 8)),
        Some(('J', 7)),
        Some((',', 6)),
        Some(('r', 5)),
        Some(('e', 4)),
        Some(('g', 3)),
        Some(('\u{e4}', 1)),
        Some(('J', 0)),
        None,
    ]);
}
848
#[cfg(test)]
#[test]
fn test_next() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let start = StrCursor::new_at_start(text);

    let mut steps = Vec::new();
    for cur in test_util::finite_iterate_lead(start, StrCursor::at_next) {
        let step = cur.next().map(|(cluster, moved)| (cluster.as_str(), moved.byte_pos()));
        steps.push(step);
    }

    assert_eq!(steps, vec![
        Some(("J", 1)),
        Some(("\u{e4}", 3)),
        Some(("g", 4)),
        Some(("e", 5)),
        Some(("r", 6)),
        Some((",", 7)),
        Some(("J", 8)),
        Some(("a\u{308}", 11)),
        Some(("g", 12)),
        Some(("e", 13)),
        Some(("r", 14)),
        Some((",", 15)),
        Some(("大", 18)),
        Some(("嫌", 21)),
        Some(("い", 24)),
        Some((",", 25)),
        Some(("💪", 29)),
        Some(("❤", 32)),
        Some(("!", 33)),
        None,
    ]);
}
880
#[cfg(test)]
#[test]
fn test_next_cp() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308, which
    // shows up as two separate code points here.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let start = StrCursor::new_at_start(text);

    let mut steps = Vec::new();
    for cur in test_util::finite_iterate_lead(start, StrCursor::at_next_cp) {
        let step = cur.next_cp().map(|(ch, moved)| (ch, moved.byte_pos()));
        steps.push(step);
    }

    assert_eq!(steps, vec![
        Some(('J', 1)),
        Some(('\u{e4}', 3)),
        Some(('g', 4)),
        Some(('e', 5)),
        Some(('r', 6)),
        Some((',', 7)),
        Some(('J', 8)),
        Some(('a', 9)),
        Some(('\u{308}', 11)),
        Some(('g', 12)),
        Some(('e', 13)),
        Some(('r', 14)),
        Some((',', 15)),
        Some(('大', 18)),
        Some(('嫌', 21)),
        Some(('い', 24)),
        Some((',', 25)),
        Some(('💪', 29)),
        Some(('❤', 32)),
        Some(('!', 33)),
        None,
    ]);
}
913
#[cfg(test)]
#[test]
fn test_seek_prev() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_end(text);

    // The string holds 19 grapheme clusters; seek over each of them in turn.
    let mut visited = Vec::new();
    for i in 0..19 {
        println!("i: {:?}", i);
        println!("cur.byte_pos(): {:?}", cursor.byte_pos());
        cursor.seek_prev();
        let cluster = cursor.after().unwrap().as_str();
        visited.push((cluster, cursor.byte_pos()));
    }

    assert_eq!(visited, vec![
        ("!", 32),
        ("❤", 29),
        ("💪", 25),
        (",", 24),
        ("い", 21),
        ("嫌", 18),
        ("大", 15),
        (",", 14),
        ("r", 13),
        ("e", 12),
        ("g", 11),
        ("a\u{308}", 8),
        ("J", 7),
        (",", 6),
        ("r", 5),
        ("e", 4),
        ("g", 3),
        ("\u{e4}", 1),
        ("J", 0),
    ]);
}
948
#[cfg(test)]
#[test]
#[should_panic]
fn test_seek_prev_panic() {
    // Seeking left from the very start must panic.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_start(text);
    cursor.seek_prev();
}
957
#[cfg(test)]
#[test]
fn test_seek_prev_cp() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308, which
    // counts as two code points, giving 20 code points in total.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_end(text);

    let mut visited = Vec::new();
    for _ in 0..20 {
        cursor.seek_prev_cp();
        let ch = cursor.cp_after().unwrap();
        visited.push((ch, cursor.byte_pos()));
    }

    assert_eq!(visited, vec![
        ('!', 32),
        ('❤', 29),
        ('💪', 25),
        (',', 24),
        ('い', 21),
        ('嫌', 18),
        ('大', 15),
        (',', 14),
        ('r', 13),
        ('e', 12),
        ('g', 11),
        ('\u{308}', 9),
        ('a', 8),
        ('J', 7),
        (',', 6),
        ('r', 5),
        ('e', 4),
        ('g', 3),
        ('\u{e4}', 1),
        ('J', 0),
    ]);
}
991
#[cfg(test)]
#[test]
#[should_panic]
fn test_seek_prev_cp_panic() {
    // Seeking one code point left from the very start must panic.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_start(text);
    cursor.seek_prev_cp();
}
1000
#[cfg(test)]
#[test]
fn test_seek_next() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_start(text);

    // The string holds 19 grapheme clusters; seek over each of them in turn.
    let mut visited = Vec::new();
    for _ in 0..19 {
        cursor.seek_next();
        let cluster = cursor.before().unwrap().as_str();
        visited.push((cluster, cursor.byte_pos()));
    }

    assert_eq!(visited, vec![
        ("J", 1),
        ("\u{e4}", 3),
        ("g", 4),
        ("e", 5),
        ("r", 6),
        (",", 7),
        ("J", 8),
        ("a\u{308}", 11),
        ("g", 12),
        ("e", 13),
        ("r", 14),
        (",", 15),
        ("大", 18),
        ("嫌", 21),
        ("い", 24),
        (",", 25),
        ("💪", 29),
        ("❤", 32),
        ("!", 33),
    ]);
}
1033
#[cfg(test)]
#[test]
#[should_panic]
fn test_seek_next_panic() {
    // Seeking right from the very end must panic.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_end(text);
    cursor.seek_next();
}
1042
#[cfg(test)]
#[test]
fn test_seek_next_cp() {
    // The first "ä" is precomposed U+00E4; the second is "a" + U+0308, which
    // counts as two code points, giving 20 code points in total.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_start(text);

    let mut visited = Vec::new();
    for _ in 0..20 {
        cursor.seek_next_cp();
        let ch = cursor.cp_before().unwrap();
        visited.push((ch, cursor.byte_pos()));
    }

    assert_eq!(visited, vec![
        ('J', 1),
        ('\u{e4}', 3),
        ('g', 4),
        ('e', 5),
        ('r', 6),
        (',', 7),
        ('J', 8),
        ('a', 9),
        ('\u{308}', 11),
        ('g', 12),
        ('e', 13),
        ('r', 14),
        (',', 15),
        ('大', 18),
        ('嫌', 21),
        ('い', 24),
        (',', 25),
        ('💪', 29),
        ('❤', 32),
        ('!', 33),
    ]);
}
1076
#[cfg(test)]
#[test]
#[should_panic]
fn test_seek_next_cp_panic() {
    // Seeking one code point right from the very end must panic.
    let text = "J\u{e4}ger,Ja\u{308}ger,大嫌い,💪❤!";
    let mut cursor = StrCursor::new_at_end(text);
    cursor.seek_next_cp();
}
1085
#[cfg(test)]
#[test]
fn test_char_before_and_after() {
    // At every code point boundary, check the neighbours on both sides.
    let text = "大嫌い,💪❤";
    let start = StrCursor::new_at_start(text);

    let mut seen = Vec::new();
    for cur in test_util::finite_iterate_lead(start, StrCursor::at_next_cp) {
        seen.push((cur.byte_pos(), cur.cp_before(), cur.cp_after()));
    }

    assert_eq!(seen, vec![
        (0, None, Some('大')),
        (3, Some('大'), Some('嫌')),
        (6, Some('嫌'), Some('い')),
        (9, Some('い'), Some(',')),
        (10, Some(','), Some('💪')),
        (14, Some('💪'), Some('❤')),
        (17, Some('❤'), None),
    ]);
}
1104
#[cfg(test)]
#[test]
fn test_slice_between() {
    let text = "they hit, fight, kick, wreak havoc, and rejoice";
    let at_start = StrCursor::new_at_start(text);
    let at_end = StrCursor::new_at_end(text);
    // A cursor into an unrelated string.
    let unrelated = StrCursor::new_at_end("nobody knows what they're lookin' for");
    // A cursor into a sub-slice of the same string: same memory, but not the
    // same slice.
    let sub_slice = StrCursor::new_at_end(&text[1..]);

    // The order of the two cursors does not matter.
    assert_eq!(at_start.slice_between(at_end), Some(text));
    assert_eq!(at_end.slice_between(at_start), Some(text));

    // Cursors over different slices have nothing between them.
    assert_eq!(at_start.slice_between(unrelated), None);
    assert_eq!(at_start.slice_between(sub_slice), None);
}
1118
/// Converts a byte offset into `s` to a raw pointer to that byte.
///
/// `byte_pos == s.len()` is accepted and yields the one-past-the-end pointer.
///
/// # Panics
///
/// Panics if `byte_pos` is greater than `s.len()`.
#[inline]
fn byte_pos_to_ptr(s: &str, byte_pos: usize) -> *const u8 {
    let len = s.len();
    if byte_pos > len {
        panic!("byte position out of bounds: the len is {} but the position is {}",
            len, byte_pos);
    }
    let start = s.as_ptr();
    // The bounds check above keeps the result within, or one past, `s`.
    unsafe { start.offset(byte_pos as isize) }
}
1127
/// Moves `from` left until it points at the first byte of a UTF-8 code point
/// (or at the start of `s`) and returns the resulting pointer.
///
/// If `from` is the one-past-the-end pointer of `s` it is returned unchanged:
/// the end of a string is always a code point boundary, and the byte there
/// does not belong to `s`, so it must not be read. Callers in this file reach
/// this case when a cursor is requested at byte position `s.len()`.
///
/// # Safety
///
/// `from` must point into `s` or one past its end.
#[inline]
unsafe fn seek_utf8_cp_start_left(s: &str, mut from: *const u8) -> *const u8 {
    let beg = s.as_ptr();
    let end = beg.offset(s.len() as isize);

    // Never dereference the one-past-the-end position.
    if from >= end {
        return from;
    }

    // Continuation bytes have the bit pattern 10xx_xxxx.
    while from > beg && (*from & 0b11_00_0000 == 0b10_00_0000) {
        from = from.offset(-1);
    }
    from
}
1136
#[cfg(test)]
#[test]
fn test_seek_utf8_cp_start_left() {
    // The first two characters are three bytes each: bytes 0..3 and 3..6.
    let text = "カブム!";
    let bytes = text.as_bytes();

    // (starting byte index, expected resulting byte index)
    let cases = [(0, 0), (1, 0), (2, 0), (3, 3), (4, 3), (5, 3)];
    for &(from, want) in cases.iter() {
        let got = unsafe { seek_utf8_cp_start_left(text, &bytes[from]) };
        assert_eq!(got, &bytes[want] as *const u8);
    }
}
1149
/// Moves `from` right until it points at the first byte of a UTF-8 code point
/// (or at the end of `s`) and returns the resulting pointer.
///
/// The one-past-the-end position is never dereferenced.
///
/// # Safety
///
/// `from` must point into `s` or one past its end.
#[inline]
unsafe fn seek_utf8_cp_start_right(s: &str, mut from: *const u8) -> *const u8 {
    let end = s.as_ptr().offset(s.len() as isize);
    loop {
        if from >= end {
            break;
        }
        // Anything that is not a continuation byte (10xx_xxxx) starts a code
        // point.
        if *from & 0b11_00_0000 != 0b10_00_0000 {
            break;
        }
        from = from.offset(1);
    }
    from
}
1158
#[cfg(test)]
#[test]
fn test_seek_utf8_cp_start_right() {
    // The first two characters are three bytes each: bytes 0..3 and 3..6.
    let text = "カブム!";
    let bytes = text.as_bytes();

    // (starting byte index, expected resulting byte index)
    let cases = [(0, 0), (1, 3), (2, 3), (3, 3), (4, 6), (5, 6)];
    for &(from, want) in cases.iter() {
        let got = unsafe { seek_utf8_cp_start_right(text, &bytes[from]) };
        assert_eq!(got, &bytes[want] as *const u8);
    }
}
1171
/// Returns `true` when `a` and `b` are the very same slice: they start at the
/// same address and have the same length. String contents are not compared.
#[inline]
fn str_eq_literal(a: &str, b: &str) -> bool {
    if a.len() != b.len() {
        return false;
    }
    a.as_ptr() == b.as_ptr()
}
1177
#[cfg(test)]
#[test]
fn test_str_eq_literal() {
    let text = "hare hare yukai";
    let first = &text[0..4];

    // Identical slices.
    assert!(str_eq_literal(text, text));
    assert!(str_eq_literal(first, &text[0..4]));

    // Same contents ("hare") at a different address.
    assert!(!str_eq_literal(first, &text[5..9]));

    // Same address, different length.
    assert!(!str_eq_literal(first, &text[0..3]));
}
1187
#[cfg(test)]
mod test_util {
    /// Iterator that repeatedly applies a function to its previous result,
    /// stopping at the first `None`. The seed itself is not yielded.
    ///
    /// Fields: the most recent value (if any) and the step function.
    pub struct FiniteIter<T, F>(Option<T>, F);

    impl<T, F> Iterator for FiniteIter<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        type Item = T;

        fn next(&mut self) -> Option<Self::Item> {
            // Once the stored value is gone the iterator stays exhausted.
            let last = match self.0.take() {
                Some(value) => value,
                None => return None,
            };
            self.0 = (self.1)(last);
            self.0.clone()
        }
    }

    /// Yields `f(seed)`, `f(f(seed))`, … until `f` returns `None`.
    pub fn finite_iterate<T, F>(seed: T, f: F) -> FiniteIter<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        FiniteIter(Some(seed), f)
    }

    /// Like `FiniteIter`, but yields the seed first.
    ///
    /// Fields: the most recent value (if any), the step function, and whether
    /// the seed has already been handed out.
    pub struct FiniteIterLead<T, F>(Option<T>, F, bool);

    impl<T, F> Iterator for FiniteIterLead<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        type Item = T;

        fn next(&mut self) -> Option<Self::Item> {
            if self.2 {
                let last = match self.0.take() {
                    Some(value) => value,
                    None => return None,
                };
                self.0 = (self.1)(last);
            } else {
                // First call: hand out the seed without stepping.
                self.2 = true;
            }
            self.0.clone()
        }
    }

    /// Yields `seed`, `f(seed)`, `f(f(seed))`, … until `f` returns `None`.
    pub fn finite_iterate_lead<T, F>(seed: T, f: F) -> FiniteIterLead<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        FiniteIterLead(Some(seed), f, false)
    }
}