xi_core_lib/
word_boundaries.rs1use xi_rope::{Cursor, Rope, RopeInfo};
19
20pub struct WordCursor<'a> {
21 inner: Cursor<'a, RopeInfo>,
22}
23
24impl<'a> WordCursor<'a> {
25 pub fn new(text: &'a Rope, pos: usize) -> WordCursor<'a> {
26 let inner = Cursor::new(text, pos);
27 WordCursor { inner }
28 }
29
30 pub fn prev_boundary(&mut self) -> Option<usize> {
32 if let Some(ch) = self.inner.prev_codepoint() {
33 let mut prop = get_word_property(ch);
34 let mut candidate = self.inner.pos();
35 while let Some(prev) = self.inner.prev_codepoint() {
36 let prop_prev = get_word_property(prev);
37 if classify_boundary(prop_prev, prop).is_start() {
38 break;
39 }
40 prop = prop_prev;
41 candidate = self.inner.pos();
42 }
43 self.inner.set(candidate);
44 return Some(candidate);
45 }
46 None
47 }
48
49 pub fn next_boundary(&mut self) -> Option<usize> {
51 if let Some(ch) = self.inner.next_codepoint() {
52 let mut prop = get_word_property(ch);
53 let mut candidate = self.inner.pos();
54 while let Some(next) = self.inner.next_codepoint() {
55 let prop_next = get_word_property(next);
56 if classify_boundary(prop, prop_next).is_end() {
57 break;
58 }
59 prop = prop_next;
60 candidate = self.inner.pos();
61 }
62 self.inner.set(candidate);
63 return Some(candidate);
64 }
65 None
66 }
67
68 pub fn select_word(&mut self) -> (usize, usize) {
71 let initial = self.inner.pos();
72 let init_prop_after = self.inner.next_codepoint().map(get_word_property);
73 self.inner.set(initial);
74 let init_prop_before = self.inner.prev_codepoint().map(get_word_property);
75 let mut start = initial;
76 let init_boundary = if let (Some(pb), Some(pa)) = (init_prop_before, init_prop_after) {
77 classify_boundary_initial(pb, pa)
78 } else {
79 WordBoundary::Both
80 };
81 let mut prop_after = init_prop_after;
82 let mut prop_before = init_prop_before;
83 if prop_after.is_none() {
84 start = self.inner.pos();
85 prop_after = prop_before;
86 prop_before = self.inner.prev_codepoint().map(get_word_property);
87 }
88 while let (Some(pb), Some(pa)) = (prop_before, prop_after) {
89 if start == initial {
90 if init_boundary.is_start() {
91 break;
92 }
93 } else if !init_boundary.is_boundary() {
94 if classify_boundary(pb, pa).is_boundary() {
95 break;
96 }
97 } else if classify_boundary(pb, pa).is_start() {
98 break;
99 }
100 start = self.inner.pos();
101 prop_after = prop_before;
102 prop_before = self.inner.prev_codepoint().map(get_word_property);
103 }
104 self.inner.set(initial);
105 let mut end = initial;
106 prop_after = init_prop_after;
107 prop_before = init_prop_before;
108 if prop_before.is_none() {
109 prop_before = self.inner.next_codepoint().map(get_word_property);
110 end = self.inner.pos();
111 prop_after = self.inner.next_codepoint().map(get_word_property);
112 }
113 while let (Some(pb), Some(pa)) = (prop_before, prop_after) {
114 if end == initial {
115 if init_boundary.is_end() {
116 break;
117 }
118 } else if !init_boundary.is_boundary() {
119 if classify_boundary(pb, pa).is_boundary() {
120 break;
121 }
122 } else if classify_boundary(pb, pa).is_end() {
123 break;
124 }
125 end = self.inner.pos();
126 prop_before = prop_after;
127 prop_after = self.inner.next_codepoint().map(get_word_property);
128 }
129 self.inner.set(end);
130 (start, end)
131 }
132}
133
/// How the position between two adjacent codepoints relates to words.
///
/// `Debug`, `Clone` and `Copy` are derived in addition to the equality
/// traits so values can be printed in assertions and passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WordBoundary {
    /// Not a boundary: the position lies inside a run.
    Interior,
    /// A word begins at this position.
    Start,
    /// A word ends at this position.
    End,
    /// The position is both the end of one word and the start of another.
    Both,
}
141
142impl WordBoundary {
143 fn is_start(&self) -> bool {
144 *self == WordBoundary::Start || *self == WordBoundary::Both
145 }
146
147 fn is_end(&self) -> bool {
148 *self == WordBoundary::End || *self == WordBoundary::Both
149 }
150
151 fn is_boundary(&self) -> bool {
152 *self != WordBoundary::Interior
153 }
154}
155
156fn classify_boundary(prev: WordProperty, next: WordProperty) -> WordBoundary {
157 use self::WordBoundary::*;
158 use self::WordProperty::*;
159 match (prev, next) {
160 (Lf, _) => Both,
161 (_, Lf) => Both,
162 (Space, Other) => Start,
163 (Space, Punctuation) => Start,
164 (Punctuation, Other) => Start,
165 (Other, Space) => End,
166 (Punctuation, Space) => End,
167 (Other, Punctuation) => End,
168 _ => Interior,
169 }
170}
171
172fn classify_boundary_initial(prev: WordProperty, next: WordProperty) -> WordBoundary {
173 use self::WordBoundary::*;
174 use self::WordProperty::*;
175 match (prev, next) {
176 (Lf, Other) => Start,
177 (Other, Lf) => End,
178 (Lf, Space) => Interior,
179 (Lf, Punctuation) => Interior,
180 (Space, Lf) => Interior,
181 (Punctuation, Lf) => Interior,
182 (Space, Punctuation) => Interior,
183 (Punctuation, Space) => Interior,
184 _ => classify_boundary(prev, next),
185 }
186}
187
/// Coarse character class used to decide where words begin and end.
///
/// `Debug`, `PartialEq` and `Eq` are derived in addition to `Copy`/`Clone`
/// so classifications can be compared and printed in assertions.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum WordProperty {
    /// The line feed character `'\n'`.
    Lf,
    /// Any other codepoint at or below `' '` (U+0020).
    Space,
    /// ASCII punctuation, with the exception of `_`.
    Punctuation,
    /// Everything else: letters, digits, `_`, DEL and all non-ASCII
    /// codepoints.
    Other,
}
195
196fn get_word_property(codepoint: char) -> WordProperty {
197 if codepoint <= ' ' {
198 if codepoint == '\n' {
200 return WordProperty::Lf;
201 }
202 return WordProperty::Space;
203 } else if codepoint <= '\u{3f}' {
204 if (0xfc00fffe00000000u64 >> (codepoint as u32)) & 1 != 0 {
206 return WordProperty::Punctuation;
207 }
208 } else if codepoint <= '\u{7f}' {
209 if (0x7800000178000001u64 >> ((codepoint as u32) & 0x3f)) & 1 != 0 {
211 return WordProperty::Punctuation;
212 }
213 }
214 WordProperty::Other
215}