1use crate::license_detection::index::LicenseIndex;
4use crate::license_detection::index::dictionary::{KnownToken, QueryToken, TokenId, TokenKind};
5use crate::license_detection::models::PositionSpan;
6use crate::license_detection::position_set::PositionSet;
7use crate::license_detection::spdx_lid::split_spdx_lid;
8use crate::license_detection::tokenize::STOPWORDS;
9use crate::license_detection::tokenize::tokenize_as_ids;
10use once_cell::sync::Lazy;
11use regex::Regex;
12use std::cell::{OnceCell, RefCell};
13use std::collections::HashMap;
14
/// Regex for one query token: one or more characters from the `[^_\W]` class (word
/// characters other than `_`), optionally followed by a single `+` and more of the same.
static QUERY_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^_\W]+\+?[^_\W]*").expect("valid query regex"));
/// Regex used to split text for matched-text rendering.
///
/// The `token` group has the same shape as [`QUERY_PATTERN`]; the `punct` group captures the
/// runs between tokens (non-word characters, `_`, whitespace and `+`).
static MATCHED_TEXT_PATTERN: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?P<token>[^_\W]+\+?[^_\W]*)|(?P<punct>[_\W\s\+]+[_\W\s]?)")
        .expect("valid matched text regex")
});
21
/// One piece of the original text produced by `tokenize_matched_text`: either a word-like
/// token or a run of punctuation/whitespace.
#[derive(Clone)]
struct MatchedTextToken {
    // Text of the piece (original casing, or a lowercased sub-token when a word was split).
    value: String,
    // 1-based number of the line the piece was found on.
    line_num: usize,
    // Position among dictionary-known tokens; `None` for punctuation and for words that are
    // stopwords or not in the dictionary.
    pos: Option<usize>,
    // `true` for `token` captures, `false` for `punct` captures.
    is_text: bool,
    // Starts as `false`; set by `collect_reportable_tokens` when `pos` is a matched position.
    is_matched: bool,
}
30
/// A tokenized view of an input text, built against a [`LicenseIndex`].
///
/// Positions used throughout are 0-based indices into `tokens`, i.e. they count only tokens
/// that are known to the index dictionary.
#[derive(Debug)]
pub struct Query<'a> {
    /// Owned copy of the text the query was built from.
    pub text: String,

    /// Ids of the dictionary-known tokens, in order of appearance.
    pub tokens: Vec<TokenId>,

    /// 1-based line number of each entry in `tokens` (same length and order).
    pub line_by_pos: Vec<usize>,

    /// Number of unknown tokens seen after each known position; the `None` key counts
    /// unknown tokens that appear before the first known token.
    pub unknowns_by_pos: HashMap<Option<usize>, usize>,

    /// Number of stopwords seen after each known position; the `None` key counts stopwords
    /// that appear before the first known token.
    pub stopwords_by_pos: HashMap<Option<usize>, usize>,

    /// Positions whose known token is flagged `is_short_or_digit`.
    pub shorts_and_digits_pos: PositionSet,

    /// Positions whose token kind is `TokenKind::Legalese`; shrinks via [`Query::subtract`].
    pub high_matchables: PositionSet,

    /// Positions whose token kind is `TokenKind::Regular`; shrinks via [`Query::subtract`].
    pub low_matchables: PositionSet,

    /// Whether the text is treated as binary-derived (caller-supplied or heuristically detected).
    pub is_binary: bool,

    /// `(start, end)` position ranges of the query runs; `end` is the last position of the run.
    pub(crate) query_run_ranges: Vec<(usize, Option<usize>)>,

    /// SPDX identifier lines as `(text, start_pos, end_pos)`, where `end_pos` is one past the
    /// last known position on the line.
    pub spdx_lines: Vec<(String, usize, usize)>,

    /// Index the query was tokenized against.
    pub index: &'a LicenseIndex,
}
122
/// Returns the lines `start_line..=end_line` (1-based, inclusive) of `text`, joined with `\n`.
///
/// An empty string is returned when either bound is `0` or when `start_line > end_line`.
/// Lines are split with [`str::lines`], so original `\r\n` terminators are not preserved.
pub fn matched_text_from_text(text: &str, start_line: usize, end_line: usize) -> String {
    let range_is_valid = start_line >= 1 && end_line >= 1 && start_line <= end_line;
    if !range_is_valid {
        return String::new();
    }

    // `start_line >= 1`, so neither subtraction can underflow and `wanted <= end_line`.
    let skipped = start_line - 1;
    let wanted = end_line - skipped;

    let selected: Vec<&str> = text.lines().skip(skipped).take(wanted).collect();
    selected.join("\n")
}
141
142pub fn matched_text_diagnostics_from_text(
143 text: &str,
144 query: &Query<'_>,
145 matched_positions: &PositionSet,
146 start_pos: usize,
147 end_pos: usize,
148 start_line: usize,
149 end_line: usize,
150) -> String {
151 let tokens = tokenize_matched_text(text, query);
152 let reportable_tokens = collect_reportable_tokens(
153 tokens,
154 matched_positions,
155 start_pos,
156 end_pos,
157 start_line,
158 end_line,
159 );
160 let line_endings = collect_line_endings(text);
161
162 render_diagnostic_tokens(&reportable_tokens, &line_endings)
163}
164
165fn tokenize_matched_text(text: &str, query: &Query<'_>) -> Vec<MatchedTextToken> {
166 let mut tokens = Vec::new();
167 let mut pos = 0usize;
168 let mut line_num = 1usize;
169
170 for line in text.split_inclusive('\n') {
171 for capture in MATCHED_TEXT_PATTERN.captures_iter(line) {
172 if let Some(token_match) = capture.name("token") {
173 let token_text = token_match.as_str();
174 let retokenized: Vec<String> = QUERY_PATTERN
175 .find_iter(&token_text.to_lowercase())
176 .map(|m| m.as_str().to_string())
177 .filter(|token| !STOPWORDS.contains(token.as_str()))
178 .collect();
179
180 if retokenized.is_empty() {
181 tokens.push(MatchedTextToken {
182 value: token_text.to_string(),
183 line_num,
184 pos: None,
185 is_text: true,
186 is_matched: false,
187 });
188 } else if retokenized.len() == 1 {
189 let token = &retokenized[0];
190 let token_pos = if query.index.dictionary.get(token).is_some() {
191 let current_pos = pos;
192 pos += 1;
193 Some(current_pos)
194 } else {
195 None
196 };
197
198 tokens.push(MatchedTextToken {
199 value: token_text.to_string(),
200 line_num,
201 pos: token_pos,
202 is_text: true,
203 is_matched: false,
204 });
205 } else {
206 for token in retokenized {
207 let token_pos = if query.index.dictionary.get(&token).is_some() {
208 let current_pos = pos;
209 pos += 1;
210 Some(current_pos)
211 } else {
212 None
213 };
214
215 tokens.push(MatchedTextToken {
216 value: token,
217 line_num,
218 pos: token_pos,
219 is_text: true,
220 is_matched: false,
221 });
222 }
223 }
224 } else if let Some(punct_match) = capture.name("punct") {
225 tokens.push(MatchedTextToken {
226 value: punct_match.as_str().to_string(),
227 line_num,
228 pos: None,
229 is_text: false,
230 is_matched: false,
231 });
232 }
233 }
234
235 line_num += 1;
236 }
237
238 tokens
239}
240
/// Selects the tokens to show for a match and flags the matched ones.
///
/// Tokens on lines before `start_line` are skipped and iteration stops at the first token
/// past `end_line`. Within that window a token is kept when its position is in
/// `matched_positions`, when it lies between the tokens at `start_pos` and `end_pos`
/// (inclusive), or when it is a non-blank punctuation piece immediately following the
/// token at `end_pos`.
fn collect_reportable_tokens(
    tokens: Vec<MatchedTextToken>,
    matched_positions: &PositionSet,
    start_pos: usize,
    end_pos: usize,
    start_line: usize,
    end_line: usize,
) -> Vec<MatchedTextToken> {
    let mut reportable = Vec::new();
    let mut started = false;
    let mut finished = false;
    // Index (in the full token list) of the token at `end_pos`, while still "fresh".
    let mut end_real_pos = None;
    // Index of the previously processed in-window token.
    let mut last_real_pos = None;

    for (real_pos, mut token) in tokens.into_iter().enumerate() {
        if token.line_num < start_line {
            continue;
        }

        if token.line_num > end_line {
            break;
        }

        let mut is_included = false;

        if token.pos.is_some_and(|pos| matched_positions.contains(pos)) {
            token.is_matched = true;
            is_included = true;
        }

        if !started && token.pos == Some(start_pos) {
            started = true;
            is_included = true;
        }

        // Everything between the start and end tokens is reported, matched or not.
        if started && !finished {
            is_included = true;
        }

        if token.pos == Some(end_pos) {
            finished = true;
            started = false;
            end_real_pos = Some(real_pos);
        }

        // True only for the token processed right after the end token: keep trailing
        // punctuation that is not just whitespace.
        if finished && !started && end_real_pos.is_some() && last_real_pos == end_real_pos {
            end_real_pos = None;
            if !token.is_text && !token.value.trim().is_empty() {
                is_included = true;
            }
        }

        last_real_pos = Some(real_pos);

        if is_included {
            reportable.push(token);
        }
    }

    reportable
}
302
/// Returns the terminator of every line in `text`, in order: `"\r\n"`, `"\n"`, or `""` for a
/// final line that has no terminator.
fn collect_line_endings(text: &str) -> Vec<String> {
    let mut endings = Vec::new();

    for segment in text.split_inclusive('\n') {
        let ending = match segment.strip_suffix('\n') {
            Some(rest) if rest.ends_with('\r') => "\r\n",
            Some(_) => "\n",
            None => "",
        };
        endings.push(ending.to_owned());
    }

    endings
}
316
317fn render_diagnostic_tokens(tokens: &[MatchedTextToken], line_endings: &[String]) -> String {
318 let mut rendered = String::new();
319 let mut previous_line: Option<usize> = None;
320
321 for token in tokens {
322 if let Some(prev_line) = previous_line
323 && token.line_num > prev_line
324 {
325 for line in prev_line..token.line_num {
326 if let Some(line_ending) = line_endings.get(line.saturating_sub(1)) {
327 rendered.push_str(line_ending.as_str());
328 }
329 }
330 }
331
332 let token_value = if token.is_text {
333 token.value.as_str()
334 } else {
335 token
336 .value
337 .strip_suffix("\r\n")
338 .or_else(|| token.value.strip_suffix('\n'))
339 .unwrap_or(token.value.as_str())
340 };
341
342 if token.is_text && !STOPWORDS.contains(token.value.to_lowercase().as_str()) {
343 if token.is_matched {
344 rendered.push_str(token_value);
345 } else {
346 rendered.push('[');
347 rendered.push_str(token_value);
348 rendered.push(']');
349 }
350 } else {
351 rendered.push_str(token_value);
352 }
353
354 previous_line = Some(token.line_num);
355 }
356
357 rendered
358}
359
360impl<'a> Query<'a> {
    /// Line threshold passed to query-run splitting for text that is not binary-derived.
    const TEXT_LINE_THRESHOLD: usize = 15;
    /// Line threshold passed to query-run splitting for binary-derived text.
    const BINARY_LINE_THRESHOLD: usize = 50;
    /// Maximum number of tokens per line before `break_long_lines` splits the line.
    const MAX_TOKEN_PER_LINE: usize = 25;
378
379 fn compute_spdx_offset(
380 tokens: &[QueryToken],
381 dictionary: &crate::license_detection::index::dictionary::TokenDictionary,
382 ) -> Option<usize> {
383 let get_known_id = |i: usize| -> Option<TokenId> {
384 match tokens.get(i)? {
385 QueryToken::Known(known) => Some(known.id),
386 _ => None,
387 }
388 };
389
390 let spdx_id = dictionary.get("spdx")?;
391 let license_id = dictionary.get("license")?;
392 let identifier_id = dictionary.get("identifier")?;
393 let licence_id = dictionary.get("licence");
394
395 let licenses_id = dictionary.get("licenses");
396 let nuget_id = dictionary.get("nuget");
397 let org_id = dictionary.get("org");
398
399 let is_spdx_prefix = |ids: [Option<TokenId>; 3]| -> bool {
400 ids.iter().all(|id| id.is_some())
401 && ids[0] == Some(spdx_id)
402 && (ids[1] == Some(license_id) || ids[1] == licence_id)
403 && ids[2] == Some(identifier_id)
404 };
405
406 let is_nuget_prefix = |ids: [Option<TokenId>; 3]| -> bool {
407 licenses_id.is_some()
408 && nuget_id.is_some()
409 && org_id.is_some()
410 && ids[0] == licenses_id
411 && ids[1] == Some(nuget_id.unwrap())
412 && ids[2] == Some(org_id.unwrap())
413 };
414
415 if tokens.len() >= 3 {
416 let first_three = [get_known_id(0), get_known_id(1), get_known_id(2)];
417 if is_spdx_prefix(first_three) || is_nuget_prefix(first_three) {
418 return Some(0);
419 }
420 }
421
422 if tokens.len() >= 4 {
423 let second_three = [get_known_id(1), get_known_id(2), get_known_id(3)];
424 if is_spdx_prefix(second_three) || is_nuget_prefix(second_three) {
425 return Some(1);
426 }
427 }
428
429 if tokens.len() >= 5 {
430 let third_three = [get_known_id(2), get_known_id(3), get_known_id(4)];
431 if is_spdx_prefix(third_three) || is_nuget_prefix(third_three) {
432 return Some(2);
433 }
434 }
435
436 None
437 }
438
    /// Builds a query from already-extracted text.
    ///
    /// `binary_derived` is recorded as the query's `is_binary` flag (no heuristic detection
    /// is run) and selects the line threshold used for query-run splitting:
    /// `BINARY_LINE_THRESHOLD` when `true`, `TEXT_LINE_THRESHOLD` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `with_source_options`.
    pub fn from_extracted_text(
        text: &str,
        index: &'a LicenseIndex,
        binary_derived: bool,
    ) -> Result<Self, anyhow::Error> {
        let line_threshold = if binary_derived {
            Self::BINARY_LINE_THRESHOLD
        } else {
            Self::TEXT_LINE_THRESHOLD
        };

        Self::with_source_options(text, index, line_threshold, Some(binary_derived))
    }
452
453 pub fn query_runs(&self) -> Vec<QueryRun<'_>> {
457 self.query_run_ranges
458 .iter()
459 .map(|&(start, end)| QueryRun::new(self, start, end))
460 .collect()
461 }
462
    /// Tokenizes `text` line by line against `index` and assembles the [`Query`].
    ///
    /// * `line_threshold` is forwarded to `compute_query_runs`.
    /// * `binary_derived`: `Some(flag)` is used directly as `is_binary`; `None` falls back to
    ///   `detect_binary`.
    ///
    /// # Errors
    ///
    /// Returns the error from `detect_binary`, if any.
    fn with_source_options(
        text: &str,
        index: &'a LicenseIndex,
        line_threshold: usize,
        binary_derived: Option<bool>,
    ) -> Result<Self, anyhow::Error> {
        let is_binary = match binary_derived {
            Some(is_binary) => is_binary,
            None => Self::detect_binary(text)?,
        };
        let has_long_lines = Self::detect_long_lines(text);

        let mut tokens = Vec::new();
        let mut line_by_pos = Vec::new();
        let mut unknowns_by_pos: HashMap<Option<usize>, usize> = HashMap::new();
        let mut stopwords_by_pos: HashMap<Option<usize>, usize> = HashMap::new();
        let mut shorts_and_digits_pos = PositionSet::new();
        let mut spdx_lines: Vec<(String, usize, usize)> = Vec::new();

        // Position of the most recent known token; `None` until the first one is seen.
        let mut known_pos: Option<usize> = None;
        let mut started = false;
        let mut current_line = 1usize;

        // Per line: `Some(token)` for known tokens, `None` for unknown ones (stopwords are
        // not recorded). Used to compute the query runs.
        let mut tokens_by_line: Vec<Vec<Option<KnownToken>>> = Vec::new();

        for line in text.lines() {
            let line_trimmed = line.trim();
            let mut line_tokens: Vec<Option<KnownToken>> = Vec::new();

            let mut line_first_known_pos = None;

            let line_query_tokens = tokenize_as_ids(line_trimmed, &index.dictionary);

            for query_token in &line_query_tokens {
                match query_token {
                    QueryToken::Known(known_token) => {
                        known_pos = Some(known_pos.map_or(0, |p| p + 1));
                        started = true;
                        tokens.push(known_token.id);
                        line_by_pos.push(current_line);
                        line_tokens.push(Some(*known_token));

                        if line_first_known_pos.is_none() {
                            line_first_known_pos = known_pos;
                        }

                        if known_token.is_short_or_digit {
                            // `known_pos` was set to `Some` at the top of this arm.
                            let _ = shorts_and_digits_pos.insert(known_pos.unwrap());
                        }
                    }
                    // Unknown tokens and stopwords are counted against the preceding known
                    // position, or against `None` before any known token has been seen.
                    QueryToken::Unknown if !started => {
                        *unknowns_by_pos.entry(None).or_insert(0) += 1;
                        line_tokens.push(None);
                    }
                    QueryToken::Unknown => {
                        *unknowns_by_pos.entry(known_pos).or_insert(0) += 1;
                        line_tokens.push(None);
                    }
                    QueryToken::Stopword if !started => {
                        *stopwords_by_pos.entry(None).or_insert(0) += 1;
                    }
                    QueryToken::Stopword => {
                        *stopwords_by_pos.entry(known_pos).or_insert(0) += 1;
                    }
                }
            }

            let line_last_known_pos = known_pos;

            let spdx_start_offset =
                Self::compute_spdx_offset(&line_query_tokens, &index.dictionary);

            if let Some(offset) = spdx_start_offset
                && let Some(line_first_known_pos) = line_first_known_pos
            {
                // Note: the untrimmed line is passed to `split_spdx_lid`.
                let (spdx_prefix, spdx_expression) = split_spdx_lid(line);
                let spdx_text = format!("{}{}", spdx_prefix.unwrap_or_default(), spdx_expression);
                let spdx_start_known_pos = line_first_known_pos + offset;

                // `line_first_known_pos` is `Some`, so a known token exists and
                // `line_last_known_pos` is `Some` as well.
                if spdx_start_known_pos <= line_last_known_pos.unwrap() {
                    let spdx_end = line_last_known_pos.unwrap() + 1;
                    spdx_lines.push((spdx_text, spdx_start_known_pos, spdx_end));
                }
            }

            tokens_by_line.push(line_tokens);
            current_line += 1;
        }

        let high_matchables: PositionSet = tokens
            .iter()
            .enumerate()
            .filter(|(_pos, tid)| index.dictionary.token_kind(**tid) == TokenKind::Legalese)
            .map(|(pos, _tid)| pos)
            .collect();

        let low_matchables: PositionSet = tokens
            .iter()
            .enumerate()
            .filter(|(_pos, tid)| index.dictionary.token_kind(**tid) == TokenKind::Regular)
            .map(|(pos, _tid)| pos)
            .collect();

        let query_runs = Self::compute_query_runs(&tokens_by_line, line_threshold, has_long_lines);

        Ok(Query {
            text: text.to_string(),
            tokens,
            line_by_pos,
            unknowns_by_pos,
            stopwords_by_pos,
            shorts_and_digits_pos,
            high_matchables,
            low_matchables,
            is_binary,
            query_run_ranges: query_runs,
            spdx_lines,
            index,
        })
    }
583
584 fn detect_binary(text: &str) -> Result<bool, anyhow::Error> {
598 let null_byte_count = text.bytes().filter(|&b| b == 0).count();
599
600 if null_byte_count > 0 {
601 return Ok(true);
602 }
603
604 let non_printable_ratio = text
605 .chars()
606 .filter(|&c| {
607 !c.is_ascii() && !c.is_ascii_graphic() && c != '\n' && c != '\r' && c != '\t'
608 })
609 .count() as f64
610 / text.len().max(1) as f64;
611
612 Ok(non_printable_ratio > 0.3)
613 }
614
615 fn detect_long_lines(text: &str) -> bool {
625 text.lines()
626 .any(|line| crate::license_detection::tokenize::count_tokens(line) > 25)
627 }
628
629 fn break_long_lines(lines: &[Vec<Option<KnownToken>>]) -> Vec<Vec<Option<KnownToken>>> {
630 lines
631 .iter()
632 .flat_map(|line| {
633 if line.is_empty() {
634 return Vec::new();
635 }
636
637 if line.len() <= Self::MAX_TOKEN_PER_LINE {
638 vec![line.clone()]
639 } else {
640 line.chunks(Self::MAX_TOKEN_PER_LINE)
641 .map(|chunk| chunk.to_vec())
642 .collect()
643 }
644 })
645 .collect()
646 }
647
    /// Splits the token stream into query runs, returned as `(start, Some(last_pos))` ranges
    /// over known-token positions.
    ///
    /// A run is closed once `line_threshold` or more consecutive "empty" lines have been
    /// counted since the last line containing a `Legalese` token. Lines count as empty when
    /// they have no tokens, no known tokens, only digit tokens, or no `Legalese` token.
    /// Runs made up solely of digit-only tokens are discarded.
    fn compute_query_runs(
        tokens_by_line: &[Vec<Option<KnownToken>>],
        line_threshold: usize,
        has_long_lines: bool,
    ) -> Vec<(usize, Option<usize>)> {
        let processed_lines = if has_long_lines {
            Self::break_long_lines(tokens_by_line)
        } else {
            tokens_by_line.to_vec()
        };

        let mut query_runs = Vec::new();
        let mut query_run_start = 0usize;
        // Last position assigned to the current run; `None` while the run has no token yet.
        let mut query_run_end = None;
        let mut empty_lines = 0usize;
        // Next known-token position to hand out.
        let mut pos = 0usize;
        let mut query_run_is_all_digit = true;

        for line_tokens in processed_lines {
            // Enough empty lines since the last useful one: close the current run.
            if query_run_end.is_some() && empty_lines >= line_threshold {
                if !query_run_is_all_digit {
                    query_runs.push((query_run_start, query_run_end));
                }
                query_run_start = pos;
                query_run_end = None;
                empty_lines = 0;
                query_run_is_all_digit = true;
            }

            // A run without tokens starts at the next position to be assigned.
            if query_run_end.is_none() {
                query_run_start = pos;
            }

            if line_tokens.is_empty() {
                empty_lines += 1;
                continue;
            }

            // Unknown entries (`None`) do not prevent a line from counting as all-digit.
            let line_is_all_digit = line_tokens
                .iter()
                .all(|token_id| token_id.map(|known| known.is_digit_only).unwrap_or(true));
            let mut line_has_known_tokens = false;
            let mut line_has_good_tokens = false;

            // Only known tokens consume a position.
            for known in line_tokens.into_iter().flatten() {
                line_has_known_tokens = true;
                if known.kind == TokenKind::Legalese {
                    line_has_good_tokens = true;
                }
                if !known.is_digit_only {
                    query_run_is_all_digit = false;
                }
                query_run_end = Some(pos);
                pos += 1;
            }

            if line_is_all_digit || !line_has_known_tokens {
                empty_lines += 1;
                continue;
            }

            // Only a line with a `Legalese` token resets the empty-line counter.
            if line_has_good_tokens {
                empty_lines = 0;
            } else {
                empty_lines += 1;
            }
        }

        // Flush the trailing run.
        if let Some(end) = query_run_end
            && !query_run_is_all_digit
        {
            query_runs.push((query_run_start, Some(end)));
        }

        query_runs
    }
724
    /// Returns the line number recorded for token position `pos`, or `None` when `pos` is
    /// out of range.
    #[inline]
    pub fn line_for_pos(&self, pos: usize) -> Option<usize> {
        self.line_by_pos.get(pos).copied()
    }
738
    /// Returns `true` when the query contains no known tokens.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }
744
    /// Returns a [`QueryRun`] spanning every token of the query.
    ///
    /// The run is built from a snapshot (clones of the tokens, line numbers and matchable
    /// sets), so later changes to this query are not reflected in it.
    pub fn whole_query_run(&self) -> QueryRun<'a> {
        QueryRun::whole_query_snapshot(self)
    }
751
    /// Removes the positions covered by `span` from both the high and low matchable sets.
    pub fn subtract(&mut self, span: &PositionSpan) {
        self.high_matchables.remove_span(span);
        self.low_matchables.remove_span(span);
    }
764
    /// Returns lines `start_line..=end_line` of the query text; see
    /// [`matched_text_from_text`] for the exact rules.
    pub fn matched_text(&self, start_line: usize, end_line: usize) -> String {
        matched_text_from_text(&self.text, start_line, end_line)
    }
781}
782
/// Owned copy of the parts of a [`Query`] that a whole-query [`QueryRun`] reads, so the run
/// does not have to borrow the query itself.
#[derive(Debug, Clone)]
struct WholeQueryRunSnapshot<'a> {
    // Index the query was built against (still borrowed).
    index: &'a LicenseIndex,
    // Clone of `Query::tokens`.
    tokens: Vec<TokenId>,
    // Clone of `Query::line_by_pos`.
    line_by_pos: Vec<usize>,
    // Clone of `Query::high_matchables` at snapshot time.
    high_matchables: PositionSet,
    // Clone of `Query::low_matchables` at snapshot time.
    low_matchables: PositionSet,
}
791
/// A contiguous range of token positions within a query.
///
/// The data comes either from a borrowed [`Query`] (`query` is `Some`) or from an owned
/// snapshot (`whole_query_snapshot` is `Some`); the constructors set exactly one of the two.
#[derive(Debug, Clone)]
pub struct QueryRun<'a> {
    // Borrowed source query, set by `QueryRun::new`.
    query: Option<&'a Query<'a>>,
    // Owned source data, set by `QueryRun::whole_query_snapshot`.
    whole_query_snapshot: Option<WholeQueryRunSnapshot<'a>>,
    /// First token position of the run.
    pub start: usize,
    /// Last token position of the run (inclusive); `None` for a run without tokens.
    pub end: Option<usize>,
    // Lazily computed high matchables restricted to this run.
    cached_high_matchables: OnceCell<PositionSet>,
    // Lazily computed low matchables restricted to this run.
    cached_low_matchables: OnceCell<PositionSet>,
    // Lazily computed union of the low and high matchables of this run.
    combined_matchables: RefCell<Option<PositionSet>>,
}
809
810impl<'a> QueryRun<'a> {
    /// Creates a run over positions `start..=end` of `query`, borrowing the query.
    ///
    /// `end` is the last position of the run; `None` makes [`QueryRun::tokens`] return an
    /// empty slice. All caches start empty.
    pub fn new(query: &'a Query<'a>, start: usize, end: Option<usize>) -> Self {
        Self {
            query: Some(query),
            whole_query_snapshot: None,
            start,
            end,
            cached_high_matchables: OnceCell::new(),
            cached_low_matchables: OnceCell::new(),
            combined_matchables: RefCell::new(None),
        }
    }
830
831 fn whole_query_snapshot(query: &Query<'a>) -> Self {
832 let end = if query.is_empty() {
833 None
834 } else {
835 Some(query.tokens.len() - 1)
836 };
837
838 Self {
839 query: None,
840 whole_query_snapshot: Some(WholeQueryRunSnapshot {
841 index: query.index,
842 tokens: query.tokens.clone(),
843 line_by_pos: query.line_by_pos.clone(),
844 high_matchables: query.high_matchables.clone(),
845 low_matchables: query.low_matchables.clone(),
846 }),
847 start: 0,
848 end,
849 cached_high_matchables: OnceCell::new(),
850 cached_low_matchables: OnceCell::new(),
851 combined_matchables: RefCell::new(None),
852 }
853 }
854
855 fn source_tokens(&self) -> &[TokenId] {
856 if let Some(query) = self.query {
857 &query.tokens
858 } else {
859 &self
860 .whole_query_snapshot
861 .as_ref()
862 .expect("snapshot-backed whole query run should have snapshot data")
863 .tokens
864 }
865 }
866
867 fn source_line_by_pos(&self) -> &[usize] {
868 if let Some(query) = self.query {
869 &query.line_by_pos
870 } else {
871 &self
872 .whole_query_snapshot
873 .as_ref()
874 .expect("snapshot-backed whole query run should have snapshot data")
875 .line_by_pos
876 }
877 }
878
879 fn source_high_matchables(&self) -> &PositionSet {
880 if let Some(query) = self.query {
881 &query.high_matchables
882 } else {
883 &self
884 .whole_query_snapshot
885 .as_ref()
886 .expect("snapshot-backed whole query run should have snapshot data")
887 .high_matchables
888 }
889 }
890
891 fn source_low_matchables(&self) -> &PositionSet {
892 if let Some(query) = self.query {
893 &query.low_matchables
894 } else {
895 &self
896 .whole_query_snapshot
897 .as_ref()
898 .expect("snapshot-backed whole query run should have snapshot data")
899 .low_matchables
900 }
901 }
902
903 pub fn get_index(&self) -> &LicenseIndex {
905 if let Some(query) = self.query {
906 query.index
907 } else {
908 self.whole_query_snapshot
909 .as_ref()
910 .expect("snapshot-backed whole query run should have snapshot data")
911 .index
912 }
913 }
914
    /// Returns the line number recorded for token position `pos` in the underlying source,
    /// or `None` when `pos` is out of range. `pos` is not checked against this run's bounds.
    pub fn line_for_pos(&self, pos: usize) -> Option<usize> {
        self.source_line_by_pos().get(pos).copied()
    }
925
926 pub fn tokens(&self) -> &[TokenId] {
932 match self.end {
933 Some(end) => &self.source_tokens()[self.start..=end],
934 None => &[],
935 }
936 }
937
938 pub fn tokens_with_pos(&self) -> impl Iterator<Item = (usize, TokenId)> + '_ {
942 self.tokens()
943 .iter()
944 .copied()
945 .enumerate()
946 .map(|(i, tid)| (self.start + i, tid))
947 }
948
949 pub fn is_digits_only(&self) -> bool {
953 self.tokens()
954 .iter()
955 .all(|&tid| self.get_index().dictionary.is_digit_only_token(tid))
956 }
957
958 pub fn is_matchable(&self, include_low: bool, exclude_positions: &[PositionSpan]) -> bool {
968 if self.is_digits_only() {
969 return false;
970 }
971
972 let matchables = self.matchables(include_low);
973
974 if exclude_positions.is_empty() {
975 return !matchables.is_empty();
976 }
977
978 let mut matchable_set = matchables;
979 for span in exclude_positions {
980 matchable_set.remove_span(span);
981 }
982
983 !matchable_set.is_empty()
984 }
985
986 pub fn matchables(&self, include_low: bool) -> PositionSet {
987 if include_low {
988 if let Some(ref cached) = *self.combined_matchables.borrow() {
989 return cached.clone();
990 }
991 let combined = self.low_matchables().union(&self.high_matchables());
992 *self.combined_matchables.borrow_mut() = Some(combined.clone());
993 combined
994 } else {
995 self.high_matchables()
996 }
997 }
998
999 pub fn matchable_tokens(&self) -> Vec<Option<TokenId>> {
1000 let high_matchables = self.high_matchables();
1001 if high_matchables.is_empty() {
1002 return Vec::new();
1003 }
1004
1005 let matchables = self.matchables(true);
1006 self.tokens_with_pos()
1007 .map(|(pos, tid)| {
1008 if matchables.contains(pos) {
1009 Some(tid)
1010 } else {
1011 None
1012 }
1013 })
1014 .collect()
1015 }
1016
1017 pub fn high_matchables(&self) -> PositionSet {
1018 self.cached_high_matchables
1019 .get_or_init(|| {
1020 let start = self.start;
1021 let end = self.end.map(|e| e + 1).unwrap_or(usize::MAX);
1022 let source = self.source_high_matchables();
1023 let live_span = PositionSpan::new(start, end);
1024 source
1025 .iter()
1026 .filter(|&pos| live_span.contains(pos))
1027 .collect()
1028 })
1029 .clone()
1030 }
1031
1032 pub fn low_matchables(&self) -> PositionSet {
1033 self.cached_low_matchables
1034 .get_or_init(|| {
1035 let start = self.start;
1036 let end = self.end.map(|e| e + 1).unwrap_or(usize::MAX);
1037 let source = self.source_low_matchables();
1038 let live_span = PositionSpan::new(start, end);
1039 source
1040 .iter()
1041 .filter(|&pos| live_span.contains(pos))
1042 .collect()
1043 })
1044 .clone()
1045 }
1046}
1047
1048#[cfg(test)]
1049mod test;