1mod false_positive;
10pub(crate) mod filter_low_quality;
11mod handle_overlaps;
12mod merge;
13
14use crate::license_detection::index::LicenseIndex;
15use crate::license_detection::models::{LicenseMatch, MatcherKind};
16use crate::license_detection::query::Query;
17
18use filter_low_quality::{
20 filter_below_rule_minimum_coverage, filter_false_positive_matches,
21 filter_invalid_matches_to_single_word_gibberish, filter_matches_missing_required_phrases,
22 filter_matches_to_spurious_single_token, filter_short_matches_scattered_on_too_many_lines,
23 filter_spurious_matches, filter_too_short_matches,
24};
25use merge::{filter_license_references_with_text_match, update_match_scores};
26
27pub use handle_overlaps::{
29 filter_contained_matches, filter_overlapping_matches, restore_non_overlapping,
30};
31pub use merge::merge_overlapping_matches;
32
33pub use false_positive::filter_false_positive_license_lists_matches;
35
/// Inclusive upper bound on `LicenseMatch::len()` below which a `MatcherKind::Seq` match
/// counts as "small" in `split_weak_matches` (where it is combined with a coverage cutoff).
const SMALL_RULE: usize = 15;
37
38pub fn filter_invalid_contained_unknown_matches(
53 unknown_matches: &[LicenseMatch],
54 good_matches: &[LicenseMatch],
55) -> Vec<LicenseMatch> {
56 unknown_matches
57 .iter()
58 .filter(|unknown| {
59 let unknown_start = unknown.start_token;
60 let unknown_end = unknown.end_token;
61
62 let is_contained = good_matches
63 .iter()
64 .any(|good| good.start_token <= unknown_start && good.end_token >= unknown_end);
65
66 !is_contained
67 })
68 .cloned()
69 .collect()
70}
71
72pub fn split_weak_matches(
88 index: &LicenseIndex,
89 matches: &[LicenseMatch],
90) -> (Vec<LicenseMatch>, Vec<LicenseMatch>) {
91 let mut good = Vec::new();
92 let mut weak = Vec::new();
93
94 for m in matches {
95 let is_false_positive = index.false_positive_rids.contains(&m.rid);
96 let is_weak = (!is_false_positive && m.has_unknown())
97 || (m.matcher == MatcherKind::Seq && m.len() <= SMALL_RULE && m.coverage() <= 25.0);
98
99 if is_weak {
100 weak.push(m.clone());
101 } else {
102 good.push(m.clone());
103 }
104 }
105
106 (good, weak)
107}
108
109pub fn refine_matches(
141 index: &LicenseIndex,
142 matches: Vec<LicenseMatch>,
143 query: &Query,
144) -> Vec<LicenseMatch> {
145 refine_matches_internal(index, matches, query, true)
146}
147
148pub fn refine_matches_without_false_positive_filter(
155 index: &LicenseIndex,
156 matches: Vec<LicenseMatch>,
157 query: &Query,
158) -> Vec<LicenseMatch> {
159 refine_matches_internal(index, matches, query, false)
160}
161
162pub fn refine_aho_matches(
174 index: &LicenseIndex,
175 matches: Vec<LicenseMatch>,
176 query: &Query,
177) -> Vec<LicenseMatch> {
178 if matches.is_empty() {
179 return Vec::new();
180 }
181
182 let (with_required_phrases, _missing_phrases) =
183 filter_matches_missing_required_phrases(index, &matches, query);
184
185 let non_spurious = filter_spurious_matches(&with_required_phrases, query);
186
187 let above_min_cov = filter_below_rule_minimum_coverage(index, &non_spurious);
188
189 let non_single_spurious = filter_matches_to_spurious_single_token(&above_min_cov, query, 5);
190
191 let non_short = filter_too_short_matches(index, &non_single_spurious);
192
193 let non_scattered = filter_short_matches_scattered_on_too_many_lines(index, &non_short);
194
195 let non_gibberish =
196 filter_invalid_matches_to_single_word_gibberish(index, &non_scattered, query);
197
198 let merged_again = merge_overlapping_matches(&non_gibberish);
199
200 let merged_again = filter_binary_low_coverage_same_expression_seq_bridges(merged_again, query);
201
202 let (non_contained, discarded_contained) = filter_contained_matches(&merged_again);
203
204 let (kept, discarded_overlapping) = filter_overlapping_matches(non_contained, index);
205
206 let mut matches_after_first_restore = kept.clone();
207
208 if !discarded_contained.is_empty() {
209 let (restored_contained, _) = restore_non_overlapping(&kept, discarded_contained);
210 matches_after_first_restore.extend(restored_contained);
211 }
212
213 let mut final_matches = matches_after_first_restore.clone();
214
215 if !discarded_overlapping.is_empty() {
216 let (restored_overlapping, _) =
217 restore_non_overlapping(&matches_after_first_restore, discarded_overlapping);
218 final_matches.extend(restored_overlapping);
219 }
220
221 let (non_contained_final, _) = filter_contained_matches(&final_matches);
222
223 let filtered_refs = filter_license_references_with_text_match(&non_contained_final);
224
225 let mut final_scored = filtered_refs;
226 update_match_scores(&mut final_scored, query);
227
228 final_scored
229}
230
231fn refine_matches_internal(
232 index: &LicenseIndex,
233 matches: Vec<LicenseMatch>,
234 query: &Query,
235 filter_false_positive: bool,
236) -> Vec<LicenseMatch> {
237 if matches.is_empty() {
238 return Vec::new();
239 }
240
241 let merged = merge_overlapping_matches(&matches);
242
243 let (with_required_phrases, _missing_phrases) =
244 filter_matches_missing_required_phrases(index, &merged, query);
245
246 let non_spurious = filter_spurious_matches(&with_required_phrases, query);
247
248 let above_min_cov = filter_below_rule_minimum_coverage(index, &non_spurious);
249
250 let non_single_spurious = filter_matches_to_spurious_single_token(&above_min_cov, query, 5);
251
252 let non_short = filter_too_short_matches(index, &non_single_spurious);
253
254 let non_scattered = filter_short_matches_scattered_on_too_many_lines(index, &non_short);
255
256 let non_gibberish =
257 filter_invalid_matches_to_single_word_gibberish(index, &non_scattered, query);
258
259 let merged_again = merge_overlapping_matches(&non_gibberish);
260
261 let merged_again = filter_binary_low_coverage_same_expression_seq_bridges(merged_again, query);
262
263 let (non_contained, discarded_contained) = filter_contained_matches(&merged_again);
264
265 let (kept, discarded_overlapping) = filter_overlapping_matches(non_contained, index);
266
267 let mut matches_after_first_restore = kept.clone();
268
269 if !discarded_contained.is_empty() {
270 let (restored_contained, _) = restore_non_overlapping(&kept, discarded_contained);
271 matches_after_first_restore.extend(restored_contained);
272 }
273
274 let mut final_matches = matches_after_first_restore.clone();
275
276 if !discarded_overlapping.is_empty() {
277 let (restored_overlapping, _) =
278 restore_non_overlapping(&matches_after_first_restore, discarded_overlapping);
279 final_matches.extend(restored_overlapping);
280 }
281
282 let (non_contained_final, _) = filter_contained_matches(&final_matches);
283
284 let result = if filter_false_positive {
285 let non_fp = filter_false_positive_matches(index, &non_contained_final);
286 let (kept, _discarded) = filter_false_positive_license_lists_matches(non_fp);
287 kept
288 } else {
289 non_contained_final
290 };
291
292 let merged_final = merge_overlapping_matches(&result);
293
294 let filtered_refs = filter_license_references_with_text_match(&merged_final);
295
296 let mut final_scored = filtered_refs;
297 update_match_scores(&mut final_scored, query);
298
299 final_scored
300}
301
302fn filter_binary_low_coverage_same_expression_seq_bridges(
303 matches: Vec<LicenseMatch>,
304 query: &Query,
305) -> Vec<LicenseMatch> {
306 if !query.is_binary {
307 return matches;
308 }
309
310 matches
311 .iter()
312 .filter(|m| {
313 if m.matcher != MatcherKind::Seq || m.coverage() >= 90.0 {
314 return true;
315 }
316
317 !matches.iter().any(|other| {
318 other.matcher == MatcherKind::Aho
319 && other.coverage() == 100.0
320 && other.license_expression == m.license_expression
321 && other.qoverlap(m) > 0
322 && !m.qcontains(other)
323 })
324 })
325 .cloned()
326 .collect()
327}
328
// Unit tests for the refinement entry points and helpers defined in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::license_detection::models::MatchCoordinates;
    use crate::license_detection::models::position_span::PositionSpan;
    use crate::models::LineNumber;
    use crate::models::MatchScore;

    /// Parses a rule identifier such as `"#12"` or `"12"` into a number.
    ///
    /// Surrounding whitespace and one leading `#` are ignored; returns `None` when the
    /// remainder is not a valid `usize`.
    fn parse_rule_id(rule_identifier: &str) -> Option<usize> {
        let trimmed = rule_identifier.trim();
        if let Some(stripped) = trimmed.strip_prefix('#') {
            stripped.parse().ok()
        } else {
            trimmed.parse().ok()
        }
    }

    /// Builds an `Aho` match for the `mit` expression to use as a fixture.
    ///
    /// The token span mirrors the line span (`start_token = start_line`,
    /// `end_token = end_line + 1`), and both `matched_length` and `rule_length` are set to the
    /// number of lines covered. `rid` is parsed from `rule_identifier` and falls back to `0`.
    /// Callers must pass `end_line >= start_line`, otherwise the length subtraction underflows.
    fn create_test_match(
        rule_identifier: &str,
        start_line: usize,
        end_line: usize,
        score: MatchScore,
        coverage: f32,
        relevance: u8,
    ) -> LicenseMatch {
        let matched_len = end_line - start_line + 1;
        let rule_len = matched_len;
        let rid = parse_rule_id(rule_identifier).unwrap_or(0);
        LicenseMatch {
            rid,
            license_expression: "mit".to_string(),
            license_expression_spdx: Some("MIT".to_string()),
            from_file: None,
            start_line: LineNumber::new(start_line).unwrap(),
            end_line: LineNumber::new(end_line).unwrap(),
            start_token: start_line,
            end_token: end_line + 1,
            matcher: crate::license_detection::models::MatcherKind::Aho,
            score,
            matched_length: matched_len,
            rule_length: rule_len,
            match_coverage: coverage,
            rule_relevance: relevance,
            rule_identifier: rule_identifier.to_string(),
            rule_url: "https://example.com".to_string(),
            matched_text: None,
            referenced_filenames: None,
            rule_kind: crate::license_detection::models::RuleKind::None,
            is_from_license: false,
            rule_start_token: 0,
            coordinates: MatchCoordinates::query_region(PositionSpan::range(
                start_line,
                end_line + 1,
            )),
            candidate_resemblance: 0.0,
            candidate_containment: 0.0,
        }
    }

    // Two overlapping `#1` matches, one `#2` match and one `#99` match whose rid is registered
    // as a false positive. Expects two results: a `#1` match spanning lines 1..=15 and a `#2`
    // match scored at 80%.
    #[test]
    fn test_refine_matches_full_pipeline() {
        let mut index = LicenseIndex::with_legalese_count(10);
        let _ = index.false_positive_rids.insert(99);

        let mut m1 = create_test_match("#1", 1, 10, MatchScore::from_percentage(0.5), 100.0, 100);
        m1.rule_length = 100;
        m1.rule_start_token = 0;
        m1.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(1, 11),
            PositionSpan::range(0, 10),
            PositionSpan::empty(),
        );
        let mut m2 = create_test_match("#1", 5, 15, MatchScore::from_percentage(0.5), 100.0, 100);
        m2.rule_length = 100;
        m2.rule_start_token = 4;
        m2.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(5, 16),
            PositionSpan::range(4, 15),
            PositionSpan::empty(),
        );
        let mut m3 = create_test_match("#2", 20, 25, MatchScore::from_percentage(0.5), 100.0, 80);
        m3.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(20, 26),
            PositionSpan::range(0, 6),
            PositionSpan::empty(),
        );
        let mut m4 = create_test_match("#99", 30, 35, MatchScore::from_percentage(0.5), 100.0, 100);
        m4.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(30, 36),
            PositionSpan::range(0, 6),
            PositionSpan::empty(),
        );

        let matches = vec![m1, m2, m3, m4];

        let query = Query::from_extracted_text("test text", &index, false).unwrap();
        let refined = refine_matches(&index, matches, &query);

        assert_eq!(refined.len(), 2);

        let rule1_match = refined.iter().find(|m| m.rule_identifier == "#1").unwrap();
        assert_eq!(rule1_match.start_line, LineNumber::ONE);
        assert_eq!(rule1_match.end_line, LineNumber::new(15).unwrap());

        let rule2_match = refined.iter().find(|m| m.rule_identifier == "#2").unwrap();
        assert_eq!(rule2_match.score, MatchScore::from_percentage(80.0));
    }

    // Empty input must produce empty output.
    #[test]
    fn test_refine_matches_empty() {
        let index = LicenseIndex::with_legalese_count(10);
        let matches: Vec<LicenseMatch> = vec![];
        let query = Query::from_extracted_text("", &index, false).unwrap();

        let refined = refine_matches(&index, matches, &query);

        assert_eq!(refined.len(), 0);
    }

    // A single full-coverage, full-relevance match is kept and its score ends up at
    // `MatchScore::MAX`, regardless of the score it was created with.
    #[test]
    fn test_refine_matches_single() {
        let index = LicenseIndex::with_legalese_count(10);
        let matches = vec![create_test_match(
            "#1",
            1,
            10,
            MatchScore::from_percentage(0.5),
            100.0,
            100,
        )];
        let query = Query::from_extracted_text("test text", &index, false).unwrap();

        let refined = refine_matches(&index, matches, &query);

        assert_eq!(refined.len(), 1);
        assert_eq!(refined[0].score, MatchScore::MAX);
    }

    // Two matches for different rules on disjoint line ranges are both expected to survive.
    #[test]
    fn test_refine_matches_no_merging_needed() {
        let index = LicenseIndex::with_legalese_count(10);

        let mut m1 = create_test_match("#1", 1, 10, MatchScore::from_percentage(0.9), 90.0, 100);
        m1.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(1, 11),
            PositionSpan::range(0, 10),
            PositionSpan::empty(),
        );
        let mut m2 = create_test_match("#2", 20, 30, MatchScore::from_percentage(0.85), 85.0, 100);
        m2.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(20, 31),
            PositionSpan::range(0, 11),
            PositionSpan::empty(),
        );

        let matches = vec![m1, m2];

        let query = Query::from_extracted_text("test text", &index, false).unwrap();

        let refined = refine_matches(&index, matches, &query);

        assert_eq!(refined.len(), 2);
    }

    // An exact `Aho` match on tokens 10..16 and a 52.9%-coverage `Seq` match on positions
    // 10-14, 16 and 17 share the `bsd-new` expression. Only the exact match is expected back.
    // NOTE(review): the `true` passed to `Query::from_extracted_text` presumably marks the
    // query as binary, which the filter requires — confirm against the `Query` API.
    #[test]
    fn test_filter_binary_low_coverage_same_expression_seq_bridges_drops_seq_bridge() {
        let index = LicenseIndex::with_legalese_count(10);
        let query = Query::from_extracted_text("binary strings", &index, true).unwrap();

        let mut exact = create_test_match("#1", 140, 140, MatchScore::MAX, 100.0, 100);
        exact.license_expression = "bsd-new".to_string();
        exact.matcher = MatcherKind::Aho;
        exact.start_token = 10;
        exact.end_token = 16;
        exact.matched_length = 6;
        exact.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(10, 16),
            PositionSpan::empty(),
            PositionSpan::empty(),
        );

        let mut seq =
            create_test_match("#2", 140, 141, MatchScore::from_percentage(10.0), 52.9, 100);
        seq.license_expression = "bsd-new".to_string();
        seq.matcher = MatcherKind::Seq;
        seq.start_token = 10;
        seq.end_token = 18;
        seq.matched_length = 7;
        seq.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::from_positions(vec![10, 11, 12, 13, 14, 16, 17]),
            PositionSpan::empty(),
            PositionSpan::empty(),
        );

        let filtered = filter_binary_low_coverage_same_expression_seq_bridges(
            vec![seq.clone(), exact.clone()],
            &query,
        );

        assert_eq!(filtered, vec![exact]);
    }

    // Two adjacent halves of rule `#1` (lines 1..=10 and 11..=20, rule tokens 0..10 and 10..20)
    // are expected to come back from `refine_aho_matches` as one match spanning lines 1..=20.
    #[test]
    fn test_refine_aho_matches_restores_inner_merge_before_containment() {
        let index = LicenseIndex::with_legalese_count(10);

        let mut first = create_test_match("#1", 1, 10, MatchScore::from_percentage(0.9), 50.0, 100);
        first.rule_length = 20;
        first.rule_start_token = 0;
        first.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(1, 11),
            PositionSpan::range(0, 10),
            PositionSpan::empty(),
        );

        let mut second =
            create_test_match("#1", 11, 20, MatchScore::from_percentage(0.85), 50.0, 100);
        second.rule_length = 20;
        second.rule_start_token = 10;
        second.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(11, 21),
            PositionSpan::range(10, 20),
            PositionSpan::empty(),
        );

        let query = Query::from_extracted_text("test text", &index, false).unwrap();
        let refined = refine_aho_matches(&index, vec![first, second], &query);

        assert_eq!(refined.len(), 1);
        assert_eq!(refined[0].rule_identifier, "#1");
        assert_eq!(refined[0].start_line, LineNumber::ONE);
        assert_eq!(refined[0].end_line, LineNumber::new(20).unwrap());
    }

    // Three matches for three different rules on disjoint ranges: all three must be kept.
    #[test]
    fn test_refine_matches_pipeline_preserves_non_overlapping_different_rules() {
        let index = LicenseIndex::with_legalese_count(10);

        let mut m1 = create_test_match("#1", 1, 10, MatchScore::from_percentage(0.9), 90.0, 100);
        m1.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(1, 11),
            PositionSpan::range(0, 10),
            PositionSpan::empty(),
        );
        let mut m2 = create_test_match("#2", 20, 30, MatchScore::from_percentage(0.85), 85.0, 100);
        m2.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(20, 31),
            PositionSpan::range(0, 11),
            PositionSpan::empty(),
        );
        let mut m3 = create_test_match("#3", 40, 50, MatchScore::from_percentage(0.8), 80.0, 100);
        m3.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(40, 51),
            PositionSpan::range(0, 11),
            PositionSpan::empty(),
        );

        let matches = vec![m1, m2, m3];

        let query = Query::from_extracted_text("test text", &index, false).unwrap();
        let refined = refine_matches(&index, matches, &query);

        assert_eq!(refined.len(), 3);
    }

    // Mixed scenario: two overlapping `#1` matches plus a `#2` match that contains another
    // `#2` match. The assertion is deliberately loose and only requires at least two results.
    #[test]
    fn test_refine_matches_complex_scenario() {
        let mut index = LicenseIndex::with_legalese_count(10);
        let _ = index.false_positive_rids.insert(999);

        let mut m1 = create_test_match("#1", 1, 10, MatchScore::from_percentage(0.7), 100.0, 100);
        m1.matched_length = 100;
        m1.rule_length = 100;
        m1.rule_start_token = 0;
        m1.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(1, 11),
            PositionSpan::range(0, 10),
            PositionSpan::empty(),
        );
        let mut m2 = create_test_match("#1", 8, 15, MatchScore::from_percentage(0.8), 100.0, 100);
        m2.matched_length = 100;
        m2.rule_length = 100;
        m2.rule_start_token = 7;
        m2.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(8, 16),
            PositionSpan::range(7, 15),
            PositionSpan::empty(),
        );
        let mut m3 = create_test_match("#2", 20, 50, MatchScore::from_percentage(0.9), 100.0, 100);
        m3.matched_length = 300;
        m3.rule_length = 300;
        m3.rule_start_token = 0;
        m3.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(20, 51),
            PositionSpan::range(0, 31),
            PositionSpan::empty(),
        );
        let mut m4 = create_test_match("#2", 25, 45, MatchScore::from_percentage(0.85), 100.0, 100);
        m4.matched_length = 150;
        m4.rule_length = 300;
        m4.rule_start_token = 5;
        m4.coordinates = MatchCoordinates::rule_aligned(
            PositionSpan::range(25, 46),
            PositionSpan::range(5, 26),
            PositionSpan::empty(),
        );

        let matches = vec![m1, m2, m3, m4];

        let query = Query::from_extracted_text("test text", &index, false).unwrap();
        let refined = refine_matches(&index, matches, &query);

        assert!(
            refined.len() >= 2,
            "Should have at least 2 matches after refinement"
        );
    }

    // A match with the `unknown` expression whose rid is not a registered false positive is
    // expected in the weak bucket, even with a `Hash` matcher and 100% coverage.
    #[test]
    fn test_split_weak_matches_has_unknown() {
        let mut m = LicenseMatch {
            license_expression: "unknown".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Hash,
            matched_length: 100,
            match_coverage: 100.0,
            ..LicenseMatch::default()
        };
        m.end_token = 100;
        m.rule_length = 100;

        let index = LicenseIndex::with_legalese_count(10);
        let (good, weak) = split_weak_matches(&index, &[m.clone()]);
        assert!(weak.contains(&m));
        assert!(!good.contains(&m));
    }

    // A `Seq` match of length 10 with 20% coverage is expected in the weak bucket.
    #[test]
    fn test_split_weak_matches_short_seq_low_coverage() {
        let mut m = LicenseMatch {
            license_expression: "mit".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Seq,
            matched_length: 10,
            match_coverage: 20.0,
            ..LicenseMatch::default()
        };
        m.end_token = 10;
        m.rule_length = 50;

        let index = LicenseIndex::with_legalese_count(10);
        let (good, weak) = split_weak_matches(&index, &[m.clone()]);
        assert!(weak.contains(&m));
        assert!(!good.contains(&m));
    }

    // An `unknown` match whose rid (42) is registered in `false_positive_rids` must stay in
    // the good bucket instead of being classified as weak.
    #[test]
    fn test_split_weak_matches_keeps_false_positive_unknown_out_of_weak_bucket() {
        let m = LicenseMatch {
            rid: 42,
            license_expression: "unknown".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Aho,
            matched_length: 3,
            rule_length: 3,
            match_coverage: 100.0,
            ..LicenseMatch::default()
        };

        let mut index = LicenseIndex::with_legalese_count(10);
        index.false_positive_rids.insert(42);

        let (good, weak) = split_weak_matches(&index, std::slice::from_ref(&m));
        assert!(good.contains(&m));
        assert!(!weak.contains(&m));
    }

    // A short `Seq` match with 80% coverage is above the coverage cutoff and stays good.
    #[test]
    fn test_split_weak_matches_short_seq_high_coverage() {
        let mut m = LicenseMatch {
            license_expression: "mit".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Seq,
            matched_length: 10,
            match_coverage: 80.0,
            ..LicenseMatch::default()
        };
        m.end_token = 10;
        m.rule_length = 15;

        let index = LicenseIndex::with_legalese_count(10);
        let (good, weak) = split_weak_matches(&index, &[m.clone()]);
        assert!(good.contains(&m));
        assert!(!weak.contains(&m));
    }

    // The short/low-coverage rule only applies to `Seq` matches: a `Hash` match with the same
    // length and coverage stays good.
    #[test]
    fn test_split_weak_matches_non_seq_short() {
        let mut m = LicenseMatch {
            license_expression: "mit".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Hash,
            matched_length: 10,
            match_coverage: 20.0,
            ..LicenseMatch::default()
        };
        m.end_token = 10;
        m.rule_length = 15;

        let index = LicenseIndex::with_legalese_count(10);
        let (good, weak) = split_weak_matches(&index, &[m.clone()]);
        assert!(good.contains(&m));
        assert!(!weak.contains(&m));
    }

    // One good match, one `unknown` match and one short low-coverage `Seq` match: expects one
    // entry in the good bucket and the other two in the weak bucket.
    #[test]
    fn test_split_weak_matches_mixed() {
        let mut good_match = LicenseMatch {
            license_expression: "mit".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Hash,
            matched_length: 50,
            match_coverage: 95.0,
            ..LicenseMatch::default()
        };
        good_match.end_token = 50;
        good_match.rule_length = 50;

        let mut weak_unknown = LicenseMatch {
            license_expression: "unknown".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Unknown,
            matched_length: 30,
            match_coverage: 50.0,
            ..LicenseMatch::default()
        };
        weak_unknown.end_token = 30;
        weak_unknown.rule_length = 30;

        let mut weak_seq = LicenseMatch {
            license_expression: "apache-2.0".to_string(),
            matcher: crate::license_detection::models::MatcherKind::Seq,
            matched_length: 10,
            match_coverage: 20.0,
            ..LicenseMatch::default()
        };
        weak_seq.end_token = 10;
        weak_seq.rule_length = 50;

        let matches = vec![good_match.clone(), weak_unknown.clone(), weak_seq.clone()];
        let index = LicenseIndex::with_legalese_count(10);
        let (good, weak) = split_weak_matches(&index, &matches);

        assert_eq!(good.len(), 1);
        assert_eq!(weak.len(), 2);
        assert!(good.contains(&good_match));
        assert!(weak.contains(&weak_unknown));
        assert!(weak.contains(&weak_seq));
    }
}