1pub mod aho_match;
4pub mod automaton;
5pub(crate) mod detection;
6pub mod embedded;
7mod position_set;
8mod token_multiset;
9mod token_set;
10
11#[cfg(test)]
12mod embedded_test;
13pub mod expression;
14#[cfg(all(test, feature = "golden-tests"))]
15mod golden_test;
16#[cfg(feature = "golden-tests")]
17pub mod golden_utils;
18pub mod hash_match;
19pub mod index;
20mod match_refine;
21pub mod models;
22pub mod query;
23pub mod rules;
24pub mod seq_match;
25pub mod spdx_lid;
26pub mod spdx_mapping;
27#[cfg(test)]
28mod test_utils;
29pub mod tokenize;
30pub mod unknown_match;
31
32use bit_set::BitSet;
33use std::collections::HashSet;
34use std::fs;
35use std::path::Path;
36use std::sync::Arc;
37
38use anyhow::Result;
39
40use crate::license_detection::embedded::index::{
41 load_embedded_artifact_metadata_from_bytes, load_embedded_license_index_from_bytes,
42};
43use crate::license_detection::index::build_index_from_loaded;
44use crate::license_detection::query::Query;
45use crate::license_detection::rules::{
46 load_loaded_licenses_from_directory, load_loaded_rules_from_directory,
47};
48use crate::license_detection::spdx_mapping::{SpdxMapping, build_spdx_mapping};
49use crate::utils::text::strip_utf8_bom_str;
50
51use crate::license_detection::detection::{
52 attach_source_path_to_detections, empty_detection, populate_detection_from_group_with_spdx,
53};
54use crate::license_detection::models::MatcherKind;
55
/// Relative path to the ScanCode `rules` data directory.
///
/// Not referenced in the visible part of this module, hence `allow(dead_code)`.
/// NOTE(review): confirm which callers still rely on it.
#[allow(dead_code)]
pub const SCANCODE_LICENSES_RULES_PATH: &str =
    "reference/scancode-toolkit/src/licensedcode/data/rules";

/// Relative path to the ScanCode `licenses` data directory.
///
/// Not referenced in the visible part of this module, hence `allow(dead_code)`.
/// NOTE(review): confirm which callers still rely on it.
#[allow(dead_code)]
pub const SCANCODE_LICENSES_LICENSES_PATH: &str =
    "reference/scancode-toolkit/src/licensedcode/data/licenses";

/// Relative path to the ScanCode `data` directory, the parent of the `rules`
/// and `licenses` paths above.
///
/// Not referenced in the visible part of this module, hence `allow(dead_code)`.
/// NOTE(review): confirm which callers still rely on it.
#[allow(dead_code)]
pub const SCANCODE_LICENSES_DATA_PATH: &str = "reference/scancode-toolkit/src/licensedcode/data";

/// URL template for LicenseDB entries; `{}` is a placeholder to be substituted
/// by the caller (presumably with a license key — confirm against the users).
pub const DEFAULT_LICENSEDB_URL_TEMPLATE: &str = "https://scancode-licensedb.aboutcode.org/{}";
74
75pub(crate) use detection::{
76 LicenseDetection, group_matches_by_region, post_process_detections, sort_matches_by_line,
77};
78pub use models::LicenseMatch;
79
80pub use aho_match::aho_match;
81pub use hash_match::hash_match;
82pub use match_refine::{
83 filter_invalid_contained_unknown_matches, merge_overlapping_matches, refine_matches,
84 refine_matches_without_false_positive_filter, split_weak_matches,
85};
86pub use position_set::PositionSet;
87pub use spdx_lid::spdx_lid_match;
88pub use token_multiset::TokenMultiset;
89pub use token_set::TokenSet;
90pub use unknown_match::unknown_match;
91
92use self::seq_match::{MAX_NEAR_DUPE_CANDIDATES, select_seq_candidates, seq_match_with_candidates};
93
/// License detection engine: a license index plus the SPDX metadata derived
/// from it.
///
/// The index is held behind an [`Arc`], so cloning the engine shares the index
/// instead of copying it.
#[derive(Debug, Clone)]
pub struct LicenseDetectionEngine {
    /// Shared license index handed to every matching stage.
    index: Arc<index::LicenseIndex>,
    /// SPDX mapping built from the index's licenses when the engine is created.
    spdx_mapping: SpdxMapping,
    /// SPDX license list version associated with the loaded data, if known.
    spdx_license_list_version: Option<String>,
}
105
/// Upper bound, in bytes, on the text handed to detection (10 MiB); longer
/// input is truncated by `truncate_detection_text`.
const MAX_DETECTION_SIZE: usize = 10 * 1024 * 1024;
/// Candidate limit passed to `select_seq_candidates` for the regular
/// (non near-duplicate) sequence matching pass.
const MAX_REGULAR_SEQ_CANDIDATES: usize = 70;
/// Base allowance for container-only positions in the redundant sequence
/// container heuristics; scaled by the number of contained matches in
/// `is_redundant_same_expression_seq_container`.
const MAX_REDUNDANT_SEQ_CONTAINER_BOUNDARY_GAP: usize = 8;
/// Base allowance for positions covered by contained matches but not by the
/// container; the heuristic tolerates this value plus one.
const MAX_REDUNDANT_SEQ_CONTAINER_UNMATCHED_GAP: usize = 2;
110
111fn truncate_detection_text(clean_text: &str) -> &str {
112 if clean_text.len() <= MAX_DETECTION_SIZE {
113 return clean_text;
114 }
115
116 log::debug!(
117 "Content size {} exceeds limit {}, truncating for detection",
118 clean_text.len(),
119 MAX_DETECTION_SIZE
120 );
121
122 let boundary = clean_text.floor_char_boundary(MAX_DETECTION_SIZE);
123 &clean_text[..boundary]
124}
125
126fn query_span_for_match(m: &LicenseMatch) -> Option<models::PositionSpan> {
127 (!m.query_span().is_empty()).then(|| m.query_span().clone())
128}
129
130fn has_full_match_coverage(m: &LicenseMatch) -> bool {
131 m.coverage() == 100.0
132}
133
/// Decides whether a sequence match (`container`) is redundant because exact
/// Aho matches with the same license expression already account for it.
///
/// `candidate_contained_matches` is the pool of matches searched for such
/// children. Returns `true` when the container should be dropped.
fn is_redundant_same_expression_seq_container(
    container: &LicenseMatch,
    candidate_contained_matches: &[LicenseMatch],
) -> bool {
    // Only near-complete sequence matches are considered.
    // NOTE(review): `has_full_match_coverage` tests `coverage() == 100.0`,
    // which `>= 99.0` already implies, so the first operand looks redundant.
    let container_is_redundant_coverage =
        has_full_match_coverage(container) || container.coverage() >= 99.0;
    if container.matcher != MatcherKind::Seq || !container_is_redundant_coverage {
        return false;
    }

    let container_qspan_set = container.qspan_set();

    // Children: fully covered Aho matches with the container's expression that
    // overlap the container's query positions.
    let mut contained: Vec<&LicenseMatch> = candidate_contained_matches
        .iter()
        .filter(|m| {
            m.matcher == MatcherKind::Aho
                && has_full_match_coverage(m)
                && m.license_expression == container.license_expression
                && m.overlaps_with(&container_qspan_set)
        })
        .collect();

    // A single child cannot make the container redundant.
    if contained.len() < 2 {
        return false;
    }

    // At least two children must have a matched length above one.
    let material_children = contained.iter().filter(|m| m.matched_length > 1).count();
    if material_children < 2 {
        return false;
    }

    // Order children by their query span bounds so neighbours can be paired.
    contained.sort_by_key(|m| m.qspan_bounds());

    // Union of all query positions covered by the children.
    let mut child_union = PositionSet::new();
    for m in &contained {
        child_union.extend_from_span(m.query_span());
    }

    // Positions covered by only one side of the comparison.
    let container_only_positions = container_qspan_set.difference(&child_union);
    let child_only_positions = child_union.difference(&container_qspan_set);

    // "Bridge" positions lie between one child's end bound and the next
    // child's start bound (the end bound is used as an exclusive limit here).
    let mut bridge_positions = BitSet::new();
    for pair in contained.windows(2) {
        let (_, previous_end) = pair[0].qspan_bounds();
        let (next_start, _) = pair[1].qspan_bounds();

        // Neighbouring children whose bounds overlap: keep the container.
        if next_start < previous_end {
            return false;
        }

        for pos in previous_end..next_start {
            bridge_positions.insert(pos);
        }
    }

    // Container-only positions that are not bridges between children.
    let container_only_boundary_positions = container_only_positions
        .iter()
        .filter(|&pos| !bridge_positions.contains(pos))
        .count();

    // Exactly one extra container position, located in a bridge, and nothing
    // the container misses: keep the container.
    if container_only_positions.len() == 1
        && container_only_boundary_positions == 0
        && child_only_positions.is_empty()
    {
        return false;
    }

    // All extra container positions are non-bridge ones (at most three) and the
    // container misses nothing: keep the container when those positions all
    // sit on one side of the children.
    if child_only_positions.is_empty()
        && container_only_positions.len() == container_only_boundary_positions
        && container_only_boundary_positions <= 3
    {
        let earliest_child = contained
            .iter()
            .map(|m| m.qspan_bounds().0)
            .min()
            .unwrap_or(usize::MAX);
        // The end bound minus one is used as the last covered position.
        let latest_child = contained
            .iter()
            .map(|m| m.qspan_bounds().1.saturating_sub(1))
            .max()
            .unwrap_or(0);

        let is_one_sided_boundary = container_only_positions
            .iter()
            .all(|pos| pos < earliest_child)
            || container_only_positions
                .iter()
                .all(|pos| pos > latest_child);

        if is_one_sided_boundary {
            return false;
        }
    }

    // Tolerances grow with the number of children. `contained.len() >= 2` is
    // guaranteed above, so the subtraction cannot underflow.
    let max_container_only_positions =
        MAX_REDUNDANT_SEQ_CONTAINER_BOUNDARY_GAP * contained.len() + 1;
    let max_container_boundary_positions =
        MAX_REDUNDANT_SEQ_CONTAINER_BOUNDARY_GAP * (contained.len() - 1);
    let max_child_only_positions = MAX_REDUNDANT_SEQ_CONTAINER_UNMATCHED_GAP + 1;

    // Redundant only when every difference stays within its tolerance.
    container_only_positions.len() <= max_container_only_positions
        && container_only_boundary_positions <= max_container_boundary_positions
        && child_only_positions.len() <= max_child_only_positions
}
238
239fn filter_redundant_same_expression_seq_containers(
240 seq_matches: Vec<LicenseMatch>,
241 candidate_contained_matches: &[LicenseMatch],
242) -> Vec<LicenseMatch> {
243 seq_matches
244 .into_iter()
245 .filter(|m| !is_redundant_same_expression_seq_container(m, candidate_contained_matches))
246 .collect()
247}
248
249fn is_redundant_low_coverage_composite_seq_wrapper(
250 container: &LicenseMatch,
251 candidate_contained_matches: &[LicenseMatch],
252) -> bool {
253 if container.matcher != seq_match::MATCH_SEQ || container.coverage() >= 30.0 {
254 return false;
255 }
256
257 let container_qspan_set = container.qspan_set();
258
259 let children: Vec<&LicenseMatch> = candidate_contained_matches
260 .iter()
261 .filter(|m| {
262 m.matcher == aho_match::MATCH_AHO
263 && has_full_match_coverage(m)
264 && m.license_expression != container.license_expression
265 && m.overlaps_with(&container_qspan_set)
266 })
267 .collect();
268
269 if children.len() < 2 {
270 return false;
271 }
272
273 let unique_expressions: HashSet<&str> = children
274 .iter()
275 .map(|m| m.license_expression.as_str())
276 .collect();
277 if unique_expressions.len() < 2 {
278 return false;
279 }
280
281 let mut child_union = PositionSet::new();
282 for m in &children {
283 child_union.extend_from_span(m.query_span());
284 }
285
286 let container_only_positions = container_qspan_set.difference(&child_union);
287 let child_only_positions = child_union.difference(&container_qspan_set);
288
289 let mut sorted_children = children;
290 sorted_children.sort_by_key(|m| m.qspan_bounds());
291
292 let mut bridge_positions = BitSet::new();
293 for pair in sorted_children.windows(2) {
294 let (_, previous_end) = pair[0].qspan_bounds();
295 let (next_start, _) = pair[1].qspan_bounds();
296 for pos in previous_end..next_start {
297 bridge_positions.insert(pos);
298 }
299 }
300
301 let container_only_boundary_positions = container_only_positions
302 .iter()
303 .filter(|&pos| !bridge_positions.contains(pos))
304 .count();
305
306 child_only_positions.is_empty()
307 && container_only_positions.len() <= MAX_REDUNDANT_SEQ_CONTAINER_BOUNDARY_GAP
308 && container_only_boundary_positions <= MAX_REDUNDANT_SEQ_CONTAINER_BOUNDARY_GAP
309}
310
311fn filter_redundant_low_coverage_composite_seq_wrappers(
312 seq_matches: Vec<LicenseMatch>,
313 candidate_contained_matches: &[LicenseMatch],
314) -> Vec<LicenseMatch> {
315 seq_matches
316 .into_iter()
317 .filter(|m| {
318 !is_redundant_low_coverage_composite_seq_wrapper(m, candidate_contained_matches)
319 })
320 .collect()
321}
322
323fn subtract_spdx_match_qspans(
324 query: &mut Query<'_>,
325 matched_qspans: &mut Vec<models::PositionSpan>,
326 aho_extra_matchables: &mut PositionSet,
327 spdx_matches: &[LicenseMatch],
328) {
329 for m in spdx_matches {
330 let Some(span) = query_span_for_match(m) else {
331 continue;
332 };
333
334 aho_extra_matchables.extend_from_span(&span);
335 query.subtract(&span);
336
337 if has_full_match_coverage(m) {
338 matched_qspans.push(span);
339 }
340 }
341}
342
343fn merge_and_prepare_aho_matches(
344 index: &index::LicenseIndex,
345 query: &mut Query<'_>,
346 matched_qspans: &mut Vec<models::PositionSpan>,
347 refined_aho: &[LicenseMatch],
348) -> (Vec<LicenseMatch>, bool) {
349 let merged_aho = merge_overlapping_matches(refined_aho);
350 let mut saw_long_exact_license_text_match = false;
351
352 for m in &merged_aho {
353 let Some(span) = query_span_for_match(m) else {
354 continue;
355 };
356
357 if has_full_match_coverage(m) {
358 matched_qspans.push(span.clone());
359 }
360
361 if index
362 .rules_by_rid
363 .get(m.rid)
364 .is_some_and(|rule| rule.is_license_text())
365 && m.rule_length > 120
366 && m.coverage() > 98.0
367 {
368 query.subtract(&span);
369 saw_long_exact_license_text_match = true;
370 }
371 }
372
373 (merged_aho, saw_long_exact_license_text_match)
374}
375
376fn collect_whole_query_exact_followup_matches(
377 index: &index::LicenseIndex,
378 query: &mut Query<'_>,
379 matched_qspans: &mut Vec<models::PositionSpan>,
380 whole_run: &query::QueryRun<'_>,
381) -> Vec<LicenseMatch> {
382 let mut seq_all_matches = Vec::new();
383
384 if whole_run.is_matchable(false, matched_qspans) {
385 let near_dupe_candidates =
386 select_seq_candidates(index, whole_run, true, MAX_NEAR_DUPE_CANDIDATES);
387
388 if !near_dupe_candidates.is_empty() {
389 let near_dupe_matches =
390 seq_match_with_candidates(index, whole_run, &near_dupe_candidates);
391
392 for m in &near_dupe_matches {
393 if !m.query_span().is_empty() {
394 let span = m.query_span().clone();
395 query.subtract(&span);
396 matched_qspans.push(span);
397 }
398 }
399
400 seq_all_matches.extend(near_dupe_matches);
401 }
402 }
403
404 seq_all_matches
405}
406
407fn collect_regular_seq_matches(
408 index: &index::LicenseIndex,
409 query: &Query<'_>,
410 matched_qspans: &[models::PositionSpan],
411 candidate_contained_matches: &[LicenseMatch],
412) -> Vec<LicenseMatch> {
413 let mut seq_all_matches = Vec::new();
414
415 for query_run in query.query_runs() {
416 if !query_run.is_matchable(false, matched_qspans) {
417 continue;
418 }
419
420 let candidates =
421 select_seq_candidates(index, &query_run, false, MAX_REGULAR_SEQ_CANDIDATES);
422 if !candidates.is_empty() {
423 let matches = seq_match_with_candidates(index, &query_run, &candidates);
424 seq_all_matches.extend(matches);
425 }
426 }
427
428 let merged_seq = merge_overlapping_matches(&seq_all_matches);
429 let filtered_same_expression =
430 filter_redundant_same_expression_seq_containers(merged_seq, candidate_contained_matches);
431 filter_redundant_low_coverage_composite_seq_wrappers(
432 filtered_same_expression,
433 candidate_contained_matches,
434 )
435}
436
437impl LicenseDetectionEngine {
438 fn from_index(
443 index: index::LicenseIndex,
444 spdx_license_list_version: Option<String>,
445 ) -> Result<Self> {
446 let mut license_vec: Vec<_> = index.licenses_by_key.values().cloned().collect();
447 license_vec.sort_by(|a, b| a.key.cmp(&b.key));
448 let spdx_mapping = build_spdx_mapping(&license_vec);
449
450 Ok(Self {
451 index: Arc::new(index),
452 spdx_mapping,
453 spdx_license_list_version,
454 })
455 }
456
457 pub fn from_embedded() -> Result<Self> {
466 let artifact_bytes = include_bytes!("../../resources/license_detection/license_index.zst");
467 let loaded = load_embedded_license_index_from_bytes(artifact_bytes)
468 .map_err(|e| anyhow::anyhow!("Failed to load embedded license index: {}", e))?;
469 Self::from_index(
470 loaded.index,
471 Some(loaded.metadata.spdx_license_list_version),
472 )
473 }
474
475 pub fn from_directory(rules_path: &Path) -> Result<Self> {
483 let (rules_dir, licenses_dir) = if rules_path.ends_with("data") {
484 (rules_path.join("rules"), rules_path.join("licenses"))
485 } else if rules_path.ends_with("rules") {
486 let parent = rules_path.parent().ok_or_else(|| {
487 anyhow::anyhow!("Cannot determine parent directory for rules path")
488 })?;
489 (rules_path.to_path_buf(), parent.join("licenses"))
490 } else {
491 (rules_path.to_path_buf(), rules_path.to_path_buf())
492 };
493
494 let loaded_rules = load_loaded_rules_from_directory(&rules_dir)?;
495 let loaded_licenses = load_loaded_licenses_from_directory(&licenses_dir)?;
496 let index = build_index_from_loaded(loaded_rules, loaded_licenses, false);
497 let spdx_license_list_version = detect_scancode_spdx_license_list_version(&rules_dir)?;
498
499 Self::from_index(index, spdx_license_list_version)
500 }
501
502 pub fn embedded_spdx_license_list_version() -> Result<String> {
503 let artifact_bytes = include_bytes!("../../resources/license_detection/license_index.zst");
504 Ok(load_embedded_artifact_metadata_from_bytes(artifact_bytes)
505 .map_err(|e| {
506 anyhow::anyhow!("Failed to load embedded license artifact metadata: {}", e)
507 })?
508 .spdx_license_list_version)
509 }
510
511 pub fn detect_with_kind(
512 &self,
513 text: &str,
514 unknown_licenses: bool,
515 binary_derived: bool,
516 ) -> Result<Vec<LicenseDetection>> {
517 self.detect_with_kind_with_score(text, unknown_licenses, binary_derived, 0.0)
518 }
519
520 pub fn detect_with_kind_with_score(
521 &self,
522 text: &str,
523 unknown_licenses: bool,
524 binary_derived: bool,
525 min_score: f32,
526 ) -> Result<Vec<LicenseDetection>> {
527 let clean_text = strip_utf8_bom_str(text);
528
529 let content = truncate_detection_text(clean_text);
530
531 let mut query = Query::from_extracted_text(content, &self.index, binary_derived)?;
532 let whole_query_run = query.whole_query_run();
533
534 let mut all_matches = Vec::new();
535 let mut candidate_contained_matches = Vec::new();
536 let mut aho_extra_matchables = PositionSet::new();
537 let mut matched_qspans: Vec<models::PositionSpan> = Vec::new();
538
539 {
542 let hash_matches = hash_match(&self.index, &whole_query_run);
543
544 if !hash_matches.is_empty() {
545 let mut matches = hash_matches;
546 sort_matches_by_line(&mut matches);
547
548 let groups = group_matches_by_region(&matches);
549 let detections: Vec<LicenseDetection> = groups
550 .iter()
551 .map(|group| {
552 let mut detection = empty_detection();
553 populate_detection_from_group_with_spdx(
554 &mut detection,
555 group,
556 &self.spdx_mapping,
557 Some(content),
558 );
559 detection
560 })
561 .collect();
562
563 return Ok(post_process_detections(detections, min_score));
564 }
565 }
566
567 {
569 let spdx_matches = spdx_lid_match(&self.index, &query);
570 subtract_spdx_match_qspans(
571 &mut query,
572 &mut matched_qspans,
573 &mut aho_extra_matchables,
574 &spdx_matches,
575 );
576 all_matches.extend(spdx_matches);
577 }
578
579 {
581 let aho_matches = if aho_extra_matchables.is_empty() {
582 aho_match(&self.index, &whole_query_run)
583 } else {
584 aho_match::aho_match_with_extra_matchables(
585 &self.index,
586 &whole_query_run,
587 Some(&aho_extra_matchables),
588 )
589 };
590
591 let refined_aho = match_refine::refine_aho_matches(&self.index, aho_matches, &query);
594 candidate_contained_matches.extend(refined_aho.clone());
595 let (merged_aho, _) = merge_and_prepare_aho_matches(
596 &self.index,
597 &mut query,
598 &mut matched_qspans,
599 &refined_aho,
600 );
601 all_matches.extend(merged_aho);
602
603 let whole_query_followup = collect_whole_query_exact_followup_matches(
604 &self.index,
605 &mut query,
606 &mut matched_qspans,
607 &whole_query_run,
608 );
609 all_matches.extend(whole_query_followup);
610
611 let merged_seq = collect_regular_seq_matches(
612 &self.index,
613 &query,
614 &matched_qspans,
615 &candidate_contained_matches,
616 );
617 all_matches.extend(merged_seq);
618 }
619
620 let merged_matches =
623 refine_matches_without_false_positive_filter(&self.index, all_matches, &query);
624
625 let refined_matches = if unknown_licenses {
628 let (good_matches, weak_matches) = split_weak_matches(&self.index, &merged_matches);
630
631 let unknown_matches = unknown_match(&self.index, &query, &good_matches);
633 let filtered_unknown =
634 filter_invalid_contained_unknown_matches(&unknown_matches, &good_matches);
635
636 let mut all_matches = good_matches;
637 all_matches.extend(filtered_unknown);
638 all_matches.extend(weak_matches);
641 all_matches
642 } else {
643 merged_matches
644 };
645
646 let refined = refine_matches(&self.index, refined_matches, &query);
648
649 let mut sorted = refined;
650 sort_matches_by_line(&mut sorted);
651
652 let groups = group_matches_by_region(&sorted);
653
654 let detections: Vec<LicenseDetection> = groups
655 .iter()
656 .map(|group| {
657 let mut detection = empty_detection();
658 populate_detection_from_group_with_spdx(
659 &mut detection,
660 group,
661 &self.spdx_mapping,
662 Some(content),
663 );
664 detection
665 })
666 .collect();
667
668 let detections = post_process_detections(detections, min_score);
669
670 Ok(detections)
671 }
672
673 pub fn detect_with_kind_and_source(
674 &self,
675 text: &str,
676 unknown_licenses: bool,
677 binary_derived: bool,
678 source_path: &str,
679 ) -> Result<Vec<LicenseDetection>> {
680 let mut detections = self.detect_with_kind(text, unknown_licenses, binary_derived)?;
681 attach_source_path_to_detections(&mut detections, source_path);
682 Ok(detections)
683 }
684
685 pub fn detect_with_kind_and_source_with_score(
686 &self,
687 text: &str,
688 unknown_licenses: bool,
689 binary_derived: bool,
690 source_path: &str,
691 min_score: f32,
692 ) -> Result<Vec<LicenseDetection>> {
693 let mut detections =
694 self.detect_with_kind_with_score(text, unknown_licenses, binary_derived, min_score)?;
695 attach_source_path_to_detections(&mut detections, source_path);
696 Ok(detections)
697 }
698
699 #[cfg(any(test, feature = "golden-tests"))]
704 pub fn detect_matches_with_kind(
705 &self,
706 text: &str,
707 unknown_licenses: bool,
708 binary_derived: bool,
709 ) -> Result<Vec<LicenseMatch>> {
710 let clean_text = strip_utf8_bom_str(text);
711
712 let content = truncate_detection_text(clean_text);
713
714 let mut query = Query::from_extracted_text(content, &self.index, binary_derived)?;
715 let whole_query_run = query.whole_query_run();
716
717 let mut all_matches = Vec::new();
718 let mut candidate_contained_matches = Vec::new();
719 let mut aho_extra_matchables = PositionSet::new();
720 let mut matched_qspans: Vec<models::PositionSpan> = Vec::new();
721
722 {
724 let hash_matches = hash_match(&self.index, &whole_query_run);
725
726 if !hash_matches.is_empty() {
727 let mut matches = hash_matches;
728 sort_matches_by_line(&mut matches);
729 return Ok(matches);
730 }
731 }
732
733 {
735 let spdx_matches = spdx_lid_match(&self.index, &query);
736 subtract_spdx_match_qspans(
737 &mut query,
738 &mut matched_qspans,
739 &mut aho_extra_matchables,
740 &spdx_matches,
741 );
742 all_matches.extend(spdx_matches);
743 }
744
745 {
747 let aho_matches = if aho_extra_matchables.is_empty() {
748 aho_match(&self.index, &whole_query_run)
749 } else {
750 aho_match::aho_match_with_extra_matchables(
751 &self.index,
752 &whole_query_run,
753 Some(&aho_extra_matchables),
754 )
755 };
756 let refined_aho = match_refine::refine_aho_matches(&self.index, aho_matches, &query);
757 candidate_contained_matches.extend(refined_aho.clone());
758 let (merged_aho, _) = merge_and_prepare_aho_matches(
759 &self.index,
760 &mut query,
761 &mut matched_qspans,
762 &refined_aho,
763 );
764 all_matches.extend(merged_aho);
765
766 let whole_query_followup = collect_whole_query_exact_followup_matches(
767 &self.index,
768 &mut query,
769 &mut matched_qspans,
770 &whole_query_run,
771 );
772 all_matches.extend(whole_query_followup);
773
774 let merged_seq = collect_regular_seq_matches(
775 &self.index,
776 &query,
777 &matched_qspans,
778 &candidate_contained_matches,
779 );
780 all_matches.extend(merged_seq);
781 }
782
783 let merged_matches =
785 refine_matches_without_false_positive_filter(&self.index, all_matches, &query);
786
787 let refined_matches = if unknown_licenses {
789 let (good_matches, weak_matches) = split_weak_matches(&self.index, &merged_matches);
790 let unknown_matches = unknown_match(&self.index, &query, &good_matches);
791 let filtered_unknown =
792 filter_invalid_contained_unknown_matches(&unknown_matches, &good_matches);
793
794 let mut all_matches = good_matches;
795 all_matches.extend(filtered_unknown);
796 all_matches.extend(weak_matches);
797 all_matches
798 } else {
799 merged_matches
800 };
801
802 let refined = refine_matches(&self.index, refined_matches, &query);
804
805 let mut sorted = refined;
806 sort_matches_by_line(&mut sorted);
807
808 Ok(sorted)
810 }
811
    /// Returns the license index backing this engine.
    pub fn index(&self) -> &index::LicenseIndex {
        // The `&Arc<LicenseIndex>` deref-coerces to `&LicenseIndex`.
        &self.index
    }
816
    /// Returns the SPDX license list version associated with the loaded
    /// license data, or `None` when it is not known.
    pub fn spdx_license_list_version(&self) -> Option<&str> {
        self.spdx_license_list_version.as_deref()
    }
820
    /// Returns the SPDX mapping built for this engine (test builds only).
    #[cfg(test)]
    pub fn spdx_mapping(&self) -> &SpdxMapping {
        &self.spdx_mapping
    }
826}
827
828pub fn detect_scancode_spdx_license_list_version(search_path: &Path) -> Result<Option<String>> {
829 for ancestor in search_path.ancestors() {
830 let candidate = ancestor.join("scancode_config.py");
831 if candidate.is_file() {
832 let config = fs::read_to_string(&candidate)?;
833 return Ok(parse_scancode_spdx_license_list_version(&config));
834 }
835 }
836
837 Ok(None)
838}
839
/// Extracts the value assigned to `spdx_license_list_version` from the text of
/// a `scancode_config.py` file.
///
/// The first line of the form `spdx_license_list_version = <value>` wins. The
/// value is trimmed and surrounding double or single quotes are removed. A
/// type annotation on the name (`spdx_license_list_version: str = ...`) is
/// tolerated. Lines whose name merely starts with `spdx_license_list_version`
/// (for example `spdx_license_list_version_url`) are ignored.
///
/// Returns `None` when no such assignment exists.
fn parse_scancode_spdx_license_list_version(config: &str) -> Option<String> {
    config.lines().find_map(|line| {
        // Lines without `=` cannot be assignments; `?` skips them.
        let (target, value) = line.split_once('=')?;

        // Drop an optional `: type` annotation, then require the exact name so
        // that longer identifiers sharing the prefix are not mistaken for it.
        let name = target
            .split_once(':')
            .map_or(target, |(name, _annotation)| name)
            .trim();

        (name == "spdx_license_list_version").then(|| {
            value
                .trim()
                .trim_matches('"')
                .trim_matches('\'')
                .to_string()
        })
    })
}
853
854#[cfg(test)]
855mod tests;