provenant/license_detection/
hash_match.rs1use sha1::{Digest, Sha1};
7
8use crate::license_detection::index::LicenseIndex;
9use crate::license_detection::index::dictionary::{TokenId, TokenKind};
10use crate::license_detection::models::{LicenseMatch, MatcherKind};
11use crate::license_detection::query::QueryRun;
12use crate::license_detection::spans::Span;
13
14pub const MATCH_HASH: MatcherKind = MatcherKind::Hash;
15
16pub fn compute_hash(tokens: &[TokenId]) -> [u8; 20] {
29 let mut hasher = Sha1::new();
30
31 for token in tokens {
32 let signed = token.raw() as i16;
33 hasher.update(signed.to_le_bytes());
34 }
35
36 hasher.finalize().into()
37}
38
39pub fn hash_match(index: &LicenseIndex, query_run: &QueryRun) -> Vec<LicenseMatch> {
53 let mut matches = Vec::new();
54 let query_hash = compute_hash(query_run.tokens());
55
56 if let Some(&rid) = index.rid_by_hash.get(&query_hash) {
57 let rule = &index.rules_by_rid[rid];
58 let itokens = &index.tids_by_rid[rid];
59
60 let _qspan =
61 Span::from_range(query_run.start..query_run.end.map_or(query_run.start, |e| e + 1));
62 let rule_length = rule.tokens.len();
63 let _ispan = Span::from_range(0..rule_length);
64
65 let end = query_run.end.unwrap_or(query_run.start);
66 let qspan_positions: Vec<usize> = (query_run.start..=end).collect();
67 let ispan_positions: Vec<usize> = (0..rule_length).collect();
68 let hispan_positions: Vec<usize> = (0..rule_length)
69 .filter(|&p| index.dictionary.token_kind(itokens[p]) == TokenKind::Legalese)
70 .collect();
71
72 let matched_length = query_run.tokens().len();
73 let match_coverage = 100.0;
74
75 let start_line = query_run.line_for_pos(query_run.start).unwrap_or(1);
76 let end_line = if let Some(end) = query_run.end {
77 query_run.line_for_pos(end).unwrap_or(start_line)
78 } else {
79 start_line
80 };
81
82 let license_match = LicenseMatch {
83 license_expression: rule.license_expression.clone(),
84 license_expression_spdx: index
85 .rule_metadata_by_identifier
86 .get(&rule.identifier)
87 .and_then(|metadata| metadata.license_expression_spdx.clone()),
88 from_file: None,
89 start_line,
90 end_line,
91 start_token: query_run.start,
92 end_token: query_run.end.map_or(query_run.start, |e| e + 1),
93 matcher: MATCH_HASH,
94 score: 100.0,
95 matched_length,
96 rule_length,
97 match_coverage,
98 rule_relevance: rule.relevance,
99 rid,
100 rule_identifier: rule.identifier.clone(),
101 rule_url: rule.rule_url().unwrap_or_default(),
102 matched_text: None,
103 referenced_filenames: rule.referenced_filenames.clone(),
104 rule_kind: rule.kind(),
105 is_from_license: rule.is_from_license,
106 matched_token_positions: None,
107 hilen: hispan_positions.len(),
108 rule_start_token: 0,
109 qspan_positions: Some(qspan_positions),
110 ispan_positions: Some(ispan_positions),
111 hispan_positions: Some(hispan_positions),
112 candidate_resemblance: 0.0,
113 candidate_containment: 0.0,
114 };
115
116 matches.push(license_match);
117 }
118
119 matches
120}
121
122#[cfg(test)]
123#[path = "hash_match_test.rs"]
124mod tests;