provenant/license_detection/
hash_match.rs1use sha1::{Digest, Sha1};
10
11use crate::license_detection::index::LicenseIndex;
12use crate::license_detection::index::dictionary::{TokenId, TokenKind};
13use crate::license_detection::models::position_span::PositionSpan;
14use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKind};
15use crate::license_detection::query::QueryRun;
16use crate::models::LineNumber;
17use crate::models::MatchScore;
18
19pub const MATCH_HASH: MatcherKind = MatcherKind::Hash;
20
21pub fn compute_hash(tokens: &[TokenId]) -> [u8; 20] {
34 let mut hasher = Sha1::new();
35
36 for token in tokens {
37 let signed = token.raw() as i16;
38 hasher.update(signed.to_le_bytes());
39 }
40
41 hasher.finalize().into()
42}
43
44pub fn hash_match(index: &LicenseIndex, query_run: &QueryRun) -> Vec<LicenseMatch> {
58 let mut matches = Vec::new();
59 let query_hash = compute_hash(query_run.tokens());
60
61 if let Some(rid) = index.rid_by_hash.get(&query_hash) {
62 let Some(rule) = index.rule(*rid) else {
63 return matches;
64 };
65 let Some(itokens) = index.rule_tokens(*rid) else {
66 return matches;
67 };
68
69 let rule_length = rule.tokens.len();
70
71 let matched_length = query_run.tokens().len();
72 let match_coverage = 100.0;
73
74 let start_line = query_run
75 .line_for_pos(query_run.start)
76 .and_then(LineNumber::new)
77 .unwrap_or(LineNumber::ONE);
78 let end_line = if let Some(end) = query_run.end {
79 query_run
80 .line_for_pos(end)
81 .and_then(LineNumber::new)
82 .unwrap_or(start_line)
83 } else {
84 start_line
85 };
86
87 let end = query_run.end.unwrap_or(query_run.start);
88 let qspan = PositionSpan::range(query_run.start, end + 1);
89 let ispan = PositionSpan::range(0, rule_length);
90 let hispan = PositionSpan::from_positions(
91 (0..rule_length)
92 .filter(|&p| index.dictionary.token_kind(itokens[p]) == TokenKind::Legalese),
93 );
94
95 let license_match = LicenseMatch {
96 license_expression: rule.license_expression.clone(),
97 license_expression_spdx: index
98 .rule_metadata_by_identifier
99 .get(&rule.identifier)
100 .and_then(|metadata| metadata.license_expression_spdx.clone()),
101 from_file: None,
102 start_line,
103 end_line,
104 start_token: query_run.start,
105 end_token: query_run.end.map_or(query_run.start, |e| e + 1),
106 matcher: MATCH_HASH,
107 score: MatchScore::MAX,
108 matched_length,
109 rule_length,
110 match_coverage,
111 rule_relevance: rule.relevance,
112 rid: *rid,
113 rule_identifier: rule.identifier.clone(),
114 rule_url: rule.rule_url().unwrap_or_default(),
115 matched_text: None,
116 referenced_filenames: rule.referenced_filenames.clone(),
117 rule_kind: rule.kind(),
118 is_from_license: rule.is_from_license,
119 rule_start_token: 0,
120 coordinates: MatchCoordinates::rule_aligned(qspan, ispan, hispan),
121 candidate_resemblance: 0.0,
122 candidate_containment: 0.0,
123 };
124
125 matches.push(license_match);
126 }
127
128 matches
129}
130
131#[cfg(test)]
132#[path = "hash_match_test.rs"]
133mod tests;