provenant/license_detection/
hash_match.rs1use sha1::{Digest, Sha1};
7
8use crate::license_detection::index::LicenseIndex;
9use crate::license_detection::index::dictionary::{TokenId, TokenKind};
10use crate::license_detection::models::position_span::PositionSpan;
11use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKind};
12use crate::license_detection::query::QueryRun;
13
14pub const MATCH_HASH: MatcherKind = MatcherKind::Hash;
15
16pub fn compute_hash(tokens: &[TokenId]) -> [u8; 20] {
29 let mut hasher = Sha1::new();
30
31 for token in tokens {
32 let signed = token.raw() as i16;
33 hasher.update(signed.to_le_bytes());
34 }
35
36 hasher.finalize().into()
37}
38
39pub fn hash_match(index: &LicenseIndex, query_run: &QueryRun) -> Vec<LicenseMatch> {
53 let mut matches = Vec::new();
54 let query_hash = compute_hash(query_run.tokens());
55
56 if let Some(&rid) = index.rid_by_hash.get(&query_hash) {
57 let rule = &index.rules_by_rid[rid];
58 let itokens = &index.tids_by_rid[rid];
59
60 let rule_length = rule.tokens.len();
61
62 let matched_length = query_run.tokens().len();
63 let match_coverage = 100.0;
64
65 let start_line = query_run.line_for_pos(query_run.start).unwrap_or(1);
66 let end_line = if let Some(end) = query_run.end {
67 query_run.line_for_pos(end).unwrap_or(start_line)
68 } else {
69 start_line
70 };
71
72 let end = query_run.end.unwrap_or(query_run.start);
73 let qspan = PositionSpan::range(query_run.start, end + 1);
74 let ispan = PositionSpan::range(0, rule_length);
75 let hispan = PositionSpan::from_positions(
76 (0..rule_length)
77 .filter(|&p| index.dictionary.token_kind(itokens[p]) == TokenKind::Legalese),
78 );
79
80 let license_match = LicenseMatch {
81 license_expression: rule.license_expression.clone(),
82 license_expression_spdx: index
83 .rule_metadata_by_identifier
84 .get(&rule.identifier)
85 .and_then(|metadata| metadata.license_expression_spdx.clone()),
86 from_file: None,
87 start_line,
88 end_line,
89 start_token: query_run.start,
90 end_token: query_run.end.map_or(query_run.start, |e| e + 1),
91 matcher: MATCH_HASH,
92 score: 100.0,
93 matched_length,
94 rule_length,
95 match_coverage,
96 rule_relevance: rule.relevance,
97 rid,
98 rule_identifier: rule.identifier.clone(),
99 rule_url: rule.rule_url().unwrap_or_default(),
100 matched_text: None,
101 referenced_filenames: rule.referenced_filenames.clone(),
102 rule_kind: rule.kind(),
103 is_from_license: rule.is_from_license,
104 rule_start_token: 0,
105 coordinates: MatchCoordinates::rule_aligned(qspan, ispan, hispan),
106 candidate_resemblance: 0.0,
107 candidate_containment: 0.0,
108 };
109
110 matches.push(license_match);
111 }
112
113 matches
114}
115
116#[cfg(test)]
117#[path = "hash_match_test.rs"]
118mod tests;