provenant/license_detection/
hash_match.rs1use sha1::{Digest, Sha1};
7
8use crate::license_detection::index::LicenseIndex;
9use crate::license_detection::index::dictionary::{TokenId, TokenKind};
10use crate::license_detection::models::position_span::PositionSpan;
11use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKind};
12use crate::license_detection::query::QueryRun;
13use crate::models::LineNumber;
14use crate::models::MatchScore;
15
/// Matcher kind recorded on every match produced by [`hash_match`].
pub const MATCH_HASH: MatcherKind = MatcherKind::Hash;
17
18pub fn compute_hash(tokens: &[TokenId]) -> [u8; 20] {
31 let mut hasher = Sha1::new();
32
33 for token in tokens {
34 let signed = token.raw() as i16;
35 hasher.update(signed.to_le_bytes());
36 }
37
38 hasher.finalize().into()
39}
40
41pub fn hash_match(index: &LicenseIndex, query_run: &QueryRun) -> Vec<LicenseMatch> {
55 let mut matches = Vec::new();
56 let query_hash = compute_hash(query_run.tokens());
57
58 if let Some(&rid) = index.rid_by_hash.get(&query_hash) {
59 let rule = &index.rules_by_rid[rid];
60 let itokens = &index.tids_by_rid[rid];
61
62 let rule_length = rule.tokens.len();
63
64 let matched_length = query_run.tokens().len();
65 let match_coverage = 100.0;
66
67 let start_line = query_run
68 .line_for_pos(query_run.start)
69 .and_then(LineNumber::new)
70 .unwrap_or(LineNumber::ONE);
71 let end_line = if let Some(end) = query_run.end {
72 query_run
73 .line_for_pos(end)
74 .and_then(LineNumber::new)
75 .unwrap_or(start_line)
76 } else {
77 start_line
78 };
79
80 let end = query_run.end.unwrap_or(query_run.start);
81 let qspan = PositionSpan::range(query_run.start, end + 1);
82 let ispan = PositionSpan::range(0, rule_length);
83 let hispan = PositionSpan::from_positions(
84 (0..rule_length)
85 .filter(|&p| index.dictionary.token_kind(itokens[p]) == TokenKind::Legalese),
86 );
87
88 let license_match = LicenseMatch {
89 license_expression: rule.license_expression.clone(),
90 license_expression_spdx: index
91 .rule_metadata_by_identifier
92 .get(&rule.identifier)
93 .and_then(|metadata| metadata.license_expression_spdx.clone()),
94 from_file: None,
95 start_line,
96 end_line,
97 start_token: query_run.start,
98 end_token: query_run.end.map_or(query_run.start, |e| e + 1),
99 matcher: MATCH_HASH,
100 score: MatchScore::MAX,
101 matched_length,
102 rule_length,
103 match_coverage,
104 rule_relevance: rule.relevance,
105 rid,
106 rule_identifier: rule.identifier.clone(),
107 rule_url: rule.rule_url().unwrap_or_default(),
108 matched_text: None,
109 referenced_filenames: rule.referenced_filenames.clone(),
110 rule_kind: rule.kind(),
111 is_from_license: rule.is_from_license,
112 rule_start_token: 0,
113 coordinates: MatchCoordinates::rule_aligned(qspan, ispan, hispan),
114 candidate_resemblance: 0.0,
115 candidate_containment: 0.0,
116 };
117
118 matches.push(license_match);
119 }
120
121 matches
122}
123
124#[cfg(test)]
125#[path = "hash_match_test.rs"]
126mod tests;