pub mod builder;
pub mod dictionary;
pub mod token_sets;
#[allow(unused_imports)]
pub use builder::{
build_index, build_index_from_loaded, build_index_from_loaded_with_automatons,
loaded_license_to_license, loaded_rule_to_rule,
};
use crate::license_detection::TokenSet;
use crate::license_detection::automaton::Automaton;
use crate::license_detection::index::dictionary::{TokenDictionary, TokenId};
use std::collections::{HashMap, HashSet};
/// Metadata record kept per rule identifier; it is the value type of
/// [`LicenseIndex::rule_metadata_by_identifier`].
///
/// `Default` yields `None`, `false` and an empty list for the three fields.
///
/// NOTE(review): the field meanings below are inferred from names and types only;
/// confirm against the code that populates this struct.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IndexedRuleMetadata {
/// Presumably the rule's license expression rendered with SPDX keys, when available.
pub license_expression_spdx: Option<String>,
/// Presumably marks rules that required-phrase generation should leave out — TODO confirm.
pub skip_for_required_phrase_generation: bool,
/// Presumably identifiers of the entries that supersede this one; empty when there are none.
pub replaced_by: Vec<String>,
}
/// Collection of lookup tables used by license detection.
///
/// Many tables are keyed by, or hold, `usize` values named `rid`.
/// NOTE(review): `rid` presumably means "rule id", i.e. a position in `rules_by_rid`;
/// confirm against the builder module. All fields are public, so nothing here enforces
/// that the tables stay consistent with each other.
#[derive(Debug, Clone)]
pub struct LicenseIndex {
/// Token dictionary handed to [`LicenseIndex::new`].
pub dictionary: TokenDictionary,
/// Value of `dictionary.legalese_count()` captured by [`LicenseIndex::new`].
pub len_legalese: usize,
/// Maps a 20-byte hash to a rid.
/// NOTE(review): what is hashed (and with which algorithm) is not visible here.
pub rid_by_hash: HashMap<[u8; 20], usize>,
/// Rule models; presumably indexed by rid.
pub rules_by_rid: Vec<crate::license_detection::models::Rule>,
/// Token id sequences; presumably one per rule, indexed by rid.
pub tids_by_rid: Vec<Vec<TokenId>>,
/// Automaton over rules; [`LicenseIndex::new`] sets it to a fresh builder's output.
pub rules_automaton: Automaton,
/// Second automaton, presumably for unknown-license detection — TODO confirm;
/// [`LicenseIndex::new`] sets it to a fresh builder's output.
pub unknown_automaton: Automaton,
/// Token set per rid.
pub sets_by_rid: HashMap<usize, TokenSet>,
/// Metadata records keyed by a rule identifier string.
pub rule_metadata_by_identifier: HashMap<String, IndexedRuleMetadata>,
/// Per rid, a map from token id to a `usize`.
/// NOTE(review): presumably an occurrence count (token multiset) — confirm.
pub msets_by_rid: HashMap<usize, HashMap<TokenId, usize>>,
/// Token set per rid; presumably restricted to "high" (legalese) tokens — TODO confirm.
pub high_sets_by_rid: HashMap<usize, TokenSet>,
/// Per rid, a map from token id to a list of `usize` values.
/// NOTE(review): presumably token positions within the rule — confirm.
pub high_postings_by_rid: HashMap<usize, HashMap<TokenId, Vec<usize>>>,
/// Set of rids; presumably the rules flagged as false positives.
pub false_positive_rids: HashSet<usize>,
/// Set of rids; presumably the rules eligible for approximate matching.
/// NOTE(review): the `dead_code` lint is suppressed without a stated reason; confirm
/// whether this field is still needed outside tests.
#[allow(dead_code)]
pub approx_matchable_rids: HashSet<usize>,
/// License models keyed by a string; the tests in this file key entries by `License::key`.
pub licenses_by_key: HashMap<String, crate::license_detection::models::License>,
/// List of rid lists; presumably indexed by an automaton pattern id — TODO confirm.
pub pattern_id_to_rid: Vec<Vec<usize>>,
/// Maps a string key (presumably an SPDX license key) to a rid.
pub rid_by_spdx_key: HashMap<String, usize>,
/// Optional rid; presumably the rule used for unknown SPDX identifiers. `None` after `new`.
pub unknown_spdx_rid: Option<usize>,
/// Maps a token id to a set of rids; presumably the rules containing that "high" token.
pub rids_by_high_tid: HashMap<TokenId, HashSet<usize>>,
}
impl LicenseIndex {
    /// Creates an index that owns `dictionary` and has nothing else loaded.
    ///
    /// `len_legalese` is taken from `dictionary.legalese_count()`, both automatons are the
    /// output of a fresh `AutomatonBuilder`, `unknown_spdx_rid` is `None`, and every map,
    /// set and vector starts empty.
    pub fn new(dictionary: TokenDictionary) -> Self {
        use crate::license_detection::automaton::AutomatonBuilder;

        // Each call produces an independent automaton from a brand-new builder.
        let fresh_automaton = || AutomatonBuilder::new().build();

        Self {
            // Field initialisers run in written order, so the count is read
            // before `dictionary` is moved into the struct on the next line.
            len_legalese: dictionary.legalese_count(),
            dictionary,

            // Automatons.
            rules_automaton: fresh_automaton(),
            unknown_automaton: fresh_automaton(),

            // Rule tables.
            rid_by_hash: HashMap::new(),
            rules_by_rid: Vec::new(),
            tids_by_rid: Vec::new(),
            sets_by_rid: HashMap::new(),
            msets_by_rid: HashMap::new(),
            high_sets_by_rid: HashMap::new(),
            high_postings_by_rid: HashMap::new(),
            rids_by_high_tid: HashMap::new(),
            pattern_id_to_rid: Vec::new(),
            rule_metadata_by_identifier: HashMap::new(),

            // Rule id sets.
            false_positive_rids: HashSet::new(),
            approx_matchable_rids: HashSet::new(),

            // License and SPDX lookups.
            licenses_by_key: HashMap::new(),
            rid_by_spdx_key: HashMap::new(),
            unknown_spdx_rid: None,
        }
    }

    /// Creates an empty index around `TokenDictionary::new(legalese_count)`.
    pub fn with_legalese_count(legalese_count: usize) -> Self {
        let dictionary = TokenDictionary::new(legalese_count);
        Self::new(dictionary)
    }
}
impl Default for LicenseIndex {
fn default() -> Self {
Self::with_legalese_count(0)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::license_detection::models::License;

    /// Builds a `License` fixture from the few values the tests vary.
    ///
    /// `name` is used for both the name and the short name, and `spdx` for both the SPDX
    /// key and the OSI key. The language is fixed to `"en"`, all flags are `false`, and
    /// every remaining field is empty or `None`.
    fn simple_license(key: &str, name: &str, spdx: &str, category: &str, text: &str) -> License {
        License {
            key: key.to_string(),
            short_name: Some(name.to_string()),
            name: name.to_string(),
            language: Some("en".to_string()),
            spdx_license_key: Some(spdx.to_string()),
            other_spdx_license_keys: vec![],
            category: Some(category.to_string()),
            owner: None,
            homepage_url: None,
            text: text.to_string(),
            reference_urls: vec![],
            osi_license_key: Some(spdx.to_string()),
            text_urls: vec![],
            osi_url: None,
            faq_url: None,
            other_urls: vec![],
            notes: None,
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        }
    }

    /// `new` keeps the dictionary, caches its legalese count, and leaves every table empty.
    #[test]
    fn test_license_index_new() {
        let dict = TokenDictionary::new(10);
        let index = LicenseIndex::new(dict);

        assert_eq!(index.dictionary.legalese_count(), 10);
        // The cached count must agree with the dictionary it was read from.
        assert_eq!(index.len_legalese, 10);

        assert!(index.rid_by_hash.is_empty());
        assert!(index.rules_by_rid.is_empty());
        assert!(index.tids_by_rid.is_empty());
        assert!(index.sets_by_rid.is_empty());
        assert!(index.rule_metadata_by_identifier.is_empty());
        assert!(index.msets_by_rid.is_empty());
        assert!(index.high_sets_by_rid.is_empty());
        assert!(index.high_postings_by_rid.is_empty());
        assert!(index.false_positive_rids.is_empty());
        assert!(index.approx_matchable_rids.is_empty());
        assert!(index.licenses_by_key.is_empty());
        assert!(index.pattern_id_to_rid.is_empty());
        assert!(index.rid_by_spdx_key.is_empty());
        assert!(index.unknown_spdx_rid.is_none());
        assert!(index.rids_by_high_tid.is_empty());
    }

    /// `with_legalese_count` forwards the count to the dictionary it creates.
    #[test]
    fn test_license_index_with_legalese_count() {
        let index = LicenseIndex::with_legalese_count(15);
        assert_eq!(index.dictionary.legalese_count(), 15);
        assert!(index.rid_by_hash.is_empty());
    }

    /// The default index uses a legalese count of zero.
    #[test]
    fn test_license_index_default() {
        let index = LicenseIndex::default();
        assert_eq!(index.dictionary.legalese_count(), 0);
        assert!(index.rid_by_hash.is_empty());
    }

    /// Smoke test: an automaton built from a fresh builder can be `Debug`-formatted.
    #[test]
    fn test_automaton_default() {
        use crate::license_detection::automaton::AutomatonBuilder;
        let automaton = AutomatonBuilder::new().build();
        let _ = format!("{:?}", automaton);
    }

    /// Cloning preserves the dictionary's legalese count and the empty tables.
    #[test]
    fn test_license_index_clone() {
        let index = LicenseIndex::with_legalese_count(5);
        let cloned = index.clone();
        assert_eq!(cloned.dictionary.legalese_count(), 5);
        assert!(cloned.rid_by_hash.is_empty());
    }

    /// A single license inserted under its key can be found by that key.
    #[test]
    fn test_license_index_add_license() {
        let mut index = LicenseIndex::default();
        let license = simple_license(
            "test-license",
            "Test License",
            "TEST",
            "Permissive",
            "Test license text",
        );
        index.licenses_by_key.insert(license.key.clone(), license);

        assert_eq!(index.licenses_by_key.len(), 1);
        assert!(index.licenses_by_key.contains_key("test-license"));
    }

    /// Several licenses with distinct keys are all retained.
    #[test]
    fn test_license_index_add_licenses() {
        let mut index = LicenseIndex::default();
        let licenses = vec![
            simple_license(
                "license-1",
                "License 1",
                "LIC1",
                "Permissive",
                "License 1 text",
            ),
            simple_license(
                "license-2",
                "License 2",
                "LIC2",
                "Copyleft",
                "License 2 text",
            ),
        ];
        for license in licenses {
            index.licenses_by_key.insert(license.key.clone(), license);
        }

        assert_eq!(index.licenses_by_key.len(), 2);
        assert!(index.licenses_by_key.contains_key("license-1"));
        assert!(index.licenses_by_key.contains_key("license-2"));
    }

    /// Lookup returns the stored license for a known key and nothing for an unknown one.
    #[test]
    fn test_license_index_get_license() {
        let mut index = LicenseIndex::default();
        let license = simple_license(
            "mit",
            "MIT License",
            "MIT",
            "Permissive",
            "MIT License text",
        );
        index.licenses_by_key.insert(license.key.clone(), license);

        // A single `expect` replaces the former `is_some()` + `unwrap()` pair and
        // reports which lookup failed.
        let retrieved = index
            .licenses_by_key
            .get("mit")
            .expect("license inserted under key \"mit\" should be retrievable");
        assert_eq!(retrieved.name, "MIT License");
        assert!(!index.licenses_by_key.contains_key("unknown"));
    }

    /// The license table's length tracks insertions.
    #[test]
    fn test_license_index_license_count() {
        let mut index = LicenseIndex::default();
        assert_eq!(index.licenses_by_key.len(), 0);

        let license = simple_license("test", "Test", "TEST", "Permissive", "Text");
        index.licenses_by_key.insert(license.key.clone(), license);
        assert_eq!(index.licenses_by_key.len(), 1);
    }
}