use bit_set::BitSet;
use std::collections::{HashMap, HashSet};
use crate::license_detection::index::LicenseIndex;
use crate::license_detection::index::dictionary::{TokenDictionary, TokenId};
use crate::license_detection::models::Rule;
use crate::license_detection::query::Query;
/// Builds a [`LicenseIndex`] for tests from an explicit legalese table.
///
/// The `legalese` pairs are passed to [`TokenDictionary::new_with_legalese`]
/// (presumably token text paired with its numeric id — confirm against the
/// dictionary), and the freshly created index then has its `len_legalese`
/// field overwritten with the `len_legalese` argument.
pub fn create_test_index(legalese: &[(&str, u16)], len_legalese: usize) -> LicenseIndex {
    // The dictionary is given an owned copy of the pairs rather than the
    // caller's slice, exactly as this helper has always done.
    let pairs: Vec<(&str, u16)> = legalese.to_vec();
    let mut test_index = LicenseIndex::new(TokenDictionary::new_with_legalese(&pairs));
    test_index.len_legalese = len_legalese;
    test_index
}
/// Returns a small ready-made test index.
///
/// The legalese table registers `"mit"`, `"license"`, `"apache"` and `"2.0"`
/// with the values 0 through 3, and `len_legalese` is set to 2.
pub fn create_test_index_default() -> LicenseIndex {
    const DEFAULT_LEGALESE: [(&str, u16); 4] = [
        ("mit", 0),
        ("license", 1),
        ("apache", 2),
        ("2.0", 3),
    ];
    const DEFAULT_LEN_LEGALESE: usize = 2;

    create_test_index(&DEFAULT_LEGALESE, DEFAULT_LEN_LEGALESE)
}
/// Builds a mock text [`Rule`] carrying the given token ids.
///
/// * `license_expression` — stored as the rule's expression and also used to
///   derive the identifier `"<license_expression>.LICENSE"`.
/// * `tokens` — raw ids, each wrapped with `TokenId::new`.
/// * `is_small` / `is_tiny` — copied verbatim into the rule.
///
/// `length_unique`, `high_length_unique` and `high_length` are all set to the
/// total number of tokens; duplicates are not collapsed. The rule is marked
/// continuous with relevance 100, and every optional field is left empty.
pub fn create_mock_rule(
    license_expression: &str,
    tokens: Vec<u16>,
    is_small: bool,
    is_tiny: bool,
) -> Rule {
    let token_ids: Vec<TokenId> = tokens.into_iter().map(|raw| TokenId::new(raw)).collect();
    // One count feeds all three length fields below.
    let token_count = token_ids.len();

    Rule {
        // Identity.
        identifier: format!("{license_expression}.LICENSE"),
        license_expression: license_expression.to_owned(),
        spdx_license_key: None,
        other_spdx_license_keys: Vec::new(),
        language: None,
        notes: None,

        // Content.
        text: String::new(),
        tokens: token_ids,
        required_phrase_spans: Vec::new(),
        stopwords_by_pos: HashMap::new(),

        // Classification flags.
        rule_kind: crate::license_detection::models::RuleKind::Text,
        is_false_positive: false,
        is_required_phrase: false,
        is_from_license: false,
        is_continuous: true,
        is_deprecated: false,
        is_small,
        is_tiny,
        starts_with_license: false,
        ends_with_license: false,

        // Scoring.
        relevance: 100,
        minimum_coverage: None,
        has_stored_minimum_coverage: false,

        // Ignorables and references.
        referenced_filenames: None,
        ignorable_urls: None,
        ignorable_emails: None,
        ignorable_copyrights: None,
        ignorable_holders: None,
        ignorable_authors: None,

        // Lengths and matching thresholds.
        length_unique: token_count,
        high_length_unique: token_count,
        high_length: token_count,
        min_matched_length: 0,
        min_high_matched_length: 0,
        min_matched_length_unique: 0,
        min_high_matched_length_unique: 0,
    }
}
/// Builds a token-less mock [`Rule`] with a caller-chosen relevance.
///
/// The identifier is `"<license_expression>.LICENSE"`, the rule kind is
/// `RuleKind::None`, and the rule is not continuous. All length and threshold
/// fields are zero, all boolean flags are `false`, and every optional field is
/// left empty.
pub fn create_mock_rule_simple(license_expression: &str, relevance: u8) -> Rule {
    Rule {
        // Identity.
        identifier: format!("{license_expression}.LICENSE"),
        license_expression: license_expression.to_owned(),
        spdx_license_key: None,
        other_spdx_license_keys: Vec::new(),
        language: None,
        notes: None,

        // Content: deliberately empty.
        text: String::new(),
        tokens: Vec::new(),
        required_phrase_spans: Vec::new(),
        stopwords_by_pos: HashMap::new(),

        // Classification flags.
        rule_kind: crate::license_detection::models::RuleKind::None,
        is_false_positive: false,
        is_required_phrase: false,
        is_from_license: false,
        is_continuous: false,
        is_deprecated: false,
        is_small: false,
        is_tiny: false,
        starts_with_license: false,
        ends_with_license: false,

        // Scoring.
        relevance,
        minimum_coverage: None,
        has_stored_minimum_coverage: false,

        // Ignorables and references.
        referenced_filenames: None,
        ignorable_urls: None,
        ignorable_emails: None,
        ignorable_copyrights: None,
        ignorable_holders: None,
        ignorable_authors: None,

        // Lengths and matching thresholds.
        length_unique: 0,
        high_length_unique: 0,
        high_length: 0,
        min_matched_length: 0,
        min_high_matched_length: 0,
        min_matched_length_unique: 0,
        min_high_matched_length_unique: 0,
    }
}
/// Builds a mock [`Query`] over `index` that holds the given token ids.
///
/// Each raw id is wrapped with `TokenId::new`. Every token position is assigned
/// line 1 and is included in `high_matchables`; `low_matchables` is empty.
/// The text, the position maps and sets, the query-run ranges and the SPDX
/// lines are all empty, and the query is flagged as non-binary.
pub fn create_mock_query_with_tokens<'a>(tokens: &[u16], index: &'a LicenseIndex) -> Query<'a> {
    let token_ids: Vec<TokenId> = tokens.iter().map(|&raw| TokenId::new(raw)).collect();
    // Captured before `token_ids` is moved into the struct below.
    let position_count = token_ids.len();

    Query {
        index,
        text: String::new(),
        is_binary: false,
        tokens: token_ids,
        // All positions share the same line number.
        line_by_pos: vec![1; position_count],
        // Positions 0..position_count are all high matchables.
        high_matchables: (0..position_count).collect(),
        low_matchables: BitSet::new(),
        unknowns_by_pos: HashMap::new(),
        stopwords_by_pos: HashMap::new(),
        shorts_and_digits_pos: HashSet::new(),
        query_run_ranges: Vec::new(),
        spdx_lines: Vec::new(),
    }
}