impl SemanticSimilarity {
    /// Builds a scorer preloaded with a fixed list of English stopwords.
    ///
    /// The list covers common articles, conjunctions, prepositions and
    /// auxiliary verbs; `extract_keywords` drops these before scoring.
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
    pub fn new() -> Self {
        const STOPWORDS: &[&str] = &[
            "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with",
            "by", "from", "as", "is", "was", "are", "were", "be", "been", "being", "have", "has",
            "had", "do", "does", "did", "will", "would", "should", "could", "may", "might", "must",
            "can", "cannot",
        ];
        let stopwords = STOPWORDS.iter().map(|word| String::from(*word)).collect();
        Self { stopwords }
    }

    /// Scores how well `fact` supports `claim`.
    ///
    /// Both inputs are lowercased, split on whitespace and stripped of
    /// stopwords. Every claim keyword contributes its weight in full when the
    /// identical keyword occurs in the fact, and half of it when the two only
    /// overlap as substrings (in either direction). The weighted fraction is
    /// then adjusted by `semantic_keyword_boost`.
    ///
    /// # Returns
    ///
    /// `0.0` when either side has no keywords left. Otherwise the adjusted
    /// score, capped at `1.0` from above only: when the boost step reports a
    /// contradiction (`-0.8`) the result can be negative.
    // NOTE(review): the lower bound is deliberately left unclamped here because
    // callers may rely on a negative score to detect contradictions; confirm
    // this agrees with the `score_range` contract.
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "score_range")]
    pub fn calculate(&self, claim: &str, fact: &str) -> f32 {
        let claim_lower = claim.to_lowercase();
        let fact_lower = fact.to_lowercase();
        let claim_words = self.extract_keywords(&claim_lower);
        let fact_words = self.extract_keywords(&fact_lower);
        if claim_words.is_empty() || fact_words.is_empty() {
            return 0.0;
        }

        let mut score = 0.0;
        let mut total_weight = 0.0;
        for claim_word in &claim_words {
            let weight = self.get_word_weight(claim_word);
            total_weight += weight;
            if fact_words.contains(claim_word) {
                // Exact keyword match: full credit.
                score += weight;
            } else if fact_words
                .iter()
                .any(|fw| fw.contains(claim_word.as_str()) || claim_word.contains(fw.as_str()))
            {
                // One keyword is a substring of the other: half credit.
                score += weight * 0.5;
            }
        }

        // Defensive guard against dividing by zero; every weight returned by
        // `get_word_weight` is currently at least 1.0.
        if total_weight == 0.0 {
            return 0.0;
        }

        let base_score = score / total_weight;
        let boost = self.semantic_keyword_boost(&claim_lower, &fact_lower);
        (base_score + boost).min(1.0)
    }

    /// Splits `text` on whitespace and returns the words that are not in the
    /// stopword list, in their original order.
    ///
    /// No case folding or punctuation stripping happens here; `calculate`
    /// passes in text that is already lowercased.
    fn extract_keywords(&self, text: &str) -> Vec<String> {
        // Convert first, then filter on the owned value, so each word is
        // allocated once rather than once for the lookup and again for the
        // result.
        text.split_whitespace()
            .map(String::from)
            .filter(|word| !self.stopwords.contains(word))
            .collect()
    }

    /// Returns the scoring weight of a single (lowercased) keyword.
    ///
    /// Programming-language names weigh 3.0, analyze/compile word forms 2.5,
    /// support/detect/generate word forms 2.0, a few domain nouns 1.5, and
    /// every other word (including the tool name "pmat") the baseline 1.0.
    fn get_word_weight(&self, word: &str) -> f32 {
        match word {
            "rust" | "typescript" | "javascript" | "python" | "c" | "cpp" | "go" | "java"
            | "kotlin" | "ruby" | "php" | "swift" | "haskell" => 3.0,
            "analyze" | "analyzes" | "analyzing" | "analysis" => 2.5,
            "compile" | "compiles" | "compiling" | "compilation" => 2.5,
            "support" | "supports" | "supporting" | "supported" => 2.0,
            "detect" | "detects" | "detecting" | "detection" => 2.0,
            "generate" | "generates" | "generating" => 2.0,
            "complexity" | "metrics" | "code" | "files" | "functions" => 1.5,
            _ => 1.0,
        }
    }

    /// Computes an additive adjustment from phrase-level cues shared by
    /// `claim` and `fact` (both expected to be lowercased already).
    ///
    /// * Returns `-0.8` immediately when a non-negated claim uses an action
    ///   verb that the fact negates ("not <verb>") or restricts (the fact
    ///   contains the verb together with "but not" or "only").
    /// * Adds `0.3` per action-verb entry found in both texts when claim and
    ///   fact agree on polarity (both negated or both not).
    /// * Adds `0.4` once when both texts mention the same language name as a
    ///   whole word.
    /// * Adds `0.2` when both mention "complexity" or both mention "metrics".
    fn semantic_keyword_boost(&self, claim: &str, fact: &str) -> f32 {
        // Verb matching is substring based, so inflected forms such as
        // "supports" hit the "support" entry. Because "compile" and "compiles"
        // are both listed, a shared "compiles" earns the agreement bonus twice.
        const ACTION_VERBS: [&str; 5] = ["compile", "compiles", "analyze", "support", "generate"];
        const LANGUAGES: [&str; 6] = ["rust", "typescript", "javascript", "python", "c", "cpp"];

        // These polarity flags do not depend on the verb, so compute them once.
        let claim_negative = claim.contains("cannot") || claim.contains("does not");
        let fact_negative =
            fact.contains("cannot") || fact.contains("does not") || fact.contains("but not");
        let fact_restricted = fact.contains("but not") || fact.contains("only");

        let mut boost = 0.0;
        for verb in ACTION_VERBS.iter().copied() {
            if !claim.contains(verb) {
                continue;
            }
            let fact_has_verb = fact.contains(verb);

            if !claim_negative {
                // "does not <verb>" and "cannot <verb>" both contain
                // "not <verb>", so this single check covers all three phrasings.
                let fact_negates_verb = fact.contains(&format!("not {}", verb));
                if fact_negates_verb || (fact_has_verb && fact_restricted) {
                    return -0.8;
                }
            }

            if fact_has_verb && claim_negative == fact_negative {
                boost += 0.3;
            }
        }

        // Language names must match as whole alphanumeric tokens. A plain
        // substring test would let the entry "c" match any text that merely
        // contains the letter 'c' (e.g. "code"), granting the bonus to almost
        // every pair of sentences.
        let mentions = |text: &str, word: &str| {
            text.split(|ch: char| !ch.is_alphanumeric())
                .any(|token| token == word)
        };
        let shares_language = LANGUAGES
            .iter()
            .any(|&lang| mentions(claim, lang) && mentions(fact, lang));
        if shares_language {
            boost += 0.4;
        }

        if (claim.contains("complexity") && fact.contains("complexity"))
            || (claim.contains("metrics") && fact.contains("metrics"))
        {
            boost += 0.2;
        }

        boost
    }
}
impl Default for SemanticSimilarity {
fn default() -> Self {
Self::new()
}
}