use super::{SyntheticConfig, SyntheticGenerator};
use crate::error::Result;
use std::collections::{HashMap, HashSet};
/// Configuration for [`CodeEda`] augmentation.
///
/// The four `*_prob` fields are probabilities. The `with_*_prob` builder
/// methods clamp their argument into `0.0..=1.0`; assigning a field directly
/// performs no validation. How each value is consumed is decided by the
/// augmentation logic, which is not part of this section.
#[derive(Clone, Debug, PartialEq)]
pub struct CodeEdaConfig {
    /// Probability knob for the rename operation (default `0.15`).
    pub rename_prob: f32,
    /// Probability knob for the comment operation (default `0.1`).
    pub comment_prob: f32,
    /// Probability knob for the reorder operation (default `0.05`).
    pub reorder_prob: f32,
    /// Probability knob for the remove operation (default `0.1`).
    pub remove_prob: f32,
    /// Number of augmentations requested (default `4`); the builder setter
    /// never stores less than `1`.
    pub num_augments: usize,
    /// Minimum token count (default `5`).
    // NOTE(review): presumably inputs shorter than this are left untouched —
    // confirm against the implementation file.
    pub min_tokens: usize,
    /// Language the augmenter should assume (default [`CodeLanguage::Rust`]).
    pub language: CodeLanguage,
}
/// Language selector stored in [`CodeEdaConfig::language`].
///
/// `Rust` is the variant produced by `CodeLanguage::default()`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CodeLanguage {
    /// Rust source code (the default).
    #[default]
    Rust,
    /// Python source code.
    Python,
    /// Any other language.
    // NOTE(review): presumably a language-agnostic fallback — confirm how the
    // implementation file treats this variant.
    Generic,
}
impl Default for CodeEdaConfig {
fn default() -> Self {
Self {
rename_prob: 0.15,
comment_prob: 0.1,
reorder_prob: 0.05,
remove_prob: 0.1,
num_augments: 4,
min_tokens: 5,
language: CodeLanguage::Rust,
}
}
}
impl CodeEdaConfig {
    /// Creates a configuration populated with the [`Default`] values.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Normalizes a caller-supplied probability into `0.0..=1.0`.
    ///
    /// `f32::clamp` returns NaN when the input is NaN. Storing NaN would make
    /// the configuration unequal to its own clone under the derived
    /// `PartialEq`, so NaN is mapped to `0.0` instead.
    fn sanitize_prob(prob: f32) -> f32 {
        if prob.is_nan() {
            0.0
        } else {
            prob.clamp(0.0, 1.0)
        }
    }

    /// Sets `rename_prob`, clamped to `0.0..=1.0` (NaN becomes `0.0`).
    #[must_use]
    pub fn with_rename_prob(mut self, prob: f32) -> Self {
        self.rename_prob = Self::sanitize_prob(prob);
        self
    }

    /// Sets `comment_prob`, clamped to `0.0..=1.0` (NaN becomes `0.0`).
    #[must_use]
    pub fn with_comment_prob(mut self, prob: f32) -> Self {
        self.comment_prob = Self::sanitize_prob(prob);
        self
    }

    /// Sets `reorder_prob`, clamped to `0.0..=1.0` (NaN becomes `0.0`).
    #[must_use]
    pub fn with_reorder_prob(mut self, prob: f32) -> Self {
        self.reorder_prob = Self::sanitize_prob(prob);
        self
    }

    /// Sets `remove_prob`, clamped to `0.0..=1.0` (NaN becomes `0.0`).
    #[must_use]
    pub fn with_remove_prob(mut self, prob: f32) -> Self {
        self.remove_prob = Self::sanitize_prob(prob);
        self
    }

    /// Sets `num_augments`; a request for `0` is raised to `1`.
    #[must_use]
    pub fn with_num_augments(mut self, n: usize) -> Self {
        self.num_augments = n.max(1);
        self
    }

    /// Sets `min_tokens` exactly as given (no lower bound is enforced).
    #[must_use]
    pub fn with_min_tokens(mut self, n: usize) -> Self {
        self.min_tokens = n;
        self
    }

    /// Sets the language stored in the configuration.
    #[must_use]
    pub fn with_language(mut self, lang: CodeLanguage) -> Self {
        self.language = lang;
        self
    }
}
/// Lookup table mapping a word to its list of alternative names.
///
/// The `Default` implementation ships a built-in table; further entries can
/// be registered with `add_synonym`.
#[derive(Clone, Debug)]
pub struct VariableSynonyms {
    /// Key: the word being looked up. Value: its alternatives, in the order
    /// they were supplied.
    synonyms: HashMap<String, Vec<String>>,
}
/// Builds the built-in table of fifteen short names and their alternatives.
impl Default for VariableSynonyms {
    fn default() -> Self {
        // (word, alternatives) pairs; the order of alternatives is preserved.
        const BUILTIN: &[(&str, &[&str])] = &[
            ("x", &["value", "val"]),
            ("y", &["result", "res"]),
            ("i", &["index", "idx"]),
            ("j", &["inner", "jdx"]),
            ("n", &["count", "num"]),
            ("s", &["str", "text"]),
            ("tmp", &["temp", "scratch"]),
            ("data", &["input", "payload"]),
            ("result", &["output", "ret"]),
            ("buf", &["buffer", "data"]),
            ("len", &["length", "size"]),
            ("err", &["error", "e"]),
            ("msg", &["message", "text"]),
            ("fn", &["func"]),
            ("args", &["params", "arguments"]),
        ];

        let synonyms: HashMap<String, Vec<String>> = BUILTIN
            .iter()
            .map(|&(word, alternatives)| {
                let owned: Vec<String> = alternatives.iter().copied().map(String::from).collect();
                (String::from(word), owned)
            })
            .collect();

        Self { synonyms }
    }
}
impl VariableSynonyms {
    /// Creates a table pre-populated with the built-in (`Default`) entries.
    #[must_use]
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Registers `alternatives` for `word`.
    ///
    /// If `word` already has an entry, it is replaced rather than extended.
    pub fn add_synonym(&mut self, word: String, alternatives: Vec<String>) {
        // `insert` hands back the previous list, if any; it is dropped here.
        let _previous = self.synonyms.insert(word, alternatives);
    }

    /// Returns the alternatives registered for `word`, or `None` when the
    /// word has no entry.
    #[must_use]
    pub fn get(&self, word: &str) -> Option<&[String]> {
        let alternatives = self.synonyms.get(word)?;
        Some(alternatives.as_slice())
    }

    /// Reports whether `word` has an entry (even one with an empty list).
    #[must_use]
    pub fn has_synonym(&self, word: &str) -> bool {
        self.get(word).is_some()
    }
}
/// Code augmenter state: a configuration, a synonym table, and a set of
/// reserved words.
///
/// The methods that operate on this state are not defined in this section.
#[derive(Clone, Debug)]
pub struct CodeEda {
    /// Probabilities, limits, and language selection.
    config: CodeEdaConfig,
    /// Table of alternative names for known words.
    synonyms: VariableSynonyms,
    /// Set of reserved words.
    // NOTE(review): presumably identifiers that must never be renamed
    // (language keywords) — confirm against the implementation file.
    reserved: HashSet<String>,
}
// The two files below are textually spliced into this module by `include!`,
// so their items share this module's scope and can reach the private fields
// declared above.
// NOTE(review): presumably `code_eda_impl.rs` holds the `CodeEda` methods and
// is where the otherwise-unused imports at the top of this file are consumed
// — confirm.
include!("code_eda_impl.rs");
// NOTE(review): presumably the tests inside this file are gated with
// `#[cfg(test)]`; the include itself is unconditional — confirm.
include!("code_eda_tests.rs");