/// File type tag held in the `file_type` field of `Configuration`.
///
/// NOTE(review): presumably names the format of the input files; the code
/// that interprets this value is not visible here -- confirm against the reader.
#[derive(Debug)]
pub enum FileType {
/// JSON variant.
Json,
/// TSV variant; this is the value `Configuration::default` selects.
Tsv,
}
/// Output format tag held in the `output_format` field of `Configuration`.
///
/// NOTE(review): presumably selects how results are written out; the writer
/// code is not visible here -- confirm against the consumer.
#[derive(Debug)]
pub enum OutputFormat {
/// Text file variant; this is the value `Configuration::default` selects.
TextFile,
/// Numpy variant (presumably a NumPy-compatible array file -- confirm).
Numpy,
}
/// Settings bundle with public fields.
///
/// `Configuration::default` builds an instance with preset values from a
/// single input string and a list of columns; the defaults quoted below are
/// the ones assigned there.
#[derive(Debug)]
pub struct Configuration {
/// Defaults to `true`.
/// NOTE(review): presumably toggles emitting per-entity occurrence counts -- confirm.
pub produce_entity_occurrence_count: bool,
/// Defaults to `128`.
/// NOTE(review): presumably the length of each embedding vector -- confirm.
pub embeddings_dimension: u16,
/// Defaults to `4`.
/// NOTE(review): presumably an upper bound on iterations -- confirm.
pub max_number_of_iteration: u8,
/// Optional seed; defaults to `None`.
/// NOTE(review): presumably seeds random initialisation; what happens when it
/// is `None` is not visible here.
pub seed: Option<i64>,
/// Defaults to `true`.
/// NOTE(review): presumably prefixes each entity with its field name -- confirm.
pub prepend_field: bool,
/// Defaults to `1000`.
/// NOTE(review): presumably the interval between progress log lines -- confirm.
pub log_every_n: u32,
/// Defaults to `true`.
/// NOTE(review): presumably chooses in-memory computation over an alternative
/// strategy -- confirm.
pub in_memory_embedding_calculation: bool,
/// Input entries; `Configuration::default` wraps its single `input` argument
/// in a one-element vector. Presumably file paths -- confirm.
pub input: Vec<String>,
/// File type tag; defaults to `FileType::Tsv`.
pub file_type: FileType,
/// Optional output directory; defaults to `None`.
/// NOTE(review): the fallback used when this is `None` is not visible here.
pub output_dir: Option<String>,
/// Output format tag; defaults to `OutputFormat::TextFile`.
pub output_format: OutputFormat,
/// Relation name; defaults to `"emb"`.
pub relation_name: String,
/// Column descriptors supplied by the caller. `not_ignored_columns` returns
/// the ones whose `ignored` flag is `false`.
pub columns: Vec<Column>,
}
/// Descriptor of a single column: its name plus four boolean modifiers.
///
/// `extract_fields` builds these from specs of the form
/// `modifier::...::name`. `validate_fields` rejects a column that is
/// `reflexive` while also `transient`, or `reflexive` without being `complex`.
#[derive(Debug, Default)]
pub struct Column {
/// Column name; `extract_fields` takes it from the last `::`-separated part of the spec.
pub name: String,
/// Set when the spec carries the `transient` modifier (matched ASCII case-insensitively).
pub transient: bool,
/// Set when the spec carries the `complex` modifier (matched ASCII case-insensitively).
pub complex: bool,
/// Set when the spec carries the `reflexive` modifier (matched ASCII case-insensitively).
pub reflexive: bool,
/// Set when the spec carries the `ignore` modifier (matched ASCII case-insensitively);
/// such columns are left out by `Configuration::not_ignored_columns`.
pub ignored: bool,
}
impl Configuration {
    /// Builds a configuration with preset values for everything except the
    /// input and the columns.
    ///
    /// `input` becomes the sole element of the `input` vector and `columns`
    /// is stored unchanged. Presets: occurrence counting on, 128 embedding
    /// dimensions, at most 4 iterations, no seed, field prepending on, logging
    /// every 1000, in-memory calculation on, TSV file type, no output
    /// directory, text-file output and relation name `"emb"`.
    pub fn default(input: String, columns: Vec<Column>) -> Configuration {
        let inputs = vec![input];
        let relation_name = "emb".to_string();

        Self {
            input: inputs,
            columns,
            relation_name,
            file_type: FileType::Tsv,
            output_format: OutputFormat::TextFile,
            output_dir: None,
            seed: None,
            embeddings_dimension: 128,
            max_number_of_iteration: 4,
            log_every_n: 1000,
            produce_entity_occurrence_count: true,
            prepend_field: true,
            in_memory_embedding_calculation: true,
        }
    }

    /// Returns references to the columns whose `ignored` flag is `false`,
    /// in their original order.
    pub fn not_ignored_columns(&self) -> Vec<&Column> {
        let is_kept = |column: &&Column| !column.ignored;
        self.columns.iter().filter(is_kept).collect()
    }
}
pub fn extract_fields(cols: Vec<&str>) -> Result<Vec<Column>, String> {
let mut columns: Vec<Column> = Vec::new();
for col in cols {
let parts: Vec<&str> = col.split("::").collect();
let column_name: &str;
let mut transient = false;
let mut complex = false;
let mut reflexive = false;
let mut ignored = false;
let parts_len = parts.len();
if parts_len > 1 {
column_name = *parts.last().unwrap();
let column_name_idx = parts_len - 1;
for &part in &parts[..column_name_idx] {
if part.eq_ignore_ascii_case("transient") {
transient = true;
} else if part.eq_ignore_ascii_case("complex") {
complex = true;
} else if part.eq_ignore_ascii_case("reflexive") {
reflexive = true;
} else if part.eq_ignore_ascii_case("ignore") {
ignored = true;
} else {
let message = format!("Unrecognized column field modifier: {}", part);
return Err(message);
}
}
} else {
column_name = col;
}
let column = Column {
name: column_name.to_string(),
transient,
complex,
reflexive,
ignored,
};
columns.push(column);
}
Ok(columns)
}
/// Checks that no column combines modifiers in a contradictory way and hands
/// the columns back unchanged on success.
///
/// # Errors
///
/// For the first offending column, in order, returns a message naming it when
/// the column is
/// - `reflexive` and `transient` (checked first), or
/// - `reflexive` but not `complex`.
pub fn validate_fields(cols: Vec<Column>) -> Result<Vec<Column>, String> {
    // Only reflexive columns can violate either rule.
    let violation = cols
        .iter()
        .filter(|column| column.reflexive)
        .find_map(|column| {
            if column.transient {
                Some(format!(
                    "A field cannot be REFLEXIVE and simultaneously TRANSIENT. It does not make sense: {}",
                    column.name
                ))
            } else if !column.complex {
                Some(format!(
                    "A field cannot be REFLEXIVE but NOT COMPLEX. It does not make sense: {}",
                    column.name
                ))
            } else {
                None
            }
        });

    match violation {
        Some(message) => Err(message),
        None => Ok(cols),
    }
}