pub mod conllu;
pub mod exmaralda;
pub mod file_nodes;
pub mod git;
pub mod graphml;
pub mod meta;
pub mod none;
pub mod opus;
pub mod ptb;
pub mod relannis;
pub mod saltxml;
pub mod table;
pub mod text;
pub mod textgrid;
pub mod toolbox;
pub mod treetagger;
pub mod webanno;
pub mod whisper;
pub mod xlsx;
pub mod xml;
use crate::{StepID, workflow::StatusSender};
use graphannis::update::GraphUpdate;
use percent_encoding::{AsciiSet, CONTROLS};
use serde::{Deserialize, Serialize};
use std::path::Path;
/// Common interface of all importers.
///
/// Implementors must be `Sync`.
pub trait Importer: Sync {
    /// Imports the corpus found at `input_path`.
    ///
    /// On success the result is a [`GraphUpdate`]; any failure is reported as a
    /// boxed error.
    ///
    /// * `input_path` - location of the data to import.
    /// * `step_id` - identifier of the step this import belongs to
    ///   (presumably used for status/error reporting; confirm with implementors).
    /// * `config` - the generic import configuration (custom root name and
    ///   file extensions).
    /// * `tx` - optional status sender (presumably for progress messages;
    ///   confirm with implementors).
    fn import_corpus(
        &self,
        input_path: &Path,
        step_id: StepID,
        config: GenericImportConfiguration,
        tx: Option<StatusSender>,
    ) -> Result<GraphUpdate, Box<dyn std::error::Error>>;

    /// The file extensions this importer uses by default.
    fn default_file_extensions(&self) -> &[&str];

    /// Builds the default generic configuration for this importer: no custom
    /// root name, and the extensions from [`Importer::default_file_extensions`]
    /// converted to owned strings.
    fn default_configuration(&self) -> GenericImportConfiguration {
        let extensions: Vec<String> = self
            .default_file_extensions()
            .iter()
            .map(|ext| (*ext).to_owned())
            .collect();
        GenericImportConfiguration {
            root_as: None,
            extensions,
        }
    }
}
/// Set of ASCII bytes to be percent-encoded: everything in [`CONTROLS`] plus
/// the characters `:`, `/`, space, `%`, `\`, `<`, `>`, `"`, `|`, `?` and `*`.
///
/// NOTE(review): judging by the name this set is meant for encoding node
/// names; the actual call sites are not visible here.
pub const NODE_NAME_ENCODE_SET: &AsciiSet = &CONTROLS
.add(b':')
.add(b'/')
.add(b' ')
.add(b'%')
.add(b'\\')
.add(b'<')
.add(b'>')
.add(b'"')
.add(b'|')
.add(b'?')
.add(b'*');
/// Configuration that is passed to every importer (see
/// `Importer::import_corpus`), holding an optional custom root name and a
/// list of file extensions.
///
/// Both fields fall back to their default value (`None` / empty list) when
/// they are missing during deserialization.
#[derive(Clone, Default, Deserialize, PartialEq, Serialize)]
pub struct GenericImportConfiguration {
/// Optional custom root name. Can also be given under the key `as` when
/// deserializing.
#[serde(alias = "as", default)]
pub(crate) root_as: Option<String>,
/// File extensions of this configuration (presumably the extensions of the
/// files an importer should consider; confirm with the importers).
#[serde(default)]
pub(crate) extensions: Vec<String>,
}
impl GenericImportConfiguration {
    /// Returns a copy of the custom root name, or `None` if none is set.
    pub fn custom_root_name(&self) -> Option<String> {
        self.root_as.clone()
    }

    /// Returns the configured file extensions.
    pub fn extensions(&self) -> &Vec<String> {
        &self.extensions
    }

    /// Test-only constructor: a configuration with the given root name and no
    /// extensions.
    #[cfg(test)]
    pub fn new_with_root_name(root_name: String) -> Self {
        Self {
            root_as: Some(root_name),
            extensions: Vec::new(),
        }
    }

    /// Test-only constructor: a configuration with the given extensions and no
    /// custom root name.
    #[cfg(test)]
    pub fn new_with_extensions(extensions: Vec<String>) -> Self {
        Self {
            root_as: None,
            extensions,
        }
    }

    /// Test-only constructor: a configuration without a custom root name whose
    /// extensions are the default file extensions of `importer`.
    #[cfg(test)]
    pub fn new_with_default_extensions(importer: &dyn Importer) -> Self {
        let extensions: Vec<String> = importer
            .default_file_extensions()
            .iter()
            .map(|ext| (*ext).to_owned())
            .collect();
        Self {
            root_as: None,
            extensions,
        }
    }

    /// Test-only builder step: consumes `self` and returns a configuration
    /// that keeps the root name but uses the given `extensions` instead.
    #[cfg(test)]
    pub fn and_extensions(self, extensions: Vec<String>) -> Self {
        Self { extensions, ..self }
    }
}