use std::{fs, io::Write, path::PathBuf};
use facet::Facet;
use graphannis::{
AnnotationGraph,
graph::{AnnoKey, NodeID},
};
use graphannis_core::{annostorage::ValueSearch, graph::ANNIS_NS};
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use crate::progress::ProgressReporter;
use super::Exporter;
#[derive(Facet, Deserialize, Serialize, Clone, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct ExportMeta {
#[serde(default = "default_name_key", with = "crate::estarde::anno_key")]
name_key: AnnoKey,
#[serde(default, with = "crate::estarde::anno_key::in_sequence")]
only: Vec<AnnoKey>,
#[serde(default)]
write_ns: bool,
}
impl Default for ExportMeta {
fn default() -> Self {
Self {
name_key: default_name_key(),
only: Default::default(),
write_ns: Default::default(),
}
}
}
fn default_name_key() -> AnnoKey {
AnnoKey {
name: "doc".into(),
ns: ANNIS_NS.into(),
}
}
const FILE_EXTENSION: &str = "meta";
impl Exporter for ExportMeta {
fn export_corpus(
&self,
graph: &graphannis::AnnotationGraph,
output_path: &std::path::Path,
step_id: crate::StepID,
tx: Option<crate::workflow::StatusSender>,
) -> Result<(), Box<dyn std::error::Error>> {
let nodes = graph
.get_node_annos()
.exact_anno_search(
Some(&self.name_key.ns),
&self.name_key.name,
ValueSearch::Any,
)
.flatten()
.collect_vec();
let progress = ProgressReporter::new(tx, step_id, nodes.len())?;
for m in nodes {
if let Some(name) = graph
.get_node_annos()
.get_value_for_item(&m.node, &self.name_key)?
{
let name_with_extension = format!("{name}.{}", self.file_extension());
let file_path = output_path.join(name_with_extension);
self.export_document(file_path, m.node, graph)?;
progress.worked(1)?;
}
}
Ok(())
}
fn file_extension(&self) -> &str {
FILE_EXTENSION
}
}
impl ExportMeta {
fn export_document(
&self,
path: PathBuf,
node: NodeID,
graph: &AnnotationGraph,
) -> Result<(), anyhow::Error> {
let mut writable = fs::File::create(path)?;
let annos = graph.get_node_annos().get_annotations_for_item(&node)?;
let mut lines = Vec::with_capacity(annos.len());
for anno in annos {
if (self.only.is_empty() && anno.key.ns.as_str() != ANNIS_NS)
|| self.only.contains(&anno.key)
{
let key = if self.write_ns {
[anno.key.ns.as_str(), anno.key.name.as_str()].join("::")
} else {
anno.key.name.to_string()
};
let line = [key, anno.val.to_string()].join("=");
lines.push(line);
}
}
lines.sort();
for line in lines {
writable.write_all(line.as_bytes())?;
writable.write_all("\n".as_bytes())?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::{fs, path::Path};
use graphannis::{AnnotationGraph, graph::AnnoKey};
use insta::assert_snapshot;
use crate::{
importer::{GenericImportConfiguration, Importer, exmaralda::ImportEXMARaLDA},
test_util::export_to_string,
};
use super::ExportMeta;
#[test]
fn serialize() {
let module = ExportMeta::default();
let serialization = toml::to_string(&module);
assert!(
serialization.is_ok(),
"Serialization failed: {:?}",
serialization.err()
);
assert_snapshot!(serialization.unwrap());
}
#[test]
fn serialize_custom() {
let module = ExportMeta {
name_key: AnnoKey {
ns: "default_ns".into(),
name: "document".into(),
},
only: vec![
AnnoKey {
ns: "default_ns".into(),
name: "author".into(),
},
AnnoKey {
ns: "default_ns".into(),
name: "date".into(),
},
],
write_ns: true,
};
let serialization = toml::to_string(&module);
assert!(
serialization.is_ok(),
"Serialization failed: {:?}",
serialization.err()
);
assert_snapshot!(serialization.unwrap());
}
fn test(snapshot_name: &str, workflow_path: &Path) {
let toml_str = fs::read_to_string(workflow_path);
assert!(toml_str.is_ok());
let r = toml::from_str(toml_str.unwrap().as_str());
let exporter: ExportMeta = r.unwrap();
let importer = ImportEXMARaLDA::default();
let u = importer.import_corpus(
Path::new("tests/data/import/exmaralda/clean/"),
crate::StepID {
module_name: "test_import".to_string(),
path: None,
},
GenericImportConfiguration::new_with_default_extensions(&importer),
None,
);
assert!(u.is_ok());
let g = AnnotationGraph::with_default_graphstorages(true);
assert!(g.is_ok());
let mut update = u.unwrap();
let mut graph = g.unwrap();
assert!(graph.apply_update(&mut update, |_| {}).is_ok());
let export = export_to_string(&graph, exporter);
assert!(export.is_ok());
assert_snapshot!(snapshot_name, export.unwrap());
}
#[test]
fn with_ns() {
test("with_ns", Path::new("tests/data/export/meta/ns.toml"));
}
#[test]
fn default() {
test("default", Path::new("tests/data/export/meta/default.toml"));
}
#[test]
fn non_default_key() {
test(
"non_default",
Path::new("tests/data/export/meta/non_default_key.toml"),
);
}
#[test]
fn only() {
test("only", Path::new("tests/data/export/meta/only.toml"));
}
}