use crate::{
StepID,
error::{AnnattoError, Result},
exporter::Exporter,
importer::{GenericImportConfiguration, Importer},
util::get_all_files,
workflow::StatusSender,
};
use graphannis::AnnotationGraph;
use graphannis_core::{
annostorage::{EdgeAnnotationStorage, NodeAnnotationStorage, ValueSearch},
graph::{ANNIS_NS, NODE_NAME_KEY},
types::{Edge, NodeID},
};
use itertools::Itertools;
use pretty_assertions::assert_eq;
use std::{cmp::Ordering, fs, io::BufWriter, path::Path};
use tempfile::TempDir;
pub fn import_as_graphml_string<I, P>(
importer: I,
path: P,
graph_configuration: Option<&str>,
) -> Result<String>
where
I: Importer,
P: AsRef<Path>,
{
import_as_graphml_string_2(importer, path, graph_configuration, true, None)
}
/// Runs `importer` on `path`, applies the produced update to a fresh annotation
/// graph and returns that graph serialized as GraphML with stable ordering.
///
/// * `graph_configuration` is passed through unchanged to the GraphML export.
/// * `disk_based` is forwarded to `AnnotationGraph::with_default_graphstorages`.
/// * `tx` is handed to the importer as its optional status sender.
///
/// # Errors
///
/// An import failure is reported as `AnnattoError::Import` carrying the
/// stringified cause, the module name `import_under_test` and the input path.
/// Failures while creating the graph, applying the update, exporting, or
/// decoding the output as UTF-8 are propagated.
pub fn import_as_graphml_string_2<I, P>(
    importer: I,
    path: P,
    graph_configuration: Option<&str>,
    disk_based: bool,
    tx: Option<StatusSender>,
) -> Result<String>
where
    I: Importer,
    P: AsRef<Path>,
{
    let corpus_path = path.as_ref();
    let step_id = StepID {
        module_name: "import_under_test".to_string(),
        path: None,
    };

    // Run the importer; wrap any failure so the error names this test step.
    let import_config = GenericImportConfiguration::new_with_default_extensions(&importer);
    let mut update = match importer.import_corpus(corpus_path, step_id.clone(), import_config, tx)
    {
        Ok(update) => update,
        Err(e) => {
            return Err(AnnattoError::Import {
                reason: e.to_string(),
                importer: step_id.module_name.to_string(),
                path: corpus_path.to_path_buf(),
            });
        }
    };

    // Materialize the update in a new graph.
    let mut graph = AnnotationGraph::with_default_graphstorages(disk_based)?;
    graph.apply_update(&mut update, |_| {})?;

    // Serialize into an in-memory buffer and decode it as UTF-8.
    let mut writer = BufWriter::new(Vec::new());
    graphannis_core::graph::serialization::graphml::export_stable_order(
        &graph,
        graph_configuration,
        &mut writer,
        |_| {},
    )?;
    let graphml = String::from_utf8(writer.into_inner()?)?;
    Ok(graphml)
}
/// Exports `graph` with `exporter` into a newly created temporary directory and
/// returns the concatenated contents of the exported files.
///
/// The temporary directory is handed over to
/// [`export_to_string_in_directory`], which does the actual work.
///
/// # Errors
///
/// Fails if the temporary directory cannot be created or if
/// [`export_to_string_in_directory`] fails.
pub fn export_to_string<E>(graph: &AnnotationGraph, exporter: E) -> Result<String>
where
    E: Exporter,
{
    let scratch_dir = TempDir::new()?;
    export_to_string_in_directory(graph, exporter, scratch_dir)
}
/// Exports `graph` with `exporter` into `output_path` and returns the contents
/// of all files with the exporter's file extension found there, concatenated
/// into a single string.
///
/// Each file is preceded by a header line of the form `---- <file name>:`.
/// A path without a file name component gets an empty name in its header.
///
/// # Errors
///
/// An export failure is reported as `AnnattoError::Export` (module name
/// `export_under_test`). Errors from listing the directory or reading a file
/// are propagated.
pub fn export_to_string_in_directory<E, P>(
    graph: &AnnotationGraph,
    exporter: E,
    output_path: P,
) -> Result<String>
where
    E: Exporter,
    P: AsRef<Path>,
{
    let target_dir = output_path.as_ref();
    let step_id = StepID {
        module_name: "export_under_test".to_string(),
        path: Some(target_dir.to_path_buf()),
    };

    // Run the exporter; wrap any failure so the error names this test step.
    if let Err(e) = exporter.export_corpus(graph, target_dir, step_id.clone(), None) {
        return Err(AnnattoError::Export {
            reason: format!("Could not export graph to read its output: {:?}", e),
            exporter: step_id.module_name.to_string(),
            path: target_dir.to_path_buf(),
        });
    }

    // Stitch all exported files together, each preceded by a header line.
    let mut combined = String::new();
    for file in get_all_files(target_dir, &[exporter.file_extension()])? {
        let file_name = file
            .file_name()
            .map(|name| name.to_string_lossy().to_string())
            .unwrap_or_default();
        combined.push_str(&format!("---- {}:\n", file_name));
        let contents = fs::read_to_string(file)?;
        combined.push_str(&contents);
    }
    Ok(combined)
}
/// Asserts that the edges at matching positions of `items1` and `items2` carry
/// the same annotations in `annos1` and `annos2` respectively.
///
/// Annotations are sorted before comparison, so their storage order is
/// irrelevant.
///
/// # Panics
///
/// Panics if the slices differ in length, if an annotation lookup fails, or if
/// any pair of annotation lists differs.
fn compare_edge_annos(
    annos1: &dyn EdgeAnnotationStorage,
    annos2: &dyn EdgeAnnotationStorage,
    items1: &[Edge],
    items2: &[Edge],
) {
    assert_eq!(items1.len(), items2.len());
    // Lengths are equal at this point, so zipping visits every position.
    for (edge1, edge2) in items1.iter().zip(items2) {
        let mut left = annos1.get_annotations_for_item(edge1).unwrap();
        left.sort();
        let mut right = annos2.get_annotations_for_item(edge2).unwrap();
        right.sort();
        assert_eq!(left, right);
    }
}
/// Asserts that the nodes at matching positions of `items1` and `items2` carry
/// the same annotations in `annos1` and `annos2` respectively.
///
/// Annotations are sorted before comparison, so their storage order is
/// irrelevant.
///
/// # Panics
///
/// Panics if the slices differ in length, if an annotation lookup fails, or if
/// any pair of annotation lists differs.
fn compare_node_annos(
    annos1: &dyn NodeAnnotationStorage,
    annos2: &dyn NodeAnnotationStorage,
    items1: &[NodeID],
    items2: &[NodeID],
) {
    assert_eq!(items1.len(), items2.len());
    // Lengths are equal at this point, so zipping visits every position.
    for (node1, node2) in items1.iter().zip(items2) {
        let mut left = annos1.get_annotations_for_item(node1).unwrap();
        left.sort();
        let mut right = annos2.get_annotations_for_item(node2).unwrap();
        right.sort();
        assert_eq!(left, right);
    }
}
/// Asserts that two annotation graphs are equivalent, panicking at the first
/// difference found.
///
/// The following is checked, in this order:
///
/// 1. the sorted `node_name` values (namespace `ANNIS_NS`) are the same,
/// 2. nodes paired by name carry the same node annotations,
/// 3. the sorted component lists are the same,
/// 4. for every component and every node, the sorted names of the targets of
///    its outgoing edges are the same and those edges carry the same
///    annotations.
///
/// # Panics
///
/// Panics if any of the checks fails or if an underlying graph lookup returns
/// an error.
pub fn compare_graphs(g1: &AnnotationGraph, g2: &AnnotationGraph) {
    // Collects every `node_name` value of `graph` in sorted order.
    fn sorted_node_names(graph: &AnnotationGraph) -> Vec<String> {
        graph
            .get_node_annos()
            .exact_anno_search(Some(ANNIS_NS), "node_name", ValueSearch::Any)
            .filter_map(|m| {
                m.unwrap()
                    .extract_annotation(graph.get_node_annos())
                    .unwrap()
            })
            .map(|anno| anno.val.into())
            .sorted()
            .collect()
    }

    // Translates node names into the internal node ids of `graph`, keeping order.
    fn resolve_node_ids(graph: &AnnotationGraph, names: Vec<String>) -> Vec<NodeID> {
        names
            .into_iter()
            .filter_map(|name| graph.get_node_annos().get_node_id_from_name(&name).unwrap())
            .collect()
    }

    // Step 1: both graphs must contain exactly the same node names.
    let names1 = sorted_node_names(g1);
    let names2 = sorted_node_names(g2);
    assert_eq!(&names1, &names2);

    // Step 2: nodes with the same name must have the same annotations.
    let ids1 = resolve_node_ids(g1, names1);
    let ids2 = resolve_node_ids(g2, names2);
    compare_node_annos(g1.get_node_annos(), g2.get_node_annos(), &ids1, &ids2);

    // Step 3: both graphs must have the same components.
    let mut components1 = g1.get_all_components(None, None);
    components1.sort();
    let mut components2 = g2.get_all_components(None, None);
    components2.sort();
    assert_eq!(components1, components2);

    // Step 4: compare the outgoing edges of every node in every component.
    for component in components1 {
        let storage1 = g1.get_graphstorage_as_ref(&component).unwrap();
        let storage2 = g2.get_graphstorage_as_ref(&component).unwrap();

        // `compare_node_annos` already asserted that both id lists have the
        // same length, so zipping visits every node pair.
        for (&source1, &source2) in ids1.iter().zip(ids2.iter()) {
            let target_names1: Result<Vec<String>> = storage1
                .get_outgoing_edges(source1)
                .filter_map_ok(|target| {
                    g1.get_node_annos()
                        .get_value_for_item(&target, &NODE_NAME_KEY)
                        .unwrap()
                })
                .map(|name| name.map_err(AnnattoError::from))
                .map_ok(|name| name.to_string())
                .collect();
            let mut target_names1 = target_names1.unwrap();
            target_names1.sort();

            let target_names2: Result<Vec<String>> = storage2
                .get_outgoing_edges(source2)
                .filter_map_ok(|target| {
                    g2.get_node_annos()
                        .get_value_for_item(&target, &NODE_NAME_KEY)
                        .unwrap()
                })
                .map(|name| name.map_err(AnnattoError::from))
                .map_ok(|name| name.to_string())
                .collect();
            let mut target_names2 = target_names2.unwrap();
            target_names2.sort();

            assert_eq!(target_names1, target_names2);

            // Rebuild the edges from the sorted target names so that both
            // edge lists are in the same order for the annotation comparison.
            let edges1: Vec<Edge> = target_names1
                .iter()
                .map(|name| {
                    let target = g1
                        .get_node_annos()
                        .get_node_id_from_name(name)
                        .unwrap()
                        .unwrap();
                    Edge {
                        source: source1,
                        target,
                    }
                })
                .collect();
            let edges2: Vec<Edge> = target_names2
                .iter()
                .map(|name| {
                    let target = g2
                        .get_node_annos()
                        .get_node_id_from_name(name)
                        .unwrap()
                        .unwrap();
                    Edge {
                        source: source2,
                        target,
                    }
                })
                .collect();

            compare_edge_annos(
                storage1.get_anno_storage(),
                storage2.get_anno_storage(),
                &edges1,
                &edges2,
            );
        }
    }
}
pub(crate) fn compare_results<T: Ord, E: Into<anyhow::Error>>(
a: &std::result::Result<T, E>,
b: &std::result::Result<T, E>,
) -> Ordering {
if let Ok(a) = a
&& let Ok(b) = b
{
a.cmp(b)
} else if a.is_err() {
Ordering::Less
} else if b.is_err() {
Ordering::Greater
} else {
Ordering::Equal
}
}