use std::{
collections::{BTreeMap, btree_map::Entry},
fmt::Display,
fs,
io::Write,
path::{Path, PathBuf},
sync::mpsc,
};
use anyhow::anyhow;
use facet::Facet;
use graphannis::{AnnotationGraph, aql, errors::GraphAnnisError};
use graphannis_core::{
errors::GraphAnnisCoreError,
graph::{ANNIS_NS, NODE_NAME_KEY, NODE_TYPE},
};
use itertools::Itertools;
use serde::Serialize;
use serde_derive::Deserialize;
use tabled::{Table, Tabled};
use crate::{
Manipulator, StepID,
error::AnnattoError,
workflow::{StatusMessage, StatusSender},
};
// Configuration of the check module: a list of tests that are run against the corpus graph,
// plus options that control how the results are reported and when the step fails.
// Plain `//` comments are used here so that the input of the derive macros stays unchanged.
#[derive(Facet, Deserialize, Serialize, Clone, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Check {
// The tests to run, in the given order. This is the only field without a serde default.
tests: Vec<Test>, #[serde(default)]
// (The `#[serde(default)]` at the end of the previous line belongs to this field.)
// When set and a status sender is available, a result table of this level is sent as an
// info message. It is also the level of the saved report; if unset, the saved report
// falls back to `ReportLevel::default()`.
report: Option<ReportLevel>, #[serde(default)]
// (The `#[serde(default)]` at the end of the previous line belongs to this field.)
// When `true`, passed tests are left out of the result table.
failed_only: bool,
// Module-wide failure policy; defaults to `FailurePolicy::Fail`.
#[serde(default)]
policy: FailurePolicy,
// Optional file the (color-free) report is written to. Relative paths are resolved
// against the workflow directory.
#[serde(default)]
save: Option<PathBuf>,
// Only relevant if the file in `save` already exists: `true` replaces the file,
// `false` appends to it.
#[serde(default)]
overwrite: bool,
}
// Decides what a failed test means for the step as a whole.
// Serialized in snake case (`warn`, `fail`).
#[derive(Facet, Clone, Default, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
#[repr(u8)]
enum FailurePolicy {
// A failed test is only reported as a warning.
Warn,
// A failed test may turn into a `ChecksFailed` error (this is the default).
#[default]
Fail,
}
// Level of detail of the result table.
// Serialized in snake case (`list`, `verbose`).
#[derive(Facet, Deserialize, Default, Serialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")]
#[repr(u8)]
enum ReportLevel {
// One row per test with description and result only (this is the default).
#[default] List,
// Additionally shows the query of failed tests and appends the matches of a failed
// test or the message of a processing error below the table.
Verbose,
}
impl Manipulator for Check {
fn manipulate_corpus(
&self,
graph: &mut graphannis::AnnotationGraph,
workflow_directory: &Path,
_step_id: StepID,
tx: Option<crate::workflow::StatusSender>,
) -> Result<(), Box<dyn std::error::Error>> {
let (r, policies) = self.run_tests(graph)?;
if let (Some(level), Some(sender)) = (&self.report, &tx) {
self.print_report(level, &r, sender)?;
}
if let Some(path) = &self.save {
let (sender, receiver) = mpsc::channel();
self.print_report(
self.report.as_ref().unwrap_or(&ReportLevel::default()),
&r,
&sender,
)?;
if let Some(StatusMessage::Info(msg)) = receiver.into_iter().next() {
let target_path = if path.is_absolute() {
path.to_path_buf()
} else {
workflow_directory.join(path)
};
let color_free = msg
.replace(&ansi_term::Color::Red.prefix().to_string(), "")
.replace(&ansi_term::Color::Red.suffix().to_string(), "")
.replace(&ansi_term::Color::Green.prefix().to_string(), "")
.replace(&ansi_term::Color::Green.suffix().to_string(), "")
.replace(&ansi_term::Color::Purple.prefix().to_string(), "")
.replace(&ansi_term::Color::Purple.suffix().to_string(), "");
if target_path.exists() {
if let Some(sender) = &tx {
sender.send(StatusMessage::Info(format!(
"{} check log to file {} ...",
if self.overwrite {
"Writing"
} else {
"Appending"
},
target_path.to_string_lossy()
)))?;
}
let mut f = if self.overwrite {
fs::remove_file(target_path.as_path())?;
fs::File::create(target_path)?
} else {
fs::OpenOptions::new().append(true).open(target_path)?
};
f.write_all("\n\n".as_bytes())?;
f.write_all(color_free.as_bytes())?;
f.flush()?;
} else {
fs::write(target_path, color_free)?;
}
}
}
let failed_checks = r
.iter()
.filter(|(_, r)| !matches!(r, TestResult::Passed))
.map(|(d, _)| d.to_string())
.collect_vec();
if !failed_checks.is_empty() {
let global_demands_fail = matches!(&self.policy, FailurePolicy::Fail);
let critical = r.iter().zip(policies).any(|((_, tr), tp)| {
if !matches!(tr, TestResult::Passed) {
if let Some(fp) = &tp {
matches!(fp, FailurePolicy::Fail) && global_demands_fail
} else {
global_demands_fail
}
} else {
false
}
});
if critical {
return Err(AnnattoError::ChecksFailed { failed_checks }.into());
}
if let Some(sender) = &tx {
let msg = StatusMessage::Warning(format!(
"One or more checks failed:\n{}",
failed_checks.join("\n")
));
sender.send(msg)?;
}
}
Ok(())
}
fn requires_statistics(&self) -> bool {
true
}
}
/// Test descriptions paired with the outcome of the respective test, in execution order.
type NamedResults = Vec<(String, TestResult)>;
/// Failure policies of the individual tests, index-aligned with [`NamedResults`].
/// `None` means that the test does not carry a policy of its own.
type Policies = Vec<Option<FailurePolicy>>;
impl Check {
fn result_to_table_entry(
description: &String,
result: &TestResult,
level: &ReportLevel,
) -> TestTableEntry {
match level {
ReportLevel::List => TestTableEntry {
description: description.to_string(),
result: result.to_string(),
appendix: None,
},
ReportLevel::Verbose => {
let verbose_desc = match result {
TestResult::Failed { query, .. } => {
[description.to_string(), query.to_string()].join("\n")
}
_ => description.to_string(),
};
let appendix = match result {
TestResult::Passed => None,
TestResult::Failed { is, .. } => {
if is.is_empty() {
None } else {
let mut v = Vec::with_capacity(is.len() + 1);
v.push(format!("Matches for query of test `{description}`:"));
v.extend(is.iter().map(|ms| ms.to_string()).sorted_unstable());
Some(v.join("\n"))
}
}
TestResult::ProcessingError { error } => Some(error.to_string()),
};
TestTableEntry {
description: verbose_desc,
result: result.to_string(),
appendix,
}
}
}
}
fn results_to_table(
results: &[(String, TestResult)],
level: &ReportLevel,
failed_only: bool,
) -> String {
let table_data = results
.iter()
.filter_map(|(d, r)| {
if !failed_only || !matches!(r, TestResult::Passed) {
Some(Check::result_to_table_entry(d, r, level))
} else {
None
}
})
.collect_vec();
let mut output = String::default();
let mut table_buffer = Vec::new();
for entry in table_data {
table_buffer.push(entry);
let appendix = &table_buffer[table_buffer.len() - 1].appendix;
if let Some(bottom_details) = appendix {
output.push_str(&Table::new(&table_buffer).to_string());
output.push('\n');
output.push_str(bottom_details);
output.push_str("\n\n");
table_buffer = Vec::default();
}
}
if !table_buffer.is_empty() {
output.push_str(&Table::new(&table_buffer).to_string());
}
output
}
/// Renders the results (respecting `failed_only`) and sends them as one info message.
///
/// # Errors
///
/// Fails if the message cannot be sent.
fn print_report(
    &self,
    level: &ReportLevel,
    results: &[(String, TestResult)],
    sender: &StatusSender,
) -> Result<(), Box<dyn std::error::Error>> {
    let rendered = Check::results_to_table(results, level, self.failed_only);
    sender
        .send(StatusMessage::Info(rendered))
        .map_err(|err| err.into())
}
/// Runs all configured tests on `graph`.
///
/// Every configured test is first converted into one or more AQL tests. The returned
/// vectors are index-aligned: the first holds description and outcome of each AQL test,
/// the second its test-specific failure policy. Graphs of external corpora are shared
/// between the tests of one run through a common cache.
fn run_tests(
    &self,
    graph: &mut AnnotationGraph,
) -> Result<(NamedResults, Policies), Box<dyn std::error::Error>> {
    let mut graph_cache = BTreeMap::default();
    let (results, policies): (NamedResults, Policies) = self
        .tests
        .iter()
        .flat_map(|test| Vec::<AQLTest>::from(test))
        .map(|aql_test| {
            let outcome = Check::run_test(graph, &aql_test, &mut graph_cache);
            ((aql_test.description, outcome), aql_test.policy)
        })
        .unzip();
    Ok((results, policies))
}
fn run_test(
g: &AnnotationGraph,
test: &AQLTest,
graph_cache: &mut BTreeMap<String, AnnotationGraph>,
) -> TestResult {
let query_s = test.query.as_str();
let expected_result = &test.expected;
let result = Check::run_query(g, query_s);
match result {
Ok(r) => {
let n = r.len();
let (passes, expected_r) = match expected_result {
QueryResult::Numeric(n_exp) => (&n == n_exp, QueryResult::Numeric(*n_exp)),
QueryResult::Query(alt_query) => {
let alt_result = Check::run_query(g, &alt_query[..]);
match alt_result {
Ok(alt_matches) => (
alt_matches.len() == n,
QueryResult::Numeric(alt_matches.len()),
),
Err(err) => {
return TestResult::ProcessingError { error: err };
}
}
}
QueryResult::ClosedInterval(lower, upper) => (
n.ge(lower) && n.le(upper),
QueryResult::ClosedInterval(*lower, *upper),
),
QueryResult::SemiOpenInterval(lower, upper) => {
let forward_r = QueryResult::SemiOpenInterval(*lower, *upper);
if upper.is_infinite() || upper.is_nan() {
(n.ge(lower), forward_r)
} else {
let u = upper.abs().ceil() as usize;
(n.ge(lower) && u.gt(&n), forward_r)
}
}
QueryResult::CorpusQuery(db_dir, corpus_name, query) => {
let path = db_dir.join(corpus_name);
let path_string = path.to_string_lossy().to_string();
let entry = graph_cache.entry(path_string.to_string());
let external_g = match entry {
Entry::Vacant(e) => {
let eg = AnnotationGraph::with_default_graphstorages(false);
match eg {
Err(err) => {
return TestResult::ProcessingError { error: err.into() };
}
Ok(mut external_g) => {
if let Err(err) = external_g.open(&db_dir.join(corpus_name))
{
return TestResult::ProcessingError {
error: err.into(),
};
}
e.insert(external_g)
}
}
}
Entry::Occupied(e) => e.into_mut(),
};
if let Err(err) = external_g.ensure_loaded_all() {
return TestResult::ProcessingError { error: err.into() };
}
let e_n = Check::run_query(external_g, query);
match e_n {
Ok(v) => (v.len() == n, QueryResult::Numeric(v.len())),
Err(err) => {
return TestResult::ProcessingError { error: err };
}
}
}
QueryResult::ClosedLQueryInterval(query, upper) => {
let lower = Check::run_query(g, query);
match lower {
Ok(v) => (
v.len().le(&n) && upper.ge(&n),
QueryResult::ClosedInterval(v.len(), *upper),
),
Err(error) => return TestResult::ProcessingError { error },
}
}
QueryResult::ClosedRQueryInterval(lower, query) => {
let upper = Check::run_query(g, query);
match upper {
Ok(v) => (
lower.le(&n) && v.len().ge(&n),
QueryResult::ClosedInterval(*lower, v.len()),
),
Err(error) => return TestResult::ProcessingError { error },
}
}
QueryResult::ClosedQueryInterval(query_l, query_r) => {
let lower = Check::run_query(g, query_l);
let upper = Check::run_query(g, query_r);
if let Ok(l) = &lower
&& let Ok(u) = &upper
{
(
l.len().le(&n) && u.len().ge(&n),
QueryResult::ClosedInterval(l.len(), u.len()),
)
} else if let Err(error) = lower {
return TestResult::ProcessingError { error };
} else {
return TestResult::ProcessingError {
error: upper.err().unwrap_or(
GraphAnnisCoreError::Other(
anyhow!(
"Something went wrong determining the upper bound."
)
.into(),
)
.into(),
),
};
}
}
QueryResult::SemiOpenQueryInterval(query, upper) => {
let lower = Check::run_query(g, query);
match lower {
Ok(v) => {
let l = v.len();
(
l.le(&n) && (!upper.is_normal() || upper.ge(&(n as f64))),
if upper.is_normal() {
QueryResult::ClosedInterval(l, *upper as usize)
} else {
QueryResult::SemiOpenInterval(l, *upper)
},
)
}
Err(error) => return TestResult::ProcessingError { error },
}
}
};
if passes {
TestResult::Passed
} else {
TestResult::Failed {
query: test.query.to_string(),
expected: expected_r,
is: r,
}
}
}
Err(e) => TestResult::ProcessingError { error: e },
}
}
/// Parses and executes an AQL query on `g` and describes every match as a string.
///
/// A match is described by its nodes, separated by a single space. Each node is rendered
/// as its node name (if it has one), prefixed with `ns::name::` (or `name::` for an empty
/// namespace) of the matched annotation key unless that key is the node type key of the
/// `annis` namespace.
///
/// # Errors
///
/// Fails if the query cannot be parsed or executed, or if a node name cannot be looked up.
fn run_query(g: &AnnotationGraph, query_s: &str) -> Result<Vec<String>, GraphAnnisError> {
    let parsed_query = aql::parse(query_s, false)?;
    let it = aql::execute_query_on_graph(g, &parsed_query, true, None)?;
    let mut result = Vec::with_capacity(it.size_hint().0);
    for m in it {
        let m = m?;
        let mut node_descs = Vec::new();
        for singlematch in m.iter() {
            let key = &singlematch.anno_key;
            let mut node_desc = String::new();
            let is_plain_node = key.ns == ANNIS_NS && key.name == NODE_TYPE;
            if !is_plain_node {
                if !key.ns.is_empty() {
                    node_desc.push_str(&key.ns);
                    node_desc.push_str("::");
                }
                node_desc.push_str(&key.name);
                node_desc.push_str("::");
            }
            let node_name = g
                .get_node_annos()
                .get_value_for_item(&singlematch.node, &NODE_NAME_KEY)?;
            if let Some(node_name) = node_name {
                node_desc.push_str(&node_name);
            }
            node_descs.push(node_desc);
        }
        result.push(node_descs.join(" "));
    }
    Ok(result)
}
}
/// A single, fully resolved test: one AQL query and the expectation for its match count.
/// Every configured [`Test`] is converted into one or more of these before it is run.
struct AQLTest {
/// The AQL query whose number of matches is checked.
query: String,
/// The expectation the number of matches is compared with.
expected: QueryResult,
/// The label used for the test in reports and failure messages.
description: String,
/// Failure policy of this particular test; `None` if the test does not carry one.
policy: Option<FailurePolicy>, }
impl From<&Test> for Vec<AQLTest> {
    /// Converts a configured test into the AQL tests that are actually run.
    ///
    /// A query test is copied as it is. A layer test yields two AQL tests per layer, in
    /// the order of the layer names: first an existence test (at least one match for the
    /// annotation; its policy is `warn` for optional layers and `fail` otherwise), then a
    /// value test (no match whose value differs from all listed values; no policy of its
    /// own). If an edge specification is given, both queries address annotations on that
    /// edge instead of node annotations.
    fn from(value: &Test) -> Self {
        match value {
            Test::QueryTest {
                query,
                expected,
                description,
                policy,
            } => vec![AQLTest {
                query: query.clone(),
                expected: expected.clone(),
                description: description.clone(),
                policy: policy.clone(),
            }],
            Test::LayerTest {
                layers,
                edge,
                optional,
            } => layers
                .iter()
                .flat_map(|(anno_qname, list_of_values)| {
                    let joint_values = list_of_values.join("|");
                    let inner_query_frag = format!("{anno_qname}!=/{joint_values}/");
                    let (value_query, exist_query) = match edge {
                        Some(edge_spec) => (
                            format!("node {edge_spec}[{inner_query_frag}] node"),
                            format!("node {edge_spec}[{anno_qname}=/.*/] node"),
                        ),
                        None => (inner_query_frag, anno_qname.to_string()),
                    };
                    let existence_policy = if *optional {
                        FailurePolicy::Warn
                    } else {
                        FailurePolicy::Fail
                    };
                    [
                        AQLTest {
                            query: exist_query,
                            expected: QueryResult::SemiOpenInterval(1, f64::INFINITY),
                            description: format!("Layer `{anno_qname}` exists"),
                            policy: Some(existence_policy),
                        },
                        AQLTest {
                            query: value_query,
                            expected: QueryResult::Numeric(0),
                            description: format!(
                                "Check layer `{anno_qname}` for invalid values."
                            ),
                            policy: None,
                        },
                    ]
                })
                .collect(),
        }
    }
}
// A test as it is written in the configuration. The enum is untagged, i. e. the variant is
// recognized by the fields that are present; unknown fields are rejected.
// Plain `//` comments are used here so that the input of the derive macros stays unchanged.
#[derive(Facet, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged, deny_unknown_fields)]
#[repr(u8)]
enum Test {
// A free AQL query together with the expected number of matches.
QueryTest {
// The AQL query to run.
query: String,
// The expectation the number of matches is compared with.
expected: QueryResult,
// The label used for the test in reports and failure messages.
description: String,
// Optional failure policy of this particular test.
#[serde(default)]
policy: Option<FailurePolicy>, },
// A shorthand for checking annotation layers: for every layer, one test that the
// annotation exists and one test that it only has the listed values are generated.
LayerTest {
// Maps the (qualified) annotation name to its list of valid values. The values are
// joined with `|` into a regular expression.
layers: BTreeMap<String, Vec<String>>,
// Optional edge specification; if given, the annotations are looked up on this
// edge between two nodes.
#[serde(default)]
edge: Option<String>,
// If `true`, the generated existence test gets the policy `warn` instead of `fail`.
#[serde(default)]
optional: bool,
},
}
/// The outcome of a single test.
enum TestResult {
/// The number of matches met the expectation.
Passed,
/// The number of matches did not meet the expectation.
Failed {
/// The query of the test.
query: String,
/// The expectation; query-based expectations are already resolved to numbers.
expected: QueryResult,
/// String descriptions of the matches the query actually had.
is: Vec<String>,
},
/// A query or a graph operation needed for the test failed.
ProcessingError {
/// The error that stopped the test.
error: GraphAnnisError,
},
}
impl Display for TestResult {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
TestResult::Passed => format!(
"{}passed{}",
ansi_term::Color::Green.prefix(),
ansi_term::Color::Green.suffix()
),
TestResult::Failed {
is: matches,
expected,
..
} => {
let exp = match expected {
QueryResult::Numeric(n) => format!("{n} ≠"),
QueryResult::ClosedInterval(l, u) => format!("[{l}, {u}] ∌ "),
QueryResult::SemiOpenInterval(l, u) => format!("[{l}, {u}] ∌ "),
_ => "".to_string(),
};
format!(
"{}{exp}{}{}",
ansi_term::Color::Red.prefix(),
matches.len(),
ansi_term::Color::Red.suffix()
)
}
TestResult::ProcessingError { error } => format!(
"{}invalid: {}{}",
ansi_term::Color::Purple.prefix(),
error,
ansi_term::Color::Purple.suffix()
),
};
write!(f, "{s}")
}
}
/// One row of the result table.
#[derive(Tabled)]
struct TestTableEntry {
/// Content of the description column.
description: String,
/// Content of the result column.
result: String,
/// Optional details that are not a table column but are printed below the table.
#[tabled(skip)]
appendix: Option<String>,
}
// The expectation for the number of matches `n` of a test query. The enum is untagged:
// serde tries the variants in the order of declaration, so the order matters.
// Plain `//` comments are used here so that the input of the derive macros stays unchanged.
#[derive(Facet, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged)]
#[repr(u8)]
enum QueryResult {
// `n` equals the given number.
Numeric(usize),
// `n` equals the number of matches of the given query on the same graph.
Query(String),
// lower <= `n` <= upper.
ClosedInterval(usize, usize),
// Like `ClosedInterval`, the lower bound is the number of matches of the given query.
ClosedLQueryInterval(String, usize),
// Like `ClosedInterval`, the upper bound is the number of matches of the given query.
ClosedRQueryInterval(usize, String),
// Like `ClosedInterval`, both bounds are the numbers of matches of the given queries.
ClosedQueryInterval(String, String),
// lower <= `n`; an infinite or NaN upper bound means no upper bound, otherwise `n` must
// be smaller than the rounded-up absolute value of the upper bound.
SemiOpenInterval(usize, f64),
// The lower bound is the number of matches of the given query. If the upper bound is a
// normal float, `n` must not exceed it; otherwise there is no upper bound.
SemiOpenQueryInterval(String, f64),
// `n` equals the number of matches of a query (third value) on an external corpus
// (second value) that is opened from a database directory (first value).
CorpusQuery(PathBuf, String, String), }
#[cfg(test)]
mod tests {
use core::f64;
use std::{
collections::BTreeMap,
fs,
path::{Path, PathBuf},
sync::mpsc,
usize,
};
use graphannis::{
AnnotationGraph,
model::AnnotationComponentType,
update::{GraphUpdate, UpdateEvent},
};
use graphannis_core::graph::ANNIS_NS;
use insta::assert_snapshot;
use itertools::Itertools;
use tempfile::tempdir;
use toml;
use crate::{
StepID,
manipulator::{
Manipulator,
check::{AQLTest, FailurePolicy, QueryResult, ReportLevel, TestResult},
},
util::example_generator,
util::update_graph_silent,
workflow::StatusMessage,
};
use super::{Check, Test};
/// Serializes a check configuration that uses several kinds of expectations and a layer
/// test, and compares the TOML output with the stored snapshot.
#[test]
fn serialize_custom() {
    let query_test = |query: &str, expected: QueryResult, description: &str| Test::QueryTest {
        query: query.to_string(),
        expected,
        description: description.to_string(),
        policy: None,
    };
    let layer_test = Test::LayerTest {
        layers: BTreeMap::from([(
            "Reflexive".to_string(),
            vec!["yes".to_string(), "no".to_string()],
        )]),
        edge: None,
        optional: true,
    };
    let module = Check {
        tests: vec![
            query_test(
                "tok @* doc=/largest-doc/",
                QueryResult::SemiOpenInterval(1, f64::INFINITY),
                "I expect a lot of tokens",
            ),
            query_test(
                "pos",
                QueryResult::ClosedQueryInterval("norm".to_string(), "tok".to_string()),
                "Plausible number of pos annotations.",
            ),
            query_test(
                "sentence",
                QueryResult::ClosedLQueryInterval("doc".to_string(), 400),
                "Plausible distribution of sentence annotations.",
            ),
            query_test(
                "doc _ident_ author=/William Shakespeare/",
                QueryResult::ClosedRQueryInterval(1, "doc".to_string()),
                "At least one document in the corpus was written by Shakespeare, hopefully all of them!",
            ),
            query_test(
                "lemma=/hello/",
                QueryResult::SemiOpenQueryInterval("doc".to_string(), f64::INFINITY),
                "There are at least as many hellos as there are documents.",
            ),
            layer_test,
        ],
        report: Some(ReportLevel::List),
        failed_only: true,
        policy: FailurePolicy::Warn,
        save: Some(PathBuf::from("this/is/a/non-existing/path.log")),
        overwrite: false,
    };
    let serialization = toml::to_string(&module);
    assert!(
        serialization.is_ok(),
        "Serialization failed: {:?}",
        serialization.err()
    );
    assert_snapshot!(serialization.unwrap());
}
/// Validating a graph with a check module must leave the graph with global statistics.
#[test]
fn graph_statistics() {
    let g = AnnotationGraph::with_default_graphstorages(false);
    assert!(g.is_ok());
    let mut graph = g.unwrap();

    let mut u = GraphUpdate::default();
    example_generator::create_corpus_structure_simple(&mut u);
    let update = update_graph_silent(&mut graph, &mut u);
    assert!(update.is_ok());

    // A check without any tests is sufficient here.
    let check = Check {
        tests: Vec::new(),
        report: None,
        failed_only: false,
        policy: FailurePolicy::Warn,
        save: None,
        overwrite: false,
    };
    let step_id = StepID {
        module_name: "test".to_string(),
        path: None,
    };
    let validation = check.validate_graph(&mut graph, step_id, None);
    assert!(validation.is_ok());
    assert!(graph.global_statistics.is_some());
}
/// Runs the serialized example check with `on_disk = true`.
#[test]
fn test_check_on_disk() {
    if let Err(err) = test(true) {
        panic!("Error when testing on disk: {:?}", Some(err));
    }
}
/// Runs the serialized example check with `on_disk = false`.
#[test]
fn test_check_in_mem() {
    if let Err(err) = test(false) {
        panic!("Error when testing in memory: {:?}", Some(err));
    }
}
/// Failing checks (list report) on a graph created with `on_disk = true`.
#[test]
fn test_failing_checks_on_disk() {
    if let Err(err) = test_failing_checks(true, false) {
        panic!("Error when testing on disk: {:?}", Some(err));
    }
}
/// Failing checks (list report) on a graph created with `on_disk = false`.
///
/// The in-memory variant has to pass `false` as first argument; passing `true` would only
/// repeat `test_failing_checks_on_disk`.
#[test]
fn test_failing_checks_in_mem() {
    let r = test_failing_checks(false, false);
    assert!(r.is_ok(), "Error when testing in memory: {:?}", r.err());
}
/// Failing checks (verbose report) on a graph created with `on_disk = true`.
#[test]
fn test_failing_checks_with_nodes_on_disk() {
    if let Err(err) = test_failing_checks(true, true) {
        panic!("Error when testing on disk: {:?}", Some(err));
    }
}
/// Failing checks (verbose report) on a graph created with `on_disk = false`.
///
/// The in-memory variant has to pass `false` as first argument; passing `true` would only
/// repeat `test_failing_checks_with_nodes_on_disk`.
#[test]
fn test_failing_checks_with_nodes_in_mem() {
    let r = test_failing_checks(false, true);
    assert!(r.is_ok(), "Error when testing in memory: {:?}", r.err());
}
/// Runs the check stored in `serialized_check.toml` on the example graph.
///
/// With `on_disk` the graph is created with `on_disk = true` and the report is saved to a
/// file in a temporary directory, whose content is compared with the stored snapshot.
/// Without it, no report file is written.
fn test(on_disk: bool) -> Result<(), Box<dyn std::error::Error>> {
    let serialized_data =
        fs::read_to_string("./tests/data/graph_op/check/serialized_check.toml")?;
    let mut check: Check = toml::from_str(&serialized_data)?;
    let tmp_report_dir = tempdir()?;
    check.save = on_disk.then(|| tmp_report_dir.path().join("test_check_report.txt"));
    let mut g = input_graph(on_disk, "corpus")?;
    let (sender, receiver) = mpsc::channel();
    check.manipulate_corpus(
        &mut g,
        tempdir()?.path(),
        StepID {
            module_name: "check".to_string(),
            path: None,
        },
        Some(sender),
    )?;
    // The example configuration asks for a list report ...
    assert!(matches!(check.report, Some(ReportLevel::List)));
    // ... so at least one status message must have been sent.
    assert_ne!(receiver.iter().count(), 0);
    if let Some(path) = &check.save {
        let written_report = fs::read_to_string(path)?;
        assert_snapshot!(written_report);
    }
    Ok(())
}
/// The configuration in `competing_policies.toml` must not make the step fail.
#[test]
fn test_policy_hierarchy() {
    let serialized_data =
        fs::read_to_string("./tests/data/graph_op/check/competing_policies.toml").unwrap();
    let check: Check = toml::from_str(&serialized_data).unwrap();
    let mut g = input_graph(true, "corpus").unwrap();
    let step_id = StepID {
        module_name: "test_check_policies".to_string(),
        path: None,
    };
    let run = check.manipulate_corpus(&mut g, Path::new("./"), step_id, None);
    assert!(run.is_ok());
}
/// The configuration in `competing_policies_fail.toml` must make the step fail; the error
/// is compared with the stored snapshot.
#[test]
fn test_policy_hierarchy_fail() {
    let toml_path = "./tests/data/graph_op/check/competing_policies_fail.toml";
    let serialized_data = fs::read_to_string(toml_path).unwrap();
    let check: Check = toml::from_str(&serialized_data).unwrap();
    let mut g = input_graph(true, "corpus").unwrap();
    let step_id = StepID {
        module_name: "test_check_policies".to_string(),
        path: None,
    };
    let run = check.manipulate_corpus(&mut g, Path::new("./"), step_id, None);
    assert!(run.is_err());
    assert_snapshot!(run.err().unwrap());
}
/// Runs a configuration with failing tests and inspects the individual results.
///
/// * `on_disk` - passed on to the creation of the example graph.
/// * `with_nodes` - selects the configuration with a verbose report (which must contain at
///   least one failed test and one processing error) instead of the one with a list report.
fn test_failing_checks(
    on_disk: bool,
    with_nodes: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let (toml_path, expected_level) = if with_nodes {
        (
            "./tests/data/graph_op/check/serialized_check_failing_with_nodes.toml",
            ReportLevel::Verbose,
        )
    } else {
        (
            "./tests/data/graph_op/check/serialized_check_failing.toml",
            ReportLevel::List,
        )
    };
    let serialized_data = fs::read_to_string(toml_path)?;
    let check: Check = toml::from_str(&serialized_data)?;
    let mut g = input_graph(on_disk, "corpus")?;
    // The receiver is kept alive so that sending status messages cannot fail.
    let (sender, _receiver) = mpsc::channel();
    let step_id = StepID {
        module_name: "check".to_string(),
        path: None,
    };
    let result = check.manipulate_corpus(&mut g, tempdir()?.path(), step_id, Some(sender));
    assert!(result.is_err());
    assert!(check.report.is_some());
    assert!(check.report.as_ref() == Some(&expected_level));

    let (r, _) = check.run_tests(&mut g)?;
    // At least one test failed with matches or could not be processed.
    let any_problem = r.iter().any(|(_, tr)| match tr {
        TestResult::Failed { is, .. } => !is.is_empty(),
        TestResult::ProcessingError { .. } => true,
        _ => false,
    });
    assert!(any_problem);
    if with_nodes {
        let has_failure = r
            .iter()
            .any(|(_, tr)| matches!(tr, TestResult::Failed { .. }));
        assert!(has_failure);
        let has_processing_error = r
            .iter()
            .any(|(_, tr)| matches!(tr, TestResult::ProcessingError { .. }));
        assert!(has_processing_error);
    }
    Ok(())
}
/// Passing layer check on a graph created with `on_disk = false`.
#[test]
fn test_layer_check_in_mem() {
    if let Err(err) = test_layer_check(false) {
        panic!("{:?}", Some(err));
    }
}
/// Passing layer check on a graph created with `on_disk = true`.
#[test]
fn test_layer_check_on_disk() {
    if let Err(err) = test_layer_check(true) {
        panic!("{:?}", Some(err));
    }
}
/// Failing layer check on a graph created with `on_disk = false`.
#[test]
fn test_layer_check_fail_in_mem() {
    if let Err(err) = test_layer_check_fail(false) {
        panic!("{:?}", Some(err));
    }
}
/// Failing layer check on a graph created with `on_disk = true`.
#[test]
fn test_layer_check_fail_on_disk() {
    if let Err(err) = test_layer_check_fail(true) {
        panic!("{:?}", Some(err));
    }
}
/// Runs the layer check in `serialized_layer_check.toml` and expects every generated test
/// to pass. If not, a verbose table of all results is printed before the assertion fails.
fn test_layer_check(on_disk: bool) -> Result<(), Box<dyn std::error::Error>> {
    let mut g = input_graph(on_disk, "corpus")?;
    let s = fs::read_to_string("./tests/data/graph_op/check/serialized_layer_check.toml")?;
    let check: Check = toml::from_str(&s)?;
    let (results, _) = check.run_tests(&mut g)?;
    let any_not_passed = results
        .iter()
        .any(|(_, tr)| !matches!(tr, TestResult::Passed));
    if any_not_passed {
        let table_string = Check::results_to_table(&results, &ReportLevel::Verbose, false);
        println!("{}", table_string);
    }
    assert!(!any_not_passed);
    Ok(())
}
/// Runs the layer check in `serialized_layer_check_failing.toml` and expects as many
/// passed as failed tests. If the numbers differ, a verbose table of all results is
/// printed before the assertion fails.
fn test_layer_check_fail(on_disk: bool) -> Result<(), Box<dyn std::error::Error>> {
    let mut g = input_graph(on_disk, "corpus")?;
    let s =
        fs::read_to_string("./tests/data/graph_op/check/serialized_layer_check_failing.toml")?;
    let check: Check = toml::from_str(&s)?;
    let (results, _) = check.run_tests(&mut g)?;
    let (mut passing, mut failing) = (0_usize, 0_usize);
    for (_, tr) in &results {
        match tr {
            TestResult::Passed => passing += 1,
            TestResult::Failed { .. } => failing += 1,
            // Processing errors count for neither side.
            TestResult::ProcessingError { .. } => {}
        }
    }
    if passing != failing {
        let table_string = Check::results_to_table(&results, &ReportLevel::Verbose, false);
        println!("{}", table_string);
    }
    assert_eq!(passing, failing);
    Ok(())
}
/// A failing layer check only warns (exactly one warning message) with the configuration
/// in `..._failing_warn.toml`, but makes the step fail with the one in `..._failing.toml`.
/// A configuration file that cannot be read is silently skipped.
#[test]
fn test_layer_check_fail_policy_warn() {
    let gr = input_graph(false, "corpus");
    assert!(gr.is_ok());
    let mut g = gr.unwrap();
    let new_step_id = || StepID {
        module_name: "check".to_string(),
        path: None,
    };

    // Part 1: failures are only warned about.
    let toml_path = "./tests/data/graph_op/check/serialized_layer_check_failing_warn.toml";
    if let Ok(s) = fs::read_to_string(toml_path) {
        let processor_opt: Result<Check, _> = toml::from_str(&s);
        assert!(processor_opt.is_ok());
        let check = processor_opt.unwrap();
        let (sender, receiver) = mpsc::channel();
        let tmp = tempdir();
        assert!(tmp.is_ok());
        let dummy_value = tmp.unwrap();
        let run =
            check.manipulate_corpus(&mut g, dummy_value.path(), new_step_id(), Some(sender));
        assert!(run.is_ok());
        let n_warnings = receiver
            .into_iter()
            .filter(|msg| matches!(msg, StatusMessage::Warning { .. }))
            .count();
        assert_eq!(n_warnings, 1);
    }

    // Part 2: failures abort the step.
    let toml_path_fail = "./tests/data/graph_op/check/serialized_layer_check_failing.toml";
    if let Ok(s) = fs::read_to_string(toml_path_fail) {
        let processor_opt: Result<Check, _> = toml::from_str(&s);
        assert!(processor_opt.is_ok());
        let check = processor_opt.unwrap();
        let (sender, _receiver) = mpsc::channel();
        let tmp = tempdir();
        assert!(tmp.is_ok());
        let dummy_value = tmp.unwrap();
        let run =
            check.manipulate_corpus(&mut g, dummy_value.path(), new_step_id(), Some(sender));
        assert!(run.is_err());
        assert_snapshot!(run.err().unwrap().to_string());
    }
}
/// A layer test with three layers is converted into six AQL tests (two per layer).
#[test]
fn test_layer_test_to_aql_test() {
    let values = || vec!["v1".to_string(), "v2".to_string(), "v3".to_string()];
    let layers: BTreeMap<String, Vec<String>> = ["layer1", "layer2", "layer3"]
        .into_iter()
        .map(|name| (name.to_string(), values()))
        .collect();
    let layer_test = Test::LayerTest {
        layers,
        edge: None,
        optional: false,
    };
    let aql_tests = Vec::<AQLTest>::from(&layer_test);
    assert_eq!(aql_tests.len(), 6);
}
/// Saves a report twice to the same file without `overwrite`: the second run has to append
/// to the file and announce that with an info message. File content and message (with the
/// temporary directory masked) are compared with the stored snapshots.
#[test]
fn test_append_report() {
    let g = input_graph(false, "corpus");
    assert!(g.is_ok());
    let mut graph = g.unwrap();
    let tmp = tempdir();
    assert!(tmp.is_ok());
    let tmp_dir = tmp.unwrap();
    let report_path = tmp_dir.path().join("annatto_test_report_out_append.txt");
    // Both runs use the same configuration except for the report level.
    let build_check = |report: Option<ReportLevel>| Check {
        policy: FailurePolicy::Fail,
        tests: vec![Test::QueryTest {
            query: "tok".to_string(),
            expected: QueryResult::Numeric(4),
            description: "Correct number of tokens".to_string(),
            policy: None,
        }],
        report,
        failed_only: false,
        save: Some(report_path.clone()),
        overwrite: false,
    };
    let step_id = StepID {
        module_name: "check".to_string(),
        path: None,
    };

    // First run: creates the file.
    let check = build_check(Some(ReportLevel::List));
    let run = check.manipulate_corpus(&mut graph, tmp_dir.path(), step_id.clone(), None);
    assert!(run.is_ok(), "Error writing report: {:?}", run.err());
    assert!(report_path.exists());

    // Second run: the file exists already.
    let another_check = build_check(None);
    let (sender, receiver) = mpsc::channel();
    let application =
        another_check.manipulate_corpus(&mut graph, tmp_dir.path(), step_id, Some(sender));
    assert!(application.is_ok(), "Error: {:?}", application.err());
    let log_contents = fs::read_to_string(report_path);
    assert_snapshot!(log_contents.unwrap());

    let mut log_message = receiver
        .into_iter()
        .map(|m| {
            if let StatusMessage::Info(msg) = m {
                msg
            } else {
                String::new()
            }
        })
        .join("\n");
    // Everything between the first and the last slash depends on the temporary directory.
    let masked_from = log_message.find("/").unwrap_or_default();
    let masked_to = log_message.rfind("/").unwrap_or_default();
    log_message.replace_range(masked_from..masked_to, "<tmp-dir>");
    assert_snapshot!("log_message_append", log_message);
}
/// Saves a report twice to the same file with `overwrite`: the second run has to replace
/// the file and announce that with an info message. File content and message (with the
/// temporary directory masked) are compared with the stored snapshots.
#[test]
fn test_overwrite_report() {
    let g = input_graph(false, "corpus");
    assert!(g.is_ok());
    let mut graph = g.unwrap();
    let tmp = tempdir();
    assert!(tmp.is_ok());
    let tmp_dir = tmp.unwrap();
    let report_path = tmp_dir.path().join("annatto_test_report_out_overwrite.txt");
    // Both runs use the same configuration except for the report level.
    let build_check = |report: Option<ReportLevel>| Check {
        policy: FailurePolicy::Fail,
        tests: vec![Test::QueryTest {
            query: "tok".to_string(),
            expected: QueryResult::Numeric(4),
            description: "Correct number of tokens".to_string(),
            policy: None,
        }],
        report,
        failed_only: false,
        save: Some(report_path.clone()),
        overwrite: true,
    };
    let step_id = StepID {
        module_name: "check".to_string(),
        path: None,
    };

    // First run: creates the file.
    let check = build_check(Some(ReportLevel::List));
    let run = check.manipulate_corpus(&mut graph, tmp_dir.path(), step_id.clone(), None);
    assert!(run.is_ok(), "Error writing report: {:?}", run.err());
    assert!(report_path.exists());

    // Second run: the file exists already.
    let another_check = build_check(None);
    let (sender, receiver) = mpsc::channel();
    let application =
        another_check.manipulate_corpus(&mut graph, tmp_dir.path(), step_id, Some(sender));
    assert!(application.is_ok(), "Error: {:?}", application.err());
    let log_contents = fs::read_to_string(report_path);
    assert_snapshot!(log_contents.unwrap());

    let mut log_message = receiver
        .into_iter()
        .map(|m| {
            if let StatusMessage::Info(msg) = m {
                msg
            } else {
                String::new()
            }
        })
        .join("\n");
    // Everything between the first and the last slash depends on the temporary directory.
    let masked_from = log_message.find("/").unwrap_or_default();
    let masked_to = log_message.rfind("/").unwrap_or_default();
    log_message.replace_range(masked_from..masked_to, "<tmp-dir>");
    assert_snapshot!("log_message_overwrite", log_message);
}
/// Saves a verbose report with passed and failed tests (global policy `warn`) and compares
/// the written file with the stored snapshot.
#[test]
fn test_write_report_verbose() {
    let g = input_graph(true, "corpus");
    assert!(g.is_ok());
    let mut graph = g.unwrap();

    // The same token query with different expected numbers, followed by a layer test.
    let token_count_test = |n: usize| Test::QueryTest {
        query: "tok".to_string(),
        expected: QueryResult::Numeric(n),
        description: format!("Correct number of tokens is {n}"),
        policy: None,
    };
    let mut tests: Vec<Test> = [4, 2, 3, 1].into_iter().map(token_count_test).collect();
    tests.push(Test::LayerTest {
        layers: BTreeMap::from([(
            "pos".to_string(),
            vec!["DET".to_string(), "NOUN".to_string()],
        )]),
        edge: None,
        optional: false,
    });

    let tmp = tempdir();
    assert!(tmp.is_ok());
    let tmp_dir = tmp.unwrap();
    let report_path = tmp_dir.path().join("annatto_test_report_out_verbose.txt");
    let check = Check {
        tests,
        report: Some(ReportLevel::Verbose),
        failed_only: false,
        policy: FailurePolicy::Warn,
        save: Some(report_path.clone()),
        overwrite: false,
    };
    let step_id = StepID {
        module_name: "check_verbose".to_string(),
        path: None,
    };
    let run = check.manipulate_corpus(&mut graph, tmp_dir.path(), step_id.clone(), None);
    assert!(run.is_ok(), "Error writing report: {:?}", run.err());
    assert!(report_path.exists());
    let log_contents = fs::read_to_string(report_path);
    assert_snapshot!(log_contents.unwrap());
}
/// Compares match counts with those of the same queries on an external corpus that is
/// loaded from `tests/data/graph_op/check/external_db/`.
#[test]
fn with_external_corpus() {
    let g = input_graph(true, "new-corpus");
    assert!(g.is_ok());
    let mut graph = g.unwrap();
    let query = "/This/ _ident_ pos=/PRON/ . /is/ _ident_ pos=/VERB/ . /a/ _ident_ pos=/DET/ . /test/ _ident_ pos=/NOUN/";
    // Expectation: as many matches as the given query has on the external corpus.
    let in_external_corpus = |external_query: &str| {
        QueryResult::CorpusQuery(
            PathBuf::from("tests/data/graph_op/check/external_db/"),
            "corpus".to_string(),
            external_query.to_string(),
        )
    };
    let query_test = |description: &str, query: &str, expected: QueryResult| Test::QueryTest {
        query: query.to_string(),
        expected,
        description: description.to_string(),
        policy: None,
    };
    let check = Check {
        policy: FailurePolicy::Fail,
        tests: vec![
            query_test(
                "Control test to make sure the query actually works",
                query,
                QueryResult::Numeric(1),
            ),
            query_test("Query sequence.", query, in_external_corpus(query)),
            query_test("Query nodes.", "node", in_external_corpus("node")),
        ],
        report: None,
        failed_only: false,
        save: None,
        overwrite: false,
    };
    let result = check.run_tests(&mut graph);
    assert!(result.is_ok(), "{:?}", result.err());
    let step_id = StepID {
        module_name: "check".to_string(),
        path: None,
    };
    let manip = check.manipulate_corpus(&mut graph, Path::new("./"), step_id, None);
    assert!(manip.is_ok(), "{:?}", manip.err());
}
/// With `failed_only`, the verbose report must only show the failed test. The info
/// messages sent during the run are compared with the stored snapshot.
#[test]
fn failed_only() {
    let g = input_graph(true, "corpus");
    let query_test = |query: &str, expected: QueryResult, description: &str| Test::QueryTest {
        query: query.to_string(),
        expected,
        description: description.to_string(),
        policy: None,
    };
    let check = Check {
        tests: vec![
            query_test(
                "tok",
                QueryResult::SemiOpenInterval(1, f64::INFINITY),
                "gimme some tokens, please",
            ),
            query_test("weird_anno_name", QueryResult::Numeric(1), "I want that"),
        ],
        report: Some(ReportLevel::Verbose),
        failed_only: true,
        policy: FailurePolicy::Warn,
        save: None,
        overwrite: false,
    };
    assert!(g.is_ok());
    let mut graph = g.unwrap();
    let (tx, rx) = mpsc::channel();
    let step_id = StepID {
        module_name: "test".to_string(),
        path: None,
    };
    let run = check.manipulate_corpus(&mut graph, Path::new("./"), step_id, Some(tx));
    assert!(run.is_ok(), "Error: {:?}", run.err());
    // Messages other than info messages contribute an empty line.
    let output = rx
        .into_iter()
        .map(|m| {
            if let StatusMessage::Info(msg) = m {
                msg
            } else {
                String::new()
            }
        })
        .join("\n");
    assert_snapshot!(output);
}
fn input_graph(
on_disk: bool,
root_name: &str,
) -> Result<AnnotationGraph, Box<dyn std::error::Error>> {
let mut g = AnnotationGraph::with_default_graphstorages(on_disk)?;
let mut u = GraphUpdate::default();
let root_corpus = root_name;
let doc_name = "doc";
let doc_node = format!("{root_corpus}/{doc_name}");
u.add_event(UpdateEvent::AddNode {
node_name: root_corpus.to_string(),
node_type: "corpus".to_string(),
})?;
u.add_event(UpdateEvent::AddNode {
node_name: doc_node.to_string(),
node_type: "corpus".to_string(),
})?;
u.add_event(UpdateEvent::AddEdge {
source_node: doc_node.to_string(),
target_node: root_corpus.to_string(),
layer: ANNIS_NS.to_string(),
component_type: AnnotationComponentType::PartOf.to_string(),
component_name: "".to_string(),
})?;
let s_node = format!("{doc_node}#s1");
u.add_event(UpdateEvent::AddNode {
node_name: s_node.to_string(),
node_type: "node".to_string(),
})?;
u.add_event(UpdateEvent::AddNodeLabel {
node_name: s_node.to_string(),
anno_ns: "".to_string(),
anno_name: "sentence".to_string(),
anno_value: "1".to_string(),
})?;
for (i, (text_value, pos_value)) in [
("This", "PRON"),
("is", "VERB"),
("a", "DET"),
("test", "NOUN"),
]
.iter()
.enumerate()
{
let tok_node = format!("{doc_node}#t{}", &i + &1);
u.add_event(UpdateEvent::AddNode {
node_name: tok_node.to_string(),
node_type: "node".to_string(),
})?;
u.add_event(UpdateEvent::AddNodeLabel {
node_name: tok_node.to_string(),
anno_ns: ANNIS_NS.to_string(),
anno_name: "tok".to_string(),
anno_value: text_value.to_string(),
})?;
u.add_event(UpdateEvent::AddNodeLabel {
node_name: tok_node.to_string(),
anno_ns: "".to_string(),
anno_name: "pos".to_string(),
anno_value: pos_value.to_string(),
})?;
u.add_event(UpdateEvent::AddEdge {
source_node: s_node.to_string(),
target_node: tok_node.to_string(),
layer: ANNIS_NS.to_string(),
component_type: AnnotationComponentType::Coverage.to_string(),
component_name: "".to_string(),
})?;
if i > 0 {
u.add_event(UpdateEvent::AddEdge {
source_node: format!("{doc_node}#t{}", &i),
target_node: tok_node.to_string(),
layer: ANNIS_NS.to_string(),
component_type: AnnotationComponentType::Ordering.to_string(),
component_name: "".to_string(),
})?;
}
}
let dep = "dep";
let deprel = "deprel";
for (source_id, target_id, label) in [(4, 1, "nsubj"), (4, 2, "cop"), (4, 3, "det")] {
let source_node = format!("{doc_node}#t{}", source_id);
let target_node = format!("{doc_node}#t{}", target_id);
u.add_event(UpdateEvent::AddEdge {
source_node: source_node.to_string(),
target_node: target_node.to_string(),
layer: "".to_string(),
component_type: AnnotationComponentType::Pointing.to_string(),
component_name: dep.to_string(),
})?;
u.add_event(UpdateEvent::AddEdgeLabel {
source_node: source_node.to_string(),
target_node: target_node.to_string(),
layer: "".to_string(),
component_type: AnnotationComponentType::Pointing.to_string(),
component_name: dep.to_string(),
anno_ns: "".to_string(),
anno_name: deprel.to_string(),
anno_value: label.to_string(),
})?;
}
let cat = "cat";
let func = "func";
for (members, name, category) in [
(vec![("t1", None)], "n1", "DP"),
(vec![("t3", Some("head")), ("t4", None)], "n2", "DP"),
(vec![("t2", Some("head")), ("n2", None)], "n3", "IP"),
(vec![("n1", Some("head")), ("n3", None)], "n4", "CP"),
] {
let node_name = format!("{doc_node}#{name}");
u.add_event(UpdateEvent::AddNode {
node_name: node_name.to_string(),
node_type: "node".to_string(),
})?;
u.add_event(UpdateEvent::AddNodeLabel {
node_name: node_name.to_string(),
anno_ns: "".to_string(),
anno_name: cat.to_string(),
anno_value: category.to_string(),
})?;
for (member, function_opt) in members {
let target_name = format!("{doc_node}#{member}");
u.add_event(UpdateEvent::AddEdge {
source_node: node_name.to_string(),
target_node: target_name.to_string(),
layer: "".to_string(),
component_type: AnnotationComponentType::Dominance.to_string(),
component_name: "".to_string(),
})?;
if let Some(function) = function_opt {
u.add_event(UpdateEvent::AddEdgeLabel {
source_node: node_name.to_string(),
target_node: target_name.to_string(),
layer: "".to_string(),
component_type: AnnotationComponentType::Dominance.to_string(),
component_name: "".to_string(),
anno_ns: "".to_string(),
anno_name: func.to_string(),
anno_value: function.to_string(),
})?;
}
}
}
g.apply_update(&mut u, |_| {})?;
Ok(g)
}
}