annatomic 0.4.0

The Annatomic annotation editor is intended to be used for the [RIDGES corpus](https://www.linguistik.hu-berlin.de/en/institut-en/professuren-en/korpuslinguistik/research/ridges-projekt). It is based on [graphANNIS](https://github.com/korpling/graphANNIS), and thus its internal data model is in principle suitable for a wide range of annotation concepts.
Documentation
use anyhow::Context;
use graphannis::{AnnotationGraph, graph::NodeID, update::GraphUpdate};
use graphannis_core::graph::NODE_NAME_KEY;
use itertools::Itertools;

use crate::app::util::example_generator;

use super::TokenHelper;

/// Checks that the ordered tokens of the example document are returned with the expected
/// covered text when no segmentation is requested.
#[test]
fn example_graph_token() {
    // Build a graph that only contains the corpus structure and the tokens of the document.
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));
    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    let helper = TokenHelper::new(&graph).unwrap();

    // Fetch the ordered token nodes first, then resolve the text covered by each one.
    let token_ids = helper.get_ordered_token("root/doc1", None).unwrap();
    let token_texts = token_ids
        .into_iter()
        .map(|id| helper.spanned_text(&[id]).unwrap())
        .collect_vec();

    let expected = vec![
        "Is",
        "this",
        "example",
        "more",
        "complicated",
        "than",
        "it",
        "appears",
        "to",
        "be",
        "?",
    ];
    assert_eq!(expected, token_texts);
}

/// Checks that requesting the ordered tokens for the `seg` segmentation returns the
/// segmentation nodes with the expected covered text.
#[test]
fn ordered_token_with_segmentation() {
    // The graph contains the tokens and, in addition, the example segmentation.
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));
    example_generator::create_segmentation(&mut update);

    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    let helper = TokenHelper::new(&graph).unwrap();

    // Fetch the ordered nodes of the segmentation, then resolve the text of each one.
    let segment_ids = helper
        .get_ordered_token("root/doc1", Some("seg"))
        .unwrap();
    let segment_texts = segment_ids
        .into_iter()
        .map(|id| helper.spanned_text(&[id]).unwrap())
        .collect_vec();

    let expected = vec!["This", "more", "complicated"];
    assert_eq!(expected, segment_texts);
}

/// Checks that the preceding and following token can be found without a segmentation, both
/// when starting from a single token and when starting from a span over several tokens.
#[test]
fn token_before_and_after_base() {
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));
    // Add a span over tok3 and tok4, which is used below to query the tokens around a span.
    example_generator::make_span(
        &mut update,
        "root/doc1#span",
        &["root/doc1#tok3", "root/doc1#tok4"],
        true,
    );
    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    let annos = graph.get_node_annos();
    let span_id = annos
        .get_node_id_from_name("root/doc1#span")
        .unwrap()
        .unwrap();

    let helper = TokenHelper::new(&graph).unwrap();

    let tok3_id = annos
        .get_node_id_from_name("root/doc1#tok3")
        .unwrap()
        .unwrap();

    // Starting from a single token: its direct neighbours are expected.
    let before_tok3 = helper.get_token_before(tok3_id, None).unwrap().unwrap();
    assert_eq!("root/doc1#tok2", node_name(before_tok3, &graph));

    let after_tok3 = helper.get_token_after(tok3_id, None).unwrap().unwrap();
    assert_eq!("root/doc1#tok4", node_name(after_tok3, &graph));

    // Starting from the span: the tokens just outside the covered range are expected.
    let before_span = helper.get_token_before(span_id, None).unwrap().unwrap();
    assert_eq!("root/doc1#tok2", node_name(before_span, &graph));

    let after_span = helper.get_token_after(span_id, None).unwrap().unwrap();
    assert_eq!("root/doc1#tok5", node_name(after_span, &graph));
}

/// Checks that the preceding and following node can be found when navigating along the
/// `seg` segmentation.
#[test]
fn token_before_and_after_segmentation() {
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));
    example_generator::create_segmentation(&mut update);

    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    let helper = TokenHelper::new(&graph).unwrap();

    // Start at the segmentation node seg2 and look up its neighbours in the same segmentation.
    let seg2_id = graph
        .get_node_annos()
        .get_node_id_from_name("root/doc1#seg2")
        .unwrap()
        .unwrap();

    let before_seg2 = helper
        .get_token_before(seg2_id, Some("seg"))
        .unwrap()
        .unwrap();
    assert_eq!("root/doc1#seg1", node_name(before_seg2, &graph));

    let after_seg2 = helper
        .get_token_after(seg2_id, Some("seg"))
        .unwrap()
        .unwrap();
    assert_eq!("root/doc1#seg3", node_name(after_seg2, &graph));
}

/// Checks that a single token, queried without a segmentation, results in exactly one span
/// that starts and ends at this token.
#[test]
fn continuous_span_from_single_base_token() {
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));

    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    let annos = graph.get_node_annos();
    let tok7_id = annos
        .get_node_id_from_name("root/doc1#tok7")
        .unwrap()
        .unwrap();

    let helper = TokenHelper::new(&graph).unwrap();

    let actual = helper
        .continuous_segmentation_spans(&[tok7_id], None)
        .unwrap();
    let expected = vec![(tok7_id, tok7_id)];
    assert_eq!(expected, actual);
}

/// Checks that a selection of tokens with gaps is grouped into continuous spans, each given
/// as a pair of its first and last token.
#[test]
fn continuous_span_from_base_token() {
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));

    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    // Resolve all selected tokens in one go. The selection leaves out tok1, tok2, tok6 and
    // tok9, so it consists of four separate runs.
    let [t0, t3, t4, t5, t7, t8, t10] = [
        "root/doc1#tok0",
        "root/doc1#tok3",
        "root/doc1#tok4",
        "root/doc1#tok5",
        "root/doc1#tok7",
        "root/doc1#tok8",
        "root/doc1#tok10",
    ]
    .map(|name| node_id(name, &graph));

    let helper = TokenHelper::new(&graph).unwrap();

    let actual = helper
        .continuous_segmentation_spans(&[t0, t3, t4, t5, t7, t8, t10], None)
        .unwrap();
    let expected = vec![(t0, t0), (t3, t5), (t7, t8), (t10, t10)];
    assert_eq!(expected, actual);
}

/// Checks that a single node of the `seg` segmentation results in exactly one span that
/// starts and ends at this node.
#[test]
fn continuous_span_from_single_seg_node() {
    let mut update = GraphUpdate::new();
    example_generator::create_corpus_structure_simple(&mut update);
    example_generator::create_tokens(&mut update, Some("root/doc1"));
    example_generator::create_segmentation(&mut update);

    let mut graph = AnnotationGraph::with_default_graphstorages(false).unwrap();
    graph.apply_update(&mut update, |_status| {}).unwrap();

    let seg3_id = node_id("root/doc1#seg3", &graph);

    let helper = TokenHelper::new(&graph).unwrap();

    let actual = helper
        .continuous_segmentation_spans(&[seg3_id], Some("seg"))
        .unwrap();
    let expected = vec![(seg3_id, seg3_id)];
    assert_eq!(expected, actual);
}

/// Returns the value of the node name annotation of the node with the given ID.
///
/// # Panics
///
/// Panics if the annotation lookup fails or if the node has no name annotation.
fn node_name(id: NodeID, g: &AnnotationGraph) -> String {
    let annos = g.get_node_annos();
    let lookup = annos.get_value_for_item(&id, &NODE_NAME_KEY);
    // The first `unwrap` handles a failed lookup, the second a missing annotation value.
    let name = lookup.unwrap().unwrap();
    name.to_string()
}

/// Returns the ID of the node with the given name.
///
/// # Panics
///
/// Panics if the lookup itself fails or if the name cannot be resolved to a node. In the
/// second case the node name is attached as context, so the panic message shows which node
/// was missing.
fn node_id(node_name: &str, g: &AnnotationGraph) -> NodeID {
    g.get_node_annos()
        .get_node_id_from_name(node_name)
        .unwrap()
        // Attach the requested name before unwrapping, so a failing test names the node.
        .with_context(|| node_name.to_string())
        .unwrap()
}