annatomic 0.2.0

The Annatomic annotation editor is intended to be used for the [RIDGES corpus](https://www.linguistik.hu-berlin.de/en/institut-en/professuren-en/korpuslinguistik/research/ridges-projekt). It is based on [graphANNIS](https://github.com/korpling/graphANNIS), and thus its internal data model is in principle suitable for a wide range of annotation concepts.
Documentation
//! Data structures needed to represent concepts of the annotation graph.

use anyhow::Context as _;
use anyhow::Result;
use graphannis::{
    AnnotationGraph,
    graph::{AnnoKey, NodeID},
};
use graphannis_core::graph::ANNIS_NS;
use graphannis_core::graph::NODE_NAME_KEY;
use ordermap::OrderSet;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::hash::Hash;

use super::util::token_helper::TokenHelper;

/// A single token node together with its position and annotation values.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Token {
    /// Name of the node; [`Token::from_graph`] fills this with the value
    /// stored under `NODE_NAME_KEY`.
    pub node_name: String,
    /// Start position as supplied by the caller.
    /// NOTE(review): unit (character offset vs. token index) is not defined in
    /// this file — confirm against the callers.
    pub start: usize,
    /// End position as supplied by the caller.
    /// NOTE(review): inclusive/exclusive semantics are not defined in this
    /// file — confirm against the callers.
    pub end: usize,
    /// Annotation values keyed by annotation key. [`Token::from_graph`] stores
    /// every annotation of the node here without filtering by namespace.
    pub labels: BTreeMap<AnnoKey, String>,
}

/// Describes a single continuous span.
#[derive(Clone, Debug)]
pub struct Span {
    /// Name of the span node; [`Span::from_graph`] fills this with the value
    /// stored under `NODE_NAME_KEY`.
    pub node_name: String,
    /// The IDs of the token covered, in token order.
    pub sorted_covered_token_ids: OrderSet<NodeID>,
    /// Annotation values keyed by annotation key. [`Span::from_graph`] leaves
    /// out annotations whose namespace is `ANNIS_NS`.
    pub labels: BTreeMap<AnnoKey, String>,
}

/// Combines several non-overlapping spans into one row.
#[derive(Clone, Debug)]
pub struct SpanRow {
    /// The spans that belong to this row, in the order they were added.
    pub spans: Vec<Span>,
    /// **Non-sorted** set of the union of all tokens occupied by the spans of this row.
    pub occupied_token: HashSet<NodeID>,
    /// Annotation keys of this row. A span is only accepted by
    /// `can_merge_span` when its label keys are equal to this set.
    pub anno_keys: BTreeSet<AnnoKey>,
}

impl Token {
    /// Builds a [`Token`] for the node `node_id` of `graph`.
    ///
    /// `start` and `end` are stored unchanged. Every annotation attached to
    /// the node is copied into `labels`.
    ///
    /// # Errors
    ///
    /// Fails if the annotation storage reports an error or if the node has no
    /// value for `NODE_NAME_KEY`.
    pub fn from_graph(
        node_id: NodeID,
        start: usize,
        end: usize,
        graph: &AnnotationGraph,
    ) -> Result<Self> {
        let node_annos = graph.get_node_annos();

        // Resolve the name first so a missing name is reported before the
        // annotations are fetched.
        let node_name = node_annos
            .get_value_for_item(&node_id, &NODE_NAME_KEY)?
            .with_context(|| format!("No node name for node with id {node_id}"))?
            .to_string();

        let mut labels = BTreeMap::new();
        labels.extend(
            node_annos
                .get_annotations_for_item(&node_id)?
                .into_iter()
                .map(|anno| (anno.key, anno.val.to_string())),
        );

        Ok(Self {
            node_name,
            start,
            end,
            labels,
        })
    }
}

impl Span {
    /// Builds a [`Span`] for the node `node_id` of `graph`.
    ///
    /// Annotations in the `ANNIS_NS` namespace are not copied into `labels`.
    /// The covered token are taken from `tok_helper` and stored in the order
    /// in which it returns them.
    ///
    /// # Errors
    ///
    /// Fails if the annotation storage or the token helper reports an error,
    /// or if the node has no value for `NODE_NAME_KEY`.
    pub fn from_graph(
        node_id: NodeID,
        tok_helper: &TokenHelper<'_>,
        graph: &AnnotationGraph,
    ) -> Result<Self> {
        let node_annos = graph.get_node_annos();

        let node_name = node_annos
            .get_value_for_item(&node_id, &NODE_NAME_KEY)?
            .with_context(|| format!("No node name for node with id {node_id}"))?
            .to_string();

        // Only keep annotations outside of the internal ANNIS namespace.
        let mut labels = BTreeMap::new();
        labels.extend(
            node_annos
                .get_annotations_for_item(&node_id)?
                .into_iter()
                .filter(|anno| anno.key.ns != ANNIS_NS)
                .map(|anno| (anno.key, anno.val.to_string())),
        );

        let mut sorted_covered_token_ids = OrderSet::new();
        sorted_covered_token_ids.extend(tok_helper.covered_token(node_id)?);

        Ok(Self {
            node_name,
            sorted_covered_token_ids,
            labels,
        })
    }
}

impl From<Span> for SpanRow {
    /// Creates a row that contains exactly the given span. The row's
    /// annotation keys and occupied token are derived from that span.
    fn from(span: Span) -> Self {
        // Derive both sets before the span itself is moved into the row.
        let anno_keys = span.labels.keys().cloned().collect();
        let occupied_token = span.sorted_covered_token_ids.iter().copied().collect();
        Self {
            spans: vec![span],
            occupied_token,
            anno_keys,
        }
    }
}

impl SpanRow {
    /// Checks whether `span` could be added to this row.
    ///
    /// This is the case when the span shares no token with the spans already
    /// in the row and its label keys are equal to the row's annotation keys.
    pub(crate) fn can_merge_span(&self, span: &Span) -> bool {
        let candidate_token = &span.sorted_covered_token_ids;

        // Iterate over the smaller collection and probe the larger one.
        let overlaps = if self.occupied_token.len() <= candidate_token.len() {
            self.occupied_token
                .iter()
                .any(|id| candidate_token.contains(id))
        } else {
            candidate_token
                .iter()
                .any(|id| self.occupied_token.contains(id))
        };

        !overlaps && self.anno_keys.iter().eq(span.labels.keys())
    }

    /// Adds the given span to this row if possible. Returns `true` if the
    /// span was merged and `false` if it was not (the row is then left
    /// unchanged).
    pub(crate) fn merge_span(&mut self, other: &Span) -> Result<bool> {
        if !self.can_merge_span(other) {
            return Ok(false);
        }

        // Mark the token of the new span as occupied before storing the span.
        self.occupied_token
            .extend(other.sorted_covered_token_ids.iter().copied());
        // TODO: avoid cloning here by passing the span as value and returning the span if not successfully merged.
        self.spans.push(other.clone());
        Ok(true)
    }
}