1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
//! Projectivization/deprojectivization of graphs.

use std::cmp::{max, min};
use std::collections::{HashMap, HashSet};

use itertools::Itertools;
use petgraph::graph::{node_index, EdgeIndex, NodeIndex};
use petgraph::visit::{Bfs, EdgeRef, NodeFiltered, Walker};
use petgraph::{Directed, Direction, Graph};

use crate::graph::{DepTriple, Sentence};
use crate::{BfsWithDepth, GraphError};

pub trait Deprojectivize {
    fn deprojectivize(&self, sentence: &mut Sentence) -> Result<(), GraphError>;
}

pub trait Projectivize {
    fn projectivize(&self, sentence: &mut Sentence) -> Result<(), GraphError>;
}

/// A projectivizer using the 'head' marking strategy. See: *Pseudo-Projective
/// Dependency Parsing*, Nivre and Nilsson, 2005.
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct HeadProjectivizer;

impl HeadProjectivizer {
    pub fn new() -> Self {
        HeadProjectivizer {}
    }

    /// Deprojectivize the next node in the array of lifted nodes.
    ///
    /// Returns the index of the node that was lifted.
    fn deprojectivize_next(
        self,
        graph: &mut Graph<(), String, Directed>,
        lifted_sorted: &[NodeIndex],
        head_labels: &HashMap<NodeIndex, String>,
    ) -> Option<usize> {
        for (idx, lifted_node) in lifted_sorted.iter().enumerate() {
            let pref_head_rel = head_labels
                .get(lifted_node)
                .expect("Lifted node without preferred head relation");

            let head_edge = graph
                .first_edge(*lifted_node, Direction::Incoming)
                .expect("Lifted node without an incoming edge");
            let (cur_head, _) = graph
                .edge_endpoints(head_edge)
                .expect("Endpoints of lifted edge could not be found");

            if let Some(new_head) =
                self.search_attachment_point(&graph, cur_head, *lifted_node, pref_head_rel)
            {
                let head_rel = graph
                    .remove_edge(head_edge)
                    .expect("Lifted edge to be removed could not be found");
                graph.add_edge(new_head, *lifted_node, head_rel);
                return Some(idx);
            }
        }

        None
    }

    /// Find the correct attachment point for the lifted token/node.
    fn search_attachment_point(
        self,
        graph: &Graph<(), String, Directed>,
        cur_head: NodeIndex,
        lifted_node: NodeIndex,
        pref_head_rel: &str,
    ) -> Option<NodeIndex> {
        // We are looking for a token dominated by cur_head to attach
        // lifted_node to. This token should:
        //
        // 1. Be attached to its head using pref_head_rel.
        // 2. Not be lifted_node itself or any of its decendants.
        // 3. As high in the tree as possible.
        //
        // From the set of candidates, we pick the token that is the
        // closest to the current head.

        // Requirement (2): use a view of the graph that excludes
        // to avoid attachment to the lifted_node or any of its children.
        let graph_without_lifted = NodeFiltered::from_fn(graph, |n| n != lifted_node);

        // Requirement (3): process the dependency tree by increasing depth
        // until the reattachment token is found.
        for (_, nodes) in &BfsWithDepth::new(&graph_without_lifted, node_index(0))
            .iter(&graph_without_lifted)
            .skip(1)
            .group_by(|&(_, depth)| depth)
        {
            // Requirement (1): Only retain edges with the preferred relation.
            let level_candidates = nodes.map(|(node, _)| node).filter(|&node| {
                let edge = match graph.first_edge(node, Direction::Incoming) {
                    Some(edge) => edge,
                    None => return false,
                };

                graph[edge] == pref_head_rel
            });

            // When there are multiple candidates, return the token closes to the head.
            let min_candidate = level_candidates.min_by_key(|&node| {
                max(node.index(), cur_head.index()) - min(node.index(), cur_head.index())
            });

            if min_candidate.is_some() {
                return min_candidate;
            }
        }

        None
    }

    /// Lift the edge identified by `edge_idx`. This will reattach the edge
    /// to the parent of the head. If this was the first lifting operation,
    /// the dependency relation of the original head is added to the dependency
    /// relation (following the head-strategy).
    fn lift(
        self,
        graph: &mut Graph<(), String, Directed>,
        lifted: &mut HashSet<NodeIndex>,
        edge_idx: EdgeIndex,
    ) {
        let (source, target) = graph
            .edge_endpoints(edge_idx)
            .expect("lift() called with invalid index");
        let parent_edge = graph
            .first_edge(source, Direction::Incoming)
            .expect("Cannot find incoming edge of the to-be lifted node");
        let parent_rel = graph[parent_edge].clone();
        let (parent, _) = graph
            .edge_endpoints(parent_edge)
            .expect("Cannot find endpoints of to-be lifted edge");

        let rel = graph
            .remove_edge(edge_idx)
            .expect("Cannot remove edge to-be lifted");

        if lifted.contains(&target) {
            graph.add_edge(parent, target, rel);
        } else {
            graph.add_edge(parent, target, format!("{}|{}", rel, parent_rel));
            lifted.insert(target);
        }
    }

    /// Prepare for deprojectivizing: remove head annotations from lifted
    /// relations. Return the transformed graph + indices of lifted nodes
    /// and their head labels.
    fn prepare_deproj(
        self,
        graph: &Graph<(), String, Directed>,
    ) -> (Graph<(), String, Directed>, HashMap<NodeIndex, String>) {
        let mut pref_head_labels = HashMap::new();

        let prepared_graph = graph.map(
            |_, &node_val| node_val,
            |edge_idx, edge_val| {
                let sep_idx = match edge_val.find('|') {
                    Some(idx) => idx,
                    None => return edge_val.clone(),
                };

                let (_, dep) = graph
                    .edge_endpoints(edge_idx)
                    .expect("Cannot lookup edge endpoints");

                pref_head_labels.insert(dep, edge_val[sep_idx + 1..].to_owned());

                edge_val[..sep_idx].to_owned()
            },
        );

        (prepared_graph, pref_head_labels)
    }
}

impl Default for HeadProjectivizer {
    fn default() -> Self {
        HeadProjectivizer
    }
}

impl Projectivize for HeadProjectivizer {
    fn projectivize(&self, sentence: &mut Sentence) -> Result<(), GraphError> {
        let mut graph = simplify_graph(sentence)?;
        let mut lifted = HashSet::new();

        // Lift non-projective edges until there are no non-projective
        // edges left.
        loop {
            let np_edges = non_projective_edges(&graph);
            if np_edges.is_empty() {
                break;
            }

            self.lift(&mut graph, &mut lifted, np_edges[0]);
        }

        // The graph is now a projective tree. Update the dependency relations
        // in the sentence to correspond to the graph.
        update_sentence(&graph, sentence);

        Ok(())
    }
}

impl Deprojectivize for HeadProjectivizer {
    fn deprojectivize(&self, sentence: &mut Sentence) -> Result<(), GraphError> {
        let graph = simplify_graph(sentence)?;

        // Find nodes and corresponding edges that are lifted and remove
        // head labels from dependency relations.
        let (mut graph, head_labels) = self.prepare_deproj(&graph);
        if head_labels.is_empty() {
            return Ok(());
        }

        // Get and sort lifted tokens by increasing depth.
        let mut lifted_sorted = Vec::new();
        let mut bfs = Bfs::new(&graph, node_index(0));
        while let Some(node) = bfs.next(&graph) {
            if head_labels.get(&node).is_some() {
                lifted_sorted.push(node);
            }
        }

        // Deprojectivize the graph, re-attaching one token at a time,
        // with the preference of a token that is not deep in the tree.
        while let Some(idx) = self.deprojectivize_next(&mut graph, &lifted_sorted, &head_labels) {
            lifted_sorted.remove(idx);
        }

        update_sentence(&graph, sentence);

        Ok(())
    }
}

pub fn simplify_graph(sentence: &Sentence) -> Result<Graph<(), String, Directed>, GraphError> {
    let mut edges = Vec::with_capacity(sentence.len() + 1);
    for idx in 0..sentence.len() {
        let triple = match sentence.dep_graph().head(idx) {
            Some(triple) => triple,
            None => continue,
        };

        let head_rel = match triple.relation() {
            Some(head_rel) => head_rel,
            None => {
                return Err(GraphError::IncompleteGraph {
                    value: format!(
                        "edge from {} to {} does not have a label",
                        triple.head(),
                        triple.dependent()
                    ),
                })
            }
        };

        edges.push((
            node_index(triple.head()),
            node_index(triple.dependent()),
            head_rel.to_owned(),
        ))
    }

    Ok(Graph::<(), String, Directed>::from_edges(edges))
}

/// Returns non-projective edges in the graph, ordered by length.
pub fn non_projective_edges(graph: &Graph<(), String, Directed>) -> Vec<EdgeIndex> {
    let mut non_projective = Vec::new();

    for i in 0..graph.node_count() {
        let mut i_reachable = HashSet::new();
        let mut bfs = Bfs::new(&graph, node_index(i));
        while let Some(node) = bfs.next(&graph) {
            i_reachable.insert(node.index());
        }

        for edge in graph.edges(node_index(i)) {
            // An edge i -> k is projective, iff:
            //
            // i > j > k or i < j < k, and i ->* j
            for j in min(i, edge.target().index())..max(i, edge.target().index()) {
                if !i_reachable.contains(&j) {
                    non_projective.push(edge);
                    break;
                }
            }
        }
    }

    non_projective.sort_by(|a, b| {
        let a_len = max(a.source().index(), a.target().index())
            - min(a.source().index(), a.target().index());
        let b_len = max(b.source().index(), b.target().index())
            - min(b.source().index(), b.target().index());

        a_len.cmp(&b_len)
    });

    non_projective.iter().map(EdgeRef::id).collect()
}

/// Update a sentence with dependency relations from a graph.
fn update_sentence(graph: &Graph<(), String, Directed>, sentence: &mut Sentence) {
    let mut sent_graph = sentence.dep_graph_mut();
    for edge_ref in graph.edge_references() {
        sent_graph.add_deprel(DepTriple::new(
            edge_ref.source().index(),
            Some(edge_ref.weight().clone()),
            edge_ref.target().index(),
        ));
    }
}

#[cfg(test)]
mod tests {
    use lazy_static::lazy_static;
    use petgraph::graph::{node_index, NodeIndex};

    use crate::graph::Sentence;
    use crate::proj::{
        non_projective_edges, simplify_graph, Deprojectivize, HeadProjectivizer, Projectivize,
    };
    use crate::tests::read_sentences;

    lazy_static! {
        static ref NON_PROJECTIVE_EDGES: Vec<Vec<(NodeIndex, NodeIndex)>> = vec![
            vec![(node_index(8), node_index(1))],
            vec![(node_index(10), node_index(2))],
            vec![(node_index(5), node_index(1))],
            vec![
                (node_index(1), node_index(3)),
                (node_index(7), node_index(5))
            ],
        ];
    }

    fn sent_non_projective_edges(sents: &[Sentence]) -> Vec<Vec<(NodeIndex, NodeIndex)>> {
        let mut np_edges = Vec::new();

        for sent in sents {
            let graph = simplify_graph(sent).unwrap();
            let np: Vec<_> = non_projective_edges(&graph)
                .iter()
                .map(|idx| graph.edge_endpoints(*idx).unwrap())
                .collect();
            np_edges.push(np);
        }

        np_edges
    }

    static PROJECTIVE_SENTENCES_FILENAME: &str = "testdata/projective.conll";

    static NONPROJECTIVE_SENTENCES_FILENAME: &str = "testdata/nonprojective.conll";

    #[test]
    fn deprojectivize_test() {
        let projectivizer = HeadProjectivizer::new();
        let non_projective: Vec<_> = read_sentences(PROJECTIVE_SENTENCES_FILENAME)
            .into_iter()
            .map(|mut s| {
                projectivizer
                    .deprojectivize(&mut s)
                    .expect("Cannot deprojectivize sentence");
                s
            })
            .collect();

        assert_eq!(
            read_sentences(NONPROJECTIVE_SENTENCES_FILENAME),
            non_projective
        );
    }

    #[test]
    fn non_projective_test() {
        let test_edges =
            sent_non_projective_edges(&read_sentences(NONPROJECTIVE_SENTENCES_FILENAME));
        assert_eq!(*NON_PROJECTIVE_EDGES, test_edges);
    }

    #[test]
    fn projectivize_test() {
        let projectivizer = HeadProjectivizer::new();
        let projective: Vec<_> = read_sentences(NONPROJECTIVE_SENTENCES_FILENAME)
            .into_iter()
            .map(|mut s| {
                projectivizer
                    .projectivize(&mut s)
                    .expect("Cannot projectivize sentence");
                s
            })
            .collect();

        assert_eq!(read_sentences(PROJECTIVE_SENTENCES_FILENAME), projective);
    }
}