Function tokenizations::get_alignments

pub fn get_alignments<S: AsRef<str>>(a: &[S], b: &[S]) -> (Alignment, Alignment)

Returns the tokenization alignments a2b (from a to b) and b2a (from b to a), based on the shortest edit script (SES).

Examples

use tokenizations::get_alignments;

let a = vec!["New York"];
let b = vec!["New", "York"];
// calculate the two alignments `a2b` and `b2a` at the same time
let (a2b, b2a) = get_alignments(&a, &b);

// `a2b[i]` is a set that holds the indices `j` of `b` such that `a[i]` corresponds to `b[j]`
assert_eq!(a2b, vec![[0, 1]]);
// `b2a` is the inverse of `a2b`
assert_eq!(b2a, vec![[0], [0]]);

// `get_alignments` can be applied to noisy tokens.
let a = vec!["à", "la", "gorge"];
let b = vec!["a", "la", "gorge"]; // dropped accent
let (a2b, b2a) = get_alignments(&a, &b);
assert_eq!(a2b, vec![[0], [1], [2]]);
assert_eq!(b2a, vec![[0], [1], [2]]);