Function tokenizations::get_alignments
pub fn get_alignments(a: &[&str], b: &[&str]) -> (Alignment, Alignment)
Returns the tokenization alignments `a2b` (from `a` to `b`) and `b2a` (from `b` to `a`), based on the shortest edit script (SES).
Examples
```rust
use tokenizations::get_alignments;

let a = vec!["New York"];
let b = vec!["New", "York"];
// calculate the two alignments `a2b` and `b2a` at the same time
let (a2b, b2a) = get_alignments(&a, &b);

// `a2b[i]` is a set that holds the indices `j` of `b` such that `a[i]` corresponds to `b[j]`
assert_eq!(a2b, vec![[0, 1]]);
// `b2a` is the inverse of `a2b`
assert_eq!(b2a, vec![[0], [0]]);

// `get_alignments` can be applied to noisy tokens.
let a = vec!["à", "la", "gorge"];
let b = vec!["a", "la", "gorge"]; // dropped accent
let (a2b, b2a) = get_alignments(&a, &b);
assert_eq!(a2b, vec![[0], [1], [2]]);
assert_eq!(b2a, vec![[0], [1], [2]]);
```