1use stam::*;
2
3use seal::pair::{AlignmentSet, InMemoryAlignmentMatrix, NeedlemanWunsch, SmithWaterman, Step};
4use std::str::FromStr;
5
6const TRIM_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];
7
8#[derive(Clone, Debug)]
9pub struct AlignmentConfig {
10 pub case_sensitive: bool,
12
13 pub algorithm: AlignmentAlgorithm,
15
16 pub annotation_id_prefix: Option<String>,
18
19 pub trim: bool,
22
23 pub simple_only: bool,
25
26 pub minimal_align_length: usize,
28
29 pub max_errors: Option<AbsoluteOrRelative>,
34
35 pub grow: bool,
38
39 pub verbose: bool,
41
42 pub quiet: bool,
44}
45
46impl Default for AlignmentConfig {
47 fn default() -> Self {
48 Self {
49 case_sensitive: true,
50 algorithm: AlignmentAlgorithm::default(),
51 annotation_id_prefix: None,
52 minimal_align_length: 0,
53 max_errors: None,
54 trim: false,
55 simple_only: false,
56 verbose: false,
57 grow: false,
58 quiet: false,
59 }
60 }
61}
62
63#[derive(Clone, Debug)]
64pub enum AlignmentAlgorithm {
65 NeedlemanWunsch {
67 equal: isize,
68 align: isize,
69 insert: isize,
70 delete: isize,
71 },
72 SmithWaterman {
74 equal: isize,
75 align: isize,
76 insert: isize,
77 delete: isize,
78 },
79}
80
81impl Default for AlignmentAlgorithm {
82 fn default() -> Self {
83 Self::SmithWaterman {
84 equal: 2,
85 align: -1,
86 insert: -1,
87 delete: -1,
88 }
89 }
90}
91
92pub fn align<'store>(
96 store: &'store mut AnnotationStore,
97 query: Query<'store>,
98 queries2: Vec<Query<'store>>,
99 use_var: Option<&str>,
100 use_var2: Option<&str>,
101 config: &AlignmentConfig,
102) -> Result<Vec<AnnotationHandle>, StamError> {
103 let mut buildtranspositions = Vec::new();
104 {
105 let iter = store.query(query)?;
106 for resultrow in iter {
107 if let Ok(result) = resultrow.get_by_name_or_last(use_var) {
108 for (i, query2raw) in queries2.iter().enumerate() {
109 if !config.quiet {
111 eprintln!("Aligning #{}/{}...", i + 1, queries2.len());
112 }
113 let (text, query2) = match result {
114 QueryResultItem::TextResource(resource) => ( resource.clone().to_textselection(), query2raw.clone().with_resourcevar(use_var.unwrap_or("resource"), resource)),
115 QueryResultItem::Annotation(annotation) => {
116 if let Some(tsel) = annotation.textselections().next() {
117 (tsel, query2raw.clone().with_annotationvar(use_var.unwrap_or("annotation"), annotation))
118 } else {
119 return Err(StamError::OtherError("Annotation references multiple texts, this is not supported yet by stam align"));
120 }
121 }
122 QueryResultItem::TextSelection(tsel) => ( tsel.clone(), query2raw.clone().with_textvar(use_var.unwrap_or("text"), tsel)),
123 _ => return Err(StamError::OtherError("Obtained result type can not by used by stam align, expected ANNOTATION, RESOURCE or TEXT"))
124 };
125
126 let iter2 = store.query(query2)?;
127 for resultrow2 in iter2 {
128 if let Ok(result) = resultrow2.get_by_name_or_last(use_var2) {
129 let text2 = match result {
130 QueryResultItem::TextResource(resource) => resource.clone().to_textselection(),
131 QueryResultItem::Annotation(annotation) => {
132 if let Some(tsel) = annotation.textselections().next() {
133 tsel
134 } else {
135 return Err(StamError::OtherError("Annotation references multiple texts, this is not supported yet by stam align"));
136 }
137 }
138 QueryResultItem::TextSelection(tsel) => tsel.clone(),
139 _ => return Err(StamError::OtherError("Obtained result type can not by used by stam align, expected ANNOTATION, RESOURCE or TEXT"))
140 };
141 let builders = align_texts(&text, &text2, config)?;
142 buildtranspositions.extend(builders);
143 } else if let Some(use_var2) = use_var2 {
144 return Err(StamError::QuerySyntaxError(
145 format!(
146 "No result found for variable {}, so nothing to align",
147 use_var2
148 ),
149 "(align)",
150 ));
151 }
152 }
153 }
154 } else if let Some(use_var) = use_var {
155 return Err(StamError::QuerySyntaxError(
156 format!(
157 "No result found for variable {}, so nothing to align",
158 use_var
159 ),
160 "(align)",
161 ));
162 }
163 }
164 }
165 let mut transpositions = Vec::with_capacity(buildtranspositions.len());
166 for builder in buildtranspositions {
167 transpositions.push(store.annotate(builder)?);
168 }
169 if !config.quiet {
170 eprintln!("{} annotations(s) created", transpositions.len());
171 }
172 Ok(transpositions)
173}
174
175#[derive(Clone, PartialEq, Debug)]
176struct AlignedFragment {
177 begin1: usize,
178 begin2: usize,
179 length: usize,
180}
181
182impl AlignedFragment {
183 fn to_offsets<'a>(&self) -> (Offset, Offset) {
184 (
185 Offset::simple(self.begin1, self.begin1 + self.length),
186 Offset::simple(self.begin2, self.begin2 + self.length),
187 )
188 }
189
190 fn strdiff(
192 &self,
193 textstring1: &str,
194 textstring2: &str,
195 config: &AlignmentConfig,
196 ) -> Option<usize> {
197 for (i, (c1, c2)) in textstring1.chars().zip(textstring2.chars()).enumerate() {
198 if c1 != c2 {
199 if config.case_sensitive {
200 return Some(i);
201 } else {
202 if c1.to_lowercase().to_string() != c2.to_lowercase().to_string() {
203 return Some(i);
204 }
205 }
206 }
207 }
208 return None;
209 }
210
211 fn publish<'store>(
212 &self,
213 select1: &mut Vec<SelectorBuilder<'static>>,
214 select2: &mut Vec<SelectorBuilder<'static>>,
215 text: &ResultTextSelection<'store>,
216 text2: &ResultTextSelection<'store>,
217 config: &AlignmentConfig,
218 ) -> Result<bool, StamError> {
219 let (offset1, offset2) = self.to_offsets(); let mut textsel1 = text.textselection(&offset1)?;
221 let mut textsel2 = text2.textselection(&offset2)?;
222 let mut textstring1 = textsel1.text();
223 let mut textstring2 = textsel2.text();
224 if !config.grow {
225 if let Some(newlength) = self.strdiff(textstring1, textstring2, config) {
228 let mut shorterfragment = self.clone();
229 if newlength > 1 {
230 shorterfragment.length = newlength;
231 return shorterfragment.publish(select1, select2, text, text2, config);
232 } else {
233 return Ok(false);
234 }
235 }
236 }
237 if config.trim {
238 if let Ok(trimmed) = text.textselection(&offset1)?.trim_text(&TRIM_CHARS) {
239 textsel1 = trimmed;
240 textstring1 = textsel1.text();
241 } else {
242 return Ok(false);
244 }
245 if let Ok(trimmed) = text2.textselection(&offset2)?.trim_text(&TRIM_CHARS) {
246 textsel2 = trimmed;
247 textstring2 = textsel2.text();
248 } else {
249 return Ok(false);
251 }
252 };
253 if config.verbose {
254 println!(
255 "{}\t{}-{}\t{}\t{}-{}\t\"{}\"\t\"{}\"",
256 text.resource().id().unwrap_or("-"),
257 &textsel1.begin(),
258 &textsel1.end(),
259 text2.resource().id().unwrap_or("-"),
260 &textsel2.begin(),
261 &textsel2.end(),
262 textstring1
263 .replace("\"", "\\\"")
264 .replace("\t", "\\t")
265 .replace("\n", "\\n"),
266 textstring2
267 .replace("\"", "\\\"")
268 .replace("\t", "\\t")
269 .replace("\n", "\\n")
270 );
271 }
272 let offset1: Offset = textsel1.inner().into();
273 let offset2: Offset = textsel2.inner().into();
274 select1.push(SelectorBuilder::TextSelector(
275 text.resource().handle().into(),
276 offset1,
277 ));
278 select2.push(SelectorBuilder::TextSelector(
279 text2.resource().handle().into(),
280 offset2,
281 ));
282 Ok(true)
283 }
284}
285
286pub fn align_texts<'store>(
289 text: &ResultTextSelection<'store>,
290 text2: &ResultTextSelection<'store>,
291 config: &AlignmentConfig,
292) -> Result<Vec<AnnotationBuilder<'static>>, StamError> {
293 let mut builders = Vec::with_capacity(3);
294 let seq1: Vec<char> = text.text().chars().collect();
295 let seq2: Vec<char> = text2.text().chars().collect();
296
297 let alignment_set: Result<AlignmentSet<InMemoryAlignmentMatrix>, _> = match config.algorithm {
298 AlignmentAlgorithm::SmithWaterman {
299 equal,
300 align,
301 insert,
302 delete,
303 } => {
304 let algorithm = SmithWaterman::new(equal, align, insert, delete);
305 AlignmentSet::new(seq1.len(), seq2.len(), algorithm, |x, y| {
306 if seq1[x] != seq2[y] {
307 if !config.case_sensitive {
308 return seq1[x].to_lowercase().to_string()
309 == seq2[y].to_lowercase().to_string();
310 } else {
311 return false;
312 }
313 }
314 true
315 })
316 }
317 AlignmentAlgorithm::NeedlemanWunsch {
318 equal,
319 align,
320 insert,
321 delete,
322 } => {
323 let algorithm = NeedlemanWunsch::new(equal, align, insert, delete);
324 AlignmentSet::new(seq1.len(), seq2.len(), algorithm, |x, y| {
325 if seq1[x] != seq2[y] {
326 if !config.case_sensitive {
327 return seq1[x].to_lowercase().to_string()
328 == seq2[y].to_lowercase().to_string();
329 } else {
330 return false;
331 }
332 }
333 true
334 })
335 }
336 };
337
338 match alignment_set {
339 Ok(alignment_set) => {
340 let alignment = match config.algorithm {
341 AlignmentAlgorithm::SmithWaterman { .. } => alignment_set.local_alignment(),
342 AlignmentAlgorithm::NeedlemanWunsch { .. } => alignment_set.global_alignment(),
343 };
344 let mut select1: Vec<SelectorBuilder<'static>> = Vec::new();
345 let mut select2: Vec<SelectorBuilder<'static>> = Vec::new();
346
347 let mut fragment: Option<AlignedFragment> = None;
348 let mut totalalignlength = 0;
349 let mut errors = 0;
350 let mut last = None;
351 for step in alignment.steps() {
352 match step {
353 Step::Align { x, y } => {
354 if let Some(fragment) = fragment.as_mut() {
355 fragment.length += 1;
356 last = last.map(|x| x + 1);
357 totalalignlength += 1;
358 } else {
359 fragment = Some(AlignedFragment {
360 begin1: x,
361 begin2: y,
362 length: 1,
363 });
364 if let Some(last) = last {
365 errors += x - last;
367 } else {
368 errors += x;
370 }
371 last = Some(x + 1);
372 totalalignlength += 1;
373 }
374 }
375 _ => {
376 if let Some(fragment) = fragment.take() {
377 fragment.publish(&mut select1, &mut select2, text, text2, config)?;
378 }
379 }
380 }
381 }
382
383 if let Some(fragment) = fragment.take() {
384 fragment.publish(&mut select1, &mut select2, text, text2, config)?;
385 }
386
387 if let Some(max_errors) = config.max_errors {
388 let max_errors = max_errors.as_absolute(seq1.len());
389 if let Some(last) = last {
390 errors += seq1.len() - last;
392 }
393 if errors > max_errors {
394 return Ok(builders);
396 }
397 }
398 if totalalignlength < config.minimal_align_length {
399 return Ok(builders);
401 }
402
403 if select1.is_empty() || (config.simple_only && select1.len() > 1) {
404 return Ok(builders);
407 }
408
409 if config.grow && select1.len() > 1 {
410 let id = if let Some(prefix) = config.annotation_id_prefix.as_ref() {
411 generate_id(&format!("{}translation", prefix), "")
412 } else {
413 generate_id("translation", "")
414 };
415
416 let mut newselect1 = select1.pop().expect("must have an item");
417 if let Some(s) = select1.get(0) {
418 let mut offset = newselect1.offset().expect("must have offset").clone();
419 offset.begin = s.offset().expect("must have offset").begin;
420 newselect1 = SelectorBuilder::textselector(
421 newselect1.resource().unwrap().clone(),
422 offset,
423 );
424 }
425 let mut newselect2 = select2.pop().expect("must have an item");
426 if let Some(s) = select2.get(0) {
427 let mut offset = newselect2.offset().expect("must have offset").clone();
428 offset.begin = s.offset().expect("must have offset").begin;
429 newselect2 = SelectorBuilder::textselector(
430 newselect2.resource().unwrap().clone(),
431 offset,
432 );
433 }
434
435 builders.push(
436 AnnotationBuilder::new()
437 .with_id(id.clone())
438 .with_data(
439 "https://w3id.org/stam/extensions/stam-translate/",
440 "Translation",
441 DataValue::Null,
442 )
443 .with_target(SelectorBuilder::DirectionalSelector(vec![
444 newselect1, newselect2,
445 ])),
446 );
447 Ok(builders)
448 } else {
449 let id = if let Some(prefix) = config.annotation_id_prefix.as_ref() {
450 generate_id(&format!("{}transposition-", prefix), "")
451 } else {
452 generate_id("transposition-", "")
453 };
454 if select1.len() == 1 {
455 builders.push(
457 AnnotationBuilder::new()
458 .with_id(id.clone())
459 .with_data(
460 "https://w3id.org/stam/extensions/stam-transpose/",
461 "Transposition",
462 DataValue::Null,
463 )
464 .with_target(SelectorBuilder::DirectionalSelector(vec![
465 select1.into_iter().next().unwrap(),
466 select2.into_iter().next().unwrap(),
467 ])),
468 );
469 } else {
470 let annotation1id = format!("{}-side1", id);
472 builders.push(
473 AnnotationBuilder::new()
474 .with_id(annotation1id.clone())
475 .with_data(
476 "https://w3id.org/stam/extensions/stam-transpose/",
477 "TranspositionSide",
478 DataValue::Null,
479 )
480 .with_target(SelectorBuilder::DirectionalSelector(select1)),
481 );
482 let annotation2id = format!("{}-side2", id);
483 builders.push(
484 AnnotationBuilder::new()
485 .with_id(annotation2id.clone())
486 .with_data(
487 "https://w3id.org/stam/extensions/stam-transpose/",
488 "TranspositionSide",
489 DataValue::Null,
490 )
491 .with_target(SelectorBuilder::DirectionalSelector(select2)),
492 );
493 builders.push(
494 AnnotationBuilder::new()
495 .with_id(id.clone())
496 .with_data(
497 "https://w3id.org/stam/extensions/stam-transpose/",
498 "Transposition",
499 DataValue::Null,
500 )
501 .with_target(SelectorBuilder::DirectionalSelector(vec![
502 SelectorBuilder::AnnotationSelector(annotation1id.into(), None),
503 SelectorBuilder::AnnotationSelector(annotation2id.into(), None),
504 ])),
505 );
506 }
507 Ok(builders)
508 }
509 }
510 Err(error) => {
511 eprintln!("ALIGNMENT ERROR: {:?}", error);
512 return Err(StamError::OtherError(
513 "Failed to generated alignment set due to error",
514 ));
515 }
516 }
517}
518
519pub fn alignments_tsv_out<'a>(
520 store: &'a AnnotationStore,
521 query: Query<'a>,
522 use_var: Option<&str>,
523) -> Result<(), StamError> {
524 let iter = store.query(query)?;
525 for resultrow in iter {
526 if let Ok(result) = resultrow.get_by_name_or_last(use_var) {
527 if let QueryResultItem::Annotation(annotation) = result {
528 print_transposition(annotation);
529 } else {
530 return Err(StamError::OtherError(
531 "Only queries that return ANNOTATION are supported when outputting aligments",
532 ));
533 }
534 }
535 }
536 Ok(())
537}
538
539pub fn print_transposition<'a>(annotation: &ResultItem<'a, Annotation>) {
540 let mut annoiter = annotation.annotations_in_targets(AnnotationDepth::One);
541 if let (Some(left), Some(right)) = (annoiter.next(), annoiter.next()) {
542 for (text1, text2) in left.textselections().zip(right.textselections()) {
544 print_alignment(annotation, &text1, &text2)
545 }
546 } else {
547 let mut textiter = annotation.textselections();
549 if let (Some(text1), Some(text2)) = (textiter.next(), textiter.next()) {
550 print_alignment(annotation, &text1, &text2)
551 }
552 }
553}
554
555fn print_alignment<'a>(
556 annotation: &ResultItem<'a, Annotation>,
557 text1: &ResultTextSelection<'a>,
558 text2: &ResultTextSelection<'a>,
559) {
560 println!(
561 "{}\t{}\t{}-{}\t{}\t{}-{}\t\"{}\"\t\"{}\"\t{}",
562 annotation.id().unwrap_or("-"),
563 text1.resource().id().unwrap_or("-"),
564 text1.begin(),
565 text1.end(),
566 text2.resource().id().unwrap_or("-"),
567 text2.begin(),
568 text2.end(),
569 text1
570 .text()
571 .replace("\"", "\\\"")
572 .replace("\t", "\\t")
573 .replace("\n", "\\n"),
574 text2
575 .text()
576 .replace("\"", "\\\"")
577 .replace("\t", "\\t")
578 .replace("\n", "\\n"),
579 {
580 let ids: Vec<_> = annotation
581 .annotations_in_targets(AnnotationDepth::One)
582 .map(|a| a.id().unwrap_or("-"))
583 .collect();
584 ids.join("|")
585 }
586 );
587}
588
589#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
590pub enum AbsoluteOrRelative {
591 Absolute(usize),
592 Relative(f64),
593}
594
595impl AbsoluteOrRelative {
596 pub fn as_absolute(self, total: usize) -> usize {
597 match self {
598 Self::Absolute(i) => i,
599 Self::Relative(f) => (f * total as f64).round() as usize,
600 }
601 }
602}
603
604impl FromStr for AbsoluteOrRelative {
605 type Err = &'static str;
606
607 fn from_str(s: &str) -> Result<Self, Self::Err> {
608 if let Ok(i) = s.parse::<usize>() {
609 Ok(Self::Absolute(i))
610 } else if let Ok(f) = s.parse::<f64>() {
611 Ok(Self::Relative(f))
612 } else {
613 Err("Value must be either an integer (absolute) or a floating point value (relative)")
614 }
615 }
616}