1extern crate dissimilar;
2
3use dissimilar::{diff, Chunk};
4use std::borrow::ToOwned;
5use std::cmp::PartialEq;
6use std::fmt;
7use std::str::FromStr;
8
9#[derive(Debug)]
10pub struct ParseError(pub String);
11
12#[derive(Debug)]
13pub enum ApplyError {
14 NoMatch,
15 WithMessage(String),
16}
17
18#[derive(Debug, Clone, PartialEq)]
19pub struct EditScript<T> {
20 pub mode: Mode,
21 pub distance: u32,
22 pub instructions: Vec<EditInstruction<T>>,
23}
24
25#[derive(Debug, Clone, PartialEq)]
26pub enum EditInstruction<T> {
27 Insertion(T),
29
30 Deletion(T),
32
33 Identity(T),
35
36 GenericIdentity(u32),
38
39 InsertionOptions(Vec<T>),
41
42 DeletionOptions(Vec<T>),
44 IdentityOptions(Vec<T>),
47}
48
49impl<T: std::fmt::Display> fmt::Display for EditScript<T> {
50 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51 for instruction in self.instructions.iter() {
52 write!(f, "{}", instruction)?;
53 }
54 fmt::Result::Ok(())
55 }
56}
57
58impl<T: std::fmt::Display> fmt::Display for EditInstruction<T> {
59 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
60 match self {
61 EditInstruction::GenericIdentity(s) => {
62 write!(f, "=[#{}]", s)
63 }
64 EditInstruction::Identity(s) => {
65 write!(f, "=[{}]", s)
66 }
67 EditInstruction::Insertion(s) => {
68 write!(f, "+[{}]", s)
69 }
70 EditInstruction::Deletion(s) => {
71 write!(f, "-[{}]", s)
72 }
73 EditInstruction::IdentityOptions(s) => {
74 write!(
75 f,
76 "=[{}]",
77 s.iter()
78 .map(|x| x.to_string())
79 .collect::<Vec<String>>()
80 .join("|")
81 )
82 }
83 EditInstruction::InsertionOptions(s) => {
84 write!(
85 f,
86 "+[{}]",
87 s.iter()
88 .map(|x| x.to_string())
89 .collect::<Vec<String>>()
90 .join("|")
91 )
92 }
93 EditInstruction::DeletionOptions(s) => {
94 write!(
95 f,
96 "-[{}]",
97 s.iter()
98 .map(|x| x.to_string())
99 .collect::<Vec<String>>()
100 .join("|")
101 )
102 }
103 }
104 }
105}
106
107impl FromStr for EditScript<String> {
108 type Err = ParseError;
109
110 fn from_str(editscript: &str) -> Result<Self, Self::Err> {
111 let mut instructions: Vec<EditInstruction<String>> = Vec::new();
112 let mut begin = 0;
113 let mut distance = 0;
114 for (i, c) in editscript.char_indices() {
115 if c == ']' {
116 let instruction = EditInstruction::<String>::from_str(&editscript[begin..i + 1])?;
117 if instruction.is_change() {
118 distance += 1;
119 }
120 instructions.push(instruction);
121 begin = i + 1;
122 }
123 }
124 if instructions.is_empty() {
125 return Err(ParseError(format!(
126 "Not a valid edit script, no instructions found: {}",
127 editscript
128 )));
129 }
130 Ok(EditScript {
131 distance: distance,
132 instructions: instructions,
133 mode: Mode::Normal,
134 })
135 }
136}
137
138impl FromStr for EditInstruction<String> {
139 type Err = ParseError;
140
141 fn from_str(editinstruction: &str) -> Result<Self, Self::Err> {
142 if editinstruction.len() <= 3 {
143 return Err(ParseError(format!(
144 "String too short to describe a valid edit instruction: {}",
145 editinstruction
146 )));
147 }
148 let mut chariter = editinstruction.chars();
149 let operator = chariter.next(); let startbracket = chariter.next(); if startbracket != Some('[') {
152 return Err(ParseError(format!(
153 "Expected start bracket: {}",
154 editinstruction
155 )));
156 }
157 let s = &editinstruction[2..editinstruction.len() - 1];
158 let instruction = match operator {
159 Some('+') => {
160 if s.contains("|") {
161 EditInstruction::InsertionOptions(s.split("|").collect())
162 } else {
163 EditInstruction::Insertion(s)
164 }
165 }
166 Some('-') => {
167 if s.contains("|") {
168 EditInstruction::DeletionOptions(s.split("|").collect())
169 } else {
170 EditInstruction::Deletion(s)
171 }
172 }
173 Some('=') => {
174 if s.contains("|") {
175 if s.chars().nth(0) == Some('#') && s[1..].parse::<u32>().is_ok() {
176 return Err(ParseError(format!(
177 "GenericIdentity can not take multiple values"
178 )));
179 } else {
180 EditInstruction::IdentityOptions(s.split("|").collect())
181 }
182 } else {
183 if s.chars().nth(0) == Some('#') && s[1..].parse::<u32>().is_ok() {
184 EditInstruction::GenericIdentity(s[1..].parse::<u32>().unwrap())
185 } else {
186 EditInstruction::Identity(s)
187 }
188 }
189 }
190 _ => {
191 return Err(ParseError(format!(
192 "Parsing editscript failed, invalid operator"
193 )))
194 }
195 };
196 Ok(instruction.to_owned())
197 }
198}
199
200impl EditInstruction<&str> {
201 pub fn to_owned(&self) -> EditInstruction<String> {
204 match self {
205 EditInstruction::Insertion(s) => EditInstruction::Insertion(s.to_string()),
206 EditInstruction::Deletion(s) => EditInstruction::Deletion(s.to_string()),
207 EditInstruction::Identity(s) => EditInstruction::Identity(s.to_string()),
208 EditInstruction::GenericIdentity(n) => EditInstruction::GenericIdentity(*n),
209 EditInstruction::InsertionOptions(v) => {
210 EditInstruction::InsertionOptions(v.iter().map(|s| s.to_string()).collect())
211 }
212 EditInstruction::DeletionOptions(v) => {
213 EditInstruction::DeletionOptions(v.iter().map(|s| s.to_string()).collect())
214 }
215 EditInstruction::IdentityOptions(v) => {
216 EditInstruction::IdentityOptions(v.iter().map(|s| s.to_string()).collect())
217 }
218 }
219 }
220}
221
222impl EditScript<&str> {
223 pub fn to_owned(&self) -> EditScript<String> {
224 EditScript {
225 distance: self.distance,
226 mode: self.mode,
227 instructions: self.instructions.iter().map(|x| x.to_owned()).collect(),
228 }
229 }
230}
231
232impl EditInstruction<String> {
233 pub fn as_ref(&self) -> EditInstruction<&str> {
234 match self {
235 EditInstruction::Insertion(s) => EditInstruction::Insertion(s.as_str()),
236 EditInstruction::Deletion(s) => EditInstruction::Deletion(s.as_str()),
237 EditInstruction::Identity(s) => EditInstruction::Identity(s.as_str()),
238 EditInstruction::GenericIdentity(n) => EditInstruction::GenericIdentity(*n),
239 EditInstruction::InsertionOptions(v) => {
240 EditInstruction::InsertionOptions(v.iter().map(|s| s.as_str()).collect())
241 }
242 EditInstruction::DeletionOptions(v) => {
243 EditInstruction::DeletionOptions(v.iter().map(|s| s.as_str()).collect())
244 }
245 EditInstruction::IdentityOptions(v) => {
246 EditInstruction::IdentityOptions(v.iter().map(|s| s.as_str()).collect())
247 }
248 }
249 }
250}
251
252impl EditScript<String> {
253 pub fn as_ref(&self) -> EditScript<&str> {
254 EditScript {
255 distance: self.distance,
256 mode: self.mode,
257 instructions: self.instructions.iter().map(|x| x.as_ref()).collect(),
258 }
259 }
260}
261
262impl<T> EditScript<T> {
263 pub fn len(&self) -> usize {
264 self.instructions.len()
265 }
266}
267
268impl<T> EditInstruction<T> {
269 pub fn is_change(&self) -> bool {
270 match self {
271 EditInstruction::Insertion(_) | EditInstruction::Deletion(_) => true,
272 EditInstruction::Identity(_) => false,
273 EditInstruction::GenericIdentity(_) => false,
274 EditInstruction::InsertionOptions(_) | EditInstruction::DeletionOptions(_) => true,
275 EditInstruction::IdentityOptions(_) => false,
276 }
277 }
278}
279
280#[derive(Copy, Clone, Debug, PartialEq)]
281pub enum Mode {
282 Normal,
283 Suffix,
284 Prefix,
285
286 Infix,
289}
290
291impl Default for Mode {
292 fn default() -> Self {
293 Self::Normal
294 }
295}
296
297pub fn shortest_edit_script<'a>(
300 source: &'a str,
301 target: &'a str,
302 prefix: bool,
303 generic: bool,
304 allow_substitutions: bool,
305) -> EditScript<&'a str> {
306 let diffchunks: Vec<Chunk> = diff(&source, &target);
307 let mut prev: isize = 0;
308 let mut distance = 0;
309 let mut abort_at = None;
310 if prefix {
311 let mut tail = 0;
312 for chunk in diffchunks.iter() {
313 if let Chunk::Equal(_) = chunk {
314 tail += 1;
315 } else {
316 tail = 0;
317 }
318 }
319 abort_at = Some(diffchunks.len() - tail);
320 }
321 let mut instructions: Vec<EditInstruction<&'a str>> =
322 Vec::with_capacity(abort_at.unwrap_or(diffchunks.len()));
323 for (i, chunk) in diffchunks.iter().enumerate() {
324 if abort_at.is_some() && i == abort_at.unwrap() {
325 break;
326 }
327 match chunk {
328 Chunk::Equal(s) => {
329 if generic {
330 instructions.push(EditInstruction::GenericIdentity(s.chars().count() as u32));
331 } else {
332 instructions.push(EditInstruction::Identity(s));
333 }
334 prev = 0;
335 }
336 Chunk::Delete(s) => {
337 let length: isize = s.chars().count() as isize;
338 let is_substitution = prev > 0 && length == prev;
339 if !is_substitution || !allow_substitutions {
340 distance += length;
341 }
342 instructions.push(EditInstruction::Deletion(s));
343 prev = length * -1;
344 }
345 Chunk::Insert(s) => {
346 let length: isize = s.chars().count() as isize;
347 let is_substitution = prev < 0 && s.len() as isize * -1 == prev;
348 if !is_substitution || !allow_substitutions {
349 distance += length;
350 }
351 instructions.push(EditInstruction::Insertion(s));
352 prev = length;
353 }
354 }
355 }
356 EditScript {
357 instructions: instructions,
358 mode: match prefix {
359 true => Mode::Prefix,
360 false => Mode::Normal,
361 },
362 distance: distance as u32,
363 }
364}
365
366pub fn shortest_edit_script_suffix(
370 source: &str,
371 target: &str,
372 generic: bool,
373 allow_substitutions: bool,
374) -> EditScript<String> {
375 let source = source.to_owned().chars().rev().collect::<String>();
376 let target = target.to_owned().chars().rev().collect::<String>();
377 let diffchunks: Vec<Chunk> = diff(source.as_str(), target.as_str());
378 let mut prev: isize = 0;
379 let mut distance = 0;
380 let abort_at = {
381 let mut tail = 0;
382 for chunk in diffchunks.iter() {
383 if let Chunk::Equal(_) = chunk {
384 tail += 1;
385 } else {
386 tail = 0;
387 }
388 }
389 Some(diffchunks.len() - tail)
390 };
391 let mut instructions: Vec<EditInstruction<String>> =
392 Vec::with_capacity(abort_at.unwrap_or(diffchunks.len()));
393 for (i, chunk) in diffchunks.iter().enumerate() {
394 if i == abort_at.unwrap() {
395 break;
396 }
397 match chunk {
398 Chunk::Equal(s) => {
399 if generic {
400 instructions.push(EditInstruction::GenericIdentity(s.chars().count() as u32));
401 } else {
402 instructions.push(EditInstruction::Identity(
403 s.to_owned().chars().rev().collect::<String>(),
404 ));
405 }
406 prev = 0;
407 }
408 Chunk::Delete(s) => {
409 let length: isize = s.chars().count() as isize;
410 let is_substitution = prev > 0 && length == prev;
411 if !is_substitution || !allow_substitutions {
412 distance += length;
413 }
414 instructions.push(EditInstruction::Deletion(
415 s.to_owned().chars().rev().collect::<String>(),
416 ));
417 prev = length * -1;
418 }
419 Chunk::Insert(s) => {
420 let length: isize = s.chars().count() as isize;
421 let is_substitution = prev < 0 && s.len() as isize * -1 == prev;
422 if !is_substitution || !allow_substitutions {
423 distance += length;
424 }
425 instructions.push(EditInstruction::Insertion(
426 s.to_owned().chars().rev().collect::<String>(),
427 ));
428 prev = length;
429 }
430 }
431 }
432 EditScript {
433 instructions: instructions,
434 mode: Mode::Suffix,
435 distance: distance as u32,
436 }
437}
438
439pub trait ApplyEditScript {
440 fn apply_to(&self, input: &str, mode: Option<Mode>) -> Result<String, ApplyError>;
441}
442
443impl ApplyEditScript for EditScript<String> {
444 fn apply_to(&self, input: &str, mode: Option<Mode>) -> Result<String, ApplyError> {
445 self.as_ref().apply_to(input, mode)
446 }
447}
448
449fn instruction_applies(
451 instructioncontent: &str,
452 input: &str,
453 tail: &Option<String>,
454 tailchars: usize,
455) -> Result<usize, ApplyError> {
456 let instructionlength = instructioncontent.chars().count();
457 if instructionlength > tailchars {
458 return Err(ApplyError::NoMatch);
459 }
461 let refcontent = if let Some(tail) = tail {
462 &tail[..instructionlength]
463 } else {
464 &input[..instructionlength]
465 };
466 if refcontent != instructioncontent {
467 return Err(ApplyError::NoMatch);
468 } else {
469 Ok(instructionlength)
470 }
471}
472
473impl ApplyEditScript for EditScript<&str> {
474 fn apply_to(&self, input: &str, mode: Option<Mode>) -> Result<String, ApplyError> {
475 let mode = if let Some(mode) = mode {
476 mode
477 } else {
478 self.mode
479 };
480
481 if mode == Mode::Infix {
482 let mut head: Option<String> = None;
487
488 let mut begin = 0;
489 let mut skip = 0;
490
491 for (i, _) in input.char_indices() {
492 if skip > 0 {
493 skip -= 1;
494 continue;
495 }
496 if let Ok(result) = self.apply_to(&input[i..], Some(Mode::Normal)) {
497 if head.is_none() {
499 head = Some(String::new())
500 }; skip = result.chars().count();
502 head.as_mut().map(|h| {
503 if i > 0 && i > begin {
504 *h += &input[begin..i];
505 }
506 *h += result.as_str()
507 });
508 begin = i + skip;
509 }
510 }
511
512 if let Some(mut head) = head.take() {
513 if begin < input.chars().count() {
514 head += &input[begin..];
515 }
516 Ok(head)
517 } else {
518 Err(ApplyError::NoMatch)
519 }
520 } else if mode == Mode::Suffix {
521 let mut head: String = input.to_string();
523 let mut tail = String::new();
524 for instruction in self.instructions.iter() {
525 let headchars = head.chars().count();
526 match instruction {
531 EditInstruction::Deletion(suffix) => {
532 let suffixchars = suffix.chars().count();
533 if suffixchars > headchars {
534 return Err(ApplyError::WithMessage(format!("Edit script does not match current word, suffix is longer than head (unable to remove suffix {})", suffix)));
535 }
536 let foundsuffix: String = head
537 .chars()
538 .skip(headchars - suffixchars)
539 .take(suffixchars)
540 .collect();
541 if foundsuffix.as_str() != *suffix {
542 return Err(ApplyError::WithMessage(format!("Edit script does not match current word (unable to find and remove suffix '{}', found '{}' instead)", suffix, foundsuffix)));
543 }
544 head = head.chars().take(headchars - suffixchars).collect();
545 }
546 EditInstruction::Insertion(s) => {
547 tail.insert_str(0, s);
548 }
549 EditInstruction::GenericIdentity(keeplength) => {
550 let keeplength = *keeplength as usize;
551 if keeplength > headchars {
552 return Err(ApplyError::WithMessage(format!("Edit script does not match current word, length to keep is longer than head")));
553 }
554 tail = head
555 .chars()
556 .skip(headchars - keeplength)
557 .take(keeplength)
558 .collect::<String>()
559 + tail.as_str();
560 head = head.chars().take(headchars - keeplength).collect();
561 }
562 EditInstruction::Identity(suffix) => {
563 let suffixchars = suffix.chars().count();
564 if suffixchars > headchars {
565 return Err(ApplyError::WithMessage(format!("Edit script does not match current word, suffix is longer than head (unable to keep suffix {})", suffix)));
566 }
567 let foundsuffix: String = head
568 .chars()
569 .skip(headchars - suffixchars)
570 .take(suffixchars)
571 .collect();
572 if foundsuffix.as_str() != *suffix {
573 return Err(ApplyError::WithMessage(format!("Edit script does not match current word (unable to find and keep suffix {})", suffix)));
574 }
575 tail = head
576 .chars()
577 .skip(headchars - suffixchars)
578 .take(suffixchars)
579 .collect::<String>()
580 + tail.as_str();
581 head = head.chars().take(headchars - suffixchars).collect();
582 }
583 EditInstruction::IdentityOptions(suffixes) => {
584 for suffix in suffixes {
585 let suffixchars = suffix.chars().count();
586 if suffixchars > headchars {
587 continue; }
589 let foundsuffix: String = head
590 .chars()
591 .skip(headchars - suffixchars)
592 .take(suffixchars)
593 .collect();
594 if foundsuffix.as_str() == *suffix {
595 tail = head
597 .chars()
598 .skip(headchars - suffixchars)
599 .take(suffixchars)
600 .collect::<String>()
601 + tail.as_str();
602 head = head.chars().take(headchars - suffixchars).collect();
603 break;
604 }
605 }
606 }
607 EditInstruction::DeletionOptions(suffixes) => {
608 for suffix in suffixes {
609 let suffixchars = suffix.chars().count();
610 if suffixchars > headchars {
611 continue; }
613 let foundsuffix: String = head
614 .chars()
615 .skip(headchars - suffixchars)
616 .take(suffixchars)
617 .collect();
618 if foundsuffix.as_str() == *suffix {
619 head = head.chars().take(headchars - suffixchars).collect();
621 break;
622 }
623 }
624 }
625 EditInstruction::InsertionOptions(_) => {
626 return Err(ApplyError::WithMessage(format!("Edit script has multiple insertion options and is therefor ambiguous, unable to apply")));
627 }
628 }
629 }
630 head += tail.as_str();
631 Ok(head)
632 } else {
633 let mut tail: Option<String> = None;
638 let mut head: Option<String> = None;
639
640 let mut matches = false;
641
642 for instruction in self.instructions.iter() {
643 let tailchars = if let Some(tail) = tail.as_ref() {
644 tail.chars().count()
645 } else {
646 input.chars().count()
647 };
648 match instruction {
652 EditInstruction::Deletion(prefix) => {
653 match instruction_applies(prefix, input, &tail, tailchars) {
654 Ok(matchchars) => {
655 matches = true;
656 if tail.is_none() {
657 tail = Some(input.to_string())
658 }; tail.as_mut().map(|t| t.drain(..matchchars));
660 }
661 Err(e) => return Err(e),
662 }
663 }
664 EditInstruction::Insertion(s) => {
665 if head.is_none() {
666 head = Some(String::new())
667 }; head.as_mut().map(|h| *h += s);
669 matches = true;
670 }
671 EditInstruction::GenericIdentity(keeplength) => {
672 let keeplength = *keeplength as usize;
673 if keeplength > tailchars {
674 return Err(ApplyError::WithMessage(format!("Edit script does not match current word, length to keep is longer than head")));
675 }
676 if head.is_none() {
677 head = Some(String::new())
678 }; if tail.is_none() {
680 tail = Some(input.to_string())
681 }; if let (Some(head), Some(tail)) = (head.as_mut(), tail.as_mut()) {
683 head.extend(tail.drain(..keeplength));
684 matches = true;
685 } else {
686 panic!(
687 "Can't unpack head and tail for EditInstruction::GenericIdentity"
688 )
689 } }
691 EditInstruction::Identity(prefix) => {
692 match instruction_applies(prefix, input, &tail, tailchars) {
693 Ok(matchchars) => {
694 if head.is_none() {
695 head = Some(String::new())
696 }; if tail.is_none() {
698 tail = Some(input.to_string())
699 }; if let (Some(head), Some(tail)) = (head.as_mut(), tail.as_mut()) {
701 head.extend(tail.drain(..matchchars));
702 }
703 matches = true;
704 }
705 Err(e) => return Err(e),
706 }
707 }
708 EditInstruction::IdentityOptions(prefixes) => {
709 matches = false;
710 for prefix in prefixes {
711 if let Ok(matchchars) =
712 instruction_applies(prefix, input, &tail, tailchars)
713 {
714 if head.is_none() {
715 head = Some(String::new())
716 }; if tail.is_none() {
718 tail = Some(input.to_string())
719 }; if let (Some(head), Some(tail)) = (head.as_mut(), tail.as_mut()) {
721 head.extend(tail.drain(..matchchars));
722 }
723 matches = true;
724 break;
725 }
726 }
727 if !matches {
728 return Err(ApplyError::NoMatch);
729 }
730 }
731 EditInstruction::DeletionOptions(prefixes) => {
732 matches = false;
733 for prefix in prefixes {
734 if let Ok(matchchars) =
735 instruction_applies(prefix, input, &tail, tailchars)
736 {
737 if tail.is_none() {
738 tail = Some(input.to_string())
739 }; tail.as_mut().map(|t| t.drain(..matchchars));
741 matches = true;
742 break;
743 }
744 }
745 if !matches {
746 return Err(ApplyError::NoMatch);
747 }
748 }
749 EditInstruction::InsertionOptions(_) => {
750 return Err(ApplyError::WithMessage(format!("Edit script has multiple insertion options and is therefor ambiguous, unable to apply")));
751 }
752 }
753 }
754 if let Some(head) = head {
755 Ok(head)
756 } else if matches {
757 Ok(String::new())
758 } else {
759 Err(ApplyError::NoMatch)
760 }
761 }
762 }
763}