rapidfuzz 0.1.0

rapid string matching library
Documentation
use crate::string_utils;

pub fn editops_find(query: &str, choice: &str) -> Vec<LevEditOp> {		
	let string_affix = string_utils::StringAffix::find(query, choice);
	
	let first_string_len = string_affix.first_string_len;
	let second_string_len = string_affix.second_string_len;
	let prefix_len = string_affix.prefix_len;
	let first_string = &query[prefix_len..prefix_len+first_string_len];
	let second_string = &choice[prefix_len..prefix_len+second_string_len];

	let matrix_columns = first_string_len + 1;
	let matrix_rows = second_string_len + 1;

	// TODO maybe use an actual matrix for readability
	let mut cache_matrix: Vec<usize> = vec![0; matrix_rows*matrix_columns];
	for i in 0..matrix_rows {
		cache_matrix[i] = i;
	}
	for i in 1..matrix_columns {
		cache_matrix[matrix_rows*i] = i;
	}

	for (i, char1) in first_string.char_indices() {
		let mut prev = i * matrix_rows;
		let current = prev + matrix_rows;
		let mut x = i + 1;
		for (p, char2p) in second_string.char_indices() {
			let mut c3 = cache_matrix[prev] + (char1 != char2p) as usize;
			prev += 1;
			x += 1;
			if x >= c3 {
				x = c3;
			}
			c3 = cache_matrix[prev] + 1;
			if x > c3 {
				x = c3;
			}
			cache_matrix[current + 1 + p] = x;
		}
	}
	editops_from_cost_matrix(first_string, second_string, matrix_columns, matrix_rows, prefix_len, cache_matrix)
}

#[derive(Debug, PartialEq, Copy, Clone)]
pub enum LevEditType {
	LevEditKeep,
	LevEditReplace,
	LevEditInsert,
	LevEditDelete,
}

#[derive(Debug, PartialEq)]
pub struct LevEditOp {
	op_type: LevEditType,  /* editing operation type */
	first_start: usize, /* source block position */
	second_start: usize,  /* destination position */
}


fn editops_from_cost_matrix(string1: &str, string2: &str,  len1: usize, len2: usize, prefix_len: usize, cache_matrix: Vec<usize>) -> Vec<LevEditOp> {
	let mut dir = 0;

	let mut ops: Vec<LevEditOp> = vec![];
	ops.reserve(cache_matrix[len1*len2 - 1]);

	let mut i = len1 - 1;
	let mut j = len2 - 1;
	let mut p = len1*len2 - 1;

	let string1_chars: Vec<char> = string1.chars().collect();
	let string2_chars: Vec<char> = string2.chars().collect();

	//TODO this is still pretty ugly
	while i > 0 || j > 0 {
		let current_value = cache_matrix[p];

		let op_type;

		if dir == -1 && j > 0 && current_value == cache_matrix[p-1] + 1 {
			op_type = LevEditType::LevEditInsert;
		}
		else if dir == 1 && i > 0 && current_value == cache_matrix[p-len2] + 1 {
			op_type = LevEditType::LevEditDelete;
		}
		else if i > 0 && j > 0 && current_value == cache_matrix[p-len2-1]
		  && string1_chars[i - 1] == string2_chars[j - 1] {
			op_type = LevEditType::LevEditKeep;
		}
		else if i > 0 && j > 0 && current_value == cache_matrix[p-len2-1] + 1 {
			op_type = LevEditType::LevEditReplace;
		}
		  /* we can't turn directly from -1 to 1, in this case it would be better
		   * to go diagonally, but check it (dir == 0) */
		else if dir == 0 && j > 0 && current_value == cache_matrix[p-1] + 1 {
			op_type = LevEditType::LevEditInsert;
		}
		else if dir == 0 && i > 0 && current_value == cache_matrix[p-len2] + 1 {
			op_type = LevEditType::LevEditDelete;
		} else {
			panic!("something went terribly wrong");
		}

		match op_type {
			LevEditType::LevEditInsert => {
				j -= 1;
				p -= 1;
				dir = -1;
			},
			LevEditType::LevEditDelete => {
				i -= 1;
				p -= len2;
				dir = 1;
			},
			LevEditType::LevEditReplace => {
				i -= 1;
				j -= 1;
				p -= len2 + 1;
				dir = 0;
			},
			LevEditType::LevEditKeep => {
				i -= 1;
				j -= 1;
				p -= len2 + 1;
				dir = 0;
				/* LevEditKeep does not has to be stored */
				continue;
			}
		};

		let edit_op = LevEditOp {
			op_type: op_type,
			first_start: i + prefix_len,
			second_start: j + prefix_len,
		};
		ops.insert(0, edit_op);
	}

	ops
}


#[derive(Debug, PartialEq)]
pub struct LevMatchingBlock {
	pub first_start: usize,
	pub second_start: usize,
	pub len: usize,
}

pub fn editops_matching_blocks(len1: usize, len2: usize, ops: &Vec<LevEditOp>) -> Vec<LevMatchingBlock> {
	let mut first_start = 0;
	let mut second_start = 0;
	let mut mblocks: Vec<LevMatchingBlock> = vec![];

	for op in ops.iter() {
		if op.op_type == LevEditType::LevEditKeep {
			continue;
		}

		if first_start < op.first_start || second_start < op.second_start {
			let block = LevMatchingBlock {
				first_start,
				second_start,
				len: op.first_start - first_start,
			};
			mblocks.push(block);
			first_start = op.first_start;
			second_start = op.second_start;
		}

		match op.op_type {
			LevEditType::LevEditReplace => {
				first_start += 1;
				second_start += 1;
			},
			LevEditType::LevEditDelete => {
				first_start += 1;
			},
			LevEditType::LevEditInsert => {
				second_start += 1;
			},
			LevEditType::LevEditKeep => {},
		}
	}

	if first_start < len1 || second_start < len2 {
		let block = LevMatchingBlock {
			first_start,
			second_start,
			len: len1 - first_start,
		};
		mblocks.push(block);
	}

	let block = LevMatchingBlock {
		first_start: len1,
		second_start: len2,
		len: 0,
	};
	mblocks.push(block);
	mblocks
}