basic_pattern_scanner 0.1.0

Fast, dependency-free byte-pattern scanner with support for IDA-style and nibble-mask patterns
Documentation
// Crate error types. NOTE(review): presumably what the fallible `Pattern`
// constructors return — confirm against the module itself.
pub mod error;
// Pattern representation and parsing; the tests below import
// `pattern::types::Pattern` (and reference `pattern::types::MaskType`) from here.
pub mod pattern;
// Scanning entry points; the tests below glob-import `scanner::scanner::*`
// to get `scan_all`, `scan_all_with_base`, `scan_all_iter` and `Match`.
pub mod scanner;

#[cfg(test)]
mod tests {
	//! Crate-level tests covering pattern construction (IDA-style and
	//! nibble-mask strings), `Pattern::matches_at`, and the `scan_all*`
	//! family of scanning functions.

	use crate::pattern::types::Pattern;
	use crate::scanner::scanner::*;

	// Reduces a list of matches to just their offsets so expectations can be
	// written as plain integer slices.
	fn offsets(matches: Vec<Match>) -> Vec<usize> {
		matches.into_iter().map(|m| m.offset).collect()
	}

	// ---------------------------------------------------------------------
	// Pattern construction
	// ---------------------------------------------------------------------

	// A pattern without wildcards has a fully-set mask, and the masked bytes
	// equal the raw bytes.
	#[test]
	fn ida_str_exact() {
		let p = Pattern::from_ida_str("DE AD BE EF").unwrap();
		assert_eq!(p.bytes, &[0xDE, 0xAD, 0xBE, 0xEF]);
		assert_eq!(p.mask, &[0xFF, 0xFF, 0xFF, 0xFF]);
		assert_eq!(p.masked_bytes, &[0xDE, 0xAD, 0xBE, 0xEF]);
	}

	// `??` yields a zero mask byte and a zero masked byte at that position.
	#[test]
	fn ida_str_with_wildcards() {
		let p = Pattern::from_ida_str("DE ?? BE EF").unwrap();
		assert_eq!(p.mask, &[0xFF, 0x00, 0xFF, 0xFF]);
		assert_eq!(p.masked_bytes, &[0xDE, 0x00, 0xBE, 0xEF]);
	}

	// `?F`: the high nibble is the wildcard, only the low nibble is compared.
	#[test]
	fn nibble_pattern_half_wildcards() {
		let p = Pattern::from_ida_like_with_nibble("?F").unwrap();
		assert_eq!(p.mask, &[0x0F]);
		assert_eq!(p.masked_bytes, &[0x0F]);
	}

	// `A?`: despite the test name, the wildcard here is the LOW nibble — the
	// high nibble (`A`) is the part that is compared, hence mask 0xF0.
	#[test]
	fn nibble_pattern_high_wildcard() {
		let p = Pattern::from_ida_like_with_nibble("A?").unwrap();
		assert_eq!(p.mask, &[0xF0]);
		assert_eq!(p.masked_bytes, &[0xA0]);
	}

	// Constructing a pattern whose mask has fewer entries than its bytes must
	// be rejected.
	#[test]
	fn mask_shorter_than_bytes_is_error() {
		assert!(Pattern::new(vec![0xAA, 0xBB], vec![0xFF], crate::pattern::types::MaskType::Byte).is_err());
	}

	// ---------------------------------------------------------------------
	// scan_all: exact and wildcard patterns
	// ---------------------------------------------------------------------

	#[test]
	fn single_match_at_start() {
		let data = &[0xDE, 0xAD, 0xBE, 0xEF, 0x00];
		let p = Pattern::from_ida_str("DE AD BE EF").unwrap();
		assert_eq!(offsets(scan_all(data, &p)), &[0]);
	}

	// The last valid window (ending exactly at the end of the data) is scanned.
	#[test]
	fn single_match_at_end() {
		let data = &[0x00, 0x00, 0xDE, 0xAD];
		let p = Pattern::from_ida_str("DE AD").unwrap();
		assert_eq!(offsets(scan_all(data, &p)), &[2]);
	}

	#[test]
	fn multiple_matches() {
		let data = &[0xAA, 0xBB, 0x00, 0xAA, 0xBB];
		let p = Pattern::from_ida_str("AA BB").unwrap();
		assert_eq!(offsets(scan_all(data, &p)), &[0, 3]);
	}

	#[test]
	fn no_match() {
		let data = &[0x11, 0x22, 0x33];
		let p = Pattern::from_ida_str("AA BB").unwrap();
		assert!(scan_all(data, &p).is_empty());
	}

	#[test]
	fn wildcard_matches_any_byte() {
		let data = &[0xAA, 0x00, 0xBB, 0xAA, 0xFF, 0xBB];
		let p = Pattern::from_ida_str("AA ?? BB").unwrap();
		assert_eq!(offsets(scan_all(data, &p)), &[0, 3]);
	}

	// A two-byte all-wildcard pattern matches at every offset where a full
	// two-byte window fits: 0 and 1 for three bytes of data.
	#[test]
	fn all_wildcard_pattern_matches_every_position() {
		let data = &[0x01, 0x02, 0x03];
		let p = Pattern::from_ida_str("?? ??").unwrap();
		assert_eq!(offsets(scan_all(data, &p)), &[0, 1]);
	}

	#[test]
	fn pattern_longer_than_data_no_match() {
		let data = &[0xAA, 0xBB];
		let p = Pattern::from_ida_str("AA BB CC DD").unwrap();
		assert!(scan_all(data, &p).is_empty());
	}

	// ---------------------------------------------------------------------
	// Pattern::matches_at
	// ---------------------------------------------------------------------

	#[test]
	fn matches_at_correct_positions() {
		let data = &[0x00, 0xAA, 0xBB, 0x00];
		let p = Pattern::from_ida_str("AA BB").unwrap();
		assert!(!p.matches_at(data, 0));
		assert!(p.matches_at(data, 1));
		assert!(!p.matches_at(data, 2)); // window fits, but the bytes are BB 00
		assert!(!p.matches_at(data, 3)); // window would run off the end
	}

	// Out-of-bounds windows report "no match" rather than panicking.
	#[test]
	fn matches_at_oob_returns_false() {
		let data = &[0xAA];
		let p = Pattern::from_ida_str("AA BB").unwrap();
		assert!(!p.matches_at(data, 0));
		assert!(!p.matches_at(data, 1));
	}

	// ---------------------------------------------------------------------
	// Nibble-mask scanning
	// ---------------------------------------------------------------------

	// `?F` matches every byte whose low nibble is F; 0x10 is the only reject.
	#[test]
	fn nibble_mask_scan() {
		let p = Pattern::from_ida_like_with_nibble("?F").unwrap();
		let data = &[0x0F, 0x1F, 0xAF, 0x10, 0xFF];
		assert_eq!(offsets(scan_all(data, &p)), &[0, 1, 2, 4]);
	}

	// `A?` matches every byte whose high nibble is A (the low nibble is the
	// wildcard, despite the test name); 0xBF and 0x0A are rejected.
	#[test]
	fn nibble_mask_high_wildcard_scan() {
		let p = Pattern::from_ida_like_with_nibble("A?").unwrap();
		let data = &[0xA0, 0xAF, 0xBF, 0x0A];
		assert_eq!(offsets(scan_all(data, &p)), &[0, 1]);
	}

	// ---------------------------------------------------------------------
	// Base-address and iterator variants
	// ---------------------------------------------------------------------

	// `offset` stays relative to the slice; `address` is base + offset.
	#[test]
	fn scan_with_base_address() {
		let data = &[0x00, 0xAA, 0xBB];
		let p = Pattern::from_ida_str("AA BB").unwrap();
		let base: u64 = 0x140000000;
		let results = scan_all_with_base(data, &p, base);
		assert_eq!(results.len(), 1);
		assert_eq!(results[0].offset, 1);
		assert_eq!(results[0].address, base + 1);
	}

	// Taking only the first item from the iterator yields the earliest match.
	#[test]
	fn iter_stops_early() {
		let data = &[0xAA, 0x00, 0xAA, 0x00, 0xAA];
		let p = Pattern::from_ida_str("AA").unwrap();
		let first = scan_all_iter(data, &p).next().unwrap();
		assert_eq!(first.offset, 0);
	}

	// The iterator and the collecting function must report the same offsets.
	#[test]
	fn iter_and_collect_agree() {
		let data: Vec<u8> = (0u8..=255).collect();
		let p = Pattern::from_ida_str("10 11 12").unwrap();
		let via_iter: Vec<usize> = scan_all_iter(&data, &p).map(|m| m.offset).collect();
		let via_fn = offsets(scan_all(&data, &p));
		// data[i] == i, so the sequence 10 11 12 sits at offset 0x10. Pinning
		// it keeps the agreement check from passing vacuously on two empty
		// results.
		assert_eq!(via_fn, &[0x10]);
		assert_eq!(via_iter, via_fn);
	}

	// ---------------------------------------------------------------------
	// Larger scenarios
	// ---------------------------------------------------------------------

	// x86-64 prologue: mov [rsp+?],rbx / mov [rsp+?],rsi / push rdi / sub rsp,?
	// wildcard bytes are the variable displacement/immediate operands
	#[test]
	fn realistic_prologue_with_mid_wildcards() {
		#[rustfmt::skip]
		let data: &[u8] = &[
			0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
			0x48, 0x89, 0x5C, 0x24, 0x08,
			0x48, 0x89, 0x74, 0x24, 0x10,
			0x57,
			0x48, 0x83, 0xEC, 0x28,
			0xC3, 0x90, 0x90, 0x90,
		];

		let p = Pattern::from_ida_str(
			"48 89 5C 24 ?? 48 89 74 24 ?? 57 48 83 EC ??"
		).unwrap();

		// Twelve bytes of 0x90 padding precede the prologue, so the only match
		// is at offset 12.
		let matches = scan_all(data, &p);
		assert_eq!(matches.len(), 1);
		assert_eq!(matches[0].offset, 12);
		assert!(p.matches_at(data, 12));
		assert!(!p.matches_at(data, 0));
	}

	// Overlapping matches are all reported, and a nibble that disagrees with
	// the data rejects the whole pattern.
	#[test]
	fn nibble_overlapping_matches_and_rejection() {
		let data: &[u8] = &[0xAB, 0xAB, 0xAB, 0xAB, 0xAB];

		// Two-byte pattern over five identical bytes: windows start at 0..=3.
		let p_match = Pattern::from_ida_like_with_nibble("AB ?B").unwrap();
		assert_eq!(offsets(scan_all(data, &p_match)), &[0, 1, 2, 3]);

		// The low nibble 0 never equals the data's low nibble B.
		let p_no_match = Pattern::from_ida_like_with_nibble("AB ?0").unwrap();
		assert!(scan_all(data, &p_no_match).is_empty());
	}
}