dectree_rs/
lib.rs

1#[doc = include_str!("../readme.md")]
2
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::rc::Rc;
6
7/// Represents a reference counted reference to a `RefCell<TreeNode<T>>`. This is used for nodes that 
8/// need to be mutated but are shared between multiple references.
9
10type RcRefCellTreeNode<T> = Rc<RefCell<TreeNode<T>>>;
11
12/// Represents a node in the decision tree. This is a recursive structure that can be used to represent
13/// a decision tree where each node is a choice and the leaf nodes are the final decision.
14#[derive(Clone, Debug)]
15struct TreeNode<T> where T: Clone + Default {
16	/// The depth of the node in the tree.
17	depth: i32,
18	/// The signatures that are valid at this node.
19	subtree_signatures: Vec<SignatureInfo<T>>,
20	/// The choices that can be made at this node.
21	choices:Vec<Option<RcRefCellTreeNode<T>>>,
22	/// The final decision at this node.
23	term: Vec<SignatureInfo<T>>,
24}
25
26impl<T> Default for TreeNode<T> where T: Clone + Default {
27	fn default() -> Self {
28		TreeNode {
29			depth: 0,
30			subtree_signatures: Vec::new(),
31			choices: vec![None; 256],
32			term: Vec::new()
33		}
34	}
35}
36
37/// Represents signature information. This is used to store the signature bytes, masks, and the object
38/// that is associated with the signature.
39#[derive(Clone, Debug)]
40struct SignatureInfo<T> where T: Clone + Default {
41	bytes: Vec<u8>,
42	masks: Vec<u8>,
43	object: T
44}
45
46/// Represents a decision tree that can be used to search for signatures. This is a tree structure that
47/// can be used to search for signatures in a binary blob. The tree is built by adding signatures to the
48/// tree and then searching for them.
49/// ```rust
50/// use dectree_rs::SignatureDecisionTree;
51/// 
52/// let mut tree = SignatureDecisionTree::new();
53/// tree.add_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32, 0x77, 0x89, 0x4f, 0x55], None, None);
54/// tree.add_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32], None, None);
55/// tree.add_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0x00], None, None);
56/// assert_eq!(tree.get_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32, 0x00, 0x99, 0x36, 0x5f, 0x21, 0xfd], None), Some(()));
57/// assert_eq!(tree.get_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32], None), Some(()));
58/// assert_eq!(tree.get_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0x00], None), Some(()));
59/// assert_eq!(tree.get_signature(vec![0x55], None), None);
60/// ```
61#[derive(Clone, Debug, Default)]
62pub struct SignatureDecisionTree<T> where T: Clone + Default {
63	base_node: RcRefCellTreeNode<T>,
64	sigs_dup: HashMap<Vec<u8>, bool>
65}
66
67impl<T> SignatureDecisionTree<T> where T: Clone + Default {
68	
69	/// Create a new `SignatureDecisionTree`.
70	pub fn new() -> Self {
71		SignatureDecisionTree::default()
72	}
73
74	/// Add a choice to the search tree.
75	fn add_choice(&mut self, signature_info: SignatureInfo<T>, tree_node: Rc<RefCell<TreeNode<T>>>) {
76		let mut node_info_list = vec![(tree_node, signature_info)];
77		// Workaround to avoid recursion
78		while let Some((node, sig_info)) = node_info_list.pop() {
79			let mut borrowed_node = node.borrow_mut();
80			let (depth, mut sigs, choices, mut term) = (borrowed_node.depth, borrowed_node.subtree_signatures.clone(), borrowed_node.choices.clone(), borrowed_node.term.clone());
81			let bytes = &sig_info.bytes;
82			let siglen = sigs.len();
83			if bytes.len() as i32 > depth {
84				sigs.push(sig_info.clone());
85				*borrowed_node = TreeNode {
86					depth: borrowed_node.depth,
87					subtree_signatures: sigs.clone(),
88					choices: borrowed_node.choices.clone(),
89					term: borrowed_node.term.clone()
90				};
91			} else {
92				term.push(sig_info.clone());
93				*borrowed_node = TreeNode {
94					depth: borrowed_node.depth,
95					subtree_signatures: borrowed_node.subtree_signatures.clone(),
96					choices: borrowed_node.choices.clone(),
97					term
98				};
99				continue;
100			}
101			let choices = Rc::new(RefCell::new(choices));
102			// If one sig is [85, 139, 236] and another is [85, 139, 236, 232, 144], then
103			// we're gonna panic without this check
104			if siglen == 0 {
105				// If it has no sigs, we need to add a level
106				// modify the next node
107				continue;
108			} else if siglen == 1 {
109				// If it has one already, we *both* need to add another level
110				// (because if it is the only one, it thought it was last choice)
111				for sig in sigs.iter() {
112					let ch_val = sig.bytes[depth as usize];
113					let nn_node = self.get_node(depth, choices.clone(), ch_val as i32);
114					*borrowed_node = TreeNode {
115						depth: borrowed_node.depth,
116						subtree_signatures: borrowed_node.subtree_signatures.clone(),
117						choices: choices.borrow().clone(),
118						term: borrowed_node.term.clone()
119					};
120					node_info_list.push((nn_node, sig.clone()));
121				}
122			} else {
123				// This is already a choice node, keep on choosing...
124				let ch_val = bytes[depth as usize];
125				let nn_node = self.get_node(depth, choices.clone(), ch_val as i32);
126				*borrowed_node = TreeNode {
127					depth: borrowed_node.depth,
128					subtree_signatures: borrowed_node.subtree_signatures.clone(),
129					choices: choices.borrow().clone(),
130					term: borrowed_node.term.clone()
131				};
132				node_info_list.push((nn_node, sig_info));
133			}
134		}
135	}
136
137	/// Chose, (and or initialize) a sub node.
138	fn get_node(&self, depth: i32, choices: Rc<RefCell<Vec<Option<RcRefCellTreeNode<T>>>>>, choice: i32) -> RcRefCellTreeNode<T> {
139		let mut borrowed_choices = choices.borrow_mut();
140		let nn_node = borrowed_choices[choice as usize].clone();
141		if nn_node.is_none() {
142			let nn_node = TreeNode{
143				depth: depth + 1,
144				..Default::default()
145			};
146			borrowed_choices[choice as usize] = Some(Rc::new(RefCell::new(nn_node)));
147		}
148		let nn_node = borrowed_choices[choice as usize].as_ref().unwrap();
149		nn_node.clone()
150	}
151
152	/// Add a signature to the search tree.  If masks goes unspecified, it will be
153	/// assumed to be all ones `vec![0xff; bytes.len()]`.
154	/// 
155	/// Additionally, you may specify `val` as the object to get back with
156	/// `tree.get_signature()`.
157	pub fn add_signature(&mut self, bytes: Vec<u8>, masks: Option<Vec<u8>>, val: Option<T>) {
158		let masks = masks.unwrap_or(vec![0xff; bytes.len()]);
159		let val = val.unwrap_or_default();
160		// Detect and skip duplicate additions...
161		let byte_key = [bytes.clone(), masks.clone()].concat();
162		if self.sigs_dup.contains_key(&byte_key) {
163			return
164		}
165		self.sigs_dup.insert(byte_key, true);
166		let sig_info = SignatureInfo {
167			bytes,
168			masks,
169			object: val
170		};
171		self.add_choice(sig_info, Rc::clone(&self.base_node));
172	}
173
174	/// Check if a signature is in the search tree.
175	pub fn is_signature(&self, bytes: Vec<u8>, offset: Option<i32>) -> bool {
176		self.get_signature(bytes, offset).is_some()
177	}
178
179	/// Get the object associated with a signature in the search tree.
180	pub fn get_signature(&self, bytes: Vec<u8>, offset: Option<i32>) -> Option<T> {
181		let offset = offset.unwrap_or_default();
182		let mut matches = vec![];
183		let mut nn_node = Some(Rc::clone(&self.base_node));
184		loop {
185			if let Some(node) = nn_node {
186				let node = node.borrow();
187				let (depth, sigs, choices, term) = (&node.depth, &node.subtree_signatures, &node.choices, &node.term);
188				matches.append(&mut term.clone());
189				// Once we get down to one sig, there are no more branches,
190				// just check the byte sequence.
191				if sigs.len() == 1 {
192					let (sbytes, smasks) = (&sigs[0].bytes, &sigs[0].masks);
193					let mut is_match = true;
194					for i in (*depth as usize)..sbytes.len() {
195						let real_off = offset + i as i32;
196						// We still have pieces of the signature left, but we're out of bytes
197						if real_off >= bytes.len() as i32 {
198							is_match = false;
199							break;
200						}
201						let masked = bytes[real_off as usize] & smasks[i];
202						if masked != sbytes[i] {
203							is_match = false;
204							break;
205						}
206					}
207					if is_match {
208						matches.push(sigs[0].clone());
209					}
210					break;
211				}
212				// There are still more choices to make, keep on truckin'
213				nn_node = None;
214				for sig in sigs.iter() {
215					let (sbytes, smasks) = (&sig.bytes, &sig.masks);
216					if (offset + *depth) >= bytes.len() as i32 {
217						continue
218					}
219					// We've reached the end of the signature, Just mask the rest
220					let masked = bytes[(offset + *depth) as usize] & smasks[*depth as usize];
221					if masked == sbytes[*depth as usize] {
222						// FIXME: Find the *best* winner! Because of masking.
223						nn_node = choices[masked as usize].as_ref().map(Rc::clone);
224						break
225					}
226				}
227				// We failed to make our next choice
228				if nn_node.is_none() {
229					break
230				}
231			}
232		}
233		return if matches.is_empty() {
234			None
235		} else {
236			matches.sort_by(|a, b| b.bytes.len().cmp(&a.bytes.len()));
237			matches.first().map(|x| x.object.clone())
238		}
239	}
240}
241
242#[cfg(test)]
243mod tests {
244	#[test]
245	fn test_signature_subset() {
246		let signature_base = vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32, 0x77, 0x89, 0x4f, 0x55];
247		let mut tree = super::SignatureDecisionTree::new();
248		tree.add_signature(signature_base.clone(), None, None);
249		tree.add_signature(signature_base.clone().into_iter().take(7).collect(), None, Some(signature_base.clone().into_iter().take(7).collect()));
250		tree.add_signature(signature_base.clone().into_iter().take(4).collect(), None, Some(signature_base.clone().into_iter().take(4).collect()));
251		tree.add_signature([signature_base.clone(), vec![0xfe, 0x38]].concat(), None, Some([signature_base.clone(), vec![0xfe, 0x38]].concat()));
252		assert_eq!(tree.get_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32, 0x00, 0x99, 0x36, 0x5f, 0x21, 0xfd], None), Some(signature_base.clone().into_iter().take(7).collect()));
253		assert_eq!(tree.get_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0xff, 0x32], None), Some(signature_base.clone().into_iter().take(7).collect()));
254		assert_eq!(tree.get_signature(vec![0x55, 0xe9, 0xd8, 0x01, 0xfe, 0x00], None), Some(signature_base.clone().into_iter().take(4).collect()));
255		assert_eq!(tree.get_signature(vec![0x55], None), None);
256	}
257}