1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
use std::collections::HashMap;
use std::hash::Hash;

mod dyn_bit_field;
use dyn_bit_field::DynamicBitField;
mod bit_field;
mod query;

// Reexport a bunch of stuff
// (i.e. flatten the hierarchy to make the api easier to use)
pub use bit_field::BitField;
pub use query::Query;
pub use query::Expression;
pub use query::expressions;

/// This is the main star of this crate.
/// It is an efficient model of a vector of elements,
/// where each element is just a set of tags.
///
/// This datatype is intended to handle requests for a huge set
/// of elements whose tags fulfill a requirement, i.e. "all elements
/// with the tag 'fruit' but not with the tag 'apple'.
///
/// It is expected that the elements share a lot of tags, i.e.
/// there are a lot fewer tags than elements.
///
/// It is not optimized for simply iterating over the tags
/// of each element, hence it is not recommended to do such
/// a thing with this datatype too much.
pub struct TagVec<T, F = u32>
		where T: Eq + Hash + Clone, F: BitField {
	tag_fields: HashMap<T, DynamicBitField<F>>,
	len: usize,
}

impl<T: Eq + Hash + Clone, F: BitField> TagVec<T, F> {
	// I don't think this needs an example?
	/// Creates an empty, new bit vector.
	pub fn new() -> TagVec<T, F> {
		TagVec {
			tag_fields: HashMap::new(),
			len: 0,
		}
	}

	/// The number of elements in the TagVec
	pub fn len(&self) -> usize { self.len }

	/// Pushes a new element onto the bitvec,
	/// where the new element is defined
	/// as an iterator of tags(borrows of tags specifically)
	/// 
	/// And OMG the generics on this function are
	/// crazy
	pub fn push<'a, I, Q>(&mut self, tags: I) 
		where I: IntoIterator<Item = &'a Q>,
				Q: ?Sized + Hash + Eq + 'a,
				T: From<&'a Q> + std::borrow::Borrow<Q> {
		// Vec doesn't allocate when created, and
		// we will rarely see unknown tags come forth,
		// so this won't be a performance hog
		let mut skipped_tags: Vec<T> = Vec::new();	

		// Add tags to existing tag fields
		for tag in tags {
			match self.tag_fields.get_mut(tag) {
				Some(field) => field.push(true),
				None => skipped_tags.push(tag.into()),
			}
		}

		// Push false to any tag fields that this element didn't contain
		for tag_field in self.tag_fields.values_mut() {
			if tag_field.len() < self.len + 1 {
				tag_field.push(false);
			}
		}

		// Create new tag fields for tags that appeared just now
		// This shouldn't run too often since there are fewer tags than values hopefully
		for skipped_tag in skipped_tags {
			let mut new_field = DynamicBitField::with_false(self.len());
			new_field.push(true); // This is the first element to have the tag
			self.tag_fields.insert(skipped_tag, new_field);
		}

		self.len += 1;
	}

	/// Iterates over all elements who fulfill the given expression.
	/// The behind the scenes of this function are complete and utter
	/// black magic code, and that code is indeed very strange.
	/// Nonetheless, the use of this function is not strange, and in
	/// fact quite intuitive
	///
	/// ```
	/// use tag_vec::TagVec;
	///
	/// // Make it easier to construct an expression
	/// use tag_vec::expressions::*;
	///
	/// // Construct a tag_vec
	/// let mut tag_vec: TagVec<String> = TagVec::new();
	/// tag_vec.push(vec!["hello", "world"]);
	/// tag_vec.push(vec!["rust", "is", "good"]);
	/// tag_vec.push(vec!["hello", "is", "good"]);
	/// tag_vec.push(vec!["hello", "rust"]);
	///
	/// // Query something
	/// let mut query = tag_vec.query(tag("hello"));
   /// // The first element to contain the tag "hello" is number 0
	/// assert_eq!(query.next(), Some(0)); 
	/// // ... and so on
	/// assert_eq!(query.next(), Some(2)); 
	/// assert_eq!(query.next(), Some(3)); 
	/// assert_eq!(query.next(), None); // Oops, we ran out!
	///
	/// // Query something more complicated
	/// let mut query = tag_vec.query(and(tag("rust"), tag("good")));
   /// // Element "1" is the only element with both the "rust" and "good" tags
	/// assert_eq!(query.next(), Some(1)); 
	/// assert_eq!(query.next(), None);
	/// ```
	pub fn query<'a, Q>(&'a self, expr: query::Expression<'a, Q>) -> query::Query<'a, F>  
			where Q: ?Sized + Hash + Eq + 'a,
					T: std::borrow::Borrow<Q> {
		query::Query::create_from(self, expr)
	}

	/// Iterates over each tag of an element(an element is considered
	/// to _be_ its tags). The iterator is unordered, so be careful.
	/// Will panic if the index is out of bounds.
	///
	/// Examples:
	/// ```
	/// use tag_vec::TagVec;
	/// // It is good to give the type of the key to
	/// // the type, as it may be difficult for the compiler
	/// // to infer it
	/// let mut tag_vec: TagVec<String> = TagVec::new();
	/// tag_vec.push(vec!["hello", "world"]);
	///
	/// // We should find a "hello" tag but not a "hi" tag
	/// // in the iterator
	/// let tags = tag_vec.iter_element(0);
	/// assert!(tags.clone().any(|v| *v == "hello"));
	/// assert!(!tags.clone().any(|v| *v == "hi"));
	/// ```
	pub fn iter_element<'a>(&'a self, index: usize) -> IterElement<'a, T, F>
	{
		assert!(index < self.len(), "Cannot iter over an element out of bounds");

		IterElement {
			fields: self.tag_fields.iter(),
			element_id: index
		}
	}
}

/// Iterates over every tag over an element.
/// See ``TagVec::iter_element`` for more
/// information.
#[derive(Clone)]
pub struct IterElement<'a, T, F>
		where T: Eq + Hash + Clone, F: BitField {
	fields: std::collections::hash_map::Iter<'a, T, DynamicBitField<F>>,
	element_id: usize,
}

impl<T: Eq + Hash + Clone, F: BitField> std::iter::FusedIterator for IterElement<'_, T, F> {}

impl<'a, T: Eq + Hash + Clone, F: BitField> Iterator for IterElement<'a, T, F> {
	type Item = &'a T;

	fn next(&mut self) -> Option<&'a T> {
		// Try to find the next field that contains this element.
		// Once you find one, return it. 
		while let Some((key, field)) = self.fields.next() {
			if field.get_unchecked(self.element_id) {
				return Some(key);
			}
		}

		None
	}
}

#[cfg(test)]
mod tests {
	use super::*;

	#[test]
	fn pushing() {
		let mut tags = TagVec::<String>::new();
		assert_eq!(tags.tag_fields.len(), 0);
		tags.push(vec!["hello", "sir"]);
		tags.push(vec!["other", "sir"]);

		// Testing implementation detail thing, not part of
		// the API
		assert_eq!(tags.tag_fields.len(), 3);

		let tag_vec: Vec<_> = tags.iter_element(0).collect();
		assert_eq!(tag_vec.len(), 2);
		assert!(tag_vec.iter().any(|v| *v == "hello"));
		assert!(tag_vec.iter().any(|v| *v == "sir"));
	}

	#[test]
	fn extreme_queries() {
		let mut tags = TagVec::<String, u8>::new();
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi2", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi2", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi", "yuh"]);
		tags.push(vec!["hi2", "yuh"]);
		tags.push(vec!["hi", "yuh"]);

		use super::expressions::*;
		let contains: Vec<_> = tags.query(tag("hi2")).collect();
		assert_eq!(contains.len(), 3);
		assert_eq!(contains[0], 1);
		assert_eq!(contains[1], 12);
		assert_eq!(contains[2], 19);
	}
}