tf_idf_vectorizer/utils/math/vector/
mod.rs1pub mod serde;
3
4use std::{alloc::{alloc, dealloc, realloc, Layout}, fmt, marker::PhantomData, mem, ptr::{self, NonNull}};
5use std::ops::Index;
6use std::fmt::Debug;
7
8use num::Num;
9pub struct ZeroSpVec<N>
16where N: Num
17{
18 buf: RawZeroSpVec<N>,
19 len: usize,
20 nnz: usize,
21 zero: N,
22}
23
24pub trait ZeroSpVecTrait<N>: Clone + Default + Index<usize, Output = N>
25where N: Num
26{
27 unsafe fn ind_ptr(&self) -> *mut usize;
28 unsafe fn val_ptr(&self) -> *mut N;
29 unsafe fn raw_push(&mut self, index: usize, value: N);
30 fn ind_binary_search(&self, index: &usize) -> Result<usize, usize>;
31 fn new() -> Self;
32 fn with_capacity(cap: usize) -> Self;
33 fn reserve(&mut self, additional: usize);
34 fn shrink_to_fit(&mut self);
35 fn is_empty(&self) -> bool;
36 fn len(&self) -> usize;
37 fn len_mut(&mut self) -> &mut usize;
38 fn capacity(&self) -> usize;
39 fn nnz(&self) -> usize;
40 fn add_dim(&mut self, dim: usize);
41 fn clear(&mut self);
42 fn push(&mut self, elem: N);
43 fn pop(&mut self) -> Option<N>;
44 fn get(&self, index: usize) -> Option<&N>;
45 fn get_ind(&self, index: usize) -> Option<usize>;
46 fn remove(&mut self, index: usize) -> N;
47 fn from_vec(vec: Vec<N>) -> Self;
48 fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>) -> Self;
49 fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>) -> Self;
50 fn iter(&self) -> ZeroSpVecIter<N>;
51 fn raw_iter(&self) -> ZeroSpVecRawIter<N>;
52}
53
54impl<N> ZeroSpVecTrait<N> for ZeroSpVec<N>
55where N: Num
56{
57 #[inline]
58 unsafe fn ind_ptr(&self) -> *mut usize {
59 self.buf.ind_ptr.as_ptr()
60 }
61
62 #[inline]
63 unsafe fn val_ptr(&self) -> *mut N {
64 self.buf.val_ptr.as_ptr()
65 }
66
67 #[inline]
74 unsafe fn raw_push(&mut self, index: usize, value: N) {
75 if self.nnz == self.buf.cap {
76 self.buf.grow();
77 }
78 unsafe {
79 let val_ptr = self.val_ptr().add(self.nnz);
80 let ind_ptr = self.ind_ptr().add(self.nnz);
81 ptr::write(val_ptr, value);
82 ptr::write(ind_ptr, index);
83 }
84 self.nnz += 1;
85 }
86
87 #[inline]
88 fn ind_binary_search(&self, index: &usize) -> Result<usize, usize> {
89 if self.nnz == 0 {
91 return Err(0);
92 }
93
94 let mut left = 0;
95 let mut right = self.nnz - 1;
96 while left < right {
97 let mid = left + (right - left) / 2;
98 let mid_index = unsafe { ptr::read(self.ind_ptr().add(mid)) };
99 if mid_index == *index {
100 return Ok(mid);
101 } else if mid_index < *index {
102 left = mid + 1;
103 } else {
104 right = mid;
105 }
106 }
107
108 let final_index = unsafe { ptr::read(self.ind_ptr().add(left)) };
110 if final_index == *index {
111 Ok(left)
112 } else if final_index < *index {
113 Err(left + 1)
114 } else {
115 Err(left)
116 }
117 }
118
119 #[inline]
120 fn new() -> Self {
121 ZeroSpVec {
122 buf: RawZeroSpVec::new(),
123 len: 0,
124 nnz: 0,
125 zero: N::zero(),
126 }
127 }
128
129 #[inline]
130 fn with_capacity(cap: usize) -> Self {
131 let mut buf = RawZeroSpVec::new();
132 buf.cap = cap;
133 buf.cap_set();
134 ZeroSpVec {
135 buf: buf,
136 len: 0,
137 nnz: 0,
138 zero: N::zero(),
139 }
140 }
141
142 #[inline]
143 fn reserve(&mut self, additional: usize) {
144 let new_cap = self.nnz + additional;
145 if new_cap > self.buf.cap {
146 self.buf.cap = new_cap;
147 self.buf.re_cap_set();
148 }
149 }
150
151 #[inline]
152 fn shrink_to_fit(&mut self) {
153 if self.len < self.buf.cap {
154 let new_cap = self.nnz;
155 self.buf.cap = new_cap;
156 self.buf.re_cap_set();
157 }
158 }
159
160 #[inline]
161 fn is_empty(&self) -> bool {
162 self.len == 0
163 }
164
165 #[inline]
166 fn len(&self) -> usize {
167 self.len
168 }
169
170 #[inline]
171 fn len_mut(&mut self) -> &mut usize {
172 &mut self.len
173 }
174
175 #[inline]
176 fn capacity(&self) -> usize {
177 self.buf.cap
178 }
179
180 #[inline]
181 fn nnz(&self) -> usize {
182 self.nnz
183 }
184
185 #[inline]
186 fn add_dim(&mut self, dim: usize) {
187 self.len += dim;
188 }
189
190 #[inline]
191 fn clear(&mut self) {
192 while let Some(_) = self.pop() {
193 }
195 }
196
197 #[inline]
198 fn push(&mut self, elem: N) {
199 if self.nnz == self.buf.cap {
200 self.buf.grow();
201 }
202 if elem != N::zero() {
203 unsafe {
204 let val_ptr = self.val_ptr().add(self.nnz);
205 let ind_ptr = self.ind_ptr().add(self.nnz);
206 ptr::write(val_ptr, elem);
207 ptr::write(ind_ptr, self.len);
208 }
209 self.nnz += 1;
210 }
211 self.len += 1;
212 }
213
214 #[inline]
215 fn pop(&mut self) -> Option<N> {
216 if self.nnz == 0 {
217 return None;
218 }
219 let pop_element = if self.nnz == self.len {
220 self.nnz -= 1;
221 unsafe {
222 Some(ptr::read(self.val_ptr().add(self.nnz)))
223 }
224 } else {
225 Some(N::zero())
226 };
227 self.len -= 1;
228 pop_element
229 }
230
231 #[inline]
232 fn get(&self, index: usize) -> Option<&N> {
233 if index >= self.len {
234 return None;
235 }
236 match self.ind_binary_search(&index) {
237 Ok(idx) => {
238 unsafe {
239 Some(&*self.val_ptr().add(idx))
240 }
241 },
242 Err(_) => {
243 Some(&self.zero)
244 }
245 }
246 }
247
248 #[inline]
249 fn get_ind(&self, index: usize) -> Option<usize> {
250 if index >= self.nnz {
251 return None;
252 }
253 unsafe {
254 Some(ptr::read(self.ind_ptr().add(index)))
255 }
256 }
257
258
259
260 #[inline]
272 fn remove(&mut self, index: usize) -> N {
273 debug_assert!(index < self.len, "index out of bounds");
274
275 self.len -= 1;
277
278 match self.ind_binary_search(&index) {
279 Ok(i) => {
280 let removed_val = unsafe {
282 ptr::read(self.val_ptr().add(i))
283 };
284
285 let count = self.nnz - i - 1;
287 if count > 0 {
288 unsafe {
289 ptr::copy(
291 self.val_ptr().add(i + 1),
292 self.val_ptr().add(i),
293 count
294 );
295 ptr::copy(
297 self.ind_ptr().add(i + 1),
298 self.ind_ptr().add(i),
299 count
300 );
301 for offset in i..(self.nnz - 1) {
303 *self.ind_ptr().add(offset) -= 1;
304 }
305 }
306 }
307 self.nnz -= 1;
309
310 removed_val
312 }
313 Err(i) => {
314 if i < self.nnz {
318 unsafe {
319 for offset in i..self.nnz {
320 *self.ind_ptr().add(offset) -= 1;
321 }
322 }
323 }
324
325 N::zero()
327 }
328 }
329 }
330
331 #[inline]
332 fn from_vec(vec: Vec<N>) -> Self {
333 let mut zero_sp_vec = ZeroSpVec::with_capacity(vec.len());
334 for entry in vec {
335 zero_sp_vec.push(entry);
336 }
337 zero_sp_vec
338 }
339
340 #[inline]
341 fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>) -> Self {
342 let mut zero_sp_vec = ZeroSpVec::with_capacity(iter.size_hint().0);
343 for (index, value) in iter {
344 unsafe {
345 zero_sp_vec.raw_push(index, value);
346 }
347 zero_sp_vec.len += 1;
348 }
349 zero_sp_vec
350 }
351
352 #[inline]
355 fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>) -> Self {
356 let mut zero_sp_vec = ZeroSpVec::new();
357 for (index, value) in iter {
358 if value != N::zero() {
359 unsafe {
360 zero_sp_vec.raw_push(index, value);
361 }
362 zero_sp_vec.len += 1;
363 }
364 }
365 zero_sp_vec
366 }
367
368 #[inline]
369 fn iter(&self) -> ZeroSpVecIter<N> {
370 ZeroSpVecIter {
371 vec: self,
372 pos: 0,
373 }
374 }
375
376 #[inline]
377 fn raw_iter(&self) -> ZeroSpVecRawIter<N> {
378 ZeroSpVecRawIter {
379 vec: self,
380 pos: 0,
381 }
382 }
383}
384
385unsafe impl <N: Num + Send> Send for ZeroSpVec<N> {}
386unsafe impl <N: Num + Sync> Sync for ZeroSpVec<N> {}
387
388impl<N> Clone for ZeroSpVec<N>
389where N: Num
390{
391 #[inline]
392 fn clone(&self) -> Self {
393 ZeroSpVec {
394 buf: self.buf.clone(),
395 len: self.len,
396 nnz: self.nnz,
397 zero: N::zero(),
398 }
399 }
400}
401
402impl<N> Drop for ZeroSpVec<N>
403where N: Num
404{
405 #[inline]
406 fn drop(&mut self) {
407 }
409}
410
411impl<N> Default for ZeroSpVec<N>
412where N: Num
413{
414 #[inline]
415 fn default() -> Self {
416 ZeroSpVec::new()
417 }
418}
419
420impl<N> Index<usize> for ZeroSpVec<N>
421where N: Num
422{
423 type Output = N;
424
425 #[inline]
426 fn index(&self, index: usize) -> &Self::Output {
427 self.get(index).expect("index out of bounds")
428 }
429}
430
431impl<N: Num + Debug> Debug for ZeroSpVec<N> {
432 #[inline]
433 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
434 if f.sign_plus() {
435 f.debug_struct("DefaultSparseVec")
436 .field("buf", &self.buf)
437 .field("nnz", &self.nnz)
438 .field("len", &self.len)
439 .field("zero", &self.zero)
440 .finish()
441 } else if f.alternate() {
442 write!(f, "ZeroSpVec({:?})", self.iter().collect::<Vec<&N>>())
443 } else {
444 f.debug_list().entries((0..self.len).map(|i| self.get(i).unwrap())).finish()
445 }
446 }
447}
448
449pub struct ZeroSpVecIter<'a, N>
450where N: Num
451{
452 vec: &'a ZeroSpVec<N>,
453 pos: usize,
454}
455
456impl<'a, N> Iterator for ZeroSpVecIter<'a, N>
457where N: Num
458{
459 type Item = &'a N;
460
461 #[inline]
462 fn next(&mut self) -> Option<Self::Item> {
463 self.vec.get(self.pos).map(|val| {
464 self.pos += 1;
465 val
466 })
467 }
468}
469
470pub struct ZeroSpVecRawIter<'a, N>
471where N: Num
472{
473 vec: &'a ZeroSpVec<N>,
474 pos: usize,
475}
476
477impl<'a, N> Iterator for ZeroSpVecRawIter<'a, N>
478where N: Num
479{
480 type Item = (usize, &'a N);
481
482 #[inline]
483 fn next(&mut self) -> Option<Self::Item> {
484 if self.pos < self.vec.nnz() {
485 let index = unsafe { *self.vec.ind_ptr().add(self.pos) };
486 let value = unsafe { &*self.vec.val_ptr().add(self.pos) };
487 self.pos += 1;
488 Some((index, value))
489 } else {
490 None
491 }
492 }
493
494 #[inline]
495 fn size_hint(&self) -> (usize, Option<usize>) {
496 (self.vec.nnz(), Some(self.vec.len()))
497 }
498}
499
500impl<T> From<Vec<T>> for ZeroSpVec<T>
501where T: Num
502{
503 #[inline]
504 fn from(vec: Vec<T>) -> Self {
505 ZeroSpVec::from_vec(vec)
506 }
507}
508
509impl<'a, N> From<ZeroSpVecRawIter<'a, N>> for ZeroSpVec<N>
510where
511 N: Num + Copy,
512{
513 #[inline]
514 fn from(iter: ZeroSpVecRawIter<'a, N>) -> Self {
515 let mut vec = ZeroSpVec::new();
516 for (idx, val) in iter {
517 unsafe {
518 vec.raw_push(idx, *val);
519 }
520 vec.len += 1;
521 }
522 vec
523 }
524}
525
526
527
528
529
530
531
532
533
534#[derive(Debug)]
536struct RawZeroSpVec<N>
537where N: Num
538{
539 val_ptr: NonNull<N>,
540 ind_ptr: NonNull<usize>,
541 cap: usize,
546 _marker: PhantomData<N>, }
548
549impl<N> RawZeroSpVec<N>
550where N: Num
551{
552 #[inline]
553 fn new() -> Self {
554 let cap = if mem::size_of::<N>() == 0 { std::usize::MAX } else { 0 };
556
557 RawZeroSpVec {
558 val_ptr: NonNull::dangling(),
560 ind_ptr: NonNull::dangling(),
562 cap: cap,
563 _marker: PhantomData,
564 }
565 }
566
567 #[inline]
568 fn grow(&mut self) {
569 unsafe {
570 let val_elem_size = mem::size_of::<N>();
571 let ind_elem_size = mem::size_of::<usize>();
572
573 debug_assert!(val_elem_size != 0, "capacity overflow");
576
577 let t_align = mem::align_of::<N>();
579 let usize_align = mem::align_of::<usize>();
580
581 let (new_cap, val_ptr, ind_ptr): (usize, *mut N, *mut usize) =
583 if self.cap == 0 {
584 let new_val_layout = Layout::from_size_align(val_elem_size, t_align).expect("Failed to create memory layout");
585 let new_ind_layout = Layout::from_size_align(ind_elem_size, usize_align).expect("Failed to create memory layout");
586 (
587 1,
588 alloc(new_val_layout) as *mut N,
589 alloc(new_ind_layout) as *mut usize,
590 )
591 } else {
592 let new_cap = self.cap * 2;
594 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout for reallocation");
595 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout for reallocation");
596 (
597 new_cap,
598 realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * new_cap) as *mut N,
599 realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * new_cap) as *mut usize,
600 )
601 };
602
603 if val_ptr.is_null() || ind_ptr.is_null() {
605 oom();
606 }
607
608 self.val_ptr = NonNull::new_unchecked(val_ptr);
610 self.ind_ptr = NonNull::new_unchecked(ind_ptr);
611 self.cap = new_cap;
612 }
613 }
614
615 #[inline]
616 fn cap_set(&mut self) {
617 unsafe {
618 let val_elem_size = mem::size_of::<N>();
619 let ind_elem_size = mem::size_of::<usize>();
620
621 let t_align = mem::align_of::<N>();
622 let usize_align = mem::align_of::<usize>();
623
624 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
625 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
626 let new_val_ptr = alloc(new_val_layout) as *mut N;
627 let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
628 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
629 oom();
630 }
631 self.val_ptr = NonNull::new_unchecked(new_val_ptr);
632 self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
633 }
634 }
635
636 #[inline]
637 fn re_cap_set(&mut self) {
638 unsafe {
639 let val_elem_size = mem::size_of::<N>();
640 let ind_elem_size = mem::size_of::<usize>();
641
642 let t_align = mem::align_of::<N>();
643 let usize_align = mem::align_of::<usize>();
644
645 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
646 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
647 let new_val_ptr = realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * self.cap) as *mut N;
648 let new_ind_ptr = realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * self.cap) as *mut usize;
649 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
650 oom();
651 }
652 self.val_ptr = NonNull::new_unchecked(new_val_ptr);
653 self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
654 }
655 }
656}
657
658impl<N> Clone for RawZeroSpVec<N>
659where N: Num
660{
661 #[inline]
662 fn clone(&self) -> Self {
663 unsafe {
664 if self.cap == 0 || self.cap == usize::MAX {
667 return RawZeroSpVec {
668 val_ptr: NonNull::dangling(),
669 ind_ptr: NonNull::dangling(),
670 cap: self.cap,
671 _marker: PhantomData,
672 };
673 }
674
675 let val_elem_size = mem::size_of::<N>();
676 let ind_elem_size = mem::size_of::<usize>();
677
678 let t_align = mem::align_of::<N>();
679 let usize_align = mem::align_of::<usize>();
680
681 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
682 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
683 let new_val_ptr = alloc(new_val_layout) as *mut N;
684 let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
685 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
686 oom();
687 }
688 ptr::copy_nonoverlapping(self.val_ptr.as_ptr(), new_val_ptr, self.cap);
689 ptr::copy_nonoverlapping(self.ind_ptr.as_ptr(), new_ind_ptr, self.cap);
690
691 RawZeroSpVec {
692 val_ptr: NonNull::new_unchecked(new_val_ptr),
693 ind_ptr: NonNull::new_unchecked(new_ind_ptr),
694 cap: self.cap,
695 _marker: PhantomData,
696 }
697 }
698 }
699}
700
701unsafe impl<N: Num + Send> Send for RawZeroSpVec<N> {}
702unsafe impl<N: Num + Sync> Sync for RawZeroSpVec<N> {}
703
704impl<N> Drop for RawZeroSpVec<N>
705where N: Num
706{
707 #[inline]
708 fn drop(&mut self) {
709 unsafe {
710 if self.cap == 0 || self.cap == usize::MAX {
712 return;
713 }
714
715 let val_elem_size = mem::size_of::<N>();
716 let ind_elem_size = mem::size_of::<usize>();
717
718 let t_align = mem::align_of::<N>();
719 let usize_align = mem::align_of::<usize>();
720
721 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
722 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
723 dealloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout);
724 dealloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout);
725 }
726 }
727}
728
729#[cold]
736fn oom() {
737 ::std::process::exit(-9999);
738}