tf_idf_vectorizer/utils/math/vector/
mod.rs1pub mod serde;
3
4use std::{alloc::{alloc, dealloc, realloc, Layout}, fmt, marker::PhantomData, mem, ptr::{self, NonNull}};
5use std::ops::Index;
6use std::fmt::Debug;
7
8use num::Num;
9pub struct ZeroSpVec<N>
16where N: Num
17{
18 buf: RawZeroSpVec<N>,
19 len: usize,
20 nnz: usize,
21 zero: N,
22}
23
24pub trait ZeroSpVecTrait<N>: Clone + Default + Index<usize, Output = N>
25where N: Num
26{
27 unsafe fn ind_ptr(&self) -> *mut usize;
28 unsafe fn val_ptr(&self) -> *mut N;
29 unsafe fn raw_push(&mut self, index: usize, value: N);
30 fn ind_binary_search(&self, index: &usize) -> Result<usize, usize>;
31 fn new() -> Self;
32 fn with_capacity(cap: usize) -> Self;
33 fn reserve(&mut self, additional: usize);
34 fn shrink_to_fit(&mut self);
35 fn is_empty(&self) -> bool;
36 fn len(&self) -> usize;
37 fn len_mut(&mut self) -> &mut usize;
38 fn capacity(&self) -> usize;
39 fn nnz(&self) -> usize;
40 fn add_dim(&mut self, dim: usize);
41 fn clear(&mut self);
42 fn push(&mut self, elem: N);
43 fn pop(&mut self) -> Option<N>;
44 fn get(&self, index: usize) -> Option<&N>;
45 fn get_ind(&self, index: usize) -> Option<usize>;
46 fn remove(&mut self, index: usize) -> N;
47 fn from_vec(vec: Vec<N>) -> Self;
48 unsafe fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self;
49 unsafe fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self;
50 fn iter(&self) -> ZeroSpVecIter<N>;
51 fn raw_iter(&self) -> ZeroSpVecRawIter<N>;
52}
53
54impl<N> ZeroSpVecTrait<N> for ZeroSpVec<N>
55where N: Num
56{
57 #[inline]
58 unsafe fn ind_ptr(&self) -> *mut usize {
59 self.buf.ind_ptr.as_ptr()
60 }
61
62 #[inline]
63 unsafe fn val_ptr(&self) -> *mut N {
64 self.buf.val_ptr.as_ptr()
65 }
66
67 #[inline]
74 unsafe fn raw_push(&mut self, index: usize, value: N) {
75 if self.nnz == self.buf.cap {
76 self.buf.grow();
77 }
78 unsafe {
79 let val_ptr = self.val_ptr().add(self.nnz);
80 let ind_ptr = self.ind_ptr().add(self.nnz);
81 ptr::write(val_ptr, value);
82 ptr::write(ind_ptr, index);
83 }
84 self.nnz += 1;
85 }
86
87 #[inline]
88 fn ind_binary_search(&self, index: &usize) -> Result<usize, usize> {
89 if self.nnz == 0 {
91 return Err(0);
92 }
93
94 let mut left = 0;
95 let mut right = self.nnz - 1;
96 while left < right {
97 let mid = left + (right - left) / 2;
98 let mid_index = unsafe { ptr::read(self.ind_ptr().add(mid)) };
99 if mid_index == *index {
100 return Ok(mid);
101 } else if mid_index < *index {
102 left = mid + 1;
103 } else {
104 right = mid;
105 }
106 }
107
108 let final_index = unsafe { ptr::read(self.ind_ptr().add(left)) };
110 if final_index == *index {
111 Ok(left)
112 } else if final_index < *index {
113 Err(left + 1)
114 } else {
115 Err(left)
116 }
117 }
118
119 #[inline]
120 fn new() -> Self {
121 ZeroSpVec {
122 buf: RawZeroSpVec::new(),
123 len: 0,
124 nnz: 0,
125 zero: N::zero(),
126 }
127 }
128
129 #[inline]
130 fn with_capacity(cap: usize) -> Self {
131 let mut buf = RawZeroSpVec::new();
132 buf.cap = cap;
133 buf.cap_set();
134 ZeroSpVec {
135 buf: buf,
136 len: 0,
137 nnz: 0,
138 zero: N::zero(),
139 }
140 }
141
142 #[inline]
143 fn reserve(&mut self, additional: usize) {
144 let new_cap = self.nnz + additional;
145 if new_cap > self.buf.cap {
146 self.buf.cap = new_cap;
147 self.buf.re_cap_set();
148 }
149 }
150
151 #[inline]
152 fn shrink_to_fit(&mut self) {
153 if self.len < self.buf.cap {
154 let new_cap = self.nnz;
155 self.buf.cap = new_cap;
156 self.buf.re_cap_set();
157 }
158 }
159
160 #[inline]
161 fn is_empty(&self) -> bool {
162 self.len == 0
163 }
164
165 #[inline]
166 fn len(&self) -> usize {
167 self.len
168 }
169
170 #[inline]
171 fn len_mut(&mut self) -> &mut usize {
172 &mut self.len
173 }
174
175 #[inline]
176 fn capacity(&self) -> usize {
177 self.buf.cap
178 }
179
180 #[inline]
181 fn nnz(&self) -> usize {
182 self.nnz
183 }
184
185 #[inline]
186 fn add_dim(&mut self, dim: usize) {
187 self.len += dim;
188 }
189
190 #[inline]
191 fn clear(&mut self) {
192 while let Some(_) = self.pop() {
193 }
195 }
196
197 #[inline]
198 fn push(&mut self, elem: N) {
199 if self.nnz == self.buf.cap {
200 self.buf.grow();
201 }
202 if elem != N::zero() {
203 unsafe {
204 let val_ptr = self.val_ptr().add(self.nnz);
205 let ind_ptr = self.ind_ptr().add(self.nnz);
206 ptr::write(val_ptr, elem);
207 ptr::write(ind_ptr, self.len);
208 }
209 self.nnz += 1;
210 }
211 self.len += 1;
212 }
213
214 #[inline]
215 fn pop(&mut self) -> Option<N> {
216 if self.nnz == 0 {
217 return None;
218 }
219 let pop_element = if self.nnz == self.len {
220 self.nnz -= 1;
221 unsafe {
222 Some(ptr::read(self.val_ptr().add(self.nnz)))
223 }
224 } else {
225 Some(N::zero())
226 };
227 self.len -= 1;
228 pop_element
229 }
230
231 #[inline]
232 fn get(&self, index: usize) -> Option<&N> {
233 if index >= self.len {
234 return None;
235 }
236 match self.ind_binary_search(&index) {
237 Ok(idx) => {
238 unsafe {
239 Some(&*self.val_ptr().add(idx))
240 }
241 },
242 Err(_) => {
243 Some(&self.zero)
244 }
245 }
246 }
247
248 #[inline]
249 fn get_ind(&self, index: usize) -> Option<usize> {
250 if index >= self.nnz {
251 return None;
252 }
253 unsafe {
254 Some(ptr::read(self.ind_ptr().add(index)))
255 }
256 }
257
258
259
260 #[inline]
272 fn remove(&mut self, index: usize) -> N {
273 debug_assert!(index < self.len, "index out of bounds");
274
275 self.len -= 1;
277
278 match self.ind_binary_search(&index) {
279 Ok(i) => {
280 let removed_val = unsafe {
282 ptr::read(self.val_ptr().add(i))
283 };
284
285 let count = self.nnz - i - 1;
287 if count > 0 {
288 unsafe {
289 ptr::copy(
291 self.val_ptr().add(i + 1),
292 self.val_ptr().add(i),
293 count
294 );
295 ptr::copy(
297 self.ind_ptr().add(i + 1),
298 self.ind_ptr().add(i),
299 count
300 );
301 for offset in i..(self.nnz - 1) {
303 *self.ind_ptr().add(offset) -= 1;
304 }
305 }
306 }
307 self.nnz -= 1;
309
310 removed_val
312 }
313 Err(i) => {
314 if i < self.nnz {
318 unsafe {
319 for offset in i..self.nnz {
320 *self.ind_ptr().add(offset) -= 1;
321 }
322 }
323 }
324
325 N::zero()
327 }
328 }
329 }
330
331 #[inline]
332 fn from_vec(vec: Vec<N>) -> Self {
333 let mut zero_sp_vec = ZeroSpVec::with_capacity(vec.len());
334 for entry in vec {
335 zero_sp_vec.push(entry);
336 }
337 zero_sp_vec
338 }
339
340 #[inline]
342 unsafe fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self {
343 let mut zero_sp_vec = ZeroSpVec::with_capacity(iter.size_hint().0);
344 for (index, value) in iter {
345 unsafe {
346 zero_sp_vec.raw_push(index, value);
347 }
348 }
349 zero_sp_vec.len = len;
350 zero_sp_vec
351 }
352
353 #[inline]
357 unsafe fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self {
358 let mut zero_sp_vec = ZeroSpVec::new();
359 for (index, value) in iter {
360 if value != N::zero() {
361 unsafe {
362 zero_sp_vec.raw_push(index, value);
363 }
364 }
365 }
366 zero_sp_vec.len = len;
367 zero_sp_vec
368 }
369
370 #[inline]
371 fn iter(&self) -> ZeroSpVecIter<N> {
372 ZeroSpVecIter {
373 vec: self,
374 pos: 0,
375 }
376 }
377
378 #[inline]
379 fn raw_iter(&self) -> ZeroSpVecRawIter<N> {
380 ZeroSpVecRawIter {
381 vec: self,
382 pos: 0,
383 }
384 }
385}
386
387unsafe impl <N: Num + Send> Send for ZeroSpVec<N> {}
388unsafe impl <N: Num + Sync> Sync for ZeroSpVec<N> {}
389
390impl<N> Clone for ZeroSpVec<N>
391where N: Num
392{
393 #[inline]
394 fn clone(&self) -> Self {
395 ZeroSpVec {
396 buf: self.buf.clone(),
397 len: self.len,
398 nnz: self.nnz,
399 zero: N::zero(),
400 }
401 }
402}
403
404impl<N> Drop for ZeroSpVec<N>
405where N: Num
406{
407 #[inline]
408 fn drop(&mut self) {
409 }
411}
412
413impl<N> Default for ZeroSpVec<N>
414where N: Num
415{
416 #[inline]
417 fn default() -> Self {
418 ZeroSpVec::new()
419 }
420}
421
422impl<N> Index<usize> for ZeroSpVec<N>
423where N: Num
424{
425 type Output = N;
426
427 #[inline]
428 fn index(&self, index: usize) -> &Self::Output {
429 self.get(index).expect("index out of bounds")
430 }
431}
432
433impl<N: Num + Debug> Debug for ZeroSpVec<N> {
434 #[inline]
435 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
436 if f.sign_plus() {
437 f.debug_struct("DefaultSparseVec")
438 .field("buf", &self.buf)
439 .field("nnz", &self.nnz)
440 .field("len", &self.len)
441 .field("zero", &self.zero)
442 .finish()
443 } else if f.alternate() {
444 write!(f, "ZeroSpVec({:?})", self.iter().collect::<Vec<&N>>())
445 } else {
446 f.debug_list().entries((0..self.len).map(|i| self.get(i).unwrap())).finish()
447 }
448 }
449}
450
451pub struct ZeroSpVecIter<'a, N>
452where N: Num
453{
454 vec: &'a ZeroSpVec<N>,
455 pos: usize,
456}
457
458impl<'a, N> Iterator for ZeroSpVecIter<'a, N>
459where N: Num
460{
461 type Item = &'a N;
462
463 #[inline]
464 fn next(&mut self) -> Option<Self::Item> {
465 self.vec.get(self.pos).map(|val| {
466 self.pos += 1;
467 val
468 })
469 }
470}
471
472pub struct ZeroSpVecRawIter<'a, N>
473where N: Num
474{
475 vec: &'a ZeroSpVec<N>,
476 pos: usize,
477}
478
479impl<'a, N> Iterator for ZeroSpVecRawIter<'a, N>
480where N: Num
481{
482 type Item = (usize, &'a N);
483
484 #[inline]
485 fn next(&mut self) -> Option<Self::Item> {
486 if self.pos < self.vec.nnz() {
487 let index = unsafe { *self.vec.ind_ptr().add(self.pos) };
488 let value = unsafe { &*self.vec.val_ptr().add(self.pos) };
489 self.pos += 1;
490 Some((index, value))
491 } else {
492 None
493 }
494 }
495
496 #[inline]
497 fn size_hint(&self) -> (usize, Option<usize>) {
498 (self.vec.nnz(), Some(self.vec.len()))
499 }
500}
501
502impl<T> From<Vec<T>> for ZeroSpVec<T>
503where T: Num
504{
505 #[inline]
506 fn from(vec: Vec<T>) -> Self {
507 ZeroSpVec::from_vec(vec)
508 }
509}
510
511impl<'a, N> From<ZeroSpVecRawIter<'a, N>> for ZeroSpVec<N>
512where
513 N: Num + Copy,
514{
515 #[inline]
516 fn from(iter: ZeroSpVecRawIter<'a, N>) -> Self {
517 let mut vec = ZeroSpVec::new();
518 for (idx, val) in iter {
519 unsafe {
520 vec.raw_push(idx, *val);
521 }
522 vec.len += 1;
523 }
524 vec
525 }
526}
527
528
529
530
531
532
533
534
535
536#[derive(Debug)]
538struct RawZeroSpVec<N>
539where N: Num
540{
541 val_ptr: NonNull<N>,
542 ind_ptr: NonNull<usize>,
543 cap: usize,
548 _marker: PhantomData<N>, }
550
551impl<N> RawZeroSpVec<N>
552where N: Num
553{
554 #[inline]
555 fn new() -> Self {
556 let cap = if mem::size_of::<N>() == 0 { std::usize::MAX } else { 0 };
558
559 RawZeroSpVec {
560 val_ptr: NonNull::dangling(),
562 ind_ptr: NonNull::dangling(),
564 cap: cap,
565 _marker: PhantomData,
566 }
567 }
568
569 #[inline]
570 fn grow(&mut self) {
571 unsafe {
572 let val_elem_size = mem::size_of::<N>();
573 let ind_elem_size = mem::size_of::<usize>();
574
575 debug_assert!(val_elem_size != 0, "capacity overflow");
578
579 let t_align = mem::align_of::<N>();
581 let usize_align = mem::align_of::<usize>();
582
583 let (new_cap, val_ptr, ind_ptr): (usize, *mut N, *mut usize) =
585 if self.cap == 0 {
586 let new_val_layout = Layout::from_size_align(val_elem_size, t_align).expect("Failed to create memory layout");
587 let new_ind_layout = Layout::from_size_align(ind_elem_size, usize_align).expect("Failed to create memory layout");
588 (
589 1,
590 alloc(new_val_layout) as *mut N,
591 alloc(new_ind_layout) as *mut usize,
592 )
593 } else {
594 let new_cap = self.cap * 2;
596 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout for reallocation");
597 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout for reallocation");
598 (
599 new_cap,
600 realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * new_cap) as *mut N,
601 realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * new_cap) as *mut usize,
602 )
603 };
604
605 if val_ptr.is_null() || ind_ptr.is_null() {
607 oom();
608 }
609
610 self.val_ptr = NonNull::new_unchecked(val_ptr);
612 self.ind_ptr = NonNull::new_unchecked(ind_ptr);
613 self.cap = new_cap;
614 }
615 }
616
617 #[inline]
618 fn cap_set(&mut self) {
619 unsafe {
620 let val_elem_size = mem::size_of::<N>();
621 let ind_elem_size = mem::size_of::<usize>();
622
623 let t_align = mem::align_of::<N>();
624 let usize_align = mem::align_of::<usize>();
625
626 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
627 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
628 let new_val_ptr = alloc(new_val_layout) as *mut N;
629 let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
630 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
631 oom();
632 }
633 self.val_ptr = NonNull::new_unchecked(new_val_ptr);
634 self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
635 }
636 }
637
638 #[inline]
639 fn re_cap_set(&mut self) {
640 unsafe {
641 let val_elem_size = mem::size_of::<N>();
642 let ind_elem_size = mem::size_of::<usize>();
643
644 let t_align = mem::align_of::<N>();
645 let usize_align = mem::align_of::<usize>();
646
647 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
648 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
649 let new_val_ptr = realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * self.cap) as *mut N;
650 let new_ind_ptr = realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * self.cap) as *mut usize;
651 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
652 oom();
653 }
654 self.val_ptr = NonNull::new_unchecked(new_val_ptr);
655 self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
656 }
657 }
658}
659
660impl<N> Clone for RawZeroSpVec<N>
661where N: Num
662{
663 #[inline]
664 fn clone(&self) -> Self {
665 unsafe {
666 if self.cap == 0 || self.cap == usize::MAX {
669 return RawZeroSpVec {
670 val_ptr: NonNull::dangling(),
671 ind_ptr: NonNull::dangling(),
672 cap: self.cap,
673 _marker: PhantomData,
674 };
675 }
676
677 let val_elem_size = mem::size_of::<N>();
678 let ind_elem_size = mem::size_of::<usize>();
679
680 let t_align = mem::align_of::<N>();
681 let usize_align = mem::align_of::<usize>();
682
683 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
684 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
685 let new_val_ptr = alloc(new_val_layout) as *mut N;
686 let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
687 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
688 oom();
689 }
690 ptr::copy_nonoverlapping(self.val_ptr.as_ptr(), new_val_ptr, self.cap);
691 ptr::copy_nonoverlapping(self.ind_ptr.as_ptr(), new_ind_ptr, self.cap);
692
693 RawZeroSpVec {
694 val_ptr: NonNull::new_unchecked(new_val_ptr),
695 ind_ptr: NonNull::new_unchecked(new_ind_ptr),
696 cap: self.cap,
697 _marker: PhantomData,
698 }
699 }
700 }
701}
702
703unsafe impl<N: Num + Send> Send for RawZeroSpVec<N> {}
704unsafe impl<N: Num + Sync> Sync for RawZeroSpVec<N> {}
705
706impl<N> Drop for RawZeroSpVec<N>
707where N: Num
708{
709 #[inline]
710 fn drop(&mut self) {
711 unsafe {
712 if self.cap == 0 || self.cap == usize::MAX {
714 return;
715 }
716
717 let val_elem_size = mem::size_of::<N>();
718 let ind_elem_size = mem::size_of::<usize>();
719
720 let t_align = mem::align_of::<N>();
721 let usize_align = mem::align_of::<usize>();
722
723 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
724 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
725 dealloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout);
726 dealloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout);
727 }
728 }
729}
730
731#[cold]
738fn oom() {
739 ::std::process::exit(-9999);
740}