//! tf_idf_vectorizer/utils/math/vector/mod.rs
pub mod serde;
4use std::{alloc::{alloc, dealloc, realloc, Layout}, fmt, marker::PhantomData, mem, ptr::{self, NonNull}};
5use std::ops::Index;
6use std::fmt::Debug;
7
8use num::Num;
/// Sparse vector over a numeric type `N`.
///
/// Only nonzero values are stored, each paired with its logical index
/// (held as `u32`) in a parallel, ascending index buffer. Reads of
/// unstored positions hand out a reference to the cached `zero`.
pub struct ZeroSpVec<N>
where N: Num
{
    // Raw value/index storage; manages capacity but never drops values.
    buf: RawZeroSpVec<N>,
    // Logical dimension of the vector, implicit zeros included.
    len: usize,
    // Number of stored (nonzero) entries; invariant: nnz <= len.
    nnz: usize,
    // Canonical zero returned by reference from `get` for unstored slots.
    zero: N,
}
23
/// Contract of a zero-suppressing sparse vector: only nonzero values are
/// stored, paired with their logical indices (`u32`) in a sorted buffer;
/// zeros are materialized on read.
pub trait ZeroSpVecTrait<N>: Clone + Default + Index<usize, Output = N>
where N: Num
{
    /// Raw pointer to the sorted index buffer. Dangling before the first
    /// allocation; only the first `nnz()` slots are initialized.
    unsafe fn ind_ptr(&self) -> *mut u32;
    /// Raw pointer to the value buffer, parallel to `ind_ptr`.
    unsafe fn val_ptr(&self) -> *mut N;
    /// Appends an `(index, value)` pair without updating the logical length.
    /// Caller must push strictly ascending indices and fix up `len` afterwards.
    unsafe fn raw_push(&mut self, index: usize, value: N);
    /// `Ok(slot)` if `index` is stored, else `Err(insertion_slot)`.
    fn ind_binary_search(&self, index: &usize) -> Result<usize, usize>;
    /// Empty vector, no allocation.
    fn new() -> Self;
    /// Empty vector with room for `cap` stored entries.
    fn with_capacity(cap: usize) -> Self;
    /// Ensures room for at least `additional` more stored entries.
    fn reserve(&mut self, additional: usize);
    /// Shrinks the allocation towards the number of stored entries.
    fn shrink_to_fit(&mut self);
    /// `true` when the logical length is zero.
    fn is_empty(&self) -> bool;
    /// Logical length (implicit zeros included).
    fn len(&self) -> usize;
    /// Mutable access to the logical length; callers must keep it consistent
    /// with the stored indices.
    fn len_mut(&mut self) -> &mut usize;
    /// Allocated capacity, in stored entries.
    fn capacity(&self) -> usize;
    /// Number of stored (nonzero) entries.
    fn nnz(&self) -> usize;
    /// Extends the logical length by `dim` trailing zeros.
    fn add_dim(&mut self, dim: usize);
    /// Removes all elements.
    fn clear(&mut self);
    /// Appends an element; zeros only grow the logical length.
    fn push(&mut self, elem: N);
    /// Removes and returns the last logical element.
    fn pop(&mut self) -> Option<N>;
    /// Reference to the element at `index` (zero if unstored); `None` past the end.
    fn get(&self, index: usize) -> Option<&N>;
    /// Logical index of the `index`-th stored entry.
    fn get_ind(&self, index: usize) -> Option<usize>;
    /// Removes the element at `index`, shifting later indices down by one.
    fn remove(&mut self, index: usize) -> N;
    /// Builds from a dense `Vec`, storing only nonzero values.
    fn from_vec(vec: Vec<N>) -> Self;
    /// Builds from `(index, value)` pairs with ascending indices; stores zeros too.
    unsafe fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self;
    /// Like `from_raw_iter` but skips zero values.
    unsafe fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self;
    /// Dense iterator over every logical position.
    fn iter(&self) -> ZeroSpVecIter<N>;
    /// Sparse iterator over stored `(index, &value)` pairs.
    fn raw_iter(&self) -> ZeroSpVecRawIter<N>;
}
53
54impl<N> ZeroSpVecTrait<N> for ZeroSpVec<N>
55where N: Num
56{
57 #[inline]
58 unsafe fn ind_ptr(&self) -> *mut u32 {
59 self.buf.ind_ptr.as_ptr()
60 }
61
62 #[inline]
63 unsafe fn val_ptr(&self) -> *mut N {
64 self.buf.val_ptr.as_ptr()
65 }
66
67 #[inline]
74 unsafe fn raw_push(&mut self, index: usize, value: N) {
75 if self.nnz == self.buf.cap {
76 self.buf.grow();
77 }
78 unsafe {
79 let val_ptr = self.val_ptr().add(self.nnz);
80 let ind_ptr = self.ind_ptr().add(self.nnz);
81 ptr::write(val_ptr, value);
82 debug_assert!(index <= u32::MAX as usize, "index overflow for u32 storage");
83 ptr::write(ind_ptr, index as u32);
84 }
85 self.nnz += 1;
86 }
87
88 #[inline]
89 fn ind_binary_search(&self, index: &usize) -> Result<usize, usize> {
90 if self.nnz == 0 {
92 return Err(0);
93 }
94
95 let mut left = 0;
96 let mut right = self.nnz - 1;
97 while left < right {
98 let mid = left + (right - left) / 2;
99 let mid_index = unsafe { ptr::read(self.ind_ptr().add(mid)) as usize };
100 if mid_index == *index {
101 return Ok(mid);
102 } else if mid_index < *index {
103 left = mid + 1;
104 } else {
105 right = mid;
106 }
107 }
108
109 let final_index = unsafe { ptr::read(self.ind_ptr().add(left)) as usize };
111 if final_index == *index {
112 Ok(left)
113 } else if final_index < *index {
114 Err(left + 1)
115 } else {
116 Err(left)
117 }
118 }
119
120 #[inline]
121 fn new() -> Self {
122 ZeroSpVec {
123 buf: RawZeroSpVec::new(),
124 len: 0,
125 nnz: 0,
126 zero: N::zero(),
127 }
128 }
129
130 #[inline]
131 fn with_capacity(cap: usize) -> Self {
132 let mut buf = RawZeroSpVec::new();
133 buf.cap = cap;
134 buf.cap_set();
135 ZeroSpVec {
136 buf: buf,
137 len: 0,
138 nnz: 0,
139 zero: N::zero(),
140 }
141 }
142
143 #[inline]
144 fn reserve(&mut self, additional: usize) {
145 let new_cap = self.nnz + additional;
146 if new_cap > self.buf.cap {
147 self.buf.cap = new_cap;
148 self.buf.re_cap_set();
149 }
150 }
151
152 #[inline]
153 fn shrink_to_fit(&mut self) {
154 if self.len < self.buf.cap {
155 let new_cap = self.nnz;
156 self.buf.cap = new_cap;
157 self.buf.re_cap_set();
158 }
159 }
160
161 #[inline]
162 fn is_empty(&self) -> bool {
163 self.len == 0
164 }
165
166 #[inline]
167 fn len(&self) -> usize {
168 self.len
169 }
170
171 #[inline]
172 fn len_mut(&mut self) -> &mut usize {
173 &mut self.len
174 }
175
176 #[inline]
177 fn capacity(&self) -> usize {
178 self.buf.cap
179 }
180
181 #[inline]
182 fn nnz(&self) -> usize {
183 self.nnz
184 }
185
186 #[inline]
187 fn add_dim(&mut self, dim: usize) {
188 self.len += dim;
189 }
190
191 #[inline]
192 fn clear(&mut self) {
193 while let Some(_) = self.pop() {
194 }
196 }
197
198 #[inline]
199 fn push(&mut self, elem: N) {
200 if self.nnz == self.buf.cap {
201 self.buf.grow();
202 }
203 if elem != N::zero() {
204 unsafe {
205 let val_ptr = self.val_ptr().add(self.nnz);
206 let ind_ptr = self.ind_ptr().add(self.nnz);
207 ptr::write(val_ptr, elem);
208 debug_assert!(self.len <= u32::MAX as usize, "index overflow for u32 storage");
209 ptr::write(ind_ptr, self.len as u32);
210 }
211 self.nnz += 1;
212 }
213 self.len += 1;
214 }
215
216 #[inline]
217 fn pop(&mut self) -> Option<N> {
218 if self.nnz == 0 {
219 return None;
220 }
221 let pop_element = if self.nnz == self.len {
222 self.nnz -= 1;
223 unsafe {
224 Some(ptr::read(self.val_ptr().add(self.nnz)))
225 }
226 } else {
227 Some(N::zero())
228 };
229 self.len -= 1;
230 pop_element
231 }
232
233 #[inline]
234 fn get(&self, index: usize) -> Option<&N> {
235 if index >= self.len {
236 return None;
237 }
238 match self.ind_binary_search(&index) {
239 Ok(idx) => {
240 unsafe {
241 Some(&*self.val_ptr().add(idx))
242 }
243 },
244 Err(_) => {
245 Some(&self.zero)
246 }
247 }
248 }
249
250 #[inline]
251 fn get_ind(&self, index: usize) -> Option<usize> {
252 if index >= self.nnz {
253 return None;
254 }
255 unsafe {
256 Some(ptr::read(self.ind_ptr().add(index)) as usize)
257 }
258 }
259
260
261
262 #[inline]
274 fn remove(&mut self, index: usize) -> N {
275 debug_assert!(index < self.len, "index out of bounds");
276
277 self.len -= 1;
279
280 match self.ind_binary_search(&index) {
281 Ok(i) => {
282 let removed_val = unsafe {
284 ptr::read(self.val_ptr().add(i))
285 };
286
287 let count = self.nnz - i - 1;
289 if count > 0 {
290 unsafe {
291 ptr::copy(
293 self.val_ptr().add(i + 1),
294 self.val_ptr().add(i),
295 count
296 );
297 ptr::copy(
299 self.ind_ptr().add(i + 1),
300 self.ind_ptr().add(i),
301 count
302 );
303 for offset in i..(self.nnz - 1) {
305 *self.ind_ptr().add(offset) -= 1;
306 }
307 }
308 }
309 self.nnz -= 1;
311
312 removed_val
314 }
315 Err(i) => {
316 if i < self.nnz {
320 unsafe {
321 for offset in i..self.nnz {
322 *self.ind_ptr().add(offset) -= 1;
323 }
324 }
325 }
326
327 N::zero()
329 }
330 }
331 }
332
333 #[inline]
334 fn from_vec(vec: Vec<N>) -> Self {
335 let mut zero_sp_vec = ZeroSpVec::with_capacity(vec.len());
336 for entry in vec {
337 zero_sp_vec.push(entry);
338 }
339 zero_sp_vec
340 }
341
342 #[inline]
344 unsafe fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self {
345 let mut zero_sp_vec = ZeroSpVec::with_capacity(iter.size_hint().0);
346 for (index, value) in iter {
347 unsafe {
348 zero_sp_vec.raw_push(index, value);
349 }
350 }
351 zero_sp_vec.len = len;
352 zero_sp_vec
353 }
354
355 #[inline]
359 unsafe fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self {
360 let mut zero_sp_vec = ZeroSpVec::new();
361 for (index, value) in iter {
362 if value != N::zero() {
363 unsafe {
364 zero_sp_vec.raw_push(index, value);
365 }
366 }
367 }
368 zero_sp_vec.len = len;
369 zero_sp_vec
370 }
371
372 #[inline]
373 fn iter(&self) -> ZeroSpVecIter<N> {
374 ZeroSpVecIter {
375 vec: self,
376 pos: 0,
377 }
378 }
379
380 #[inline]
381 fn raw_iter(&self) -> ZeroSpVecRawIter<N> {
382 ZeroSpVecRawIter {
383 vec: self,
384 pos: 0,
385 }
386 }
387}
388
// SAFETY: ZeroSpVec uniquely owns its heap buffers (no aliasing, no interior
// mutability visible here), so it is Send/Sync exactly when N is.
unsafe impl <N: Num + Send> Send for ZeroSpVec<N> {}
unsafe impl <N: Num + Sync> Sync for ZeroSpVec<N> {}
391
392impl<N> Clone for ZeroSpVec<N>
393where N: Num
394{
395 #[inline]
396 fn clone(&self) -> Self {
397 ZeroSpVec {
398 buf: self.buf.clone(),
399 len: self.len,
400 nnz: self.nnz,
401 zero: N::zero(),
402 }
403 }
404}
405
impl<N> Drop for ZeroSpVec<N>
where N: Num
{
    /// Intentionally empty: the backing memory is freed by `RawZeroSpVec`'s
    /// own `Drop`.
    /// NOTE(review): stored values are never dropped individually — fine for
    /// plain numeric types, but would leak if `N` ever owned resources;
    /// confirm that `N: Num` implementations are always trivially droppable.
    #[inline]
    fn drop(&mut self) {
    }
}
414
415impl<N> Default for ZeroSpVec<N>
416where N: Num
417{
418 #[inline]
419 fn default() -> Self {
420 ZeroSpVec::new()
421 }
422}
423
424impl<N> Index<usize> for ZeroSpVec<N>
425where N: Num
426{
427 type Output = N;
428
429 #[inline]
430 fn index(&self, index: usize) -> &Self::Output {
431 self.get(index).expect("index out of bounds")
432 }
433}
434
435impl<N: Num + Debug> Debug for ZeroSpVec<N> {
436 #[inline]
437 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
438 if f.sign_plus() {
439 f.debug_struct("DefaultSparseVec")
440 .field("buf", &self.buf)
441 .field("nnz", &self.nnz)
442 .field("len", &self.len)
443 .field("zero", &self.zero)
444 .finish()
445 } else if f.alternate() {
446 write!(f, "ZeroSpVec({:?})", self.iter().collect::<Vec<&N>>())
447 } else {
448 f.debug_list().entries((0..self.len).map(|i| self.get(i).unwrap())).finish()
449 }
450 }
451}
452
/// Dense iterator over a `ZeroSpVec`: yields a reference for every logical
/// position `0..len`, materializing `&zero` for unstored slots.
pub struct ZeroSpVecIter<'a, N>
where N: Num
{
    // Source vector.
    vec: &'a ZeroSpVec<N>,
    // Next logical index to yield.
    pos: usize,
}
459
460impl<'a, N> Iterator for ZeroSpVecIter<'a, N>
461where N: Num
462{
463 type Item = &'a N;
464
465 #[inline]
466 fn next(&mut self) -> Option<Self::Item> {
467 self.vec.get(self.pos).map(|val| {
468 self.pos += 1;
469 val
470 })
471 }
472}
473
/// Sparse iterator over a `ZeroSpVec`: yields `(logical_index, &value)` for
/// the stored (nonzero) entries only, in ascending index order.
pub struct ZeroSpVecRawIter<'a, N>
where N: Num
{
    // Source vector.
    vec: &'a ZeroSpVec<N>,
    // Next storage slot to yield (0..nnz).
    pos: usize,
}
480
481impl<'a, N> Iterator for ZeroSpVecRawIter<'a, N>
482where N: Num
483{
484 type Item = (usize, &'a N);
485
486 #[inline]
487 fn next(&mut self) -> Option<Self::Item> {
488 if self.pos < self.vec.nnz() {
489 let index = unsafe { *self.vec.ind_ptr().add(self.pos) as usize };
490 let value = unsafe { &*self.vec.val_ptr().add(self.pos) };
491 self.pos += 1;
492 Some((index, value))
493 } else {
494 None
495 }
496 }
497
498 #[inline]
499 fn size_hint(&self) -> (usize, Option<usize>) {
500 (self.vec.nnz().saturating_sub(self.pos), Some(self.vec.len()))
501 }
502}
503
504impl<T> From<Vec<T>> for ZeroSpVec<T>
505where T: Num
506{
507 #[inline]
508 fn from(vec: Vec<T>) -> Self {
509 ZeroSpVec::from_vec(vec)
510 }
511}
512
513impl<'a, N> From<ZeroSpVecRawIter<'a, N>> for ZeroSpVec<N>
514where
515 N: Num + Copy,
516{
517 #[inline]
518 fn from(iter: ZeroSpVecRawIter<'a, N>) -> Self {
519 let mut vec = ZeroSpVec::new();
520 for (idx, val) in iter {
521 unsafe {
522 vec.raw_push(idx, *val);
523 }
524 vec.len += 1;
525 }
526 vec
527 }
528}
529
530
531
532
533
534
535
536
537
/// Owning handle for the two parallel heap buffers backing `ZeroSpVec`:
/// a value buffer and a `u32` index buffer, both sized for `cap` elements.
/// `cap == 0` means "never allocated" (dangling pointers) and
/// `cap == usize::MAX` is the sentinel used when `N` is zero-sized.
#[derive(Debug)]
struct RawZeroSpVec<N>
where N: Num
{
    // Both pointers are dangling until the first allocation.
    val_ptr: NonNull<N>,
    ind_ptr: NonNull<u32>,
    // Capacity in elements (not bytes), shared by both buffers.
    cap: usize,
    // Declares logical ownership of N values for drop-check/variance.
    _marker: PhantomData<N>, }
552
553impl<N> RawZeroSpVec<N>
554where N: Num
555{
556 #[inline]
557 fn new() -> Self {
558 let cap = if mem::size_of::<N>() == 0 { std::usize::MAX } else { 0 };
560
561 RawZeroSpVec {
562 val_ptr: NonNull::dangling(),
564 ind_ptr: NonNull::dangling(),
566 cap: cap,
567 _marker: PhantomData,
568 }
569 }
570
571 #[inline]
572 fn grow(&mut self) {
573 unsafe {
574 let val_elem_size = mem::size_of::<N>();
575 let ind_elem_size = mem::size_of::<u32>();
576
577 debug_assert!(val_elem_size != 0, "capacity overflow");
580
581 let t_align = mem::align_of::<N>();
583 let usize_align = mem::align_of::<u32>();
584
585 let (new_cap, val_ptr, ind_ptr): (usize, *mut N, *mut u32) =
587 if self.cap == 0 {
588 let new_val_layout = Layout::from_size_align(val_elem_size, t_align).expect("Failed to create memory layout");
589 let new_ind_layout = Layout::from_size_align(ind_elem_size, usize_align).expect("Failed to create memory layout");
590 (
591 1,
592 alloc(new_val_layout) as *mut N,
593 alloc(new_ind_layout) as *mut u32,
594 )
595 } else {
596 let new_cap = self.cap * 2;
598 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout for reallocation");
599 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout for reallocation");
600 (
601 new_cap,
602 realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * new_cap) as *mut N,
603 realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * new_cap) as *mut u32,
604 )
605 };
606
607 if val_ptr.is_null() || ind_ptr.is_null() {
609 oom();
610 }
611
612 self.val_ptr = NonNull::new_unchecked(val_ptr);
614 self.ind_ptr = NonNull::new_unchecked(ind_ptr);
615 self.cap = new_cap;
616 }
617 }
618
619 #[inline]
620 fn cap_set(&mut self) {
621 unsafe {
622 let val_elem_size = mem::size_of::<N>();
623 let ind_elem_size = mem::size_of::<u32>();
624
625 let t_align = mem::align_of::<N>();
626 let usize_align = mem::align_of::<u32>();
627
628 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
629 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
630 let new_val_ptr = alloc(new_val_layout) as *mut N;
631 let new_ind_ptr = alloc(new_ind_layout) as *mut u32;
632 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
633 oom();
634 }
635 self.val_ptr = NonNull::new_unchecked(new_val_ptr);
636 self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
637 }
638 }
639
640 #[inline]
641 fn re_cap_set(&mut self) {
642 unsafe {
643 let val_elem_size = mem::size_of::<N>();
644 let ind_elem_size = mem::size_of::<u32>();
645
646 let t_align = mem::align_of::<N>();
647 let usize_align = mem::align_of::<u32>();
648
649 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
650 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
651 let new_val_ptr = realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * self.cap) as *mut N;
652 let new_ind_ptr = realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * self.cap) as *mut u32;
653 if new_val_ptr.is_null() || new_ind_ptr.is_null() {
654 oom();
655 }
656 self.val_ptr = NonNull::new_unchecked(new_val_ptr);
657 self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
658 }
659 }
660}
661
impl<N> Clone for RawZeroSpVec<N>
where N: Num
{
    /// Allocates fresh buffers and copies `cap` elements from each.
    ///
    /// NOTE(review): this copies the FULL capacity, including slots past the
    /// owner's `nnz` that were never written, i.e. it reads uninitialized
    /// memory. Likely harmless for plain numeric types in practice, but
    /// technically unsound — confirm whether the copy should be limited to
    /// the initialized prefix (would require knowing `nnz` here).
    #[inline]
    fn clone(&self) -> Self {
        unsafe {
            // cap == 0: never allocated; cap == usize::MAX: zero-sized-type
            // sentinel. Either way there is no heap buffer to copy.
            if self.cap == 0 || self.cap == usize::MAX {
                return RawZeroSpVec {
                    val_ptr: NonNull::dangling(),
                    ind_ptr: NonNull::dangling(),
                    cap: self.cap,
                    _marker: PhantomData,
                };
            }

            let val_elem_size = mem::size_of::<N>();
            let ind_elem_size = mem::size_of::<u32>();

            let t_align = mem::align_of::<N>();
            let usize_align = mem::align_of::<u32>();

            // Rebuild the layouts from cap, mirroring the allocation sites.
            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
            let new_val_ptr = alloc(new_val_layout) as *mut N;
            let new_ind_ptr = alloc(new_ind_layout) as *mut u32;
            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
                oom();
            }
            ptr::copy_nonoverlapping(self.val_ptr.as_ptr(), new_val_ptr, self.cap);
            ptr::copy_nonoverlapping(self.ind_ptr.as_ptr(), new_ind_ptr, self.cap);

            RawZeroSpVec {
                val_ptr: NonNull::new_unchecked(new_val_ptr),
                ind_ptr: NonNull::new_unchecked(new_ind_ptr),
                cap: self.cap,
                _marker: PhantomData,
            }
        }
    }
}
704
// SAFETY: RawZeroSpVec uniquely owns its two heap buffers, so it is
// Send/Sync exactly when N is.
unsafe impl<N: Num + Send> Send for RawZeroSpVec<N> {}
unsafe impl<N: Num + Sync> Sync for RawZeroSpVec<N> {}
707
708impl<N> Drop for RawZeroSpVec<N>
709where N: Num
710{
711 #[inline]
712 fn drop(&mut self) {
713 unsafe {
714 if self.cap == 0 || self.cap == usize::MAX {
716 return;
717 }
718
719 let val_elem_size = mem::size_of::<N>();
720 let ind_elem_size = mem::size_of::<u32>();
721
722 let t_align = mem::align_of::<N>();
723 let usize_align = mem::align_of::<u32>();
724
725 let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
726 let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
727 dealloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout);
728 dealloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout);
729 }
730 }
731}
732
/// Allocation-failure handler: terminates the process immediately.
///
/// Diverges (`-> !`) so callers can never continue with a null buffer.
/// Uses `abort()` rather than `exit(-9999)`: aborting cannot itself allocate,
/// runs no destructors, and reports an abnormal termination to the OS — the
/// conventional response to out-of-memory.
#[cold]
fn oom() -> ! {
    ::std::process::abort();
}