tf_idf_vectorizer/utils/math/vector/
mod.rs

1// pub mod math;
2pub mod serde;
3
4use std::{alloc::{alloc, dealloc, realloc, Layout}, fmt, marker::PhantomData, mem, ptr::{self, NonNull}};
5use std::ops::Index;
6use std::fmt::Debug;
7
8use num::Num;
9/// ZeroSpVecは0要素を疎とした過疎ベクトルを実装です
10/// indices と valuesを持ち
11/// indicesは要素のインデックスを保持し、
12/// valuesは要素の値を保持します
13/// 
14/// 要素はindicesの昇順でソートされていることを保証します
15pub struct ZeroSpVec<N>  
16where N: Num
17{
18    buf: RawZeroSpVec<N>,
19    len: usize,
20    nnz: usize,
21    zero: N,
22}
23
24pub trait ZeroSpVecTrait<N>: Clone + Default + Index<usize, Output = N>
25where N: Num
26{
27    unsafe fn ind_ptr(&self) -> *mut usize;
28    unsafe fn val_ptr(&self) -> *mut N;
29    unsafe fn raw_push(&mut self, index: usize, value: N);
30    fn ind_binary_search(&self, index: &usize) -> Result<usize, usize>;
31    fn new() -> Self;
32    fn with_capacity(cap: usize) -> Self;
33    fn reserve(&mut self, additional: usize);
34    fn shrink_to_fit(&mut self);
35    fn is_empty(&self) -> bool;
36    fn len(&self) -> usize;
37    fn len_mut(&mut self) -> &mut usize;
38    fn capacity(&self) -> usize;
39    fn nnz(&self) -> usize;
40    fn add_dim(&mut self, dim: usize);
41    fn clear(&mut self);
42    fn push(&mut self, elem: N);
43    fn pop(&mut self) -> Option<N>;
44    fn get(&self, index: usize) -> Option<&N>;
45    fn get_ind(&self, index: usize) -> Option<usize>;
46    fn remove(&mut self, index: usize) -> N;
47    fn from_vec(vec: Vec<N>) -> Self;
48    unsafe fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self;
49    unsafe fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self;
50    fn iter(&self) -> ZeroSpVecIter<N>;
51    fn raw_iter(&self) -> ZeroSpVecRawIter<N>;
52}
53
54impl<N> ZeroSpVecTrait<N> for ZeroSpVec<N> 
55where N: Num
56{
57    #[inline]
58    unsafe fn ind_ptr(&self) -> *mut usize {
59        self.buf.ind_ptr.as_ptr()
60    }
61
62    #[inline]
63    unsafe fn val_ptr(&self) -> *mut N {
64        self.buf.val_ptr.as_ptr()
65    }
66
67    /// raw_pushは、要素を追加するためのメソッドです。
68    /// ただし全体の長さを考慮しませんのであとでlenを更新する必要があります。
69    /// 
70    /// # Arguments
71    /// - `index` - 追加する要素のインデックス
72    /// - `value` - 追加する要素の値
73    #[inline]
74    unsafe fn raw_push(&mut self, index: usize, value: N) {
75        if self.nnz == self.buf.cap {
76            self.buf.grow();
77        }
78        unsafe {
79            let val_ptr = self.val_ptr().add(self.nnz);
80            let ind_ptr = self.ind_ptr().add(self.nnz);
81            ptr::write(val_ptr, value);
82            ptr::write(ind_ptr, index);
83        }
84        self.nnz += 1;
85    }
86
87    #[inline]
88    fn ind_binary_search(&self, index: &usize) -> Result<usize, usize> {
89        // 要素が無い場合は「まだどこにも挿入されていない」ので Err(0)
90        if self.nnz == 0 {
91            return Err(0);
92        }
93
94        let mut left = 0;
95        let mut right = self.nnz - 1;
96        while left < right {
97            let mid = left + (right - left) / 2;
98            let mid_index = unsafe { ptr::read(self.ind_ptr().add(mid)) };
99            if mid_index == *index {
100                return Ok(mid);
101            } else if mid_index < *index {
102                left = mid + 1;
103            } else {
104                right = mid;
105            }
106        }
107
108        // ループ終了後 left == right の位置になっている
109        let final_index = unsafe { ptr::read(self.ind_ptr().add(left)) };
110        if final_index == *index {
111            Ok(left)
112        } else if final_index < *index {
113            Err(left + 1)
114        } else {
115            Err(left)
116        }
117    }
118
119    #[inline]
120    fn new() -> Self {
121        ZeroSpVec {
122            buf: RawZeroSpVec::new(),
123            len: 0,
124            nnz: 0,
125            zero: N::zero(),
126        }
127    }
128
129    #[inline]
130    fn with_capacity(cap: usize) -> Self {
131        let mut buf = RawZeroSpVec::new();
132        buf.cap = cap;
133        buf.cap_set();
134        ZeroSpVec {
135            buf: buf,
136            len: 0,
137            nnz: 0,
138            zero: N::zero(),
139        }
140    }
141
142    #[inline]
143    fn reserve(&mut self, additional: usize) {
144        let new_cap = self.nnz + additional;
145        if new_cap > self.buf.cap {
146            self.buf.cap = new_cap;
147            self.buf.re_cap_set();
148        }
149    }
150
151    #[inline]
152    fn shrink_to_fit(&mut self) {
153        if self.len < self.buf.cap {
154            let new_cap = self.nnz;
155            self.buf.cap = new_cap;
156            self.buf.re_cap_set();
157        }
158    }
159
160    #[inline]
161    fn is_empty(&self) -> bool {
162        self.len == 0
163    }
164
165    #[inline]
166    fn len(&self) -> usize {
167        self.len
168    }
169
170    #[inline]
171    fn len_mut(&mut self) -> &mut usize {
172        &mut self.len
173    }
174
175    #[inline]
176    fn capacity(&self) -> usize {
177        self.buf.cap
178    }
179
180    #[inline]
181    fn nnz(&self) -> usize {
182        self.nnz
183    }
184
185    #[inline]
186    fn add_dim(&mut self, dim: usize) {
187        self.len += dim;
188    }
189
190    #[inline]
191    fn clear(&mut self) {
192        while let Some(_) = self.pop() {
193            // do nothing
194        }
195    }
196
197    #[inline]
198    fn push(&mut self, elem: N) {
199        if self.nnz == self.buf.cap {
200            self.buf.grow();
201        }
202        if elem != N::zero() {
203            unsafe {
204                let val_ptr = self.val_ptr().add(self.nnz);
205                let ind_ptr = self.ind_ptr().add(self.nnz);
206                ptr::write(val_ptr, elem);
207                ptr::write(ind_ptr, self.len);
208            }
209            self.nnz += 1;
210        }
211        self.len += 1;
212    }
213
214    #[inline]
215    fn pop(&mut self) -> Option<N> {
216        if self.nnz == 0 {
217            return None;
218        }
219        let pop_element = if self.nnz == self.len {
220            self.nnz -= 1;
221            unsafe {
222                Some(ptr::read(self.val_ptr().add(self.nnz)))
223            }
224        } else {
225            Some(N::zero())
226        };
227        self.len -= 1;
228        pop_element
229    }
230
231    #[inline]
232    fn get(&self, index: usize) -> Option<&N> {
233        if index >= self.len {
234            return None;
235        }
236        match self.ind_binary_search(&index) {
237            Ok(idx) => {
238                unsafe {
239                    Some(&*self.val_ptr().add(idx))
240                }
241            },
242            Err(_) => {
243                Some(&self.zero)
244            }
245        }
246    }
247
248    #[inline]
249    fn get_ind(&self, index: usize) -> Option<usize> {
250        if index >= self.nnz {
251            return None;
252        }
253        unsafe {
254            Some(ptr::read(self.ind_ptr().add(index)))
255        }
256    }
257
258
259
260    /// removeメソッド
261    /// 
262    /// `index` 番目の要素を削除し、削除した要素を返します。
263    /// - 論理インデックス `index` が物理的に存在すれば、その値を返す
264    /// - 物理的になければ(= デフォルト扱いだった)デフォルト値を返す
265    /// 
266    /// # Arguments
267    /// - `index` - 削除する要素の論理インデックス
268    /// 
269    /// # Returns
270    /// - `N` - 削除した要素の値
271    #[inline]
272    fn remove(&mut self, index: usize) -> N {
273        debug_assert!(index < self.len, "index out of bounds");
274        
275        // 論理的な要素数は常に1つ減る
276        self.len -= 1;
277
278        match self.ind_binary_search(&index) {
279            Ok(i) => {
280                // 今回削除する要素を読みだす
281                let removed_val = unsafe {
282                    ptr::read(self.val_ptr().add(i))
283                };
284
285                // `i` 番目を削除するので、後ろを前にシフト
286                let count = self.nnz - i - 1;
287                if count > 0 {
288                    unsafe {
289                        // 値をコピーして前につめる
290                        ptr::copy(
291                            self.val_ptr().add(i + 1),
292                            self.val_ptr().add(i),
293                            count
294                        );
295                        // インデックスもコピーして前につめる
296                        ptr::copy(
297                            self.ind_ptr().add(i + 1),
298                            self.ind_ptr().add(i),
299                            count
300                        );
301                        // シフトした後のインデックスは全て -1 (1つ前に詰める)
302                        for offset in i..(self.nnz - 1) {
303                            *self.ind_ptr().add(offset) -= 1;
304                        }
305                    }
306                }
307                // nnzは 1 減
308                self.nnz -= 1;
309
310                // 取り除いた要素を返す
311                removed_val
312            }
313            Err(i) => {
314                // index は詰める必要があるので、i 以降の要素のインデックスを -1
315                // (たとえば “要素自体は無い” けど、後ろにある要素は
316                //  論理インデックスが 1 つ前になる)
317                if i < self.nnz {
318                    unsafe {
319                        for offset in i..self.nnz {
320                            *self.ind_ptr().add(offset) -= 1;
321                        }
322                    }
323                }
324
325                // 0返す
326                N::zero()
327            }
328        }
329    }
330
331    #[inline]
332    fn from_vec(vec: Vec<N>) -> Self {
333        let mut zero_sp_vec = ZeroSpVec::with_capacity(vec.len());
334        for entry in vec {
335            zero_sp_vec.push(entry);
336        }
337        zero_sp_vec
338    }
339
340    // まじでunsafeにするべき
341    #[inline]
342    unsafe fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self {
343        let mut zero_sp_vec = ZeroSpVec::with_capacity(iter.size_hint().0);
344        for (index, value) in iter {
345            unsafe {
346                zero_sp_vec.raw_push(index, value);
347            }
348        }
349        zero_sp_vec.len = len;
350        zero_sp_vec
351    }
352
353    /// Build from sparse iterator that yields only non-zero elements (idx, value).
354    /// This avoids allocating a full dense Vec when most entries are zero.
355    /// unsafeにするべき
356    #[inline]
357    unsafe fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>, len: usize) -> Self {
358        let mut zero_sp_vec = ZeroSpVec::new();
359        for (index, value) in iter {
360            if value != N::zero() {
361                unsafe {
362                    zero_sp_vec.raw_push(index, value);
363                }
364            }
365        }
366        zero_sp_vec.len = len;
367        zero_sp_vec
368    }
369
370    #[inline]
371    fn iter(&self) -> ZeroSpVecIter<N> {
372        ZeroSpVecIter {
373            vec: self,
374            pos: 0,
375        }
376    }
377
378    #[inline]
379    fn raw_iter(&self) -> ZeroSpVecRawIter<N> {
380        ZeroSpVecRawIter {
381            vec: self,
382            pos: 0,
383        }
384    }
385}
386
387unsafe impl <N: Num + Send> Send for ZeroSpVec<N> {}
388unsafe impl <N: Num + Sync> Sync for ZeroSpVec<N> {}
389
390impl<N> Clone for ZeroSpVec<N> 
391where N: Num
392{
393    #[inline]
394    fn clone(&self) -> Self {
395        ZeroSpVec {
396            buf: self.buf.clone(),
397            len: self.len,
398            nnz: self.nnz,
399            zero: N::zero(),
400        }
401    }
402}
403
404impl<N> Drop for ZeroSpVec<N> 
405where N: Num
406{
407    #[inline]
408    fn drop(&mut self) {
409        // RawZeroSpVecで実装済み
410    }
411}
412
413impl<N> Default for ZeroSpVec<N> 
414where N: Num
415{
416    #[inline]
417    fn default() -> Self {
418        ZeroSpVec::new()
419    }
420}
421
422impl<N> Index<usize> for ZeroSpVec<N> 
423where N: Num
424{
425    type Output = N;
426
427    #[inline]
428    fn index(&self, index: usize) -> &Self::Output {
429        self.get(index).expect("index out of bounds")
430    }
431}
432
433impl<N: Num + Debug> Debug for ZeroSpVec<N> {
434    #[inline]
435    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
436        if f.sign_plus() {
437            f.debug_struct("DefaultSparseVec")
438                .field("buf", &self.buf)
439                .field("nnz", &self.nnz)
440                .field("len", &self.len)
441                .field("zero", &self.zero)
442                .finish()
443        } else if f.alternate() {
444            write!(f, "ZeroSpVec({:?})", self.iter().collect::<Vec<&N>>())
445        } else {
446            f.debug_list().entries((0..self.len).map(|i| self.get(i).unwrap())).finish()
447        }
448    }
449}
450
451pub struct ZeroSpVecIter<'a, N> 
452where N: Num
453{
454    vec: &'a ZeroSpVec<N>,
455    pos: usize,
456}
457
458impl<'a, N> Iterator for ZeroSpVecIter<'a, N> 
459where N: Num
460{
461    type Item = &'a N;
462
463    #[inline]
464    fn next(&mut self) -> Option<Self::Item> {
465        self.vec.get(self.pos).map(|val| {
466            self.pos += 1;
467            val
468        })
469    }
470}
471
472pub struct ZeroSpVecRawIter<'a, N> 
473where N: Num
474{
475    vec: &'a ZeroSpVec<N>,
476    pos: usize,
477}
478
479impl<'a, N> Iterator for ZeroSpVecRawIter<'a, N> 
480where N: Num
481{
482    type Item = (usize, &'a N);
483
484    #[inline]
485    fn next(&mut self) -> Option<Self::Item> {
486        if self.pos < self.vec.nnz() {
487            let index = unsafe { *self.vec.ind_ptr().add(self.pos) };
488            let value = unsafe { &*self.vec.val_ptr().add(self.pos) };
489            self.pos += 1;
490            Some((index, value))
491        } else {
492            None
493        }
494    }
495
496    #[inline]
497    fn size_hint(&self) -> (usize, Option<usize>) {
498        (self.vec.nnz(), Some(self.vec.len()))
499    }
500}
501
502impl<T> From<Vec<T>> for ZeroSpVec<T>
503where T: Num
504{
505    #[inline]
506    fn from(vec: Vec<T>) -> Self {
507        ZeroSpVec::from_vec(vec)
508    }
509}
510
511impl<'a, N> From<ZeroSpVecRawIter<'a, N>> for ZeroSpVec<N>
512where
513    N: Num + Copy,
514{
515    #[inline]
516    fn from(iter: ZeroSpVecRawIter<'a, N>) -> Self {
517        let mut vec = ZeroSpVec::new();
518        for (idx, val) in iter {
519            unsafe {
520                vec.raw_push(idx, *val);
521            }
522            vec.len += 1;
523        }
524        vec
525    }
526}
527
528
529
530
531
532
533
534
535
536/// ZeroSpVecの生実装
537#[derive(Debug)]
538struct RawZeroSpVec<N> 
539where N: Num 
540{
541    val_ptr: NonNull<N>,
542    ind_ptr: NonNull<usize>,
543    /// cap 定義
544    /// 0 => メモリ未確保 (flag)
545    /// usize::MAX =>  zero size struct (ZST) として定義 処理の簡略化を実施 (flag)
546    /// _ => 実際のcapN
547    cap: usize,
548    _marker: PhantomData<N>, // 所有権管理用にPhantomDataを追加
549}
550
551impl<N> RawZeroSpVec<N> 
552where N: Num
553{
554    #[inline]
555    fn new() -> Self {
556        // zero size struct (ZST)をusize::MAXと定義 ある種のフラグとして使用
557        let cap = if mem::size_of::<N>() == 0 { std::usize::MAX } else { 0 }; 
558
559        RawZeroSpVec {
560            // 空のポインタを代入しておく メモリ確保を遅延させる
561            val_ptr: NonNull::dangling(),
562            // 空のポインタを代入しておく メモリ確保を遅延させる
563            ind_ptr: NonNull::dangling(),
564            cap: cap,
565            _marker: PhantomData,
566        }
567    }
568
569    #[inline]
570    fn grow(&mut self) {
571        unsafe {
572            let val_elem_size = mem::size_of::<N>();
573            let ind_elem_size = mem::size_of::<usize>();
574
575            // 安全性: ZSTの場合growはcapを超えた場合にしか呼ばれない
576            // これは必然的にオーバーフローしていることをしめしている
577            debug_assert!(val_elem_size != 0, "capacity overflow");
578
579            // アライメントの取得 適切なメモリ確保を行うため
580            let t_align = mem::align_of::<N>();
581            let usize_align = mem::align_of::<usize>();
582
583            // アロケーション
584            let (new_cap, val_ptr, ind_ptr): (usize, *mut N, *mut usize) = 
585                if self.cap == 0 {
586                    let new_val_layout = Layout::from_size_align(val_elem_size, t_align).expect("Failed to create memory layout");
587                    let new_ind_layout = Layout::from_size_align(ind_elem_size, usize_align).expect("Failed to create memory layout");
588                    (
589                        1,
590                        alloc(new_val_layout) as *mut N,
591                        alloc(new_ind_layout) as *mut usize,
592                    )
593                } else {
594                    // 効率化: cap * 2 でメモリを確保する 見た目上はO(log n)の増加を実現
595                    let new_cap = self.cap * 2;
596                    let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout for reallocation");
597                    let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout for reallocation");
598                    (
599                        new_cap,
600                        realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * new_cap) as *mut N,
601                        realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * new_cap) as *mut usize,
602                    )
603                };
604
605            // アロケーション失敗時の処理
606            if val_ptr.is_null() || ind_ptr.is_null() {
607                oom();
608            }
609
610            // selfに返却
611            self.val_ptr = NonNull::new_unchecked(val_ptr);
612            self.ind_ptr = NonNull::new_unchecked(ind_ptr);
613            self.cap = new_cap;
614        }
615    }
616    
617    #[inline]
618    fn cap_set(&mut self) {
619        unsafe {
620            let val_elem_size = mem::size_of::<N>();
621            let ind_elem_size = mem::size_of::<usize>();
622
623            let t_align = mem::align_of::<N>();
624            let usize_align = mem::align_of::<usize>();
625
626            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
627            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
628            let new_val_ptr = alloc(new_val_layout) as *mut N;
629            let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
630            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
631                oom();
632            }
633            self.val_ptr = NonNull::new_unchecked(new_val_ptr);
634            self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
635        }
636    }
637
638    #[inline]
639    fn re_cap_set(&mut self) {
640        unsafe {
641            let val_elem_size = mem::size_of::<N>();
642            let ind_elem_size = mem::size_of::<usize>();
643
644            let t_align = mem::align_of::<N>();
645            let usize_align = mem::align_of::<usize>();
646
647            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
648            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
649            let new_val_ptr = realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * self.cap) as *mut N;
650            let new_ind_ptr = realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * self.cap) as *mut usize;
651            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
652                oom();
653            }
654            self.val_ptr = NonNull::new_unchecked(new_val_ptr);
655            self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
656        }
657    }
658}
659
660impl<N> Clone for RawZeroSpVec<N> 
661where N: Num
662{
663    #[inline]
664    fn clone(&self) -> Self {
665        unsafe {
666            // If cap == 0 (no allocation) or cap == usize::MAX (ZST marker),
667            // return a dangling-pointer RawZeroSpVec without allocating.
668            if self.cap == 0 || self.cap == usize::MAX {
669                return RawZeroSpVec {
670                    val_ptr: NonNull::dangling(),
671                    ind_ptr: NonNull::dangling(),
672                    cap: self.cap,
673                    _marker: PhantomData,
674                };
675            }
676
677            let val_elem_size = mem::size_of::<N>();
678            let ind_elem_size = mem::size_of::<usize>();
679
680            let t_align = mem::align_of::<N>();
681            let usize_align = mem::align_of::<usize>();
682
683            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
684            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
685            let new_val_ptr = alloc(new_val_layout) as *mut N;
686            let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
687            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
688                oom();
689            }
690            ptr::copy_nonoverlapping(self.val_ptr.as_ptr(), new_val_ptr, self.cap);
691            ptr::copy_nonoverlapping(self.ind_ptr.as_ptr(), new_ind_ptr, self.cap);
692
693            RawZeroSpVec {
694                val_ptr: NonNull::new_unchecked(new_val_ptr),
695                ind_ptr: NonNull::new_unchecked(new_ind_ptr),
696                cap: self.cap,
697                _marker: PhantomData,
698            }
699        }
700    }
701}
702
703unsafe impl<N: Num + Send> Send for RawZeroSpVec<N> {}
704unsafe impl<N: Num + Sync> Sync for RawZeroSpVec<N> {}
705
706impl<N> Drop for RawZeroSpVec<N> 
707where N: Num
708{
709    #[inline]
710    fn drop(&mut self) {
711        unsafe {
712            // If no allocation was performed (cap == 0) or this is a ZST marker (usize::MAX), skip deallocation.
713            if self.cap == 0 || self.cap == usize::MAX {
714                return;
715            }
716
717            let val_elem_size = mem::size_of::<N>();
718            let ind_elem_size = mem::size_of::<usize>();
719
720            let t_align = mem::align_of::<N>();
721            let usize_align = mem::align_of::<usize>();
722
723            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
724            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
725            dealloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout);
726            dealloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout);
727        }
728    }
729}
730
731/// OutOfMemoryへの対処用
732/// プロセスを終了させる
733/// 本来はpanic!を使用するべきだが、
734/// OOMの場合panic!を発生させるとTraceBackによるメモリ仕様が起きてしまうため
735/// 仕方なく強制終了させる
736/// 本来OOMはOSにより管理され発生前にKillされるはずなのであんまり意味はない。
737#[cold]
738fn oom() {
739    ::std::process::exit(-9999);
740}