tf_idf_vectorizer/utils/math/vector/
mod.rs

1// pub mod math;
2pub mod serde;
3
4use std::{alloc::{alloc, dealloc, realloc, Layout}, fmt, marker::PhantomData, mem, ptr::{self, NonNull}};
5use std::ops::Index;
6use std::fmt::Debug;
7
8use num::Num;
9/// ZeroSpVecは0要素を疎とした過疎ベクトルを実装です
10/// indices と valuesを持ち
11/// indicesは要素のインデックスを保持し、
12/// valuesは要素の値を保持します
13/// 
14/// 要素はindicesの昇順でソートされていることを保証します
15pub struct ZeroSpVec<N>  
16where N: Num
17{
18    buf: RawZeroSpVec<N>,
19    len: usize,
20    nnz: usize,
21    zero: N,
22}
23
24pub trait ZeroSpVecTrait<N>: Clone + Default + Index<usize, Output = N>
25where N: Num
26{
27    unsafe fn ind_ptr(&self) -> *mut usize;
28    unsafe fn val_ptr(&self) -> *mut N;
29    unsafe fn raw_push(&mut self, index: usize, value: N);
30    fn ind_binary_search(&self, index: &usize) -> Result<usize, usize>;
31    fn new() -> Self;
32    fn with_capacity(cap: usize) -> Self;
33    fn reserve(&mut self, additional: usize);
34    fn shrink_to_fit(&mut self);
35    fn is_empty(&self) -> bool;
36    fn len(&self) -> usize;
37    fn len_mut(&mut self) -> &mut usize;
38    fn capacity(&self) -> usize;
39    fn nnz(&self) -> usize;
40    fn add_dim(&mut self, dim: usize);
41    fn clear(&mut self);
42    fn push(&mut self, elem: N);
43    fn pop(&mut self) -> Option<N>;
44    fn get(&self, index: usize) -> Option<&N>;
45    fn get_ind(&self, index: usize) -> Option<usize>;
46    fn remove(&mut self, index: usize) -> N;
47    fn from_vec(vec: Vec<N>) -> Self;
48    fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>) -> Self;
49    fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>) -> Self;
50    fn iter(&self) -> ZeroSpVecIter<N>;
51    fn raw_iter(&self) -> ZeroSpVecRawIter<N>;
52}
53
54impl<N> ZeroSpVecTrait<N> for ZeroSpVec<N> 
55where N: Num
56{
57    #[inline]
58    unsafe fn ind_ptr(&self) -> *mut usize {
59        self.buf.ind_ptr.as_ptr()
60    }
61
62    #[inline]
63    unsafe fn val_ptr(&self) -> *mut N {
64        self.buf.val_ptr.as_ptr()
65    }
66
67    /// raw_pushは、要素を追加するためのメソッドです。
68    /// ただし全体の長さを考慮しませんのであとでlenを更新する必要があります。
69    /// 
70    /// # Arguments
71    /// - `index` - 追加する要素のインデックス
72    /// - `value` - 追加する要素の値
73    #[inline]
74    unsafe fn raw_push(&mut self, index: usize, value: N) {
75        if self.nnz == self.buf.cap {
76            self.buf.grow();
77        }
78        unsafe {
79            let val_ptr = self.val_ptr().add(self.nnz);
80            let ind_ptr = self.ind_ptr().add(self.nnz);
81            ptr::write(val_ptr, value);
82            ptr::write(ind_ptr, index);
83        }
84        self.nnz += 1;
85    }
86
87    #[inline]
88    fn ind_binary_search(&self, index: &usize) -> Result<usize, usize> {
89        // 要素が無い場合は「まだどこにも挿入されていない」ので Err(0)
90        if self.nnz == 0 {
91            return Err(0);
92        }
93
94        let mut left = 0;
95        let mut right = self.nnz - 1;
96        while left < right {
97            let mid = left + (right - left) / 2;
98            let mid_index = unsafe { ptr::read(self.ind_ptr().add(mid)) };
99            if mid_index == *index {
100                return Ok(mid);
101            } else if mid_index < *index {
102                left = mid + 1;
103            } else {
104                right = mid;
105            }
106        }
107
108        // ループ終了後 left == right の位置になっている
109        let final_index = unsafe { ptr::read(self.ind_ptr().add(left)) };
110        if final_index == *index {
111            Ok(left)
112        } else if final_index < *index {
113            Err(left + 1)
114        } else {
115            Err(left)
116        }
117    }
118
119    #[inline]
120    fn new() -> Self {
121        ZeroSpVec {
122            buf: RawZeroSpVec::new(),
123            len: 0,
124            nnz: 0,
125            zero: N::zero(),
126        }
127    }
128
129    #[inline]
130    fn with_capacity(cap: usize) -> Self {
131        let mut buf = RawZeroSpVec::new();
132        buf.cap = cap;
133        buf.cap_set();
134        ZeroSpVec {
135            buf: buf,
136            len: 0,
137            nnz: 0,
138            zero: N::zero(),
139        }
140    }
141
142    #[inline]
143    fn reserve(&mut self, additional: usize) {
144        let new_cap = self.nnz + additional;
145        if new_cap > self.buf.cap {
146            self.buf.cap = new_cap;
147            self.buf.re_cap_set();
148        }
149    }
150
151    #[inline]
152    fn shrink_to_fit(&mut self) {
153        if self.len < self.buf.cap {
154            let new_cap = self.nnz;
155            self.buf.cap = new_cap;
156            self.buf.re_cap_set();
157        }
158    }
159
160    #[inline]
161    fn is_empty(&self) -> bool {
162        self.len == 0
163    }
164
165    #[inline]
166    fn len(&self) -> usize {
167        self.len
168    }
169
170    #[inline]
171    fn len_mut(&mut self) -> &mut usize {
172        &mut self.len
173    }
174
175    #[inline]
176    fn capacity(&self) -> usize {
177        self.buf.cap
178    }
179
180    #[inline]
181    fn nnz(&self) -> usize {
182        self.nnz
183    }
184
185    #[inline]
186    fn add_dim(&mut self, dim: usize) {
187        self.len += dim;
188    }
189
190    #[inline]
191    fn clear(&mut self) {
192        while let Some(_) = self.pop() {
193            // do nothing
194        }
195    }
196
197    #[inline]
198    fn push(&mut self, elem: N) {
199        if self.nnz == self.buf.cap {
200            self.buf.grow();
201        }
202        if elem != N::zero() {
203            unsafe {
204                let val_ptr = self.val_ptr().add(self.nnz);
205                let ind_ptr = self.ind_ptr().add(self.nnz);
206                ptr::write(val_ptr, elem);
207                ptr::write(ind_ptr, self.len);
208            }
209            self.nnz += 1;
210        }
211        self.len += 1;
212    }
213
214    #[inline]
215    fn pop(&mut self) -> Option<N> {
216        if self.nnz == 0 {
217            return None;
218        }
219        let pop_element = if self.nnz == self.len {
220            self.nnz -= 1;
221            unsafe {
222                Some(ptr::read(self.val_ptr().add(self.nnz)))
223            }
224        } else {
225            Some(N::zero())
226        };
227        self.len -= 1;
228        pop_element
229    }
230
231    #[inline]
232    fn get(&self, index: usize) -> Option<&N> {
233        if index >= self.len {
234            return None;
235        }
236        match self.ind_binary_search(&index) {
237            Ok(idx) => {
238                unsafe {
239                    Some(&*self.val_ptr().add(idx))
240                }
241            },
242            Err(_) => {
243                Some(&self.zero)
244            }
245        }
246    }
247
248    #[inline]
249    fn get_ind(&self, index: usize) -> Option<usize> {
250        if index >= self.nnz {
251            return None;
252        }
253        unsafe {
254            Some(ptr::read(self.ind_ptr().add(index)))
255        }
256    }
257
258
259
260    /// removeメソッド
261    /// 
262    /// `index` 番目の要素を削除し、削除した要素を返します。
263    /// - 論理インデックス `index` が物理的に存在すれば、その値を返す
264    /// - 物理的になければ(= デフォルト扱いだった)デフォルト値を返す
265    /// 
266    /// # Arguments
267    /// - `index` - 削除する要素の論理インデックス
268    /// 
269    /// # Returns
270    /// - `N` - 削除した要素の値
271    #[inline]
272    fn remove(&mut self, index: usize) -> N {
273        debug_assert!(index < self.len, "index out of bounds");
274        
275        // 論理的な要素数は常に1つ減る
276        self.len -= 1;
277
278        match self.ind_binary_search(&index) {
279            Ok(i) => {
280                // 今回削除する要素を読みだす
281                let removed_val = unsafe {
282                    ptr::read(self.val_ptr().add(i))
283                };
284
285                // `i` 番目を削除するので、後ろを前にシフト
286                let count = self.nnz - i - 1;
287                if count > 0 {
288                    unsafe {
289                        // 値をコピーして前につめる
290                        ptr::copy(
291                            self.val_ptr().add(i + 1),
292                            self.val_ptr().add(i),
293                            count
294                        );
295                        // インデックスもコピーして前につめる
296                        ptr::copy(
297                            self.ind_ptr().add(i + 1),
298                            self.ind_ptr().add(i),
299                            count
300                        );
301                        // シフトした後のインデックスは全て -1 (1つ前に詰める)
302                        for offset in i..(self.nnz - 1) {
303                            *self.ind_ptr().add(offset) -= 1;
304                        }
305                    }
306                }
307                // nnzは 1 減
308                self.nnz -= 1;
309
310                // 取り除いた要素を返す
311                removed_val
312            }
313            Err(i) => {
314                // index は詰める必要があるので、i 以降の要素のインデックスを -1
315                // (たとえば “要素自体は無い” けど、後ろにある要素は
316                //  論理インデックスが 1 つ前になる)
317                if i < self.nnz {
318                    unsafe {
319                        for offset in i..self.nnz {
320                            *self.ind_ptr().add(offset) -= 1;
321                        }
322                    }
323                }
324
325                // 0返す
326                N::zero()
327            }
328        }
329    }
330
331    #[inline]
332    fn from_vec(vec: Vec<N>) -> Self {
333        let mut zero_sp_vec = ZeroSpVec::with_capacity(vec.len());
334        for entry in vec {
335            zero_sp_vec.push(entry);
336        }
337        zero_sp_vec
338    }
339
340    #[inline]
341    fn from_raw_iter(iter: impl Iterator<Item = (usize, N)>) -> Self {
342        let mut zero_sp_vec = ZeroSpVec::with_capacity(iter.size_hint().0);
343        for (index, value) in iter {
344            unsafe {
345                zero_sp_vec.raw_push(index, value);
346            }
347            zero_sp_vec.len += 1;
348        }
349        zero_sp_vec
350    }
351
352    /// Build from sparse iterator that yields only non-zero elements (idx, value).
353    /// This avoids allocating a full dense Vec when most entries are zero.
354    #[inline]
355    fn from_sparse_iter(iter: impl Iterator<Item = (usize, N)>) -> Self {
356        let mut zero_sp_vec = ZeroSpVec::new();
357        for (index, value) in iter {
358            if value != N::zero() {
359                unsafe {
360                    zero_sp_vec.raw_push(index, value);
361                }
362                zero_sp_vec.len += 1;
363            }
364        }
365        zero_sp_vec
366    }
367
368    #[inline]
369    fn iter(&self) -> ZeroSpVecIter<N> {
370        ZeroSpVecIter {
371            vec: self,
372            pos: 0,
373        }
374    }
375
376    #[inline]
377    fn raw_iter(&self) -> ZeroSpVecRawIter<N> {
378        ZeroSpVecRawIter {
379            vec: self,
380            pos: 0,
381        }
382    }
383}
384
385unsafe impl <N: Num + Send> Send for ZeroSpVec<N> {}
386unsafe impl <N: Num + Sync> Sync for ZeroSpVec<N> {}
387
388impl<N> Clone for ZeroSpVec<N> 
389where N: Num
390{
391    #[inline]
392    fn clone(&self) -> Self {
393        ZeroSpVec {
394            buf: self.buf.clone(),
395            len: self.len,
396            nnz: self.nnz,
397            zero: N::zero(),
398        }
399    }
400}
401
402impl<N> Drop for ZeroSpVec<N> 
403where N: Num
404{
405    #[inline]
406    fn drop(&mut self) {
407        // RawZeroSpVecで実装済み
408    }
409}
410
411impl<N> Default for ZeroSpVec<N> 
412where N: Num
413{
414    #[inline]
415    fn default() -> Self {
416        ZeroSpVec::new()
417    }
418}
419
420impl<N> Index<usize> for ZeroSpVec<N> 
421where N: Num
422{
423    type Output = N;
424
425    #[inline]
426    fn index(&self, index: usize) -> &Self::Output {
427        self.get(index).expect("index out of bounds")
428    }
429}
430
431impl<N: Num + Debug> Debug for ZeroSpVec<N> {
432    #[inline]
433    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
434        if f.sign_plus() {
435            f.debug_struct("DefaultSparseVec")
436                .field("buf", &self.buf)
437                .field("nnz", &self.nnz)
438                .field("len", &self.len)
439                .field("zero", &self.zero)
440                .finish()
441        } else if f.alternate() {
442            write!(f, "ZeroSpVec({:?})", self.iter().collect::<Vec<&N>>())
443        } else {
444            f.debug_list().entries((0..self.len).map(|i| self.get(i).unwrap())).finish()
445        }
446    }
447}
448
449pub struct ZeroSpVecIter<'a, N> 
450where N: Num
451{
452    vec: &'a ZeroSpVec<N>,
453    pos: usize,
454}
455
456impl<'a, N> Iterator for ZeroSpVecIter<'a, N> 
457where N: Num
458{
459    type Item = &'a N;
460
461    #[inline]
462    fn next(&mut self) -> Option<Self::Item> {
463        self.vec.get(self.pos).map(|val| {
464            self.pos += 1;
465            val
466        })
467    }
468}
469
470pub struct ZeroSpVecRawIter<'a, N> 
471where N: Num
472{
473    vec: &'a ZeroSpVec<N>,
474    pos: usize,
475}
476
477impl<'a, N> Iterator for ZeroSpVecRawIter<'a, N> 
478where N: Num
479{
480    type Item = (usize, &'a N);
481
482    #[inline]
483    fn next(&mut self) -> Option<Self::Item> {
484        if self.pos < self.vec.nnz() {
485            let index = unsafe { *self.vec.ind_ptr().add(self.pos) };
486            let value = unsafe { &*self.vec.val_ptr().add(self.pos) };
487            self.pos += 1;
488            Some((index, value))
489        } else {
490            None
491        }
492    }
493
494    #[inline]
495    fn size_hint(&self) -> (usize, Option<usize>) {
496        (self.vec.nnz(), Some(self.vec.len()))
497    }
498}
499
500impl<T> From<Vec<T>> for ZeroSpVec<T>
501where T: Num
502{
503    #[inline]
504    fn from(vec: Vec<T>) -> Self {
505        ZeroSpVec::from_vec(vec)
506    }
507}
508
509impl<'a, N> From<ZeroSpVecRawIter<'a, N>> for ZeroSpVec<N>
510where
511    N: Num + Copy,
512{
513    #[inline]
514    fn from(iter: ZeroSpVecRawIter<'a, N>) -> Self {
515        let mut vec = ZeroSpVec::new();
516        for (idx, val) in iter {
517            unsafe {
518                vec.raw_push(idx, *val);
519            }
520            vec.len += 1;
521        }
522        vec
523    }
524}
525
526
527
528
529
530
531
532
533
534/// ZeroSpVecの生実装
535#[derive(Debug)]
536struct RawZeroSpVec<N> 
537where N: Num 
538{
539    val_ptr: NonNull<N>,
540    ind_ptr: NonNull<usize>,
541    /// cap 定義
542    /// 0 => メモリ未確保 (flag)
543    /// usize::MAX =>  zero size struct (ZST) として定義 処理の簡略化を実施 (flag)
544    /// _ => 実際のcapN
545    cap: usize,
546    _marker: PhantomData<N>, // 所有権管理用にPhantomDataを追加
547}
548
549impl<N> RawZeroSpVec<N> 
550where N: Num
551{
552    #[inline]
553    fn new() -> Self {
554        // zero size struct (ZST)をusize::MAXと定義 ある種のフラグとして使用
555        let cap = if mem::size_of::<N>() == 0 { std::usize::MAX } else { 0 }; 
556
557        RawZeroSpVec {
558            // 空のポインタを代入しておく メモリ確保を遅延させる
559            val_ptr: NonNull::dangling(),
560            // 空のポインタを代入しておく メモリ確保を遅延させる
561            ind_ptr: NonNull::dangling(),
562            cap: cap,
563            _marker: PhantomData,
564        }
565    }
566
567    #[inline]
568    fn grow(&mut self) {
569        unsafe {
570            let val_elem_size = mem::size_of::<N>();
571            let ind_elem_size = mem::size_of::<usize>();
572
573            // 安全性: ZSTの場合growはcapを超えた場合にしか呼ばれない
574            // これは必然的にオーバーフローしていることをしめしている
575            debug_assert!(val_elem_size != 0, "capacity overflow");
576
577            // アライメントの取得 適切なメモリ確保を行うため
578            let t_align = mem::align_of::<N>();
579            let usize_align = mem::align_of::<usize>();
580
581            // アロケーション
582            let (new_cap, val_ptr, ind_ptr): (usize, *mut N, *mut usize) = 
583                if self.cap == 0 {
584                    let new_val_layout = Layout::from_size_align(val_elem_size, t_align).expect("Failed to create memory layout");
585                    let new_ind_layout = Layout::from_size_align(ind_elem_size, usize_align).expect("Failed to create memory layout");
586                    (
587                        1,
588                        alloc(new_val_layout) as *mut N,
589                        alloc(new_ind_layout) as *mut usize,
590                    )
591                } else {
592                    // 効率化: cap * 2 でメモリを確保する 見た目上はO(log n)の増加を実現
593                    let new_cap = self.cap * 2;
594                    let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout for reallocation");
595                    let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout for reallocation");
596                    (
597                        new_cap,
598                        realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * new_cap) as *mut N,
599                        realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * new_cap) as *mut usize,
600                    )
601                };
602
603            // アロケーション失敗時の処理
604            if val_ptr.is_null() || ind_ptr.is_null() {
605                oom();
606            }
607
608            // selfに返却
609            self.val_ptr = NonNull::new_unchecked(val_ptr);
610            self.ind_ptr = NonNull::new_unchecked(ind_ptr);
611            self.cap = new_cap;
612        }
613    }
614    
615    #[inline]
616    fn cap_set(&mut self) {
617        unsafe {
618            let val_elem_size = mem::size_of::<N>();
619            let ind_elem_size = mem::size_of::<usize>();
620
621            let t_align = mem::align_of::<N>();
622            let usize_align = mem::align_of::<usize>();
623
624            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
625            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
626            let new_val_ptr = alloc(new_val_layout) as *mut N;
627            let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
628            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
629                oom();
630            }
631            self.val_ptr = NonNull::new_unchecked(new_val_ptr);
632            self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
633        }
634    }
635
636    #[inline]
637    fn re_cap_set(&mut self) {
638        unsafe {
639            let val_elem_size = mem::size_of::<N>();
640            let ind_elem_size = mem::size_of::<usize>();
641
642            let t_align = mem::align_of::<N>();
643            let usize_align = mem::align_of::<usize>();
644
645            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
646            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
647            let new_val_ptr = realloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout, val_elem_size * self.cap) as *mut N;
648            let new_ind_ptr = realloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout, ind_elem_size * self.cap) as *mut usize;
649            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
650                oom();
651            }
652            self.val_ptr = NonNull::new_unchecked(new_val_ptr);
653            self.ind_ptr = NonNull::new_unchecked(new_ind_ptr);
654        }
655    }
656}
657
658impl<N> Clone for RawZeroSpVec<N> 
659where N: Num
660{
661    #[inline]
662    fn clone(&self) -> Self {
663        unsafe {
664            // If cap == 0 (no allocation) or cap == usize::MAX (ZST marker),
665            // return a dangling-pointer RawZeroSpVec without allocating.
666            if self.cap == 0 || self.cap == usize::MAX {
667                return RawZeroSpVec {
668                    val_ptr: NonNull::dangling(),
669                    ind_ptr: NonNull::dangling(),
670                    cap: self.cap,
671                    _marker: PhantomData,
672                };
673            }
674
675            let val_elem_size = mem::size_of::<N>();
676            let ind_elem_size = mem::size_of::<usize>();
677
678            let t_align = mem::align_of::<N>();
679            let usize_align = mem::align_of::<usize>();
680
681            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
682            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
683            let new_val_ptr = alloc(new_val_layout) as *mut N;
684            let new_ind_ptr = alloc(new_ind_layout) as *mut usize;
685            if new_val_ptr.is_null() || new_ind_ptr.is_null() {
686                oom();
687            }
688            ptr::copy_nonoverlapping(self.val_ptr.as_ptr(), new_val_ptr, self.cap);
689            ptr::copy_nonoverlapping(self.ind_ptr.as_ptr(), new_ind_ptr, self.cap);
690
691            RawZeroSpVec {
692                val_ptr: NonNull::new_unchecked(new_val_ptr),
693                ind_ptr: NonNull::new_unchecked(new_ind_ptr),
694                cap: self.cap,
695                _marker: PhantomData,
696            }
697        }
698    }
699}
700
701unsafe impl<N: Num + Send> Send for RawZeroSpVec<N> {}
702unsafe impl<N: Num + Sync> Sync for RawZeroSpVec<N> {}
703
704impl<N> Drop for RawZeroSpVec<N> 
705where N: Num
706{
707    #[inline]
708    fn drop(&mut self) {
709        unsafe {
710            // If no allocation was performed (cap == 0) or this is a ZST marker (usize::MAX), skip deallocation.
711            if self.cap == 0 || self.cap == usize::MAX {
712                return;
713            }
714
715            let val_elem_size = mem::size_of::<N>();
716            let ind_elem_size = mem::size_of::<usize>();
717
718            let t_align = mem::align_of::<N>();
719            let usize_align = mem::align_of::<usize>();
720
721            let new_val_layout = Layout::from_size_align(val_elem_size * self.cap, t_align).expect("Failed to create memory layout");
722            let new_ind_layout = Layout::from_size_align(ind_elem_size * self.cap, usize_align).expect("Failed to create memory layout");
723            dealloc(self.val_ptr.as_ptr() as *mut u8, new_val_layout);
724            dealloc(self.ind_ptr.as_ptr() as *mut u8, new_ind_layout);
725        }
726    }
727}
728
729/// OutOfMemoryへの対処用
730/// プロセスを終了させる
731/// 本来はpanic!を使用するべきだが、
732/// OOMの場合panic!を発生させるとTraceBackによるメモリ仕様が起きてしまうため
733/// 仕方なく強制終了させる
734/// 本来OOMはOSにより管理され発生前にKillされるはずなのであんまり意味はない。
735#[cold]
736fn oom() {
737    ::std::process::exit(-9999);
738}