polars_utils/
row_counter.rs

1use std::fmt::Debug;
2
3use polars_error::{PolarsResult, polars_err};
4
5use crate::IdxSize;
6
/// Tracker counting physical and deleted rows.
///
/// The two counts are stored separately; the number of rows that remain after
/// deletions is `physical_rows - deleted_rows`.
///
/// Both fields are plain `usize` values, so equality is total and the type
/// derives `Eq` in addition to `PartialEq`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RowCounter {
    /// Number of rows physically present in the file.
    physical_rows: usize,
    /// Number of rows deleted from the file.
    deleted_rows: usize,
}
15
16impl RowCounter {
17    /// `usize::MAX` physical rows, 0 deleted rows
18    pub const MAX: Self = Self {
19        physical_rows: usize::MAX,
20        deleted_rows: 0,
21    };
22
23    /// Does not check if `physical_rows < deleted_rows`.
24    ///
25    /// # Safety
26    /// This does not represent a valid row position and should not be used as such.
27    ///
28    /// # Panics
29    /// Panics if [`usize`] conversion fails.
30    #[inline]
31    unsafe fn new_unchecked<P, D>(physical_rows: P, deleted_rows: D) -> Self
32    where
33        usize: TryFrom<P> + TryFrom<D>,
34        <usize as TryFrom<P>>::Error: Debug,
35        <usize as TryFrom<D>>::Error: Debug,
36    {
37        Self {
38            physical_rows: usize::try_from(physical_rows).unwrap(),
39            deleted_rows: usize::try_from(deleted_rows).unwrap(),
40        }
41    }
42
43    /// # Panics
44    /// Panics if `deleted_rows > physical_rows`, or if [`usize`] conversion fails.
45    #[inline]
46    pub fn new<P, D>(physical_rows: P, deleted_rows: D) -> Self
47    where
48        usize: TryFrom<P> + TryFrom<D>,
49        <usize as TryFrom<P>>::Error: Debug,
50        <usize as TryFrom<D>>::Error: Debug,
51    {
52        let slf = unsafe { Self::new_unchecked(physical_rows, deleted_rows) };
53
54        // Trigger validation
55        slf.num_rows().unwrap();
56
57        slf
58    }
59
60    /// # Safety
61    /// The caller is responsible for ensuring the value is correct.
62    ///
63    /// # Panics
64    /// Panics if `self.physical_rows < self.deleted_rows`
65    pub unsafe fn set_deleted_rows<D>(&mut self, deleted_rows: D)
66    where
67        usize: TryFrom<D>,
68        <usize as TryFrom<D>>::Error: Debug,
69    {
70        self.deleted_rows = usize::try_from(deleted_rows).unwrap();
71        self.num_rows().unwrap();
72    }
73
74    /// Performs a saturating add if there are no deleted rows, otherwise performs a checked add.
75    ///
76    /// # Panics
77    /// Panics if there are deleted rows and addition overflows.
78    #[allow(clippy::should_implement_trait)]
79    pub fn add(self, other: Self) -> Self {
80        (|| {
81            let physical_rows = self.physical_rows.checked_add(other.physical_rows);
82            let deleted_rows = self.deleted_rows.checked_add(other.deleted_rows)?;
83
84            let physical_rows = if deleted_rows == 0 {
85                physical_rows.unwrap_or(usize::MAX)
86            } else {
87                // If there are row deletions we cannot saturate the position properly (the
88                // `num_rows()` will start to decrease).
89                physical_rows?
90            };
91
92            Some(Self {
93                physical_rows,
94                deleted_rows,
95            })
96        })()
97        .unwrap_or_else(|| panic!("addition overflow: {self:?} + {other:?}"))
98    }
99
100    /// # Panics
101    /// Panics if subtraction overflows.
102    #[allow(clippy::should_implement_trait)]
103    pub fn sub(self, other: Self) -> Self {
104        let func = |a: usize, b: usize| {
105            a.checked_sub(b)
106                .unwrap_or_else(|| panic!("subtraction overflow: {self:?} - {other:?}"))
107        };
108
109        Self {
110            physical_rows: func(self.physical_rows, other.physical_rows),
111            deleted_rows: func(self.deleted_rows, other.deleted_rows),
112        }
113    }
114
115    /// Returns the number of rows after applying deletions. This returns an
116    /// error if there are more deleted rows than physical rows.
117    pub fn num_rows(&self) -> PolarsResult<usize> {
118        self.physical_rows
119            .checked_sub(self.deleted_rows)
120            .ok_or_else(|| {
121                polars_err!(
122                    ComputeError: "RowCounter: Invalid state: \
123                    number of rows removed by deletion files ({}) \
124                    is greater than the number of rows physically present ({})",
125                    self.deleted_rows, self.physical_rows,
126                )
127            })
128    }
129
130    /// Returns [`RowCounter::num_rows`] as a usize.
131    #[inline]
132    pub fn num_rows_idxsize(&self) -> PolarsResult<IdxSize> {
133        self.num_rows().and_then(|x| {
134            IdxSize::try_from(x).map_err(|_| {
135                let consider_installing_64 = if cfg!(feature = "bigidx") {
136                    ""
137                } else {
138                    ". Consider installing 'polars[rt64]'."
139                };
140
141                polars_err!(
142                    ComputeError:
143                    "row count ({}) exceeded maximum supported of {} (counter: {:?}){}",
144                    x, IdxSize::MAX, self, consider_installing_64
145                )
146            })
147        })
148    }
149
150    #[inline]
151    /// Saturates to `IdxSize::MAX` if conversion fails
152    pub fn num_rows_idxsize_saturating(&self) -> PolarsResult<IdxSize> {
153        self.num_rows()
154            .map(|x| IdxSize::try_from(x).unwrap_or(IdxSize::MAX))
155    }
156
157    /// Returns the number of rows physically present in the file.
158    #[inline]
159    pub fn num_physical_rows(&self) -> usize {
160        self.physical_rows
161    }
162
163    #[inline]
164    pub fn num_physical_rows_idxsize_saturating(&self) -> IdxSize {
165        IdxSize::try_from(self.physical_rows).unwrap_or(IdxSize::MAX)
166    }
167}