csv_lib/models/
row.rs

1use std::borrow::Cow;
2use crate::decoders::decoders::Encoding;
3use crate::helpers::bytes_helper::locate_line_break_memchr3;
4use crate::models::field::Field;
5#[cfg(target_arch = "x86_64")]
6use crate::helpers::bytes_helper::locate_line_break_avx2;
7use crate::models::in_row_iter::InRowIter;
8
9#[derive(Debug)]
10#[allow(dead_code)]
11pub struct Row<'mmap>{
12    slice: &'mmap[u8],
13    cursor: usize,
14    field_separator: u8,
15    force_mem_cacher: bool,
16    iter : InRowIter<'mmap>,
17}
18
19impl<'mmap> Row<'mmap> {
20    /// ## Constructor
21    /// - Make a new instance of `Row` struct.
22    pub fn new(
23        slice: &'mmap [u8],
24        field_separator: u8,
25        string_delimiter: u8,
26        force_mem_cacher: bool,
27    ) -> Self {
28        let i = InRowIter::new(slice, field_separator, string_delimiter);
29        Self {
30            slice,
31            cursor: 0,
32            field_separator,
33            force_mem_cacher,
34            iter : i,
35        }
36    }
37    /// ## Creates a new Empty row
38    pub fn new_empty() -> Self {
39        Self {
40            slice: &[],
41            cursor: 0,
42            field_separator: 0,
43            force_mem_cacher: false,
44            iter : InRowIter::new(&[], 0, 0),
45        }
46    }
47    
48    /// ## Is Empty
49    /// - Gets if a row is empty
50    pub fn is_empty(&self) -> bool {
51        self.slice.is_empty()
52    }
53    
54    /// ## Get Slice of &[[u8]] bytes
55    /// - Gets a reference of the bytes inside row.
56    /// #### `returns` : &'mmap [[u8]]
57    pub fn get_slice(&self) -> &'mmap [u8] {
58        &self.slice
59    }
60
61    #[allow(dead_code)]
62    /// ## Next Field
63    /// - Finds the next slice between current position and delimiter
64    /// - Pack this slice, inside a Field struct in order to access field functions.
65    /// - Can iter between differents Fields, in order.
66    /// #### `returns` : An Option<Field<'mmap>>
67    pub fn next_field(&mut self) -> Option<Field<'mmap>> {
68        //If we move here the cfg, and target compariision, is faster. only doit once, and not on each line iter.
69        if self.force_mem_cacher {
70            return match self.next_raw_memchr3() {
71                Some(row) => Some(Field::new(row)),
72                None => None
73            }
74        }
75        #[cfg(target_arch = "x86_64")]
76        {
77            //En x86, si soporta avx2 lo uso
78            if is_x86_feature_detected!("avx2") {
79                match unsafe{ self.new_raw_avx2() }{
80                    Some(row) => Some(Field::new(row)),
81                    None => None
82                }
83            }else{
84                //En x86, si no soporta avx2, uso el memcach3
85                match self.next_raw_memchr3(){
86                    Some(row) => Some(Field::new(row)),
87                    None => None
88                }
89            }
90        }
91        #[cfg(target_arch = "aarch64")]{
92             match self.new_raw_neon(){
93                Some(row) => Some(Field::new(row)),
94                None => None
95            }
96        }
97    }
98
99    #[allow(dead_code)]
100    /// # Get Field by Index
101    /// - Receives an usize (zero based index), and returns the field associated to the iteration.
102    /// #### `returns`: An Option<Field<'mmap>>
103    pub fn get_index(&self, index: usize) -> Field<'mmap> {
104        let data = &self.iter.peek_field_index(index);
105        match data {
106            Some(f) => Field::new(f),
107            None => Field::new_empty()
108        }
109    }
110    /// ## Decodes the full line
111    /// - Must give the encoding.
112    pub fn decode_line(&mut self, enc: Encoding) -> Cow<str>{
113         enc.decode(self.slice)
114    }
115    
116    //------------------------------------------------------------//
117    //--------------------- PRIVATE ------------------------------//
118    //------------------------------------------------------------//
119    #[cfg(target_arch = "aarch64")]
120    fn new_raw_neon(&mut self) -> Option<&'mmap[u8]> {
121        unsafe {
122            // Obtain the unmapped slice starting from the cursor
123            let slice = &self.slice[self.cursor..];
124            // Locate the break index
125            match crate::helpers::bytes_helper::locate_line_break_neon(slice, self.field_separator) {
126                0 => {
127                    // EOF, reset cursor
128                    self.reset_cursor();
129                    None
130                }
131                sep_index => {
132                    // Correctly extract the row WITHOUT including the separator
133                    let row = &self.slice[self.cursor..self.cursor + sep_index];
134
135                    // Now we must remove the separator bytes at the end
136                    // Check if row ends with \r\n
137                    let end = if row.ends_with(b"\r\n") {
138                        2
139                    } else if row.ends_with(&[b'\n']) || row.ends_with(&[b'\r']) {
140                        1
141                    } else {
142                        0 // in case of custom separator (or no separator)
143                    };
144
145                    // Final row slice without line break or separator
146                    let row = &row[..row.len() - end];
147
148                    // Move the cursor forward to after the separator
149                    self.cursor += sep_index;
150
151                    Some(row)
152                }
153            }
154        }
155    }
156    #[cfg(target_arch = "x86_64")]
157    #[target_feature(enable = "avx2")]
158    unsafe fn new_raw_avx2(&mut self) -> Option<&'mmap[u8]> {
159        unsafe {
160            let slice = &self.slice[self.cursor..];
161
162            let sep_index = locate_line_break_avx2(slice, self.field_separator);
163
164            if sep_index == 0 {
165                self.reset_cursor();
166                return None;
167            }
168
169            let full_row = &self.slice[self.cursor..self.cursor + sep_index];
170
171            let trim_len = if full_row.ends_with(b"\r\n") {
172                2
173            } else if full_row.ends_with(&[b'\r']) || full_row.ends_with(&[b'\n']) {
174                1
175            } else {
176                0
177            };
178
179            let valid_len = full_row.len().saturating_sub(trim_len);
180
181            let row = &full_row[..valid_len];
182
183            self.cursor += sep_index;
184
185            Some(row)
186        }
187    }
188
189    fn next_raw_memchr3(&mut self) -> Option<&'mmap[u8]> {
190        //determine the tos end slice
191        let slice = &self.slice[self.cursor ..];
192        //Determine the line break cursor position
193        match locate_line_break_memchr3(
194            slice,
195            self.cursor,
196            self.field_separator
197        ) {
198            0 => {
199                //EOF, so, reset cursor
200                self.reset_cursor();
201                None
202            }
203            i => {
204                //Take a reference of the map file
205                let map =  &self.slice[..];
206                //Return the byte slice of a row
207                let row = &map[self.cursor .. i];
208                //Move the cursor position
209                self.cursor = i;
210                //Extract the byte line
211                Some(row)
212            }
213        }
214    }
215
216    /// Reset the internal cursor
217    fn reset_cursor(&mut self) {
218        self.cursor = 0;
219    }
220}
221