strs_tools/string/
split.rs

1//! Provides tools for splitting strings with advanced options including quoting.
2//!
3//! # Architecture & Rule Compliance Notes
4//!
5//! ## Critical Design Insights:
6//! 
7//! - **Lifetime Management**: All functions with references MUST use explicit lifetime parameters
8//!   per Design Rulebook. The `unescape_str` function was corrected from `fn(input: &str)` 
9//!   to `fn<'a>(input: &'a str)` - this is non-negotiable for maintainability.
10//!   
11//! - **Clippy Conflict Resolution**: The explicit lifetime requirement conflicts with clippy's 
12//!   `elidable_lifetime_names` warning. Design Rulebook takes precedence, so we use 
13//!   `#[ allow( clippy::elidable_lifetime_names ) ]` to suppress the warning while maintaining 
14//!   explicit lifetimes for architectural consistency.
15//!
16//! - **mod_interface Migration**: This module was converted from manual namespace patterns
17//!   to `mod_interface!` macro. This changes the public API structure - functions are now
18//!   accessible via `strs_tools::split()` instead of `strs_tools::string::split()`.
19//!
20//! - **SIMD Optimization Dependencies**: memchr, aho-corasick, bytecount are optional
21//!   dependencies for performance optimization. They MUST be declared in workspace Cargo.toml
22//!   and inherited, not declared locally.
23//!
24//! ## Performance Pitfalls:
25//!
26//! - **Cow<'_, str> Usage**: The `unescape_str` function returns `Cow::Borrowed` when no
27//!   unescaping is needed, avoiding unnecessary allocations. This is critical for performance
28//!   when processing large text with minimal escaping.
29//!
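//! - **Iterator State Management**: `SplitFastIterator` maintains internal state that can
//!   be corrupted if `set_test_state` is used incorrectly in production code. Only
//!   `test_unescape_str` is gated with `#[ cfg( test ) ]`; `set_test_state` and the
//!   `get_test_*` accessors are ordinary `pub` methods, so the compiler does not stop
//!   production code from calling them.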
33//!
34//! ## Security Considerations:
35//!
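//! - **Consumer Owns Unescaping**: For unquoted segments this module does NOT interpret escape
//!   sequences. Raw string slices are returned, and the consumer must handle unescaping safely.
//!   This prevents injection attacks through malformed escape sequences. The one exception is
//!   quoted content when quoting is enabled and `PRESERVING_QUOTING` is disabled: that content
//!   is passed through `unescape_str` before it is returned.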
39
40mod split_behavior;
41pub use split_behavior::SplitFlags;
42
43#[ cfg( feature = "simd" ) ]
44mod simd;
45#[ cfg( feature = "simd" ) ]
46pub use simd::{ SIMDSplitIterator, simd_split_cached, get_or_create_cached_patterns };
47
48/// Internal implementation details for string splitting.
49mod private {
50  #[ cfg( feature = "std" ) ]
51  use std::borrow::Cow;
52  #[ cfg( all( feature = "use_alloc", not( feature = "std" ) ) ) ]
53  use alloc::borrow::Cow;
54  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
55  use crate::string::parse_request::OpType;
56  use super::SplitFlags; // Import SplitFlags from parent module
57
58  /// Helper function to unescape common escape sequences in a string.
59  /// Returns a `Cow::Borrowed` if no unescaping is needed, otherwise `Cow::Owned`.
60  #[ allow( clippy::elidable_lifetime_names ) ] // Design Rulebook requires explicit lifetimes
61  fn unescape_str< 'a >( input : &'a str ) -> Cow< 'a, str >
62  {
63    if !input.contains( '\\' )
64    {
65      return Cow::Borrowed( input );
66    }
67
68    let mut output = String::with_capacity( input.len() );
69    let mut chars = input.chars();
70
71    while let Some(ch) = chars.next() {
72      if ch == '\\' {
73        if let Some(next_ch) = chars.next() {
74          match next_ch {
75            '"' => output.push('"'),
76            '\\' => output.push('\\'),
77            'n' => output.push('\n'),
78            't' => output.push('\t'),
79            'r' => output.push('\r'),
80            '\'' => output.push('\''),
81            _ => {
82              output.push('\\');
83              output.push(next_ch);
84            }
85          }
86        } else {
87          output.push('\\');
88        }
89      } else {
90        output.push(ch);
91      }
92    }
93
94    Cow::Owned(output)
95  }
96
  #[ cfg( test ) ]
  /// Public forwarder to the private `unescape_str` helper, compiled only for test builds.
  ///
  /// Passes `input` through unchanged and returns whatever `unescape_str` returns.
  #[ allow( clippy::elidable_lifetime_names ) ] // Design Rulebook requires explicit lifetimes
  #[ must_use ] pub fn test_unescape_str< 'a >( input : &'a str ) -> Cow< 'a, str >
  {
    unescape_str( input )
  }
104
  /// Represents a segment of a string after splitting.
  ///
  /// A segment is either a piece of content or a delimiter (see `typ`), together with the
  /// byte range it was assigned by the iterator that produced it.
  #[ derive( Debug, Clone, PartialEq, Eq ) ]
  pub struct Split<'a> {
    /// The string content of the segment. Borrowed where the iterators can hand out a slice;
    /// owned where they build a new string (for example after stripping).
    pub string: Cow<'a, str>,
    /// The type of the segment (delimited or delimiter).
    pub typ: SplitType,
    /// The starting byte index of the segment in the original string.
    pub start: usize,

    /// The ending byte index of the segment in the original string. The iterators in this
    /// module compute it as `start` plus the length of `string`, so it is exclusive.
    pub end: usize,
    /// Indicates if the original segment was quoted.
    pub was_quoted: bool,
  }
120
121  impl<'a> From<Split<'a>> for String {
122    fn from(src: Split<'a>) -> Self {
123      src.string.into_owned()
124    }
125  }
126
  /// Defines the type of a split segment.
  ///
  /// Stored in `Split::typ` so that callers can tell content apart from the delimiters
  /// that separated it.
  #[ derive( Debug, Clone, Copy, PartialEq, Eq ) ]
  pub enum SplitType {
    /// A segment of delimited content.
    Delimited,
    /// A segment representing a delimiter.
    Delimiter,
  }
135
  /// Trait for finding the position of a delimiter pattern in a string.
  ///
  /// Implemented in this module for `&str`, `String` and `Vec< &str >`.
  pub trait Searcher {
    /// Finds the first occurrence of the delimiter pattern in `src`.
    /// Returns `Some((start_index, end_index))` if found, `None` otherwise.
    ///
    /// Both indices are byte offsets into `src`; the implementations in this module return
    /// `end_index` as `start_index` plus the byte length of the matched pattern.
    fn pos(&self, src: &str) -> Option< (usize, usize) >;
  }
142
143  impl Searcher for &str {
144    fn pos(&self, src: &str) -> Option< (usize, usize) > {
145      if self.is_empty() {
146        return None;
147      }
148      src.find(self).map(|start| (start, start + self.len()))
149    }
150  }
151
152  impl Searcher for String {
153    fn pos(&self, src: &str) -> Option< (usize, usize) > {
154      if self.is_empty() {
155        return None;
156      }
157      src.find(self).map(|start| (start, start + self.len()))
158    }
159  }
160
161  impl Searcher for Vec< &str > {
162    fn pos(&self, src: &str) -> Option< (usize, usize) > {
163      let mut r = vec![];
164      for pat in self {
165        if pat.is_empty() {
166          continue;
167        }
168        if let Some(x) = src.find(pat) {
169          r.push((x, x + pat.len()));
170        }
171      }
172      if r.is_empty() {
173        return None;
174      }
175      r.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| (a.1 - a.0).cmp(&(b.1 - b.0))));
176      r.first().copied()
177    }
178  }
179
  /// An iterator that quickly splits a string based on a delimiter, without advanced options.
  ///
  /// Its `Iterator` implementation alternates between content tokens and delimiter tokens,
  /// using the parity of `counter` to decide which kind to produce next.
  #[ derive( Debug ) ]
  pub struct SplitFastIterator<'a, D>
  where
    D: Searcher,
  {
    /// The part of the input that has not been consumed yet.
    iterable: &'a str,
    /// Byte offset used as `start` for the next produced token; advanced together with
    /// `iterable` whenever a token is consumed.
    current_offset: usize,
    /// Number of steps taken so far; an odd value after incrementing selects a content
    /// token, an even value selects a delimiter token.
    counter: i32,
    /// The delimiter searcher applied to `iterable`.
    delimeter: D,
  }
192
193  impl<'a, D: Searcher + Default + Clone> SplitFastIterator<'a, D> {
194    fn new(o: &impl SplitOptionsAdapter<'a, D>) -> Self {
195      Self {
196        iterable: o.src(),
197        current_offset: 0,
198        delimeter: o.delimeter(),
199        counter: 0,
200        // active_quote_char : None, // Removed
201      }
202    }
203
204    /// Sets the internal state of the iterator, for testing purposes.
205    // Test helper methods are pub
206    pub fn set_test_state(
207      &mut self,
208      iterable: &'a str,
209      current_offset: usize,
210      // active_quote_char: Option< char >, // Removed
211      counter: i32,
212    ) {
213      self.iterable = iterable;
214      self.current_offset = current_offset;
215      // self.active_quote_char = active_quote_char; // Removed
216      self.counter = counter;
217    }
218
219    /// Gets the current iterable string, for testing purposes.
220    pub fn get_test_iterable(&self) -> &'a str {
221      self.iterable
222    }
223    /// Gets the current offset within the original string, for testing purposes.
224    pub fn get_test_current_offset(&self) -> usize {
225      self.current_offset
226    }
227    /// Gets the currently active quote character, if any, for testing purposes.
228    // pub fn get_test_active_quote_char(&self) -> Option< char > { self.active_quote_char } // Removed
229    /// Gets the internal counter value, for testing purposes.
230    pub fn get_test_counter(&self) -> i32 {
231      self.counter
232    }
233  }
234
235  impl<'a, D: Searcher> Iterator for SplitFastIterator<'a, D> {
236    type Item = Split<'a>;
237    #[ allow( clippy::too_many_lines ) ]
238    fn next(&mut self) -> Option< Self::Item > {
239      if self.iterable.is_empty() && self.counter > 0
240      // Modified condition
241      {
242        return None;
243      }
244      // Removed active_quote_char logic
245      if self.iterable.is_empty() && self.counter > 0 {
246        return None;
247      }
248      self.counter += 1;
249      if self.counter % 2 == 1 {
250        if let Some((d_start, _d_end)) = self.delimeter.pos(self.iterable) {
251          if d_start == 0 {
252            return Some(Split {
253              string: Cow::Borrowed(""),
254              typ: SplitType::Delimited,
255              start: self.current_offset,
256              end: self.current_offset,
257              was_quoted: false,
258            });
259          }
260          let segment_str = &self.iterable[..d_start];
261          let split = Split {
262            string: Cow::Borrowed(segment_str),
263            typ: SplitType::Delimited,
264            start: self.current_offset,
265            end: self.current_offset + segment_str.len(),
266            was_quoted: false,
267          };
268          // println!("DEBUG: SplitFastIterator returning: {:?}", split); // Removed
269          self.current_offset += segment_str.len();
270          self.iterable = &self.iterable[d_start..];
271          Some(split)
272        } else {
273          if self.iterable.is_empty() && self.counter > 1 {
274            return None;
275          }
276          let segment_str = self.iterable;
277          let split = Split {
278            string: Cow::Borrowed(segment_str),
279            typ: SplitType::Delimited,
280            start: self.current_offset,
281            end: self.current_offset + segment_str.len(),
282            was_quoted: false,
283          };
284          // println!("DEBUG: SplitFastIterator returning: {:?}", split); // Removed
285          self.current_offset += segment_str.len();
286          self.iterable = "";
287          Some(split)
288        }
289      } else if let Some((d_start, d_end)) = self.delimeter.pos(self.iterable) {
290        if d_start > 0 {
291          self.iterable = "";
292          return None;
293        }
294        let delimiter_str = &self.iterable[..d_end];
295        let split = Split {
296          string: Cow::Borrowed(delimiter_str),
297          typ: SplitType::Delimiter,
298          start: self.current_offset,
299          end: self.current_offset + delimiter_str.len(),
300          was_quoted: false,
301        };
302        // println!("DEBUG: SplitFastIterator returning: {:?}", split); // Removed
303        self.current_offset += delimiter_str.len();
304        self.iterable = &self.iterable[d_end..];
305        Some(split)
306      } else {
307        None
308      }
309    }
310  }
311
  /// An iterator that splits a string with advanced options like quoting and preservation.
  ///
  /// Wraps a `SplitFastIterator` and, in its `Iterator` implementation, reads and adjusts
  /// that iterator's `iterable`, `current_offset` and `counter` directly.
  #[ allow( clippy::struct_excessive_bools ) ]
  #[ derive( Debug ) ]
  // This lint is addressed by using SplitFlags
  pub struct SplitIterator<'a> {
    /// The wrapped fast splitter that supplies raw content and delimiter tokens.
    iterator: SplitFastIterator<'a, Vec< &'a str >>,
    /// The complete source string; used to re-slice by absolute byte offset.
    src: &'a str,
    /// Behavior flags (quoting, stripping, preservation options).
    flags: SplitFlags,
    /// Strings that open a quoted section; entry `i` pairs with `quoting_postfixes[ i ]`.
    quoting_prefixes: Vec< &'a str >,
    /// Strings that close a quoted section, indexed in parallel with `quoting_prefixes`.
    quoting_postfixes: Vec< &'a str >,
    /// A token stashed by one `next` call to be handed out at the start of the following one.
    pending_opening_quote_delimiter: Option<Split<'a>>,
    /// Whether the most recently processed token was a delimiter; consulted when deciding
    /// whether to emit an empty token under `PRESERVING_EMPTY`.
    last_yielded_token_was_delimiter: bool,
    /// When set, the next `next` call repositions the wrapped iterator to this absolute byte
    /// offset. NOTE(review): nothing in the visible part of the file sets it to `Some`.
    just_finished_peeked_quote_end_offset: Option< usize >,
    /// Set after a quoted section was handled; makes `next` drop one empty content token.
    skip_next_spurious_empty: bool,
    /// First character of the quote considered open, if any.
    active_quote_char: Option< char >, // Moved from SplitFastIterator
    /// Set when a quoted section was just produced; cleared whenever a token is returned.
    just_processed_quote: bool,
  }
329
330  impl<'a> SplitIterator<'a> {
331    fn new(o: &impl SplitOptionsAdapter<'a, Vec< &'a str >>) -> Self {
332      let mut delimeter_list_for_fast_iterator = o.delimeter();
333      delimeter_list_for_fast_iterator.retain(|&pat| !pat.is_empty());
334      let iterator = SplitFastIterator::new(&o.clone_options_for_sfi());
335      let flags = o.flags();
336      Self {
337        iterator,
338        src: o.src(),
339        flags,
340        quoting_prefixes: o.quoting_prefixes().clone(),
341        quoting_postfixes: o.quoting_postfixes().clone(),
342        pending_opening_quote_delimiter: None,
343        last_yielded_token_was_delimiter: false,
344        just_finished_peeked_quote_end_offset: None,
345        skip_next_spurious_empty: false,
346        active_quote_char: None, // No active quote at iteration start
347        just_processed_quote: false,
348      }
349    }
350  }
351
352  impl<'a> Iterator for SplitIterator<'a> {
353    type Item = Split<'a>;
354    #[ allow( clippy::too_many_lines ) ]
355    fn next(&mut self) -> Option< Self::Item > {
356      loop {
357        if let Some(offset) = self.just_finished_peeked_quote_end_offset.take() {
358          if self.iterator.current_offset != offset {
359            if offset > self.iterator.current_offset {
360              // Move forward
361              self.iterator.iterable = &self.iterator.iterable[offset - self.iterator.current_offset..];
362            } else {
363              // Move backward - need to recalculate from source
364              let src_len = self.src.len();
365              if offset < src_len {
366                self.iterator.iterable = &self.src[offset..];
367              }
368            }
369            self.iterator.current_offset = offset;
370          }
371        }
372        if let Some(pending_split) = self.pending_opening_quote_delimiter.take() {
373          if pending_split.typ != SplitType::Delimiter || self.flags.contains( SplitFlags::PRESERVING_DELIMITERS )
374          {
375            if self.flags.contains( SplitFlags::QUOTING ) && self.quoting_prefixes.contains( &pending_split.string.as_ref() )
376            {
377              // This logic is now handled by the main quoting block below
378              // if let Some(fcoq) = pending_split.string.chars().next() { self.iterator.active_quote_char = Some(fcoq); }
379            }
380            self.last_yielded_token_was_delimiter = pending_split.typ == SplitType::Delimiter;
381            return Some(pending_split);
382          }
383          if self.flags.contains(SplitFlags::QUOTING) && self.quoting_prefixes.contains(&pending_split.string.as_ref()) {
384            // This logic is now handled by the main quoting block below
385            // if let Some(fcoq) = pending_split.string.chars().next() { self.iterator.active_quote_char = Some(fcoq); }
386          }
387        }
388
389        let about_to_process_quote = self.flags.contains(SplitFlags::QUOTING)
390          && self.active_quote_char.is_none()
391          && self.quoting_prefixes.iter().any(|p| self.iterator.iterable.starts_with(p));
392        // Special case: don't generate preserving_empty tokens when the last yielded token was quoted content (empty or not)
393        // and we're not about to process a quote. This prevents spurious empty tokens after empty quoted sections.
394        let last_was_quoted_content = self.just_processed_quote;
395        // For now, focus on the core case: consecutive delimiters only
396        // Generate preserving_empty tokens for consecutive delimiters OR before quotes (but not for quoted empty content)
397        let has_consecutive_delimiters = self
398          .iterator
399          .delimeter
400          .pos(self.iterator.iterable)
401          .is_some_and(|(ds, _)| ds == 0);
402        let preserving_empty_check = self.last_yielded_token_was_delimiter
403          && self.flags.contains(SplitFlags::PRESERVING_EMPTY)
404          && !last_was_quoted_content
405          && (has_consecutive_delimiters
406            || (about_to_process_quote
407              && !self.iterator.iterable.starts_with("\"\"")
408              && !self.iterator.iterable.starts_with("''")
409              && !self.iterator.iterable.starts_with("``")));
410
411        if preserving_empty_check {
412          let current_sfi_offset = self.iterator.current_offset;
413          let empty_token = Split {
414            string: Cow::Borrowed(""),
415            typ: SplitType::Delimited,
416            start: current_sfi_offset,
417            end: current_sfi_offset,
418            was_quoted: false,
419          };
420          // Prevent duplicate empty tokens after delimiter processing
421          self.last_yielded_token_was_delimiter = false;
422          // Advance the iterator's counter to skip the empty content that would naturally be returned next
423          self.iterator.counter += 1;
424          return Some(empty_token);
425        }
426
427        self.last_yielded_token_was_delimiter = false;
428        let sfi_next_internal_counter_will_be_odd = self.iterator.counter % 2 == 0;
429        let sfi_iterable_starts_with_delimiter = self
430          .iterator
431          .delimeter
432          .pos(self.iterator.iterable)
433          .is_some_and(|(d_start, _)| d_start == 0);
434        let sfi_should_yield_empty_now = self.flags.contains(SplitFlags::PRESERVING_EMPTY)
435          && sfi_next_internal_counter_will_be_odd
436          && sfi_iterable_starts_with_delimiter;
437        let effective_split_opt: Option<Split<'a>>;
438        let mut quote_handled_by_peek = false;
439
440        // Simplified quoting logic
441        if self.flags.contains(SplitFlags::QUOTING) && self.active_quote_char.is_none() && !sfi_should_yield_empty_now {
442          if let Some(first_char_iterable) = self.iterator.iterable.chars().next() {
443            if let Some(prefix_idx) = self
444              .quoting_prefixes
445              .iter()
446              .position(|p| self.iterator.iterable.starts_with(p))
447            {
448              quote_handled_by_peek = true;
449              let prefix_str = self.quoting_prefixes[prefix_idx];
450              let opening_quote_original_start = self.iterator.current_offset;
451              let prefix_len = prefix_str.len();
452              let expected_postfix = self.quoting_postfixes[prefix_idx];
453
454              // Consume the opening quote
455              self.iterator.current_offset += prefix_len;
456              self.iterator.iterable = &self.iterator.iterable[prefix_len..];
457              self.active_quote_char = Some(first_char_iterable); // Set active quote char in SplitIterator
458
459              let mut end_of_quote_idx: Option< usize > = None;
460              let mut chars = self.iterator.iterable.chars();
461              let mut current_char_offset = 0;
462              let mut escaped = false;
463
464              // Simple quote parsing: find the closing quote, respecting escape sequences
465              while let Some(c) = chars.next() {
466                if escaped {
467                  escaped = false;
468                  current_char_offset += c.len_utf8();
469                } else if c == '\\' {
470                  escaped = true;
471                  current_char_offset += c.len_utf8();
472                } else if c == self.active_quote_char.unwrap()
473                // Found unescaped quote
474                {
475                  // Check if this is truly a closing quote or the start of an adjacent quoted section
476                  let remaining_chars = chars.as_str();
477                  if !remaining_chars.is_empty() {
478                    let next_char = remaining_chars.chars().next().unwrap();
479                    // If the next character is alphanumeric (part of content), this might be an adjacent quote
480                    if next_char.is_alphanumeric() && current_char_offset > 0 {
481                      // Check if the previous character is non-whitespace (meaning no delimiter)
482                      let content_so_far = &self.iterator.iterable[..current_char_offset];
483                      if let Some(last_char) = content_so_far.chars().last() {
484                        if !last_char.is_whitespace() {
485                          // This is an adjacent quote - treat it as the end of this section
486                          end_of_quote_idx = Some(current_char_offset);
487                          break;
488                        }
489                      }
490                    }
491                  }
492                  // Normal closing quote
493                  end_of_quote_idx = Some(current_char_offset);
494                  break;
495                } else {
496                  current_char_offset += c.len_utf8();
497                }
498              }
499
500              let (quoted_content_str, consumed_len_in_sfi_iterable) = if let Some(end_idx) = end_of_quote_idx {
501                // Content is from start of current iterable to end_idx (before the closing quote)
502                let content = &self.iterator.iterable[..end_idx];
503
504                // Check if this is an adjacent quote scenario (no delimiter follows)
505                let remaining_chars = &self.iterator.iterable[end_idx..];
506                let is_adjacent = if remaining_chars.len() > 1 {
507                  let chars_after_quote: Vec< char > = remaining_chars.chars().take(2).collect();
508                  if chars_after_quote.len() >= 2 {
509                    chars_after_quote[0] == '"' && chars_after_quote[1].is_alphanumeric()
510                  } else {
511                    false
512                  }
513                } else {
514                  false
515                };
516
517                let consumed = if is_adjacent {
518                  end_idx // Don't consume the quote - it's the start of the next section
519                } else {
520                  end_idx + expected_postfix.len() // Normal case - consume the closing quote
521                };
522
523                (content, consumed)
524              } else {
525                // No closing quote found, consume the rest of the iterable
526                (self.iterator.iterable, self.iterator.iterable.len())
527              };
528
529              if quoted_content_str.is_empty() && end_of_quote_idx.is_some() {
530                self.last_yielded_token_was_delimiter = false;
531              }
532
533              // Advance SFI's internal state based on what was consumed
534              self.iterator.current_offset += consumed_len_in_sfi_iterable;
535              self.iterator.iterable = &self.iterator.iterable[consumed_len_in_sfi_iterable..];
536              self.active_quote_char = None; // Reset active quote char
537
538              if self.flags.contains(SplitFlags::PRESERVING_QUOTING) {
539                let full_quoted_len = prefix_len
540                  + quoted_content_str.len()
541                  + if end_of_quote_idx.is_some() {
542                    expected_postfix.len()
543                  } else {
544                    0
545                  };
546                let new_string = if opening_quote_original_start + full_quoted_len <= self.src.len() {
547                  Cow::Borrowed(&self.src[opening_quote_original_start..(opening_quote_original_start + full_quoted_len)])
548                } else {
549                  Cow::Borrowed("")
550                };
551                let new_end = opening_quote_original_start + new_string.len();
552                effective_split_opt = Some(Split {
553                  string: new_string,
554                  typ: SplitType::Delimited,
555                  start: opening_quote_original_start,
556                  end: new_end,
557                  was_quoted: true,
558                });
559              } else {
560                let unescaped_string: Cow<'a, str> = unescape_str(quoted_content_str).into_owned().into();
561                let new_start = opening_quote_original_start + prefix_len;
562                let new_end = new_start + unescaped_string.len();
563                effective_split_opt = Some(Split {
564                  string: unescaped_string,
565                  typ: SplitType::Delimited,
566                  start: new_start,
567                  end: new_end,
568                  was_quoted: true,
569                });
570              }
571              if effective_split_opt.is_some() {
572                self.last_yielded_token_was_delimiter = false;
573                self.just_processed_quote = true;
574              }
575            } else {
576              effective_split_opt = self.iterator.next();
577            }
578          } else {
579            effective_split_opt = self.iterator.next();
580          }
581        } else {
582          effective_split_opt = self.iterator.next();
583        }
584
585        let mut current_split = effective_split_opt?;
586        if quote_handled_by_peek {
587          self.skip_next_spurious_empty = true;
588        }
589        if self.skip_next_spurious_empty && current_split.typ == SplitType::Delimited && current_split.string.is_empty() {
590          self.skip_next_spurious_empty = false;
591          continue;
592        }
593
594        if !quote_handled_by_peek
595          && self.flags.contains(SplitFlags::QUOTING)
596          && current_split.typ == SplitType::Delimiter
597          && self.active_quote_char.is_none()
598        {
599          if let Some(_prefix_idx) = self.quoting_prefixes.iter().position(|p| *p == current_split.string.as_ref()) {
600            let opening_quote_delimiter = current_split.clone();
601            if self.flags.contains(SplitFlags::PRESERVING_DELIMITERS) {
602              self.pending_opening_quote_delimiter = Some(opening_quote_delimiter.clone());
603            }
604            if let Some(fcoq) = opening_quote_delimiter.string.chars().next() {
605              self.active_quote_char = Some(fcoq);
606            }
607            if !self.flags.contains(SplitFlags::PRESERVING_DELIMITERS) {
608              continue;
609            }
610          }
611        }
612        if self.flags.contains(SplitFlags::STRIPPING) && current_split.typ == SplitType::Delimited {
613          let original_len = current_split.string.len();
614          let trimmed_string = current_split.string.trim();
615          if trimmed_string.len() < original_len {
616            let leading_whitespace_len = trimmed_string.as_ptr() as usize - current_split.string.as_ptr() as usize;
617            current_split.start += leading_whitespace_len;
618            current_split.string = Cow::Owned(trimmed_string.to_string());
619            current_split.end = current_split.start + current_split.string.len();
620          }
621        }
622        let skip = (current_split.typ == SplitType::Delimited
623          && current_split.string.is_empty()
624          && !self.flags.contains(SplitFlags::PRESERVING_EMPTY))
625          || (current_split.typ == SplitType::Delimiter && !self.flags.contains(SplitFlags::PRESERVING_DELIMITERS));
626        if current_split.typ == SplitType::Delimiter {
627          // Don't set this flag if we just processed a quote, as the quoted content was the last yielded token
628          if !self.just_processed_quote {
629            self.last_yielded_token_was_delimiter = true;
630          }
631        }
632        if skip {
633          continue;
634        }
635        // Reset the quote flag when returning any token
636        self.just_processed_quote = false;
637        return Some(current_split);
638      }
639    }
640  }
641
  /// Options to configure the behavior of split iterators.
  ///
  /// Consumed by `split` / `split_fast` / `into_iter` to build the corresponding iterator.
  #[ derive( Debug, Clone ) ]
  pub struct SplitOptions<'a, D>
  where
    D: Searcher + Default + Clone,
  {
    /// The string to split.
    src: &'a str,
    /// The delimiter searcher.
    delimeter: D,
    /// Behavior flags.
    flags: SplitFlags,
    /// Strings that open a quoted section.
    quoting_prefixes: Vec< &'a str >,
    /// Strings that close a quoted section.
    quoting_postfixes: Vec< &'a str >,
  }
654
  impl<'a> SplitOptions<'a, Vec< &'a str >> {
    /// Consumes the options and returns a `SplitIterator`.
    ///
    /// Equivalent to calling `into_iter` on the options.
    #[ must_use ]
    pub fn split(self) -> SplitIterator<'a> {
      SplitIterator::new(&self)
    }
  }
662
  impl<'a, D: Searcher + Default + Clone> SplitOptions<'a, D> {
    /// Consumes the options and returns a `SplitFastIterator`.
    ///
    /// Only the source string and the delimiter are used by the fast iterator.
    // NOTE(review): the enclosing `private` module is not itself `pub`; how this method is
    // reached from outside depends on re-exports not visible here.
    pub fn split_fast(self) -> SplitFastIterator<'a, D> {
      SplitFastIterator::new(&self)
    }
  }
  /// Lets `SplitOptions` with a list of delimiters be used directly in a `for` loop.
  impl<'a> core::iter::IntoIterator for SplitOptions<'a, Vec< &'a str >> {
    type Item = Split<'a>;
    type IntoIter = SplitIterator<'a>;

    /// Consumes the options and builds a `SplitIterator` from them.
    fn into_iter(self) -> Self::IntoIter {
      SplitIterator::new(&self)
    }
  }
678
  /// Adapter trait to provide split options to iterators.
  ///
  /// `SplitFastIterator::new` and `SplitIterator::new` read their configuration through
  /// this trait.
  pub trait SplitOptionsAdapter<'a, D>
  where
    D: Searcher + Default + Clone,
  {
    /// Gets the source string to be split.
    fn src(&self) -> &'a str;
    /// Gets the delimiter(s) to use for splitting.
    fn delimeter(&self) -> D;
    /// Gets the behavior flags for splitting.
    fn flags(&self) -> SplitFlags;
    /// Gets the prefixes that denote the start of a quoted section.
    fn quoting_prefixes(&self) -> &Vec< &'a str >;
    /// Gets the postfixes that denote the end of a quoted section.
    fn quoting_postfixes(&self) -> &Vec< &'a str >;
    /// Clones the options, specifically for initializing a `SplitFastIterator`.
    fn clone_options_for_sfi(&self) -> SplitOptions<'a, D>;
  }
697
  /// Exposes the fields of `SplitOptions` through the adapter interface.
  impl<'a, D: Searcher + Clone + Default> SplitOptionsAdapter<'a, D> for SplitOptions<'a, D> {
    /// Returns the stored source string.
    fn src(&self) -> &'a str {
      self.src
    }
    /// Returns a clone of the stored delimiter.
    fn delimeter(&self) -> D {
      self.delimeter.clone()
    }
    /// Returns a copy of the stored flags.
    fn flags(&self) -> SplitFlags {
      self.flags
    }
    /// Returns a reference to the stored quoting prefixes.
    fn quoting_prefixes(&self) -> &Vec< &'a str > {
      &self.quoting_prefixes
    }
    /// Returns a reference to the stored quoting postfixes.
    fn quoting_postfixes(&self) -> &Vec< &'a str > {
      &self.quoting_postfixes
    }
    /// Returns a full clone of these options.
    fn clone_options_for_sfi(&self) -> SplitOptions<'a, D> {
      self.clone()
    }
  }
718
  /// Basic builder for creating simple `SplitOptions` without `OpType` dependency.
  ///
  /// All setters take `&mut self` and return `&mut Self`, so calls can be chained.
  #[ derive( Debug ) ]
  pub struct BasicSplitBuilder<'a> {
    /// The string to split; empty until `src` is called.
    src: &'a str,
    /// The delimiter patterns; empty until `delimeter` or `delimeters` is called.
    delimiters: Vec<&'a str>,
    /// Behavior flags accumulated by the setters.
    flags: SplitFlags,
    /// Strings that open a quoted section.
    quoting_prefixes: Vec<&'a str>,
    /// Strings that close a quoted section.
    quoting_postfixes: Vec<&'a str>,
  }
728
  impl<'a> Default for BasicSplitBuilder<'a> {
    /// Returns the same builder as `BasicSplitBuilder::new`.
    fn default() -> Self {
      Self::new()
    }
  }
734
735  impl<'a> BasicSplitBuilder<'a> {
736    /// Creates a new `BasicSplitBuilder`.
737    #[ must_use ]
738    pub fn new() -> BasicSplitBuilder<'a> {
739      Self {
740        src: "",
741        delimiters: vec![],
742        flags: SplitFlags::PRESERVING_DELIMITERS, // Default
743        quoting_prefixes: vec![],
744        quoting_postfixes: vec![],
745      }
746    }
747
    /// Sets the source string to split.
    ///
    /// Replaces any previously set source and returns the builder for chaining.
    pub fn src(&mut self, value: &'a str) -> &mut Self {
      self.src = value;
      self
    }
753
    /// Sets a single delimiter.
    ///
    /// Replaces all previously set delimiters with `value` and returns the builder for chaining.
    pub fn delimeter(&mut self, value: &'a str) -> &mut Self {
      self.delimiters = vec![value];
      self
    }
759
    /// Sets multiple delimiters.
    ///
    /// Replaces all previously set delimiters with a copy of `value` and returns the builder
    /// for chaining.
    pub fn delimeters(&mut self, value: &[&'a str]) -> &mut Self {
      self.delimiters = value.to_vec();
      self
    }
765
766    /// Sets quoting behavior.
767    pub fn quoting(&mut self, value: bool) -> &mut Self {
768      if value {
769        self.flags.insert(SplitFlags::QUOTING);
770        // Set default quoting characters if not already set
771        if self.quoting_prefixes.is_empty() {
772          self.quoting_prefixes = vec!["\"", "'"];
773        }
774        if self.quoting_postfixes.is_empty() {
775          self.quoting_postfixes = vec!["\"", "'"];
776        }
777      } else {
778        self.flags.remove(SplitFlags::QUOTING);
779      }
780      self
781    }
782
783    /// Sets stripping behavior.
784    pub fn stripping(&mut self, value: bool) -> &mut Self {
785      if value {
786        self.flags.insert(SplitFlags::STRIPPING);
787      } else {
788        self.flags.remove(SplitFlags::STRIPPING);
789      }
790      self
791    }
792
793    /// Sets whether to preserve empty segments.
794    pub fn preserving_empty(&mut self, value: bool) -> &mut Self {
795      if value {
796        self.flags.insert(SplitFlags::PRESERVING_EMPTY);
797      } else {
798        self.flags.remove(SplitFlags::PRESERVING_EMPTY);
799      }
800      self
801    }
802
803    /// Sets whether to preserve delimiters in output.
804    pub fn preserving_delimeters(&mut self, value: bool) -> &mut Self {
805      if value {
806        self.flags.insert(SplitFlags::PRESERVING_DELIMITERS);
807      } else {
808        self.flags.remove(SplitFlags::PRESERVING_DELIMITERS);
809      }
810      self
811    }
812
813    /// Sets whether to preserve quoting in output.
814    pub fn preserving_quoting(&mut self, value: bool) -> &mut Self {
815      if value {
816        self.flags.insert(SplitFlags::PRESERVING_QUOTING);
817      } else {
818        self.flags.remove(SplitFlags::PRESERVING_QUOTING);
819      }
820      self
821    }
822
823    /// Sets quoting prefixes.
824    pub fn quoting_prefixes(&mut self, value: &[&'a str]) -> &mut Self {
825      self.quoting_prefixes = value.to_vec();
826      self
827    }
828
829    /// Sets quoting postfixes.
830    pub fn quoting_postfixes(&mut self, value: &[&'a str]) -> &mut Self {
831      self.quoting_postfixes = value.to_vec();
832      self
833    }
834
835    /// Performs the split operation and returns a `SplitIterator`.
836    pub fn perform(&mut self) -> SplitIterator<'a> {
837      let options = SplitOptions {
838        src: self.src,
839        delimeter: self.delimiters.clone(),
840        flags: self.flags,
841        quoting_prefixes: self.quoting_prefixes.clone(),
842        quoting_postfixes: self.quoting_postfixes.clone(),
843      };
844      options.split()
845    }
846
847    /// Attempts to create a SIMD-optimized iterator when simd feature is enabled.
848    #[ cfg( feature = "simd" ) ]
849    pub fn perform_simd(&mut self) -> SplitIterator<'a> {
850      // For now, just use regular perform - SIMD integration needs more work
851      self.perform()
852    }
853    
854    /// Attempts to create a SIMD-optimized iterator - fallback version when simd feature is disabled.
855    #[ cfg( not( feature = "simd" ) ) ]
856    pub fn perform_simd(&mut self) -> SplitIterator<'a> {
857      self.perform()
858    }
859  }
860
861  /// Former (builder) for creating `SplitOptions`.
862  // This lint is addressed by using SplitFlags
863  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
864  #[ derive( Debug ) ]
865  pub struct SplitOptionsFormer<'a> {
866    src: &'a str,
867    delimeter: OpType<&'a str>,
868    flags: SplitFlags,
869    quoting_prefixes: Vec< &'a str >,
870    quoting_postfixes: Vec< &'a str >,
871  }
872
873  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
874  impl<'a> SplitOptionsFormer<'a> {
875    /// Initializes builder with delimiters to support fluent configuration of split options.
876    pub fn new<D: Into<OpType<&'a str>>>(delimeter: D) -> SplitOptionsFormer<'a> {
877      Self {
878        src: "",
879        delimeter: OpType::Vector(vec![]).append(delimeter.into()),
880        flags: SplitFlags::PRESERVING_DELIMITERS, // Default
881        quoting_prefixes: vec![],
882        quoting_postfixes: vec![],
883      }
884    }
885    /// Controls empty segment handling to accommodate different parsing requirements.
886    pub fn preserving_empty(&mut self, value: bool) -> &mut Self {
887      if value {
888        self.flags.insert(SplitFlags::PRESERVING_EMPTY);
889      } else {
890        self.flags.remove(SplitFlags::PRESERVING_EMPTY);
891      }
892      self
893    }
894    /// Controls delimiter preservation to support scenarios needing delimiter tracking.
895    pub fn preserving_delimeters(&mut self, value: bool) -> &mut Self {
896      if value {
897        self.flags.insert(SplitFlags::PRESERVING_DELIMITERS);
898      } else {
899        self.flags.remove(SplitFlags::PRESERVING_DELIMITERS);
900      }
901      self
902    }
903    /// Controls quote character preservation for maintaining original format integrity.
904    pub fn preserving_quoting(&mut self, value: bool) -> &mut Self {
905      if value {
906        self.flags.insert(SplitFlags::PRESERVING_QUOTING);
907      } else {
908        self.flags.remove(SplitFlags::PRESERVING_QUOTING);
909      }
910      self
911    }
912    /// Controls whitespace trimming to support clean data extraction scenarios.
913    pub fn stripping(&mut self, value: bool) -> &mut Self {
914      if value {
915        self.flags.insert(SplitFlags::STRIPPING);
916      } else {
917        self.flags.remove(SplitFlags::STRIPPING);
918      }
919      self
920    }
921    /// Enables quote-aware splitting to handle complex strings with embedded delimiters.
922    pub fn quoting(&mut self, value: bool) -> &mut Self {
923      if value {
924        self.flags.insert(SplitFlags::QUOTING);
925      } else {
926        self.flags.remove(SplitFlags::QUOTING);
927      }
928      self
929    }
930    /// Configures quote start markers to support custom quotation systems.
931    pub fn quoting_prefixes(&mut self, value: Vec< &'a str >) -> &mut Self {
932      self.quoting_prefixes = value;
933      self
934    }
935    /// Configures quote end markers to support asymmetric quotation systems.
936    pub fn quoting_postfixes(&mut self, value: Vec< &'a str >) -> &mut Self {
937      self.quoting_postfixes = value;
938      self
939    }
940    /// Provides input string to enable convenient chained configuration.
941    pub fn src(&mut self, value: &'a str) -> &mut Self {
942      self.src = value;
943      self
944    }
945    /// Sets the delimiter(s) to use for splitting.
946    pub fn delimeter<D: Into<OpType<&'a str>>>(&mut self, value: D) -> &mut Self {
947      self.delimeter = OpType::Vector(vec![]).append(value.into());
948      self
949    }
950    /// Consumes the former and returns configured `SplitOptions`.
951    ///
952    /// # Panics
953    /// Panics if `delimeter` field contains an `OpType::Primitive(None)` which results from `<&str>::default()`,
954    /// and `vector()` method on `OpType` is not robust enough to handle it (currently it would unwrap a None).
955    pub fn form(&mut self) -> SplitOptions<'a, Vec< &'a str >> {
956      if self.flags.contains(SplitFlags::QUOTING) {
957        if self.quoting_prefixes.is_empty() {
958          self.quoting_prefixes = vec!["\"", "`", "'"];
959        }
960        if self.quoting_postfixes.is_empty() {
961          self.quoting_postfixes = vec!["\"", "`", "'"];
962        }
963      }
964      SplitOptions {
965        src: self.src,
966        delimeter: self.delimeter.clone().vector().unwrap(),
967        flags: self.flags,
968        quoting_prefixes: self.quoting_prefixes.clone(),
969        quoting_postfixes: self.quoting_postfixes.clone(),
970      }
971    }
972    /// Consumes the former, builds `SplitOptions`, and returns a `SplitIterator`.
973    pub fn perform(&mut self) -> SplitIterator<'a> {
974      self.form().split()
975    }
976    
977    /// Attempts to create a SIMD-optimized iterator when the simd feature is enabled.
978    /// Falls back to the regular iterator if SIMD is not available or fails.
979    #[ cfg( feature = "simd" ) ]
980    pub fn perform_simd(&mut self) -> SplitIterator<'a> {
981      // Try SIMD first for multi-delimiter cases
982      if let OpType::Vector(ref delims) = self.delimeter {
983        if delims.len() > 1 {
984          // For multi-delimiter splitting, SIMD provides significant benefits
985          if let Ok(_simd_iter) = super::simd_split_cached(self.src, delims) {
986            // TODO: Bridge SIMD iterator with standard format for performance optimization
987            return self.perform(); // For now, fallback to regular - we'll enhance this
988          }
989          // SIMD failed, use regular implementation
990        }
991      }
992      
993      // Fallback to regular splitting
994      self.perform()
995    }
996    
997    /// Attempts to create a SIMD-optimized iterator - fallback version when simd feature is disabled.
998    #[ cfg( not( feature = "simd" ) ) ]
999    pub fn perform_simd(&mut self) -> SplitIterator<'a> {
1000      self.perform()
1001    }
1002  }
1003  /// Creates a basic split iterator builder for string splitting functionality.
1004  /// This is the main entry point for using basic string splitting.
1005  #[ must_use ]
1006  pub fn split<'a>() -> BasicSplitBuilder<'a> {
1007    BasicSplitBuilder::new()
1008  }
1009
1010  /// Creates a new `SplitOptionsFormer` to build `SplitOptions` for splitting a string.
1011  /// This is the main entry point for using advanced string splitting functionality.
1012  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
1013  #[ must_use ]
1014  pub fn split_advanced<'a>() -> SplitOptionsFormer<'a> {
1015    SplitOptionsFormer::new(<&str>::default())
1016  }
1017}
1018// NOTE: The #[cfg(not(test))] mod private block was removed as part of the simplification.
1019// All definitions are now in the single `pub mod private` block above,
1020// with test-specific items/visibilities handled by #[ cfg( test ) ] attributes.
1021
1022#[ doc( inline ) ]
1023#[ allow( unused_imports ) ]
1024pub use own::*;
1025
1026/// Own namespace of the module.
1027#[ allow( unused_imports ) ]
1028pub mod own {
1029  #[ allow( unused_imports ) ]
1030  use super::*;
1031  pub use orphan::*;
1032  pub use private::{ Split, SplitType, SplitIterator, Searcher, BasicSplitBuilder, split };
1033  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
1034  pub use private::{ split_advanced, SplitOptionsFormer };
1035  #[ cfg( feature = "simd" ) ]
1036  pub use super::{ SIMDSplitIterator, simd_split_cached, get_or_create_cached_patterns };
1037  #[ cfg( test ) ]
1038  pub use private::{ SplitFastIterator, test_unescape_str };
1039}
1040
1041/// Parented namespace of the module.
1042#[ allow( unused_imports ) ]
1043pub mod orphan {
1044  #[ allow( unused_imports ) ]
1045  use super::*;
1046  pub use exposed::*;
1047}
1048
1049/// Exposed namespace of the module.
1050#[ allow( unused_imports ) ]
1051pub mod exposed {
1052  #[ allow( unused_imports ) ]
1053  use super::*;
1054  pub use prelude::*;
1055  pub use super::own::{ Split, SplitType, SplitIterator, Searcher, BasicSplitBuilder, split };
1056  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
1057  pub use super::own::{ split_advanced, SplitOptionsFormer };
1058  #[ cfg( feature = "simd" ) ]
1059  pub use super::own::{ SIMDSplitIterator, simd_split_cached, get_or_create_cached_patterns };
1060  #[ cfg( test ) ]
1061  pub use super::own::{ SplitFastIterator, test_unescape_str };
1062}
1063
1064/// Namespace of the module to include with `use module::*`.
1065#[ allow( unused_imports ) ]
1066pub mod prelude {
1067  #[ allow( unused_imports ) ]
1068  use super::*;
1069  pub use private::{ Searcher, BasicSplitBuilder, split };
1070  #[ cfg( all( feature = "string_parse_request", feature = "std" ) ) ]
1071  pub use private::{ SplitOptionsFormer, split_advanced };
1072  #[ cfg( test ) ]
1073  pub use private::{ SplitFastIterator, test_unescape_str as unescape_str };
1074}