unobtanium-segmenter 0.5.2

A text segmentation toolbox for search applications inspired by charabia and tantivy.
Documentation
// SPDX-FileCopyrightText: 2026 Slatian
// SPDX-FileContributor: Slatian
//
// SPDX-License-Identifier: LGPL-3.0-only

use std::iter::Peekable;
use std::str::CharIndices;

/// A wrapper for a text iterator that makes it easier to parse things.
///
/// It walks the characters of a string slice with one character of lookahead
/// ([`peek`](Self::peek)) and keeps track of the byte offset of that lookahead
/// character ([`index`](Self::index)), so a parser can remember offsets while
/// scanning and cut the matched range out of the original text afterwards.
///
/// Cloning the iterator yields an independent cursor at the same position,
/// which can serve as a cheap snapshot for backtracking.
#[derive(Debug, Clone)]
pub struct ParserIterator<'a> {
	/// Underlying `(byte offset, char)` stream with one item of lookahead.
	iter: Peekable<CharIndices<'a>>,
	/// Byte offset of the character `peek()` would return,
	/// or `len` once every character has been consumed.
	_index: usize,
	/// Length in bytes of the text this iterator was created for.
	len: usize,
}

impl<'a> ParserIterator<'a> {
	/// Construct a new iterator for the given text, positioned at its first character.
	#[must_use]
	pub fn new(text: &'a str) -> Self {
		Self {
			iter: text.char_indices().peekable(),
			_index: 0,
			len: text.len(),
		}
	}

	/// Returns the next character without consuming it.
	///
	/// Returns `None` once all characters have been consumed.
	pub fn peek(&mut self) -> Option<char> {
		self.iter.peek().map(|&(_, c)| c)
	}

	/// Advances the iterator to the next character, consuming it.
	/// The consumed character will be returned.
	///
	/// Returns `None` (and leaves the index untouched) when there is nothing
	/// left to consume.
	// This is deliberately an inherent method rather than an `Iterator` impl;
	// the name is part of the existing interface and has to stay.
	#[allow(clippy::should_implement_trait)]
	pub fn next(&mut self) -> Option<char> {
		let (_, c) = self.iter.next()?;
		// The index always points at the upcoming character; once the text is
		// exhausted it points one past the end, i.e. at the text length.
		self._index = self.iter.peek().map_or(self.len, |&(i, _)| i);
		Some(c)
	}

	/// Returns the byte index of the character that can be seen using `peek()`.
	///
	/// After the last character was consumed this is the length of the text.
	/// Use it for remembering indices while parsing and then cut out the result later.
	#[inline]
	#[must_use]
	pub fn index(&self) -> usize {
		self._index
	}

	/// Consumes characters using the `next()` function
	/// until the given `matcher` returns false or the text ends.
	///
	/// The character that made `matcher` return false is not consumed and
	/// remains visible through `peek()`.
	/// The `matcher` may carry mutable state (it is an `FnMut`), for example
	/// to count or collect the characters it has accepted.
	///
	/// Returns the number of characters (not bytes) that were consumed.
	pub fn consume_chars<F>(&mut self, mut matcher: F) -> usize
	where
		F: FnMut(char) -> bool,
	{
		let mut counter: usize = 0;
		while let Some(c) = self.peek() {
			if !matcher(c) {
				break;
			}
			self.next();
			counter += 1;
		}
		counter
	}
}