unobtanium-segmenter 0.5.2

A text segmentation toolbox for search applications inspired by charabia and tantivy.
Documentation
// SPDX-FileCopyrightText: 2026 Slatian
//
// SPDX-License-Identifier: LGPL-3.0-only

use std::marker::PhantomData;
use std::mem;

/// Owned iterator wrapper with shortcuts for the common "no element" and "exactly one element" cases.
///
/// It is intended to be used as the return type of the callback given to [SubdivisionMap].
/// Because it implements the Iterator trait it can also be consumed like any other iterator.
pub enum UseOrSubdivide<T, I>
where
	I: Iterator<Item = T>,
{
	/// Yields nothing.
	Empty,

	/// Yields exactly the one wrapped element.
	Use(T),

	/// Yields every element of the wrapped, owned iterator.
	///
	/// Something like [std::vec::IntoIter] is recommended for `I`.
	Subdivide(I),
}

impl<T, I> Iterator for UseOrSubdivide<T, I>
where
	I: Iterator<Item = T>,
{
	type Item = T;

	/// Yields the next element.
	///
	/// * `Empty` always yields `None`.
	/// * `Use` yields its element once and turns itself into `Empty`.
	/// * `Subdivide` forwards to the wrapped iterator.
	fn next(&mut self) -> Option<Self::Item> {
		match self {
			Self::Subdivide(inner) => inner.next(),
			Self::Empty => None,
			Self::Use(_) => {
				// The element can't be moved out from behind `&mut self`,
				// so swap the whole value out and leave `Empty` behind.
				let Self::Use(single) = mem::replace(self, UseOrSubdivide::Empty) else {
					unreachable!("this code only runs when UseOrSubdivide::Use");
				};
				Some(single)
			}
		}
	}
}

/// Iterator adapter that turns each item of an inner iterator into zero, one or many items of the same type.
///
/// The callback decides per item what to emit, which makes this usable for
/// filtering, mapping and flattening in a single pass.
pub struct SubdivisionMap<'i, T, I, R, F>
where
	T: 'i,
	I: Iterator<Item = T> + 'i,
	R: Iterator<Item = T> + 'i,
	F: Fn(T) -> UseOrSubdivide<T, R>,
{
	/// Uses the lifetime `'i`, which no other field mentions.
	_marker: PhantomData<&'i ()>,
	/// The source iterator whose items are handed to the callback.
	iter: I,
	/// Iterator of a subdivision that has not been fully yielded yet, if any.
	buffer: Option<R>,
	/// Decides what each source item turns into.
	callback: F,
}

impl<
	'i,
	T,
	I: Iterator<Item = T> + 'i,
	R: Iterator<Item = T> + 'i,
	F: Fn(T) -> UseOrSubdivide<T, R>,
> SubdivisionMap<'i, T, I, R, F>
{
	/// Create a new SubdivisionMap from an iterator and a callback
	/// that does the splitting on the items returning a [UseOrSubdivide] instance.
	pub fn new(iter: I, callback: F) -> Self {
		Self {
			_marker: PhantomData,
			iter,
			callback,
			buffer: None,
		}
	}
}

impl<'i, T, I, R, F> Iterator for SubdivisionMap<'i, T, I, R, F>
where
	I: Iterator<Item = T> + 'i,
	R: Iterator<Item = T> + 'i,
	F: Fn(T) -> UseOrSubdivide<T, R>,
{
	type Item = I::Item;

	/// Yields the next item produced by the callback.
	///
	/// Items of a pending subdivision are yielded first. After that the
	/// source iterator is advanced and each item is handed to the callback:
	///
	/// * `Empty` drops the item and moves on to the next source item.
	/// * `Use` yields the returned item.
	/// * `Subdivide` yields every item of the returned iterator in order.
	///
	/// Returns `None` once the source iterator returns `None` and no
	/// subdivision is pending.
	fn next(&mut self) -> Option<Self::Item> {
		loop {
			// Finish a subdivision that is still in progress before
			// pulling anything new from the source.
			if let Some(pending) = &mut self.buffer {
				if let Some(item) = pending.next() {
					return Some(item);
				}
				self.buffer = None;
			}

			let raw_item = self.iter.next()?;
			match (self.callback)(raw_item) {
				// The callback filtered this item out. Returning `None` here
				// would end the iteration early for most consumers, so
				// loop around and try the next source item instead.
				UseOrSubdivide::Empty => {}
				UseOrSubdivide::Use(item) => return Some(item),
				// Don't return; loop around so the buffer gets drained
				// (or skipped, if the subdivision turns out to be empty).
				UseOrSubdivide::Subdivide(parts) => self.buffer = Some(parts),
			}
		}
	}
}