use boa_gc::{Finalize, Trace};
use icu_segmenter::{
iterators::{GraphemeClusterBreakIterator, SentenceBreakIterator, WordBreakIterator},
scaffold::{Latin1, Utf16},
};
use crate::{
Context, JsData, JsNativeError, JsObject, JsResult, JsString, JsSymbol, JsValue,
builtins::{BuiltInBuilder, IntrinsicObject, iterable::create_iter_result_object},
context::intrinsics::Intrinsics,
js_string,
property::Attribute,
realm::Realm,
};
use super::{Segmenter, create_segment_data_object};
/// A break iterator produced by segmenting a string.
///
/// There is one variant for each combination of segmentation granularity
/// (grapheme cluster, word, sentence) and string encoding scaffold
/// (`Utf16`, `Latin1`) from `icu_segmenter`.
pub(crate) enum NativeSegmentIterator<'l, 's> {
/// Grapheme cluster break iterator using the `Utf16` scaffold.
GraphemeUtf16(GraphemeClusterBreakIterator<'l, 's, Utf16>),
/// Word break iterator using the `Utf16` scaffold.
WordUtf16(WordBreakIterator<'l, 's, Utf16>),
/// Sentence break iterator using the `Utf16` scaffold.
SentenceUtf16(SentenceBreakIterator<'l, 's, Utf16>),
/// Grapheme cluster break iterator using the `Latin1` scaffold.
GraphemeLatin1(GraphemeClusterBreakIterator<'l, 's, Latin1>),
/// Word break iterator using the `Latin1` scaffold.
WordLatin1(WordBreakIterator<'l, 's, Latin1>),
/// Sentence break iterator using the `Latin1` scaffold.
SentenceLatin1(SentenceBreakIterator<'l, 's, Latin1>),
}
impl Iterator for NativeSegmentIterator<'_, '_> {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
match self {
NativeSegmentIterator::GraphemeUtf16(g) => g.next(),
NativeSegmentIterator::WordUtf16(w) => w.next(),
NativeSegmentIterator::SentenceUtf16(s) => s.next(),
NativeSegmentIterator::GraphemeLatin1(g) => g.next(),
NativeSegmentIterator::WordLatin1(w) => w.next(),
NativeSegmentIterator::SentenceLatin1(s) => s.next(),
}
}
}
impl NativeSegmentIterator<'_, '_> {
    /// Reports the wrapped word iterator's `is_word_like` result.
    ///
    /// Returns `Some(..)` for the two word variants and `None` for the
    /// grapheme and sentence variants.
    pub(crate) fn is_word_like(&self) -> Option<bool> {
        match self {
            Self::WordUtf16(words) => Some(words.is_word_like()),
            Self::WordLatin1(words) => Some(words.is_word_like()),
            // Listed explicitly so that adding a variant forces a decision
            // here instead of silently falling into a wildcard arm.
            Self::GraphemeUtf16(_)
            | Self::GraphemeLatin1(_)
            | Self::SentenceUtf16(_)
            | Self::SentenceLatin1(_) => None,
        }
    }
}
/// Iteration state stored as the data of a segment iterator object.
///
/// Holds the segmenter object, the string being segmented and the offset at
/// which the next call to `next` resumes.
#[derive(Debug, Trace, Finalize, JsData)]
pub(crate) struct SegmentIterator {
/// Object expected to hold `Segmenter` data; `next` downcasts it and panics
/// if the downcast fails.
segmenter: JsObject,
/// The string being iterated over.
string: JsString,
/// Offset into `string` at which the next segment starts. `create` sets it
/// to 0 and `next` advances it to the end of each segment it returns.
next_segment_index: usize,
}
impl IntrinsicObject for SegmentIterator {
    /// Builds the intrinsic object for this type in `realm`.
    ///
    /// The object gets a configurable `to_string_tag` property with the value
    /// `"Segmenter String Iterator"` and a `next` method.
    fn init(realm: &Realm) {
        let builder = BuiltInBuilder::with_intrinsic::<Self>(realm);
        let builder = builder.static_property(
            JsSymbol::to_string_tag(),
            js_string!("Segmenter String Iterator"),
            Attribute::CONFIGURABLE,
        );
        let builder = builder.static_method(Self::next, js_string!("next"), 0);
        builder.build();
    }

    /// Returns the segment iterator prototype stored in `intrinsics`.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        let prototypes = intrinsics.objects().iterator_prototypes();
        prototypes.segment()
    }
}
impl SegmentIterator {
    /// Creates a new segment iterator object over `string`.
    ///
    /// The object uses the segment iterator prototype from the context's
    /// intrinsics and starts with `next_segment_index` set to 0.
    pub(crate) fn create(segmenter: JsObject, string: JsString, context: &mut Context) -> JsObject {
        let root_shape = context.root_shape();
        let prototype = context
            .intrinsics()
            .objects()
            .iterator_prototypes()
            .segment();
        let state = Self {
            segmenter,
            string,
            next_segment_index: 0,
        };
        JsObject::from_proto_and_data_with_shared_shape(root_shape, prototype, state)
    }

    /// Native implementation of the iterator's `next` method.
    ///
    /// Returns an iterator result holding the segment data object for the
    /// next segment with `done` set to `false`, or an iterator result holding
    /// `undefined` with `done` set to `true` once no further boundary is found.
    ///
    /// # Errors
    ///
    /// Returns a type error if `this` is not an object holding
    /// `SegmentIterator` data.
    ///
    /// # Panics
    ///
    /// Panics if the stored segmenter object does not hold `Segmenter` data.
    fn next(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
        let object = this.as_object();
        let Some(mut iter) = object.as_ref().and_then(JsObject::downcast_mut::<Self>) else {
            return Err(JsNativeError::typ()
                .with_message("`next` can only be called on a `Segment Iterator` object")
                .into());
        };

        let start = iter.next_segment_index;

        // Segment only the part of the string that has not been consumed yet
        // and look for the end of its first segment.
        let boundary = match iter.string.get(start..) {
            None => None,
            Some(remaining) => {
                let segmenter = iter
                    .segmenter
                    .downcast_ref::<Segmenter>()
                    .expect("segment iterator object should contain a segmenter");
                let mut segments = segmenter.native.segment(remaining);
                // Discard the first reported boundary; the one after it is
                // used as the end of the segment.
                // NOTE(review): this relies on the first boundary being the
                // start of the slice (offset 0) — confirm against ICU4X docs.
                segments.next();
                let relative_end = segments.next();
                // Offsets are relative to `remaining`, so shift them back
                // into the coordinates of the whole string.
                relative_end.map(|offset| (start + offset, segments.is_word_like()))
            }
        };

        let Some((end, is_word_like)) = boundary else {
            return Ok(create_iter_result_object(
                JsValue::undefined(),
                true,
                context,
            ));
        };

        // The next call resumes where this segment ends.
        iter.next_segment_index = end;

        let string = iter.string.clone();
        let segment_data = create_segment_data_object(string, start..end, is_word_like, context);
        Ok(create_iter_result_object(
            segment_data.into(),
            false,
            context,
        ))
    }
}