1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
use std::ops::Range;
use boa_macros::utf16;
use boa_profiler::Profiler;
use icu_locid::Locale;
use icu_segmenter::provider::WordBreakDataV1Marker;
use crate::{
builtins::{BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject},
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
js_string,
object::{
internal_methods::get_prototype_from_constructor, JsObject, ObjectData, ObjectInitializer,
},
property::Attribute,
realm::Realm,
Context, JsArgs, JsNativeError, JsResult, JsString, JsSymbol, JsValue,
};
mod iterator;
mod options;
mod segments;
pub(crate) use iterator::*;
pub(crate) use options::*;
pub(crate) use segments::*;
use super::{
locale::{canonicalize_locale_list, resolve_locale, supported_locales},
options::{get_option, get_options_object, IntlOptions, LocaleMatcher},
Service,
};
/// Internal state stored in an `Intl.Segmenter` object.
#[derive(Debug)]
pub struct Segmenter {
/// Locale resolved by the constructor; reported as `locale` by `resolved_options`.
locale: Locale,
/// Underlying native segmenter; its variant determines the reported granularity.
native: NativeSegmenter,
}
/// The concrete `icu_segmenter` segmenter backing a [`Segmenter`], with one
/// variant per supported granularity.
#[derive(Debug)]
pub(crate) enum NativeSegmenter {
/// Segmenter used for the "grapheme" granularity.
Grapheme(Box<icu_segmenter::GraphemeClusterSegmenter>),
/// Segmenter used for the "word" granularity.
Word(Box<icu_segmenter::WordSegmenter>),
/// Segmenter used for the "sentence" granularity.
Sentence(Box<icu_segmenter::SentenceSegmenter>),
}
impl NativeSegmenter {
    /// Reports which [`Granularity`] corresponds to the variant of this `NativeSegmenter`.
    pub(crate) const fn granularity(&self) -> Granularity {
        match self {
            Self::Grapheme(..) => Granularity::Grapheme,
            Self::Word(..) => Granularity::Word,
            Self::Sentence(..) => Granularity::Sentence,
        }
    }

    /// Runs the wrapped segmenter over the UTF-16 `input`, returning the matching
    /// [`NativeSegmentIterator`] variant that walks the segment boundaries.
    pub(crate) fn segment<'l, 's>(&'l self, input: &'s [u16]) -> NativeSegmentIterator<'l, 's> {
        match self {
            Self::Grapheme(grapheme) => {
                NativeSegmentIterator::Grapheme(grapheme.segment_utf16(input))
            }
            Self::Word(word) => NativeSegmentIterator::Word(word.segment_utf16(input)),
            Self::Sentence(sentence) => {
                NativeSegmentIterator::Sentence(sentence.segment_utf16(input))
            }
        }
    }
}
// Registers `Segmenter` as an `Intl` service so it can be used with
// `resolve_locale::<Self>` and `supported_locales::<Self::LangMarker>` in this file.
impl Service for Segmenter {
// NOTE(review): presumably the data marker used to decide which locales are
// available for this service — confirm against the `Service` trait definition.
type LangMarker = WordBreakDataV1Marker;
// The unit type: this service declares no additional locale options.
type LocaleOptions = ();
}
impl IntrinsicObject for Segmenter {
    /// Registers the `Segmenter` constructor, its static `supportedLocalesOf` method,
    /// the `@@toStringTag` property and the prototype methods on the given realm.
    fn init(realm: &Realm) {
        // Keep the guard bound to a name so the profiler event spans the whole function.
        let _profiler_guard = Profiler::global().start_event(Self::NAME, "init");

        let builder = BuiltInBuilder::from_standard_constructor::<Self>(realm);
        let builder = builder.static_method(Self::supported_locales_of, "supportedLocalesOf", 1);
        let builder = builder.property(
            JsSymbol::to_string_tag(),
            "Intl.Segmenter",
            Attribute::CONFIGURABLE,
        );
        let builder = builder
            .method(Self::resolved_options, "resolvedOptions", 0)
            .method(Self::segment, "segment", 1);

        builder.build();
    }

    /// Fetches the `Segmenter` constructor object from the realm intrinsics.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        let standard = Self::STANDARD_CONSTRUCTOR(intrinsics.constructors());
        standard.constructor()
    }
}
impl BuiltInObject for Segmenter {
/// Name of this builtin; also used as the profiler event label in `init`.
const NAME: &'static str = "Segmenter";
}
impl BuiltInConstructor for Segmenter {
    /// Value of the constructor's `length` property (both parameters are optional).
    const LENGTH: usize = 0;

    /// Accessor for the `Segmenter` entry in the realm's standard constructors.
    const STANDARD_CONSTRUCTOR: fn(&StandardConstructors) -> &StandardConstructor =
        StandardConstructors::segmenter;

    /// [`Intl.Segmenter ( [ locales [ , options ] ] )`][spec].
    ///
    /// Creates a new `Intl.Segmenter` object from the requested `locales` (argument 0)
    /// and `options` (argument 1).
    ///
    /// # Errors
    ///
    /// - Returns a `TypeError` if called without `new` (`new_target` is undefined).
    /// - Propagates any error raised while reading the prototype from `new_target`,
    ///   canonicalizing the locale list, or reading the options.
    /// - Returns a `TypeError` if the native segmenter cannot be created for the
    ///   requested granularity.
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter
    fn constructor(
        new_target: &JsValue,
        args: &[JsValue],
        context: &mut Context<'_>,
    ) -> JsResult<JsValue> {
        // 1. If NewTarget is undefined, throw a TypeError exception.
        if new_target.is_undefined() {
            return Err(JsNativeError::typ()
                .with_message("cannot call `Intl.Segmenter` constructor without `new`")
                .into());
        }

        // 2. Let internalSlotsList be « [[InitializedSegmenter]], [[Locale]], [[SegmenterGranularity]] ».
        // 3. Let segmenter be ? OrdinaryCreateFromConstructor(NewTarget, "%Segmenter.prototype%", internalSlotsList).
        //
        // The prototype lookup is observable from user code (it reads `NewTarget.prototype`),
        // so it has to happen before the locales and options are processed. The object itself
        // is only allocated once all of its internal data is available.
        let proto =
            get_prototype_from_constructor(new_target, StandardConstructors::segmenter, context)?;

        let locales = args.get_or_undefined(0);
        let options = args.get_or_undefined(1);

        // 4. Let requestedLocales be ? CanonicalizeLocaleList(locales).
        let locales = canonicalize_locale_list(locales, context)?;

        // 5. Set options to ? GetOptionsObject(options).
        let options = get_options_object(options)?;

        // 6. Let opt be a new Record.
        // 7. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit").
        let matcher =
            get_option::<LocaleMatcher>(&options, utf16!("localeMatcher"), false, context)?
                .unwrap_or_default();

        // 8. Set opt.[[localeMatcher]] to matcher.
        // 9. Let localeData be %Segmenter%.[[LocaleData]].
        // 10. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]], localeData).
        // 11. Set segmenter.[[Locale]] to r.[[locale]].
        let locale = resolve_locale::<Self>(
            &locales,
            &mut IntlOptions {
                matcher,
                ..Default::default()
            },
            context.icu(),
        );

        // 12. Let granularity be ? GetOption(options, "granularity", string, « "grapheme", "word", "sentence" », "grapheme").
        let granularity =
            get_option::<Granularity>(&options, utf16!("granularity"), false, context)?
                .unwrap_or_default();

        // 13. Set segmenter.[[SegmenterGranularity]] to granularity.
        // The granularity is stored implicitly as the variant of the native segmenter.
        let native = context
            .icu()
            .provider()
            .try_new_segmenter(granularity)
            .map_err(|err| JsNativeError::typ().with_message(err.to_string()))?;

        let segmenter = JsObject::from_proto_and_data_with_shared_shape(
            context.root_shape(),
            proto,
            ObjectData::segmenter(Self { locale, native }),
        );

        // 14. Return segmenter.
        Ok(segmenter.into())
    }
}
impl Segmenter {
    /// [`Intl.Segmenter.supportedLocalesOf ( locales [ , options ] )`][spec].
    ///
    /// Returns an array containing those of the provided locales that are supported in
    /// segmentation without having to fall back to the runtime's default locale.
    ///
    /// More information:
    ///  - [MDN documentation][mdn]
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter.supportedlocalesof
    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/supportedLocalesOf
    fn supported_locales_of(
        _: &JsValue,
        args: &[JsValue],
        context: &mut Context<'_>,
    ) -> JsResult<JsValue> {
        let locales = args.get_or_undefined(0);
        let options = args.get_or_undefined(1);

        // 1. Let availableLocales be %Segmenter%.[[AvailableLocales]].
        // 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
        let requested_locales = canonicalize_locale_list(locales, context)?;

        // 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
        supported_locales::<<Self as Service>::LangMarker>(&requested_locales, options, context)
            .map(JsValue::from)
    }

    /// [`Intl.Segmenter.prototype.resolvedOptions ( )`][spec].
    ///
    /// Returns a new object with properties reflecting the locale and granularity
    /// computed during the construction of the current `Intl.Segmenter` object.
    ///
    /// # Errors
    ///
    /// Returns a `TypeError` if `this` is not an `Intl.Segmenter` object.
    ///
    /// More information:
    ///  - [MDN documentation][mdn]
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-Intl.Segmenter.prototype.resolvedoptions
    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/resolvedOptions
    fn resolved_options(
        this: &JsValue,
        _: &[JsValue],
        context: &mut Context<'_>,
    ) -> JsResult<JsValue> {
        // 1. Let segmenter be the this value.
        // 2. Perform ? RequireInternalSlot(segmenter, [[InitializedSegmenter]]).
        let segmenter = this.as_object().map(JsObject::borrow).ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`resolved_options` can only be called on an `Intl.Segmenter` object")
        })?;
        let segmenter = segmenter.as_segmenter().ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`resolved_options` can only be called on an `Intl.Segmenter` object")
        })?;

        // 3. Let options be OrdinaryObjectCreate(%Object.prototype%).
        // 4. For each row of Table 19, except the header row, in table order, do
        //     a. Let p be the Property value of the current row.
        //     b. Let v be the value of segmenter's internal slot whose name is the Internal Slot value of the current row.
        //     c. Assert: v is not undefined.
        //     d. Perform ! CreateDataPropertyOrThrow(options, p, v).
        let options = ObjectInitializer::new(context)
            .property(
                js_string!("locale"),
                segmenter.locale.to_string(),
                Attribute::all(),
            )
            .property(
                js_string!("granularity"),
                segmenter.native.granularity().to_string(),
                Attribute::all(),
            )
            .build();

        // 5. Return options.
        Ok(options.into())
    }

    /// [`Intl.Segmenter.prototype.segment ( string )`][spec].
    ///
    /// Segments a string according to the locale and granularity of this `Intl.Segmenter` object.
    ///
    /// # Errors
    ///
    /// Returns a `TypeError` if `this` is not an `Intl.Segmenter` object, and propagates
    /// any error raised while converting the argument to a string.
    ///
    /// [spec]: https://tc39.es/ecma402/#sec-intl.segmenter.prototype.segment
    fn segment(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
        // 1. Let segmenter be the this value.
        // 2. Perform ? RequireInternalSlot(segmenter, [[InitializedSegmenter]]).
        let segmenter = this
            .as_object()
            .filter(|o| o.borrow().is_segmenter())
            .ok_or_else(|| {
                JsNativeError::typ()
                    .with_message("`segment` can only be called on an `Intl.Segmenter` object")
            })?;

        // 3. Let string be ? ToString(string).
        let string = args.get_or_undefined(0).to_string(context)?;

        // 4. Return ! CreateSegmentsObject(segmenter, string).
        Ok(Segments::create(segmenter.clone(), string, context).into())
    }
}
/// Builds the plain object describing a single segment of `string`, following
/// [`CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )`][spec].
///
/// The resulting object has the properties `segment` (the slice of `string` covered
/// by `range`), `index` (the start of `range`) and `input` (the whole `string`).
/// When `is_word_like` is `Some`, an `isWordLike` property carrying that value is
/// added as well; per the spec this applies to the "word" granularity.
///
/// # Panics
///
/// Panics if `range` ends past the end of `string` or if `range` is empty.
///
/// [spec]: https://tc39.es/ecma402/#sec-createsegmentdataobject
fn create_segment_data_object(
    string: JsString,
    range: Range<usize>,
    is_word_like: Option<bool>,
    context: &mut Context<'_>,
) -> JsObject {
    // 1. Let len be the length of string.
    // 2. Assert: startIndex ≥ 0.
    //    Guaranteed by the unsigned index type.
    // 3. Assert: endIndex ≤ len.
    assert!(range.end <= string.len());
    // 4. Assert: startIndex < endIndex.
    assert!(range.start < range.end);

    // Remember the start before `range` is consumed by the slicing below.
    let index = range.start;

    // 6. Let segment be the substring of string from startIndex to endIndex.
    let segment = js_string!(&string[range]);

    // 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
    let mut result = ObjectInitializer::new(context);

    // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
    // 8. Perform ! CreateDataPropertyOrThrow(result, "index", 𝔽(startIndex)).
    // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
    result
        .property(js_string!("segment"), segment, Attribute::all())
        .property(js_string!("index"), index, Attribute::all())
        .property(js_string!("input"), string, Attribute::all());

    // 10. Let granularity be segmenter.[[SegmenterGranularity]].
    // 11. If granularity is "word", then
    //     a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" according to locale segmenter.[[Locale]].
    //     b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
    if let Some(word_like) = is_word_like {
        result.property(js_string!("isWordLike"), word_like, Attribute::all());
    }

    // 12. Return result.
    result.build()
}