/// A single segmentation test vector: a raw input byte string together with
/// the pieces it is expected to be divided into.
///
/// All fields borrow `'static` data, so a value can be placed directly in a
/// `static` table without any allocation.
pub(super) struct SegmentationTest {
/// Human-readable description of the case. In the table entries that follow
/// this struct, `÷` marks a position where a break is expected and `×` a
/// position where it is not; the bracketed numbers are presumably the
/// Unicode segmentation rule that decides each position (TODO confirm
/// against the generator of this table).
pub desc: &'static str,
/// The bytes to be segmented. The table entries store the UTF-8 encoding of
/// the code points named in `desc`.
pub input: &'static [u8],
/// The expected segments, in order. In the table entries, concatenating
/// these slices reproduces `input` exactly.
pub expected: &'static [&'static [u8]],
}
pub(super) static UNICODE_GRAPHEME_CLUSTER_TESTS: &[SegmentationTest] = &[
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b" ", expected: &[
b" ", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b" \xcc\x88 ", expected: &[
b" \xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b" \r", expected: &[
b" ", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b" \xcc\x88\r", expected: &[
b" \xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b" \n", expected: &[
b" ", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b" \xcc\x88\n", expected: &[
b" \xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b" \x01", expected: &[
b" ", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b" \xcc\x88\x01", expected: &[
b" \xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b" \xe2\x80\x8c", expected: &[
b" \xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b" \xcc\x88\xe2\x80\x8c", expected: &[
b" \xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b" \xf0\x9f\x87\xa6", expected: &[
b" ", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b" \xcc\x88\xf0\x9f\x87\xa6", expected: &[
b" \xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b" \xd8\x80", expected: &[
b" ", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b" \xcc\x88\xd8\x80", expected: &[
b" \xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b" \xe0\xa8\x83", expected: &[
b" \xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b" \xcc\x88\xe0\xa8\x83", expected: &[
b" \xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b" \xe1\x84\x80", expected: &[
b" ", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b" \xcc\x88\xe1\x84\x80", expected: &[
b" \xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b" \xe1\x85\xa0", expected: &[
b" ", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b" \xcc\x88\xe1\x85\xa0", expected: &[
b" \xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b" \xe1\x86\xa8", expected: &[
b" ", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b" \xcc\x88\xe1\x86\xa8", expected: &[
b" \xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b" \xea\xb0\x80", expected: &[
b" ", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b" \xcc\x88\xea\xb0\x80", expected: &[
b" \xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b" \xea\xb0\x81", expected: &[
b" ", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b" \xcc\x88\xea\xb0\x81", expected: &[
b" \xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b" \xe0\xa4\x83", expected: &[
b" \xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b" \xcc\x88\xe0\xa4\x83", expected: &[
b" \xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b" \xe0\xa4\x84", expected: &[
b" ", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b" \xcc\x88\xe0\xa4\x84", expected: &[
b" \xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b" \xe0\xb5\x8e", expected: &[
b" ", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b" \xcc\x88\xe0\xb5\x8e", expected: &[
b" \xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b" \xe0\xa4\x95", expected: &[
b" ", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b" \xcc\x88\xe0\xa4\x95", expected: &[
b" \xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b" \xe2\x8c\x9a", expected: &[
b" ", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b" \xcc\x88\xe2\x8c\x9a", expected: &[
b" \xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b" \xcc\x80", expected: &[
b" \xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b" \xcc\x88\xcc\x80", expected: &[
b" \xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b" \xe0\xa4\x80", expected: &[
b" \xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b" \xcc\x88\xe0\xa4\x80", expected: &[
b" \xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b" \xe0\xa5\x8d", expected: &[
b" \xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b" \xcc\x88\xe0\xa5\x8d", expected: &[
b" \xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b" \xe2\x80\x8d", expected: &[
b" \xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b" \xcc\x88\xe2\x80\x8d", expected: &[
b" \xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b" \xcd\xb8", expected: &[
b" ", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b" \xcc\x88\xcd\xb8", expected: &[
b" \xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]",
input: b"\r ", expected: &[
b"\r", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\r\xcc\x88 ", expected: &[
b"\r", b"\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\r\r", expected: &[
b"\r", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\r\xcc\x88\r", expected: &[
b"\r", b"\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\r\n", expected: &[
b"\r\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\r\xcc\x88\n", expected: &[
b"\r", b"\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\r\x01", expected: &[
b"\r", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\r\xcc\x88\x01", expected: &[
b"\r", b"\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\r\xe2\x80\x8c", expected: &[
b"\r", b"\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\r\xcc\x88\xe2\x80\x8c", expected: &[
b"\r", b"\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\r\xf0\x9f\x87\xa6", expected: &[
b"\r", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\r\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\r", b"\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\r\xd8\x80", expected: &[
b"\r", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\r\xcc\x88\xd8\x80", expected: &[
b"\r", b"\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\r\xe0\xa8\x83", expected: &[
b"\r", b"\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xa8\x83", expected: &[
b"\r", b"\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\r\xe1\x84\x80", expected: &[
b"\r", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\r\xcc\x88\xe1\x84\x80", expected: &[
b"\r", b"\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\r\xe1\x85\xa0", expected: &[
b"\r", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\r\xcc\x88\xe1\x85\xa0", expected: &[
b"\r", b"\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\r\xe1\x86\xa8", expected: &[
b"\r", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\r\xcc\x88\xe1\x86\xa8", expected: &[
b"\r", b"\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\r\xea\xb0\x80", expected: &[
b"\r", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\r\xcc\x88\xea\xb0\x80", expected: &[
b"\r", b"\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\r\xea\xb0\x81", expected: &[
b"\r", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\r\xcc\x88\xea\xb0\x81", expected: &[
b"\r", b"\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\r\xe0\xa4\x83", expected: &[
b"\r", b"\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xa4\x83", expected: &[
b"\r", b"\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\r\xe0\xa4\x84", expected: &[
b"\r", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xa4\x84", expected: &[
b"\r", b"\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\r\xe0\xb5\x8e", expected: &[
b"\r", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xb5\x8e", expected: &[
b"\r", b"\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\r\xe0\xa4\x95", expected: &[
b"\r", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xa4\x95", expected: &[
b"\r", b"\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\r\xe2\x8c\x9a", expected: &[
b"\r", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\r\xcc\x88\xe2\x8c\x9a", expected: &[
b"\r", b"\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\r\xcc\x80", expected: &[
b"\r", b"\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\r\xcc\x88\xcc\x80", expected: &[
b"\r", b"\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\r\xe0\xa4\x80", expected: &[
b"\r", b"\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xa4\x80", expected: &[
b"\r", b"\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\r\xe0\xa5\x8d", expected: &[
b"\r", b"\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\r\xcc\x88\xe0\xa5\x8d", expected: &[
b"\r", b"\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\r\xe2\x80\x8d", expected: &[
b"\r", b"\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\r\xcc\x88\xe2\x80\x8d", expected: &[
b"\r", b"\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\r\xcd\xb8", expected: &[
b"\r", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\r\xcc\x88\xcd\xb8", expected: &[
b"\r", b"\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]",
input: b"\n ", expected: &[
b"\n", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\n\xcc\x88 ", expected: &[
b"\n", b"\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\n\r", expected: &[
b"\n", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\n\xcc\x88\r", expected: &[
b"\n", b"\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\n\n", expected: &[
b"\n", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\n\xcc\x88\n", expected: &[
b"\n", b"\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\n\x01", expected: &[
b"\n", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\n\xcc\x88\x01", expected: &[
b"\n", b"\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\n\xe2\x80\x8c", expected: &[
b"\n", b"\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\n\xcc\x88\xe2\x80\x8c", expected: &[
b"\n", b"\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\n\xf0\x9f\x87\xa6", expected: &[
b"\n", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\n\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\n", b"\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\n\xd8\x80", expected: &[
b"\n", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\n\xcc\x88\xd8\x80", expected: &[
b"\n", b"\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\n\xe0\xa8\x83", expected: &[
b"\n", b"\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xa8\x83", expected: &[
b"\n", b"\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\n\xe1\x84\x80", expected: &[
b"\n", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\n\xcc\x88\xe1\x84\x80", expected: &[
b"\n", b"\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\n\xe1\x85\xa0", expected: &[
b"\n", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\n\xcc\x88\xe1\x85\xa0", expected: &[
b"\n", b"\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\n\xe1\x86\xa8", expected: &[
b"\n", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\n\xcc\x88\xe1\x86\xa8", expected: &[
b"\n", b"\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\n\xea\xb0\x80", expected: &[
b"\n", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\n\xcc\x88\xea\xb0\x80", expected: &[
b"\n", b"\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\n\xea\xb0\x81", expected: &[
b"\n", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\n\xcc\x88\xea\xb0\x81", expected: &[
b"\n", b"\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\n\xe0\xa4\x83", expected: &[
b"\n", b"\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xa4\x83", expected: &[
b"\n", b"\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\n\xe0\xa4\x84", expected: &[
b"\n", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xa4\x84", expected: &[
b"\n", b"\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\n\xe0\xb5\x8e", expected: &[
b"\n", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xb5\x8e", expected: &[
b"\n", b"\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\n\xe0\xa4\x95", expected: &[
b"\n", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xa4\x95", expected: &[
b"\n", b"\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\n\xe2\x8c\x9a", expected: &[
b"\n", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\n\xcc\x88\xe2\x8c\x9a", expected: &[
b"\n", b"\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\n\xcc\x80", expected: &[
b"\n", b"\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\n\xcc\x88\xcc\x80", expected: &[
b"\n", b"\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\n\xe0\xa4\x80", expected: &[
b"\n", b"\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xa4\x80", expected: &[
b"\n", b"\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\n\xe0\xa5\x8d", expected: &[
b"\n", b"\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\n\xcc\x88\xe0\xa5\x8d", expected: &[
b"\n", b"\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\n\xe2\x80\x8d", expected: &[
b"\n", b"\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\n\xcc\x88\xe2\x80\x8d", expected: &[
b"\n", b"\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\n\xcd\xb8", expected: &[
b"\n", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\n\xcc\x88\xcd\xb8", expected: &[
b"\n", b"\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]",
input: b"\x01 ", expected: &[
b"\x01", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\x01\xcc\x88 ", expected: &[
b"\x01", b"\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\x01\r", expected: &[
b"\x01", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\x01\xcc\x88\r", expected: &[
b"\x01", b"\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\x01\n", expected: &[
b"\x01", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\x01\xcc\x88\n", expected: &[
b"\x01", b"\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\x01\x01", expected: &[
b"\x01", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\x01\xcc\x88\x01", expected: &[
b"\x01", b"\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\x01\xe2\x80\x8c", expected: &[
b"\x01", b"\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\x01\xcc\x88\xe2\x80\x8c", expected: &[
b"\x01", b"\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\x01\xf0\x9f\x87\xa6", expected: &[
b"\x01", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\x01\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\x01", b"\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\x01\xd8\x80", expected: &[
b"\x01", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\x01\xcc\x88\xd8\x80", expected: &[
b"\x01", b"\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\x01\xe0\xa8\x83", expected: &[
b"\x01", b"\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xa8\x83", expected: &[
b"\x01", b"\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\x01\xe1\x84\x80", expected: &[
b"\x01", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\x01\xcc\x88\xe1\x84\x80", expected: &[
b"\x01", b"\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\x01\xe1\x85\xa0", expected: &[
b"\x01", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\x01\xcc\x88\xe1\x85\xa0", expected: &[
b"\x01", b"\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\x01\xe1\x86\xa8", expected: &[
b"\x01", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\x01\xcc\x88\xe1\x86\xa8", expected: &[
b"\x01", b"\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\x01\xea\xb0\x80", expected: &[
b"\x01", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\x01\xcc\x88\xea\xb0\x80", expected: &[
b"\x01", b"\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\x01\xea\xb0\x81", expected: &[
b"\x01", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\x01\xcc\x88\xea\xb0\x81", expected: &[
b"\x01", b"\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\x01\xe0\xa4\x83", expected: &[
b"\x01", b"\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xa4\x83", expected: &[
b"\x01", b"\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\x01\xe0\xa4\x84", expected: &[
b"\x01", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xa4\x84", expected: &[
b"\x01", b"\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\x01\xe0\xb5\x8e", expected: &[
b"\x01", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xb5\x8e", expected: &[
b"\x01", b"\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\x01\xe0\xa4\x95", expected: &[
b"\x01", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xa4\x95", expected: &[
b"\x01", b"\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\x01\xe2\x8c\x9a", expected: &[
b"\x01", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\x01\xcc\x88\xe2\x8c\x9a", expected: &[
b"\x01", b"\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xcc\x80", expected: &[
b"\x01", b"\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xcc\x88\xcc\x80", expected: &[
b"\x01", b"\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xe0\xa4\x80", expected: &[
b"\x01", b"\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xa4\x80", expected: &[
b"\x01", b"\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xe0\xa5\x8d", expected: &[
b"\x01", b"\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xcc\x88\xe0\xa5\x8d", expected: &[
b"\x01", b"\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xe2\x80\x8d", expected: &[
b"\x01", b"\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\x01\xcc\x88\xe2\x80\x8d", expected: &[
b"\x01", b"\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\x01\xcd\xb8", expected: &[
b"\x01", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\x01\xcc\x88\xcd\xb8", expected: &[
b"\x01", b"\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe2\x80\x8c ", expected: &[
b"\xe2\x80\x8c", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88 ", expected: &[
b"\xe2\x80\x8c\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe2\x80\x8c\r", expected: &[
b"\xe2\x80\x8c", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\r", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe2\x80\x8c\n", expected: &[
b"\xe2\x80\x8c", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\n", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe2\x80\x8c\x01", expected: &[
b"\xe2\x80\x8c", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\x01", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe2\x80\x8c", expected: &[
b"\xe2\x80\x8c\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe2\x80\x8c\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe2\x80\x8c\xf0\x9f\x87\xa6", expected: &[
b"\xe2\x80\x8c", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe2\x80\x8c\xd8\x80", expected: &[
b"\xe2\x80\x8c", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xd8\x80", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xa8\x83", expected: &[
b"\xe2\x80\x8c\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe2\x80\x8c\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe1\x84\x80", expected: &[
b"\xe2\x80\x8c", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe1\x84\x80", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe1\x85\xa0", expected: &[
b"\xe2\x80\x8c", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe1\x86\xa8", expected: &[
b"\xe2\x80\x8c", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe2\x80\x8c\xea\xb0\x80", expected: &[
b"\xe2\x80\x8c", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xea\xb0\x80", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe2\x80\x8c\xea\xb0\x81", expected: &[
b"\xe2\x80\x8c", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xea\xb0\x81", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xa4\x83", expected: &[
b"\xe2\x80\x8c\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe2\x80\x8c\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xa4\x84", expected: &[
b"\xe2\x80\x8c", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xb5\x8e", expected: &[
b"\xe2\x80\x8c", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xa4\x95", expected: &[
b"\xe2\x80\x8c", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe2\x8c\x9a", expected: &[
b"\xe2\x80\x8c", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x80", expected: &[
b"\xe2\x80\x8c\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xcc\x80", expected: &[
b"\xe2\x80\x8c\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xa4\x80", expected: &[
b"\xe2\x80\x8c\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe2\x80\x8c\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe0\xa5\x8d", expected: &[
b"\xe2\x80\x8c\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe2\x80\x8c\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xe2\x80\x8d", expected: &[
b"\xe2\x80\x8c\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe2\x80\x8c\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcd\xb8", expected: &[
b"\xe2\x80\x8c", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe2\x80\x8c\xcc\x88\xcd\xb8", expected: &[
b"\xe2\x80\x8c\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6 ", expected: &[
b"\xf0\x9f\x87\xa6", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88 ", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\r", expected: &[
b"\xf0\x9f\x87\xa6", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\r", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\n", expected: &[
b"\xf0\x9f\x87\xa6", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\n", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\x01", expected: &[
b"\xf0\x9f\x87\xa6", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\x01", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe2\x80\x8c", expected: &[
b"\xf0\x9f\x87\xa6\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe2\x80\x8c", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa6", expected: &[
b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xd8\x80", expected: &[
b"\xf0\x9f\x87\xa6", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xd8\x80", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xa8\x83", expected: &[
b"\xf0\x9f\x87\xa6\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa8\x83", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe1\x84\x80", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe1\x84\x80", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe1\x85\xa0", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe1\x85\xa0", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe1\x86\xa8", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe1\x86\xa8", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xea\xb0\x80", expected: &[
b"\xf0\x9f\x87\xa6", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xea\xb0\x80", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xea\xb0\x81", expected: &[
b"\xf0\x9f\x87\xa6", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xea\xb0\x81", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xa4\x83", expected: &[
b"\xf0\x9f\x87\xa6\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa4\x83", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xa4\x84", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa4\x84", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xb5\x8e", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xa4\x95", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa4\x95", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe2\x8c\x9a", expected: &[
b"\xf0\x9f\x87\xa6", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x80", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xcc\x80", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xa4\x80", expected: &[
b"\xf0\x9f\x87\xa6\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa4\x80", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe0\xa5\x8d", expected: &[
b"\xf0\x9f\x87\xa6\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xe2\x80\x8d", expected: &[
b"\xf0\x9f\x87\xa6\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xe2\x80\x8d", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcd\xb8", expected: &[
b"\xf0\x9f\x87\xa6", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xcc\x88\xcd\xb8", expected: &[
b"\xf0\x9f\x87\xa6\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]",
input: b"\xd8\x80 ", expected: &[
b"\xd8\x80 ", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88 ", expected: &[
b"\xd8\x80\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xd8\x80\r", expected: &[
b"\xd8\x80", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\r", expected: &[
b"\xd8\x80\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xd8\x80\n", expected: &[
b"\xd8\x80", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\n", expected: &[
b"\xd8\x80\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xd8\x80\x01", expected: &[
b"\xd8\x80", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\x01", expected: &[
b"\xd8\x80\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xd8\x80\xe2\x80\x8c", expected: &[
b"\xd8\x80\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe2\x80\x8c", expected: &[
b"\xd8\x80\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xd8\x80\xf0\x9f\x87\xa6", expected: &[
b"\xd8\x80\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xd8\x80\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xd8\x80\xd8\x80", expected: &[
b"\xd8\x80\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xd8\x80", expected: &[
b"\xd8\x80\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xd8\x80\xe0\xa8\x83", expected: &[
b"\xd8\x80\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xa8\x83", expected: &[
b"\xd8\x80\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xd8\x80\xe1\x84\x80", expected: &[
b"\xd8\x80\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe1\x84\x80", expected: &[
b"\xd8\x80\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xd8\x80\xe1\x85\xa0", expected: &[
b"\xd8\x80\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe1\x85\xa0", expected: &[
b"\xd8\x80\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xd8\x80\xe1\x86\xa8", expected: &[
b"\xd8\x80\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe1\x86\xa8", expected: &[
b"\xd8\x80\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xd8\x80\xea\xb0\x80", expected: &[
b"\xd8\x80\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xea\xb0\x80", expected: &[
b"\xd8\x80\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xd8\x80\xea\xb0\x81", expected: &[
b"\xd8\x80\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xea\xb0\x81", expected: &[
b"\xd8\x80\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xd8\x80\xe0\xa4\x83", expected: &[
b"\xd8\x80\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xa4\x83", expected: &[
b"\xd8\x80\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xd8\x80\xe0\xa4\x84", expected: &[
b"\xd8\x80\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xa4\x84", expected: &[
b"\xd8\x80\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xd8\x80\xe0\xb5\x8e", expected: &[
b"\xd8\x80\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xd8\x80\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xd8\x80\xe0\xa4\x95", expected: &[
b"\xd8\x80\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xa4\x95", expected: &[
b"\xd8\x80\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]",
input: b"\xd8\x80\xe2\x8c\x9a", expected: &[
b"\xd8\x80\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xd8\x80\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xcc\x80", expected: &[
b"\xd8\x80\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xcc\x80", expected: &[
b"\xd8\x80\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xe0\xa4\x80", expected: &[
b"\xd8\x80\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xa4\x80", expected: &[
b"\xd8\x80\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xe0\xa5\x8d", expected: &[
b"\xd8\x80\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xd8\x80\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xe2\x80\x8d", expected: &[
b"\xd8\x80\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xe2\x80\x8d", expected: &[
b"\xd8\x80\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xd8\x80\xcd\xb8", expected: &[
b"\xd8\x80\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xd8\x80\xcc\x88\xcd\xb8", expected: &[
b"\xd8\x80\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa8\x83 ", expected: &[
b"\xe0\xa8\x83", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88 ", expected: &[
b"\xe0\xa8\x83\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa8\x83\r", expected: &[
b"\xe0\xa8\x83", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\r", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa8\x83\n", expected: &[
b"\xe0\xa8\x83", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\n", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa8\x83\x01", expected: &[
b"\xe0\xa8\x83", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\x01", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe2\x80\x8c", expected: &[
b"\xe0\xa8\x83\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xa8\x83\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa8\x83\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa8\x83", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa8\x83\xd8\x80", expected: &[
b"\xe0\xa8\x83", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xd8\x80", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xa8\x83", expected: &[
b"\xe0\xa8\x83\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xa8\x83\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe1\x84\x80", expected: &[
b"\xe0\xa8\x83", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe1\x85\xa0", expected: &[
b"\xe0\xa8\x83", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe1\x86\xa8", expected: &[
b"\xe0\xa8\x83", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa8\x83\xea\xb0\x80", expected: &[
b"\xe0\xa8\x83", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa8\x83\xea\xb0\x81", expected: &[
b"\xe0\xa8\x83", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xa4\x83", expected: &[
b"\xe0\xa8\x83\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xa8\x83\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xa4\x84", expected: &[
b"\xe0\xa8\x83", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xb5\x8e", expected: &[
b"\xe0\xa8\x83", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xa4\x95", expected: &[
b"\xe0\xa8\x83", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe2\x8c\x9a", expected: &[
b"\xe0\xa8\x83", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x80", expected: &[
b"\xe0\xa8\x83\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xcc\x80", expected: &[
b"\xe0\xa8\x83\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xa4\x80", expected: &[
b"\xe0\xa8\x83\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xa8\x83\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe0\xa5\x8d", expected: &[
b"\xe0\xa8\x83\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xa8\x83\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xe2\x80\x8d", expected: &[
b"\xe0\xa8\x83\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xa8\x83\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcd\xb8", expected: &[
b"\xe0\xa8\x83", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa8\x83\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xa8\x83\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe1\x84\x80 ", expected: &[
b"\xe1\x84\x80", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88 ", expected: &[
b"\xe1\x84\x80\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe1\x84\x80\r", expected: &[
b"\xe1\x84\x80", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\r", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe1\x84\x80\n", expected: &[
b"\xe1\x84\x80", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\n", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe1\x84\x80\x01", expected: &[
b"\xe1\x84\x80", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\x01", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe1\x84\x80\xe2\x80\x8c", expected: &[
b"\xe1\x84\x80\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe1\x84\x80\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe1\x84\x80\xf0\x9f\x87\xa6", expected: &[
b"\xe1\x84\x80", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe1\x84\x80\xd8\x80", expected: &[
b"\xe1\x84\x80", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xd8\x80", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xa8\x83", expected: &[
b"\xe1\x84\x80\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe1\x84\x80\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x84\x80\xe1\x84\x80", expected: &[
b"\xe1\x84\x80\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe1\x84\x80", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe1\x84\x80\xe1\x85\xa0", expected: &[
b"\xe1\x84\x80\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe1\x84\x80\xe1\x86\xa8", expected: &[
b"\xe1\x84\x80", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe1\x84\x80\xea\xb0\x80", expected: &[
b"\xe1\x84\x80\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xea\xb0\x80", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe1\x84\x80\xea\xb0\x81", expected: &[
b"\xe1\x84\x80\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xea\xb0\x81", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xa4\x83", expected: &[
b"\xe1\x84\x80\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe1\x84\x80\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xa4\x84", expected: &[
b"\xe1\x84\x80", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xb5\x8e", expected: &[
b"\xe1\x84\x80", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xa4\x95", expected: &[
b"\xe1\x84\x80", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe1\x84\x80\xe2\x8c\x9a", expected: &[
b"\xe1\x84\x80", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x80", expected: &[
b"\xe1\x84\x80\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xcc\x80", expected: &[
b"\xe1\x84\x80\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xa4\x80", expected: &[
b"\xe1\x84\x80\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe1\x84\x80\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xe0\xa5\x8d", expected: &[
b"\xe1\x84\x80\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe1\x84\x80\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xe2\x80\x8d", expected: &[
b"\xe1\x84\x80\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe1\x84\x80\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe1\x84\x80\xcd\xb8", expected: &[
b"\xe1\x84\x80", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe1\x84\x80\xcc\x88\xcd\xb8", expected: &[
b"\xe1\x84\x80\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe1\x85\xa0 ", expected: &[
b"\xe1\x85\xa0", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88 ", expected: &[
b"\xe1\x85\xa0\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe1\x85\xa0\r", expected: &[
b"\xe1\x85\xa0", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\r", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe1\x85\xa0\n", expected: &[
b"\xe1\x85\xa0", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\n", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe1\x85\xa0\x01", expected: &[
b"\xe1\x85\xa0", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\x01", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe2\x80\x8c", expected: &[
b"\xe1\x85\xa0\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe1\x85\xa0\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe1\x85\xa0\xf0\x9f\x87\xa6", expected: &[
b"\xe1\x85\xa0", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe1\x85\xa0\xd8\x80", expected: &[
b"\xe1\x85\xa0", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xd8\x80", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xa8\x83", expected: &[
b"\xe1\x85\xa0\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe1\x85\xa0\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe1\x84\x80", expected: &[
b"\xe1\x85\xa0", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe1\x84\x80", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe1\x85\xa0", expected: &[
b"\xe1\x85\xa0\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe1\x86\xa8", expected: &[
b"\xe1\x85\xa0\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe1\x85\xa0\xea\xb0\x80", expected: &[
b"\xe1\x85\xa0", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xea\xb0\x80", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe1\x85\xa0\xea\xb0\x81", expected: &[
b"\xe1\x85\xa0", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xea\xb0\x81", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xa4\x83", expected: &[
b"\xe1\x85\xa0\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe1\x85\xa0\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xa4\x84", expected: &[
b"\xe1\x85\xa0", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xb5\x8e", expected: &[
b"\xe1\x85\xa0", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xa4\x95", expected: &[
b"\xe1\x85\xa0", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe2\x8c\x9a", expected: &[
b"\xe1\x85\xa0", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x80", expected: &[
b"\xe1\x85\xa0\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xcc\x80", expected: &[
b"\xe1\x85\xa0\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xa4\x80", expected: &[
b"\xe1\x85\xa0\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe1\x85\xa0\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe0\xa5\x8d", expected: &[
b"\xe1\x85\xa0\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe1\x85\xa0\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xe2\x80\x8d", expected: &[
b"\xe1\x85\xa0\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe1\x85\xa0\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcd\xb8", expected: &[
b"\xe1\x85\xa0", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe1\x85\xa0\xcc\x88\xcd\xb8", expected: &[
b"\xe1\x85\xa0\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe1\x86\xa8 ", expected: &[
b"\xe1\x86\xa8", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88 ", expected: &[
b"\xe1\x86\xa8\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe1\x86\xa8\r", expected: &[
b"\xe1\x86\xa8", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\r", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe1\x86\xa8\n", expected: &[
b"\xe1\x86\xa8", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\n", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe1\x86\xa8\x01", expected: &[
b"\xe1\x86\xa8", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\x01", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe2\x80\x8c", expected: &[
b"\xe1\x86\xa8\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe1\x86\xa8\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe1\x86\xa8\xf0\x9f\x87\xa6", expected: &[
b"\xe1\x86\xa8", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe1\x86\xa8\xd8\x80", expected: &[
b"\xe1\x86\xa8", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xd8\x80", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xa8\x83", expected: &[
b"\xe1\x86\xa8\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe1\x86\xa8\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe1\x84\x80", expected: &[
b"\xe1\x86\xa8", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe1\x84\x80", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe1\x85\xa0", expected: &[
b"\xe1\x86\xa8", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe1\x86\xa8", expected: &[
b"\xe1\x86\xa8\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe1\x86\xa8\xea\xb0\x80", expected: &[
b"\xe1\x86\xa8", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xea\xb0\x80", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe1\x86\xa8\xea\xb0\x81", expected: &[
b"\xe1\x86\xa8", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xea\xb0\x81", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xa4\x83", expected: &[
b"\xe1\x86\xa8\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe1\x86\xa8\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xa4\x84", expected: &[
b"\xe1\x86\xa8", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xb5\x8e", expected: &[
b"\xe1\x86\xa8", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xa4\x95", expected: &[
b"\xe1\x86\xa8", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe2\x8c\x9a", expected: &[
b"\xe1\x86\xa8", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x80", expected: &[
b"\xe1\x86\xa8\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xcc\x80", expected: &[
b"\xe1\x86\xa8\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xa4\x80", expected: &[
b"\xe1\x86\xa8\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe1\x86\xa8\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe0\xa5\x8d", expected: &[
b"\xe1\x86\xa8\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe1\x86\xa8\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xe2\x80\x8d", expected: &[
b"\xe1\x86\xa8\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe1\x86\xa8\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcd\xb8", expected: &[
b"\xe1\x86\xa8", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe1\x86\xa8\xcc\x88\xcd\xb8", expected: &[
b"\xe1\x86\xa8\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xea\xb0\x80 ", expected: &[
b"\xea\xb0\x80", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88 ", expected: &[
b"\xea\xb0\x80\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xea\xb0\x80\r", expected: &[
b"\xea\xb0\x80", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\r", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xea\xb0\x80\n", expected: &[
b"\xea\xb0\x80", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\n", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xea\xb0\x80\x01", expected: &[
b"\xea\xb0\x80", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\x01", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xea\xb0\x80\xe2\x80\x8c", expected: &[
b"\xea\xb0\x80\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe2\x80\x8c", expected: &[
b"\xea\xb0\x80\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xea\xb0\x80\xf0\x9f\x87\xa6", expected: &[
b"\xea\xb0\x80", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xea\xb0\x80\xd8\x80", expected: &[
b"\xea\xb0\x80", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xd8\x80", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xa8\x83", expected: &[
b"\xea\xb0\x80\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xa8\x83", expected: &[
b"\xea\xb0\x80\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xea\xb0\x80\xe1\x84\x80", expected: &[
b"\xea\xb0\x80", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe1\x84\x80", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xea\xb0\x80\xe1\x85\xa0", expected: &[
b"\xea\xb0\x80\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe1\x85\xa0", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xea\xb0\x80\xe1\x86\xa8", expected: &[
b"\xea\xb0\x80\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe1\x86\xa8", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xea\xb0\x80\xea\xb0\x80", expected: &[
b"\xea\xb0\x80", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xea\xb0\x80", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xea\xb0\x80\xea\xb0\x81", expected: &[
b"\xea\xb0\x80", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xea\xb0\x81", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xa4\x83", expected: &[
b"\xea\xb0\x80\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xa4\x83", expected: &[
b"\xea\xb0\x80\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xa4\x84", expected: &[
b"\xea\xb0\x80", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xa4\x84", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xb5\x8e", expected: &[
b"\xea\xb0\x80", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xa4\x95", expected: &[
b"\xea\xb0\x80", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xa4\x95", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xea\xb0\x80\xe2\x8c\x9a", expected: &[
b"\xea\xb0\x80", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x80", expected: &[
b"\xea\xb0\x80\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xcc\x80", expected: &[
b"\xea\xb0\x80\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xa4\x80", expected: &[
b"\xea\xb0\x80\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xa4\x80", expected: &[
b"\xea\xb0\x80\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xe0\xa5\x8d", expected: &[
b"\xea\xb0\x80\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xea\xb0\x80\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xe2\x80\x8d", expected: &[
b"\xea\xb0\x80\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xe2\x80\x8d", expected: &[
b"\xea\xb0\x80\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xea\xb0\x80\xcd\xb8", expected: &[
b"\xea\xb0\x80", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xea\xb0\x80\xcc\x88\xcd\xb8", expected: &[
b"\xea\xb0\x80\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xea\xb0\x81 ", expected: &[
b"\xea\xb0\x81", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88 ", expected: &[
b"\xea\xb0\x81\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xea\xb0\x81\r", expected: &[
b"\xea\xb0\x81", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\r", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xea\xb0\x81\n", expected: &[
b"\xea\xb0\x81", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\n", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xea\xb0\x81\x01", expected: &[
b"\xea\xb0\x81", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\x01", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xea\xb0\x81\xe2\x80\x8c", expected: &[
b"\xea\xb0\x81\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe2\x80\x8c", expected: &[
b"\xea\xb0\x81\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xea\xb0\x81\xf0\x9f\x87\xa6", expected: &[
b"\xea\xb0\x81", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xea\xb0\x81\xd8\x80", expected: &[
b"\xea\xb0\x81", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xd8\x80", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xa8\x83", expected: &[
b"\xea\xb0\x81\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xa8\x83", expected: &[
b"\xea\xb0\x81\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xea\xb0\x81\xe1\x84\x80", expected: &[
b"\xea\xb0\x81", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe1\x84\x80", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xea\xb0\x81\xe1\x85\xa0", expected: &[
b"\xea\xb0\x81", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe1\x85\xa0", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xea\xb0\x81\xe1\x86\xa8", expected: &[
b"\xea\xb0\x81\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe1\x86\xa8", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xea\xb0\x81\xea\xb0\x80", expected: &[
b"\xea\xb0\x81", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xea\xb0\x80", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xea\xb0\x81\xea\xb0\x81", expected: &[
b"\xea\xb0\x81", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xea\xb0\x81", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xa4\x83", expected: &[
b"\xea\xb0\x81\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xa4\x83", expected: &[
b"\xea\xb0\x81\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xa4\x84", expected: &[
b"\xea\xb0\x81", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xa4\x84", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xb5\x8e", expected: &[
b"\xea\xb0\x81", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xa4\x95", expected: &[
b"\xea\xb0\x81", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xa4\x95", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xea\xb0\x81\xe2\x8c\x9a", expected: &[
b"\xea\xb0\x81", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x80", expected: &[
b"\xea\xb0\x81\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xcc\x80", expected: &[
b"\xea\xb0\x81\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xa4\x80", expected: &[
b"\xea\xb0\x81\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xa4\x80", expected: &[
b"\xea\xb0\x81\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xe0\xa5\x8d", expected: &[
b"\xea\xb0\x81\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xea\xb0\x81\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xe2\x80\x8d", expected: &[
b"\xea\xb0\x81\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xe2\x80\x8d", expected: &[
b"\xea\xb0\x81\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xea\xb0\x81\xcd\xb8", expected: &[
b"\xea\xb0\x81", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xea\xb0\x81\xcc\x88\xcd\xb8", expected: &[
b"\xea\xb0\x81\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x83 ", expected: &[
b"\xe0\xa4\x83", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88 ", expected: &[
b"\xe0\xa4\x83\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x83\r", expected: &[
b"\xe0\xa4\x83", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\r", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x83\n", expected: &[
b"\xe0\xa4\x83", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\n", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x83\x01", expected: &[
b"\xe0\xa4\x83", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\x01", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x83\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x83\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x83\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x83", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x83\xd8\x80", expected: &[
b"\xe0\xa4\x83", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xd8\x80", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x83\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x83\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe1\x84\x80", expected: &[
b"\xe0\xa4\x83", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x83", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x83", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x83\xea\xb0\x80", expected: &[
b"\xe0\xa4\x83", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x83\xea\xb0\x81", expected: &[
b"\xe0\xa4\x83", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x83\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x83\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x83", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x83", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x83", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x83", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x80", expected: &[
b"\xe0\xa4\x83\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xcc\x80", expected: &[
b"\xe0\xa4\x83\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x83\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x83\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x83\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x83\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x83\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x83\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcd\xb8", expected: &[
b"\xe0\xa4\x83", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x83\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xa4\x83\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x84 ", expected: &[
b"\xe0\xa4\x84", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88 ", expected: &[
b"\xe0\xa4\x84\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x84\r", expected: &[
b"\xe0\xa4\x84", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\r", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x84\n", expected: &[
b"\xe0\xa4\x84", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\n", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x84\x01", expected: &[
b"\xe0\xa4\x84", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\x01", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x84\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x84\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x84\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x84", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x84\xd8\x80", expected: &[
b"\xe0\xa4\x84", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xd8\x80", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x84\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x84\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe1\x84\x80", expected: &[
b"\xe0\xa4\x84", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x84", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x84", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x84\xea\xb0\x80", expected: &[
b"\xe0\xa4\x84", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x84\xea\xb0\x81", expected: &[
b"\xe0\xa4\x84", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x84\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x84\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x84", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x84", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x84", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x84", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x80", expected: &[
b"\xe0\xa4\x84\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xcc\x80", expected: &[
b"\xe0\xa4\x84\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x84\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x84\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x84\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x84\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x84\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x84\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcd\xb8", expected: &[
b"\xe0\xa4\x84", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x84\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xa4\x84\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xb5\x8e ", expected: &[
b"\xe0\xb5\x8e ", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88 ", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xb5\x8e\r", expected: &[
b"\xe0\xb5\x8e", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\r", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xb5\x8e\n", expected: &[
b"\xe0\xb5\x8e", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\n", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xb5\x8e\x01", expected: &[
b"\xe0\xb5\x8e", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\x01", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe2\x80\x8c", expected: &[
b"\xe0\xb5\x8e\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xb5\x8e\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xd8\x80", expected: &[
b"\xe0\xb5\x8e\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xd8\x80", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xa8\x83", expected: &[
b"\xe0\xb5\x8e\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe1\x84\x80", expected: &[
b"\xe0\xb5\x8e\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe1\x85\xa0", expected: &[
b"\xe0\xb5\x8e\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe1\x86\xa8", expected: &[
b"\xe0\xb5\x8e\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xea\xb0\x80", expected: &[
b"\xe0\xb5\x8e\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xea\xb0\x81", expected: &[
b"\xe0\xb5\x8e\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xa4\x83", expected: &[
b"\xe0\xb5\x8e\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xa4\x84", expected: &[
b"\xe0\xb5\x8e\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xb5\x8e", expected: &[
b"\xe0\xb5\x8e\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xa4\x95", expected: &[
b"\xe0\xb5\x8e\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe2\x8c\x9a", expected: &[
b"\xe0\xb5\x8e\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x80", expected: &[
b"\xe0\xb5\x8e\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xcc\x80", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xa4\x80", expected: &[
b"\xe0\xb5\x8e\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe0\xa5\x8d", expected: &[
b"\xe0\xb5\x8e\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xe2\x80\x8d", expected: &[
b"\xe0\xb5\x8e\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xb5\x8e\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcd\xb8", expected: &[
b"\xe0\xb5\x8e\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xb5\x8e\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xb5\x8e\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x95 ", expected: &[
b"\xe0\xa4\x95", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88 ", expected: &[
b"\xe0\xa4\x95\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x95\r", expected: &[
b"\xe0\xa4\x95", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\r", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x95\n", expected: &[
b"\xe0\xa4\x95", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\n", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x95\x01", expected: &[
b"\xe0\xa4\x95", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\x01", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x95\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x95\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x95\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x95", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x95\xd8\x80", expected: &[
b"\xe0\xa4\x95", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xd8\x80", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x95\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x95\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe1\x84\x80", expected: &[
b"\xe0\xa4\x95", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x95", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x95", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x95\xea\xb0\x80", expected: &[
b"\xe0\xa4\x95", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x95\xea\xb0\x81", expected: &[
b"\xe0\xa4\x95", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x95\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x95\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x95", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x95", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x95", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x95", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x80", expected: &[
b"\xe0\xa4\x95\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xcc\x80", expected: &[
b"\xe0\xa4\x95\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x95\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x95\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x95\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x95\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x95\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcd\xb8", expected: &[
b"\xe0\xa4\x95", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x95\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xa4\x95\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe2\x8c\x9a ", expected: &[
b"\xe2\x8c\x9a", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88 ", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe2\x8c\x9a\r", expected: &[
b"\xe2\x8c\x9a", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\r", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe2\x8c\x9a\n", expected: &[
b"\xe2\x8c\x9a", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\n", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe2\x8c\x9a\x01", expected: &[
b"\xe2\x8c\x9a", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\x01", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe2\x80\x8c", expected: &[
b"\xe2\x8c\x9a\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xf0\x9f\x87\xa6", expected: &[
b"\xe2\x8c\x9a", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xd8\x80", expected: &[
b"\xe2\x8c\x9a", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xd8\x80", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xa8\x83", expected: &[
b"\xe2\x8c\x9a\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe1\x84\x80", expected: &[
b"\xe2\x8c\x9a", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe1\x84\x80", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe1\x85\xa0", expected: &[
b"\xe2\x8c\x9a", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe1\x86\xa8", expected: &[
b"\xe2\x8c\x9a", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xea\xb0\x80", expected: &[
b"\xe2\x8c\x9a", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xea\xb0\x80", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xea\xb0\x81", expected: &[
b"\xe2\x8c\x9a", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xea\xb0\x81", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xa4\x83", expected: &[
b"\xe2\x8c\x9a\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xa4\x84", expected: &[
b"\xe2\x8c\x9a", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xb5\x8e", expected: &[
b"\xe2\x8c\x9a", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xa4\x95", expected: &[
b"\xe2\x8c\x9a", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe2\x8c\x9a", expected: &[
b"\xe2\x8c\x9a", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x80", expected: &[
b"\xe2\x8c\x9a\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xcc\x80", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xa4\x80", expected: &[
b"\xe2\x8c\x9a\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe0\xa5\x8d", expected: &[
b"\xe2\x8c\x9a\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xe2\x80\x8d", expected: &[
b"\xe2\x8c\x9a\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe2\x8c\x9a\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcd\xb8", expected: &[
b"\xe2\x8c\x9a", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe2\x8c\x9a\xcc\x88\xcd\xb8", expected: &[
b"\xe2\x8c\x9a\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xcc\x80 ", expected: &[
b"\xcc\x80", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88 ", expected: &[
b"\xcc\x80\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xcc\x80\r", expected: &[
b"\xcc\x80", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\r", expected: &[
b"\xcc\x80\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xcc\x80\n", expected: &[
b"\xcc\x80", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\n", expected: &[
b"\xcc\x80\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xcc\x80\x01", expected: &[
b"\xcc\x80", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\x01", expected: &[
b"\xcc\x80\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xcc\x80\xe2\x80\x8c", expected: &[
b"\xcc\x80\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe2\x80\x8c", expected: &[
b"\xcc\x80\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xcc\x80\xf0\x9f\x87\xa6", expected: &[
b"\xcc\x80", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xcc\x80\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xcc\x80\xd8\x80", expected: &[
b"\xcc\x80", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xd8\x80", expected: &[
b"\xcc\x80\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xcc\x80\xe0\xa8\x83", expected: &[
b"\xcc\x80\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xa8\x83", expected: &[
b"\xcc\x80\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xcc\x80\xe1\x84\x80", expected: &[
b"\xcc\x80", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe1\x84\x80", expected: &[
b"\xcc\x80\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xcc\x80\xe1\x85\xa0", expected: &[
b"\xcc\x80", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe1\x85\xa0", expected: &[
b"\xcc\x80\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xcc\x80\xe1\x86\xa8", expected: &[
b"\xcc\x80", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe1\x86\xa8", expected: &[
b"\xcc\x80\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xcc\x80\xea\xb0\x80", expected: &[
b"\xcc\x80", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xea\xb0\x80", expected: &[
b"\xcc\x80\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xcc\x80\xea\xb0\x81", expected: &[
b"\xcc\x80", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xea\xb0\x81", expected: &[
b"\xcc\x80\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcc\x80\xe0\xa4\x83", expected: &[
b"\xcc\x80\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xa4\x83", expected: &[
b"\xcc\x80\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcc\x80\xe0\xa4\x84", expected: &[
b"\xcc\x80", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xa4\x84", expected: &[
b"\xcc\x80\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcc\x80\xe0\xb5\x8e", expected: &[
b"\xcc\x80", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xcc\x80\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xcc\x80\xe0\xa4\x95", expected: &[
b"\xcc\x80", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xa4\x95", expected: &[
b"\xcc\x80\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xcc\x80\xe2\x8c\x9a", expected: &[
b"\xcc\x80", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xcc\x80\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xcc\x80", expected: &[
b"\xcc\x80\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xcc\x80", expected: &[
b"\xcc\x80\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xe0\xa4\x80", expected: &[
b"\xcc\x80\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xa4\x80", expected: &[
b"\xcc\x80\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xe0\xa5\x8d", expected: &[
b"\xcc\x80\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xcc\x80\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xe2\x80\x8d", expected: &[
b"\xcc\x80\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xe2\x80\x8d", expected: &[
b"\xcc\x80\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xcc\x80\xcd\xb8", expected: &[
b"\xcc\x80", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xcc\x80\xcc\x88\xcd\xb8", expected: &[
b"\xcc\x80\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x80 ", expected: &[
b"\xe0\xa4\x80", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88 ", expected: &[
b"\xe0\xa4\x80\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x80\r", expected: &[
b"\xe0\xa4\x80", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\r", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x80\n", expected: &[
b"\xe0\xa4\x80", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\n", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x80\x01", expected: &[
b"\xe0\xa4\x80", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\x01", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x80\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xa4\x80\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x80\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x80", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x80\xd8\x80", expected: &[
b"\xe0\xa4\x80", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xd8\x80", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x80\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xa4\x80\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe1\x84\x80", expected: &[
b"\xe0\xa4\x80", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x80", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x80", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x80\xea\xb0\x80", expected: &[
b"\xe0\xa4\x80", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x80\xea\xb0\x81", expected: &[
b"\xe0\xa4\x80", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x80\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xa4\x80\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x80", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x80", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x80", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x80", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x80", expected: &[
b"\xe0\xa4\x80\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xcc\x80", expected: &[
b"\xe0\xa4\x80\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x80\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xa4\x80\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x80\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xa4\x80\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x80\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xa4\x80\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcd\xb8", expected: &[
b"\xe0\xa4\x80", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa4\x80\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xa4\x80\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa5\x8d ", expected: &[
b"\xe0\xa5\x8d", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88 ", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa5\x8d\r", expected: &[
b"\xe0\xa5\x8d", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\r", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa5\x8d\n", expected: &[
b"\xe0\xa5\x8d", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\n", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa5\x8d\x01", expected: &[
b"\xe0\xa5\x8d", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\x01", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe2\x80\x8c", expected: &[
b"\xe0\xa5\x8d\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa5\x8d", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xd8\x80", expected: &[
b"\xe0\xa5\x8d", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xd8\x80", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xa8\x83", expected: &[
b"\xe0\xa5\x8d\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe1\x84\x80", expected: &[
b"\xe0\xa5\x8d", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe1\x84\x80", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe1\x85\xa0", expected: &[
b"\xe0\xa5\x8d", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe1\x86\xa8", expected: &[
b"\xe0\xa5\x8d", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xea\xb0\x80", expected: &[
b"\xe0\xa5\x8d", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xea\xb0\x80", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xea\xb0\x81", expected: &[
b"\xe0\xa5\x8d", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xea\xb0\x81", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xa4\x83", expected: &[
b"\xe0\xa5\x8d\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xa4\x84", expected: &[
b"\xe0\xa5\x8d", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xb5\x8e", expected: &[
b"\xe0\xa5\x8d", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xa4\x95", expected: &[
b"\xe0\xa5\x8d", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe2\x8c\x9a", expected: &[
b"\xe0\xa5\x8d", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x80", expected: &[
b"\xe0\xa5\x8d\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xcc\x80", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xa4\x80", expected: &[
b"\xe0\xa5\x8d\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe0\xa5\x8d", expected: &[
b"\xe0\xa5\x8d\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xe2\x80\x8d", expected: &[
b"\xe0\xa5\x8d\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe0\xa5\x8d\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcd\xb8", expected: &[
b"\xe0\xa5\x8d", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe0\xa5\x8d\xcc\x88\xcd\xb8", expected: &[
b"\xe0\xa5\x8d\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe2\x80\x8d ", expected: &[
b"\xe2\x80\x8d", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88 ", expected: &[
b"\xe2\x80\x8d\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe2\x80\x8d\r", expected: &[
b"\xe2\x80\x8d", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\r", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe2\x80\x8d\n", expected: &[
b"\xe2\x80\x8d", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\n", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe2\x80\x8d\x01", expected: &[
b"\xe2\x80\x8d", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\x01", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe2\x80\x8c", expected: &[
b"\xe2\x80\x8d\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe2\x80\x8c", expected: &[
b"\xe2\x80\x8d\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe2\x80\x8d\xf0\x9f\x87\xa6", expected: &[
b"\xe2\x80\x8d", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe2\x80\x8d\xd8\x80", expected: &[
b"\xe2\x80\x8d", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xd8\x80", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xa8\x83", expected: &[
b"\xe2\x80\x8d\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xa8\x83", expected: &[
b"\xe2\x80\x8d\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe1\x84\x80", expected: &[
b"\xe2\x80\x8d", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe1\x84\x80", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe1\x85\xa0", expected: &[
b"\xe2\x80\x8d", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe1\x85\xa0", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe1\x86\xa8", expected: &[
b"\xe2\x80\x8d", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe1\x86\xa8", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe2\x80\x8d\xea\xb0\x80", expected: &[
b"\xe2\x80\x8d", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xea\xb0\x80", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe2\x80\x8d\xea\xb0\x81", expected: &[
b"\xe2\x80\x8d", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xea\xb0\x81", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xa4\x83", expected: &[
b"\xe2\x80\x8d\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xa4\x83", expected: &[
b"\xe2\x80\x8d\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xa4\x84", expected: &[
b"\xe2\x80\x8d", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xa4\x84", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xb5\x8e", expected: &[
b"\xe2\x80\x8d", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xa4\x95", expected: &[
b"\xe2\x80\x8d", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xa4\x95", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe2\x8c\x9a", expected: &[
b"\xe2\x80\x8d", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x80", expected: &[
b"\xe2\x80\x8d\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xcc\x80", expected: &[
b"\xe2\x80\x8d\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xa4\x80", expected: &[
b"\xe2\x80\x8d\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xa4\x80", expected: &[
b"\xe2\x80\x8d\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe0\xa5\x8d", expected: &[
b"\xe2\x80\x8d\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xe2\x80\x8d\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xe2\x80\x8d", expected: &[
b"\xe2\x80\x8d\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xe2\x80\x8d", expected: &[
b"\xe2\x80\x8d\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcd\xb8", expected: &[
b"\xe2\x80\x8d", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xe2\x80\x8d\xcc\x88\xcd\xb8", expected: &[
b"\xe2\x80\x8d\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xcd\xb8 ", expected: &[
b"\xcd\xb8", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88 ", expected: &[
b"\xcd\xb8\xcc\x88", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xcd\xb8\r", expected: &[
b"\xcd\xb8", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\r", expected: &[
b"\xcd\xb8\xcc\x88", b"\r", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xcd\xb8\n", expected: &[
b"\xcd\xb8", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\n", expected: &[
b"\xcd\xb8\xcc\x88", b"\n", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xcd\xb8\x01", expected: &[
b"\xcd\xb8", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\x01", expected: &[
b"\xcd\xb8\xcc\x88", b"\x01", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xcd\xb8\xe2\x80\x8c", expected: &[
b"\xcd\xb8\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe2\x80\x8c", expected: &[
b"\xcd\xb8\xcc\x88\xe2\x80\x8c", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xcd\xb8\xf0\x9f\x87\xa6", expected: &[
b"\xcd\xb8", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xf0\x9f\x87\xa6", expected: &[
b"\xcd\xb8\xcc\x88", b"\xf0\x9f\x87\xa6", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xcd\xb8\xd8\x80", expected: &[
b"\xcd\xb8", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xd8\x80", expected: &[
b"\xcd\xb8\xcc\x88", b"\xd8\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xa8\x83", expected: &[
b"\xcd\xb8\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xa8\x83", expected: &[
b"\xcd\xb8\xcc\x88\xe0\xa8\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xcd\xb8\xe1\x84\x80", expected: &[
b"\xcd\xb8", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe1\x84\x80", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xcd\xb8\xe1\x85\xa0", expected: &[
b"\xcd\xb8", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe1\x85\xa0", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe1\x85\xa0", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xcd\xb8\xe1\x86\xa8", expected: &[
b"\xcd\xb8", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe1\x86\xa8", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe1\x86\xa8", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xcd\xb8\xea\xb0\x80", expected: &[
b"\xcd\xb8", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xea\xb0\x80", expected: &[
b"\xcd\xb8\xcc\x88", b"\xea\xb0\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xcd\xb8\xea\xb0\x81", expected: &[
b"\xcd\xb8", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xea\xb0\x81", expected: &[
b"\xcd\xb8\xcc\x88", b"\xea\xb0\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xa4\x83", expected: &[
b"\xcd\xb8\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xa4\x83", expected: &[
b"\xcd\xb8\xcc\x88\xe0\xa4\x83", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xa4\x84", expected: &[
b"\xcd\xb8", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xa4\x84", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe0\xa4\x84", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xb5\x8e", expected: &[
b"\xcd\xb8", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xb5\x8e", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe0\xb5\x8e", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xa4\x95", expected: &[
b"\xcd\xb8", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xa4\x95", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe0\xa4\x95", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xcd\xb8\xe2\x8c\x9a", expected: &[
b"\xcd\xb8", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe2\x8c\x9a", expected: &[
b"\xcd\xb8\xcc\x88", b"\xe2\x8c\x9a", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x80", expected: &[
b"\xcd\xb8\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xcc\x80", expected: &[
b"\xcd\xb8\xcc\x88\xcc\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xa4\x80", expected: &[
b"\xcd\xb8\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xa4\x80", expected: &[
b"\xcd\xb8\xcc\x88\xe0\xa4\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xe0\xa5\x8d", expected: &[
b"\xcd\xb8\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe0\xa5\x8d", expected: &[
b"\xcd\xb8\xcc\x88\xe0\xa5\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xe2\x80\x8d", expected: &[
b"\xcd\xb8\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xe2\x80\x8d", expected: &[
b"\xcd\xb8\xcc\x88\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xcd\xb8\xcd\xb8", expected: &[
b"\xcd\xb8", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]",
input: b"\xcd\xb8\xcc\x88\xcd\xb8", expected: &[
b"\xcd\xb8\xcc\x88", b"\xcd\xb8", ],
},
SegmentationTest {
desc: "÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]",
input: b"\r\na\n\xcc\x88", expected: &[
b"\r\n", b"a", b"\n", b"\xcc\x88", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]",
input: b"a\xcc\x88", expected: &[
b"a\xcc\x88", ],
},
SegmentationTest {
desc: "÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]",
input: b" \xe2\x80\x8d\xd9\x86", expected: &[
b" \xe2\x80\x8d", b"\xd9\x86", ],
},
SegmentationTest {
desc: "÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]",
input: b"\xd9\x86\xe2\x80\x8d ", expected: &[
b"\xd9\x86\xe2\x80\x8d", b" ", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xe1\x84\x80\xe1\x84\x80", expected: &[
b"\xe1\x84\x80\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xea\xb0\x80\xe1\x86\xa8\xe1\x84\x80", expected: &[
b"\xea\xb0\x80\xe1\x86\xa8", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]",
input: b"\xea\xb0\x81\xe1\x86\xa8\xe1\x84\x80", expected: &[
b"\xea\xb0\x81\xe1\x86\xa8", b"\xe1\x84\x80", ],
},
SegmentationTest {
desc: "÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7\xf0\x9f\x87\xa8b", expected: &[
b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7", b"\xf0\x9f\x87\xa8", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7\xf0\x9f\x87\xa8b", expected: &[
b"a", b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7", b"\xf0\x9f\x87\xa8", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7\xe2\x80\x8d\xf0\x9f\x87\xa8b", expected: &[
b"a", b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7\xe2\x80\x8d", b"\xf0\x9f\x87\xa8", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xf0\x9f\x87\xa6\xe2\x80\x8d\xf0\x9f\x87\xa7\xf0\x9f\x87\xa8b", expected: &[
b"a", b"\xf0\x9f\x87\xa6\xe2\x80\x8d", b"\xf0\x9f\x87\xa7\xf0\x9f\x87\xa8", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7\xf0\x9f\x87\xa8\xf0\x9f\x87\xa9b", expected: &[
b"a", b"\xf0\x9f\x87\xa6\xf0\x9f\x87\xa7", b"\xf0\x9f\x87\xa8\xf0\x9f\x87\xa9", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]",
input: b"a\xe2\x80\x8d", expected: &[
b"a\xe2\x80\x8d", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xcc\x88b", expected: &[
b"a\xcc\x88", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xe0\xa4\x83b", expected: &[
b"a\xe0\xa4\x83", b"b", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]",
input: b"a\xd8\x80b", expected: &[
b"a", b"\xd8\x80b", ],
},
SegmentationTest {
desc: "÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [999.0] BABY (ExtPict) ÷ [0.3]",
input: b"\xf0\x9f\x91\xb6\xf0\x9f\x8f\xbf\xf0\x9f\x91\xb6", expected: &[
b"\xf0\x9f\x91\xb6\xf0\x9f\x8f\xbf", b"\xf0\x9f\x91\xb6", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [999.0] BABY (ExtPict) ÷ [0.3]",
input: b"a\xf0\x9f\x8f\xbf\xf0\x9f\x91\xb6", expected: &[
b"a\xf0\x9f\x8f\xbf", b"\xf0\x9f\x91\xb6", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]",
input: b"a\xf0\x9f\x8f\xbf\xf0\x9f\x91\xb6\xe2\x80\x8d\xf0\x9f\x9b\x91", expected: &[
b"a\xf0\x9f\x8f\xbf", b"\xf0\x9f\x91\xb6\xe2\x80\x8d\xf0\x9f\x9b\x91", ],
},
SegmentationTest {
desc: "÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [0.3]",
input:
b"\xf0\x9f\x91\xb6\xf0\x9f\x8f\xbf\xcc\x88\xe2\x80\x8d\xf0\x9f\x91\xb6\xf0\x9f\x8f\xbf", expected: &[
b"\xf0\x9f\x91\xb6\xf0\x9f\x8f\xbf\xcc\x88\xe2\x80\x8d\xf0\x9f\x91\xb6\xf0\x9f\x8f\xbf", ],
},
SegmentationTest {
desc: "÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]",
input: b"\xf0\x9f\x9b\x91\xe2\x80\x8d\xf0\x9f\x9b\x91", expected: &[
b"\xf0\x9f\x9b\x91\xe2\x80\x8d\xf0\x9f\x9b\x91", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]",
input: b"a\xe2\x80\x8d\xf0\x9f\x9b\x91", expected: &[
b"a\xe2\x80\x8d", b"\xf0\x9f\x9b\x91", ],
},
SegmentationTest {
desc: "÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]",
input: b"\xe2\x9c\x81\xe2\x80\x8d\xe2\x9c\x81", expected: &[
b"\xe2\x9c\x81\xe2\x80\x8d\xe2\x9c\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]",
input: b"a\xe2\x80\x8d\xe2\x9c\x81", expected: &[
b"a\xe2\x80\x8d", b"\xe2\x9c\x81", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95", b"\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa5\x8d\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa5\x8d\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\xbc\xe2\x80\x8d\xe0\xa5\x8d\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95\xe0\xa4\xbc\xe2\x80\x8d\xe0\xa5\x8d\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa4\xbc\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95\xe0\xa4\xbc\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER YA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xaf", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xaf", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8da", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d", b"a", ],
},
SegmentationTest {
desc: "÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"a\xe0\xa5\x8d\xe0\xa4\xa4", expected: &[
b"a\xe0\xa5\x8d", b"\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"?\xe0\xa5\x8d\xe0\xa4\xa4", expected: &[
b"?\xe0\xa5\x8d", b"\xe0\xa4\xa4", ],
},
SegmentationTest {
desc: "÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]",
input: b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa5\x8d\xe0\xa4\xa4", expected: &[
b"\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa5\x8d\xe0\xa4\xa4", ],
},
];