ucd_parse/
grapheme_cluster_break.rs1use std::path::Path;
2
3use crate::{
4 common::{
5 parse_break_test, parse_codepoint_association, CodepointIter,
6 Codepoints, UcdFile, UcdFileByCodepoint,
7 },
8 error::Error,
9};
10
11#[derive(Clone, Debug, Default, Eq, PartialEq)]
13pub struct GraphemeClusterBreak {
14 pub codepoints: Codepoints,
16 pub value: String,
18}
19
20impl UcdFile for GraphemeClusterBreak {
21 fn relative_file_path() -> &'static Path {
22 Path::new("auxiliary/GraphemeBreakProperty.txt")
23 }
24}
25
26impl UcdFileByCodepoint for GraphemeClusterBreak {
27 fn codepoints(&self) -> CodepointIter {
28 self.codepoints.into_iter()
29 }
30}
31
32impl std::str::FromStr for GraphemeClusterBreak {
33 type Err = Error;
34
35 fn from_str(line: &str) -> Result<GraphemeClusterBreak, Error> {
36 let (codepoints, value) = parse_codepoint_association(line)?;
37 Ok(GraphemeClusterBreak { codepoints, value: value.to_string() })
38 }
39}
40
/// One parsed test line of `auxiliary/GraphemeBreakTest.txt`.
///
/// A line lists codepoints separated by `÷` and `×` markers, followed by a
/// `#` comment.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct GraphemeClusterBreakTest {
    /// The codepoint groups produced by `parse_break_test`, one string per
    /// group; for this file each group is one expected grapheme cluster.
    pub grapheme_clusters: Vec<String>,
    /// The comment text that `parse_break_test` returns for the line.
    pub comment: String,
}
52
53impl UcdFile for GraphemeClusterBreakTest {
54 fn relative_file_path() -> &'static Path {
55 Path::new("auxiliary/GraphemeBreakTest.txt")
56 }
57}
58
59impl std::str::FromStr for GraphemeClusterBreakTest {
60 type Err = Error;
61
62 fn from_str(line: &str) -> Result<GraphemeClusterBreakTest, Error> {
63 let (groups, comment) = parse_break_test(line)?;
64 Ok(GraphemeClusterBreakTest { grapheme_clusters: groups, comment })
65 }
66}
67
#[cfg(test)]
mod tests {
    use super::{GraphemeClusterBreak, GraphemeClusterBreakTest};

    /// A property line naming exactly one codepoint.
    #[test]
    fn parse_single() {
        let line = "093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE\n";
        let parsed = line.parse::<GraphemeClusterBreak>().unwrap();
        assert_eq!(parsed.codepoints, 0x093B);
        assert_eq!(parsed.value, "SpacingMark");
    }

    /// A property line naming an inclusive `start..end` codepoint range.
    #[test]
    fn parse_range() {
        let line = "1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z\n";
        let parsed = line.parse::<GraphemeClusterBreak>().unwrap();
        assert_eq!(parsed.codepoints, (0x1F1E6, 0x1F1FF));
        assert_eq!(parsed.value, "Regional_Indicator");
    }

    /// A break-test line: codepoints joined by `×` belong to the same
    /// cluster, while `÷` separates clusters.
    #[test]
    fn parse_test() {
        let line = "÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]\n";

        let parsed = line.parse::<GraphemeClusterBreakTest>().unwrap();

        let expected_clusters =
            vec!["\u{0061}\u{1F3FF}", "\u{1F476}\u{200D}\u{1F6D1}"];
        assert_eq!(parsed.grapheme_clusters, expected_clusters);

        let expected_comment_prefix = "÷ [0.2] LATIN SMALL LETTER A";
        assert!(parsed.comment.starts_with(expected_comment_prefix));
    }
}