1include!(concat!(env!("OUT_DIR"), "/data.rs"));
2
3use std::cmp;
4
/// A single language reported by [`detect`].
///
/// The three string fields are copied from the language's metadata entry;
/// `count` and `score` describe how much of the input fell inside the
/// language's codepoint ranges.
#[derive(Debug, Clone, PartialEq)]
pub struct Match {
    /// Tag of the language, as stored in the language metadata.
    pub tag: &'static str,
    /// Name of the language, as stored in the language metadata.
    pub name: &'static str,
    /// Native name of the language (`native_name` in the language metadata).
    pub native: &'static str,
    /// Number of input codepoints that fell inside the language's ranges.
    pub count: u32,
    /// `count` divided by the language's entry in the totals table.
    pub score: f64,
}
18
19pub fn detect<T>(codepoints: T, threshold: f64) -> Vec<Match>
32where
33 T: IntoIterator<Item = Range<Codepoint>>,
34{
35 let mut counts = [0; LANGUAGE_COUNT];
36
37 for [input_lower, input_upper] in codepoints {
38 for i in 0..LANGUAGE_COUNT {
39 for [range_lower, range_upper] in RANGES[i] {
40 if input_lower <= *range_upper && *range_lower <= input_upper {
41 counts[i] += cmp::min(input_upper, *range_upper)
42 - cmp::max(input_lower, *range_lower)
43 + 1;
44 }
45
46 if input_upper <= *range_upper {
47 break;
48 }
49 }
50 }
51 }
52
53 let mut result = Vec::new();
54
55 for i in 0..LANGUAGE_COUNT {
56 let score = counts[i] as f64 / TOTALS[i] as f64;
57 if score >= threshold && counts[i] > 0 {
58 result.push(Match {
59 tag: METADATA[i].tag,
60 name: METADATA[i].name,
61 native: METADATA[i].native_name,
62 count: counts[i],
63 score,
64 });
65 }
66 }
67
68 result.sort_by(|a, b| a.score.partial_cmp(&b.score).unwrap().reverse());
69
70 result
71}
72
#[cfg(test)]
mod tests {
    use super::*;

    // The expected tags, names and counts below depend on the language table
    // pulled in from the generated `data.rs` (presumably a test fixture table
    // when building tests; verify against the build script).

    /// No input ranges produce no matches.
    #[test]
    fn it_returns_an_empty_array() {
        assert!(detect([], 0.5).is_empty());
    }

    /// A `Vec` of ranges is accepted as input.
    #[test]
    fn it_takes_a_vector() {
        let input = vec![[1, 3]];

        let found = detect(input, 1.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t1", "test1")]);
    }

    /// A fixed-size array of ranges is accepted as input.
    #[test]
    fn it_takes_an_array() {
        let input = [[1, 3]];

        let found = detect(input, 1.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t1", "test1")]);
    }

    /// A codepoint that no language covers produces no matches.
    #[test]
    fn it_returns_an_empty_array_with_an_invalid_codepoint() {
        assert!(detect([[256, 256]], 0.5).is_empty());
    }

    /// A single matching codepoint is enough with a zero threshold.
    #[test]
    fn it_returns_the_test_language() {
        let found = detect([[1, 1]], 0.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t1", "test1")]);
    }

    /// Partial coverage is rejected when full coverage is required.
    #[test]
    fn it_does_not_return_if_threshold_not_met() {
        assert!(detect([[1, 2]], 1.0).is_empty());
    }

    /// Full coverage satisfies a threshold of `1.0`.
    #[test]
    fn it_returns_if_threshold_is_met() {
        let found = detect([[1, 3]], 1.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t1", "test1")]);
    }

    /// Partial coverage satisfies a lower threshold.
    #[test]
    fn it_returns_if_threshold_is_partially_met() {
        let found = detect([[1, 2]], 0.6);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t1", "test1")]);
    }

    /// Separate input ranges can match separate languages.
    #[test]
    fn it_returns_multiple_languages() {
        let found = detect([[1, 1], [4, 4]], 0.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t1", "test1"), ("t2", "test2")]);
    }

    /// One codepoint can match several languages at once.
    #[test]
    fn it_returns_overlapping_languages() {
        let found = detect([[8, 8]], 0.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name)).collect();
        assert_eq!(summary, [("t4", "test4"), ("t3", "test3")]);
    }

    /// Only the overlapping part of an input range is counted per language.
    #[test]
    fn it_returns_correct_counts_on_partial_range_matches() {
        let found = detect([[3, 5]], 0.0);
        let summary: Vec<_> = found.iter().map(|m| (m.tag, m.name, m.count)).collect();
        assert_eq!(summary, [("t2", "test2", 2), ("t1", "test1", 1)]);
    }

    /// Matches are ordered by descending score.
    #[test]
    fn it_returns_sorted_results() {
        let found = detect([[1, 1], [4, 6]], 0.0);
        let tags: Vec<_> = found.iter().map(|m| m.tag).collect();
        assert_eq!(tags, ["t2", "t1"]);
    }

    /// An input range spanning several codepoints matches only the expected language.
    #[test]
    fn it_handles_ranges_correctly() {
        let found = detect([[12, 20]], 0.0);
        let tags: Vec<_> = found.iter().map(|m| m.tag).collect();
        assert_eq!(tags, ["t5"]);
    }
}