1use crate::text::Char;
7
/// Settings that control how duplicate characters are detected.
///
/// Two characters are only considered duplicates when their text is equal,
/// their positions agree within [`tolerance`](Self::tolerance), and every
/// attribute named in [`extra_attrs`](Self::extra_attrs) matches.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DedupeOptions {
    /// Largest allowed absolute difference in `bbox.x0` and in `bbox.top`
    /// (checked independently) for two characters to count as duplicates.
    /// Expressed in the same units as the character bounding boxes.
    pub tolerance: f64,
    /// Names of additional character attributes that must also match.
    ///
    /// Recognised names are `"fontname"`, `"size"`, `"upright"`,
    /// `"stroking_color"` and `"non_stroking_color"`; any other name is
    /// ignored by the comparison.
    pub extra_attrs: Vec<String>,
}
22
23impl Default for DedupeOptions {
24 fn default() -> Self {
25 Self {
26 tolerance: 1.0,
27 extra_attrs: vec!["fontname".to_string(), "size".to_string()],
28 }
29 }
30}
31
32fn attrs_match(a: &Char, b: &Char, attr: &str) -> bool {
34 match attr {
35 "fontname" => a.fontname == b.fontname,
36 "size" => (a.size - b.size).abs() < f64::EPSILON,
37 "upright" => a.upright == b.upright,
38 "stroking_color" => a.stroking_color == b.stroking_color,
39 "non_stroking_color" => a.non_stroking_color == b.non_stroking_color,
40 _ => true, }
42}
43
44fn is_duplicate(a: &Char, b: &Char, options: &DedupeOptions) -> bool {
51 if a.text != b.text {
53 return false;
54 }
55
56 let dx = (a.bbox.x0 - b.bbox.x0).abs();
58 let dy = (a.bbox.top - b.bbox.top).abs();
59 if dx > options.tolerance || dy > options.tolerance {
60 return false;
61 }
62
63 options
65 .extra_attrs
66 .iter()
67 .all(|attr| attrs_match(a, b, attr))
68}
69
70pub fn dedupe_chars(chars: &[Char], options: &DedupeOptions) -> Vec<Char> {
78 let mut kept: Vec<Char> = Vec::with_capacity(chars.len());
79
80 for ch in chars {
81 let dominated = kept.iter().any(|k| is_duplicate(k, ch, options));
82 if !dominated {
83 kept.push(ch.clone());
84 }
85 }
86
87 kept
88}
89
#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::BBox;
    use crate::painting::Color;
    use crate::text::TextDirection;

    /// Builds an upright, left-to-right 12.0-size "Helvetica" character with
    /// an explicit bounding box.
    fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Char {
        let mut ch = make_char_with_font(text, x0, top, "Helvetica", 12.0);
        ch.bbox = BBox::new(x0, top, x1, bottom);
        ch
    }

    /// Builds an upright, left-to-right character with the given font name
    /// and size; its box is 10 wide and 12 tall, anchored at `(x0, top)`.
    fn make_char_with_font(text: &str, x0: f64, top: f64, fontname: &str, size: f64) -> Char {
        let bbox = BBox::new(x0, top, x0 + 10.0, top + 12.0);
        Char {
            text: text.to_owned(),
            bbox,
            fontname: fontname.to_owned(),
            size,
            doctop: top,
            upright: true,
            direction: TextDirection::Ltr,
            stroking_color: None,
            non_stroking_color: None,
            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            char_code: 0,
            mcid: None,
            tag: None,
        }
    }

    /// Runs `dedupe_chars` with `DedupeOptions::default()`.
    fn dedupe_default(chars: &[Char]) -> Vec<Char> {
        dedupe_chars(chars, &DedupeOptions::default())
    }

    #[test]
    fn test_overlapping_identical_chars_deduped() {
        // The second glyph sits within the default tolerance of the first.
        let result = dedupe_default(&[
            make_char("A", 10.0, 20.0, 20.0, 32.0),
            make_char("A", 10.5, 20.3, 20.5, 32.3),
        ]);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].text, "A");
        // The first occurrence is the one that survives.
        assert!((result[0].bbox.x0 - 10.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_non_overlapping_chars_preserved() {
        let result = dedupe_default(&[
            make_char("A", 10.0, 20.0, 20.0, 32.0),
            make_char("A", 50.0, 20.0, 60.0, 32.0),
        ]);

        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_different_text_not_deduped() {
        let result = dedupe_default(&[
            make_char("A", 10.0, 20.0, 20.0, 32.0),
            make_char("B", 10.0, 20.0, 20.0, 32.0),
        ]);

        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_different_font_not_deduped() {
        let result = dedupe_default(&[
            make_char_with_font("A", 10.0, 20.0, "Helvetica", 12.0),
            make_char_with_font("A", 10.0, 20.0, "Times-Roman", 12.0),
        ]);

        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_different_size_not_deduped() {
        let result = dedupe_default(&[
            make_char_with_font("A", 10.0, 20.0, "Helvetica", 12.0),
            make_char_with_font("A", 10.0, 20.0, "Helvetica", 14.0),
        ]);

        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_custom_tolerance() {
        // The glyphs are 2.5 apart horizontally: outside 1.0, inside 3.0.
        let chars = [
            make_char("A", 10.0, 20.0, 20.0, 32.0),
            make_char("A", 12.5, 20.0, 22.5, 32.0),
        ];

        let default_result = dedupe_default(&chars);
        assert_eq!(
            default_result.len(),
            2,
            "Default tolerance should not merge these"
        );

        let wide_options = DedupeOptions {
            tolerance: 3.0,
            ..DedupeOptions::default()
        };
        let wide_result = dedupe_chars(&chars, &wide_options);
        assert_eq!(wide_result.len(), 1, "Wide tolerance should merge these");
    }

    #[test]
    fn test_empty_extra_attrs() {
        // With no extra attributes, font name and size differences are ignored.
        let chars = [
            make_char_with_font("A", 10.0, 20.0, "Helvetica", 12.0),
            make_char_with_font("A", 10.0, 20.0, "Times-Roman", 14.0),
        ];
        let position_only = DedupeOptions {
            tolerance: 1.0,
            extra_attrs: Vec::new(),
        };

        let result = dedupe_chars(&chars, &position_only);
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_multiple_duplicates_keep_first() {
        let result = dedupe_default(&[
            make_char("A", 10.0, 20.0, 20.0, 32.0),
            make_char("A", 10.2, 20.1, 20.2, 32.1),
            make_char("A", 10.4, 20.2, 20.4, 32.2),
        ]);

        assert_eq!(result.len(), 1);
        assert!((result[0].bbox.x0 - 10.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_mixed_chars_only_duplicates_removed() {
        let result = dedupe_default(&[
            make_char("H", 10.0, 20.0, 20.0, 32.0),
            // Duplicate of the first "H".
            make_char("H", 10.1, 20.0, 20.1, 32.0),
            make_char("e", 20.0, 20.0, 30.0, 32.0),
            make_char("l", 30.0, 20.0, 40.0, 32.0),
            // Same text as the previous "l" but at a different position.
            make_char("l", 40.0, 20.0, 50.0, 32.0),
            make_char("o", 50.0, 20.0, 60.0, 32.0),
        ]);

        assert_eq!(result.len(), 5);
        let texts: Vec<&str> = result.iter().map(|c| c.text.as_str()).collect();
        assert_eq!(texts, vec!["H", "e", "l", "l", "o"]);
    }

    #[test]
    fn test_empty_input() {
        let result = dedupe_default(&[]);
        assert!(result.is_empty());
    }

    #[test]
    fn test_single_char() {
        let result = dedupe_default(&[make_char("A", 10.0, 20.0, 20.0, 32.0)]);
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_color_as_extra_attr() {
        let mut red = make_char("A", 10.0, 20.0, 20.0, 32.0);
        red.non_stroking_color = Some(Color::Rgb(1.0, 0.0, 0.0));
        let mut blue = make_char("A", 10.0, 20.0, 20.0, 32.0);
        blue.non_stroking_color = Some(Color::Rgb(0.0, 0.0, 1.0));
        let chars = [red, blue];

        // Colour is not among the default attributes, so the pair collapses.
        let result = dedupe_default(&chars);
        assert_eq!(result.len(), 1);

        // Once colour is compared as well, both glyphs are kept.
        let color_aware = DedupeOptions {
            tolerance: 1.0,
            extra_attrs: vec![
                String::from("fontname"),
                String::from("size"),
                String::from("non_stroking_color"),
            ],
        };
        let result = dedupe_chars(&chars, &color_aware);
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_default_options() {
        let opts = DedupeOptions::default();
        assert!((opts.tolerance - 1.0).abs() < f64::EPSILON);
        assert_eq!(opts.extra_attrs, vec!["fontname", "size"]);
    }
}