wordcloud/cloud/
word_cloud.rs

1use crate::cloud::word::{Word, WordBuilder};
2use crate::common::font::FontSet;
3use std::io::Cursor;
4use std::io::Error;
5
6#[cfg(feature = "background_image")]
7use crate::image::{average_color_for_rect, canny_algorithm, color_to_rgb_string};
8use crate::types::point::Point;
9use crate::types::rect::Rect;
10use crate::types::rotation::Rotation;
11use base64::engine::general_purpose::STANDARD_NO_PAD;
12use base64::Engine;
13
14#[cfg(feature = "background_image")]
15use image::imageops::grayscale;
16#[cfg(feature = "background_image")]
17use image::{DynamicImage, GenericImageView, Rgba};
18
19use itertools::Itertools;
20use parking_lot::{Mutex, RwLock};
21use quadtree_rs::area::{Area, AreaBuilder};
22
23use quadtree_rs::Quadtree;
24use rand::thread_rng;
25use rand::Rng;
26use rayon::iter::ParallelIterator;
27use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator};
28
29use std::sync::Arc;
30
31use crate::font::GuessScript;
32use crate::rank::RankedWords;
33use crate::types::spiral::Spiral;
34use crate::Dimensions;
35use svg::node::element::{Group, Path, Rectangle, Style, Text};
36use svg::{Document, Node};
37
/// Factor between output coordinates and quadtree coordinates: word bounding boxes
/// and the background image size are divided by this value before they are stored
/// in a quadtree, and the debug exports multiply by it to get back.
const QUADTREE_DIVISOR: f32 = 4.;
39
/// Expands to the number of threads reported by
/// [`std::thread::available_parallelism`] as a `usize`, or `4` when that
/// query returns an error.
macro_rules! available_parallelism {
    () => {
        std::thread::available_parallelism().map_or(4, usize::from)
    };
}
48
// Without the `background_image` feature the `image` crate types are not imported.
// This placeholder keeps the fields and signatures that mention `DynamicImage` compiling.
#[cfg(not(feature = "background_image"))]
type DynamicImage = ();
51
/**
    The word cloud: holds the placed words, the optional background data and
    the settings used for layout and export.

    Instances are created through [`WordCloudBuilder`].
*/
pub struct WordCloud<'a> {
    /// Placed words, stored under their bounding region in quadtree coordinates
    /// (output coordinates divided by `QUADTREE_DIVISOR`).
    ct: RwLock<Quadtree<u64, Word<'a>>>,
    /// Regions derived from the background image that words must not overlap;
    /// `None` until `add_background` has been called.
    bg: Option<Quadtree<u64, ()>>,
    /// Background image, used to pick the fill color of words on export.
    bg_image: Option<&'a DynamicImage>,
    /// Width and height of the output.
    dimensions: Dimensions,
    /// Fonts the words are built with; the font is chosen per word by its script.
    font: &'a FontSet<'a>,
}
62
63impl<'a> WordCloud<'a> {
64    fn needed_tree_depth(dimensions: Dimensions) -> f32 {
65        ((dimensions.width().max(dimensions.height()) as f32 / QUADTREE_DIVISOR).log2()
66            / 2.0_f32.log2())
67        .ceil()
68    }
69
70    fn new(dimensions: Dimensions, font: &'a FontSet<'a>) -> Self {
71        WordCloud {
72            ct: RwLock::new(Quadtree::new(
73                WordCloud::needed_tree_depth(dimensions) as usize
74            )),
75            bg: None,
76            bg_image: None,
77            dimensions,
78            font,
79        }
80    }
81
82    #[cfg(feature = "background_image")]
83    fn add_background(&mut self, image: &'a DynamicImage) {
84        let resize = image.resize(
85            (self.dimensions.width() as f32 / QUADTREE_DIVISOR) as u32,
86            (self.dimensions.height() as f32 / QUADTREE_DIVISOR) as u32,
87            image::imageops::FilterType::Nearest,
88        );
89        let grey = grayscale(&resize);
90        let borders = canny_algorithm(&grey, 1.5);
91        let border_image = borders.as_image();
92        let mut qt = Quadtree::new(WordCloud::needed_tree_depth(self.dimensions) as usize);
93
94        for (x, y, col) in border_image.pixels() {
95            if col.0[0] != 0 || col.0[1] != 0 || col.0[2] != 0 {
96                let (pos_x, pos_y) = (f32::max(x as f32 - 1., 0.), f32::max(y as f32 - 1., 0.));
97
98                let search_area = AreaBuilder::default()
99                    .anchor((pos_x as u64, pos_y as u64).into())
100                    .dimensions(((4.) as u64, (4.) as u64))
101                    .build()
102                    .expect("Error while calculating dimensions");
103
104                let insert_area = AreaBuilder::default()
105                    .anchor(((x as f32) as u64, (y as f32) as u64).into())
106                    .dimensions(((1.) as u64, (1.) as u64))
107                    .build()
108                    .expect("Error while calculating dimensions");
109
110                let other = qt.query(search_area).next();
111                if let Some(o) = other {
112                    let comb = Rect::from(&insert_area).combine_rects(&Rect::from(&o.area()));
113                    if let Some(com) = comb {
114                        qt.delete_by_handle(o.handle());
115                        qt.insert(Area::from(&com), ());
116                        continue;
117                    }
118                }
119                qt.insert(insert_area, ());
120            }
121        }
122
123        self.bg = Some(qt);
124        self.bg_image = Some(image);
125    }
126
127    fn converted_dimensions(&self) -> Rect<f32> {
128        Rect {
129            min: Point::default(),
130            max: Point {
131                x: self.dimensions.width() as f32,
132                y: self.dimensions.height() as f32,
133            },
134        }
135    }
136
137    fn add_word(&self, mut word: Word<'a>) {
138        let mut spiral = Spiral::new(5.);
139        let mut iters = 0;
140
141        let mut break_flag = false;
142        loop {
143            if self.converted_dimensions().contains(&word.bounding_box) {
144                let mut intersected: bool = false;
145
146                let search_region = AreaBuilder::default()
147                    .anchor(quadtree_rs::point::Point {
148                        x: f32::max((word.bounding_box.min.x / QUADTREE_DIVISOR).ceil() - 1., 0.)
149                            as u64,
150                        y: f32::max((word.bounding_box.min.y / QUADTREE_DIVISOR).ceil() - 1., 0.)
151                            as u64,
152                    })
153                    .dimensions((
154                        (word.bounding_box.width() / QUADTREE_DIVISOR).ceil() as u64 + 2,
155                        (word.bounding_box.height() / QUADTREE_DIVISOR).ceil() as u64 + 2,
156                    ))
157                    .build()
158                    .expect("search region undefined");
159
160                let insert_region = AreaBuilder::default()
161                    .anchor(
162                        (
163                            (word.bounding_box.min.x / QUADTREE_DIVISOR).ceil() as u64,
164                            (word.bounding_box.min.y / QUADTREE_DIVISOR).ceil() as u64,
165                        )
166                            .into(),
167                    )
168                    .dimensions((
169                        (word.bounding_box.width() / QUADTREE_DIVISOR).ceil() as u64,
170                        (word.bounding_box.height() / QUADTREE_DIVISOR).ceil() as u64,
171                    ))
172                    .build()
173                    .expect("insert region undefined");
174
175                if let Some(qt_bg) = &self.bg {
176                    if qt_bg.query(insert_region).next().is_some() {
177                        intersected = true;
178                    }
179                }
180
181                let len_bf = if !intersected {
182                    let read = self.ct.read();
183
184                    for result in read.query(search_region) {
185                        if word.word_intersect(result.value_ref()) {
186                            intersected = true;
187                            break;
188                        }
189                    }
190                    read.len()
191                } else {
192                    0
193                };
194
195                if !intersected {
196                    let mut write = self.ct.write();
197                    // read the newly added handles
198                    for handle_id in len_bf..=write.len() {
199                        if let Some(new_entry) = write.get(handle_id as u64) {
200                            if word.word_intersect(new_entry.value_ref()) {
201                                intersected = true;
202                                break;
203                            }
204                        }
205                    }
206                    if !intersected {
207                        match write.insert(insert_region, word) {
208                            None => {
209                                panic!("insertion failed");
210                            }
211                            Some(_) => {}
212                        }
213                        break;
214                    }
215                }
216            } else {
217                println!(
218                    "missed: {} {:?} {:?}",
219                    iters,
220                    word.normalized_bbox(),
221                    word.bounding_box
222                );
223            }
224
225            spiral.advance();
226            let incoming_pos = spiral.position() + word.offset;
227            let ranges = word.get_positioning_range(&self.dimensions);
228
229            if iters % 10 == 0
230                || !ranges.0.contains(&incoming_pos.x)
231                || !ranges.1.contains(&incoming_pos.y)
232            {
233                let new_pos = Point {
234                    x: thread_rng().gen_range(ranges.0),
235                    y: thread_rng().gen_range(ranges.1),
236                };
237
238                iters += 1;
239
240                word.move_word(&new_pos);
241
242                spiral.reset();
243            } else {
244                let pos = spiral.position() + word.offset;
245
246                iters += 1;
247
248                word.move_word(&pos);
249
250                // assert!(word.bounding_box.min.full_ge(&Point::default()));
251                // assert!(ranged.0.contains(&word.bounding_box.min.x));
252                // assert!(ranged.1.contains(&word.bounding_box.min.y));
253            }
254            if iters % 25 == 0 && iters != 0 {
255                if word.scale <= 10. {
256                    if break_flag {
257                        // println!("Warning: missed word: {}", word.text);
258                        break;
259                    }
260                    break_flag = true;
261                } else {
262                    word = match Word::build(
263                        word.text.as_str(),
264                        word.used_font,
265                        word.scale - 5.,
266                        word.offset,
267                        Rotation::random(),
268                    ) {
269                        Ok(mut w) => {
270                            if !self.converted_dimensions().contains(&w.bounding_box) {
271                                let (xr, yr) = w.get_positioning_range(&self.dimensions);
272                                let point1 = Point {
273                                    x: thread_rng().gen_range(xr.clone()),
274                                    y: thread_rng().gen_range(yr.clone()),
275                                };
276                                w.move_word(&point1);
277
278                                assert!(self.converted_dimensions().contains(&w.bounding_box));
279                            }
280
281                            w
282                        }
283                        Err(_) => continue,
284                    };
285                }
286            }
287        }
288    }
289
290    pub(crate) fn put_text_sync(&self, inp: Vec<Word<'a>>) {
291        for word in inp {
292            self.add_word(word);
293        }
294    }
295
296    pub(crate) fn put_text(&self, inp: Vec<Word<'a>>) {
297        /*let xl = (0..available_parallelism!())
298            .map(|n| {
299                inp.iter()
300                    .skip(n)
301                    .step_by(available_parallelism!())
302                    .cloned()
303                    .collect::<Vec<Word>>()
304            })
305            .collect::<Vec<Vec<Word>>>();*/
306
307        inp
308            .into_par_iter()
309            .for_each(
310                |w| self.add_word(w)
311            );
312
313        // xl.into_par_iter().for_each(|wl| self.put_text_sync(wl));
314    }
315
316    /**
317        Add new words to the [`WordCloud`]. For the best results, call this function only once.
318    */
319    pub fn write_content(&self, content: RankedWords, max_word_count: usize) {
320        let max = content
321            .0
322            .iter()
323            .take(max_word_count)
324            .map(|x| (x.count() as f32))
325            .sum::<f32>()
326            / max_word_count as f32;
327
328        let max = content.0.iter().max_by_key(|x| x.count()).unwrap().count() as f32;
329
330        let inp: Vec<WordBuilder> = content
331            .0
332            .iter()
333            .take(max_word_count)
334            .flat_map(|w| {
335                let font_size_range = Word::guess_font_size_range(w.content(), &self.dimensions);
336                let ws = w.content().guess_script();
337                let used_font = match self.font.get_font_for_script(&ws) {
338                    None => {
339                        return None;
340                    }
341                    Some(f) => f,
342                };
343
344                let scale = ((w.count() as f32).log2() / max.log2()) * font_size_range.end;
345                Some(
346                    WordBuilder::new()
347                        .content(w.content().to_string())
348                        .scale(scale)
349                        .font(used_font)
350                        .start(Point::default()),
351                )
352            })
353            .collect();
354
355        let mut words = (0..available_parallelism!())
356            .into_par_iter()
357            .map(|n| {
358                inp.iter()
359                    .skip(n)
360                    .step_by(available_parallelism!())
361                    .collect::<Vec<&WordBuilder>>()
362            })
363            .map(|inputs| inputs.into_iter().map(|x| x.build()))
364            .flatten_iter()
365            .flat_map(|pw| match pw {
366                Ok(w) => Some(w),
367                Err(e) => {
368                    eprintln!("Warning: {}", e);
369                    None
370                }
371            })
372            .map(|mut w| {
373                let (x_range, y_range) = w.get_positioning_range(&self.dimensions);
374
375                let point = (
376                    thread_rng().gen_range(x_range),
377                    thread_rng().gen_range(y_range),
378                );
379                w.move_word(&point.into());
380
381                w
382            })
383            .collect::<Vec<Word>>();
384
385        words.sort_by_key(|d| d.scale as u64);
386        words.reverse();
387
388        let em: &[Word] = &[];
389        let (first, second) = if words.len() > 20 {
390            words.split_at(20)
391        } else {
392            (words.as_slice(), em)
393        };
394
395        self.put_text_sync(first.to_vec());
396        self.put_text(second.to_vec());
397    }
398
399    #[cfg(feature = "background_image")]
400    fn get_color_for_word(&self, word: &Word) -> Rgba<u8> {
401        match self.bg_image {
402            None => Rgba([0; 4]),
403            Some(img) => {
404                let multiplier = img.width() as f64
405                    / usize::min(self.dimensions.width(), self.dimensions.height()) as f64;
406
407                let integer_rect = Rect {
408                    min: Point {
409                        x: ((word.bounding_box.min.x as f64) * multiplier) as u32,
410                        y: ((word.bounding_box.min.y as f64) * multiplier) as u32,
411                    },
412                    max: Point {
413                        x: ((word.bounding_box.max.x as f64) * multiplier) as u32,
414                        y: ((word.bounding_box.max.y as f64) * multiplier) as u32,
415                    },
416                };
417
418                average_color_for_rect(img, &integer_rect, Rgba([0, 0, 0, 0]))
419            }
420        }
421    }
422
423    /**
424        Export the resulting WordCloud as an SVG formatted [`String`]. Here the text is rendered using SVG Paths instead
425        of Text elements. This leads to way bigger file sizes, but also to a little bit more accurate
426        drawing of the text.
427
428        To export using text elements, use the [`Self::export_text`]
429        function.
430    */
431    pub fn export_rendered(&self) -> Result<String, Error> {
432        let ct = self.ct.read();
433        let collected_entries: Vec<&Word> = ct.iter().map(|x| x.value_ref()).collect();
434
435        let sliced = collected_entries.par_iter().chunks(
436            (collected_entries.len() as f64 / available_parallelism!() as f64).ceil() as usize,
437        );
438
439        let doc_mutex = Arc::new(Mutex::new(
440            Document::new()
441                .set(
442                    "viewBox",
443                    (0, 0, self.dimensions.width(), self.dimensions.height()),
444                )
445                .set("height", self.dimensions.height())
446                .set("width", self.dimensions.width()),
447        ));
448
449        sliced.for_each(|x| {
450            for word in x {
451                let mut p = Path::new().set("d", word.d()).set("stoke", "none");
452                #[cfg(feature = "background_image")]
453                {
454                    let color = self.get_color_for_word(word);
455                    p.assign("fill", color_to_rgb_string(&color));
456                }
457
458                let _s = p.to_string();
459                {
460                    doc_mutex.lock().append(p);
461                }
462            }
463        });
464
465        let lock = doc_mutex.lock();
466        let mut target = Cursor::new(Vec::new());
467        match svg::write(&mut target, &lock.clone()) {
468            Ok(_) => {}
469            Err(e) => return Err(e),
470        };
471
472        Ok(String::from_utf8(target.into_inner()).expect("decoding the written string failed"))
473    }
474
475    /**
476        Writes the result of [`Self::export_rendered`] to a file.
477    */
478    pub fn export_rendered_to_file(&self, filename: &str) -> Result<(), Error> {
479        let string = self.export_rendered()?;
480        std::fs::write(filename, string.as_bytes())?;
481        Ok(())
482    }
483
484    /**
485       Export the resulting WordCloud as an SVG formatted [`String`]. Here the text is rendered
486       using Text elements.
487
488       This function should be preferred over [`Self::export_rendered`] in
489       most use-cases.
490    */
491    pub fn export_text(&self) -> Result<String, Error> {
492        let mut document = Document::new()
493            .set(
494                "viewBox",
495                (0, 0, self.dimensions.width(), self.dimensions.height()),
496            )
497            .set("height", self.dimensions.height())
498            .set("width", self.dimensions.width());
499
500        let read_lock = self.ct.read();
501        for (font, group) in &read_lock
502            .iter()
503            .map(|y| y.value_ref())
504            .group_by(|k| k.used_font)
505        {
506            let dt = match font.packed() {
507                None => font.reference().data,
508                Some(s) => s.as_slice(),
509            };
510            let enc = STANDARD_NO_PAD.encode(dt);
511            document.append(Style::new(format!(
512                "@font-face{{font-family:\"{}\";src:url(\"data:{};charset=utf-8;base64,{}\");}}",
513                font.name(),
514                font.font_type().embed_tag(),
515                enc
516            )));
517
518            let mut gr = Group::new().set("font-family", font.name());
519
520            for word in group {
521                let mut t = Text::new()
522                    .set("x", word.offset.x)
523                    .set("y", word.offset.y)
524                    .set("font-size", word.scale);
525
526                #[cfg(feature = "background_image")]
527                {
528                    let color = self.get_color_for_word(word);
529                    t.assign("fill", color_to_rgb_string(&color));
530                }
531
532                match word.rotation {
533                    Rotation::Zero => (),
534                    Rotation::Ninety | Rotation::OneEighty | Rotation::TwoSeventy => {
535                        t.assign(
536                            "style",
537                            format!(
538                                "transform: rotate({}deg); transform-origin: {}px {}px",
539                                word.rotation.inner(),
540                                word.offset.x,
541                                word.offset.y
542                            ),
543                        );
544                    }
545                }
546                t.append(svg::node::Text::new(&word.text));
547                gr.append(t);
548            }
549
550            document.append(gr);
551        }
552
553        let mut cursor = Cursor::new(Vec::new());
554        svg::write(&mut cursor, &document)?;
555
556        Ok(String::from_utf8_lossy(&cursor.into_inner()).into())
557    }
558
559    /**
560    Writes the result of [`Self::export_text`] to a file.
561     */
562    pub fn export_text_to_file(&self, filename: &str) -> Result<(), Error> {
563        let string = self.export_text()?;
564        std::fs::write(filename, string.as_bytes())?;
565        Ok(())
566    }
567}
568
569impl<'a> WordCloud<'a> {
570    fn debug_background_collision(&self, filename: &str) {
571        let mut document = Document::new()
572            .set(
573                "viewBox",
574                (0, 0, self.dimensions.width(), self.dimensions.height()),
575            )
576            .set("height", self.dimensions.height())
577            .set("width", self.dimensions.width());
578
579        let colors = vec![
580            "black", "gray", "silver", "maroon", "red", "purple", "fuchsia", "green", "lime",
581            "olive", "yellow", "navy", "blue", "teal", "aqua",
582        ];
583        if let Some(i) = self.bg.as_ref() {
584            for bound in i.iter() {
585                let random_color = colors[thread_rng().gen_range(0..colors.len())];
586
587                let rec = Rectangle::new()
588                    .set("x", bound.anchor().x as f32 * QUADTREE_DIVISOR)
589                    .set("y", bound.anchor().y as f32 * QUADTREE_DIVISOR)
590                    .set("width", bound.area().width() as f32 * QUADTREE_DIVISOR)
591                    .set("height", bound.area().height() as f32 * QUADTREE_DIVISOR)
592                    .set("stroke", "black")
593                    .set("stroke-width", "1px")
594                    .set("fill", random_color);
595
596                document.append(rec);
597            }
598        }
599
600        svg::save(filename, &document).expect("writing to file failed");
601    }
602    fn debug_result_on_background(&self, filename: &str) {
603        let mut document = Document::new()
604            .set(
605                "viewBox",
606                (0, 0, self.dimensions.width(), self.dimensions.height()),
607            )
608            .set("height", self.dimensions.height())
609            .set("width", self.dimensions.width());
610
611        if let Some(i) = self.bg.as_ref() {
612            for bound in i.iter() {
613                let rec = Rectangle::new()
614                    .set("x", bound.anchor().x as f32 * QUADTREE_DIVISOR)
615                    .set("y", bound.anchor().y as f32 * QUADTREE_DIVISOR)
616                    .set("width", bound.area().width() as f32 * QUADTREE_DIVISOR)
617                    .set("height", bound.area().height() as f32 * QUADTREE_DIVISOR);
618
619                document.append(rec);
620            }
621        }
622
623        for word in self.ct.read().iter() {
624            let p = Path::new()
625                .set("d", word.value_ref().d())
626                .set("stoke", "none")
627                .set("fill", "gray");
628            document.append(p);
629        }
630
631        svg::save(filename, &document).expect("writing to file failed");
632    }
633
634    fn debug_collidables(&self, filename: &str) {
635        let mut document = Document::new()
636            .set(
637                "viewBox",
638                (0, 0, self.dimensions.width(), self.dimensions.height()),
639            )
640            .set("height", self.dimensions.height())
641            .set("width", self.dimensions.width());
642
643        for x in self.ct.read().iter() {
644            let w = x.value_ref();
645            for glyph in &w.glyphs {
646                for x in glyph.absolute_collidables(&w.rotation, w.offset) {
647                    let p = Path::new()
648                        .set("stroke", "black")
649                        .set("stroke-width", 1)
650                        .set(
651                            "d",
652                            format!("M {} {} L {} {} Z", x.start.x, x.start.y, x.end.x, x.end.y),
653                        );
654                    document.append(p);
655                }
656
657                let r = glyph.relative_bounding_box(&w.rotation) + w.offset;
658                let p = Rectangle::new()
659                    .set("stroke", "green")
660                    .set("stroke-width", 1)
661                    .set("fill", "none")
662                    .set("x", r.min.x)
663                    .set("y", r.min.y)
664                    .set("width", r.width())
665                    .set("height", r.height());
666
667                document.append(p);
668            }
669
670            document.append(
671                Rectangle::new()
672                    .set("stroke", "red")
673                    .set("stroke-width", 1)
674                    .set("fill", "none")
675                    .set("x", w.bounding_box.min.x)
676                    .set("y", w.bounding_box.min.y)
677                    .set("width", w.bounding_box.width())
678                    .set("height", w.bounding_box.height()),
679            )
680        }
681
682        svg::save(filename, &document).expect("error exporting to file");
683    }
684
685    /**
686        Exports versions of the WordCloud to a folder, which are mainly used for debugging purposes.
687        This function may panic, so it shouldn't be used in production.
688    */
689    pub fn export_debug_to_folder(&self, folder_name: &str) {
690        let fol = if folder_name.ends_with('/') {
691            String::from(folder_name)
692        } else {
693            String::from(folder_name) + "/"
694        };
695        if !std::path::Path::new(&fol).is_dir() {
696            std::fs::create_dir(&fol).expect("creating debug folder failed");
697        }
698        if self.bg.is_some() {
699            self.debug_background_collision(&(fol.clone() + "background_collision.svg"));
700            self.debug_result_on_background(&(fol.clone() + "result_on_background.svg"));
701        }
702        self.debug_collidables(&(fol + "collidables.svg"));
703    }
704}
705
/**
Builder for [WordCloud]

Dimensions and font are required, the image is optional; see
[`WordCloudBuilder::build`].
 */
#[derive(Default)]
pub struct WordCloudBuilder<'a> {
    /// Output dimensions; `build` fails without them.
    dimensions: Option<Dimensions>,
    /// Font set used for the words; `build` fails without it.
    font: Option<&'a FontSet<'a>>,
    /// Optional background image, only used with the `background_image` feature.
    image: Option<&'a DynamicImage>,
}
715
716impl<'a> WordCloudBuilder<'a> {
717    pub fn new() -> Self {
718        WordCloudBuilder::default()
719    }
720
721    /**
722    Output dimensions of the created image
723     */
724    pub fn dimensions(mut self, dimensions: Dimensions) -> Self {
725        self.dimensions = Some(dimensions);
726        self
727    }
728
729    /**
730    Used [`FontSet`], see [`FontSet`] for more information
731     */
732    pub fn font(mut self, font: &'a FontSet<'a>) -> Self {
733        self.font = Some(font);
734        self
735    }
736
737    /**
738    Optional: Image, which is used for border detection
739     */
740    pub fn image(mut self, image: &'a DynamicImage) -> Self {
741        self.image = Some(image);
742        self
743    }
744
745    /**
746    Build the [`WordCloud`], basically free, no calculations are done here
747     */
748    pub fn build(self) -> Result<WordCloud<'a>, String> {
749        let mut wc = match (self.dimensions, self.font) {
750            (Some(d), Some(f)) => WordCloud::new(d, f),
751            (_, None) => return Err("Missing FontSet in WordCloudBuilder!".into()),
752            (None, _) => return Err("Missing Dimensions in WordCloudBuilder!".into()),
753        };
754
755        #[cfg(feature = "background_image")]
756        if let Some(i) = self.image {
757            wc.add_background(i);
758        }
759
760        Ok(wc)
761    }
762}