Skip to main content

edgeparse_core/utils/
statistics.rs

//! Font and text statistics.
2
3use std::collections::HashMap;
4
5/// Mode-weighted font statistics for heading/paragraph classification.
6#[derive(Debug, Clone, Default)]
7pub struct ModeWeightStatistics {
8    /// font_size → total weight (character count × frequency)
9    weights: HashMap<ordered_float::OrderedFloat<f64>, f64>,
10    /// Total character count
11    total_count: usize,
12}
13
14impl ModeWeightStatistics {
15    /// Create a new empty statistics tracker.
16    pub fn new() -> Self {
17        Self::default()
18    }
19
20    /// Add a font size observation with the given character count.
21    pub fn add(&mut self, font_size: f64, char_count: usize) {
22        let key = ordered_float::OrderedFloat(font_size);
23        *self.weights.entry(key).or_insert(0.0) += char_count as f64;
24        self.total_count += char_count;
25    }
26
27    /// Get the mode (most frequent) font size.
28    pub fn mode_font_size(&self) -> Option<f64> {
29        self.weights
30            .iter()
31            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
32            .map(|(k, _)| k.into_inner())
33    }
34
35    /// Total character count.
36    pub fn total_count(&self) -> usize {
37        self.total_count
38    }
39
40    /// Whether a font size is larger than the mode (potential heading).
41    pub fn is_larger_than_mode(&self, font_size: f64) -> bool {
42        match self.mode_font_size() {
43            Some(mode) => font_size > mode + 0.5,
44            None => false,
45        }
46    }
47
48    /// Whether a font size matches the mode (body text).
49    pub fn is_mode_size(&self, font_size: f64) -> bool {
50        match self.mode_font_size() {
51            Some(mode) => (font_size - mode).abs() < 0.5,
52            None => false,
53        }
54    }
55}
56
/// Text style descriptor for line/paragraph comparison.
///
/// Equality (`PartialEq`) compares every field exactly; use
/// `is_compatible` for a tolerant comparison.
#[derive(Debug, Clone, PartialEq)]
pub struct TextStyle {
    /// Font name.
    pub font_name: String,
    /// Font size.
    pub font_size: f64,
    /// Font weight (numeric).
    pub font_weight: f64,
    /// Whether the text is bold.
    pub is_bold: bool,
    /// Whether the text is italic.
    pub is_italic: bool,
}
71
72impl TextStyle {
73    /// Whether two styles are compatible (same font properties).
74    pub fn is_compatible(&self, other: &TextStyle) -> bool {
75        self.font_name == other.font_name
76            && (self.font_size - other.font_size).abs() < 0.5
77            && self.is_bold == other.is_bold
78    }
79}
80
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `TextStyle` with the given comparison-relevant fields.
    fn style(name: &str, size: f64, bold: bool, italic: bool) -> TextStyle {
        TextStyle {
            font_name: name.to_string(),
            font_size: size,
            font_weight: 400.0,
            is_bold: bold,
            is_italic: italic,
        }
    }

    #[test]
    fn test_mode_weight_statistics() {
        let mut stats = ModeWeightStatistics::new();
        stats.add(12.0, 100);
        stats.add(14.0, 20);
        stats.add(12.0, 200);
        assert!((stats.mode_font_size().unwrap() - 12.0).abs() < 0.01);
        assert_eq!(stats.total_count(), 320);
        assert!(stats.is_mode_size(12.0));
        assert!(stats.is_larger_than_mode(14.0));
        assert!(!stats.is_larger_than_mode(12.0));
    }

    #[test]
    fn test_empty_statistics() {
        let stats = ModeWeightStatistics::new();
        assert!(stats.mode_font_size().is_none());
        assert_eq!(stats.total_count(), 0);
        assert!(!stats.is_mode_size(12.0));
        assert!(!stats.is_larger_than_mode(12.0));
    }

    #[test]
    fn test_mode_tolerance_boundaries() {
        let mut stats = ModeWeightStatistics::new();
        stats.add(12.0, 10);
        assert!(stats.is_mode_size(12.4));
        assert!(!stats.is_mode_size(12.5));
        assert!(!stats.is_larger_than_mode(12.5));
        assert!(stats.is_larger_than_mode(12.6));
    }

    #[test]
    fn test_text_style_compatibility() {
        let base = style("Helvetica", 12.0, false, false);
        assert!(base.is_compatible(&style("Helvetica", 12.4, false, false)));
        // The italic flag is not part of the compatibility check.
        assert!(base.is_compatible(&style("Helvetica", 12.0, false, true)));
        assert!(!base.is_compatible(&style("Times", 12.0, false, false)));
        assert!(!base.is_compatible(&style("Helvetica", 12.5, false, false)));
        assert!(!base.is_compatible(&style("Helvetica", 12.0, true, false)));
    }
}