//! # Text Entry Throughput
//! Text entry throughput (TET), [introduced by Zhang et al.](https://dl.acm.org/doi/fullHtml/10.1145/3290605.3300866),
//! is a throughput metric for text entry that is independent of the entry method and is based on
//! Shannon information theory: it multiplies the information transmitted per character, `I(X; Y)`,
//! between the presented text `X` and the transcribed text `Y` by the entry rate in characters per
//! second, yielding bits per second.
//!
//! This crate is a third-party implementation of TET.
//!
//! ## TL;DR
//! ```
//! use tet::TextEntryThroughput;
//!
//! // A preset for the English alphabet is provided.
//! // Building a distribution for other languages is explained below.
//! let tet = TextEntryThroughput::alphabet_letter_distribution();
//!
//! let presented_text = "my watch fell in the waterprevailing wind from the east";
//! let transcribed_text = "my wacch fell in waterpreviling wind on the east";
//! let s = std::time::Duration::from_secs(12); // 4 characters per second
//!
//! let throughput = tet.calc(presented_text, transcribed_text, s).unwrap();
//! assert!((throughput - 12.954965333409255).abs() < 0.0001);
//! ```
//!
//! ## Usage
//! ### Get distribution
//! First, build a character distribution from a source text; it provides the source entropy `H(X)`.
//! ```
//! use tet::{Frequencies, Distribution};
//!
//! let mut frequency = Frequencies::new();
//!
//! // get frequency of each character
//! let source = "large and appropriate text is recommended";
//! source.chars()
//!     .for_each(|c| {
//!         frequency.record(c);
//!     });
//!
//! // normalize frequency to get distribution
//! let distribution = Distribution::new(frequency);
//! ```
//!
//! ### Compute TET
//! ```
//! # use tet::*;
//! # use tet::{Frequencies, Distribution};
//! #
//! # let mut frequency = Frequencies::new();
//! #
//! # // get frequency of each character
//! # let source = "large and appropriate text is recommended";
//! # source.chars()
//! #     .for_each(|c| {
//! #         frequency.record(c);
//! #     });
//! #
//! # // normalize frequency to get distribution
//! # let distribution = Distribution::new(frequency);
//! #
//! // now you can calculate TET! :+1:
//! let tet = TextEntryThroughput::new(distribution);
//!
//! // Of course, you can also use multibyte characters.
//! // ref. https://doc.rust-lang.org/std/primitive.char.html
//! let (presented, transcribed) = ("うまぴょい", "うまぽい");
//! let s = std::time::Duration::from_secs(2); // 2 characters per second
//!
//! // Text Entry Throughput (bits/second)
//! let throughput = tet.calc(presented, transcribed, s).unwrap();
//! ```
//!
//! ## Features
//! The `serde1` feature allows you to save and load [`Frequencies`](Frequencies)
//! and [`Distribution`](Distribution) via JSON.
//!
//! ```toml
//! # Cargo.toml
//! [dependencies]
//! tet = { version = "0.1", features = ["serde1"] }
//! ```
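//!
//! With the feature enabled, both types can be round-tripped through JSON.
//! A minimal sketch (it assumes the types implement `Serialize`/`Deserialize`
//! under `serde1` and that `serde_json` is added to your own project; it is
//! not a dependency of this crate):
//!
//! ```ignore
//! use tet::{Frequencies, Distribution};
//!
//! let mut frequency = Frequencies::new();
//! "some source text".chars().for_each(|c| frequency.record(c));
//! let distribution = Distribution::new(frequency);
//!
//! // save the distribution as JSON ...
//! let json = serde_json::to_string(&distribution).unwrap();
//!
//! // ... and load it back later
//! let distribution: Distribution = serde_json::from_str(&json).unwrap();
//! ```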


pub use crate::distribution::{Distribution, Frequencies};
use std::collections::HashMap;

#[cfg(feature = "serde1")]
use serde::{Serialize, Deserialize};

mod distribution;
mod optimal_alignments;

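/// Text entry throughput calculator.
///
/// It is parameterised by a character [`Distribution`], from which the source
/// entropy `H(X)` is derived (see the crate-level documentation).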
pub struct TextEntryThroughput {
    distribution: Distribution
}

impl TextEntryThroughput {
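    /// Creates a calculator from an arbitrary character [`Distribution`],
    /// e.g. one built from [`Frequencies`] via [`Distribution::new`].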
    pub fn new(distribution: Distribution) -> Self {
        Self { distribution }
    }

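    /// Preset calculator for English text: the letters `a`-`z` plus the space
    /// character, with frequencies taken from
    /// <http://www.macfreek.nl/memory/Letter_Distribution#Letter_Frequency>.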
    pub fn alphabet_letter_distribution() -> Self {
        let alphabets = [
            'a', 'b', 'c', 'd', 'e',
            'f', 'g', 'h', 'i', 'j',
            'k', 'l', 'm', 'n', 'o',
            'p', 'q', 'r', 's', 't',
            'u', 'v', 'w', 'x', 'y',
            'z', ' '
        ];

        // ref. http://www.macfreek.nl/memory/Letter_Distribution#Letter_Frequency
        let distribution = [
            0.06545420428810268, 0.012614349400134882, 0.022382079660795914, 0.032895839710101495, 0.10287480840814522,
            0.019870906945619955, 0.01628201251975626, 0.0498866519336527, 0.05679944220647908, 0.0009771967640664421,
            0.005621008826086285, 0.03324279082953061, 0.020306796250368523, 0.057236004874678816, 0.061720746945911634,
            0.015073764715016882, 0.0008384527300266635, 0.049980287430261394, 0.05327793252372975, 0.07532249847431097,
            0.022804128240333354, 0.007977317166161044, 0.017073508770571122, 0.0014120607927983009, 0.014305632773116854,
            0.0005138874382474097, 0.18325568938199557];

        let map = alphabets.iter().cloned()
            .zip(distribution.iter().cloned())
            .collect::<HashMap<_, _>>();

        let distribution = Distribution::with_map(map);

        Self::new(distribution)
    }

    /// Computes the text entry throughput (bits/s).
    ///
    /// - `presented`: the presented text
    /// - `transcribed`: the transcribed text
    /// - `s`: the time taken to enter the transcribed text
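    ///
    /// # Example
    /// The crate-level TL;DR, repeated here for convenience:
    /// ```
    /// use tet::TextEntryThroughput;
    ///
    /// let tet = TextEntryThroughput::alphabet_letter_distribution();
    ///
    /// let presented = "my watch fell in the waterprevailing wind from the east";
    /// let transcribed = "my wacch fell in waterpreviling wind on the east";
    /// let s = std::time::Duration::from_secs(12); // 4 characters per second
    ///
    /// let throughput = tet.calc(presented, transcribed, s).unwrap();
    /// assert!((throughput - 12.954965333409255).abs() < 0.0001);
    /// ```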
    pub fn calc<P, T>(&self, presented: P, transcribed: T, s: std::time::Duration) -> Option<f64>
        where P: Into<&'static str>, T: Into<&'static str>
    {
        use optimal_alignments::OptimalAlignments;

        let transcribed = transcribed.into();
        let characters_per_second = transcribed.chars().count() as f64 / s.as_secs_f64();

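        // Optimally align the presented and transcribed strings, then scale the
        // information transmitted per character, I(X; Y) (bits/character), by the
        // entry rate (characters/second).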
        let alignments = OptimalAlignments::new(presented, transcribed, &self.distribution);
        alignments.ixy().map(|ixy| ixy * characters_per_second)
    }
}

#[cfg(test)]
mod test {
    use crate::TextEntryThroughput;

    #[test]
    fn text_entry_throughput_test() {
        let tet = TextEntryThroughput::alphabet_letter_distribution();

        let presented = "my watch fell in the waterprevailing wind from the east";
        let transcribed = "my wacch fell in waterpreviling wind on the east";
        let s = std::time::Duration::from_secs(12);

        let throughput = tet.calc(presented, transcribed, s).unwrap();

        // I(X;Y) * (characters per second): 3.238741333352314 * 4.0 = 12.954965333409256
        // Rounded to the significant digits used in the paper: 3.238 * 4.000 = 12.952
        // paper: https://dl.acm.org/doi/fullHtml/10.1145/3290605.3300866
        assert!((throughput - 12.954965333409255).abs() < 0.0001);
    }
}