1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
//! # Text Entry Throughput
//! Text entry throughput [introduced by Mingrui Zhang et al.](https://dl.acm.org/doi/fullHtml/10.1145/3290605.3300866)
//! is a text entry method-independent throughput metric based on Shannon information theory.
//!
//! This crate is a third-party implementation of TET.
//!
//! ## TL;DR
//! ```
//! use tet::TextEntryThroughput;
//!
//! // A preset for the English alphabet is provided.
//! // How to handle texts in other languages is explained later.
//! let tet = TextEntryThroughput::alphabet_letter_distribution();
//!
//! let presented_text = "my watch fell in the waterprevailing wind from the east";
//! let transcribed_text = "my wacch fell in waterpreviling wind on the east";
//! let s = std::time::Duration::from_secs(12); // 4 characters per second
//!
//! let throughput = tet.calc(presented_text, transcribed_text, s).unwrap();
//! assert!((throughput - 12.954965333409255).abs() < 0.0001);
//! ```
//!
//! ## Usage
//! ### Get distribution
//! First, prepare a distribution of characters to get the entropy `H(X)` of the source.
//! ```
//! use tet::{Frequencies, Distribution};
//!
//! let mut frequency = Frequencies::new();
//!
//! // get frequency of each character
//! let source = "large and appropriate text is recommended";
//! source.chars()
//!     .for_each(|c| {
//!         frequency.record(c.clone());
//!     });
//!
//! // normalize frequency to get distribution
//! let distribution = Distribution::new(frequency);
//! ```
//!
//! ### Compute TET
//! ```
//! # use tet::*;
//! # use tet::{Frequencies, Distribution};
//! #
//! # let mut frequency = Frequencies::new();
//! #
//! # // get frequency of each character
//! # let source = "large and appropriate text is recommended";
//! # source.chars()
//! #     .for_each(|c| {
//! #         frequency.record(c.clone());
//! #     });
//! #
//! # // normalize frequency to get distribution
//! # let distribution = Distribution::new(frequency);
//! #
//! // now you can calculate TET! :+1:
let tet = TextEntryThroughput::new(distribution); //! //! // Of course, you can use also multibyte characters //! // ref. https://doc.rust-lang.org/std/primitive.char.html //! let (presented, transcribed) = ("うまぴょい", "うまぽい"); //! let s = std::time::Duration::from_secs(2); // 4 characters per minute //! //! // Text Entry Throughput (bits/second) //! let throughput = tet.calc(presented, transcribed, s).unwrap(); //! ``` //! //! ## Features //! `serde1` feature allows you to save and load [`Frequencies`](Frequencies) //! and [`Distribution`](Distribution) via JSON. //! //! ```toml: Cargo.toml //! tet = { version = "0.1", features = ["serde1"] } //! ``` pub use crate::distribution::{Distribution, Frequencies}; use std::collections::HashMap; #[cfg(feature = "serde1")] use serde::{Serialize, Deserialize}; mod distribution; mod optimal_alignments; pub struct TextEntryThroughput { distribution: Distribution } impl TextEntryThroughput { pub fn new(distribution: Distribution) -> Self { Self { distribution } } pub fn alphabet_letter_distribution() -> Self { let alphabets = [ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ' ]; // ref. 
http://www.macfreek.nl/memory/Letter_Distribution#Letter_Frequency let distribution = [ 0.06545420428810268, 0.012614349400134882, 0.022382079660795914, 0.032895839710101495, 0.10287480840814522, 0.019870906945619955, 0.01628201251975626, 0.0498866519336527, 0.05679944220647908, 0.0009771967640664421, 0.005621008826086285, 0.03324279082953061, 0.020306796250368523, 0.057236004874678816, 0.061720746945911634, 0.015073764715016882, 0.0008384527300266635, 0.049980287430261394, 0.05327793252372975, 0.07532249847431097, 0.022804128240333354, 0.007977317166161044, 0.017073508770571122, 0.0014120607927983009, 0.014305632773116854, 0.0005138874382474097, 0.18325568938199557]; let map = alphabets.iter().cloned() .zip(distribution.iter().cloned()) .collect::<HashMap<_, _>>(); let distribution = Distribution::with_map(map); Self::new(distribution) } /// compute a text entry throughput (bits/s) /// /// - presented: presented text /// - transcribed: transcribed text /// - s: time in seconds required for entry transcribed text pub fn calc<P, T>(&self, presented: P, transcribed: T, s: std::time::Duration) -> Option<f64> where P: Into<&'static str>, T: Into<&'static str> { use optimal_alignments::OptimalAlignments; let transcribed = transcribed.into(); let characters_per_second = transcribed.chars().count() as f64 / s.as_secs_f64(); let alignments = OptimalAlignments::new(presented, transcribed, &self.distribution); alignments.ixy().map(|ixy| ixy * characters_per_second) } } #[cfg(test)] mod test { use crate::TextEntryThroughput; #[test] fn text_entry_throughput_test() { let tet = TextEntryThroughput::alphabet_letter_distribution(); let presented = "my watch fell in the waterprevailing wind from the east"; let transcribed = "my wacch fell in waterpreviling wind on the east"; let s = std::time::Duration::from_secs(12); let throughput = tet.calc(presented, transcribed, s).unwrap(); // 3.238741333352314 * 4.0 = 12.954965333409256 // -> significant digits // 3.238 * 4.000 = 12.952 (on 
the paper) // paper: https://dl.acm.org/doi/fullHtml/10.1145/3290605.3300866 assert!((throughput - 12.954965333409255).abs() < 0.0001); } }