1use crate::{
4 config::{Mapped, MappedKey, Scheme, ShortCodeConfig, 配置},
5 encoders::简码配置,
6 objectives::metric::get_fingering_types,
7 错误,
8};
9use regex::Regex;
10use rustc_hash::FxHashMap;
11use serde::{Deserialize, Serialize};
12use std::{cmp::Reverse, collections::HashMap};
13
/// Maximum word length for which short-code rules are kept.
///
/// In this file it is the length of the per-word-length array returned by
/// `数据::预处理简码配置`, which is indexed by `word length - 1`.
pub const 最大词长: usize = 10;

/// Upper bound on the number of keys in a key combination.
///
/// `数据::新建` and `数据::get_space` cap `encoder.max_length` with this value
/// before raising the radix to that power to size the per-code lookup tables.
pub const 最大按键组合长度: usize = 4;
19
/// An encodable entry as supplied by the caller, before its element names are
/// resolved to numeric ids by `数据::预处理词列表`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct 原始可编码对象 {
    /// Name of the entry; its `chars().count()` becomes the word length.
    pub name: String,
    /// Element names separated by single spaces.
    pub sequence: String,
    /// Frequency; the processed list is sorted by this value, highest first.
    pub frequency: u64,
    /// Short-code level; defaults to `u64::MAX` when missing from the input.
    #[serde(default = "原始可编码对象::默认级别")]
    pub level: u64,
}

impl 原始可编码对象 {
    /// Serde default for `level`: `u64::MAX`.
    const fn 默认级别() -> u64 {
        u64::MAX
    }
}
35
/// Key-distribution loss parameters keyed by key character, as supplied by the caller.
pub type 原始键位分布信息 = HashMap<char, 键位分布损失函数>;
/// Key-distribution loss parameters indexed by numeric key id
/// (built by `数据::预处理键位分布信息`).
pub type 键位分布信息 = Vec<键位分布损失函数>;
/// Equivalence values keyed by key-sequence string, as supplied by the caller.
pub type 原始当量信息 = HashMap<String, f64>;
/// Equivalence values indexed by numeric code (built by `数据::预处理当量信息`).
pub type 当量信息 = Vec<f64>;
40
/// Loss-function parameters for the usage share of a single key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct 键位分布损失函数 {
    /// Ideal share for the key; `数据::预处理键位分布信息` rescales these so that
    /// they sum to 1 over all key ids.
    pub ideal: f64,
    /// NOTE(review): presumably the penalty weight applied when the actual share
    /// is below `ideal` — confirm against the objective code.
    pub lt_penalty: f64,
    /// NOTE(review): presumably the penalty weight applied when the actual share
    /// is above `ideal` — confirm against the objective code.
    pub gt_penalty: f64,
}
48
/// Numeric id of an element; used as an index into `元素映射`.
pub type 元素 = usize;

/// Sequence of element ids that makes up one encodable entry.
pub type 元素序列 = Vec<元素>;

/// A code packed into an integer: each base-`进制` digit is a key id, with the
/// first key in the least significant digit (see `数据::数字转编码`).
pub type 编码 = u64;
57
/// An encodable entry after preprocessing by `数据::预处理词列表`.
#[derive(Debug, Clone)]
pub struct 可编码对象 {
    /// Name copied from the raw entry.
    pub 名称: String,
    /// Number of `char`s in `名称`.
    pub 词长: usize,
    /// Element ids resolved from the raw space-separated sequence.
    pub 元素序列: 元素序列,
    /// Frequency copied from the raw entry.
    pub 频率: u64,
    /// Short-code level copied from the raw entry's `level`.
    pub 简码等级: u64,
    /// Position of the entry in the raw input list, recorded before sorting.
    pub 原始顺序: usize,
}
68
/// Encoding state for one kind of code (full or short) of a single entry.
///
/// The `写入*` methods update `实际编码` / `选重标记`, remember the values they
/// replaced and raise `有变化`.
#[derive(Clone, Debug, Copy, Default)]
pub struct 部分编码信息 {
    /// Original code; `数据::生成码表` renders it as the output code string.
    pub 原始编码: 编码,
    /// Candidate position of the original code; emitted as the rank in `数据::生成码表`.
    pub 原始编码候选位置: u8,
    /// Actual code currently recorded for the entry.
    pub 实际编码: 编码,
    /// Duplicate (selection) flag currently recorded for the entry.
    pub 选重标记: bool,
    /// Value of `实际编码` at the moment of the last recorded change.
    pub 上一个实际编码: 编码,
    /// Value of `选重标记` at the moment of the last recorded change.
    pub 上一个选重标记: bool,
    /// Set to `true` by the `写入*` methods when a write changes the state.
    /// NOTE(review): nothing in this file resets it — presumably the consumer does.
    pub 有变化: bool,
}
80
81impl 部分编码信息 {
82 #[inline(always)]
83 pub fn 写入(&mut self, code: 编码, duplicate: bool) {
84 if self.实际编码 == code && self.选重标记 == duplicate {
85 return;
86 }
87 self.有变化 = true;
88 self.上一个实际编码 = self.实际编码;
89 self.上一个选重标记 = self.选重标记;
90 self.实际编码 = code;
91 self.选重标记 = duplicate;
92 }
93
94 #[inline(always)]
95 pub fn 写入编码(&mut self, code: 编码) {
96 if self.实际编码 == code {
97 return;
98 }
99 self.有变化 = true;
100 self.上一个实际编码 = self.实际编码;
101 self.上一个选重标记 = self.选重标记;
102 self.实际编码 = code;
103 }
104
105 #[inline(always)]
106 pub fn 写入选重(&mut self, duplicate: bool) {
107 if self.选重标记 == duplicate {
108 return;
109 }
110 self.有变化 = true;
111 self.上一个实际编码 = self.实际编码;
112 self.上一个选重标记 = self.选重标记;
113 self.选重标记 = duplicate;
114 }
115}
116
/// Per-entry encoding record: the entry's length and frequency together with
/// the state of its full code and its short code.
#[derive(Clone, Debug)]
pub struct 编码信息 {
    /// Word length copied from the entry.
    pub 词长: usize,
    /// Frequency copied from the entry.
    pub 频率: u64,
    /// State of the full code.
    pub 全码: 部分编码信息,
    /// State of the short code.
    pub 简码: 部分编码信息,
}
125
126impl 编码信息 {
127 pub fn new(词: &可编码对象) -> Self {
128 Self {
129 词长: 词.词长,
130 频率: 词.频率,
131 全码: 部分编码信息::default(),
132 简码: 部分编码信息::default(),
133 }
134 }
135}
136
/// Numeric id of a key. Ids are assigned starting at 1 by `数据::预处理字母表`;
/// `数据::数字转编码` skips digit 0, so 0 never denotes a key.
pub type 键 = u64;

/// Mapping from element id (the index) to key id.
pub type 元素映射 = Vec<键>;

/// Eight per-code counters. `数据::预处理指法标记` fills indices 0–5 and leaves
/// indices 6 and 7 at zero.
pub type 用指标记 = [u8; 8];

/// Per-code flags as produced by `数据::预处理自动上屏`.
pub type 自动上屏 = Vec<bool>;
148
/// One row of the code table produced by `数据::生成码表`.
#[derive(Debug, Serialize)]
pub struct 码表项 {
    /// Name of the entry.
    pub name: String,
    /// Full code rendered as key characters.
    pub full: String,
    /// Candidate position recorded for the full code.
    pub full_rank: u8,
    /// Short code rendered as key characters.
    pub short: String,
    /// Candidate position recorded for the short code.
    pub short_rank: u8,
}
158
/// Preprocessed data derived from the configuration and raw inputs by `数据::新建`.
#[derive(Debug, Clone)]
pub struct 数据 {
    /// The configuration passed to `新建`.
    pub 配置: 配置,
    /// Processed entries, sorted by frequency in descending order.
    pub 词列表: Vec<可编码对象>,
    /// Loss parameters per key id, with `ideal` normalized to sum to 1.
    pub 键位分布信息: 键位分布信息,
    /// Equivalence value per numeric code.
    pub 当量信息: 当量信息,
    /// Initial element-id → key-id mapping taken from the configuration.
    pub 初始映射: 元素映射,
    /// Radix used to pack codes: the number of keys (alphabet plus select keys) plus one.
    pub 进制: u64,
    /// Key ids of the select keys, in configuration order.
    pub 选择键: Vec<键>,
    /// Key character → key id.
    pub 键转数字: FxHashMap<char, 键>,
    /// Key id → key character.
    pub 数字转键: FxHashMap<键, char>,
    /// Element name → element id.
    pub 元素转数字: FxHashMap<String, 元素>,
    /// Element id → element name.
    pub 数字转元素: FxHashMap<元素, String>,
}
174
175impl Mapped {
176 pub fn length(&self) -> usize {
177 match self {
178 Mapped::Basic(s) => s.len(),
179 Mapped::Advanced(v) => v.len(),
180 }
181 }
182
183 pub fn normalize(&self) -> Vec<MappedKey> {
184 match self {
185 Mapped::Advanced(vector) => vector.clone(),
186 Mapped::Basic(string) => string.chars().map(MappedKey::Ascii).collect(),
187 }
188 }
189}
190
/// Result of `数据::预处理字母表`: (radix, select-key ids, key char → id, id → key char).
type 字母表信息 = (u64, Vec<键>, FxHashMap<char, 键>, FxHashMap<键, char>);
/// Result of `数据::预处理映射`: (element → key mapping, element name → id, id → element name).
type 映射信息 = (元素映射, FxHashMap<String, 元素>, FxHashMap<元素, String>);
193
194impl 数据 {
195 pub fn 新建(
196 配置: 配置,
197 原始词列表: Vec<原始可编码对象>,
198 原始键位分布信息: 原始键位分布信息,
199 原始当量信息: 原始当量信息,
200 ) -> Result<Self, 错误> {
201 let (进制, 选择键, 键转数字, 数字转键) = Self::预处理字母表(&配置)?;
202 let (初始映射, 元素转数字, 数字转元素) = Self::预处理映射(&配置, &键转数字, 进制)?;
203 let 最大码长 = 配置.encoder.max_length;
204 let 词列表 = Self::预处理词列表(原始词列表, 最大码长, &元素转数字)?;
205 let 组合长度 = 最大码长.min(最大按键组合长度);
206 let 编码空间大小 = 进制.pow(组合长度 as u32) as usize;
207 let 键位分布信息 = Self::预处理键位分布信息(&原始键位分布信息, 进制, &数字转键);
208 let 当量信息 = Self::预处理当量信息(&原始当量信息, 编码空间大小, 进制, &数字转键);
209 let repr = Self {
210 配置,
211 词列表,
212 键位分布信息,
213 当量信息,
214 初始映射,
215 元素转数字,
216 数字转元素,
217 键转数字,
218 数字转键,
219 进制,
220 选择键,
221 };
222 Ok(repr)
223 }
224
225 pub fn 预处理字母表(config: &配置) -> Result<字母表信息, 错误> {
229 let mut key_repr: FxHashMap<char, 键> = FxHashMap::default();
230 let mut repr_key: FxHashMap<键, char> = FxHashMap::default();
231 let mut index = 1;
232 for key in config.form.alphabet.chars() {
233 if key_repr.contains_key(&key) {
234 return Err("编码键有重复!".into());
235 };
236 key_repr.insert(key, index);
237 repr_key.insert(index, key);
238 index += 1;
239 }
240 let default_select_keys = vec!['_'];
241 let select_keys = config
242 .encoder
243 .select_keys
244 .as_ref()
245 .unwrap_or(&default_select_keys);
246 if select_keys.is_empty() {
247 return Err("选择键不能为空!".into());
248 }
249 let mut parsed_select_keys: Vec<键> = vec![];
250 for key in select_keys {
251 if key_repr.contains_key(key) {
252 return Err("编码键有重复!".into());
253 };
254 key_repr.insert(*key, index);
255 repr_key.insert(index, *key);
256 parsed_select_keys.push(index);
257 index += 1;
258 }
259 let radix = index;
260 Ok((radix, parsed_select_keys, key_repr, repr_key))
261 }
262
263 pub fn 预处理映射(
265 config: &配置,
266 key_repr: &FxHashMap<char, 键>,
267 radix: u64,
268 ) -> Result<映射信息, 错误> {
269 let mut keymap: 元素映射 = Vec::new();
270 let mut element_repr: FxHashMap<String, 元素> = FxHashMap::default();
271 let mut repr_element: FxHashMap<元素, String> = FxHashMap::default();
272 for x in 0..radix {
273 keymap.push(x);
274 }
275 for (key, value) in key_repr {
276 element_repr.insert(key.to_string(), *value as usize);
277 repr_element.insert(*value as usize, key.to_string());
278 }
279 for (element, mapped) in &config.form.mapping {
280 let normalized = mapped.normalize();
281 for (index, mapped_key) in normalized.iter().enumerate() {
282 if let MappedKey::Ascii(x) = mapped_key {
283 if let Some(key) = key_repr.get(x) {
284 let name = Self::assemble(element, index);
285 element_repr.insert(name.clone(), keymap.len());
286 repr_element.insert(keymap.len(), name.clone());
287 keymap.push(*key);
288 } else {
289 return Err(
290 format!("元素 {element} 的编码中的字符 {x} 并不在字母表中").into()
291 );
292 }
293 }
294 }
295 }
296 Ok((keymap, element_repr, repr_element))
297 }
298
299 pub fn assemble(element: &String, index: usize) -> String {
300 if index == 0 {
301 element.to_string()
302 } else {
303 format!("{}.{}", element, index)
304 }
305 }
306
307 pub fn 预处理词列表(
309 raw_encodables: Vec<原始可编码对象>,
310 max_length: usize,
311 element_repr: &FxHashMap<String, 元素>,
312 ) -> Result<Vec<可编码对象>, 错误> {
313 let mut encodables = Vec::new();
314 for (index, assemble) in raw_encodables.into_iter().enumerate() {
315 let 原始可编码对象 {
316 name,
317 frequency,
318 level,
319 sequence,
320 } = assemble;
321 let raw_sequence: Vec<_> = sequence.split(' ').collect();
322 let mut sequence = 元素序列::new();
323 let length = raw_sequence.len();
324 if length > max_length {
325 return Err(format!(
326 "编码对象「{name}」包含的元素数量为 {length},超过了最大码长 {max_length}"
327 )
328 .into());
329 }
330 for element in raw_sequence {
331 if let Some(number) = element_repr.get(element) {
332 sequence.push(*number);
333 } else {
334 return Err(format!(
335 "编码对象「{name}」包含的元素「{element}」无法在键盘映射中找到"
336 )
337 .into());
338 }
339 }
340 encodables.push(可编码对象 {
341 名称: name.clone(),
342 词长: name.chars().count(),
343 元素序列: sequence,
344 频率: frequency,
345 简码等级: level,
346 原始顺序: index,
347 });
348 }
349
350 encodables.sort_by_key(|x| Reverse(x.频率));
351 Ok(encodables)
352 }
353
354 pub fn 生成码表(&self, buffer: &[编码信息]) -> Vec<码表项> {
355 let mut entries: Vec<(usize, 码表项)> = Vec::new();
356 let encodables = &self.词列表;
357 let recover = |code: 编码| {
358 Self::数字转编码(code, self.进制, &self.数字转键)
359 .iter()
360 .collect()
361 };
362 for (index, encodable) in encodables.iter().enumerate() {
363 let entry = 码表项 {
364 name: encodable.名称.clone(),
365 full: recover(buffer[index].全码.原始编码),
366 full_rank: buffer[index].全码.原始编码候选位置,
367 short: recover(buffer[index].简码.原始编码),
368 short_rank: buffer[index].简码.原始编码候选位置,
369 };
370 entries.push((encodable.原始顺序, entry));
371 }
372 entries.sort_by_key(|x| x.0);
373 entries.into_iter().map(|x| x.1).collect()
374 }
375
376 pub fn 更新配置(&self, candidate: &元素映射) -> 配置 {
378 let mut new_config = self.配置.clone();
379 let lookup = |element: &String| {
380 let number = *self.元素转数字.get(element).unwrap(); let current_mapped = &candidate[number];
382 *self.数字转键.get(current_mapped).unwrap() };
384 for (element, mapped) in &self.配置.form.mapping {
385 let new_element = element.clone();
386 let new_mapped = match mapped {
387 Mapped::Basic(string) => {
388 let mut all_codes = String::new();
389 for index in 0..string.len() {
390 let name = Self::assemble(element, index);
391 all_codes.push(lookup(&name));
392 }
393 Mapped::Basic(all_codes)
394 }
395 Mapped::Advanced(vector) => {
396 let all_codes: Vec<MappedKey> = vector
397 .iter()
398 .enumerate()
399 .map(|(index, mapped_key)| match mapped_key {
400 MappedKey::Ascii(_) => {
401 MappedKey::Ascii(lookup(&Self::assemble(element, index)))
402 }
403 other => other.clone(),
404 })
405 .collect();
406 Mapped::Advanced(all_codes)
407 }
408 };
409 new_config.form.mapping.insert(new_element, new_mapped);
410 }
411 new_config
412 }
413
414 pub fn 数字转编码(code: 编码, 进制: u64, repr_key: &FxHashMap<键, char>) -> Vec<char> {
416 let mut chars = Vec::new();
417 let mut remainder = code;
418 while remainder > 0 {
419 let k = remainder % 进制;
420 remainder /= 进制;
421 if k == 0 {
422 continue;
423 }
424 let char = repr_key.get(&k).unwrap(); chars.push(*char);
426 }
427 chars
428 }
429
430 pub fn 预处理键位分布信息(
432 key_distribution: &原始键位分布信息,
433 进制: u64,
434 repr_key: &FxHashMap<键, char>,
435 ) -> Vec<键位分布损失函数> {
436 let default_loss = 键位分布损失函数 {
437 ideal: 0.1,
438 lt_penalty: 0.0,
439 gt_penalty: 1.0,
440 };
441 let mut result: Vec<键位分布损失函数> = (0..进制)
442 .map(|x| {
443 if x == 0 {
445 return default_loss.clone();
446 }
447 let key = repr_key.get(&x).unwrap();
448 key_distribution.get(key).unwrap_or(&default_loss).clone()
449 })
450 .collect();
451 let sum: f64 = result.iter().map(|x| x.ideal).sum();
453 for i in result.iter_mut() {
454 i.ideal /= sum;
455 }
456 result
457 }
458
459 pub fn 预处理当量信息(
462 原始当量信息: &原始当量信息,
463 space: usize,
464 进制: u64,
465 数字转键: &FxHashMap<键, char>,
466 ) -> Vec<f64> {
467 let mut result: Vec<f64> = vec![0.0; space];
468 for (index, equivalence) in result.iter_mut().enumerate() {
469 let chars = Self::数字转编码(index as u64, 进制, 数字转键);
470 for correlation_length in [2, 3, 4] {
471 if chars.len() < correlation_length {
472 break;
473 }
474 for i in 0..=(chars.len() - correlation_length) {
476 let substr: String = chars[i..(i + correlation_length)].iter().collect();
477 *equivalence += 原始当量信息.get(&substr).unwrap_or(&0.0);
478 }
479 }
480 }
481 result
482 }
483
484 pub fn 预处理指法标记(&self) -> Vec<用指标记> {
489 let fingering_types = get_fingering_types();
490 let mut result: Vec<用指标记> = Vec::with_capacity(self.get_space());
491 for code in 0..self.get_space() {
492 let chars = Self::数字转编码(code as u64, self.进制, &self.数字转键);
493 if chars.len() < 2 {
494 result.push(用指标记::default());
495 continue;
496 }
497 let mut total = 用指标记::default();
498 for i in 0..(chars.len() - 1) {
499 let pair = (chars[i], chars[i + 1]);
500 if fingering_types.same_hand.contains(&pair) {
501 total[0] += 1;
502 }
503 if fingering_types.same_finger_large_jump.contains(&pair) {
504 total[1] += 1;
505 }
506 if fingering_types.same_finger_small_jump.contains(&pair) {
507 total[2] += 1;
508 }
509 if fingering_types.little_finger_interference.contains(&pair) {
510 total[3] += 1;
511 }
512 if fingering_types.awkward_upside_down.contains(&pair) {
513 total[4] += 1;
514 }
515 }
516 for i in 0..(chars.len() - 2) {
517 let triple = (chars[i], chars[i + 1], chars[i + 2]);
518 if triple.0 == triple.1 && triple.1 == triple.2 {
519 total[5] += 1;
520 }
521 }
522 result.push(total);
523 }
524 result
525 }
526
527 pub fn 预处理自动上屏(&self) -> Result<Vec<bool>, 错误> {
530 let mut result: Vec<bool> = vec![];
531 let encoder = &self.配置.encoder;
532 let mut re: Option<Regex> = None;
533 if let Some(pattern) = &encoder.auto_select_pattern {
534 let re_or_error = Regex::new(pattern);
535 if let Ok(regex) = re_or_error {
536 re = Some(regex);
537 } else {
538 return Err(format!("正则表达式 {pattern} 无法解析").into());
539 }
540 }
541 for code in 0..self.get_space() {
542 let chars = Self::数字转编码(code as u64, self.进制, &self.数字转键);
543 let string: String = chars.iter().collect();
544 let is_matched = if let Some(re) = &re {
545 re.is_match(&string)
546 } else if let Some(length) = encoder.auto_select_length {
547 chars.len() >= length
548 } else {
549 true
550 };
551 let is_max_length = chars.len() == encoder.max_length;
552 result.push(is_matched || is_max_length);
553 }
554 Ok(result)
555 }
556
557 pub fn 预处理简码规则(&self, schemes: &Vec<Scheme>) -> Result<Vec<简码配置>, 错误> {
558 let mut compiled_schemes = Vec::new();
559 for scheme in schemes {
560 let prefix = scheme.prefix;
561 let count = scheme.count.unwrap_or(1);
562 let select_keys = if let Some(keys) = &scheme.select_keys {
563 let mut transformed_keys = Vec::new();
564 for key in keys {
565 let transformed_key = self
566 .键转数字
567 .get(key)
568 .ok_or(format!("简码的选择键 {key} 不在全局选择键中"))?;
569 transformed_keys.push(*transformed_key);
570 }
571 transformed_keys
572 } else {
573 self.选择键.clone()
574 };
575 if count > select_keys.len() {
576 return Err("选重数量不能高于选择键数量".into());
577 }
578 compiled_schemes.push(简码配置 {
579 prefix,
580 select_keys: select_keys[..count].to_vec(),
581 });
582 }
583 Ok(compiled_schemes)
584 }
585
586 pub fn 预处理简码配置(
587 &self,
588 configs: Vec<ShortCodeConfig>,
589 ) -> Result<[Vec<简码配置>; 最大词长], 错误> {
590 let mut short_code: [Vec<简码配置>; 最大词长] = Default::default();
591 for config in configs {
592 match config {
593 ShortCodeConfig::Equal {
594 length_equal,
595 schemes,
596 } => {
597 short_code[length_equal - 1].extend(self.预处理简码规则(&schemes)?);
598 }
599 ShortCodeConfig::Range {
600 length_in_range: (from, to),
601 schemes,
602 } => {
603 for length in from..=to {
604 short_code[length - 1].extend(self.预处理简码规则(&schemes)?);
605 }
606 }
607 }
608 }
609 Ok(short_code)
610 }
611
612 pub fn get_space(&self) -> usize {
613 let max_length = self.配置.encoder.max_length.min(最大按键组合长度);
614 self.进制.pow(max_length as u32) as usize
615 }
616}