1use aho_corasick::AhoCorasick;
7use std::sync::LazyLock;
8
9use crate::generated::values;
10
/// Lookup interface over a fixed vocabulary of marking tokens.
///
/// Implementors must be `Send + Sync` so a token set can be shared across
/// threads.
pub trait TokenSet: Send + Sync {
    /// Looks up `token` in the set.
    ///
    /// Returns the set's own `'static` spelling of the token when it is
    /// recognized, or `None` when it is not.
    fn canonicalize(&self, token: &str) -> Option<&'static str>;

    /// Reports whether `token` is one of the trigraphs known to this set.
    // NOTE(review): trigraphs are presumably three-letter country codes
    // (e.g. "USA") — confirm against the generated tables.
    fn is_trigraph(&self, token: &str) -> bool;
}
20
21static AUTOMATON: LazyLock<AhoCorasick> = LazyLock::new(|| {
23 AhoCorasick::builder()
24 .ascii_case_insensitive(false) .build(values::ALL_CVE_TOKENS)
26 .expect("CVE token automaton construction failed")
27});
28
/// Zero-sized [`TokenSet`] implementation backed by the generated lookup
/// tables in `crate::generated::values`.
///
/// The type carries no state, so it is free to copy and can be created with
/// either the unit-struct literal `CapcoTokenSet` or `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct CapcoTokenSet;
30
31impl TokenSet for CapcoTokenSet {
32 fn canonicalize(&self, token: &str) -> Option<&'static str> {
33 values::ALL_CVE_TOKENS
37 .binary_search(&token)
38 .ok()
39 .map(|i| values::ALL_CVE_TOKENS[i])
40 }
41
42 fn is_trigraph(&self, token: &str) -> bool {
43 values::TRIGRAPHS.binary_search(&token).is_ok()
47 }
48}
49
50impl CapcoTokenSet {
51 #[allow(dead_code)]
54 pub(crate) fn automaton() -> &'static AhoCorasick {
55 &AUTOMATON
56 }
57}
58
#[cfg(test)]
mod tests {
    use super::*;

    // The lookups above use `binary_search`, which is only correct on sorted
    // input. The two table tests below check strict ordering, which also
    // rules out duplicate entries.

    #[test]
    fn all_cve_tokens_are_sorted_and_unique() {
        for pair in values::ALL_CVE_TOKENS.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier < later,
                "ALL_CVE_TOKENS is not strictly sorted: {:?} >= {:?}",
                earlier,
                later,
            );
        }
    }

    #[test]
    fn trigraphs_are_sorted_and_unique() {
        for pair in values::TRIGRAPHS.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier < later,
                "TRIGRAPHS is not strictly sorted: {:?} >= {:?}",
                earlier,
                later,
            );
        }
    }

    #[test]
    fn canonicalize_returns_known_token() {
        let canonical = CapcoTokenSet.canonicalize("SECRET");
        assert_eq!(canonical, Some("SECRET"));
    }

    #[test]
    fn canonicalize_returns_none_for_unknown() {
        let canonical = CapcoTokenSet.canonicalize("BANANAPHONE");
        assert_eq!(canonical, None);
    }

    #[test]
    fn usa_is_a_known_trigraph() {
        let known = CapcoTokenSet.is_trigraph("USA");
        assert!(known);
    }

    #[test]
    fn unknown_string_is_not_a_trigraph() {
        let known = CapcoTokenSet.is_trigraph("XYZ_NOT_A_COUNTRY");
        assert!(!known);
    }
}