bistun_core/traits.rs
1// Bistun Linguistic Metadata Service (LMS)
2// Copyright (C) 2026 Francis Xavier Wazeter IV
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! # Traits Dictionary & Enumerations
18//! Crate: bistun-core
19//! Ref: [011-LMS-DTO]
20//! Location: `crates/bistun-core/src/traits.rs`
21//!
//! **Why**: This module defines the shared vocabulary (`TraitKey` and its companion enums) used to encapsulate the Typological and Orthographic properties of a locale.
23//! **Impact**: If this module is compromised, the `CapabilityManifest` cannot be constructed, breaking the capability engine and causing downstream services to fail.
24//!
25//! ### Glossary
26//! * **Typology**: The structural properties of a language (e.g., morphology).
27//! * **Orthography**: The mechanical rendering requirements of a script (e.g., directionality).
28
29use serde::{Deserialize, Serialize};
30
31/// The "Golden Set" of trait keys used in the `CapabilityManifest`.
32///
33/// Time: O(1) | Space: O(1)
34///
35/// # Logic Trace (Internal)
36/// 1. Represents standard keys for the DTO `traits` map.
37/// 2. Utilizes `SCREAMING_SNAKE_CASE` serialization to match the DTO standard.
38///
39/// # Examples
40/// ```rust
41/// use crate::bistun_core::traits::TraitKey;
42/// let key = TraitKey::SegmentationStrategy;
43/// assert_eq!(key, TraitKey::SegmentationStrategy);
44/// ```
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
46#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
47pub enum TraitKey {
48 // --- Rendering & Orthography ---
49 /// The primary layout direction (e.g., LTR, RTL).
50 PrimaryDirection,
51 /// Indicates if the text naturally contains bidirectional elements.
52 HasBidiElements,
53 /// Indicates if the script requires complex shaping (e.g., Arabic).
54 RequiresShaping,
55 /// Unicode blocks to preload for rendering.
56 UnicodePreloadBlocks,
57
58 // --- Segmentation & Morphology ---
59 /// Strategy used for word and sentence boundary detection.
60 SegmentationStrategy,
61 /// Typological classification of word formation.
62 MorphologyType,
63 /// Plural category logic required for the locale.
64 PluralCategories,
65
66 // --- Cultural Defaults ---
67 /// Default numeric system (e.g., latn, arab).
68 DefaultNumberingSystem,
69 /// Default calendar system (e.g., gregory, islamic).
70 DefaultCalendar,
71}
72
73/// The UI rendering direction derived from Orthographic mechanics.
74///
75/// Time: O(1) | Space: O(1)
76///
77/// # Logic Trace (Internal)
78/// 1. Represents the text layout requirements for a specific script.
79/// 2. Utilizes `UPPERCASE` serialization for cross-system compatibility.
80///
81/// # Examples
82/// ```rust
83/// use crate::bistun_core::traits::Direction;
84/// let dir = Direction::RTL;
85/// assert_eq!(dir, Direction::RTL);
86/// ```
87#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
88pub enum Direction {
89 /// Left-to-Right layout.
90 LTR,
91 /// Right-to-Left layout.
92 RTL,
93 /// Top-to-Bottom layout.
94 TTB,
95 /// Native bidirectional layout.
96 BIDI,
97}
98
99/// The boundary detection logic (Segmentation) required by the script.
100///
101/// Ordered from the lowest complexity to highest to support the High-Water Mark strategy.
102///
103/// Time: O(1) | Space: O(1)
104///
105/// # Logic Trace (Internal)
106/// 1. Ordered explicitly to allow `Ord` trait derivation to rank complexity automatically.
107/// 2. Permits `TraitAggregator` to resolve conflicts seamlessly.
108///
109/// # Examples
110/// ```rust
111/// use crate::bistun_core::traits::SegType;
112/// // Demonstrating High-Water Mark ordinal comparison
113/// assert!(SegType::DICTIONARY > SegType::SPACE);
114/// ```
115#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
116pub enum SegType {
117 /// No segmentation required.
118 NONE,
119 /// Segmentation based on whitespace.
120 SPACE,
121 /// Segmentation based on individual characters/syllables.
122 CHARACTER,
123 /// Dictionary-based complex segmentation.
124 DICTIONARY,
125}
126
127/// The Typological structure of a language's word formation.
128///
129/// Time: O(1) | Space: O(1)
130///
131/// # Logic Trace (Internal)
132/// 1. Maps language identity to execution strategies for NLP operations (e.g., stemming).
133///
134/// # Examples
135/// ```rust
136/// use crate::bistun_core::traits::MorphType;
137/// let morph = MorphType::AGGLUTINATIVE;
138/// assert_eq!(morph, MorphType::AGGLUTINATIVE);
139/// ```
140#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
141pub enum MorphType {
142 /// Words are invariant (e.g., Chinese).
143 ISOLATING,
144 /// Words are formed by stringing together discrete morphemes (e.g., Turkish).
145 AGGLUTINATIVE,
146 /// Morphemes are fused together in complex ways (e.g., Spanish).
147 FUSIONAL,
148 /// Words are formed using root consonants and vowel templates (e.g., Arabic).
149 TEMPLATIC,
150 /// Complex multi-morpheme words acting as entire sentences (e.g., Inuktitut).
151 POLYSYNTHETIC,
152}
153
154// =====================================================================
155// V2.0.0 Rule Engine Directives
156// =====================================================================
157
158/// Represents the standard algorithmic directives for the Rule Synthesis Engine.
159/// Note: this does not actually appear in the JSON, only the inner variants appear.
160/// Ref: [013-LMS-RULE]
161#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
162#[serde(untagged)]
163pub enum LmsRule {
164 /// Transliteration rule directive.
165 Trans(TransRule),
166 /// Pluralization rule directive.
167 Plural(PluralRule),
168 /// Casing rule directive.
169 Casing(CasingRule),
170 /// Normalization rule directive.
171 Norm(NormRule),
172}
173
174/// Directives for transliteration and phonetic rendering strategies.
175#[allow(non_camel_case_types)]
176#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
177pub enum TransRule {
178 /// No transliteration required.
179 NONE,
180 /// Standard romanization transformation.
181 ROMANIZATION,
182 /// Phonetic spelling transformation.
183 PHONETIC,
184 /// ICU4X algorithmic transform capability.
185 ICU_TRANSFORM,
186}
187
188/// Directives for Unicode normalization logic.
189#[allow(non_camel_case_types)]
190#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
191pub enum NormRule {
192 /// Normalization Form C.
193 NFC,
194 /// Normalization Form D.
195 NFD,
196 /// Normalization Form KC.
197 NFKC,
198 /// Normalization Form KD.
199 NFKD,
200}
201
202/// Directives for morphological plural category mapping.
203#[allow(non_camel_case_types)]
204#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
205pub enum PluralRule {
206 /// Only cardinal numbers are supported.
207 CARDINAL_ONLY,
208 /// Ordinal and cardinal numbers are supported.
209 ORDINAL_SUPPORT,
210 /// Multiple plural categories required (few, many, other, etc.).
211 MULTIPLE_CATEGORIES,
212}
213
214/// Directives for typographic casing mechanics.
215#[allow(non_camel_case_types)]
216#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
217pub enum CasingRule {
218 /// Strict case sensitivity.
219 CASE_SENSITIVE,
220 /// Case-insensitive matching.
221 CASE_INSENSITIVE,
222 /// Special Unicode casing rules (e.g., Turkish dotless i).
223 UNICODE_SPECIAL,
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the complete High-Water Mark ranking of `SegType`.
    ///
    /// Every adjacent pair is compared, so reordering any two variants
    /// (including `CHARACTER` and `DICTIONARY`) fails this test.
    #[test]
    fn test_seg_type_high_water_mark_ordering() {
        // [Logic Trace Mapping]
        // [STEP 1]: Setup: List every SegType variant from least to most complex.
        // [STEP 2]: Execute: Compare each adjacent pair using the derived Ord logic.
        // [STEP 3]: Assert: Verify the chain NONE < SPACE < CHARACTER < DICTIONARY.
        let ranked = [
            SegType::NONE,
            SegType::SPACE,
            SegType::CHARACTER,
            SegType::DICTIONARY,
        ];

        for (lower, higher) in ranked.iter().zip(ranked.iter().skip(1)) {
            assert!(lower < higher, "{:?} must rank below {:?}", lower, higher);
        }

        // Non-adjacent comparisons that the original test relied on.
        assert!(SegType::DICTIONARY > SegType::SPACE);
        assert!(SegType::CHARACTER > SegType::SPACE);

        // Taking the maximum keeps the more complex strategy.
        assert_eq!(
            SegType::CHARACTER.max(SegType::DICTIONARY),
            SegType::DICTIONARY
        );
    }

    /// Verifies the `SCREAMING_SNAKE_CASE` wire names of `TraitKey` in both directions.
    #[test]
    fn test_trait_key_serialization() {
        // [Logic Trace Mapping]
        // [STEP 1]: Setup: Pair TraitKey variants with their expected JSON strings.
        // [STEP 2]: Execute: Serialize to JSON, then deserialize the result.
        // [STEP 3]: Assert: Verify SCREAMING_SNAKE_CASE output and a lossless round trip.
        let cases = [
            (TraitKey::PrimaryDirection, r#""PRIMARY_DIRECTION""#),
            (
                TraitKey::DefaultNumberingSystem,
                r#""DEFAULT_NUMBERING_SYSTEM""#,
            ),
        ];

        for (key, expected) in cases {
            let json = serde_json::to_string(&key).expect("Failed to serialize trait key");
            assert_eq!(json, expected);

            // The renamed string must also be accepted on the way back in.
            let parsed: TraitKey =
                serde_json::from_str(&json).expect("Failed to deserialize trait key");
            assert_eq!(parsed, key);
        }
    }
}